使用 SSIS 脚本组件读取 Azure Data Lake Store 文件
Azure Data Lake Store File read using SSIS Script Component
感谢您的建议。
我的要求是,使用 SSIS 从 ADLS 读取 json 文件并加载到 SQL table
实施:
我已经实现了在 .Net 控制台应用程序中读取 json 文件内容的代码。这在控制台应用程序中运行良好。我在 SSIS 脚本组件中复制了相同的代码,但它在 AdlsClient.CreateClient.
中引发了 "The type initializer for 'Microsoft.Azure.DataLake.Store.AdlsClient' threw an exception" 异常
using Microsoft.Rest;
using Microsoft.Rest.Azure.Authentication;
using Microsoft.Azure.Management.DataLake.Store;
using Microsoft.Azure.DataLake.Store;
using Microsoft.Azure.DataLake.Store.AclTools;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
JObject results = new JObject();
string applicationId = "<appid>;
string secretKey = <secretekey>;
string tenantId = <tenantid>;
string adlsAccountName = "<ADLSNAME>.azuredatalakestore.net";
ServiceClientCredentials creds = ApplicationTokenProvider.LoginSilentAsync(tenantId, applicationId, secretKey).Result;
AdlsClient adlsClient = AdlsClient.CreateClient(adlsAccountName, creds);
string srcPath = @"/InputFiles/1636274001230002_20180621_104427.json";
using (StreamReader readStream = new
StreamReader(adlsClient.GetReadStream(srcPath)))
{
var p2Object = JsonConvert.DeserializeObject(readStream.ReadToEnd());
results = JObject.Parse(p2Object.ToString());
}
date = ((string)results["eeData"][0]["startDate"]);
machine = ((string)results["eeData"][0]["machineName"]);
ppl = ((string)results["eeData"][0]["ppl"]);
问题在于第 3 方 DLL 的 SSIS 脚本组件中缺少引用路径。在控制台应用程序中,我能够安装 NuGet 包管理器。但是在 SSIS 脚本组件中,NuGet 包安装失败并且 SSIS 组件缺少引用。下面的代码将强制脚本组件编译器从给定路径引用 DLL。
在 PreExecute() / Main() 方法上方添加此代码。
static ScriptMain()
{
AppDomain.CurrentDomain.AssemblyResolve += new ResolveEventHandler(CurrentDomain_AssemblyResolve);
}
static System.Reflection.Assembly CurrentDomain_AssemblyResolve(object sender, ResolveEventArgs args)
{
if (args.Name.Contains("Newtonsoft.Json"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Program Files\Microsoft SDKs\Azure\.NET SDK\v2.9\ToolsRef\Newtonsoft.Json.dll");
}
if (args.Name.Contains("Microsoft.Azure.DataLake.Store"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Program Files\WindowsPowerShell\Modules\AzureRM.DataLakeStore.2.0\Microsoft.Azure.DataLake.Store.dll");
}
if (args.Name.Contains("Microsoft.Rest.ClientRuntime.Azure.Authentication"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Program Files\WindowsPowerShell\Modules\Azure.1.2\StorSimple\Microsoft.Rest.ClientRuntime.Azure.Authentication.dll");
}
if (args.Name.Contains("Microsoft.Rest.ClientRuntime"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Program Files\WindowsPowerShell\Modules\Azure.1.2\Services\Microsoft.Rest.ClientRuntime.dll");
}
if (args.Name.Contains("NLog"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Users\<user>\source\repos\Integration Services Project2\NLog.dll");
}
return null;
}
感谢您的建议。
我的要求是,使用 SSIS 从 ADLS 读取 json 文件并加载到 SQL table
实施: 我已经实现了在 .Net 控制台应用程序中读取 json 文件内容的代码。这在控制台应用程序中运行良好。我在 SSIS 脚本组件中复制了相同的代码,但它在 AdlsClient.CreateClient.
中引发了 "The type initializer for 'Microsoft.Azure.DataLake.Store.AdlsClient' threw an exception" 异常using Microsoft.Rest;
using Microsoft.Rest.Azure.Authentication;
using Microsoft.Azure.Management.DataLake.Store;
using Microsoft.Azure.DataLake.Store;
using Microsoft.Azure.DataLake.Store.AclTools;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
JObject results = new JObject();
string applicationId = "<appid>;
string secretKey = <secretekey>;
string tenantId = <tenantid>;
string adlsAccountName = "<ADLSNAME>.azuredatalakestore.net";
ServiceClientCredentials creds = ApplicationTokenProvider.LoginSilentAsync(tenantId, applicationId, secretKey).Result;
AdlsClient adlsClient = AdlsClient.CreateClient(adlsAccountName, creds);
string srcPath = @"/InputFiles/1636274001230002_20180621_104427.json";
using (StreamReader readStream = new
StreamReader(adlsClient.GetReadStream(srcPath)))
{
var p2Object = JsonConvert.DeserializeObject(readStream.ReadToEnd());
results = JObject.Parse(p2Object.ToString());
}
date = ((string)results["eeData"][0]["startDate"]);
machine = ((string)results["eeData"][0]["machineName"]);
ppl = ((string)results["eeData"][0]["ppl"]);
问题在于第 3 方 DLL 的 SSIS 脚本组件中缺少引用路径。在控制台应用程序中,我能够安装 NuGet 包管理器。但是在 SSIS 脚本组件中,NuGet 包安装失败并且 SSIS 组件缺少引用。下面的代码将强制脚本组件编译器从给定路径引用 DLL。
在 PreExecute() / Main() 方法上方添加此代码。
static ScriptMain()
{
AppDomain.CurrentDomain.AssemblyResolve += new ResolveEventHandler(CurrentDomain_AssemblyResolve);
}
static System.Reflection.Assembly CurrentDomain_AssemblyResolve(object sender, ResolveEventArgs args)
{
if (args.Name.Contains("Newtonsoft.Json"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Program Files\Microsoft SDKs\Azure\.NET SDK\v2.9\ToolsRef\Newtonsoft.Json.dll");
}
if (args.Name.Contains("Microsoft.Azure.DataLake.Store"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Program Files\WindowsPowerShell\Modules\AzureRM.DataLakeStore.2.0\Microsoft.Azure.DataLake.Store.dll");
}
if (args.Name.Contains("Microsoft.Rest.ClientRuntime.Azure.Authentication"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Program Files\WindowsPowerShell\Modules\Azure.1.2\StorSimple\Microsoft.Rest.ClientRuntime.Azure.Authentication.dll");
}
if (args.Name.Contains("Microsoft.Rest.ClientRuntime"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Program Files\WindowsPowerShell\Modules\Azure.1.2\Services\Microsoft.Rest.ClientRuntime.dll");
}
if (args.Name.Contains("NLog"))
{
return System.Reflection.Assembly.LoadFile(@"C:\Users\<user>\source\repos\Integration Services Project2\NLog.dll");
}
return null;
}