使用 XmlWriter 从 LINQ to SQL / LINQPad 创建大型文档时抛出内存不足异常
Using XmlWriter to create large document from LINQ to SQL / LINQPad throws Out of Memory Exception
我正在尝试在 LINQPad 脚本中导出数据并不断收到内存不足异常。我觉得脚本正在执行所有 'streamable' 操作,所以不确定为什么我会得到这个。
代码的主循环如下所示。一些注意事项:
1) 第一个查询返回大约 60K 行:profileDB.Profiles.Where(p => p.Group.gName == groupName).Select( d => d.pAuthID )
2) 第二个查询针对每个 pAuthID 从数据库返回若干行,其中一个字段是以字符串形式存储的 Xml 数据块。它并不算大……肯定小于 500K。每个 pAuthID 最多可以有 50 行 FolderItems。查询是:profileDB.FolderItems.Where(f => f.Profile.pAuthID == p && ( folderTypes[0] == "*" || folderTypes.Contains(f.fiEntryType) ) ).OrderBy(f => f.fiEntryDate)
3) 当处理开始时,我只在结果窗格中写了一行。
4) 脚本运行了很长一段时间,当输出文件达到大约 600-700MB 时抛出异常。我知道文件很大,但需求就是要把所有数据转储为 Xml。
5) WriteFolderItems
function/loop 将粘贴在主循环下方。
6) 我在每个 xDataDef 元素后调用 XmlWriter.Flush
。
// Streams one "xDataDef" element per profile of the selected group into a single
// "xDataDefs" document, flushing the writer after every profile that has folder items.
// NOTE(review): a LINQ to SQL DataContext with object tracking enabled keeps a reference to
// every entity it materialises. For an export of this size consider setting
// profileDB.ObjectTrackingEnabled = false before the context runs its first query
// (it cannot be changed afterwards) - confirm where profileDB is created.
using ( var xw = XmlWriter.Create( fileName, new XmlWriterSettings { Indent = false } ) )
{
    xw.WriteStartElement( "xDataDefs" );

    var authIds = profileDB.Profiles
        .Where( profile => profile.Group.gName == groupName )
        .Select( profile => profile.pAuthID );

    foreach ( var p in authIds )
    {
        if ( totalRows == 0 ) // first one...
        {
            string.Format( "Writing results to {0}...", fileName ).Dump( "Progress" );
        }
        totalRows++;

        // Materialise the per-profile result once. The previous Any() check followed by the
        // enumeration inside WriteFolderItems executed the same deferred query twice for every
        // profile. The result set is small per profile (stated as at most ~50 rows), so holding
        // it in a list is cheap and it becomes collectable after each iteration.
        var folderItems = profileDB.FolderItems
            .Where( f => f.Profile.pAuthID == p && ( folderTypes[0] == "*" || folderTypes.Contains( f.fiEntryType ) ) )
            .OrderBy( f => f.fiEntryDate )
            .ToList();

        if ( folderItems.Count > 0 )
        {
            xw.WriteStartElement( "xDataDef" );
            xw.WriteAttributeString( "id-auth", p );
            xw.WriteStartElement( "FolderItems" );
            WriteFolderItems( profileDB, datalockerConnectionString, xw, folderItems, documentsDirectory, calcDocumentFolder, exportFileData );
            xw.WriteEndElement(); // FolderItems
            xw.WriteEndElement(); // xDataDef
            xw.Flush();
        }
    }

    xw.WriteEndElement(); // xDataDefs
}
WriteFolderItems 也有循环代码,如下所示。一些注意事项:
1) 我希望 foreach( var f in folderItems )
能够流式传输
2) 对于某些 Xml blob 中含有缓存文档(cached-document)的 FolderItem 行,我需要对数据库运行约 1-5 次查询,以获取一些附加信息并插入到 Xml 导出中:var docInfo = profileDB.Documents.Where( d => d.docfiKey == f.fiKey && d.docFilename == fileName ).FirstOrDefault();
3) 我在每个 FolderItem 行之后调用 XmlWriter.Flush
。
/// <summary>
/// Writes one "FolderItem" element to <paramref name="xw"/> for every entry of
/// <paramref name="folderItems"/>, flushing the writer after each one.
/// </summary>
/// <param name="profileDB">Data context queried for the Documents row matching each cached document.</param>
/// <param name="datalockerConnectionString">Connection string handed to DataLockerExtensions.ByConnection.</param>
/// <param name="xw">Writer that receives the FolderItem elements.</param>
/// <param name="folderItems">Folder items to export; enumerated once.</param>
/// <param name="documentsOutputDirectory">Directory that receives exported document files.</param>
/// <param name="calcDocumentFolder">Folder argument passed to the DataLocker Exists/Get calls.</param>
/// <param name="exportFileData">When false, no document files are written to disk.</param>
public void WriteFolderItems( BTR.Evolution.Data.DataContexts.Legacy.xDS.DataContext profileDB, string datalockerConnectionString, XmlWriter xw, IEnumerable<BTR.Evolution.Data.DataContexts.Legacy.xDS.FolderItem> folderItems, string documentsOutputDirectory, string calcDocumentFolder, bool exportFileData )
{
    foreach( var folderItem in folderItems )
    {
        // fiItem carries the calculation as an Xml string.
        XElement calculationXml = XElement.Parse( folderItem.fiItem );

        // Every 'cached-document' element refers to a document stored in the DataLocker database.
        var cachedDocuments = calculationXml
            .Elements( "Data" )
            .Elements( "TabDef" )
            .Elements( "cache-documents" )
            .Elements( "cached-document" );

        foreach( var cachedDocument in cachedDocuments )
        {
            var fileName = (string)cachedDocument.Attribute( "name" );

            // Append author/token so they are available during import.
            var matchingDocuments = profileDB.Documents.Where( d => d.docfiKey == folderItem.fiKey && d.docFilename == fileName );
            var docInfo = matchingDocuments.FirstOrDefault();
            if ( docInfo != null )
            {
                cachedDocument.Add(
                    new XElement( "author", docInfo.docUploadAuthID ),
                    new XElement( "token", docInfo.docDataLockerToken ) );
            }

            // The file export does not touch the XmlWriter; it only saves the document to local disk.
            if ( !exportFileData )
            {
                continue;
            }
            if ( !DataLockerExtensions.ByConnection( datalockerConnectionString ).Exists( calcDocumentFolder, fileName, null ) )
            {
                continue;
            }

            // Forward slashes in the document name are replaced so the name is usable as a single file name.
            var targetPath = Path.Combine( documentsOutputDirectory, fileName.Replace( "/", "__" ) );
            string contentType;
            using ( var fs = new FileStream( targetPath, FileMode.Create ) )
            using ( var ds = DataLockerExtensions.ByConnection( datalockerConnectionString ).Get( calcDocumentFolder, fileName, null, out contentType ) )
            {
                ds.CopyTo( fs );
            }
        }

        // Emit the folder item's columns followed by the (possibly augmented) calculation Xml.
        xw.WriteStartElement( "FolderItem" );
        xw.WriteElementString( "Key", folderItem.fiKey.ToString() );
        xw.WriteElementString( "EntryDate", XmlConvert.ToString( folderItem.fiEntryDate.Value, XmlDateTimeSerializationMode.Local ) );
        xw.WriteElementString( "ItemType", folderItem.fiEntryType );
        xw.WriteElementString( "Author", folderItem.fiAuthor );
        xw.WriteElementString( "Comment", folderItem.fiComment );

        xw.WriteStartElement( "Item" );
        calculationXml.WriteTo( xw );
        xw.WriteEndElement(); // Item

        xw.WriteEndElement(); // FolderItem
        xw.Flush();
    }
}
确保禁用更改跟踪,否则 EF 或 L2S 的更改跟踪器会一直保留对每个已加载实体的引用,这些实体占用的内存就无法被回收。对于 LINQ to SQL,可以在执行第一个查询之前设置 DataContext.ObjectTrackingEnabled = false。
我正在尝试在 LINQPad 脚本中导出数据并不断收到内存不足异常。我觉得脚本正在执行所有 'streamable' 操作,所以不确定为什么我会得到这个。
代码的主循环如下所示。一些注意事项:
1) 第一个查询返回大约 60K 行:profileDB.Profiles.Where(p => p.Group.gName == groupName).Select( d => d.pAuthID )
2) 第二个查询针对每个 pAuthID 从数据库返回若干行,其中一个字段是以字符串形式存储的 Xml 数据块。它并不算大……肯定小于 500K。每个 pAuthID 最多可以有 50 行 FolderItems。查询是:profileDB.FolderItems.Where(f => f.Profile.pAuthID == p && ( folderTypes[0] == "*" || folderTypes.Contains(f.fiEntryType) ) ).OrderBy(f => f.fiEntryDate)
3) 当处理开始时,我只在结果窗格中写了一行。
4) 脚本运行了很长一段时间,当输出文件达到大约 600-700MB 时抛出异常。我知道文件很大,但需求就是要把所有数据转储为 Xml。
5) WriteFolderItems
function/loop 将粘贴在主循环下方。
6) 我在每个 xDataDef 元素后调用 XmlWriter.Flush
。
// Streams one "xDataDef" element per profile of the selected group into a single
// "xDataDefs" document, flushing the writer after every profile that has folder items.
// NOTE(review): a LINQ to SQL DataContext with object tracking enabled keeps a reference to
// every entity it materialises. For an export of this size consider setting
// profileDB.ObjectTrackingEnabled = false before the context runs its first query
// (it cannot be changed afterwards) - confirm where profileDB is created.
using ( var xw = XmlWriter.Create( fileName, new XmlWriterSettings { Indent = false } ) )
{
    xw.WriteStartElement( "xDataDefs" );

    var authIds = profileDB.Profiles
        .Where( profile => profile.Group.gName == groupName )
        .Select( profile => profile.pAuthID );

    foreach ( var p in authIds )
    {
        if ( totalRows == 0 ) // first one...
        {
            string.Format( "Writing results to {0}...", fileName ).Dump( "Progress" );
        }
        totalRows++;

        // Materialise the per-profile result once. The previous Any() check followed by the
        // enumeration inside WriteFolderItems executed the same deferred query twice for every
        // profile. The result set is small per profile (stated as at most ~50 rows), so holding
        // it in a list is cheap and it becomes collectable after each iteration.
        var folderItems = profileDB.FolderItems
            .Where( f => f.Profile.pAuthID == p && ( folderTypes[0] == "*" || folderTypes.Contains( f.fiEntryType ) ) )
            .OrderBy( f => f.fiEntryDate )
            .ToList();

        if ( folderItems.Count > 0 )
        {
            xw.WriteStartElement( "xDataDef" );
            xw.WriteAttributeString( "id-auth", p );
            xw.WriteStartElement( "FolderItems" );
            WriteFolderItems( profileDB, datalockerConnectionString, xw, folderItems, documentsDirectory, calcDocumentFolder, exportFileData );
            xw.WriteEndElement(); // FolderItems
            xw.WriteEndElement(); // xDataDef
            xw.Flush();
        }
    }

    xw.WriteEndElement(); // xDataDefs
}
WriteFolderItems 也有循环代码,如下所示。一些注意事项:
1) 我希望 foreach( var f in folderItems )
能够流式传输
2) 对于某些 Xml blob 中含有缓存文档(cached-document)的 FolderItem 行,我需要对数据库运行约 1-5 次查询,以获取一些附加信息并插入到 Xml 导出中:var docInfo = profileDB.Documents.Where( d => d.docfiKey == f.fiKey && d.docFilename == fileName ).FirstOrDefault();
3) 我在每个 FolderItem 行之后调用 XmlWriter.Flush
。
/// <summary>
/// Writes one "FolderItem" element to <paramref name="xw"/> for every entry of
/// <paramref name="folderItems"/>, flushing the writer after each one.
/// </summary>
/// <param name="profileDB">Data context queried for the Documents row matching each cached document.</param>
/// <param name="datalockerConnectionString">Connection string handed to DataLockerExtensions.ByConnection.</param>
/// <param name="xw">Writer that receives the FolderItem elements.</param>
/// <param name="folderItems">Folder items to export; enumerated once.</param>
/// <param name="documentsOutputDirectory">Directory that receives exported document files.</param>
/// <param name="calcDocumentFolder">Folder argument passed to the DataLocker Exists/Get calls.</param>
/// <param name="exportFileData">When false, no document files are written to disk.</param>
public void WriteFolderItems( BTR.Evolution.Data.DataContexts.Legacy.xDS.DataContext profileDB, string datalockerConnectionString, XmlWriter xw, IEnumerable<BTR.Evolution.Data.DataContexts.Legacy.xDS.FolderItem> folderItems, string documentsOutputDirectory, string calcDocumentFolder, bool exportFileData )
{
    foreach( var folderItem in folderItems )
    {
        // fiItem carries the calculation as an Xml string.
        XElement calculationXml = XElement.Parse( folderItem.fiItem );

        // Every 'cached-document' element refers to a document stored in the DataLocker database.
        var cachedDocuments = calculationXml
            .Elements( "Data" )
            .Elements( "TabDef" )
            .Elements( "cache-documents" )
            .Elements( "cached-document" );

        foreach( var cachedDocument in cachedDocuments )
        {
            var fileName = (string)cachedDocument.Attribute( "name" );

            // Append author/token so they are available during import.
            var matchingDocuments = profileDB.Documents.Where( d => d.docfiKey == folderItem.fiKey && d.docFilename == fileName );
            var docInfo = matchingDocuments.FirstOrDefault();
            if ( docInfo != null )
            {
                cachedDocument.Add(
                    new XElement( "author", docInfo.docUploadAuthID ),
                    new XElement( "token", docInfo.docDataLockerToken ) );
            }

            // The file export does not touch the XmlWriter; it only saves the document to local disk.
            if ( !exportFileData )
            {
                continue;
            }
            if ( !DataLockerExtensions.ByConnection( datalockerConnectionString ).Exists( calcDocumentFolder, fileName, null ) )
            {
                continue;
            }

            // Forward slashes in the document name are replaced so the name is usable as a single file name.
            var targetPath = Path.Combine( documentsOutputDirectory, fileName.Replace( "/", "__" ) );
            string contentType;
            using ( var fs = new FileStream( targetPath, FileMode.Create ) )
            using ( var ds = DataLockerExtensions.ByConnection( datalockerConnectionString ).Get( calcDocumentFolder, fileName, null, out contentType ) )
            {
                ds.CopyTo( fs );
            }
        }

        // Emit the folder item's columns followed by the (possibly augmented) calculation Xml.
        xw.WriteStartElement( "FolderItem" );
        xw.WriteElementString( "Key", folderItem.fiKey.ToString() );
        xw.WriteElementString( "EntryDate", XmlConvert.ToString( folderItem.fiEntryDate.Value, XmlDateTimeSerializationMode.Local ) );
        xw.WriteElementString( "ItemType", folderItem.fiEntryType );
        xw.WriteElementString( "Author", folderItem.fiAuthor );
        xw.WriteElementString( "Comment", folderItem.fiComment );

        xw.WriteStartElement( "Item" );
        calculationXml.WriteTo( xw );
        xw.WriteEndElement(); // Item

        xw.WriteEndElement(); // FolderItem
        xw.Flush();
    }
}
确保禁用更改跟踪,否则 EF 或 L2S 的更改跟踪器会一直保留对每个已加载实体的引用,这些实体占用的内存就无法被回收。对于 LINQ to SQL,可以在执行第一个查询之前设置 DataContext.ObjectTrackingEnabled = false。