使用 WPF C# 显示大文本文件
Displaying large text files with WPF C#
我正在尝试编写一个 WPF 应用程序来显示(可能)大型日志文件 (50MB-2GB),以便它们更易于阅读。
我尝试将一个 5 MB 的文件和 ~75k 行加载到带有 TextBlocks 的 GridView 中,但它真的很慢。
我不需要任何编辑功能。
我遇到了 GlyphRun,但不知道如何使用它们。我想我必须用日志文件的每一行的 GlyphRun 填充 canvas 或图像。谁能告诉我该怎么做?不幸的是,关于 GlyphRun 的文档不是很有帮助。
我有一个文件读取算法,它来自一个概念验证应用程序(该程序同样是日志文件查看器/差异比较器)。该实现需要 C# 8.0(.NET Core 3.x 或 .NET 5)。为了去除噪音、突出算法的核心逻辑,我删去了索引、取消等部分代码。
它的执行速度非常快,与 Visual Studio Code 等编辑器相比毫不逊色,读取本身已经很难再提速。为了保持 UI 响应,我强烈建议使用 UI 虚拟化。启用 UI 虚拟化之后,瓶颈就只剩下文件读取操作。您可以通过调整分区大小来调优算法的性能(也可以实现某种智能分区策略,动态计算分区大小)。
算法的关键部分是
- 使用 Channel 异步实现生产者-消费者模式
- 将源文件分成 n 字节的块
- 文件分区并行处理(并发文件读取)
- 合并结果文档块和重叠行
DocumentBlock.cs
保存已处理文件分区的行的结果结构。
/// <summary>
/// Immutable result of processing one file partition: the lines extracted from it,
/// a rank used for ordering, and a flag telling whether its last line is incomplete.
/// </summary>
public readonly struct DocumentBlock
{
    /// <summary>Sort key of this block; blocks are ordered by ascending rank when merged.</summary>
    public long Rank { get; }

    /// <summary>Lines extracted from the partition, in the order they were found.</summary>
    public IList<string> Content { get; }

    /// <summary>True when the last entry of <see cref="Content"/> continues in the following block.</summary>
    public bool HasOverflow { get; }

    /// <summary>Creates a block from its rank, its lines and its overflow flag.</summary>
    /// <param name="rank">Sort key of the block.</param>
    /// <param name="content">Lines extracted from the partition; the reference is stored, not copied.</param>
    /// <param name="hasOverflow">Whether the last line is cut off by the partition boundary.</param>
    public DocumentBlock(long rank, IList<string> content, bool hasOverflow)
        => (Rank, Content, HasOverflow) = (rank, content, hasOverflow);
}
ViewModel.cs
入口点是公共成员 ViewModel.ReadFileAsync。
/// <summary>
/// View model that reads a text file in fixed-size byte partitions using several
/// concurrent consumers and exposes the resulting lines through <see cref="FileContent"/>.
/// </summary>
class ViewModel : INotifyPropertyChanged
{
    /// <summary>Size in bytes of each file partition handed to a consumer.</summary>
    private const long PartitionSizeInBytes = 100000;

    private ObservableCollection<string> fileContent;

    public ViewModel() => this.DocumentBlocks = new ConcurrentBag<DocumentBlock>();

    public event PropertyChangedEventHandler PropertyChanged;

    /// <summary>The lines of the most recently read file; raises PropertyChanged when replaced.</summary>
    public ObservableCollection<string> FileContent
    {
        get => this.fileContent;
        set
        {
            this.fileContent = value;
            OnPropertyChanged();
        }
    }

    /// <summary>Set by a consumer once it receives a partition that starts at or beyond the end of the file.</summary>
    private bool EndOfFileReached { get; set; }

    /// <summary>Unordered collection of processed partitions; ordered by rank when merged.</summary>
    private ConcurrentBag<DocumentBlock> DocumentBlocks { get; }

    // TODO::Make reentrant
    // (for example cancel running operations and
    // lock/synchronize the method using a SemaphoreSlim)
    /// <summary>
    /// Reads <paramref name="filePath"/> with one consumer task per processor and
    /// assigns the merged lines to <see cref="FileContent"/>.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
    /// <remarks>Exceptions thrown by a consumer (e.g. when the file cannot be opened) are rethrown from the returned task.</remarks>
    public async Task ReadFileAsync(string filePath)
    {
        if (filePath is null)
        {
            throw new ArgumentNullException(nameof(filePath));
        }

        using var cancellationTokenSource = new CancellationTokenSource();
        this.DocumentBlocks.Clear();
        this.EndOfFileReached = false;

        // Create the channel (Producer-Consumer implementation)
        var channelOptions = new BoundedChannelOptions(Environment.ProcessorCount)
        {
            FullMode = BoundedChannelFullMode.Wait,
            AllowSynchronousContinuations = false,
            SingleWriter = true
        };
        var channel = Channel.CreateBounded<(long PartitionLowerBound, long PartitionUpperBound)>(channelOptions);

        // Create consumer threads
        var tasks = new List<Task>();
        for (int threadIndex = 0; threadIndex < Environment.ProcessorCount; threadIndex++)
        {
            tasks.Add(Task.Run(() => ConsumeFilePartitionsAsync(channel.Reader, filePath, cancellationTokenSource)));
        }

        // Produce document byte blocks
        await ProduceFilePartitionsAsync(channel.Writer, cancellationTokenSource.Token);
        await Task.WhenAll(tasks);

        CreateFileContent();
        this.DocumentBlocks.Clear();
    }

    /// <summary>
    /// Orders the collected blocks by rank, joins lines that were split across
    /// partition boundaries and publishes the result through <see cref="FileContent"/>.
    /// </summary>
    private void CreateFileContent()
    {
        var document = new List<string>();
        string overflowingLineContent = string.Empty;
        bool isOverflowMergePending = false;

        foreach (DocumentBlock documentBlock in this.DocumentBlocks.OrderBy(block => block.Rank))
        {
            IList<string> lines = documentBlock.Content;
            if (lines.Count == 0)
            {
                // Nothing to merge or append; keep a pending overflow for the next block.
                continue;
            }

            if (isOverflowMergePending)
            {
                // The carried-over text is the START of the line, so it goes in front
                // of the first line of this block (its continuation).
                lines[0] = overflowingLineContent + lines[0];
                isOverflowMergePending = false;
            }

            if (documentBlock.HasOverflow)
            {
                // Hold back the incomplete last line until the next block supplies the rest.
                int lastLineIndex = lines.Count - 1;
                overflowingLineContent = lines[lastLineIndex];
                lines.RemoveAt(lastLineIndex);
                isOverflowMergePending = true;
            }

            document.AddRange(lines);
        }

        if (isOverflowMergePending)
        {
            // No following block arrived; do not drop the held-back text.
            document.Add(overflowingLineContent);
        }

        this.FileContent = new ObservableCollection<string>(document);
    }

    /// <summary>
    /// Writes consecutive (lower bound, upper bound) byte ranges to the channel until a
    /// consumer reports the end of the file or the token is cancelled, then completes the writer.
    /// </summary>
    private async Task ProduceFilePartitionsAsync(
        ChannelWriter<(long PartitionLowerBound, long PartitionUpperBound)> channelWriter,
        CancellationToken cancellationToken)
    {
        long partitionLowerBound = 0;
        try
        {
            // Also stop on cancellation: otherwise a cancelled token without the
            // end-of-file flag would make WriteAsync throw on every iteration forever.
            while (!this.EndOfFileReached && !cancellationToken.IsCancellationRequested)
            {
                long partitionUpperBound = partitionLowerBound + ViewModel.PartitionSizeInBytes;
                await channelWriter.WriteAsync((partitionLowerBound, partitionUpperBound), cancellationToken);
                partitionLowerBound = partitionUpperBound;
            }
        }
        catch (OperationCanceledException)
        {
            // Expected: a consumer cancels the token to release a writer blocked on a full channel.
        }
        finally
        {
            // Always complete the writer so the consumers' ReadAllAsync loops can end.
            channelWriter.Complete();
        }
    }

    /// <summary>
    /// Reads partitions from the channel, extracts the lines of each partition from the
    /// file and adds one <see cref="DocumentBlock"/> per partition to <see cref="DocumentBlocks"/>.
    /// </summary>
    /// <param name="channelReader">Source of the byte ranges to process.</param>
    /// <param name="filePath">Path of the file; every consumer opens its own stream.</param>
    /// <param name="waitingChannelWritertCancellationTokenSource">Cancelled to stop the producer at end of file or on failure.</param>
    private async Task ConsumeFilePartitionsAsync(
        ChannelReader<(long PartitionLowerBound, long PartitionUpperBound)> channelReader,
        string filePath,
        CancellationTokenSource waitingChannelWritertCancellationTokenSource)
    {
        try
        {
            await using var file = File.OpenRead(filePath);
            await foreach (var filePartitionInfo in channelReader.ReadAllAsync())
            {
                if (filePartitionInfo.PartitionLowerBound >= file.Length)
                {
                    this.EndOfFileReached = true;
                    waitingChannelWritertCancellationTokenSource.Cancel();
                    return;
                }

                byte[] filePartition = await ReadFilePartitionAsync(
                    file,
                    filePartitionInfo.PartitionLowerBound,
                    filePartitionInfo.PartitionUpperBound);
                if (filePartition.Length == 0)
                {
                    // Nothing could be read (e.g. the file shrank meanwhile); skip the empty block.
                    continue;
                }

                // Extract lines
                var documentBlockLines = new List<string>();
                bool isLastLineComplete = ExtractLinesFromFilePartition(filePartition, documentBlockLines);
                bool documentBlockHasOverflow = !isLastLineComplete && file.Position != file.Length;

                // The lower bound doubles as the rank, so ordering by rank restores file order.
                this.DocumentBlocks.Add(
                    new DocumentBlock(filePartitionInfo.PartitionLowerBound, documentBlockLines, documentBlockHasOverflow));
            }
        }
        catch (Exception)
        {
            // Release the producer; otherwise it could wait forever on a channel nobody drains.
            waitingChannelWritertCancellationTokenSource.Cancel();
            throw;
        }
    }

    /// <summary>
    /// Reads the bytes in [lowerBound, upperBound) from <paramref name="file"/> and returns
    /// an array holding exactly the bytes that were read (shorter at the end of the file).
    /// </summary>
    private static async Task<byte[]> ReadFilePartitionAsync(Stream file, long partitionLowerBound, long partitionUpperBound)
    {
        var buffer = new byte[partitionUpperBound - partitionLowerBound];
        file.Seek(partitionLowerBound, SeekOrigin.Begin);

        // A single ReadAsync call may return fewer bytes than requested, so keep
        // reading until the buffer is full or the stream reports its end (0 bytes).
        int totalBytesRead = 0;
        while (totalBytesRead < buffer.Length)
        {
            int bytesRead = await file.ReadAsync(buffer, totalBytesRead, buffer.Length - totalBytesRead);
            if (bytesRead == 0)
            {
                break;
            }

            totalBytesRead += bytesRead;
        }

        // Trim the unused tail so zero bytes are never decoded as text.
        if (totalBytesRead < buffer.Length)
        {
            Array.Resize(ref buffer, totalBytesRead);
        }

        return buffer;
    }

    /// <summary>
    /// Splits <paramref name="filePartition"/> at '\n' bytes, decodes every piece as UTF-8,
    /// trims '\r' from both ends and appends it to <paramref name="resultDocumentBlockLines"/>.
    /// </summary>
    /// <returns>True when the last extracted line was terminated by '\n'; false otherwise (also for an empty partition).</returns>
    /// <remarks>
    /// NOTE(review): every partition is decoded on its own, so a multi-byte UTF-8 sequence
    /// that straddles a partition boundary is not reassembled.
    /// </remarks>
    private bool ExtractLinesFromFilePartition(byte[] filePartition, List<string> resultDocumentBlockLines)
    {
        const byte LineFeed = (byte)'\n';

        bool isLastLineComplete = false;
        int lineBeginIndex = 0;
        while (lineBeginIndex < filePartition.Length)
        {
            int lineEndIndex = Array.IndexOf(filePartition, LineFeed, lineBeginIndex);
            isLastLineComplete = lineEndIndex >= 0;
            if (!isLastLineComplete)
            {
                // No terminator left: the rest of the buffer is one (possibly incomplete) line.
                lineEndIndex = filePartition.Length;
            }

            // A zero-length range decodes to an empty string, which represents an empty line.
            string lineContent = Encoding.UTF8
                .GetString(filePartition, lineBeginIndex, lineEndIndex - lineBeginIndex)
                .Trim('\r');
            resultDocumentBlockLines.Add(lineContent);

            lineBeginIndex = lineEndIndex + 1;
        }

        return isLastLineComplete;
    }

    /// <summary>Raises <see cref="PropertyChanged"/> for the calling member.</summary>
    protected virtual void OnPropertyChanged([CallerMemberName] string propertyName = "")
        => this.PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(propertyName));
}
为了实现非常简单的 UI 虚拟化,此示例使用普通的 ListBox,并移除了 ListBoxItem 元素上的所有鼠标效果,以去掉 ListBox 的默认外观(强烈建议再加一个不确定进度指示器)。您可以增强此示例以支持多行文本选择(例如,将文本复制到剪贴板)。
MainWindow.xaml
<!-- Minimal window: a ListBox bound to the view model's FileContent collection. -->
<Window>
<Window.DataContext>
<!-- The view model is instantiated in XAML and serves as the binding source of the window. -->
<!-- NOTE(review): ViewModel is referenced without an XML namespace prefix; presumably the xmlns declarations were omitted for brevity - confirm. -->
<ViewModel />
</Window.DataContext>
<!-- One list entry per element of FileContent; the vertical scroll bar is always shown and the height is fixed to 400. -->
<ListBox ScrollViewer.VerticalScrollBarVisibility="Visible"
ItemsSource="{Binding FileContent}"
Height="400" >
<ListBox.ItemContainerStyle>
<!-- Replaces the ListBoxItem template with a bare ContentPresenter so only the item content is rendered. -->
<Style TargetType="ListBoxItem">
<Setter Property="Template">
<Setter.Value>
<ControlTemplate TargetType="ListBoxItem">
<ContentPresenter />
</ControlTemplate>
</Setter.Value>
</Setter>
</Style>
</ListBox.ItemContainerStyle>
</ListBox>
</Window>
如果您经验更丰富,也可以自己实现一个功能强大的文档查看器,例如扩展 VirtualizingPanel 并使用底层文本渲染,以便支持文本搜索和高亮显示(在这种场景下请远离 RichTextBox 或 FlowDocument,因为它们太慢了)。
至少你有一个性能良好的文本文件读取算法,可以用来为你的 UI 实现生成数据源。
如果这个查看器不是你的主要产品,而只是帮助你处理日志文件的简单开发工具,我不建议自己实现日志文件查看器。市面上已经有很多免费和付费的应用程序可用。
我正在尝试编写一个 WPF 应用程序来显示(可能)大型日志文件 (50MB-2GB),以便它们更易于阅读。 我尝试将一个 5 MB 的文件和 ~75k 行加载到带有 TextBlocks 的 GridView 中,但它真的很慢。 我不需要任何编辑功能。
我遇到了 GlyphRun,但不知道如何使用它们。我想我必须用日志文件的每一行的 GlyphRun 填充 canvas 或图像。谁能告诉我该怎么做?不幸的是,关于 GlyphRun 的文档不是很有帮助。
我有一个文件读取算法,它来自一个概念验证应用程序(该程序同样是日志文件查看器/差异比较器)。该实现需要 C# 8.0(.NET Core 3.x 或 .NET 5)。为了去除噪音、突出算法的核心逻辑,我删去了索引、取消等部分代码。
它的执行速度非常快,与 Visual Studio Code 等编辑器相比毫不逊色,读取本身已经很难再提速。为了保持 UI 响应,我强烈建议使用 UI 虚拟化。启用 UI 虚拟化之后,瓶颈就只剩下文件读取操作。您可以通过调整分区大小来调优算法的性能(也可以实现某种智能分区策略,动态计算分区大小)。
算法的关键部分是
- 使用 Channel 异步实现生产者-消费者模式
- 将源文件分成 n 字节的块
- 文件分区并行处理(并发文件读取)
- 合并结果文档块和重叠行
DocumentBlock.cs
保存已处理文件分区的行的结果结构。
/// <summary>
/// Immutable result of processing one file partition: the lines extracted from it,
/// a rank used for ordering, and a flag telling whether its last line is incomplete.
/// </summary>
public readonly struct DocumentBlock
{
    /// <summary>Sort key of this block; blocks are ordered by ascending rank when merged.</summary>
    public long Rank { get; }

    /// <summary>Lines extracted from the partition, in the order they were found.</summary>
    public IList<string> Content { get; }

    /// <summary>True when the last entry of <see cref="Content"/> continues in the following block.</summary>
    public bool HasOverflow { get; }

    /// <summary>Creates a block from its rank, its lines and its overflow flag.</summary>
    /// <param name="rank">Sort key of the block.</param>
    /// <param name="content">Lines extracted from the partition; the reference is stored, not copied.</param>
    /// <param name="hasOverflow">Whether the last line is cut off by the partition boundary.</param>
    public DocumentBlock(long rank, IList<string> content, bool hasOverflow)
        => (Rank, Content, HasOverflow) = (rank, content, hasOverflow);
}
ViewModel.cs
入口点是公共成员 ViewModel.ReadFileAsync。
/// <summary>
/// View model that reads a text file in fixed-size byte partitions using several
/// concurrent consumers and exposes the resulting lines through <see cref="FileContent"/>.
/// </summary>
class ViewModel : INotifyPropertyChanged
{
    /// <summary>Size in bytes of each file partition handed to a consumer.</summary>
    private const long PartitionSizeInBytes = 100000;

    private ObservableCollection<string> fileContent;

    public ViewModel() => this.DocumentBlocks = new ConcurrentBag<DocumentBlock>();

    public event PropertyChangedEventHandler PropertyChanged;

    /// <summary>The lines of the most recently read file; raises PropertyChanged when replaced.</summary>
    public ObservableCollection<string> FileContent
    {
        get => this.fileContent;
        set
        {
            this.fileContent = value;
            OnPropertyChanged();
        }
    }

    /// <summary>Set by a consumer once it receives a partition that starts at or beyond the end of the file.</summary>
    private bool EndOfFileReached { get; set; }

    /// <summary>Unordered collection of processed partitions; ordered by rank when merged.</summary>
    private ConcurrentBag<DocumentBlock> DocumentBlocks { get; }

    // TODO::Make reentrant
    // (for example cancel running operations and
    // lock/synchronize the method using a SemaphoreSlim)
    /// <summary>
    /// Reads <paramref name="filePath"/> with one consumer task per processor and
    /// assigns the merged lines to <see cref="FileContent"/>.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
    /// <remarks>Exceptions thrown by a consumer (e.g. when the file cannot be opened) are rethrown from the returned task.</remarks>
    public async Task ReadFileAsync(string filePath)
    {
        if (filePath is null)
        {
            throw new ArgumentNullException(nameof(filePath));
        }

        using var cancellationTokenSource = new CancellationTokenSource();
        this.DocumentBlocks.Clear();
        this.EndOfFileReached = false;

        // Create the channel (Producer-Consumer implementation)
        var channelOptions = new BoundedChannelOptions(Environment.ProcessorCount)
        {
            FullMode = BoundedChannelFullMode.Wait,
            AllowSynchronousContinuations = false,
            SingleWriter = true
        };
        var channel = Channel.CreateBounded<(long PartitionLowerBound, long PartitionUpperBound)>(channelOptions);

        // Create consumer threads
        var tasks = new List<Task>();
        for (int threadIndex = 0; threadIndex < Environment.ProcessorCount; threadIndex++)
        {
            tasks.Add(Task.Run(() => ConsumeFilePartitionsAsync(channel.Reader, filePath, cancellationTokenSource)));
        }

        // Produce document byte blocks
        await ProduceFilePartitionsAsync(channel.Writer, cancellationTokenSource.Token);
        await Task.WhenAll(tasks);

        CreateFileContent();
        this.DocumentBlocks.Clear();
    }

    /// <summary>
    /// Orders the collected blocks by rank, joins lines that were split across
    /// partition boundaries and publishes the result through <see cref="FileContent"/>.
    /// </summary>
    private void CreateFileContent()
    {
        var document = new List<string>();
        string overflowingLineContent = string.Empty;
        bool isOverflowMergePending = false;

        foreach (DocumentBlock documentBlock in this.DocumentBlocks.OrderBy(block => block.Rank))
        {
            IList<string> lines = documentBlock.Content;
            if (lines.Count == 0)
            {
                // Nothing to merge or append; keep a pending overflow for the next block.
                continue;
            }

            if (isOverflowMergePending)
            {
                // The carried-over text is the START of the line, so it goes in front
                // of the first line of this block (its continuation).
                lines[0] = overflowingLineContent + lines[0];
                isOverflowMergePending = false;
            }

            if (documentBlock.HasOverflow)
            {
                // Hold back the incomplete last line until the next block supplies the rest.
                int lastLineIndex = lines.Count - 1;
                overflowingLineContent = lines[lastLineIndex];
                lines.RemoveAt(lastLineIndex);
                isOverflowMergePending = true;
            }

            document.AddRange(lines);
        }

        if (isOverflowMergePending)
        {
            // No following block arrived; do not drop the held-back text.
            document.Add(overflowingLineContent);
        }

        this.FileContent = new ObservableCollection<string>(document);
    }

    /// <summary>
    /// Writes consecutive (lower bound, upper bound) byte ranges to the channel until a
    /// consumer reports the end of the file or the token is cancelled, then completes the writer.
    /// </summary>
    private async Task ProduceFilePartitionsAsync(
        ChannelWriter<(long PartitionLowerBound, long PartitionUpperBound)> channelWriter,
        CancellationToken cancellationToken)
    {
        long partitionLowerBound = 0;
        try
        {
            // Also stop on cancellation: otherwise a cancelled token without the
            // end-of-file flag would make WriteAsync throw on every iteration forever.
            while (!this.EndOfFileReached && !cancellationToken.IsCancellationRequested)
            {
                long partitionUpperBound = partitionLowerBound + ViewModel.PartitionSizeInBytes;
                await channelWriter.WriteAsync((partitionLowerBound, partitionUpperBound), cancellationToken);
                partitionLowerBound = partitionUpperBound;
            }
        }
        catch (OperationCanceledException)
        {
            // Expected: a consumer cancels the token to release a writer blocked on a full channel.
        }
        finally
        {
            // Always complete the writer so the consumers' ReadAllAsync loops can end.
            channelWriter.Complete();
        }
    }

    /// <summary>
    /// Reads partitions from the channel, extracts the lines of each partition from the
    /// file and adds one <see cref="DocumentBlock"/> per partition to <see cref="DocumentBlocks"/>.
    /// </summary>
    /// <param name="channelReader">Source of the byte ranges to process.</param>
    /// <param name="filePath">Path of the file; every consumer opens its own stream.</param>
    /// <param name="waitingChannelWritertCancellationTokenSource">Cancelled to stop the producer at end of file or on failure.</param>
    private async Task ConsumeFilePartitionsAsync(
        ChannelReader<(long PartitionLowerBound, long PartitionUpperBound)> channelReader,
        string filePath,
        CancellationTokenSource waitingChannelWritertCancellationTokenSource)
    {
        try
        {
            await using var file = File.OpenRead(filePath);
            await foreach (var filePartitionInfo in channelReader.ReadAllAsync())
            {
                if (filePartitionInfo.PartitionLowerBound >= file.Length)
                {
                    this.EndOfFileReached = true;
                    waitingChannelWritertCancellationTokenSource.Cancel();
                    return;
                }

                byte[] filePartition = await ReadFilePartitionAsync(
                    file,
                    filePartitionInfo.PartitionLowerBound,
                    filePartitionInfo.PartitionUpperBound);
                if (filePartition.Length == 0)
                {
                    // Nothing could be read (e.g. the file shrank meanwhile); skip the empty block.
                    continue;
                }

                // Extract lines
                var documentBlockLines = new List<string>();
                bool isLastLineComplete = ExtractLinesFromFilePartition(filePartition, documentBlockLines);
                bool documentBlockHasOverflow = !isLastLineComplete && file.Position != file.Length;

                // The lower bound doubles as the rank, so ordering by rank restores file order.
                this.DocumentBlocks.Add(
                    new DocumentBlock(filePartitionInfo.PartitionLowerBound, documentBlockLines, documentBlockHasOverflow));
            }
        }
        catch (Exception)
        {
            // Release the producer; otherwise it could wait forever on a channel nobody drains.
            waitingChannelWritertCancellationTokenSource.Cancel();
            throw;
        }
    }

    /// <summary>
    /// Reads the bytes in [lowerBound, upperBound) from <paramref name="file"/> and returns
    /// an array holding exactly the bytes that were read (shorter at the end of the file).
    /// </summary>
    private static async Task<byte[]> ReadFilePartitionAsync(Stream file, long partitionLowerBound, long partitionUpperBound)
    {
        var buffer = new byte[partitionUpperBound - partitionLowerBound];
        file.Seek(partitionLowerBound, SeekOrigin.Begin);

        // A single ReadAsync call may return fewer bytes than requested, so keep
        // reading until the buffer is full or the stream reports its end (0 bytes).
        int totalBytesRead = 0;
        while (totalBytesRead < buffer.Length)
        {
            int bytesRead = await file.ReadAsync(buffer, totalBytesRead, buffer.Length - totalBytesRead);
            if (bytesRead == 0)
            {
                break;
            }

            totalBytesRead += bytesRead;
        }

        // Trim the unused tail so zero bytes are never decoded as text.
        if (totalBytesRead < buffer.Length)
        {
            Array.Resize(ref buffer, totalBytesRead);
        }

        return buffer;
    }

    /// <summary>
    /// Splits <paramref name="filePartition"/> at '\n' bytes, decodes every piece as UTF-8,
    /// trims '\r' from both ends and appends it to <paramref name="resultDocumentBlockLines"/>.
    /// </summary>
    /// <returns>True when the last extracted line was terminated by '\n'; false otherwise (also for an empty partition).</returns>
    /// <remarks>
    /// NOTE(review): every partition is decoded on its own, so a multi-byte UTF-8 sequence
    /// that straddles a partition boundary is not reassembled.
    /// </remarks>
    private bool ExtractLinesFromFilePartition(byte[] filePartition, List<string> resultDocumentBlockLines)
    {
        const byte LineFeed = (byte)'\n';

        bool isLastLineComplete = false;
        int lineBeginIndex = 0;
        while (lineBeginIndex < filePartition.Length)
        {
            int lineEndIndex = Array.IndexOf(filePartition, LineFeed, lineBeginIndex);
            isLastLineComplete = lineEndIndex >= 0;
            if (!isLastLineComplete)
            {
                // No terminator left: the rest of the buffer is one (possibly incomplete) line.
                lineEndIndex = filePartition.Length;
            }

            // A zero-length range decodes to an empty string, which represents an empty line.
            string lineContent = Encoding.UTF8
                .GetString(filePartition, lineBeginIndex, lineEndIndex - lineBeginIndex)
                .Trim('\r');
            resultDocumentBlockLines.Add(lineContent);

            lineBeginIndex = lineEndIndex + 1;
        }

        return isLastLineComplete;
    }

    /// <summary>Raises <see cref="PropertyChanged"/> for the calling member.</summary>
    protected virtual void OnPropertyChanged([CallerMemberName] string propertyName = "")
        => this.PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(propertyName));
}
为了实现非常简单的 UI 虚拟化,此示例使用普通的 ListBox,并移除了 ListBoxItem 元素上的所有鼠标效果,以去掉 ListBox 的默认外观(强烈建议再加一个不确定进度指示器)。您可以增强此示例以支持多行文本选择(例如,将文本复制到剪贴板)。
MainWindow.xaml
<!-- Minimal window: a ListBox bound to the view model's FileContent collection. -->
<Window>
<Window.DataContext>
<!-- The view model is instantiated in XAML and serves as the binding source of the window. -->
<!-- NOTE(review): ViewModel is referenced without an XML namespace prefix; presumably the xmlns declarations were omitted for brevity - confirm. -->
<ViewModel />
</Window.DataContext>
<!-- One list entry per element of FileContent; the vertical scroll bar is always shown and the height is fixed to 400. -->
<ListBox ScrollViewer.VerticalScrollBarVisibility="Visible"
ItemsSource="{Binding FileContent}"
Height="400" >
<ListBox.ItemContainerStyle>
<!-- Replaces the ListBoxItem template with a bare ContentPresenter so only the item content is rendered. -->
<Style TargetType="ListBoxItem">
<Setter Property="Template">
<Setter.Value>
<ControlTemplate TargetType="ListBoxItem">
<ContentPresenter />
</ControlTemplate>
</Setter.Value>
</Setter>
</Style>
</ListBox.ItemContainerStyle>
</ListBox>
</Window>
如果您经验更丰富,也可以自己实现一个功能强大的文档查看器,例如扩展 VirtualizingPanel 并使用底层文本渲染,以便支持文本搜索和高亮显示(在这种场景下请远离 RichTextBox 或 FlowDocument,因为它们太慢了)。
至少你有一个性能良好的文本文件读取算法,可以用来为你的 UI 实现生成数据源。
如果这个查看器不是你的主要产品,而只是帮助你处理日志文件的简单开发工具,我不建议自己实现日志文件查看器。市面上已经有很多免费和付费的应用程序可用。