C# WebClient - 下载文件后 LOH 大幅增加

C# WebClient - Large increase of LOH after downloading files

我有一个 class,负责在下载管理器中下载文件:它下载文件并将其写入给定路径。

要下载的文件大小通常在 1 到 5 MB 之间,但也可能更大。我正在使用 WebClient class 的实例从 Internet 获取文件。

/// <summary>
/// Downloads the payload behind a single <c>PlaylistItem</c> into memory via
/// <c>WebClient.DownloadDataAsync</c> and writes it to <see cref="SavePath"/>.
/// </summary>
/// <remarks>
/// The complete response is held in one <c>byte[]</c> (<c>args.Result</c>) before it is written to disk.
/// </remarks>
public class DownloadItem
{
    #region Events
    public delegate void DownloadItemDownloadCompletedEventHandler(object sender, DownloadCompletedEventArgs args);

    /// <summary>Raised once per <see cref="StartDownload"/> call when the download finished, failed or was skipped.</summary>
    public event DownloadItemDownloadCompletedEventHandler DownloadItemDownloadCompleted;

    /// <summary>Raises <see cref="DownloadItemDownloadCompleted"/> with this instance as sender.</summary>
    protected virtual void OnDownloadItemDownloadCompleted(DownloadCompletedEventArgs e)
    {
        DownloadItemDownloadCompleted?.Invoke(this, e);
    }

    public delegate void DownloadItemDownloadProgressChangedEventHandler(object sender, DownloadProgressChangedEventArgs args);

    /// <summary>Raised whenever the underlying <c>WebClient</c> reports progress.</summary>
    public event DownloadItemDownloadProgressChangedEventHandler DownloadItemDownloadProgressChanged;

    /// <summary>Raises <see cref="DownloadItemDownloadProgressChanged"/> with this instance as sender.</summary>
    protected virtual void OnDownloadItemDownloadProgressChanged(DownloadProgressChangedEventArgs e)
    {
        DownloadItemDownloadProgressChanged?.Invoke(this, e);
    }
    #endregion

    #region Fields
    private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

    // Client of the most recently started download; only kept so StopDownload can cancel it.
    private WebClient _client;
    #endregion

    #region Properties
    /// <summary>Playlist entry this download belongs to.</summary>
    public PlaylistItem Item { get; }

    /// <summary>Target path of the downloaded file.</summary>
    public string SavePath { get; }

    /// <summary>When false, an existing file at <see cref="SavePath"/> makes <see cref="StartDownload"/> a no-op.</summary>
    public bool Overwrite { get; }
    #endregion

    /// <summary>Creates a download for <paramref name="item"/> that will be stored at <paramref name="savePath"/>.</summary>
    public DownloadItem(PlaylistItem item, string savePath, bool overwrite = false)
    {
        Item = item;
        SavePath = savePath;
        Overwrite = overwrite;
    }

    /// <summary>
    /// Resolves the download URL and starts the asynchronous download.
    /// Every outcome is reported through <see cref="DownloadItemDownloadCompleted"/>.
    /// </summary>
    public void StartDownload()
    {
        if (File.Exists(SavePath) && !Overwrite)
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true));
            return;
        }

        OnDownloadItemDownloadProgressChanged(new DownloadProgressChangedEventArgs(1));
        Item.RetreiveDownloadUrl();

        if (string.IsNullOrEmpty(Item.DownloadUrl))
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, new InvalidOperationException("Could not retreive download url")));
            return;
        }

        // The client must outlive this method: DownloadDataAsync returns immediately, so a
        // using-block here would dispose the client while the request is still in flight.
        // It is disposed in the completion callback (or below if starting the request fails).
        var client = new WebClient();
        _client = client;

        try
        {
            client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");

            client.DownloadDataCompleted += (sender, args) =>
            {
                // args already carries the result/error, so the client is no longer needed.
                client.Dispose();

                // Disk I/O, tagging and conversion run on a pool thread instead of the callback thread.
                Task.Run(() => DownloadCompleted(args));
            };
            client.DownloadProgressChanged += (sender, args) => OnDownloadItemDownloadProgressChanged(new DownloadProgressChangedEventArgs(args.ProgressPercentage));
            client.DownloadDataAsync(new Uri(Item.DownloadUrl));
        }
        catch (Exception ex)
        {
            client.Dispose();
            Logger.Warn(ex, "Error downloading track {0}", Item.VideoId);
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
        }
    }

    /// <summary>
    /// Persists a finished download, tags it, attempts the M4A to MP3 conversion and raises the completion event.
    /// </summary>
    /// <param name="args">Completion data handed over by the <c>WebClient</c>.</param>
    private void DownloadCompleted(DownloadDataCompletedEventArgs args)
    {
        // args.Result throws when the operation was cancelled or failed, so both cases are
        // reported up front with the real error instead of a wrapper raised by the Result getter.
        if (args.Cancelled || args.Error != null)
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, args.Error));
            return;
        }

        try
        {
            File.WriteAllBytes(SavePath, args.Result);

            using (var file = TagLib.File.Create(SavePath))
            {
                file.Save();
            }

            try
            {
                MusicFormatConverter.M4AToMp3(SavePath);
            }
            catch (Exception)
            {
                // Conversion is best effort: a failed or rejected conversion must not fail the download.
            }

            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(false));
        }
        catch (Exception ex)
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
            Logger.Error(ex, "Error writing track file for track {0}", Item.VideoId);
        }
    }

    /// <summary>Requests cancellation of the download started last, if any.</summary>
    public void StopDownload()
    {
        _client?.CancelAsync();
    }

    /// <summary>Hash code of the wrapped <see cref="Item"/>, consistent with <see cref="Equals(object)"/>.</summary>
    public override int GetHashCode()
    {
        return Item.GetHashCode();
    }

    /// <summary>Two downloads are equal when they wrap equal playlist items.</summary>
    public override bool Equals(object obj)
    {
        var item = obj as DownloadItem;

        return Item.Equals(item?.Item);
    }
}

与下载项目的文件大小相比,每次下载都会导致内存增加非常大。如果我下载一个大小约为 3 MB 的文件,内存使用量将增加约 8 MB。

如您所见,下载过程中在 LOH(大对象堆)上产生了大量分配,并且这些分配在下载结束后没有被回收。即使强制 GC,或设置 GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;,也无法阻止这种内存泄漏。

比较快照 1 和 2 可以看出内存量是由字节数组产生的,这可能是下载结果。

进行多次下载可以看出此内存泄漏有多严重。

在我看来,这是由 WebClient 实例以某种方式引起的,但我无法确定究竟是什么导致了这个问题。是否强制 GC 也没有任何区别。下面的截图显示的是未强制 GC 时的情况:

是什么导致了这种开销,我该如何解决?这是一个严重的缺陷:试想进行 100 次甚至更多次下载,进程就会耗尽内存。

编辑


按照建议,我注释掉了负责设置标签和将 M4A 转换为 MP3 的部分。然而,转换器只是 FFMPEG 的调用,所以它不应该是内存泄漏:

/// <summary>
/// Converts audio files by invoking the ffmpeg executable expected at <c>tools/ffmpeg.exe</c>.
/// </summary>
class MusicFormatConverter
{
    /// <summary>
    /// Converts an <c>.m4a</c> file to a 128k MP3 placed next to it (same name, <c>.mp3</c> extension).
    /// </summary>
    /// <param name="filePath">Path of the source file; must end in <c>.m4a</c>.</param>
    /// <param name="deleteOriginal">When true, the source file is deleted after a successful conversion.</param>
    /// <exception cref="ArgumentException"><paramref name="filePath"/> is null, empty or not an <c>.m4a</c> path.</exception>
    /// <exception cref="InvalidOperationException">ffmpeg reported a failure or produced no output file.</exception>
    public static void M4AToMp3(string filePath, bool deleteOriginal = true)
    {
        if (string.IsNullOrEmpty(filePath) || !filePath.EndsWith(".m4a", StringComparison.OrdinalIgnoreCase))
            throw new ArgumentException("Path must point to an .m4a file.", nameof(filePath));

        var toolPath = Path.Combine("tools", "ffmpeg.exe");

        // Only the extension may change; a plain string replace would also rewrite
        // ".m4a" occurring in a directory name or in the middle of the file name.
        var convertedFilePath = Path.ChangeExtension(filePath, ".mp3");

        // Remove a stale output file first so its presence afterwards proves this run wrote it.
        File.Delete(convertedFilePath);

        int exitCode;
        using (var process = new Process
        {
            StartInfo =
            {
                FileName = toolPath,
#if !DEBUG
                WindowStyle = ProcessWindowStyle.Hidden,
#endif
                Arguments = $"-i \"{filePath}\" -acodec libmp3lame -ab 128k \"{convertedFilePath}\""
            }
        })
        {
            process.Start();
            process.WaitForExit();
            exitCode = process.ExitCode;
        }

        // A non-zero exit code can leave a partial output file behind; treating that as
        // success would delete the original and keep a broken MP3.
        if (exitCode != 0 || !File.Exists(convertedFilePath))
            throw new InvalidOperationException("File was not converted successfully!");

        if (deleteOriginal)
            File.Delete(filePath);
    }
}

DownloadCompleted() 方法现在看起来像这样:

/// <summary>
/// Writes a finished download to <c>SavePath</c> and raises the completion event.
/// Tagging and M4A to MP3 conversion are disabled in this variant.
/// </summary>
/// <param name="args">Completion data handed over by the <c>WebClient</c>.</param>
private void DownloadCompleted(DownloadDataCompletedEventArgs args)
{
    // args.Result throws when the operation was cancelled or failed, so both cases are
    // reported up front with the real error instead of a wrapper raised by the Result getter.
    if (args.Cancelled || args.Error != null)
    {
        OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, args.Error));
        return;
    }

    try
    {
        File.WriteAllBytes(SavePath, args.Result);

        // Tagging and conversion are deliberately commented out for this experiment.
        /*
        using (var file = TagLib.File.Create(SavePath))
        {
            file.Save();
        }

        try
        {
            MusicFormatConverter.M4AToMp3(SavePath);
        }
        catch (Exception)
        {
            // ignore
        }
        */

        OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(false));
    }
    catch (Exception ex)
    {
        OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
        Logger.Error(ex, "Error writing track file for track {0}", Item.VideoId);
    }
}

下载 7 个项目后的结果:看来这部分代码并不是内存泄漏的原因。

作为补充,我也提交了 DownloadManager class,因为它正在处理整个下载操作。也许这可能是问题的根源。

/// <summary>
/// Queues <c>DownloadItem</c>s and starts them on the thread pool, keeping at most
/// <c>Properties.Settings.Default.ParallelDownloads</c> of them in the active list at a time.
/// </summary>
/// <remarks>
/// The queue and the active list are touched by the caller's thread, the worker thread and
/// the completion callbacks of the downloads, so every access goes through <c>_sync</c>.
/// </remarks>
public class DownloadManager
{
    #region Fields
    private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

    // Guards _queue, _activeDownloads, _active and _thread.
    private readonly object _sync = new object();
    private readonly Queue<DownloadItem> _queue;
    private readonly List<DownloadItem> _activeDownloads;

    // True while a worker thread is (or is about to start) draining the queue.
    private bool _active;
    private Thread _thread;
    #endregion

    #region Construction
    /// <summary>Creates an idle manager with an empty queue.</summary>
    public DownloadManager()
    {
        _queue = new Queue<DownloadItem>();
        _activeDownloads = new List<DownloadItem>();
    }
    #endregion

    #region Methods
    /// <summary>Appends <paramref name="item"/> to the queue and makes sure a worker is draining it.</summary>
    public void AddToQueue(DownloadItem item)
    {
        lock (_sync)
        {
            _queue.Enqueue(item);
        }

        StartManager();
    }

    /// <summary>
    /// Drops all queued items and forgets the active ones.
    /// </summary>
    /// <remarks>
    /// The worker is not killed with <c>Thread.Abort</c>: it notices the empty queue on its
    /// next check, clears the active flag and exits, so a later <see cref="AddToQueue"/> can
    /// start a fresh worker. Downloads that are already running are not cancelled here.
    /// </remarks>
    public void Abort()
    {
        lock (_sync)
        {
            _queue.Clear();
            _activeDownloads.Clear();
        }
    }

    /// <summary>Starts the worker thread unless one is already running.</summary>
    private void StartManager()
    {
        lock (_sync)
        {
            if (_active) return;

            _active = true;

            _thread = new Thread(ProcessQueue);
            _thread.Start();
        }
    }

    /// <summary>Worker loop: starts queued downloads until the queue is empty.</summary>
    private void ProcessQueue()
    {
        while (true)
        {
            lock (_sync)
            {
                // Deciding to exit and clearing the flag happen atomically, so an item
                // enqueued concurrently is either seen here or triggers a new worker.
                if (_queue.Count == 0)
                {
                    _active = false;
                    return;
                }
            }

            DownloadItem();

            // Wait for a free slot, but only while there is still something to start;
            // this also lets the worker leave promptly after Abort() emptied the queue.
            while (IsWaitingForFreeSlot())
            {
                Thread.Sleep(10);
            }
        }
    }

    /// <summary>True when items are queued but the parallel-download limit is reached.</summary>
    private bool IsWaitingForFreeSlot()
    {
        lock (_sync)
        {
            return _queue.Count > 0
                && _activeDownloads.Count >= Properties.Settings.Default.ParallelDownloads;
        }
    }

    /// <summary>Dequeues the next item, if a slot is free, and starts it on the thread pool.</summary>
    private void DownloadItem()
    {
        DownloadItem item;

        lock (_sync)
        {
            if (_activeDownloads.Count >= Properties.Settings.Default.ParallelDownloads) return;
            if (_queue.Count == 0) return;

            item = _queue.Dequeue();

            // A null entry is discarded instead of being left at the head of the queue,
            // where it would block every item behind it.
            if (item == null) return;

            item.DownloadItemDownloadCompleted += (sender, args) =>
            {
                if (args.Error != null)
                    Logger.Error(args.Error, "Error downloading track {0}", ((DownloadItem)sender).Item.VideoId);

                lock (_sync)
                {
                    _activeDownloads.Remove((DownloadItem)sender);
                }
            };

            _activeDownloads.Add(item);
        }

        // Started outside the lock: StartDownload may raise the completion event synchronously.
        Task.Run(() => item.StartDownload());
    }
    #endregion
}

最后,经过数十次分析和内存检查,问题现已解决。

正如 @SimonMourier 已经指出的,这个问题与 UploadFile、DownloadData、DownloadString、DownloadFile 等方法的设计有关。查看它们的内部实现可以看到,它们都调用了 WebClient class 中的私有方法 DownloadBits,其签名如下:

private byte[] DownloadBits(WebRequest request, Stream writeStream, CompletionDelegate completionDelegate, AsyncOperation asyncOp)

从返回类型就能明白为什么会出现我观察到的行为:使用上述方法时,内容会保存在一个字节数组中。因此,如果文件大小超过 85,000 字节,就不建议使用这些方法,因为这会不断填充 LOH,直至达到内存上限。如果文件很小,这可能无关紧要;但随着文件大小的增加,LOH 的占用也会成倍增长。

作为补充,我的最终解决方案:

/// <summary>
/// Streams the payload behind a playlist item to disk in small chunks via
/// <c>WebClient.OpenReadAsync</c>, so the response is never held in a single array.
/// </summary>
public class DownloadItem : DownloadManagerItem
{
    #region Fields

    private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

    // Client of the most recently started download; kept so StopDownload/Dispose can reach it.
    private WebClient _webClient;

    #endregion

    #region Properties

    /// <summary>Target path of the downloaded file.</summary>
    public string SavePath { get; }

    /// <summary>When false, an existing file at <see cref="SavePath"/> is left untouched.</summary>
    public bool Overwrite { get; }

    /// <summary>Format requested for this download.</summary>
    public DownloadFormat DownloadFormat { get; }

    #endregion

    /// <summary>Creates a download for <paramref name="item"/> that will be stored at <paramref name="savePath"/>.</summary>
    public DownloadItem(PlaylistItem item, string savePath, DownloadFormat downloadFormat, bool overwrite = false)
        : base(item)
    {
        SavePath = savePath;
        Overwrite = overwrite;
        DownloadFormat = downloadFormat;
    }

    /// <summary>
    /// Resolves the download URL and opens the response stream asynchronously.
    /// Every outcome is reported through the completion event.
    /// </summary>
    public override void StartDownload()
    {
        if (File.Exists(SavePath) && !Overwrite)
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true));
            return;
        }

        OnDownloadItemDownloadProgressChanged(new DownloadProgressChangedEventArgs(1));
        Item.RetreiveDownloadUrl();

        if (string.IsNullOrEmpty(Item.DownloadUrl))
        {
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true,
                new InvalidOperationException("Could not retreive download url")));
            return;
        }

        // The client must outlive this method: OpenReadAsync returns immediately, so a
        // using-block here would dispose the client while the request is still in flight.
        // It is disposed by the completion handler (or below if starting the request fails).
        var client = new WebClient();
        _webClient = client;

        try
        {
            client.Headers.Add("user-agent",
                "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");

            client.OpenReadCompleted += WebClientOnOpenReadCompleted;

            client.OpenReadAsync(new Uri(Item.DownloadUrl));
        }
        catch (Exception ex)
        {
            client.Dispose();
            Logger.Warn(ex, "Error downloading track {0}", Item.VideoId);
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
        }
    }

    /// <summary>
    /// Handles the opened response: validates the outcome, streams the body to disk and
    /// releases the client afterwards.
    /// </summary>
    private void WebClientOnOpenReadCompleted(object sender, OpenReadCompletedEventArgs openReadCompletedEventArgs)
    {
        var client = (WebClient)sender;

        try
        {
            // Result throws when the operation was cancelled or failed, so both cases are
            // reported up front with the real error.
            if (openReadCompletedEventArgs.Cancelled || openReadCompletedEventArgs.Error != null)
            {
                OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, openReadCompletedEventArgs.Error));
                return;
            }

            if (!Overwrite && File.Exists(SavePath))
            {
                // The file appeared since StartDownload checked. Close the response and still
                // raise the completion event (same arguments StartDownload uses for this case)
                // so listeners are not left waiting.
                openReadCompletedEventArgs.Result.Dispose();
                OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true));
                return;
            }

            SaveResponse(client, openReadCompletedEventArgs.Result);
        }
        finally
        {
            // Disposed last: the response headers are read from the client while saving.
            client.Dispose();
        }
    }

    /// <summary>
    /// Copies <paramref name="response"/> into a temp file in 16 KiB chunks, moves it to
    /// <see cref="SavePath"/> and raises the completion event.
    /// </summary>
    private void SaveResponse(WebClient client, Stream response)
    {
        string tmpPath = null;

        try
        {
            using (response)
            {
                var totalLength = ReadContentLength(client);
                long processed = 0;
                tmpPath = Path.GetTempFileName();

                using (var fs = File.Create(tmpPath))
                {
                    var buffer = new byte[16 * 1024];
                    int read;

                    while ((read = response.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        fs.Write(buffer, 0, read);

                        processed += read;
                        OnDownloadItemDownloadProgressChanged(new DownloadProgressChangedEventArgs(processed, totalLength));
                    }
                }
            }

            // File.Move refuses to replace an existing file, so clear the target first
            // when overwriting was requested.
            if (Overwrite)
                File.Delete(SavePath);

            File.Move(tmpPath, SavePath);
            tmpPath = null;

            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(false));
        }
        catch (Exception ex)
        {
            DeleteTempFile(tmpPath);
            OnDownloadItemDownloadCompleted(new DownloadCompletedEventArgs(true, ex));
        }
    }

    /// <summary>Returns the Content-Length response header as an int, or 0 when it is missing or unparsable.</summary>
    private static int ReadContentLength(WebClient client)
    {
        int length;
        var header = client.ResponseHeaders?["Content-Length"];

        return int.TryParse(header, out length) ? length : 0;
    }

    /// <summary>Best-effort removal of a leftover temp file; failures are ignored.</summary>
    private static void DeleteTempFile(string path)
    {
        if (path == null) return;

        try
        {
            File.Delete(path);
        }
        catch (IOException)
        {
            // Cleanup only; the original failure is what gets reported.
        }
        catch (UnauthorizedAccessException)
        {
            // Cleanup only; the original failure is what gets reported.
        }
    }

    /// <summary>Requests cancellation of the download started last, if any.</summary>
    public override void StopDownload()
    {
        _webClient?.CancelAsync();
    }

    /// <summary>Releases the client of the download started last, if any.</summary>
    public override void Dispose()
    {
        _webClient?.Dispose();
    }

    /// <summary>Hash code of the wrapped item, consistent with <see cref="Equals(object)"/>.</summary>
    public override int GetHashCode()
    {
        return Item.GetHashCode();
    }

    /// <summary>Two downloads are equal when they wrap equal playlist items.</summary>
    public override bool Equals(object obj)
    {
        var item = obj as DownloadItem;

        return Item.Equals(item?.Item);
    }
}

不过谢谢你的帮助!