检测/诊断线程饥饿

Detecting/Diagnosing Thread Starvation

我正在对一个 IIS 应用程序进行性能/可扩展性测试,该应用程序在生产环境中有时会慢得像在爬行。我可以使用 NUnit 稳定地重现这种缓慢现象。

CPU 和内存在测试期间或生产环境出现缓慢时都没有出现峰值。我强烈怀疑应用程序正遭受线程饥饿,因为瓶颈似乎并非由 CPU、内存、I/O 或数据库访问引起。我确实看到了一些看似线程饥饿的迹象;例如,NLog 的异步日志文件写入往往会出现长时间的静默,随后突然写入一批带有较早时间戳的记录(即低优先级线程要等到有线程空闲后才能写入)。

我可以采取哪些步骤来明确判断应用程序确实处于线程饥饿状态,并且(假设确实如此)准确定位系统中导致问题的部分?

编辑

我忘了提到几乎所有代码都是同步的(这是一个遗留系统)。

根据 Sinatr 的评论,我阅读了有关 ThreadPool.SetMinThreads 和 TaskCreationOptions.LongRunning 的资料,包括相关问题的答案。

将 MinThreads 设置为高于默认值的数值,对我的情况产生了巨大的影响。我创建了一个简单的后台进程,用来观察 ThreadPool 中的可用线程数在测试运行期间是否发生显著变化并超过 MinThreads 值(确实如此)。

这是我用来诊断的一些代码。它不适合在生产环境中使用,而且此处的线程使用情况报告只有在线程数首次增长时才有意义。另请注意,Timer 在到期(触发 Elapsed 事件)时也需要一个线程,因此它同样要等待可用的线程。

静态变量:

    // Timer whose Elapsed event drives the periodic thread-pool sampling.
    private static Timer _timer;
    // Largest "active" worker-thread count reported so far; only ever raised by the handler.
    private static int _lastActiveThreads;
    // Available worker-thread count seen on the previous sample; used to skip unchanged samples.
    private static int _lastAvailableThreads;
    // Worker-thread maximum captured once at startup via ThreadPool.GetMaxThreads.
    private static int _maxThreads;
    // Worker-thread minimum captured once at startup via ThreadPool.GetMinThreads.
    private static int _minThreads;

启动时运行:

    // Snapshot the pool's worker-thread limits once; the completion-port values
    // are required by the API but are not used by the diagnostics.
    int completionPortThreads;
    ThreadPool.GetMaxThreads(out _maxThreads, out completionPortThreads);
    ThreadPool.GetMinThreads(out _minThreads, out completionPortThreads);

    // Configure a repeating timer that samples the pool every 500 ms.
    _timer = new Timer();
    _timer.AutoReset = true;
    _timer.Interval = 500;

    // Hook up the sampling handler, then begin raising Elapsed events.
    _timer.Elapsed += TimerElasped;
    _timer.Start();

Elapsed 事件处理方法:

    /// <summary>
    /// Timer callback that samples the thread pool and logs every new high-water mark of
    /// in-use worker threads.
    /// </summary>
    /// <remarks>
    /// "Active" threads are computed as the cached pool maximum minus the currently available
    /// worker threads. A message is logged only when that figure exceeds the largest value
    /// reported so far; a second message is logged when it also exceeds the cached minimum.
    /// The static bookkeeping fields are updated without synchronization.
    /// </remarks>
    /// <param name="sender">The timer raising the event (unused).</param>
    /// <param name="e">Elapsed event data (unused).</param>
    private static void TimerElasped(object sender, ElapsedEventArgs e)
    {
        int availWorkerThreads;
        int completionPortThreads; // required by the API; the value is not used

        ThreadPool.GetAvailableThreads(out availWorkerThreads, out completionPortThreads);

        // Nothing changed since the previous sample: nothing to report.
        if (availWorkerThreads == _lastAvailableThreads)
        {
            return;
        }

        _lastAvailableThreads = availWorkerThreads;

        var activeThreads = _maxThreads - availWorkerThreads;

        // Only report when usage climbs past the highest value reported so far.
        if (activeThreads <= _lastActiveThreads)
        {
            return;
        }

        _lastActiveThreads = activeThreads;
        Logger.Log($"+++++ Active Threads is now: {activeThreads}");

        if (activeThreads > _minThreads)
        {
            var diff = activeThreads - _minThreads;
            Logger.Log($"+++++ Active threads is now {activeThreads}, which is {diff} more than minThread value of {_minThreads}.  This may be causing delays.");
        }
    }

我基于上面的代码写出了下面这个类:

using System;
using System.Threading;
using System.Timers;
using log4net;

using Timer = System.Timers.Timer;

namespace somewhere
{
    /// <summary>
    /// Periodically samples the thread pool and logs worker and I/O completion-port thread
    /// usage, emitting WARN / RECOVERY messages when usage crosses the thresholds below.
    /// </summary>
    /// <remarks>
    /// Sampling starts when an instance is constructed and stops when <see cref="Dispose"/>
    /// is called. "Active" threads are computed as the pool maximum captured at construction
    /// minus the currently available threads. The warning flags are updated from the timer
    /// callback without synchronization.
    /// </remarks>
    public class ThreadStatsLogger : IDisposable
    {
        /// <summary>Available-thread count below which a depletion warning is logged.</summary>
        private const int DepletionWarnLevel = 10;

        /// <summary>Extra headroom above <see cref="DepletionWarnLevel"/> required before recovery is logged.</summary>
        private const int HysteresisLevel = 10;

        /// <summary>Interval between samples, in milliseconds.</summary>
        private const double SampleRateMilliseconds = 500;

        private static readonly ILog _logger = LogManager.GetLogger(typeof(ThreadStatsLogger));

        // Pool limits captured once at construction.
        private readonly int _maxWorkerThreadLevel;
        private readonly int _maxIoThreadLevel;
        private readonly int _minWorkerThreadLevel;
        private readonly int _minIoThreadLevel;

        // Active-thread counts below which an "above minimum" warning is considered recovered.
        private readonly int _minWorkerThreadLevelRecovery;
        private readonly int _minIoThreadLevelRecovery;

        // Latches so that each condition is logged once when it trips and once when it clears.
        private bool _workerThreadWarned;
        private bool _ioThreadWarned;
        private bool _minWorkerThreadLevelWarned;
        private bool _minIoThreadLevelWarned;

        private Timer _timer;

        /// <summary>
        /// Captures the pool limits, logs the startup statistics and starts periodic sampling.
        /// </summary>
        public ThreadStatsLogger()
        {
            ThreadPool.GetMinThreads(out _minWorkerThreadLevel, out _minIoThreadLevel);
            ThreadPool.GetMaxThreads(out _maxWorkerThreadLevel, out _maxIoThreadLevel);
            ThreadPool.GetAvailableThreads(out int workerAvailable, out int ioAvailable);

            _minWorkerThreadLevelRecovery = ComputeRecoveryLevel(_minWorkerThreadLevel);
            _minIoThreadLevelRecovery = ComputeRecoveryLevel(_minIoThreadLevel);

            _logger.InfoFormat("Thread statistics at startup: minimum worker:{0} io:{1}", _minWorkerThreadLevel, _minIoThreadLevel );
            _logger.InfoFormat("Thread statistics at startup: maximum worker:{0} io:{1}", _maxWorkerThreadLevel, _maxIoThreadLevel);
            _logger.InfoFormat("Thread statistics at startup: available worker:{0} io:{1}", workerAvailable, ioAvailable);

            // Start the timer only after every field the handler reads has been assigned, so
            // an early tick can never observe zeroed limits or thresholds.
            _timer = new Timer
            {
                AutoReset = true,
                Interval = SampleRateMilliseconds,
            };
            _timer.Elapsed += OnTimerElapsed;
            _timer.Start();
        }

        /// <summary>
        /// Stops sampling and releases the timer. Calling it more than once is a no-op.
        /// </summary>
        public void Dispose()
        {
            var timer = _timer;
            if (timer == null)
            {
                return;
            }

            _timer = null;
            timer.Elapsed -= OnTimerElapsed;
            timer.Dispose();
        }

        /// <summary>
        /// Returns three quarters of <paramref name="minLevel"/> (integer division), or
        /// <paramref name="minLevel"/> minus one when that would equal the minimum itself.
        /// </summary>
        /// <remarks>
        /// NOTE(review): for a minimum of 1 this yields 0, and because recovery is tested with a
        /// strict "less than", RECOVERY can then never be logged — confirm whether that matters.
        /// </remarks>
        private static int ComputeRecoveryLevel(int minLevel)
        {
            var recovery = (minLevel * 3) / 4;
            return recovery == minLevel ? minLevel - 1 : recovery;
        }

        /// <summary>
        /// Logs once when a condition trips and once when it clears, tracking state in
        /// <paramref name="warned"/>.
        /// </summary>
        private static void UpdateWarningState(
            ref bool warned,
            bool tripped,
            bool recovered,
            string warnFormat,
            string recoveryFormat,
            int current,
            int level)
        {
            if (tripped && !warned)
            {
                _logger.InfoFormat(warnFormat, current, level);
                warned = true;
            }

            if (recovered && warned)
            {
                _logger.InfoFormat(recoveryFormat, current, level);
                warned = false;
            }
        }

        /// <summary>
        /// Timer callback: logs the current active thread counts and updates the four
        /// warn/recovery latches (worker and I/O, "above minimum" and "nearly depleted").
        /// </summary>
        private void OnTimerElapsed(object sender, ElapsedEventArgs e)
        {
            ThreadPool.GetAvailableThreads(out int availableWorkerThreads, out int availableIoThreads);

            var activeWorkerThreads = _maxWorkerThreadLevel - availableWorkerThreads;
            var activeIoThreads = _maxIoThreadLevel - availableIoThreads;

            _logger.InfoFormat("Thread statistics: active worker:{0} io:{1}", activeWorkerThreads, activeIoThreads);

            // Active threads above the pool minimum.
            UpdateWarningState(
                ref _minWorkerThreadLevelWarned,
                activeWorkerThreads > _minWorkerThreadLevel,
                activeWorkerThreads < _minWorkerThreadLevelRecovery,
                "Thread statistics WARN active worker threads above minimum {0}:{1}",
                "Thread statistics RECOVERY active worker threads below minimum {0}:{1}",
                activeWorkerThreads,
                _minWorkerThreadLevel);

            UpdateWarningState(
                ref _minIoThreadLevelWarned,
                activeIoThreads > _minIoThreadLevel,
                activeIoThreads < _minIoThreadLevelRecovery,
                "Thread statistics WARN active io threads above minimum {0}:{1}",
                "Thread statistics RECOVERY active io threads below minimum {0}:{1}",
                activeIoThreads,
                _minIoThreadLevel);

            // Available threads close to exhaustion; recovery requires extra headroom.
            UpdateWarningState(
                ref _workerThreadWarned,
                availableWorkerThreads < DepletionWarnLevel,
                availableWorkerThreads > (DepletionWarnLevel + HysteresisLevel),
                "Thread statistics WARN available worker threads below warning level {0}:{1}",
                "Thread statistics RECOVERY available worker thread recovery {0}:{1}",
                availableWorkerThreads,
                DepletionWarnLevel);

            UpdateWarningState(
                ref _ioThreadWarned,
                availableIoThreads < DepletionWarnLevel,
                availableIoThreads > (DepletionWarnLevel + HysteresisLevel),
                "Thread statistics WARN available io threads below warning level {0}:{1}",
                "Thread statistics RECOVERY available io thread recovery {0}:{1}",
                availableIoThreads,
                DepletionWarnLevel);
        }
    }
}

What steps can I take to definitively determine that the application is indeed thread starved

在 Windows 操作系统中,进程和线程的数量仅受创建它们所需资源的限制,并不存在一个不可逾越的固定上限。线程饥饿有可能因此发生,但这种情况应该可以通过底层资源的匮乏检测出来,例如 CPU 占用过高或可用内存过低。

IIS 具有由注册表项控制的固定限制。

如果超过这些限制,请求将会排队。在线程饥饿的情况下,您会看到队列长度增加。这篇帖子列出了队列性能计数器,可以通过监视这些计数器来判断是否属于这种情况。