Detecting/Diagnosing 线程饥饿
Detecting/Diagnosing Thread Starvation
我正在对一个 IIS 应用程序进行 performance/scalability 测试,该应用程序在生产环境中有时会慢得像爬行一样。我能够使用 NUnit 稳定地重现这种缓慢。
CPU 和内存在测试期间或在生产中出现缓慢时都不会出现峰值。我强烈怀疑应用程序正在遭受线程饥饿,因为瓶颈似乎不是由 CPU、内存、I/O 或数据库访问造成的。我确实看到了疑似线程饥饿的迹象;例如,NLog 的异步日志文件写入往往会出现长时间的静默,然后突然出现一批带有较旧时间戳的日志(即低优先级线程正在等待有线程被释放以便写入)。
我可以采取哪些步骤来明确确定应用程序确实线程不足,并且(假设是这种情况)查明导致问题的系统的确切区域?
编辑
我忘了提到几乎所有代码都是同步的(这是一个遗留系统)。
根据 Sinatr 的评论,我阅读了 ThreadPool.SetMinThreads 和 TaskCreationOptions.LongRunning,包括
的答案
将 MinThreads 设置为更高的默认值对我的情况产生了巨大的影响。我创建了一个简单的后台进程,用来观察在测试运行期间 ThreadPool 中的可用线程数是否发生了显著变化,以及活动线程数是否超过了 MinThreads 值(确实超过了)。
这是我用来诊断的一些代码。它不适用于生产环境,并且此处显示的线程使用情况报告只有在活动线程数首次增加时才有意义。另请注意,Timer 在触发(Elapsed)时本身也需要一个线程,因此它同样需要等待一个可用的线程。
静态变量:
// Timer that periodically raises the sampling callback.
private static Timer _timer;
// Highest active worker-thread count seen so far; only ever raised by the callback.
private static int _lastActiveThreads;
// Available worker-thread count recorded on the most recent sample that differed from the previous one.
private static int _lastAvailableThreads;
// Worker-thread maximum reported by ThreadPool.GetMaxThreads at startup.
private static int _maxThreads;
// Worker-thread minimum reported by ThreadPool.GetMinThreads at startup.
private static int _minThreads;
在启动时运行:
// Record the pool's worker-thread limits once; the completion-port values are not used.
int completionPortThreads;
ThreadPool.GetMinThreads(out _minThreads, out completionPortThreads);
ThreadPool.GetMaxThreads(out _maxThreads, out completionPortThreads);

// Configure a repeating 500 ms timer, wire up the sampling callback, then start it.
var sampler = new Timer();
sampler.Interval = 500;
sampler.AutoReset = true;
sampler.Elapsed += TimerElasped;
_timer = sampler;
_timer.Start();
Elapsed 事件处理方法:
/// <summary>
/// Timer callback: samples the thread pool and logs whenever the number of
/// active worker threads reaches a new high.
/// </summary>
/// <param name="sender">The object raising the event (not used).</param>
/// <param name="e">Elapsed event data (not used).</param>
private static void TimerElasped(object sender, ElapsedEventArgs e)
{
    // The original also queried GetMinThreads on every tick into a local that was
    // never read; the comparison below has always used the cached _minThreads.
    int availWorkerThreads;
    int completionPortThreads;
    ThreadPool.GetAvailableThreads(out availWorkerThreads, out completionPortThreads);

    // Active threads = configured maximum minus what is currently available.
    var activeThreads = _maxThreads - availWorkerThreads;

    // Nothing changed since the last sample: nothing to report.
    if (availWorkerThreads == _lastAvailableThreads)
    {
        return;
    }
    _lastAvailableThreads = availWorkerThreads;

    // Only report when a new high-water mark is reached.
    if (activeThreads <= _lastActiveThreads)
    {
        return;
    }
    _lastActiveThreads = activeThreads;
    Logger.Log($"+++++ Active Threads is now: {activeThreads}");

    if (activeThreads > _minThreads)
    {
        var diff = activeThreads - _minThreads;
        Logger.Log($"+++++ Active threads is now {activeThreads}, which is {diff} more than minThread value of {_minThreads}. This may be causing delays.");
    }
}
我根据以上想出了这个
using System;
using System.Threading;
using System.Timers;
using log4net;
using Timer = System.Timers.Timer;
namespace somewhere
{
/// <summary>
/// Samples the thread pool on a fixed interval and logs the number of active
/// worker and I/O threads, plus one-shot WARN / RECOVERY messages when usage
/// crosses the pool minimum or when available threads run low.
/// </summary>
public class ThreadStatsLogger : IDisposable
{
    // Available-thread count below which a depletion warning is logged.
    private const int DEPLETION_WARN_LEVEL = 10;
    // Extra headroom above DEPLETION_WARN_LEVEL required before logging recovery.
    private const int HISTERESIS_LEVEL = 10;
    // Sampling period of the timer, in milliseconds.
    private const double SAMPLE_RATE_MILLISECONDS = 500;

    private static readonly ILog _logger = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);

    // Latches: true while the corresponding warning is outstanding.
    private bool _workerThreadWarned = false;
    private bool _ioThreadWarned = false;
    private bool _minWorkerThreadLevelWarned = false;
    private bool _minIoThreadLevelWarned = false;

    // Pool limits captured once in the constructor.
    private readonly int _maxWorkerThreadLevel;
    private readonly int _maxIoThreadLevel;
    private readonly int _minWorkerThreadLevel;
    private readonly int _minWorkerThreadLevelRecovery;
    private readonly int _minIoThreadLevel;
    private readonly int _minIoThreadLevelRecovery;

    private Timer _timer;

    /// <summary>
    /// Captures the pool limits, logs them, computes the recovery thresholds
    /// and then starts periodic sampling.
    /// </summary>
    public ThreadStatsLogger()
    {
        ThreadPool.GetMinThreads(out _minWorkerThreadLevel, out _minIoThreadLevel);
        ThreadPool.GetMaxThreads(out _maxWorkerThreadLevel, out _maxIoThreadLevel);
        ThreadPool.GetAvailableThreads(out int workerAvailable, out int ioAvailable);

        _logger.InfoFormat("Thread statistics at startup: minimum worker:{0} io:{1}", _minWorkerThreadLevel, _minIoThreadLevel );
        _logger.InfoFormat("Thread statistics at startup: maximum worker:{0} io:{1}", _maxWorkerThreadLevel, _maxIoThreadLevel);
        _logger.InfoFormat("Thread statistics at startup: available worker:{0} io:{1}", workerAvailable, ioAvailable);

        // Recovery is declared at 3/4 of the minimum; if integer division leaves
        // the value unchanged, step one below the minimum instead.
        _minWorkerThreadLevelRecovery = (_minWorkerThreadLevel * 3) / 4;
        _minIoThreadLevelRecovery = (_minIoThreadLevel * 3) / 4;
        if (_minWorkerThreadLevelRecovery == _minWorkerThreadLevel)
        {
            _minWorkerThreadLevelRecovery = _minWorkerThreadLevel - 1;
        }
        if (_minIoThreadLevelRecovery == _minIoThreadLevel)
        {
            _minIoThreadLevelRecovery = _minIoThreadLevel - 1;
        }

        // Start the timer last so the callback can never observe thresholds
        // that have not been assigned yet.
        _timer = new Timer
        {
            AutoReset = true,
            Interval = SAMPLE_RATE_MILLISECONDS,
        };
        _timer.Elapsed += TimerElasped;
        _timer.Start();
    }

    /// <summary>
    /// Timer callback: logs current active thread counts and updates the four
    /// warning latches (above-minimum and depletion, for worker and I/O threads).
    /// </summary>
    /// <param name="sender">The object raising the event (not used).</param>
    /// <param name="e">Elapsed event data (not used).</param>
    private void TimerElasped(object sender, ElapsedEventArgs e)
    {
        ThreadPool.GetAvailableThreads(out int availableWorkerThreads, out int availableIoThreads);
        var activeWorkerThreads = _maxWorkerThreadLevel - availableWorkerThreads;
        var activeIoThreads = _maxIoThreadLevel - availableIoThreads;
        _logger.InfoFormat("Thread statistics: active worker:{0} io:{1}", activeWorkerThreads, activeIoThreads);

        UpdateWarning(
            ref _minWorkerThreadLevelWarned,
            activeWorkerThreads > _minWorkerThreadLevel,
            activeWorkerThreads < _minWorkerThreadLevelRecovery,
            "Thread statistics WARN active worker threads above minimum {0}:{1}",
            "Thread statistics RECOVERY active worker threads below minimum {0}:{1}",
            activeWorkerThreads,
            _minWorkerThreadLevel);

        UpdateWarning(
            ref _minIoThreadLevelWarned,
            activeIoThreads > _minIoThreadLevel,
            activeIoThreads < _minIoThreadLevelRecovery,
            "Thread statistics WARN active io threads above minimum {0}:{1}",
            "Thread statistics RECOVERY active io threads below minimum {0}:{1}",
            activeIoThreads,
            _minIoThreadLevel);

        UpdateWarning(
            ref _workerThreadWarned,
            availableWorkerThreads < DEPLETION_WARN_LEVEL,
            availableWorkerThreads > (DEPLETION_WARN_LEVEL + HISTERESIS_LEVEL),
            "Thread statistics WARN available worker threads below warning level {0}:{1}",
            "Thread statistics RECOVERY available worker thread recovery {0}:{1}",
            availableWorkerThreads,
            DEPLETION_WARN_LEVEL);

        UpdateWarning(
            ref _ioThreadWarned,
            availableIoThreads < DEPLETION_WARN_LEVEL,
            availableIoThreads > (DEPLETION_WARN_LEVEL + HISTERESIS_LEVEL),
            "Thread statistics WARN available io threads below warning level {0}:{1}",
            "Thread statistics RECOVERY available io thread recovery {0}:{1}",
            availableIoThreads,
            DEPLETION_WARN_LEVEL);
    }

    /// <summary>
    /// Sets or clears one warning latch, logging a message on each transition.
    /// </summary>
    /// <param name="warned">Latch that is true while the warning is outstanding.</param>
    /// <param name="tripped">True when the warning condition currently holds.</param>
    /// <param name="recovered">True when the recovery condition currently holds.</param>
    /// <param name="warnFormat">Format string logged when the latch is set.</param>
    /// <param name="recoveryFormat">Format string logged when the latch is cleared.</param>
    /// <param name="current">Sampled value substituted for {0}.</param>
    /// <param name="level">Reference level substituted for {1}.</param>
    private static void UpdateWarning(ref bool warned, bool tripped, bool recovered, string warnFormat, string recoveryFormat, int current, int level)
    {
        if (tripped && !warned)
        {
            _logger.InfoFormat(warnFormat, current, level);
            warned = true;
        }

        // Checked after the warning step, in the same order as the original code.
        if (recovered && warned)
        {
            _logger.InfoFormat(recoveryFormat, current, level);
            warned = false;
        }
    }

    /// <summary>
    /// Stops sampling and releases the timer. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        var timer = _timer;
        if (timer == null)
        {
            return;
        }

        _timer = null;
        timer.Stop();
        // Detach the handler so the timer no longer references this instance.
        timer.Elapsed -= TimerElasped;
        timer.Dispose();
    }
}
}
What steps can I take to definitively determine that the application is indeed thread starved
在 Windows 操作系统中,进程和线程数仅受创建它们所需的资源的限制。 There is no fixed number that cannot be exceeded。线程饥饿可能会以这种方式发生,但这应该可以通过缺乏底层资源来检测,例如高 CPU 或低 RAM。
IIS 具有由 registry keys 控制的固定限制。
如果超过这些限制,请求将排队。在线程饥饿的情况下,您会看到队列长度增加。此 post 显示队列性能计数器,可以监视这些计数器以查看是否属于这种情况。
我正在对一个 IIS 应用程序进行 performance/scalability 测试,该应用程序在生产环境中有时会慢得像爬行一样。我能够使用 NUnit 稳定地重现这种缓慢。
CPU 和内存在测试期间或在生产中出现缓慢时都不会出现峰值。我强烈怀疑应用程序正在遭受线程饥饿,因为瓶颈似乎不是由 CPU、内存、I/O 或数据库访问造成的。我确实看到了疑似线程饥饿的迹象;例如,NLog 的异步日志文件写入往往会出现长时间的静默,然后突然出现一批带有较旧时间戳的日志(即低优先级线程正在等待有线程被释放以便写入)。
我可以采取哪些步骤来明确确定应用程序确实线程不足,并且(假设是这种情况)查明导致问题的系统的确切区域?
编辑
我忘了提到几乎所有代码都是同步的(这是一个遗留系统)。
根据 Sinatr 的评论,我阅读了 ThreadPool.SetMinThreads 和 TaskCreationOptions.LongRunning,包括
将 MinThreads 设置为更高的默认值对我的情况产生了巨大的影响。我创建了一个简单的后台进程,用来观察在测试运行期间 ThreadPool 中的可用线程数是否发生了显著变化,以及活动线程数是否超过了 MinThreads 值(确实超过了)。
这是我用来诊断的一些代码。它不适用于生产环境,并且此处显示的线程使用情况报告只有在活动线程数首次增加时才有意义。另请注意,Timer 在触发(Elapsed)时本身也需要一个线程,因此它同样需要等待一个可用的线程。
静态变量:
// Timer that periodically raises the sampling callback.
private static Timer _timer;
// Highest active worker-thread count seen so far; only ever raised by the callback.
private static int _lastActiveThreads;
// Available worker-thread count recorded on the most recent sample that differed from the previous one.
private static int _lastAvailableThreads;
// Worker-thread maximum reported by ThreadPool.GetMaxThreads at startup.
private static int _maxThreads;
// Worker-thread minimum reported by ThreadPool.GetMinThreads at startup.
private static int _minThreads;
在启动时运行:
// Record the pool's worker-thread limits once; the completion-port values are not used.
int completionPortThreads;
ThreadPool.GetMinThreads(out _minThreads, out completionPortThreads);
ThreadPool.GetMaxThreads(out _maxThreads, out completionPortThreads);

// Configure a repeating 500 ms timer, wire up the sampling callback, then start it.
var sampler = new Timer();
sampler.Interval = 500;
sampler.AutoReset = true;
sampler.Elapsed += TimerElasped;
_timer = sampler;
_timer.Start();
Elapsed 事件处理方法:
/// <summary>
/// Timer callback: samples the thread pool and logs whenever the number of
/// active worker threads reaches a new high.
/// </summary>
/// <param name="sender">The object raising the event (not used).</param>
/// <param name="e">Elapsed event data (not used).</param>
private static void TimerElasped(object sender, ElapsedEventArgs e)
{
    // The original also queried GetMinThreads on every tick into a local that was
    // never read; the comparison below has always used the cached _minThreads.
    int availWorkerThreads;
    int completionPortThreads;
    ThreadPool.GetAvailableThreads(out availWorkerThreads, out completionPortThreads);

    // Active threads = configured maximum minus what is currently available.
    var activeThreads = _maxThreads - availWorkerThreads;

    // Nothing changed since the last sample: nothing to report.
    if (availWorkerThreads == _lastAvailableThreads)
    {
        return;
    }
    _lastAvailableThreads = availWorkerThreads;

    // Only report when a new high-water mark is reached.
    if (activeThreads <= _lastActiveThreads)
    {
        return;
    }
    _lastActiveThreads = activeThreads;
    Logger.Log($"+++++ Active Threads is now: {activeThreads}");

    if (activeThreads > _minThreads)
    {
        var diff = activeThreads - _minThreads;
        Logger.Log($"+++++ Active threads is now {activeThreads}, which is {diff} more than minThread value of {_minThreads}. This may be causing delays.");
    }
}
我根据以上想出了这个
using System;
using System.Threading;
using System.Timers;
using log4net;
using Timer = System.Timers.Timer;
namespace somewhere
{
/// <summary>
/// Samples the thread pool on a fixed interval and logs the number of active
/// worker and I/O threads, plus one-shot WARN / RECOVERY messages when usage
/// crosses the pool minimum or when available threads run low.
/// </summary>
public class ThreadStatsLogger : IDisposable
{
    // Available-thread count below which a depletion warning is logged.
    private const int DEPLETION_WARN_LEVEL = 10;
    // Extra headroom above DEPLETION_WARN_LEVEL required before logging recovery.
    private const int HISTERESIS_LEVEL = 10;
    // Sampling period of the timer, in milliseconds.
    private const double SAMPLE_RATE_MILLISECONDS = 500;

    private static readonly ILog _logger = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);

    // Latches: true while the corresponding warning is outstanding.
    private bool _workerThreadWarned = false;
    private bool _ioThreadWarned = false;
    private bool _minWorkerThreadLevelWarned = false;
    private bool _minIoThreadLevelWarned = false;

    // Pool limits captured once in the constructor.
    private readonly int _maxWorkerThreadLevel;
    private readonly int _maxIoThreadLevel;
    private readonly int _minWorkerThreadLevel;
    private readonly int _minWorkerThreadLevelRecovery;
    private readonly int _minIoThreadLevel;
    private readonly int _minIoThreadLevelRecovery;

    private Timer _timer;

    /// <summary>
    /// Captures the pool limits, logs them, computes the recovery thresholds
    /// and then starts periodic sampling.
    /// </summary>
    public ThreadStatsLogger()
    {
        ThreadPool.GetMinThreads(out _minWorkerThreadLevel, out _minIoThreadLevel);
        ThreadPool.GetMaxThreads(out _maxWorkerThreadLevel, out _maxIoThreadLevel);
        ThreadPool.GetAvailableThreads(out int workerAvailable, out int ioAvailable);

        _logger.InfoFormat("Thread statistics at startup: minimum worker:{0} io:{1}", _minWorkerThreadLevel, _minIoThreadLevel );
        _logger.InfoFormat("Thread statistics at startup: maximum worker:{0} io:{1}", _maxWorkerThreadLevel, _maxIoThreadLevel);
        _logger.InfoFormat("Thread statistics at startup: available worker:{0} io:{1}", workerAvailable, ioAvailable);

        // Recovery is declared at 3/4 of the minimum; if integer division leaves
        // the value unchanged, step one below the minimum instead.
        _minWorkerThreadLevelRecovery = (_minWorkerThreadLevel * 3) / 4;
        _minIoThreadLevelRecovery = (_minIoThreadLevel * 3) / 4;
        if (_minWorkerThreadLevelRecovery == _minWorkerThreadLevel)
        {
            _minWorkerThreadLevelRecovery = _minWorkerThreadLevel - 1;
        }
        if (_minIoThreadLevelRecovery == _minIoThreadLevel)
        {
            _minIoThreadLevelRecovery = _minIoThreadLevel - 1;
        }

        // Start the timer last so the callback can never observe thresholds
        // that have not been assigned yet.
        _timer = new Timer
        {
            AutoReset = true,
            Interval = SAMPLE_RATE_MILLISECONDS,
        };
        _timer.Elapsed += TimerElasped;
        _timer.Start();
    }

    /// <summary>
    /// Timer callback: logs current active thread counts and updates the four
    /// warning latches (above-minimum and depletion, for worker and I/O threads).
    /// </summary>
    /// <param name="sender">The object raising the event (not used).</param>
    /// <param name="e">Elapsed event data (not used).</param>
    private void TimerElasped(object sender, ElapsedEventArgs e)
    {
        ThreadPool.GetAvailableThreads(out int availableWorkerThreads, out int availableIoThreads);
        var activeWorkerThreads = _maxWorkerThreadLevel - availableWorkerThreads;
        var activeIoThreads = _maxIoThreadLevel - availableIoThreads;
        _logger.InfoFormat("Thread statistics: active worker:{0} io:{1}", activeWorkerThreads, activeIoThreads);

        UpdateWarning(
            ref _minWorkerThreadLevelWarned,
            activeWorkerThreads > _minWorkerThreadLevel,
            activeWorkerThreads < _minWorkerThreadLevelRecovery,
            "Thread statistics WARN active worker threads above minimum {0}:{1}",
            "Thread statistics RECOVERY active worker threads below minimum {0}:{1}",
            activeWorkerThreads,
            _minWorkerThreadLevel);

        UpdateWarning(
            ref _minIoThreadLevelWarned,
            activeIoThreads > _minIoThreadLevel,
            activeIoThreads < _minIoThreadLevelRecovery,
            "Thread statistics WARN active io threads above minimum {0}:{1}",
            "Thread statistics RECOVERY active io threads below minimum {0}:{1}",
            activeIoThreads,
            _minIoThreadLevel);

        UpdateWarning(
            ref _workerThreadWarned,
            availableWorkerThreads < DEPLETION_WARN_LEVEL,
            availableWorkerThreads > (DEPLETION_WARN_LEVEL + HISTERESIS_LEVEL),
            "Thread statistics WARN available worker threads below warning level {0}:{1}",
            "Thread statistics RECOVERY available worker thread recovery {0}:{1}",
            availableWorkerThreads,
            DEPLETION_WARN_LEVEL);

        UpdateWarning(
            ref _ioThreadWarned,
            availableIoThreads < DEPLETION_WARN_LEVEL,
            availableIoThreads > (DEPLETION_WARN_LEVEL + HISTERESIS_LEVEL),
            "Thread statistics WARN available io threads below warning level {0}:{1}",
            "Thread statistics RECOVERY available io thread recovery {0}:{1}",
            availableIoThreads,
            DEPLETION_WARN_LEVEL);
    }

    /// <summary>
    /// Sets or clears one warning latch, logging a message on each transition.
    /// </summary>
    /// <param name="warned">Latch that is true while the warning is outstanding.</param>
    /// <param name="tripped">True when the warning condition currently holds.</param>
    /// <param name="recovered">True when the recovery condition currently holds.</param>
    /// <param name="warnFormat">Format string logged when the latch is set.</param>
    /// <param name="recoveryFormat">Format string logged when the latch is cleared.</param>
    /// <param name="current">Sampled value substituted for {0}.</param>
    /// <param name="level">Reference level substituted for {1}.</param>
    private static void UpdateWarning(ref bool warned, bool tripped, bool recovered, string warnFormat, string recoveryFormat, int current, int level)
    {
        if (tripped && !warned)
        {
            _logger.InfoFormat(warnFormat, current, level);
            warned = true;
        }

        // Checked after the warning step, in the same order as the original code.
        if (recovered && warned)
        {
            _logger.InfoFormat(recoveryFormat, current, level);
            warned = false;
        }
    }

    /// <summary>
    /// Stops sampling and releases the timer. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        var timer = _timer;
        if (timer == null)
        {
            return;
        }

        _timer = null;
        timer.Stop();
        // Detach the handler so the timer no longer references this instance.
        timer.Elapsed -= TimerElasped;
        timer.Dispose();
    }
}
}
What steps can I take to definitively determine that the application is indeed thread starved
在 Windows 操作系统中,进程和线程数仅受创建它们所需的资源的限制。 There is no fixed number that cannot be exceeded。线程饥饿可能会以这种方式发生,但这应该可以通过缺乏底层资源来检测,例如高 CPU 或低 RAM。
IIS 具有由 registry keys 控制的固定限制。
如果超过这些限制,请求将排队。在线程饥饿的情况下,您会看到队列长度增加。此 post 显示队列性能计数器,可以监视这些计数器以查看是否属于这种情况。