Timing a process in C using clock(), times(), clock_gettime() and the rdtsc intrinsic returning confusing values
I'm timing some C code, comparing the output of times(), clock(), and clock_gettime(), and also using the rdtsc intrinsic to count how many clock cycles elapsed.

I noticed that clock() always gives me a value about 0.02 secs larger than what times() returns (after summing user + system).

When I call clock() at the start of the program, it already reports ~2k clock ticks instead of 0, as the output below shows.

OS is Ubuntu 20.04 LTS
Processor speed is 3.50 GHz

Sample output
The first two lines are printed at the start of the program, the last two at the end.
clock() returns 2593 clocks-per sec (0.00 secs)
times() yields: user CPU: 0.00; system CPU: 0.00
clock() returns 8616 clocks-per sec (0.01 secs)
times() yields: user CPU: 0.00; system CPU: 0.00
clock() returns 2448 clocks-per sec (0.00 secs)
times() yields: user CPU: 0.00; system CPU: 0.00
clock() returns 8403 clocks-per sec (0.01 secs)
times() yields: user CPU: 0.00; system CPU: 0.00
clock() returns 2541 clocks-per sec (0.00 secs)
times() yields: user CPU: 0.00; system CPU: 0.00
clock() returns 5915366 clocks-per sec (5.92 secs)
times() yields: user CPU: 5.49; system CPU: 0.41
Added rdtsc and clock_gettime() to the comparison:
clock() returns 2341 clocks-per sec (0.00 secs)
times() yields: user CPU: 0.00; system CPU: 0.00
resolution: 0.000000001
clockTtime: 143071.541191700
clock() returns 11560076 clocks-per sec (11.56 secs)
times() yields: user CPU: 10.77; system CPU: 0.78
resolution: 0.000000001
clockTtime: 143083.561227466
RTDSC COUNTER: 41973477274 CPU cycles
clock() returns 2325 clocks-per sec (0.00 secs)
times() yields: user CPU: 0.00; system CPU: 0.00
resolution: 0.000000001
clockTtime: 143570.023404324
clock() returns 12039250 clocks-per sec (12.04 secs)
times() yields: user CPU: 11.00; system CPU: 1.03
resolution: 0.000000001
clockTtime: 143583.562080061
RTDSC COUNTER: 47277160370 CPU cycles
Takeaway: the timings are off by .01-.02 secs, even though I would expect the time returned by clock() to have the lowest value, since it is measured before times() or clock_gettime().

For one of the outputs above:
clock() returns: 11.56 secs
times() returns: 11.55 secs
clock_gettime() returns: 12.020035766 secs
rdtsc returns 41973477274 cycles, which on a machine with a 3.5 GHz processor = 11.9924220783 secs; rdtsc is measured first and last, so it should have the highest value, since it also includes the clock(), times(), and clock_gettime() calls.
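For reference, here is the cycles-to-seconds conversion I am doing (a minimal sketch; the 3.5 GHz figure is my machine's nominal frequency, and the TSC on modern CPUs ticks at a fixed reference rate that may not match it exactly):

#include <stdio.h>
#include <stdint.h>

int main(void) {
    // seconds = cycles / frequency (Hz); 3.5e9 is the nominal 3.5 GHz
    uint64_t cycles = 41973477274ULL;
    double hz = 3.5e9;
    printf("%.10f secs\n", (double)cycles / hz); // prints 11.9924220783
    return 0;
}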
Relevant code:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h>
#include <sys/time.h>
#include <sys/times.h>
#include <unistd.h>
#include <x86intrin.h>

void timeAlgorithm(const char* msg) {
    // Three ways to get system process times - times(), clock(), clock_gettime()
    struct tms t;
    struct timespec tp;
    static struct timespec res;
    clock_t clockTime;
    static long clockTicks = 0;

    // Fetch clock ticks on first call
    if (clockTicks == 0) {
        clockTicks = sysconf(_SC_CLK_TCK);
        if (clockTicks == -1) {
            perror("Error getting sysconf(_SC_CLK_TCK) value, program will now exit");
            exit(EXIT_FAILURE);
        }
    }

    clockTime = clock();
    if (clockTime == -1) {
        perror("Error getting process clock time using clock(), program will now exit");
        exit(EXIT_FAILURE);
    }
    printf("\t clock() returns %ld clocks-per sec (%.2f secs)\n",
           (long)clockTime, (double)clockTime / CLOCKS_PER_SEC);

    if (times(&t) == -1) {
        perror("The times() call failed, this program will now exit.");
        exit(EXIT_FAILURE);
    }
    printf("\t times() yields: user CPU: %.2f; system CPU: %.2f\n",
           (double)t.tms_utime / clockTicks,
           (double)t.tms_stime / clockTicks);

    // Query the clock resolution
    if (!res.tv_sec) {
        if (clock_getres(CLOCK_MONOTONIC, &res) == -1) {
            perror("clock_getres() call failed, this program will now exit");
            exit(EXIT_FAILURE);
        }
    }
    if (clock_gettime(CLOCK_MONOTONIC, &tp) == -1) {
        perror("clock_gettime() call failed, this program will now exit.");
        exit(EXIT_FAILURE);
    }
    printf("\tresolution: %10jd.%09ld\n",
           (intmax_t)res.tv_sec, res.tv_nsec);
    printf("\tclockTtime: %10jd.%09ld\n",
           (intmax_t)tp.tv_sec, tp.tv_nsec);
}

int main(int argc, char* argv[]) {
    printf("CLOCKS_PER_SEC=%ld\tsysconf(_SC_CLK_TCK)=%ld\n\n",
           (long)CLOCKS_PER_SEC, sysconf(_SC_CLK_TCK));

    uint64_t start = __rdtsc();
    timeAlgorithm("At program start");

    // 1 less than 4 gigs because 2^32 - 1 is the max value for a uint32_t and 2^32 = 4gb
    // Not 2 or 4 gigs right now so I can debug faster
    uint32_t TWOGIGS = (uint32_t)(2147483648 / 1) - 1;
    // Set seed for random
    srandom(time(NULL));

    uint32_t TWOGIGSOFLONGS = (uint32_t)(TWOGIGS / sizeof(long));
    long* unsortedData = malloc(sizeof(long) * TWOGIGSOFLONGS);
    if (unsortedData == NULL) {
        perror("malloc failed, this program will now exit");
        exit(EXIT_FAILURE);
    }
    for (uint32_t i = 0; i < TWOGIGSOFLONGS; ++i) {
        unsortedData[i] = random();
        //printf("%d\t%ld\n", i, unsortedData[i]);
    }

    timeAlgorithm("At program end");
    uint64_t end = __rdtsc();
    printf("RTDSC COUNTER: %lu CPU cycles\n", end - start);
    return EXIT_SUCCESS;
}
This difference shouldn't matter in practice; I could just pick one of the functions and move on, but I'm curious whether anyone knows why all these values differ in this odd way.

It would make sense if the value from clock() were lower than that from times(), which were lower than clock_gettime(), which were lower than __rdtsc(), since that is the order in which these times are measured, but that is not the case, and it is confusing.
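As a quick sanity check (a minimal sketch of my own, not part of the program above): reading the clocks twice back-to-back with no work in between puts a rough upper bound on the per-call overhead, which should show whether measurement order alone can plausibly account for a .01-.02 sec gap.

#include <stdio.h>
#include <time.h>
#include <x86intrin.h>

int main(void) {
    // Read the TSC and CLOCK_MONOTONIC twice with nothing in between;
    // the deltas approximate the cost of the timing calls themselves.
    struct timespec a, b;
    unsigned long long t0 = __rdtsc();
    clock_gettime(CLOCK_MONOTONIC, &a);
    clock_gettime(CLOCK_MONOTONIC, &b);
    unsigned long long t1 = __rdtsc();
    long ns = (b.tv_sec - a.tv_sec) * 1000000000L + (b.tv_nsec - a.tv_nsec);
    printf("back-to-back clock_gettime(): %ld ns; rdtsc span: %llu cycles\n",
           ns, t1 - t0);
    return 0;
}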
In case anyone else comes across this: I found this paper, which I used to time my code: https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
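The gist of that paper is to serialize the pipeline around the timestamp reads so out-of-order execution cannot move the measured code across them. A rough sketch of that idea with GCC intrinsics (my adaptation; the paper itself uses inline asm with cpuid/rdtsc/rdtscp, and work() here is a hypothetical stand-in for the code under test):

#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>

static void work(void) { /* hypothetical code under test */ }

int main(void) {
    unsigned int aux;
    _mm_lfence();                   // keep earlier instructions from drifting past the first read
    uint64_t start = __rdtsc();
    work();
    uint64_t end = __rdtscp(&aux);  // rdtscp waits for work() to finish before reading the TSC
    _mm_lfence();                   // keep later instructions from starting before the read
    printf("elapsed: %llu cycles\n", (unsigned long long)(end - start));
    return 0;
}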