使用 C API 访问 Slurm 作业资源时出现解引用错误
Dereference error when accessing Slurm job resources using C API
我正在尝试使用 C API 获取 Slurm 集群中每个作业的内存使用信息:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
/*
 * Load the job list with slurm_load_jobs() and print one CSV row per job
 * record to stdout, then free the list.
 *
 * As posted, this version does not compile: the two job_resrcs-> accesses
 * below dereference a type that the included headers leave incomplete.
 */
int main(int argc, char** argv)
{
/* NOTE(review): c is declared but never used in this function. */
int c, i, slurm_err;
job_info_msg_t *jobs;
/* Load job info from Slurm */
/* NOTE(review): slurm_err is assigned here but never checked, so jobs is
 * dereferenced below even if the load did not succeed. */
slurm_err = slurm_load_jobs((time_t) NULL, &jobs, SHOW_DETAIL);
/* CSV header row */
printf("job_id,cluster,partition,user_id,name,job_state,mem_allocated,mem_used\n");
/* Print one CSV row per job record to stdout */
for (i = 0; i < jobs->record_count; i++)
{
/* NOTE(review): every numeric column is printed with %d; confirm that this
 * matches the field types declared in slurm.h. */
printf("%d,%s,%s,%d,%s,%d,%d,%d\n",
jobs->job_array[i].job_id,
jobs->job_array[i].cluster,
jobs->job_array[i].partition,
jobs->job_array[i].user_id,
jobs->job_array[i].name,
jobs->job_array[i].job_state,
/* The next two member accesses are the ones the compiler rejects with
 * "dereferencing pointer to incomplete type". */
jobs->job_array[i].job_resrcs->memory_allocated[0],
jobs->job_array[i].job_resrcs->memory_used[0]
);
}
/* Release the message returned by slurm_load_jobs(). */
slurm_free_job_info_msg(jobs);
return 0;
}
当我编译此代码(另存为 jobres.c)时出现以下错误:
jobres.c: In function ‘main’:
jobres.c:34:54: error: dereferencing pointer to incomplete type
jobs->job_array[i].job_resrcs->memory_allocated[0],
^
jobres.c:35:54: error: dereferencing pointer to incomplete type
jobs->job_array[i].job_resrcs->memory_used[0]
^
将 `->` 更改为 `.` 并不能解决问题,而是会产生不同的错误:
jobres.c: In function ‘main’:
jobres.c:34:54: error: request for member ‘memory_allocated’ in something not a structure or union
jobs->job_array[i].job_resrcs.memory_allocated[0],
^
jobres.c:35:54: error: request for member ‘memory_used’ in something not a structure or union
jobs->job_array[i].job_resrcs.memory_used[0]
^
我在 https://github.com/SchedMD/slurm 的一些 Slurm 工具和插件的源代码中看到过以类似方式使用的作业资源(job resources)结构,但显然我一定遗漏了什么,因为我的代码甚至无法编译。如果您对此问题有见地的评论或回答,我将不胜感激。
“缺少头文件”是一个有用的提示。在 slurm.h 中,作业资源是一种不透明的数据类型,如第 83 行所示:
/* Define job_resources_t below
 * to avoid including extraneous slurm headers */
#ifndef __job_resources_t_defined
# define __job_resources_t_defined /* Opaque data for select plugins */
/* Forward declaration only: no members of struct job_resources are declared
 * here, so code that sees just this typedef cannot access any field through
 * a job_resources_t pointer. */
typedef struct job_resources job_resources_t;
#endif
完整的定义可以在 job_resources.h 中找到,它是 Slurm 源代码的一部分,而不是 API 的一部分。我从 job_resources.h 中复制了结构定义并将其粘贴到我的程序代码中:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
/*
 * Local definition of struct job_resources. It completes the type that
 * slurm.h only forward-declares, so that its members can be accessed from
 * this program.
 *
 * NOTE(review): this was pasted from Slurm's internal job_resources.h, which
 * is not part of the public API. Presumably the field order and types must
 * match the definition used by the installed Slurm library -- verify against
 * the Slurm version this program is built and run with.
 */
struct job_resources {
bitstr_t *core_bitmap;
bitstr_t *core_bitmap_used;
uint32_t cpu_array_cnt;
uint16_t *cpu_array_value;
uint32_t *cpu_array_reps;
uint16_t *cpus;
uint16_t *cpus_used;
uint16_t *cores_per_socket;
/* main() prints element [0] of this array as the mem_allocated column. */
uint64_t *memory_allocated;
/* main() prints element [0] of this array as the mem_used column. */
uint64_t *memory_used;
uint32_t nhosts;
bitstr_t *node_bitmap;
uint32_t node_req;
char *nodes;
uint32_t ncpus;
uint32_t *sock_core_rep_count;
uint16_t *sockets_per_node;
uint16_t *tasks_per_node;
uint8_t whole_node;
};
int main(int argc, char** argv)
{
int c, i, slurm_err;
job_info_msg_t *jobs;
/* Load job info from Slurm */
slurm_err = slurm_load_jobs((time_t) NULL, &jobs, SHOW_DETAIL);
printf("job_id,cluster,partition,user_id,name,job_state,mem_allocated,mem_used\n");
/* Print jobs info to the file in CSV format */
for (i = 0; i < jobs->record_count; i++)
{
printf("%d,%s,%s,%d,%s,%d,%d,%d\n",
jobs->job_array[i].job_id,
jobs->job_array[i].cluster,
jobs->job_array[i].partition,
jobs->job_array[i].user_id,
jobs->job_array[i].name,
jobs->job_array[i].job_state,
jobs->job_array[i].job_resrcs->memory_allocated[0],
jobs->job_array[i].job_resrcs->memory_used[0]
);
}
slurm_free_job_info_msg(jobs);
return 0;
}
现在这个程序编译没有错误,运行良好,结果打印正确。
我正在尝试使用 C API 获取 Slurm 集群中每个作业的内存使用信息:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
/*
 * Load the job list with slurm_load_jobs() and print one CSV row per job
 * record to stdout, then free the list.
 *
 * As posted, this version does not compile: the two job_resrcs-> accesses
 * below dereference a type that the included headers leave incomplete.
 */
int main(int argc, char** argv)
{
/* NOTE(review): c is declared but never used in this function. */
int c, i, slurm_err;
job_info_msg_t *jobs;
/* Load job info from Slurm */
/* NOTE(review): slurm_err is assigned here but never checked, so jobs is
 * dereferenced below even if the load did not succeed. */
slurm_err = slurm_load_jobs((time_t) NULL, &jobs, SHOW_DETAIL);
/* CSV header row */
printf("job_id,cluster,partition,user_id,name,job_state,mem_allocated,mem_used\n");
/* Print one CSV row per job record to stdout */
for (i = 0; i < jobs->record_count; i++)
{
/* NOTE(review): every numeric column is printed with %d; confirm that this
 * matches the field types declared in slurm.h. */
printf("%d,%s,%s,%d,%s,%d,%d,%d\n",
jobs->job_array[i].job_id,
jobs->job_array[i].cluster,
jobs->job_array[i].partition,
jobs->job_array[i].user_id,
jobs->job_array[i].name,
jobs->job_array[i].job_state,
/* The next two member accesses are the ones the compiler rejects with
 * "dereferencing pointer to incomplete type". */
jobs->job_array[i].job_resrcs->memory_allocated[0],
jobs->job_array[i].job_resrcs->memory_used[0]
);
}
/* Release the message returned by slurm_load_jobs(). */
slurm_free_job_info_msg(jobs);
return 0;
}
当我编译此代码(另存为 jobres.c)时出现以下错误:
jobres.c: In function ‘main’:
jobres.c:34:54: error: dereferencing pointer to incomplete type
jobs->job_array[i].job_resrcs->memory_allocated[0],
^
jobres.c:35:54: error: dereferencing pointer to incomplete type
jobs->job_array[i].job_resrcs->memory_used[0]
^
将 `->` 更改为 `.` 并不能解决问题,而是会产生不同的错误:
jobres.c: In function ‘main’:
jobres.c:34:54: error: request for member ‘memory_allocated’ in something not a structure or union
jobs->job_array[i].job_resrcs.memory_allocated[0],
^
jobres.c:35:54: error: request for member ‘memory_used’ in something not a structure or union
jobs->job_array[i].job_resrcs.memory_used[0]
^
我在 https://github.com/SchedMD/slurm 的一些 Slurm 工具和插件的源代码中看到过以类似方式使用的作业资源(job resources)结构,但显然我一定遗漏了什么,因为我的代码甚至无法编译。如果您对此问题有见地的评论或回答,我将不胜感激。
“缺少头文件”是一个有用的提示。在 slurm.h 中,作业资源是一种不透明的数据类型,如第 83 行所示:
/* Define job_resources_t below
 * to avoid including extraneous slurm headers */
#ifndef __job_resources_t_defined
# define __job_resources_t_defined /* Opaque data for select plugins */
/* Forward declaration only: no members of struct job_resources are declared
 * here, so code that sees just this typedef cannot access any field through
 * a job_resources_t pointer. */
typedef struct job_resources job_resources_t;
#endif
完整的定义可以在 job_resources.h 中找到,它是 Slurm 源代码的一部分,而不是 API 的一部分。我从 job_resources.h 中复制了结构定义并将其粘贴到我的程序代码中:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
/*
 * Local definition of struct job_resources. It completes the type that
 * slurm.h only forward-declares, so that its members can be accessed from
 * this program.
 *
 * NOTE(review): this was pasted from Slurm's internal job_resources.h, which
 * is not part of the public API. Presumably the field order and types must
 * match the definition used by the installed Slurm library -- verify against
 * the Slurm version this program is built and run with.
 */
struct job_resources {
bitstr_t *core_bitmap;
bitstr_t *core_bitmap_used;
uint32_t cpu_array_cnt;
uint16_t *cpu_array_value;
uint32_t *cpu_array_reps;
uint16_t *cpus;
uint16_t *cpus_used;
uint16_t *cores_per_socket;
/* main() prints element [0] of this array as the mem_allocated column. */
uint64_t *memory_allocated;
/* main() prints element [0] of this array as the mem_used column. */
uint64_t *memory_used;
uint32_t nhosts;
bitstr_t *node_bitmap;
uint32_t node_req;
char *nodes;
uint32_t ncpus;
uint32_t *sock_core_rep_count;
uint16_t *sockets_per_node;
uint16_t *tasks_per_node;
uint8_t whole_node;
};
int main(int argc, char** argv)
{
int c, i, slurm_err;
job_info_msg_t *jobs;
/* Load job info from Slurm */
slurm_err = slurm_load_jobs((time_t) NULL, &jobs, SHOW_DETAIL);
printf("job_id,cluster,partition,user_id,name,job_state,mem_allocated,mem_used\n");
/* Print jobs info to the file in CSV format */
for (i = 0; i < jobs->record_count; i++)
{
printf("%d,%s,%s,%d,%s,%d,%d,%d\n",
jobs->job_array[i].job_id,
jobs->job_array[i].cluster,
jobs->job_array[i].partition,
jobs->job_array[i].user_id,
jobs->job_array[i].name,
jobs->job_array[i].job_state,
jobs->job_array[i].job_resrcs->memory_allocated[0],
jobs->job_array[i].job_resrcs->memory_used[0]
);
}
slurm_free_job_info_msg(jobs);
return 0;
}
现在这个程序编译没有错误,运行良好,结果打印正确。