使用 ksh 监控 Torque 集群上的作业名称
monitoring job titles on torque cluster using ksh
如果我使用 qstat,我可以获得当前正在运行的作业列表:
host username othername NameTask_JOBXXXX_G1_namesubtask -- 1 1 -- 8783: Q 00:00
host username othername NameTask2_JOBXXXX_G2_namesubtask -- 1 1 -- 8783: C 00:00
到目前为止,我可以使用 ksh 脚本跟踪状态:
#!/usr/bin/ksh
# Every 2 seconds print one summary line with the number of queued, running
# and completed jobs, followed by the job name and time of the last five
# completed jobs and a blank separator line.
#
# `mystat` is not defined in this snippet; it is assumed to print one job per
# line in the layout shown above (job name in field 4, status letter after the
# "NNNN:" column, time in field 11) -- confirm against the real command.
while sleep 2; do
  # Take ONE snapshot per cycle: all figures then describe the same instant
  # and the scheduler is queried once instead of four times.
  jobs_snapshot=$(mystat | grep JOB)

  # Match the status column as ': X' for every state, so a stray capital
  # letter elsewhere on the line (e.g. in a job name) is not miscounted.
  queued=$(printf '%s\n' "$jobs_snapshot" | grep -c ': Q')
  running=$(printf '%s\n' "$jobs_snapshot" | grep -c ': R')
  completed=$(printf '%s\n' "$jobs_snapshot" | grep -c ': C')
  printf '%s Queued %s Running %s Completed \n' \
    "$queued" "$running" "$completed"

  # Squeeze repeated blanks so cut sees single-space separated fields.
  last_done=$(printf '%s\n' "$jobs_snapshot" \
    | grep ': C' | tail -n 5 | tr -s ' ' | cut -d ' ' -f 4,11)
  printf '%s\n\n' "$last_done"
done
它每 2 秒输出一次正在运行、排队中和已完成的作业数量,以及最后 5 个已完成作业及其完成时间。
我如何扩展它,以统计当前正在运行的、具有不同 'NameTask_' 前缀的行数?
我想得到与前面脚本相同的结果,但按 NameTask_ 和 G_ 分组。
这似乎适合用数组来处理:
# Summarise scheduler job lines read from standard input.
#
# Input : one job per line, whitespace separated, in the layout
#           host user other NameTask_JOBXXXX_Gn_subtask -- 1 1 -- NNNN: STATUS TIME
# Output: number of jobs per status, the time field of the last five
#         completed (status C) jobs in input order, then job counts keyed by
#         the text before the first '_' and after the last '_' of the job name.
#         The order of keys within each group is whatever the shell returns.
#
# Requires associative arrays (ksh93, or bash 4+). The `function name {` form
# is used so the typeset'ed variables are local to the function in both shells.
function summarize_jobs {
  typeset -A jobCnt   # job count keyed by status letter
  typeset -A ntCnt    # job count keyed by NameTask (job-name prefix)
  typeset -A stCnt    # job count keyed by namesubtask (job-name suffix)
  typeset -a cjList   # time field of each completed job, in input order
  # typeset -i instead of the ksh-only `integer` alias, so bash accepts it too.
  typeset -i istart=0 total=0 i=0
  typeset host uName oName tName v w x y z stat statTime key

  # -r keeps backslashes in the input literal.
  while read -r host uName oName tName v w x y z stat statTime
  do
    # Blank or truncated lines carry no status; an empty array subscript
    # would be an error, so skip them.
    [[ -n ${stat} && -n ${tName} ]] || continue

    jobCnt[${stat}]=$(( ${jobCnt[${stat}]:-0} + 1 ))
    if [[ ${stat} == C ]]
    then
      cjList[${#cjList[@]}]=${statTime}     # append, preserving input order
    fi

    key=${tName%%_*}                        # text before the first '_'
    ntCnt[${key}]=$(( ${ntCnt[${key}]:-0} + 1 ))
    key=${tName##*_}                        # text after the last '_'
    stCnt[${key}]=$(( ${stCnt[${key}]:-0} + 1 ))
  done

  for key in "${!jobCnt[@]}"
  do
    printf '%s\n' "Number ${key} jobs: ${jobCnt[${key}]}"
  done

  echo "Last 5 Completed jobs:"
  total=${#cjList[@]}
  # Start at 0 when there are five or fewer entries, else five from the end.
  (( istart = total > 5 ? total - 5 : 0 ))
  for (( i = istart; i < total; i++ ))
  do
    printf '%s\n' "${cjList[i]}"
  done

  echo "Job counts by NameTask: "
  for key in "${!ntCnt[@]}"
  do
    printf '%s\n' "${key} ${ntCnt[${key}]}"
  done

  echo "Job counts by NameSubTask: "
  for key in "${!stCnt[@]}"
  do
    printf '%s\n' "${key} ${stCnt[${key}]}"
  done
}

# Run on this script's standard input, exactly as the flat script did.
summarize_jobs
如果我使用 qstat,我可以获得当前正在运行的作业列表:
host username othername NameTask_JOBXXXX_G1_namesubtask -- 1 1 -- 8783: Q 00:00
host username othername NameTask2_JOBXXXX_G2_namesubtask -- 1 1 -- 8783: C 00:00
到目前为止,我可以使用 ksh 脚本跟踪状态:
#!/usr/bin/ksh
# Every 2 seconds print one summary line with the number of queued, running
# and completed jobs, followed by the job name and time of the last five
# completed jobs and a blank separator line.
#
# `mystat` is not defined in this snippet; it is assumed to print one job per
# line in the layout shown above (job name in field 4, status letter after the
# "NNNN:" column, time in field 11) -- confirm against the real command.
while sleep 2; do
  # Take ONE snapshot per cycle: all figures then describe the same instant
  # and the scheduler is queried once instead of four times.
  jobs_snapshot=$(mystat | grep JOB)

  # Match the status column as ': X' for every state, so a stray capital
  # letter elsewhere on the line (e.g. in a job name) is not miscounted.
  queued=$(printf '%s\n' "$jobs_snapshot" | grep -c ': Q')
  running=$(printf '%s\n' "$jobs_snapshot" | grep -c ': R')
  completed=$(printf '%s\n' "$jobs_snapshot" | grep -c ': C')
  printf '%s Queued %s Running %s Completed \n' \
    "$queued" "$running" "$completed"

  # Squeeze repeated blanks so cut sees single-space separated fields.
  last_done=$(printf '%s\n' "$jobs_snapshot" \
    | grep ': C' | tail -n 5 | tr -s ' ' | cut -d ' ' -f 4,11)
  printf '%s\n\n' "$last_done"
done
它每 2 秒输出一次正在运行、排队中和已完成的作业数量,以及最后 5 个已完成作业及其完成时间。
我如何扩展它,以统计当前正在运行的、具有不同 'NameTask_' 前缀的行数? 我想得到与前面脚本相同的结果,但按 NameTask_ 和 G_ 分组。
这似乎适合用数组来处理:
# Summarise scheduler job lines read from standard input.
#
# Input : one job per line, whitespace separated, in the layout
#           host user other NameTask_JOBXXXX_Gn_subtask -- 1 1 -- NNNN: STATUS TIME
# Output: number of jobs per status, the time field of the last five
#         completed (status C) jobs in input order, then job counts keyed by
#         the text before the first '_' and after the last '_' of the job name.
#         The order of keys within each group is whatever the shell returns.
#
# Requires associative arrays (ksh93, or bash 4+). The `function name {` form
# is used so the typeset'ed variables are local to the function in both shells.
function summarize_jobs {
  typeset -A jobCnt   # job count keyed by status letter
  typeset -A ntCnt    # job count keyed by NameTask (job-name prefix)
  typeset -A stCnt    # job count keyed by namesubtask (job-name suffix)
  typeset -a cjList   # time field of each completed job, in input order
  # typeset -i instead of the ksh-only `integer` alias, so bash accepts it too.
  typeset -i istart=0 total=0 i=0
  typeset host uName oName tName v w x y z stat statTime key

  # -r keeps backslashes in the input literal.
  while read -r host uName oName tName v w x y z stat statTime
  do
    # Blank or truncated lines carry no status; an empty array subscript
    # would be an error, so skip them.
    [[ -n ${stat} && -n ${tName} ]] || continue

    jobCnt[${stat}]=$(( ${jobCnt[${stat}]:-0} + 1 ))
    if [[ ${stat} == C ]]
    then
      cjList[${#cjList[@]}]=${statTime}     # append, preserving input order
    fi

    key=${tName%%_*}                        # text before the first '_'
    ntCnt[${key}]=$(( ${ntCnt[${key}]:-0} + 1 ))
    key=${tName##*_}                        # text after the last '_'
    stCnt[${key}]=$(( ${stCnt[${key}]:-0} + 1 ))
  done

  for key in "${!jobCnt[@]}"
  do
    printf '%s\n' "Number ${key} jobs: ${jobCnt[${key}]}"
  done

  echo "Last 5 Completed jobs:"
  total=${#cjList[@]}
  # Start at 0 when there are five or fewer entries, else five from the end.
  (( istart = total > 5 ? total - 5 : 0 ))
  for (( i = istart; i < total; i++ ))
  do
    printf '%s\n' "${cjList[i]}"
  done

  echo "Job counts by NameTask: "
  for key in "${!ntCnt[@]}"
  do
    printf '%s\n' "${key} ${ntCnt[${key}]}"
  done

  echo "Job counts by NameSubTask: "
  for key in "${!stCnt[@]}"
  do
    printf '%s\n' "${key} ${stCnt[${key}]}"
  done
}

# Run on this script's standard input, exactly as the flat script did.
summarize_jobs