循环遍历 netCDF 文件名的年、月、日
Loop through year, month, day for netCDF file name
我在 R 中使用 for 循环从文件夹中读取 netCDF 文件并提取给定的经度、纬度列表的值。它看起来像工作,除了一个问题。当循环 returns 值与日期相对时,它会在 2 月 28 日之后创建 1 月 29 日到 31 日。像往常一样,我想要在 2 月 28 日或 29 日(闰年)之后的 3 月 1 日。这是我的 R 代码:
# given latitude, longitude list
sb1 <- data.frame(longitude=1:10,latitude =1:10)
# Extracting zonal or sub-basin average rainfall from netCDF file
sb1_r <- c()
date <- c()
rain_month <- c()
rain_year <- c()
for (year in 1998:1998){
for (month in 1:3){
for (day in seq_along(1:31)){
FileName <- paste('3B42_daily',year,sprintf("%02d",month),sprintf("%02d", day),'7.SUB.nc', sep='.')
if (!file.exists(FileName)){
next
} else {
File <- nc_open(FileName)
rain <- ncvar_get(File, 'r')
sb1_r[day] <- mean(apply(sb1,1,function(x)rain[x[1],x[2]]),na.rm = TRUE)
date[day] <- paste(year,sprintf("%02d", month),sprintf("%02d", day),sep='-')
rain_month <- data.frame(date,sb1_r)
nc_close(File)
}
}
rain_year <- rbind(rain_year,rain_month)
}
}
您可以找到三个月的每日 netCDF 数据 link:
https://drive.google.com/open?id=0B8rqKaYt0VEaMWVGc1gzdXI1U28
与其尝试创建文件名,不如反其道而行之。提取文件名,并为每个文件从文件名中获取日期,并从文件中获取 sb1_r。您可以使用 data.table 包中的 rbindlist 更快地完成此操作(但不需要)。
#给定经纬度列表
sb1 <- data.frame(经度=1:10,纬度=1:10)
# Extracting zonal or sub-basin average rainfall from netCDF file
filenames = list.files(path = ".", pattern = ".nc")
rain_year = data.frame()
require(ncdf4)
for(FileName in filenames){
File <- nc_open(FileName)
# Create Date
year <- strsplit(FileName, split = '[.]')[[1]][2]
month <- strsplit(FileName, split = '[.]')[[1]][3]
day <- strsplit(FileName, split = '[.]')[[1]][4]
date = paste(year, month, day, sep = "-")
# get value
rain <- ncvar_get(File, 'r')
sb1_r <- mean(apply(sb1,1,function(x)rain[x[1],x[2]]),na.rm = TRUE)
# update data.frame
rain_year = rbind(rain_year, data.frame(date = date, sb1_r = sb1_r))
nc_close(File)
}
# Faster using data.table package
require(data.table)
temp = rbindlist(
lapply(X = filenames, function(FileName){
year <- as.integer( strsplit(FileName, split = '[.]')[[1]][2] )
month <- as.integer( strsplit(FileName, split = '[.]')[[1]][3] )
day <- as.integer( strsplit(FileName, split = '[.]')[[1]][4] )
date = paste(year, month, day, sep = "-")
File <- nc_open(FileName)
rain <- ncvar_get(File, 'r')
sb1_r <- mean(apply(sb1,1,function(x)rain[x[1],x[2]]),na.rm = TRUE)
return(data.frame(date = date, sb1_r = sb1_r))
})
)
请注意,R 中的上述代码不 正确,除非降雨 netcdf 文件使用的是 equi-area 网格,这种情况很少见! (对于本示例中使用的 TRMM 文件,情况并非如此)。这是处理网格化数据时的常见错误。
例如,如果您有一个规则的 lat/lon 网格,则需要考虑到向两极移动时经度维度的余弦缩减。如果您的 sub-basin 很小,则误差很小,但如果面积很大,则误差会很大。对于某些类型的网格,例如减少高斯网格,如果你的 sub-domain 恰好与经度点数的不连续变化一致,特别是对于 "non-smooth" 领域,例如降水。
我总是会使用 CDO 执行区域和 sub-basin 处理以确保正确执行计算。如果您使用 CDO,则区域平均和区域平均函数会考虑原生网格。
因此我的代码看起来像这样:
#!/bin/bash
# define the lat-lon bounds of your sub area
lat1=20
lat2=30
lon1=0
lon2=20
# merge all the daily files into one file
# do this one month at a time as some system limit number of open files
year=1998 # can make this a loop if you want multiple years
for month in `seq -f "%02g" 1 3` ; do
files=`ls 3B42_daily1998${month}*.nc`
cdo mergetime $files TRMM_${month}.nc
done
cdo mergetime $TRMM_*.nc TRMM_timeseries.nc
# now extract the subdomain
cdo sellonlatbox,$lon1,$lon2,$lat1,$lat2 TRMM_timeseries.nc TRMM_box.nc
# CORRECT area average
cdo fldmean TRMM_box.nc TRMM_box_av.nc
# zonal average
cdo zonmean TRMM_box.nc TRMM_box_zon.nc
#!/usr/bin/env Rscript
argv<-commandArgs(trailingOnly=TRUE)
if(length(argv)==2 & argv[1] <= argv[2]) {
if (is.na(strptime(sprintf("%s",argv[1]),"%Y%m%d"))) {
cat("arguments valid check error: ", argv[1], "\n")
stop()
}
if (argv[2]!=format(strptime(sprintf("%s",argv[2]),"%Y%m%d"),"%Y%m%d")) {
cat("arguments valid check error: ", argv[2], "\n")
stop()
}
} else if (length(argv)==2 & argv[1] > argv[2]) {
print(sprintf("error: %s is greater than %s",argv[1],argv[2]))
stop()
} else if (length(argv)!=2) {
script.name<-basename(strsplit(commandArgs(trailingOnly=FALSE)[4],"=")[[1]][2])
print(sprintf("Usage: %s startDate endDate",script.name))
stop()
}
filelist<-c()
for (Ymd in format(seq(
as.POSIXct(sprintf("%s",argv[1]),format="%Y%m%d"),
as.POSIXct(sprintf("%s",argv[2]),format="%Y%m%d"),
by="24 hour"),"%Y%m%d")) {
filelist<-append(filelist, sprintf("%s.%s.%s.%s.%s","3B42_daily",substr(Ymd,1,4),substr(Ymd,5,6),substr(Ymd,7,8),"7.SUB.nc"))
}
sb1_r <- c()
date <- c()
rain_month <- c()
rain_year <- c()
for (i in 1:length(filelist)) {
if (!file.exists(filelist[i])){
next
} else {
year <- as.numeric(substr(filelist[i],12,15))
month <- as.numeric(substr(filelist[i],17,18))
day <- as.numeric(substr(filelist[i],20,21))
File <- nc_open(filelist[i])
rain <- ncvar_get(File, 'r')
sb1_r[day] <- mean(apply(sb1,1,function(x)rain[x[1],x[2]]),na.rm = TRUE)
date[day] <- paste(year,sprintf("%02d", month),sprintf("%02d", day),sep='-')
rain_month <- data.frame(date,sb1_r)
nc_close(File)
}
}
我在 R 中使用 for 循环从文件夹中读取 netCDF 文件并提取给定的经度、纬度列表的值。它看起来像工作,除了一个问题。当循环 returns 值与日期相对时,它会在 2 月 28 日之后创建 1 月 29 日到 31 日。像往常一样,我想要在 2 月 28 日或 29 日(闰年)之后的 3 月 1 日。这是我的 R 代码:
# given latitude, longitude list
sb1 <- data.frame(longitude=1:10,latitude =1:10)
# Extracting zonal or sub-basin average rainfall from netCDF file
sb1_r <- c()
date <- c()
rain_month <- c()
rain_year <- c()
for (year in 1998:1998){
for (month in 1:3){
for (day in seq_along(1:31)){
FileName <- paste('3B42_daily',year,sprintf("%02d",month),sprintf("%02d", day),'7.SUB.nc', sep='.')
if (!file.exists(FileName)){
next
} else {
File <- nc_open(FileName)
rain <- ncvar_get(File, 'r')
sb1_r[day] <- mean(apply(sb1,1,function(x)rain[x[1],x[2]]),na.rm = TRUE)
date[day] <- paste(year,sprintf("%02d", month),sprintf("%02d", day),sep='-')
rain_month <- data.frame(date,sb1_r)
nc_close(File)
}
}
rain_year <- rbind(rain_year,rain_month)
}
}
您可以找到三个月的每日 netCDF 数据 link: https://drive.google.com/open?id=0B8rqKaYt0VEaMWVGc1gzdXI1U28
与其尝试创建文件名,不如反其道而行之。提取文件名,并为每个文件从文件名中获取日期,并从文件中获取 sb1_r。您可以使用 data.table 包中的 rbindlist 更快地完成此操作(但不需要)。
#给定经纬度列表 sb1 <- data.frame(经度=1:10,纬度=1:10)
# Extracting zonal or sub-basin average rainfall from netCDF file
filenames = list.files(path = ".", pattern = ".nc")
rain_year = data.frame()
require(ncdf4)
for(FileName in filenames){
File <- nc_open(FileName)
# Create Date
year <- strsplit(FileName, split = '[.]')[[1]][2]
month <- strsplit(FileName, split = '[.]')[[1]][3]
day <- strsplit(FileName, split = '[.]')[[1]][4]
date = paste(year, month, day, sep = "-")
# get value
rain <- ncvar_get(File, 'r')
sb1_r <- mean(apply(sb1,1,function(x)rain[x[1],x[2]]),na.rm = TRUE)
# update data.frame
rain_year = rbind(rain_year, data.frame(date = date, sb1_r = sb1_r))
nc_close(File)
}
# Faster using data.table package
require(data.table)
temp = rbindlist(
lapply(X = filenames, function(FileName){
year <- as.integer( strsplit(FileName, split = '[.]')[[1]][2] )
month <- as.integer( strsplit(FileName, split = '[.]')[[1]][3] )
day <- as.integer( strsplit(FileName, split = '[.]')[[1]][4] )
date = paste(year, month, day, sep = "-")
File <- nc_open(FileName)
rain <- ncvar_get(File, 'r')
sb1_r <- mean(apply(sb1,1,function(x)rain[x[1],x[2]]),na.rm = TRUE)
return(data.frame(date = date, sb1_r = sb1_r))
})
)
请注意,R 中的上述代码不 正确,除非降雨 netcdf 文件使用的是 equi-area 网格,这种情况很少见! (对于本示例中使用的 TRMM 文件,情况并非如此)。这是处理网格化数据时的常见错误。
例如,如果您有一个规则的 lat/lon 网格,则需要考虑到向两极移动时经度维度的余弦缩减。如果您的 sub-basin 很小,则误差很小,但如果面积很大,则误差会很大。对于某些类型的网格,例如减少高斯网格,如果你的 sub-domain 恰好与经度点数的不连续变化一致,特别是对于 "non-smooth" 领域,例如降水。
我总是会使用 CDO 执行区域和 sub-basin 处理以确保正确执行计算。如果您使用 CDO,则区域平均和区域平均函数会考虑原生网格。
因此我的代码看起来像这样:
#!/bin/bash
# define the lat-lon bounds of your sub area
lat1=20
lat2=30
lon1=0
lon2=20
# merge all the daily files into one file
# do this one month at a time as some system limit number of open files
year=1998 # can make this a loop if you want multiple years
for month in `seq -f "%02g" 1 3` ; do
files=`ls 3B42_daily1998${month}*.nc`
cdo mergetime $files TRMM_${month}.nc
done
cdo mergetime $TRMM_*.nc TRMM_timeseries.nc
# now extract the subdomain
cdo sellonlatbox,$lon1,$lon2,$lat1,$lat2 TRMM_timeseries.nc TRMM_box.nc
# CORRECT area average
cdo fldmean TRMM_box.nc TRMM_box_av.nc
# zonal average
cdo zonmean TRMM_box.nc TRMM_box_zon.nc
#!/usr/bin/env Rscript
argv<-commandArgs(trailingOnly=TRUE)
if(length(argv)==2 & argv[1] <= argv[2]) {
if (is.na(strptime(sprintf("%s",argv[1]),"%Y%m%d"))) {
cat("arguments valid check error: ", argv[1], "\n")
stop()
}
if (argv[2]!=format(strptime(sprintf("%s",argv[2]),"%Y%m%d"),"%Y%m%d")) {
cat("arguments valid check error: ", argv[2], "\n")
stop()
}
} else if (length(argv)==2 & argv[1] > argv[2]) {
print(sprintf("error: %s is greater than %s",argv[1],argv[2]))
stop()
} else if (length(argv)!=2) {
script.name<-basename(strsplit(commandArgs(trailingOnly=FALSE)[4],"=")[[1]][2])
print(sprintf("Usage: %s startDate endDate",script.name))
stop()
}
filelist<-c()
for (Ymd in format(seq(
as.POSIXct(sprintf("%s",argv[1]),format="%Y%m%d"),
as.POSIXct(sprintf("%s",argv[2]),format="%Y%m%d"),
by="24 hour"),"%Y%m%d")) {
filelist<-append(filelist, sprintf("%s.%s.%s.%s.%s","3B42_daily",substr(Ymd,1,4),substr(Ymd,5,6),substr(Ymd,7,8),"7.SUB.nc"))
}
sb1_r <- c()
date <- c()
rain_month <- c()
rain_year <- c()
for (i in 1:length(filelist)) {
if (!file.exists(filelist[i])){
next
} else {
year <- as.numeric(substr(filelist[i],12,15))
month <- as.numeric(substr(filelist[i],17,18))
day <- as.numeric(substr(filelist[i],20,21))
File <- nc_open(filelist[i])
rain <- ncvar_get(File, 'r')
sb1_r[day] <- mean(apply(sb1,1,function(x)rain[x[1],x[2]]),na.rm = TRUE)
date[day] <- paste(year,sprintf("%02d", month),sprintf("%02d", day),sep='-')
rain_month <- data.frame(date,sb1_r)
nc_close(File)
}
}