R - 如何计算 zoo 对象的“全局”(global)月均值

R - how to calculate "global" monthly means of a zoo object

假设我有这个 zoo 对象:

library(zoo)

df <- structure(list(date = structure(c(0, 31, 59, 90, 120, 151, 181, 
212, 243, 273, 304, 334, 365, 396, 424, 455, 485, 516, 546, 577, 
608, 638, 669, 699, 730, 761, 790, 821, 851, 882, 912, 943, 974, 
1004, 1035, 1065, 1096, 1127, 1155, 1186, 1216, 1247, 1277, 1308, 
1339, 1369, 1400, 1430, 1461, 1492, 1520, 1551, 1581, 1612, 1642, 
1673, 1704, 1734, 1765, 1795), class = "Date"), cru = c(67.6552579213225, 
64.8199295882443, 67.6882136843717, 64.4932343486913, 62.0941221490892, 
59.9333537425379, 54.9891320985765, 55.7801010255625, 59.3864840042859, 
63.0966937088627, 64.384004285933, 67.4058778509108, 70.5033062911373, 
68.2652380223481, 67.5416194703811, 64.7323894076228, 62.7184907393234, 
59.8548599418338, 56.0586407469769, 56.4163171590387, 60.6882596050819, 
64.2160875554875, 64.0664013470075, 64.9156283483851, 67.7770855655901, 
65.9492882289913, 68.2622837899893, 64.3217664166539, 65.2166079902036, 
63.9083881830706, 56.3643961426603, 59.9723098117251, 60.4400887800398, 
62.8953007806521, 66.7075616102862, 69.6426603398133, 69.3881677636614, 
68.445981937854, 69.0054186438084, 67.9314556865146, 64.5561457217205, 
61.9860707178938, 59.2609980101026, 56.8957446808511, 61.1108985152304, 
64.3730904637992, 66.0301545997245, 70.4032909842339, 70.7749272922088, 
71.6810653604776, 69.300183682841, 65.3710546456452, 64.8279044849227, 
60.3732435328333, 56.3447726924843, 57.3151997550896, 60.6496709015766, 
64.0822746058472, 63.9682075616103, 69.4681922547069), model = c(70.8259646403441, 
69.1040767391383, 64.5515271993269, 64.6499744101575, 67.712215046297, 
67.1942041779122, 67.1107310531572, 64.0365506335587, 64.3861402081049, 
66.9474645232039, 70.0900007709146, 70.7322071044208, 71.3681528337099, 
70.6042579609609, 69.1904034259863, 66.3244341830203, 70.45013445949, 
68.4745405096757, 64.6434376970369, 63.6578600688845, 62.6690122245487, 
66.7726540913498, 66.9993526229524, 69.2370082486024, 72.731807414607, 
70.066272418262, 69.8665286312326, 66.7517294017474, 63.2675717559056, 
63.9713073835875, 66.8740144556168, 66.2393105120408, 67.9422440282643, 
65.7423707102056, 67.5695235158686, 69.9511480162994, 70.60938201684, 
69.6737827804075, 65.8727627084409, 69.8091812621223, 66.3834903642866, 
67.7146527454309, 66.1289215196643, 65.2953892851434, 62.4094347321203, 
66.2600390234607, 67.183259636812, 69.644620244991, 67.9219957744587, 
66.9441969493467, 63.5216819663215, 66.4959928679606, 66.1665028019955, 
66.4600239736992, 64.8322046083317, 62.6689871470412, 66.3495593398356, 
66.3105935904296, 69.9372793159708, 73.3729173325656)), .Names = c("date", 
"cru", "model"), row.names = 1441:1500, class = "data.frame")

# Year-month index built from the date column; one entry per row of df.
idx <- as.yearmon(df[["date"]], format = "%Y-%m-%d")

# zoo series holding every column except the date, ordered by that index.
df2 <- zoo(df[, setdiff(names(df), "date")], order.by = idx)

我需要计算“全局”月均值 - 即该序列中所有年份的一月的平均值、所有年份的二月的平均值、所有年份的三月的平均值,依此类推。

最终结果将是一个包含 12 行的简单数据框,对应于月份(最好带有名称)及其平均值。

我该怎么做?

您可以使用 hydroTSM 包中的函数 monthlyfunction

# monthlyfunction() lives in hydroTSM; it must be attached before the call.
library(hydroTSM)

# Sanity check: January mean computed by hand.
# Rows 1, 13, 25, 37, 49 are the rows spaced 12 apart starting at row 1;
# columns 2:3 are cru and model.
colMeans(df[seq(1, 49, by = 12), 2:3])
#      cru    model 
# 69.21975 70.69146 

# Apply mean() per calendar month across all years.
# NOTE(review): for data.frame input hydroTSM is expected to read the dates
# from the first column by default (dates = 1) -- confirm against the
# installed version.
out <- monthlyfunction(df, FUN = mean)

# Transpose so that months become rows and the series become columns.
out <- as.data.frame(t(out))
out
#          cru    model
# Jan 69.21975 70.69146
# Feb 67.83230 69.27852
# Mar 68.35954 66.60058
# Apr 65.36998 66.80626
# May 63.88265 66.79598
# Jun 61.21118 66.76295
# Jul 56.60359 65.91786
# Aug 57.27593 64.37962
# Sep 60.45508 64.75128
# Oct 63.73269 66.40662
# Nov 65.03127 68.35588
# Dec 68.36713 70.58758

class(out)
# [1] "data.frame"

1 您可以使用 data.table 进行聚合
library(data.table)

# Calendar month number for every row (January -> 1, ..., December -> 12).
# Kept as a free-standing vector so it can be reused outside the data.table.
mth <- month(df$date)

# Work on a data.table copy of df and attach the month number as a column.
dt2 <- as.data.table(df)
set(dt2, j = "mth", value = mth)

# Key (and therefore sort) the table by month.
setkeyv(dt2, "mth")

# data.table's general form is dt[i, j, by]:
# "take dt, subset rows using i, then calculate j grouped by by".
# Here i is empty (all rows), j computes both means, by groups on month.
result <- dt2[
  ,
  list(meancru = mean(cru), meanmodel = mean(model)),
  by = "mth"
]

这样会得到:

  mth   meancru meanmodel
1   1   69.21975    70.69146
2   2   67.83230    69.27852
...

如果您不熟悉 data.table,可以参考一份不错的速查表(cheat sheet)。

2 使用 base R

使用上面代码中计算的 mth 变量,则:

# Attach the month number to df. Assigning by name is idempotent, whereas
# cbind() would append a second "mth" column every time the snippet is re-run.
df$mth <- mth

# Average only the value columns per calendar month. Aggregating the whole
# data frame would also average the date column and the mth helper itself,
# and would label the grouping column "Group.1" instead of "mth".
result <- aggregate(
  df[c("cru", "model")],
  by = list(mth = df$mth),
  FUN = mean
)

但是格式需要一些清理。

如果您需要对数据执行更多操作,那么您会欣赏 data.tables 的速度、索引和添加新变量的能力。