R - 如何计算 zoo 对象的 "global"(全局)每月均值
R - how to calculate "global" monthly means of a zoo object
假设我有这个 zoo 对象:
library(zoo)
df <- structure(list(date = structure(c(0, 31, 59, 90, 120, 151, 181,
212, 243, 273, 304, 334, 365, 396, 424, 455, 485, 516, 546, 577,
608, 638, 669, 699, 730, 761, 790, 821, 851, 882, 912, 943, 974,
1004, 1035, 1065, 1096, 1127, 1155, 1186, 1216, 1247, 1277, 1308,
1339, 1369, 1400, 1430, 1461, 1492, 1520, 1551, 1581, 1612, 1642,
1673, 1704, 1734, 1765, 1795), class = "Date"), cru = c(67.6552579213225,
64.8199295882443, 67.6882136843717, 64.4932343486913, 62.0941221490892,
59.9333537425379, 54.9891320985765, 55.7801010255625, 59.3864840042859,
63.0966937088627, 64.384004285933, 67.4058778509108, 70.5033062911373,
68.2652380223481, 67.5416194703811, 64.7323894076228, 62.7184907393234,
59.8548599418338, 56.0586407469769, 56.4163171590387, 60.6882596050819,
64.2160875554875, 64.0664013470075, 64.9156283483851, 67.7770855655901,
65.9492882289913, 68.2622837899893, 64.3217664166539, 65.2166079902036,
63.9083881830706, 56.3643961426603, 59.9723098117251, 60.4400887800398,
62.8953007806521, 66.7075616102862, 69.6426603398133, 69.3881677636614,
68.445981937854, 69.0054186438084, 67.9314556865146, 64.5561457217205,
61.9860707178938, 59.2609980101026, 56.8957446808511, 61.1108985152304,
64.3730904637992, 66.0301545997245, 70.4032909842339, 70.7749272922088,
71.6810653604776, 69.300183682841, 65.3710546456452, 64.8279044849227,
60.3732435328333, 56.3447726924843, 57.3151997550896, 60.6496709015766,
64.0822746058472, 63.9682075616103, 69.4681922547069), model = c(70.8259646403441,
69.1040767391383, 64.5515271993269, 64.6499744101575, 67.712215046297,
67.1942041779122, 67.1107310531572, 64.0365506335587, 64.3861402081049,
66.9474645232039, 70.0900007709146, 70.7322071044208, 71.3681528337099,
70.6042579609609, 69.1904034259863, 66.3244341830203, 70.45013445949,
68.4745405096757, 64.6434376970369, 63.6578600688845, 62.6690122245487,
66.7726540913498, 66.9993526229524, 69.2370082486024, 72.731807414607,
70.066272418262, 69.8665286312326, 66.7517294017474, 63.2675717559056,
63.9713073835875, 66.8740144556168, 66.2393105120408, 67.9422440282643,
65.7423707102056, 67.5695235158686, 69.9511480162994, 70.60938201684,
69.6737827804075, 65.8727627084409, 69.8091812621223, 66.3834903642866,
67.7146527454309, 66.1289215196643, 65.2953892851434, 62.4094347321203,
66.2600390234607, 67.183259636812, 69.644620244991, 67.9219957744587,
66.9441969493467, 63.5216819663215, 66.4959928679606, 66.1665028019955,
66.4600239736992, 64.8322046083317, 62.6689871470412, 66.3495593398356,
66.3105935904296, 69.9372793159708, 73.3729173325656)), .Names = c("date",
"cru", "model"), row.names = 1441:1500, class = "data.frame")
# Build the zoo series: a year-month index derived from the date column,
# with the remaining columns (cru, model) as the data.
idx <- as.yearmon(df$date, format = "%Y-%m-%d")
df2 <- zoo(df[, -1], order.by = idx)
我需要计算"全局"月均值 - 即该序列中所有一月、所有二月、所有三月等各自的平均值。
最终结果将是一个包含 12 行的简单数据框,对应于月份(最好带有名称)及其平均值。
我该怎么做?
您可以使用 hydroTSM 包中的函数 monthlyfunction:
library(hydroTSM)  # provides monthlyfunction()

# Sanity check: January mean computed by hand.
# Rows are selected by calendar month instead of hard-coded positions, and
# colMeans() avoids apply() coercing the data frame to a matrix.
is_jan <- format(df$date, "%m") == "01"
colMeans(df[is_jan, c("cru", "model")])
#      cru    model
# 69.21975 70.69146

# Apply mean() per calendar month across all years.
# NOTE(review): presumably the dates are taken from the first column of df
# (hydroTSM's default for data frames) -- confirm against the package docs.
out <- monthlyfunction(df, mean)
# Transpose and convert to a data frame so that months end up as rows,
# as in the output shown below.
out <- as.data.frame(t(out))
out
#          cru    model
# Jan 69.21975 70.69146
# Feb 67.83230 69.27852
# Mar 68.35954 66.60058
# Apr 65.36998 66.80626
# May 63.88265 66.79598
# Jun 61.21118 66.76295
# Jul 56.60359 65.91786
# Aug 57.27593 64.37962
# Sep 60.45508 64.75128
# Oct 63.73269 66.40662
# Nov 65.03127 68.35588
# Dec 68.36713 70.58758
class(out)
# [1] "data.frame"
1. 您可以使用 data.table 进行聚合:
library(data.table)

# Month number of each observation: 1 for January through 12 for December.
mth <- month(as.Date(df$date))

# Copy df into a data.table, attach the month as a column by reference,
# and key the table on it.
dt2 <- as.data.table(df)
set(dt2, j = "mth", value = mth)
setkeyv(dt2, "mth")

# data.table's general form is dt[i, j, by]:
# "Take DT, subset rows using i, then calculate j grouped by by".
result <- dt2[
  ,
  list(meancru = mean(cru), meanmodel = mean(model)),
  by = "mth"
]
这样会得到:
mth meancru meanmodel
1 1 69.21975 70.69146
2 2 67.83230 69.27852
...
如果您对 data.table 还不熟悉,这里有一份不错的速查表(cheat sheet)可供参考。
2. 使用基础 R(base R)
沿用上面代码中的 mth 变量,则:
# Month number (1-12) taken straight from the dates with base R only, so this
# section does not depend on data.table's month().
mth <- as.integer(format(df$date, "%m"))
df$mth <- mth
# Average only the value columns. Aggregating the whole data frame would also
# "average" the date and mth columns and add a redundant Group.1 column.
result <- aggregate(df[c("cru", "model")], by = list(mth = mth), FUN = mean)
但是格式需要一些清理。
如果您需要对数据执行更多操作,您会喜欢 data.table 在速度、索引以及添加新变量方面的能力。
假设我有这个 zoo 对象:
library(zoo)
df <- structure(list(date = structure(c(0, 31, 59, 90, 120, 151, 181,
212, 243, 273, 304, 334, 365, 396, 424, 455, 485, 516, 546, 577,
608, 638, 669, 699, 730, 761, 790, 821, 851, 882, 912, 943, 974,
1004, 1035, 1065, 1096, 1127, 1155, 1186, 1216, 1247, 1277, 1308,
1339, 1369, 1400, 1430, 1461, 1492, 1520, 1551, 1581, 1612, 1642,
1673, 1704, 1734, 1765, 1795), class = "Date"), cru = c(67.6552579213225,
64.8199295882443, 67.6882136843717, 64.4932343486913, 62.0941221490892,
59.9333537425379, 54.9891320985765, 55.7801010255625, 59.3864840042859,
63.0966937088627, 64.384004285933, 67.4058778509108, 70.5033062911373,
68.2652380223481, 67.5416194703811, 64.7323894076228, 62.7184907393234,
59.8548599418338, 56.0586407469769, 56.4163171590387, 60.6882596050819,
64.2160875554875, 64.0664013470075, 64.9156283483851, 67.7770855655901,
65.9492882289913, 68.2622837899893, 64.3217664166539, 65.2166079902036,
63.9083881830706, 56.3643961426603, 59.9723098117251, 60.4400887800398,
62.8953007806521, 66.7075616102862, 69.6426603398133, 69.3881677636614,
68.445981937854, 69.0054186438084, 67.9314556865146, 64.5561457217205,
61.9860707178938, 59.2609980101026, 56.8957446808511, 61.1108985152304,
64.3730904637992, 66.0301545997245, 70.4032909842339, 70.7749272922088,
71.6810653604776, 69.300183682841, 65.3710546456452, 64.8279044849227,
60.3732435328333, 56.3447726924843, 57.3151997550896, 60.6496709015766,
64.0822746058472, 63.9682075616103, 69.4681922547069), model = c(70.8259646403441,
69.1040767391383, 64.5515271993269, 64.6499744101575, 67.712215046297,
67.1942041779122, 67.1107310531572, 64.0365506335587, 64.3861402081049,
66.9474645232039, 70.0900007709146, 70.7322071044208, 71.3681528337099,
70.6042579609609, 69.1904034259863, 66.3244341830203, 70.45013445949,
68.4745405096757, 64.6434376970369, 63.6578600688845, 62.6690122245487,
66.7726540913498, 66.9993526229524, 69.2370082486024, 72.731807414607,
70.066272418262, 69.8665286312326, 66.7517294017474, 63.2675717559056,
63.9713073835875, 66.8740144556168, 66.2393105120408, 67.9422440282643,
65.7423707102056, 67.5695235158686, 69.9511480162994, 70.60938201684,
69.6737827804075, 65.8727627084409, 69.8091812621223, 66.3834903642866,
67.7146527454309, 66.1289215196643, 65.2953892851434, 62.4094347321203,
66.2600390234607, 67.183259636812, 69.644620244991, 67.9219957744587,
66.9441969493467, 63.5216819663215, 66.4959928679606, 66.1665028019955,
66.4600239736992, 64.8322046083317, 62.6689871470412, 66.3495593398356,
66.3105935904296, 69.9372793159708, 73.3729173325656)), .Names = c("date",
"cru", "model"), row.names = 1441:1500, class = "data.frame")
# Build the zoo series: a year-month index derived from the date column,
# with the remaining columns (cru, model) as the data.
idx <- as.yearmon(df$date, format = "%Y-%m-%d")
df2 <- zoo(df[, -1], order.by = idx)
我需要计算"全局"月均值 - 即该序列中所有一月、所有二月、所有三月等各自的平均值。
最终结果将是一个包含 12 行的简单数据框,对应于月份(最好带有名称)及其平均值。
我该怎么做?
您可以使用 hydroTSM 包中的函数 monthlyfunction:
library(hydroTSM)  # provides monthlyfunction()

# Sanity check: January mean computed by hand.
# Rows are selected by calendar month instead of hard-coded positions, and
# colMeans() avoids apply() coercing the data frame to a matrix.
is_jan <- format(df$date, "%m") == "01"
colMeans(df[is_jan, c("cru", "model")])
#      cru    model
# 69.21975 70.69146

# Apply mean() per calendar month across all years.
# NOTE(review): presumably the dates are taken from the first column of df
# (hydroTSM's default for data frames) -- confirm against the package docs.
out <- monthlyfunction(df, mean)
# Transpose and convert to a data frame so that months end up as rows,
# as in the output shown below.
out <- as.data.frame(t(out))
out
#          cru    model
# Jan 69.21975 70.69146
# Feb 67.83230 69.27852
# Mar 68.35954 66.60058
# Apr 65.36998 66.80626
# May 63.88265 66.79598
# Jun 61.21118 66.76295
# Jul 56.60359 65.91786
# Aug 57.27593 64.37962
# Sep 60.45508 64.75128
# Oct 63.73269 66.40662
# Nov 65.03127 68.35588
# Dec 68.36713 70.58758
class(out)
# [1] "data.frame"
1. 您可以使用 data.table 进行聚合:
library(data.table)

# Month number of each observation: 1 for January through 12 for December.
mth <- month(as.Date(df$date))

# Copy df into a data.table, attach the month as a column by reference,
# and key the table on it.
dt2 <- as.data.table(df)
set(dt2, j = "mth", value = mth)
setkeyv(dt2, "mth")

# data.table's general form is dt[i, j, by]:
# "Take DT, subset rows using i, then calculate j grouped by by".
result <- dt2[
  ,
  list(meancru = mean(cru), meanmodel = mean(model)),
  by = "mth"
]
这样会得到:
mth meancru meanmodel
1 1 69.21975 70.69146
2 2 67.83230 69.27852
...
如果您对 data.table 还不熟悉,这里有一份不错的速查表(cheat sheet)可供参考。
2. 使用基础 R(base R)
沿用上面代码中的 mth 变量,则:
# Month number (1-12) taken straight from the dates with base R only, so this
# section does not depend on data.table's month().
mth <- as.integer(format(df$date, "%m"))
df$mth <- mth
# Average only the value columns. Aggregating the whole data frame would also
# "average" the date and mth columns and add a redundant Group.1 column.
result <- aggregate(df[c("cru", "model")], by = list(mth = mth), FUN = mean)
但是格式需要一些清理。
如果您需要对数据执行更多操作,您会喜欢 data.table 在速度、索引以及添加新变量方面的能力。