计算数据帧的多个子集的斜率时的时间不匹配
time mismatch in calculating the slope of multiple subsets of a data frame
我正在尝试根据年份和国家对我的数据进行子集化,并计算每个数据的回归系数。
我的数据结构的一个子集是:
pdata <- structure(list(movie_odid = c(10100L, 10100L, 520100L, 520100L,
650100L, 650100L, 10100L, 10100L, 520100L, 780100L, 780100L,
950100L, 950100L, 540100L, 540100L, 780100L, 780100L, 880100L,
880100L, 450100L, 450100L, 540100L, 540100L, 640100L, 640100L,
800100L, 800100L, 420100L, 420100L, 450100L, 450100L, 490100L,
490100L, 640100L, 640100L, 430100L, 430100L, 490100L, 490100L,
590100L, 590100L, 1620100L, 1620100L, 390100L, 390100L, 8810100L,
8810100L, 9480100L, 9480100L, 570100L, 570100L, 590100L, 590100L
), chart_date = structure(c(5L, 6L, 3L, 4L, 1L, 2L, 7L, 8L, 7L,
11L, 12L, 9L, 10L, 17L, 18L, 13L, 14L, 15L, 16L, 23L, 24L, 19L,
20L, 25L, 26L, 21L, 22L, 29L, 30L, 27L, 28L, 31L, 32L, 27L, 28L,
37L, 38L, 33L, 34L, 35L, 36L, 39L, 40L, 41L, 42L, 47L, 48L, 45L,
46L, 43L, 44L, 39L, 40L), .Label = c("1997-05-23", "1997-05-30",
"1997-07-04", "1997-07-11", "1997-12-19", "1997-12-26", "1998-01-02",
"1998-01-09", "1998-06-26", "1998-07-03", "1998-07-24", "1998-07-31",
"1999-02-05", "1999-02-12", "1999-06-04", "1999-06-11", "1999-11-19",
"1999-11-26", "2000-01-07", "2000-01-14", "2000-05-26", "2000-06-02",
"2000-11-17", "2000-11-24", "2000-12-22", "2000-12-29", "2001-01-05",
"2001-01-12", "2001-05-18", "2001-05-25", "2001-11-02", "2001-11-09",
"2002-01-04", "2002-01-11", "2002-04-19", "2002-04-26", "2002-11-15",
"2002-11-22", "2003-01-03", "2003-01-10", "2003-05-09", "2003-05-16",
"2003-05-23", "2003-05-30", "2003-09-12", "2003-09-19", "2003-11-07",
"2003-11-14"), class = "factor"), chart_year = c(1997L, 1997L,
1997L, 1997L, 1997L, 1997L, 1998L, 1998L, 1998L, 1998L, 1998L,
1998L, 1998L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 2000L,
2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L), revenue = c(52969336L,
71183357L, 76457208L, 43593212L, 81172327L, 45111185L, 45012810L,
37568867L, 48261L, 49760360L, 36612617L, 1962627L, 1441774L,
23093123L, 65927993L, 5107876L, 4771193L, 3000000L, 82216507L,
84977355L, 59922105L, 8431650L, 7296370L, 78711571L, 40769776L,
83347490L, 37133438L, 56498192L, 63650772L, 2968580L, 788895L,
76599345L, 57025088L, 28499878L, 22837762L, 106131568L, 61909948L,
4502006L, 3272808L, 822068L, 1078673L, 3843873L, 2101748L, 42508303L,
121361422L, 10163670L, 11628760L, 29944555L, 14018616L, 100066590L,
49010220L, 3536766L, 3321470L), theaters = c(2674L, 2711L, 3020L,
3020L, 3281L, 3282L, 2727L, 2746L, 58L, 2453L, 2540L, 214L, 214L,
3236L, 3236L, 1027L, 1140L, 0L, 3312L, 3127L, 3134L, 2752L, 2326L,
2774L, 2929L, 3653L, 3653L, 3587L, 3623L, 2594L, 912L, 3237L,
3269L, 2948L, 3048L, 3682L, 3682L, 1425L, 1313L, 108L, 141L,
1808L, 1180L, 3603L, 3603L, 576L, 1177L, 3282L, 3289L, 3483L,
3492L, 1194L, 1212L), running_time = c(194L, 194L, 98L, 98L,
134L, 134L, 194L, 194L, 98L, 169L, 169L, 220L, 220L, 92L, 92L,
169L, 169L, 95L, 95L, 105L, 105L, 92L, 92L, 143L, 143L, 126L,
126L, 90L, 90L, 105L, 105L, 92L, 92L, 143L, 143L, 161L, 161L,
92L, 92L, 95L, 95L, 133L, 133L, 138L, 138L, 135L, 135L, 0L, 0L,
102L, 102L, 95L, 95L), ifUS = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA,
-53L))
我的代码是:
# Regress log(revenue) on running_time within the data frame `x` and
# return the second coefficient of the fit (the running_time slope).
coefLM <- function(x) {
  fit <- lm(log(revenue) ~ running_time, data = x)
  coef(fit)[2]
}
# Split pdata into one data frame per chart_year x ifUS combination.
spl <- with(pdata, split(pdata, list(chart_year = chart_year, ifUS = ifUS)))
# One row per (chart_year, ifUS) pair that actually occurs in pdata.
out <- unique(pdata[, c("chart_year", "ifUS")])
# Attach one slope per element of spl.
# NOTE(review): slopes are paired with the rows of `out` purely by position,
# so this assumes spl and out have the same length and ordering -- confirm.
out <- transform(out, slope = sapply(spl, coefLM))
out
但是,结果中存在时间不匹配。例如,对于“1999.0.running_time”,chart_year 是“2012”。能否请您指导我可能的原因是什么?
在您对 out <- transform(...) 的调用中,out 中 chart_year 和 ifUS 组合的行顺序与 spl 列表的元素顺序不匹配。
spl <- with(pdata, split(pdata, list(chart_year = chart_year, ifUS = ifUS)))
out <- unique(pdata[, c("chart_year", "ifUS")])
out
# chart_year ifUS
# 1 1997 1
# 7 1998 1
# 14 1999 1
# 20 2000 1
# 28 2001 1
# 36 2002 0
# 38 2002 1
# 42 2003 0
# 44 2003 1
nrow(out)
# [1] 9
但是当你尝试 transform
它时,
out <- transform(out, slope = sapply(spl, coefLM))
# Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
# 0 (non-NA) cases
我怀疑您没有遇到这个错误,是因为您的 spl 与我的不同(更大)。然而,如果您对比 out 和 names(spl),就会发现即使两者长度相同,它们的顺序也并不一致。(下面的代码只是为了演示。)
nrow(out)
# [1] 9
length(spl)
# [1] 14
sapply(spl, nrow)
# 1997.0 1998.0 1999.0 2000.0 2001.0 2002.0 2003.0 1997.1 1998.1 1999.1 2000.1 2001.1 2002.1 2003.1
# 0 0 0 0 0 2 6 6 7 6 8 8 4 6
即使我们从 spl 中过滤掉 0 行的数据帧(这确实使其长度变为 9),spl 和 out 的顺序仍然不一样:
names(Filter(c, sapply(spl, nrow)))
# [1] "2002.0" "2003.0" "1997.1" "1998.1" "1999.1" "2000.1" "2001.1" "2002.1" "2003.1"
do.call(paste, c(out, sep = "."))
# [1] "1997.1" "1998.1" "1999.1" "2000.1" "2001.1" "2002.0" "2002.1" "2003.0" "2003.1"
(不要关注我使用的代码,而是关注我展示的内容。)
(老实说,您应该庆幸自己发现了这个问题。否则,您很可能会得到具有显著性的模型——模型本身是正确的,却被分配给了错误的组,结果也就被破坏了。)
如何解决?这是一个建议,转向 dplyr
方法:
# Fit the model once per (chart_year, ifUS) group. The grouping columns are
# carried into the result next to the slope, so each coefficient stays
# labelled with the group it was computed from.
library(dplyr)
pdata %>%
group_by(chart_year, ifUS) %>%
summarize(coef = coefLM(cur_data())) %>%
ungroup()
# `summarise()` regrouping output by 'chart_year' (override with `.groups` argument)
# # A tibble: 9 x 3
# chart_year ifUS coef
# <int> <int> <dbl>
# 1 1997 1 0.000602
# 2 1998 1 0.0288
# 3 1999 1 -0.0217
# 4 2000 1 0.0299
# 5 2001 1 -0.0125
# 6 2002 0 NA
# 7 2002 1 -0.468
# 8 2003 0 -0.00962
# 9 2003 1 0.0479
使用这种方法的一个优点是,所使用的数据和产生的系数始终与相同的类别/因子(categories/factors)相关联。
下面做一个快速演练(更详细的 dplyr 教程会让您学到更多):
%>%
只是一个“管道运算符”,它有助于直观地分解正在发生的事情。
这两个在功能上是相同的:
pdata %>% somefunc(.)
somefunc(pdata)
它的优势在于更长的管道:
pdata %>% somefunc(.) %>% anotherfunc(., arg = 7) %>% finalfunc(., n = 1)
finalfunc(anotherfunc(somefunc(pdata), arg = 7), n = 1)
group_by 确保所有创建/修改数据的操作一次只对一组数据进行。(注意:“分组”信息会一直附着在数据上,直到用 ungroup() 主动移除,并且一些计算的结果会随数据大小而不同。我养成了一个习惯:一旦确定不再需要分组,就立即 ungroup,这样以后就不会不小心算错。)
mutate(此处未使用)添加或替换数据帧中的列,保持行数不变;summarize 通常将每个组缩减为一行(尽管较新版本的 dplyr 应该能够汇总为多于 1 行……关键在于它会显著改变数据的形状)。对于 summarize,所有其他列都会被删除(因为在汇总为 1 行结果时,无法保留原本有 n 行的列)。
不需要您学习 dplyr 的备选方案:按单个名称拆分,可以更轻松地把结果重新连接回其余数据。可以使用 paste 之类的函数,不过我更喜欢 interaction 函数,因为它更能(声明式地)传达意图(……它内部调用了 paste)。
pdata$groupname <- interaction(pdata$chart_year, pdata$ifUS)
head(pdata)
# movie_odid chart_date chart_year revenue theaters running_time ifUS groupname
# 1 10100 1997-12-19 1997 52969336 2674 194 1 1997.1
# 2 10100 1997-12-26 1997 71183357 2711 194 1 1997.1
# 3 520100 1997-07-04 1997 76457208 3020 98 1 1997.1
# 4 520100 1997-07-11 1997 43593212 3020 98 1 1997.1
# 5 650100 1997-05-23 1997 81172327 3281 134 1 1997.1
# 6 650100 1997-05-30 1997 45111185 3282 134 1 1997.1
这一步本身并没有做太多事情,但接下来我们将按这个单一字段拆分(与您按 2 个字段拆分的效果相同)并运行您的函数。
我将使用 split 的表亲函数 by 来代替 split,它做同样的拆分,并在每个数据帧上运行一个函数。by 的输出实际上只是一个 list,即使它看起来非常不同。
out <- by(pdata, pdata$groupname, FUN = coefLM)
out
# pdata$groupname: 1997.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1998.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1999.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2000.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2001.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2002.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2003.0
# [1] -0.009621244
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1997.1
# [1] 0.0006017306
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1998.1
# [1] 0.02882223
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1999.1
# [1] -0.02168886
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2000.1
# [1] 0.02991097
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2001.1
# [1] -0.01250767
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2002.1
# [1] -0.4683953
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2003.1
# [1] 0.04785696
我正在尝试根据年份和国家对我的数据进行子集化,并计算每个数据的回归系数。
我的数据结构的一个子集是:
pdata <- structure(list(movie_odid = c(10100L, 10100L, 520100L, 520100L,
650100L, 650100L, 10100L, 10100L, 520100L, 780100L, 780100L,
950100L, 950100L, 540100L, 540100L, 780100L, 780100L, 880100L,
880100L, 450100L, 450100L, 540100L, 540100L, 640100L, 640100L,
800100L, 800100L, 420100L, 420100L, 450100L, 450100L, 490100L,
490100L, 640100L, 640100L, 430100L, 430100L, 490100L, 490100L,
590100L, 590100L, 1620100L, 1620100L, 390100L, 390100L, 8810100L,
8810100L, 9480100L, 9480100L, 570100L, 570100L, 590100L, 590100L
), chart_date = structure(c(5L, 6L, 3L, 4L, 1L, 2L, 7L, 8L, 7L,
11L, 12L, 9L, 10L, 17L, 18L, 13L, 14L, 15L, 16L, 23L, 24L, 19L,
20L, 25L, 26L, 21L, 22L, 29L, 30L, 27L, 28L, 31L, 32L, 27L, 28L,
37L, 38L, 33L, 34L, 35L, 36L, 39L, 40L, 41L, 42L, 47L, 48L, 45L,
46L, 43L, 44L, 39L, 40L), .Label = c("1997-05-23", "1997-05-30",
"1997-07-04", "1997-07-11", "1997-12-19", "1997-12-26", "1998-01-02",
"1998-01-09", "1998-06-26", "1998-07-03", "1998-07-24", "1998-07-31",
"1999-02-05", "1999-02-12", "1999-06-04", "1999-06-11", "1999-11-19",
"1999-11-26", "2000-01-07", "2000-01-14", "2000-05-26", "2000-06-02",
"2000-11-17", "2000-11-24", "2000-12-22", "2000-12-29", "2001-01-05",
"2001-01-12", "2001-05-18", "2001-05-25", "2001-11-02", "2001-11-09",
"2002-01-04", "2002-01-11", "2002-04-19", "2002-04-26", "2002-11-15",
"2002-11-22", "2003-01-03", "2003-01-10", "2003-05-09", "2003-05-16",
"2003-05-23", "2003-05-30", "2003-09-12", "2003-09-19", "2003-11-07",
"2003-11-14"), class = "factor"), chart_year = c(1997L, 1997L,
1997L, 1997L, 1997L, 1997L, 1998L, 1998L, 1998L, 1998L, 1998L,
1998L, 1998L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 2000L,
2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2000L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L), revenue = c(52969336L,
71183357L, 76457208L, 43593212L, 81172327L, 45111185L, 45012810L,
37568867L, 48261L, 49760360L, 36612617L, 1962627L, 1441774L,
23093123L, 65927993L, 5107876L, 4771193L, 3000000L, 82216507L,
84977355L, 59922105L, 8431650L, 7296370L, 78711571L, 40769776L,
83347490L, 37133438L, 56498192L, 63650772L, 2968580L, 788895L,
76599345L, 57025088L, 28499878L, 22837762L, 106131568L, 61909948L,
4502006L, 3272808L, 822068L, 1078673L, 3843873L, 2101748L, 42508303L,
121361422L, 10163670L, 11628760L, 29944555L, 14018616L, 100066590L,
49010220L, 3536766L, 3321470L), theaters = c(2674L, 2711L, 3020L,
3020L, 3281L, 3282L, 2727L, 2746L, 58L, 2453L, 2540L, 214L, 214L,
3236L, 3236L, 1027L, 1140L, 0L, 3312L, 3127L, 3134L, 2752L, 2326L,
2774L, 2929L, 3653L, 3653L, 3587L, 3623L, 2594L, 912L, 3237L,
3269L, 2948L, 3048L, 3682L, 3682L, 1425L, 1313L, 108L, 141L,
1808L, 1180L, 3603L, 3603L, 576L, 1177L, 3282L, 3289L, 3483L,
3492L, 1194L, 1212L), running_time = c(194L, 194L, 98L, 98L,
134L, 134L, 194L, 194L, 98L, 169L, 169L, 220L, 220L, 92L, 92L,
169L, 169L, 95L, 95L, 105L, 105L, 92L, 92L, 143L, 143L, 126L,
126L, 90L, 90L, 105L, 105L, 92L, 92L, 143L, 143L, 161L, 161L,
92L, 92L, 95L, 95L, 133L, 133L, 138L, 138L, 135L, 135L, 0L, 0L,
102L, 102L, 95L, 95L), ifUS = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA,
-53L))
我的代码是:
# Regress log(revenue) on running_time within the data frame `x` and
# return the second coefficient of the fit (the running_time slope).
coefLM <- function(x) {
  fit <- lm(log(revenue) ~ running_time, data = x)
  coef(fit)[2]
}
# Split pdata into one data frame per chart_year x ifUS combination.
spl <- with(pdata, split(pdata, list(chart_year = chart_year, ifUS = ifUS)))
# One row per (chart_year, ifUS) pair that actually occurs in pdata.
out <- unique(pdata[, c("chart_year", "ifUS")])
# Attach one slope per element of spl.
# NOTE(review): slopes are paired with the rows of `out` purely by position,
# so this assumes spl and out have the same length and ordering -- confirm.
out <- transform(out, slope = sapply(spl, coefLM))
out
但是,结果中存在时间不匹配。例如,对于“1999.0.running_time”,chart_year 是“2012”。能否请您指导我可能的原因是什么?
在您对 out <- transform(...) 的调用中,out 中 chart_year 和 ifUS 组合的行顺序与 spl 列表的元素顺序不匹配。
spl <- with(pdata, split(pdata, list(chart_year = chart_year, ifUS = ifUS)))
out <- unique(pdata[, c("chart_year", "ifUS")])
out
# chart_year ifUS
# 1 1997 1
# 7 1998 1
# 14 1999 1
# 20 2000 1
# 28 2001 1
# 36 2002 0
# 38 2002 1
# 42 2003 0
# 44 2003 1
nrow(out)
# [1] 9
但是当你尝试 transform
它时,
out <- transform(out, slope = sapply(spl, coefLM))
# Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
# 0 (non-NA) cases
我怀疑您没有遇到这个错误,是因为您的 spl 与我的不同(更大)。然而,如果您对比 out 和 names(spl),就会发现即使两者长度相同,它们的顺序也并不一致。(下面的代码只是为了演示。)
nrow(out)
# [1] 9
length(spl)
# [1] 14
sapply(spl, nrow)
# 1997.0 1998.0 1999.0 2000.0 2001.0 2002.0 2003.0 1997.1 1998.1 1999.1 2000.1 2001.1 2002.1 2003.1
# 0 0 0 0 0 2 6 6 7 6 8 8 4 6
即使我们从 spl 中过滤掉 0 行的数据帧(这确实使其长度变为 9),spl 和 out 的顺序仍然不一样:
names(Filter(c, sapply(spl, nrow)))
# [1] "2002.0" "2003.0" "1997.1" "1998.1" "1999.1" "2000.1" "2001.1" "2002.1" "2003.1"
do.call(paste, c(out, sep = "."))
# [1] "1997.1" "1998.1" "1999.1" "2000.1" "2001.1" "2002.0" "2002.1" "2003.0" "2003.1"
(不要关注我使用的代码,而是关注我展示的内容。)
(老实说,您应该庆幸自己发现了这个问题。否则,您很可能会得到具有显著性的模型——模型本身是正确的,却被分配给了错误的组,结果也就被破坏了。)
如何解决?这是一个建议,转向 dplyr
方法:
# Fit the model once per (chart_year, ifUS) group. The grouping columns are
# carried into the result next to the slope, so each coefficient stays
# labelled with the group it was computed from.
library(dplyr)
pdata %>%
group_by(chart_year, ifUS) %>%
summarize(coef = coefLM(cur_data())) %>%
ungroup()
# `summarise()` regrouping output by 'chart_year' (override with `.groups` argument)
# # A tibble: 9 x 3
# chart_year ifUS coef
# <int> <int> <dbl>
# 1 1997 1 0.000602
# 2 1998 1 0.0288
# 3 1999 1 -0.0217
# 4 2000 1 0.0299
# 5 2001 1 -0.0125
# 6 2002 0 NA
# 7 2002 1 -0.468
# 8 2003 0 -0.00962
# 9 2003 1 0.0479
使用这种方法的一个优点是,所使用的数据和产生的系数始终与相同的类别/因子(categories/factors)相关联。
下面做一个快速演练(更详细的 dplyr 教程会让您学到更多):
%>%
只是一个“管道运算符”,它有助于直观地分解正在发生的事情。这两个在功能上是相同的:
# Piped form and plain call form of the same operation.
pdata %>% somefunc(.)
somefunc(pdata)
它的优势在于更长的管道:
# Piped form: reads left to right, one step at a time.
pdata %>% somefunc(.) %>% anotherfunc(., arg = 7) %>% finalfunc(., n = 1)
# Equivalent nested form: reads from the inside out.
finalfunc(anotherfunc(somefunc(pdata), arg = 7), n = 1)
group_by 确保所有创建/修改数据的操作一次只对一组数据进行。(注意:“分组”信息会一直附着在数据上,直到用 ungroup() 主动移除,并且一些计算的结果会随数据大小而不同。我养成了一个习惯:一旦确定不再需要分组,就立即 ungroup,这样以后就不会不小心算错。)
mutate(此处未使用)添加或替换数据帧中的列,保持行数不变;summarize 通常将每个组缩减为一行(尽管较新版本的 dplyr 应该能够汇总为多于 1 行……关键在于它会显著改变数据的形状)。对于 summarize,所有其他列都会被删除(因为在汇总为 1 行结果时,无法保留原本有 n 行的列)。
不需要您学习 dplyr 的备选方案:按单个名称拆分,可以更轻松地把结果重新连接回其余数据。可以使用 paste 之类的函数,不过我更喜欢 interaction 函数,因为它更能(声明式地)传达意图(……它内部调用了 paste)。
pdata$groupname <- interaction(pdata$chart_year, pdata$ifUS)
head(pdata)
# movie_odid chart_date chart_year revenue theaters running_time ifUS groupname
# 1 10100 1997-12-19 1997 52969336 2674 194 1 1997.1
# 2 10100 1997-12-26 1997 71183357 2711 194 1 1997.1
# 3 520100 1997-07-04 1997 76457208 3020 98 1 1997.1
# 4 520100 1997-07-11 1997 43593212 3020 98 1 1997.1
# 5 650100 1997-05-23 1997 81172327 3281 134 1 1997.1
# 6 650100 1997-05-30 1997 45111185 3282 134 1 1997.1
这一步本身并没有做太多事情,但接下来我们将按这个单一字段拆分(与您按 2 个字段拆分的效果相同)并运行您的函数。
我将使用 split 的表亲函数 by 来代替 split,它做同样的拆分,并在每个数据帧上运行一个函数。by 的输出实际上只是一个 list,即使它看起来非常不同。
out <- by(pdata, pdata$groupname, FUN = coefLM)
out
# pdata$groupname: 1997.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1998.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1999.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2000.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2001.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2002.0
# [1] NA
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2003.0
# [1] -0.009621244
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1997.1
# [1] 0.0006017306
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1998.1
# [1] 0.02882223
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 1999.1
# [1] -0.02168886
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2000.1
# [1] 0.02991097
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2001.1
# [1] -0.01250767
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2002.1
# [1] -0.4683953
# ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# pdata$groupname: 2003.1
# [1] 0.04785696