如何在 tibble 中获取列表的子集
How to take subsets of lists in a tibble
我有几只股票的年度财务数据,需要把它转换成月度数据。多亏了此前得到的帮助,我已经有了一个解决方案:把 date 列变成一个由日期序列组成的列表列:
library(tidyverse)
library(lubridate)
factors.subset.raw = structure(list(
sec_id = c(1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L),
metric = c("EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY"),
date = structure(c(9464, 9829, 10193, 10560, 10925, 11291, 11656, 12020, 12384, 12752, 13117, 13482, 13847, 14211, 14578, 14943, 15308, 15674, 16038, 16402, 16769, 17135, 9342, 9708, 10073, 10438, 10802, 11200, 11565, 13756, 14120, 14487, 14852, 15217, 15583, 15947, 16311, 16678, 17044, 9464, 9829, 10193, 10560, 10925, 11291, 11656, 12020, 12384, 12752, 13117, 13482, 13847, 14211, 14578, 14943, 15308, 15674, 16038, 16402, 16769, 17135, 9342, 9708, 10073, 10438, 10802, 11200, 11565, 13756, 14120, 14487, 14852, 15217, 15583, 15947, 16311, 16678, 17044),
class = "Date"), value = c(0.250468, 0.091548, -0.100863, 0.058375, 0.24784, 0.178765, 0.099276, 0.25472, -0.033291, 0.124165, 0.050947, 0.243008, 0.1205, -0.239625, -0.231221, 0.365649, 0.163779, 0.024976, 0.08388, 0.154777, 0.016473, -0.272928, -0.018711, -0.162076, -0.599241, -4.071504, -0.37761, 1.694085, 0.045113, 0.329818, 0.199564, -0.616418, 1.164773, 0.877078, -0.325099, -0.294199, 0.272016, -0.706077, -2.57027, 4.500261, 4.734375, 4.090376, 3.322846, 3.640895, 4.645253, 4.783054, 3.946184, 3.847828, 4.077601, 4.778736, 5.453883, 5.14355, 5.084551, 3.370378, 3.076065, 2.812879, 2.87688, 2.430692, 3.029766, 3.062665, 3.349906, 0.396299, 0.60174, 0.527478, 1.048755, 1.136417, 0.668333, 0.523115, 0.259175, 0.164024, 0.118469, 0.061141, 0.096251, 0.346829, 0.401832, 0.300988, 0.344943, 0.432505)),
row.names = c(NA, -78L), class = c("tbl_df", "tbl", "data.frame"), .Names = c("sec_id", "metric", "date", "value"))
# Expand every observation into the monthly dates it covers.
# Within each (sec_id, metric) group the date is first rounded up to a month
# boundary; each row then receives a monthly sequence that starts at its own
# date and ends the day before the next row's date. The last row of a group
# has no successor, so its sequence runs up to today's date instead.
factors.subset.monthly <- factors.subset.raw %>%
  group_by(sec_id, metric) %>%
  mutate(
    date = ceiling_date(date, unit = "month"),
    date = map2(
      date,
      lead(date - 1, default = today()),
      function(from, to) seq(from, to, by = "month")
    )
  )
现在只需在上面添加 %>% unnest() %>% mutate(date = date - 1)
即可将我的年度数据转换为月度数据,所有日期均为月末。
我的问题出现在相邻两条数据之间存在较大时间间隔的时候。遇到这种情况,我只想向前填充最多 18 个月。
我试过在管道中加入截断 date 列的步骤,但到目前为止一直没能成功。例如,下面这段代码会报“大小不兼容”的错误:
# Attempted truncation of the `date` list column. This is the version that
# fails with the size error described in the surrounding text; it is kept
# unchanged as the illustration of the problem.
factors.subset.monthly %>%
# Number of dates held in each element of the list column.
mutate(count.date = as.numeric(lapply(date, length))) %>%
# Per-row cutoff: the element's own length, capped at 18.
mutate(count.cutoff = ifelse(count.date <= 18, count.date, 18)) %>%
# Why this fails: `date[...]` indexes the list column itself (selecting whole
# elements, i.e. rows) instead of the dates inside each element, and
# `count.cutoff` is a vector rather than a single number, so the result does
# not have one entry per row of the group.
mutate(date = date[1:count.cutoff])
您需要使用 map/lapply 来遍历列表列,不过只需用 head 就能把每个元素限制为最多 18 个观察值:
library(tidyverse)
library(lubridate)
# Keep at most the first 18 dates in every element of the `date` list column.
df <- factors.subset.monthly %>%
  mutate(date = map(date, function(dates) head(dates, n = 18)))

# The untruncated column contains elements with more than 18 dates ...
any(lengths(factors.subset.monthly$date) > 18)
#> [1] TRUE

# ... while the truncated copy does not.
any(lengths(df$date) > 18)
#> [1] FALSE
您也可以在创建 factors.subset.monthly 时直接加入 head:
# Build the monthly date sequences and truncate them in a single step: each
# row's sequence runs from its month-rounded date to the day before the next
# row's date (or to today for the last row of a group), and only the first 18
# dates of that sequence are kept.
factors.subset.raw %>%
  group_by(sec_id, metric) %>%
  mutate(
    date = ceiling_date(date, unit = "month"),
    date = map2(
      date,
      lead(date - 1, default = today()),
      function(from, to) head(seq(from, to, by = "month"), n = 18)
    )
  )
对于 seq 的 to 参数,您也可以取“目标日期”与“开始日期之后 18 个月”两者中的较小值,但加上 18 个月有些麻烦,因为各个月份的长度并不相同。
我有几只股票的年度财务数据,需要把它转换成月度数据。多亏了此前得到的帮助,我已经有了一个解决方案:把 date 列变成一个由日期序列组成的列表列:
library(tidyverse)
library(lubridate)
factors.subset.raw = structure(list(
sec_id = c(1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L),
metric = c("EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY"),
date = structure(c(9464, 9829, 10193, 10560, 10925, 11291, 11656, 12020, 12384, 12752, 13117, 13482, 13847, 14211, 14578, 14943, 15308, 15674, 16038, 16402, 16769, 17135, 9342, 9708, 10073, 10438, 10802, 11200, 11565, 13756, 14120, 14487, 14852, 15217, 15583, 15947, 16311, 16678, 17044, 9464, 9829, 10193, 10560, 10925, 11291, 11656, 12020, 12384, 12752, 13117, 13482, 13847, 14211, 14578, 14943, 15308, 15674, 16038, 16402, 16769, 17135, 9342, 9708, 10073, 10438, 10802, 11200, 11565, 13756, 14120, 14487, 14852, 15217, 15583, 15947, 16311, 16678, 17044),
class = "Date"), value = c(0.250468, 0.091548, -0.100863, 0.058375, 0.24784, 0.178765, 0.099276, 0.25472, -0.033291, 0.124165, 0.050947, 0.243008, 0.1205, -0.239625, -0.231221, 0.365649, 0.163779, 0.024976, 0.08388, 0.154777, 0.016473, -0.272928, -0.018711, -0.162076, -0.599241, -4.071504, -0.37761, 1.694085, 0.045113, 0.329818, 0.199564, -0.616418, 1.164773, 0.877078, -0.325099, -0.294199, 0.272016, -0.706077, -2.57027, 4.500261, 4.734375, 4.090376, 3.322846, 3.640895, 4.645253, 4.783054, 3.946184, 3.847828, 4.077601, 4.778736, 5.453883, 5.14355, 5.084551, 3.370378, 3.076065, 2.812879, 2.87688, 2.430692, 3.029766, 3.062665, 3.349906, 0.396299, 0.60174, 0.527478, 1.048755, 1.136417, 0.668333, 0.523115, 0.259175, 0.164024, 0.118469, 0.061141, 0.096251, 0.346829, 0.401832, 0.300988, 0.344943, 0.432505)),
row.names = c(NA, -78L), class = c("tbl_df", "tbl", "data.frame"), .Names = c("sec_id", "metric", "date", "value"))
# Expand every observation into the monthly dates it covers.
# Within each (sec_id, metric) group the date is first rounded up to a month
# boundary; each row then receives a monthly sequence that starts at its own
# date and ends the day before the next row's date. The last row of a group
# has no successor, so its sequence runs up to today's date instead.
factors.subset.monthly <- factors.subset.raw %>%
  group_by(sec_id, metric) %>%
  mutate(
    date = ceiling_date(date, unit = "month"),
    date = map2(
      date,
      lead(date - 1, default = today()),
      function(from, to) seq(from, to, by = "month")
    )
  )
现在只需在上面添加 %>% unnest() %>% mutate(date = date - 1)
即可将我的年度数据转换为月度数据,所有日期均为月末。
我的问题出现在相邻两条数据之间存在较大时间间隔的时候。遇到这种情况,我只想向前填充最多 18 个月。
我试过在管道中加入截断 date 列的步骤,但到目前为止一直没能成功。例如,下面这段代码会报“大小不兼容”的错误:
# Attempted truncation of the `date` list column. This is the version that
# fails with the size error described in the surrounding text; it is kept
# unchanged as the illustration of the problem.
factors.subset.monthly %>%
# Number of dates held in each element of the list column.
mutate(count.date = as.numeric(lapply(date, length))) %>%
# Per-row cutoff: the element's own length, capped at 18.
mutate(count.cutoff = ifelse(count.date <= 18, count.date, 18)) %>%
# Why this fails: `date[...]` indexes the list column itself (selecting whole
# elements, i.e. rows) instead of the dates inside each element, and
# `count.cutoff` is a vector rather than a single number, so the result does
# not have one entry per row of the group.
mutate(date = date[1:count.cutoff])
您需要使用 map/lapply 来遍历列表列,不过只需用 head 就能把每个元素限制为最多 18 个观察值:
library(tidyverse)
library(lubridate)
# Keep at most the first 18 dates in every element of the `date` list column.
df <- factors.subset.monthly %>%
  mutate(date = map(date, function(dates) head(dates, n = 18)))

# The untruncated column contains elements with more than 18 dates ...
any(lengths(factors.subset.monthly$date) > 18)
#> [1] TRUE

# ... while the truncated copy does not.
any(lengths(df$date) > 18)
#> [1] FALSE
您也可以在创建 factors.subset.monthly 时直接加入 head:
# Build the monthly date sequences and truncate them in a single step: each
# row's sequence runs from its month-rounded date to the day before the next
# row's date (or to today for the last row of a group), and only the first 18
# dates of that sequence are kept.
factors.subset.raw %>%
  group_by(sec_id, metric) %>%
  mutate(
    date = ceiling_date(date, unit = "month"),
    date = map2(
      date,
      lead(date - 1, default = today()),
      function(from, to) head(seq(from, to, by = "month"), n = 18)
    )
  )
对于 seq 的 to 参数,您也可以取“目标日期”与“开始日期之后 18 个月”两者中的较小值,但加上 18 个月有些麻烦,因为各个月份的长度并不相同。