使用 mapply 获取每个变量的每日数据
Fetch daily data for each variable using mapply
我有一个函数,它的目的(objective)是为 data.frame
中某一列的每个变量获取每日数据。日期范围是一个完整的月份,但也可以是任何其他范围。
我的 df 有一列 unit_id,所以我需要函数先取 unit_id 列中的第一个 id,并获取 3 月(march)里每一天的数据。
| unit | unit_id |
|:-----:|----------|
| AE | 123 |
| AD | 456 |
| AN | 789 |
但是现在,我的函数循环了 unit_id
列中的 ID。所以当我有 3 ids
时,第 4 天函数再次使用 1st id,然后在第 5 天使用 2nd id
等等。如此重复直到本月的最后一天。
我需要它为每个月的每一天使用每个 ID。
代码:
# One Date per calendar day from 2020-03-01 to 2020-03-31 (both included).
my_dates <- seq(as.Date("2020-03-01"), as.Date("2020-03-31"), by = 1)
# Fetch one day of "totalEvents" data for a single unit and save it as a CSV.
#
# unit:    label for the unit; stored in the `unidad_de_negocio` column and
#          used as the file-name prefix.
# unit_id: id passed as the first argument to google_analytics().
# d:       the day to fetch; used as both ends of `date_range`.
#
# Called for its side effects (writes the CSV, prints the file name). The
# value of the final gc() call is what the function returns.
my_fetch <- function(unit, unit_id, d) {
  df <- google_analytics(unit_id,
                         date_range = c(d, d),
                         metrics = c("totalEvents"),
                         dimensions = c("ga:date", "ga:eventCategory",
                                        "ga:eventAction", "ga:eventLabel"),
                         anti_sample = TRUE)
  df$unidad_de_negocio <- unit
  filename <- paste0(unit, "-", "total-events", "-", d, ".csv")
  # Forward slashes: inside an R string literal "\c" is an invalid escape
  # (parse error) and "\a" / "\t" would silently become bell / tab characters.
  path <- "D:/america/costos_protv/total_events"
  write.csv(df, file.path(path, filename), row.names = FALSE)
  print(filename)
  # Drop the local copy first so the gc() below can actually reclaim it.
  rm(df)
  gc()
}
# NOTE: mapply() walks its vector arguments in parallel and recycles the
# shorter ones, so the 3 units are cycled across the 31 dates (unit 1 on day 1,
# unit 2 on day 2, ...) instead of every unit being fetched for every date.
monthly_fetches <- mapply(my_fetch, df$unit,
df$unit_id,
my_dates, SIMPLIFY = FALSE)
变体 2:按月范围
谢谢你,阿克伦。你的答案有效。
我一直在尝试修改它,以便在其他类似情况下使用:
1.- 每月开始和结束:现在循环不是单日日期,而是有开始和结束。我称之为 monthly_dates
| starts | ends |
|:-----------:|------------|
| 2020-02-01 | 2020-02-29 |
| 2020-03-01 | 2020-03-31 |
我已经尝试调整解决方案,但它不起作用。你能看到它并告诉我为什么吗?谢谢。
# Non-working attempt (kept as posted): Map() is given no vectors to iterate
# over, lapply() walks the columns of monthly_dates, and the inner function
# ignores d1/d2 and passes the whole starts/ends columns to my_fetch().
monthly_fetches <- Map(function(x, y)
lapply(monthly_dates, function(d1, d2) my_fetch(x, y, monthly_dates$starts, monthly_dates$ends)))
主函数适用于使用 2 个日期(开始 "d1" 和结束 "d2"):
# Fetch "totalEvents" broken down by month for one unit over d1..d2 and tag
# the result with the unit's label and id.
#
# udn:    unit label, copied into the `udn` column.
# udn_id: id handed to google_analytics(); copied into the `udn_id` column.
# d1, d2: start and end of the date range.
#
# Returns whatever google_analytics() produced, plus the two extra columns.
my_fetch <- function(udn, udn_id, d1, d2) {
  events <- google_analytics(
    udn_id,
    date_range = c(d1, d2),
    metrics = "totalEvents",
    dimensions = "ga:month",
    anti_sample = TRUE
  )
  events$udn <- udn
  events$udn_id <- udn_id
  events
}
**制作每月日期范围的代码:**
# Build a table of consecutive month-long date ranges.
#
# start: Date of the first range's first day (normally the 1st of a month).
# end:   Date used as an exclusive upper bound; the last range ends on
#        `end - 1` (pass Sys.Date() to stop at yesterday).
#
# Returns a data.frame with Date columns `starts` and `ends`, one row per
# range; the final row may cover only part of a month.
make_date_ranges <- function(start, end) {
  stopifnot(length(start) == 1L, length(end) == 1L, start < end)
  last_day <- end - 1
  starts <- seq(from = start, to = last_day, by = "1 month")
  # Each range ends the day before the next one starts. Deriving `ends` from
  # `starts` keeps both columns the same length for every value of `end`
  # (including when `end` falls on the 1st of a month) and needs only base R.
  ends <- c(starts[-1] - 1, last_day)
  data.frame(starts, ends)
}
## usage: monthly ranges starting 2020-02-01, called with today's date as `end`
monthly_dates <- make_date_ranges(as.Date("2020-02-01"), Sys.Date())
更新 1:
dput(monthly_fetches[1])
list(AE = list(structure(list(month = "02", totalEvents = 19670334,
udn = "AE", udn_id = 74415341), row.names = 1L, totals = list(
list(totalEvents = "19670334")), minimums = list(list(totalEvents = "19670334")), maximums = list(
list(totalEvents = "19670334")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"),
structure(list(month = "03", totalEvents = 19765253, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(
totalEvents = "19765253")), minimums = list(list(totalEvents = "19765253")), maximums = list(
list(totalEvents = "19765253")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"),
structure(list(month = "04", totalEvents = 1319087, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(
totalEvents = "1319087")), minimums = list(list(totalEvents = "1319087")), maximums = list(
list(totalEvents = "1319087")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame")))
更新二:
dput(monthly_fetches[[1]])
list(structure(list(month = "02", totalEvents = 19670334, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(totalEvents = "19670334")), minimums = list(
list(totalEvents = "19670334")), maximums = list(list(totalEvents = "19670334")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"),
structure(list(month = "03", totalEvents = 19765253, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(
totalEvents = "19765253")), minimums = list(list(totalEvents = "19765253")), maximums = list(
list(totalEvents = "19765253")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"),
structure(list(month = "04", totalEvents = 1319087, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(
totalEvents = "1319087")), minimums = list(list(totalEvents = "1319087")), maximums = list(
list(totalEvents = "1319087")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"))
由于 Map/mapply
需要所有参数的长度一致(否则较短的参数会被循环补齐),而 'df' 的行数为 3、'my_dates' 的长度为 31,因此一种选择是先遍历 'df' 的列,然后在 Map/mapply
内部再循环一层
# Outer Map pairs each unit with its id; the inner lapply then calls
# my_fetch() for every date in my_dates, giving one list of results per unit.
monthly_fetches <- Map(function(x, y)
  lapply(my_dates, function(date) my_fetch(x, y, date)),
  df$unit, df$unit_id)
或者我们可以为 'my_dates'
设置外循环
# Outer loop over the dates; for each date Map() calls my_fetch() once per
# unit/unit_id pair, with the single date recycled across them.
lapply(my_dates, function(date) Map(my_fetch, df$unit, df$unit_id, date))
更新
如果我们需要传递两列,使用Map
# Outer Map walks the month ranges (starts/ends in parallel); the inner Map
# calls my_fetch() once per unit, recycling the single start/end pair.
Map(function(start, end)
  Map(my_fetch, df$unit, df$unit_id, start, end),
  monthly_dates$starts, monthly_dates$ends)
或
# Outer Map: one call per unit (x = unit, y = unit_id).
# Inner Map: one my_fetch() call per start/end pair of monthly_dates.
# Result: one list per unit, each holding one element per month range.
monthly_fetches <- Map(function(x, y) Map(function(start, end)
my_fetch(x, y, start, end),
monthly_dates$starts, monthly_dates$ends), df$unit, df$unit_id)
然后rbind
# Row-bind the per-month results within each unit, then row-bind the units.
do.call(rbind,lapply(monthly_fetches, function(x) do.call(rbind, x)))
或使用map
library(purrr)
library(dplyr)
# Same stacking with purrr/dplyr: bind_rows() collapses each unit's list and
# map_dfr() row-binds the units, storing each element's name (or index) in 'grp'.
map_dfr(monthly_fetches, bind_rows, .id = 'grp')
我有一个函数,它的目的(objective)是为 data.frame
中某一列的每个变量获取每日数据。日期范围是一个完整的月份,但也可以是任何其他范围。
我的 df 有一列 unit_id,所以我需要函数先取 unit_id 列中的第一个 id,并获取 3 月(march)里每一天的数据。
| unit | unit_id |
|:-----:|----------|
| AE | 123 |
| AD | 456 |
| AN | 789 |
但是现在,我的函数循环了 unit_id
列中的 ID。所以当我有 3 ids
时,第 4 天函数再次使用 1st id,然后在第 5 天使用 2nd id
等等。如此重复直到本月的最后一天。
我需要它为每个月的每一天使用每个 ID。
代码:
# One Date per calendar day from 2020-03-01 to 2020-03-31 (both included).
my_dates <- seq(as.Date("2020-03-01"), as.Date("2020-03-31"), by = 1)
# Fetch one day of "totalEvents" data for a single unit and save it as a CSV.
#
# unit:    label for the unit; stored in the `unidad_de_negocio` column and
#          used as the file-name prefix.
# unit_id: id passed as the first argument to google_analytics().
# d:       the day to fetch; used as both ends of `date_range`.
#
# Called for its side effects (writes the CSV, prints the file name). The
# value of the final gc() call is what the function returns.
my_fetch <- function(unit, unit_id, d) {
  df <- google_analytics(unit_id,
                         date_range = c(d, d),
                         metrics = c("totalEvents"),
                         dimensions = c("ga:date", "ga:eventCategory",
                                        "ga:eventAction", "ga:eventLabel"),
                         anti_sample = TRUE)
  df$unidad_de_negocio <- unit
  filename <- paste0(unit, "-", "total-events", "-", d, ".csv")
  # Forward slashes: inside an R string literal "\c" is an invalid escape
  # (parse error) and "\a" / "\t" would silently become bell / tab characters.
  path <- "D:/america/costos_protv/total_events"
  write.csv(df, file.path(path, filename), row.names = FALSE)
  print(filename)
  # Drop the local copy first so the gc() below can actually reclaim it.
  rm(df)
  gc()
}
# NOTE: mapply() walks its vector arguments in parallel and recycles the
# shorter ones, so the 3 units are cycled across the 31 dates (unit 1 on day 1,
# unit 2 on day 2, ...) instead of every unit being fetched for every date.
monthly_fetches <- mapply(my_fetch, df$unit,
df$unit_id,
my_dates, SIMPLIFY = FALSE)
变体 2:按月范围
谢谢你,阿克伦。你的答案有效。
我一直在尝试修改它,以便在其他类似情况下使用:
1.- 每月开始和结束:现在循环不是单日日期,而是有开始和结束。我称之为 monthly_dates
| starts | ends |
|:-----------:|------------|
| 2020-02-01 | 2020-02-29 |
| 2020-03-01 | 2020-03-31 |
我已经尝试调整解决方案,但它不起作用。你能看到它并告诉我为什么吗?谢谢。
# Non-working attempt (kept as posted): Map() is given no vectors to iterate
# over, lapply() walks the columns of monthly_dates, and the inner function
# ignores d1/d2 and passes the whole starts/ends columns to my_fetch().
monthly_fetches <- Map(function(x, y)
lapply(monthly_dates, function(d1, d2) my_fetch(x, y, monthly_dates$starts, monthly_dates$ends)))
主函数适用于使用 2 个日期(开始 "d1" 和结束 "d2"):
# Fetch "totalEvents" broken down by month for one unit over d1..d2 and tag
# the result with the unit's label and id.
#
# udn:    unit label, copied into the `udn` column.
# udn_id: id handed to google_analytics(); copied into the `udn_id` column.
# d1, d2: start and end of the date range.
#
# Returns whatever google_analytics() produced, plus the two extra columns.
my_fetch <- function(udn, udn_id, d1, d2) {
  events <- google_analytics(
    udn_id,
    date_range = c(d1, d2),
    metrics = "totalEvents",
    dimensions = "ga:month",
    anti_sample = TRUE
  )
  events$udn <- udn
  events$udn_id <- udn_id
  events
}
**制作每月日期范围的代码:**
# Build a table of consecutive month-long date ranges.
#
# start: Date of the first range's first day (normally the 1st of a month).
# end:   Date used as an exclusive upper bound; the last range ends on
#        `end - 1` (pass Sys.Date() to stop at yesterday).
#
# Returns a data.frame with Date columns `starts` and `ends`, one row per
# range; the final row may cover only part of a month.
make_date_ranges <- function(start, end) {
  stopifnot(length(start) == 1L, length(end) == 1L, start < end)
  last_day <- end - 1
  starts <- seq(from = start, to = last_day, by = "1 month")
  # Each range ends the day before the next one starts. Deriving `ends` from
  # `starts` keeps both columns the same length for every value of `end`
  # (including when `end` falls on the 1st of a month) and needs only base R.
  ends <- c(starts[-1] - 1, last_day)
  data.frame(starts, ends)
}
## usage: monthly ranges starting 2020-02-01, called with today's date as `end`
monthly_dates <- make_date_ranges(as.Date("2020-02-01"), Sys.Date())
更新 1:
dput(monthly_fetches[1])
list(AE = list(structure(list(month = "02", totalEvents = 19670334,
udn = "AE", udn_id = 74415341), row.names = 1L, totals = list(
list(totalEvents = "19670334")), minimums = list(list(totalEvents = "19670334")), maximums = list(
list(totalEvents = "19670334")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"),
structure(list(month = "03", totalEvents = 19765253, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(
totalEvents = "19765253")), minimums = list(list(totalEvents = "19765253")), maximums = list(
list(totalEvents = "19765253")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"),
structure(list(month = "04", totalEvents = 1319087, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(
totalEvents = "1319087")), minimums = list(list(totalEvents = "1319087")), maximums = list(
list(totalEvents = "1319087")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame")))
更新二:
dput(monthly_fetches[[1]])
list(structure(list(month = "02", totalEvents = 19670334, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(totalEvents = "19670334")), minimums = list(
list(totalEvents = "19670334")), maximums = list(list(totalEvents = "19670334")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"),
structure(list(month = "03", totalEvents = 19765253, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(
totalEvents = "19765253")), minimums = list(list(totalEvents = "19765253")), maximums = list(
list(totalEvents = "19765253")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"),
structure(list(month = "04", totalEvents = 1319087, udn = "AE",
udn_id = 74415341), row.names = 1L, totals = list(list(
totalEvents = "1319087")), minimums = list(list(totalEvents = "1319087")), maximums = list(
list(totalEvents = "1319087")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"))
由于 Map/mapply
需要所有参数的长度一致(否则较短的参数会被循环补齐),而 'df' 的行数为 3、'my_dates' 的长度为 31,因此一种选择是先遍历 'df' 的列,然后在 Map/mapply 内部再循环一层
# Outer Map pairs each unit with its id; the inner lapply then calls
# my_fetch() for every date in my_dates, giving one list of results per unit.
monthly_fetches <- Map(function(x, y)
  lapply(my_dates, function(date) my_fetch(x, y, date)),
  df$unit, df$unit_id)
或者我们可以为 'my_dates'
设置外循环
lapply(my_dates, function(date) Map(my_fetch, df$unit, df$unit_id, date))
更新
如果我们需要传递两列,使用Map
# Outer Map walks the month ranges (starts/ends in parallel); the inner Map
# calls my_fetch() once per unit, recycling the single start/end pair.
Map(function(start, end)
  Map(my_fetch, df$unit, df$unit_id, start, end),
  monthly_dates$starts, monthly_dates$ends)
或
# Outer Map: one call per unit (x = unit, y = unit_id).
# Inner Map: one my_fetch() call per start/end pair of monthly_dates.
# Result: one list per unit, each holding one element per month range.
monthly_fetches <- Map(function(x, y) Map(function(start, end)
my_fetch(x, y, start, end),
monthly_dates$starts, monthly_dates$ends), df$unit, df$unit_id)
然后rbind
# Row-bind the per-month results within each unit, then row-bind the units.
do.call(rbind,lapply(monthly_fetches, function(x) do.call(rbind, x)))
或使用map
library(purrr)
library(dplyr)
# Same stacking with purrr/dplyr: bind_rows() collapses each unit's list and
# map_dfr() row-binds the units, storing each element's name (or index) in 'grp'.
map_dfr(monthly_fetches, bind_rows, .id = 'grp')