使用 mapply 获取每个变量的每日数据

Fetch daily data for each variable using mapply

我有一个函数,其目标是为 data.frame 某一列中的每个变量获取每日数据。范围是一个完整的月份,但也可以是任何其他范围。

我的 df 有一个 unit_id 列,所以我需要我的函数先取 unit_id 列中的第一个 id,并获取三月份(March)每一天的数据。

| unit | unit_id |
|:-----:|----------|
|  AE   |    123   |
|  AD   |    456   |
|  AN   |    789   |

但是现在,我的函数会循环使用 unit_id 列中的 ID。所以当我有 3 个 id 时,到了第 4 天函数又会使用第 1 个 id,第 5 天使用第 2 个 id,依此类推,如此重复直到本月的最后一天。

我需要它为每个月的每一天使用每个 ID。

代码:

# Every calendar day of March 2020 as a Date vector (one element per day).
my_dates <- seq(
  from = as.Date("2020-03-01"),
  to = as.Date("2020-03-31"),
  by = "day"
)

# Fetch one day of Google Analytics event data for one unit and save it as CSV.
#
# Arguments:
#   unit    - label of the unit; stored in the `unidad_de_negocio` column and
#             used as the prefix of the output file name.
#   unit_id - id passed as the first argument to google_analytics().
#   d       - the day to fetch; used as both the start and the end of
#             `date_range`.
#
# Side effects: writes "<unit>-total-events-<d>.csv" into the output directory
# and prints the file name.
# Returns: the value of gc(), which is the last expression evaluated.
my_fetch <- function(unit, unit_id, d) {
  df <- google_analytics(
    unit_id,
    date_range = c(d, d),
    metrics = c("totalEvents"),
    dimensions = c(
      "ga:date", "ga:eventCategory", "ga:eventAction", "ga:eventLabel"
    ),
    anti_sample = TRUE
  )

  df$unidad_de_negocio <- unit

  filename <- paste0(unit, "-", "total-events", "-", d, ".csv")
  # Forward slashes on purpose: with single backslashes the literal does not
  # parse ("\c" is not a valid R escape) and "\t" would be read as a tab.
  path <- "D:/america/costos_protv/total_events"
  write.csv(df, file.path(path, filename), row.names = FALSE)
  print(filename)

  # Release the fetched data before the next call and keep returning gc()'s
  # result, as callers collecting the return values already receive it.
  rm(df)
  gc()
}




# NOTE: mapply() walks all of its vector arguments in parallel and recycles
# the shorter ones, so each date is paired with exactly one unit instead of
# with every unit -- this is why the ids cycle from one day to the next.
monthly_fetches <- mapply(my_fetch, df$unit,
                          df$unit_id,
                          my_dates, SIMPLIFY = FALSE)

变体 2:按月范围

谢谢你,akrun。你的答案有效。

我一直在尝试修改它,以便在其他类似情况下使用:

1.- 每月开始和结束:现在循环不是单日日期,而是有开始和结束。我称之为 monthly_dates

|    starts   |    ends    |
|:-----------:|------------|
|  2020-02-01 | 2020-02-29 |
|  2020-03-01 | 2020-03-31 |

我已经尝试调整解决方案,但它不起作用。你能看到它并告诉我为什么吗?谢谢。

# NOTE(review): no vectors are passed to Map() after the function, so there is
# nothing for `x` and `y` to iterate over. The inner lapply() iterates over
# the columns of `monthly_dates` (a data frame is a list of columns), and its
# function never uses `d1`/`d2`: the whole `starts`/`ends` columns are passed
# to my_fetch() on every call.
monthly_fetches <- Map(function(x, y) 
                   lapply(monthly_dates, function(d1, d2) my_fetch(x, y, monthly_dates$starts, monthly_dates$ends)))

主函数适用于使用 2 个日期(开始 "d1" 和结束 "d2"):

# Fetch the "totalEvents" metric, broken down by "ga:month", for one unit
# over the date range d1..d2.
#
# Arguments:
#   udn    - unit label; copied into the `udn` column of the result.
#   udn_id - id passed as the first argument to google_analytics(); also
#            copied into the `udn_id` column of the result.
#   d1, d2 - start and end of the requested `date_range`.
#
# Returns: the object returned by google_analytics() with the two extra
# columns `udn` and `udn_id` added.
my_fetch <- function(udn, udn_id, d1, d2) {
    monthly_totals <- google_analytics(
        udn_id,
        date_range = c(d1, d2),
        metrics = "totalEvents",
        dimensions = "ga:month",
        anti_sample = TRUE
    )

    # Tag every row with the unit it belongs to.
    monthly_totals$udn <- udn
    monthly_totals$udn_id <- udn_id

    monthly_totals
}

**生成每月日期范围的代码:**

# Build consecutive month-long date ranges beginning at `start`.
#
# Arguments:
#   start - Date on which the first range begins; each later range begins one
#           month after the previous one.
#   end   - Date bounding the last range. It is capped at yesterday
#           (Sys.Date() - 1), so passing Sys.Date() yields ranges up to
#           yesterday.
#
# Returns: a data.frame with Date columns `starts` and `ends`, one row per
# range. Every range ends the day before the next one starts; the final range
# ends on the capped end date.
make_date_ranges <- function(start, end) {
  # Single source of truth for the last day covered by any range.
  last_day <- min(end, Sys.Date() - 1)
  if (start > last_day) {
    stop("`start` must not be later than `end` or yesterday", call. = FALSE)
  }

  starts <- seq(from = start, to = last_day, by = "1 month")

  # Derive the ends from the starts so both columns always have the same
  # length, including the single-range case where `starts[-1]` is empty.
  ends <- c(starts[-1] - 1, last_day)

  data.frame(starts, ends)
}

## usage: month-by-month ranges starting on 2020-02-01
monthly_dates <- make_date_ranges(
  start = as.Date("2020-02-01"),
  end = Sys.Date()
)

更新 1:

# Single brackets keep the enclosing list, so the output is a one-element
# list that still carries the element name.
dput(monthly_fetches[1])

list(AE = list(structure(list(month = "02", totalEvents = 19670334, 
    udn = "AE", udn_id = 74415341), row.names = 1L, totals = list(
    list(totalEvents = "19670334")), minimums = list(list(totalEvents = "19670334")), maximums = list(
    list(totalEvents = "19670334")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"), 
    structure(list(month = "03", totalEvents = 19765253, udn = "AE", 
        udn_id = 74415341), row.names = 1L, totals = list(list(
        totalEvents = "19765253")), minimums = list(list(totalEvents = "19765253")), maximums = list(
        list(totalEvents = "19765253")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"), 
    structure(list(month = "04", totalEvents = 1319087, udn = "AE", 
        udn_id = 74415341), row.names = 1L, totals = list(list(
        totalEvents = "1319087")), minimums = list(list(totalEvents = "1319087")), maximums = list(
        list(totalEvents = "1319087")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame")))

更新 2:

# Double brackets extract the first element itself: the list of per-month
# data frames for that unit, without the outer name.
dput(monthly_fetches[[1]])

list(structure(list(month = "02", totalEvents = 19670334, udn = "AE", 
    udn_id = 74415341), row.names = 1L, totals = list(list(totalEvents = "19670334")), minimums = list(
    list(totalEvents = "19670334")), maximums = list(list(totalEvents = "19670334")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"), 
    structure(list(month = "03", totalEvents = 19765253, udn = "AE", 
        udn_id = 74415341), row.names = 1L, totals = list(list(
        totalEvents = "19765253")), minimums = list(list(totalEvents = "19765253")), maximums = list(
        list(totalEvents = "19765253")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"), 
    structure(list(month = "04", totalEvents = 1319087, udn = "AE", 
        udn_id = 74415341), row.names = 1L, totals = list(list(
        totalEvents = "1319087")), minimums = list(list(totalEvents = "1319087")), maximums = list(
        list(totalEvents = "1319087")), isDataGolden = TRUE, rowCount = 1L, class = "data.frame"))

由于 Map/mapply 要求所有参数的长度相同,而 'df' 有 3 行、'my_dates' 的长度为 31,因此一种选择是用 Map/mapply 遍历 'df' 的各列,然后在其内部再对 'my_dates' 嵌套一层循环:

# For every (unit, unit_id) pair, fetch each day in `my_dates`.
# Map() pairs the two `df` columns element-wise; the inner lapply() then runs
# over all dates for that pair, giving one list per unit with one entry per
# day.
monthly_fetches <- Map(
  function(x, y) lapply(my_dates, function(date) my_fetch(x, y, date)),
  df$unit,
  df$unit_id  # the data frame is `df`; `d$unit_id` was a typo
)

或者我们可以把对 'my_dates' 的循环放在外层:

# Alternative layout: iterate over the dates on the outside; for each day,
# Map() pairs every unit with its id and reuses that single day for all pairs.
lapply(my_dates, function(day) {
  Map(my_fetch, df$unit, df$unit_id, day)
})

更新

如果我们需要传递两列,使用Map

# One outer iteration per (start, end) row of `monthly_dates`; inside, Map()
# pairs each unit with its id and reuses that single start/end for every pair.
# (The original snippet had one unbalanced closing parenthesis at the end.)
Map(
  function(start, end) Map(my_fetch, df$unit, df$unit_id, start, end),
  monthly_dates$starts,
  monthly_dates$ends
)

# Units on the outside, date ranges on the inside: the result holds one
# element per unit, each being a list with one fetch per (start, end) range.
monthly_fetches <- Map(
  function(unit_name, unit_key) {
    Map(
      function(range_start, range_end) {
        my_fetch(unit_name, unit_key, range_start, range_end)
      },
      monthly_dates$starts,
      monthly_dates$ends
    )
  },
  df$unit,
  df$unit_id
)

然后rbind

# Stack the per-range data frames of each unit, then stack the units.
do.call(
  rbind,
  lapply(monthly_fetches, function(unit_frames) do.call(rbind, unit_frames))
)

或使用map

library(purrr)
library(dplyr)
# Row-bind each unit's list of data frames and then all units together;
# `.id` stores the name of each top-level list element in a `grp` column.
map_dfr(.x = monthly_fetches, .f = bind_rows, .id = "grp")