尝试使用 R 中的函数将新行附加到正在进行的电子表格

Trying to append new rows to an ongoing spreadsheet using a function in R

我一直在使用 write.xlsx 函数来附加通过 HTML scrapper 提取的新数据行。出于某种原因,虽然不是从一个 url 中提取信息,而是将其粘贴到 sheet 中并移动到下一个,它只是粘贴我放入函数中的最后一个 url

我试过在实际代码中写一个 for loop,摆脱 for loop 并为每个人调用函数 url 并放置 url s 进入 vector 并在向量上使用 lapply 函数。所有这些方法 "work" 但结果相同。

urlpull <- function(site){
url <- site
webpage <- read_html(url)
tbls <- webpage %>% html_nodes("table") %>% html_table(header = FALSE, fill = TRUE) 
tbls <- tbls %>% lmap( ~ set_names(.x, nm = pluck(.x, 1, 1, 1))) %>% map(~ set_names(.x, nm = .x[2, ]))
abbr <- as.data.frame(webpage %>% html_nodes('strong') %>% html_text() %>% .[5:6])
rec <- as.data.frame(webpage %>% html_nodes('div') %>% html_text() %>% .[c(26,33)])
date <- as.data.frame(webpage %>% html_nodes('div') %>% html_text() %>% .[36])
awaybas <- tbls %>% .[1]
awayadv <- tbls %>% .[2] 
homebas <- tbls %>% .[3] 
homeadv <- tbls %>% .[4] 
ab1 <- as.data.frame(awaybas)
aa1 <- as.data.frame(awayadv)
hb1 <- as.data.frame(homebas)
ha1 <- as.data.frame(homeadv)
ab <- ab1[-c(1,2,8),]
aa <- aa1[-c(1,2,8),]
hb <- hb1[-c(1,2,8),]
ha <- ha1[-c(1,2,8),]
ab[,c(3:21)] <- sapply(ab[,c(3:21)], as.numeric)
aa[,c(3:16)] <- sapply(aa[,c(3:16)], as.numeric)
hb[,c(3:21)] <- sapply(hb[,c(3:21)], as.numeric)
ha[,c(3:16)] <- sapply(ha[,c(3:16)], as.numeric)
aa <- cbind(aa, abbr[1,], abbr[2,])
ab <- cbind(ab, abbr[1,], abbr[2,])
hb <- cbind(hb, abbr[2,], abbr[1,])
ha <- cbind(ha, abbr[2,], abbr[1,])
aa <- cbind(aa, rec[1,])
ab <- cbind(ab, rec[1,])
hb <- cbind(hb, rec[2,])
ha <- cbind(ha, rec[2,])
aa <- cbind(aa, date)
ab <- cbind(ab, date)
hb <- cbind(hb, date)
ha <- cbind(ha, date)
names(aa)[17:20]<-c("TEAM", "OPP", "RCRD", "DT")
names(ab)[22:25]<-c("TEAM", "OPP", "RCRD", "DT")
names(hb)[22:25]<-c("TEAM", "OPP", "RCRD", "DT")
names(ha)[17:20]<-c("TEAM", "OPP", "RCRD", "DT")
aa <- aa %>% separate("MP", c("min","sec"), sep = ":") %>% separate("RCRD", c("W","L"), sep= "-") %>% separate("DT", c("time", "day", "year"), sep = ",") %>% unite(DT, c("day", "year", "time"), sep = ",") %>% mutate(DT = mdy_hm(DT))
ab <- ab %>% separate("MP", c("min","sec"), sep = ":") %>% separate("RCRD", c("W","L"), sep= "-") %>% separate("DT", c("time", "day", "year"), sep = ",") %>% unite(DT, c("day", "year", "time"), sep = ",") %>% mutate(DT = mdy_hm(DT))
hb <- hb %>% separate("MP", c("min","sec"), sep = ":") %>% separate("RCRD", c("W","L"), sep= "-") %>% separate("DT", c("time", "day", "year"), sep = ",") %>% unite(DT, c("day", "year", "time"), sep = ",") %>% mutate(DT = mdy_hm(DT))
ha <- ha %>% separate("MP", c("min","sec"), sep = ":") %>% separate("RCRD", c("W","L"), sep= "-") %>% separate("DT", c("time", "day", "year"), sep = ",") %>% unite(DT, c("day", "year", "time"), sep = ",") %>% mutate(DT = mdy_hm(DT))
aa[,c(20:21)] <- sapply(aa[,c(20:21)], as.numeric)
ab[,c(25:26)] <- sapply(ab[,c(25:26)], as.numeric)
hb[,c(25:26)] <- sapply(hb[,c(25:26)], as.numeric)
ha[,c(20:21)] <- sapply(ha[,c(20:21)], as.numeric)
aa <- aa %>% mutate(GAME = W + L)
ab <- ab %>% mutate(GAME = W + L)
hb <- hb %>% mutate(GAME = W + L)
ha <- ha %>% mutate(GAME = W + L)
aac <- aa[,-c(1:3)]
hac <- ha[,-c(1:3)]
am <- cbind(ab[,-c(23:28)],aac)
hm <- cbind(hb[,-c(23:28)],hac)
am <- am %>% mutate(LOCAL = "away")
hm <- hm %>% mutate(LOCAL = "home")
final <- rbind(am,hm)
print(final)
write.xlsx(final, "Book1.xlsx", sheetName= "Sheet1", col.names=TRUE, row.names=FALSE, append=TRUE, showNA= TRUE)
}
x <- c("https://www.basketball-reference.com/boxscores/201410280LAL.html",     "https://www.basketball-reference.com/boxscores/201811140BRK.html")

lapply(x, urlpull)

我只想将输出中的最后一个 table 放在最后一个 table 之后的第一行。

为了解决这个问题,我保留了 for 循环并让函数 row 将所有数据帧绑定为一个,然后再将其导出。我通过在 for 循环之外启动一个空数据框来做到这一点,如下所示:

oldtable <- data.frame()

for (i in x) {

  #Generic lines of code

  final          #table from one iteration of loop
  oldtable <- rbind.data.frame(oldtable, final)
}

使用基础 R 中的 rbind.data.frame 函数得到了我之前想要的结果。