将列类型转换为 R 中的 read_csv() 列类型
Convert column types to their read_csv() column type in R
关于 library(readr) 和 R 中的 read_csv() 函数,我最喜欢的一点是它几乎总能把数据各列的类型设置为正确的 class。但是,我目前在 R 中使用的一个 API 返回的数据框中,所有列都是字符(character)类型,即使其中的数据显然是数字。以下面这个包含一些体育比赛数据的数据框为例:
dput(mydf)
structure(list(isUnplayed = c("false", "false", "false"), isInProgress =
c("false", "false", "false"), isCompleted = c("true", "true", "true"), awayScore = c("106",
"95", "95"), homeScore = c("94", "97", "111"), game.ID = c("31176",
"31177", "31178"), game.date = c("2015-10-27", "2015-10-27",
"2015-10-27"), game.time = c("8:00PM", "8:00PM", "10:30PM"),
game.location = c("Philips Arena", "United Center", "Oracle Arena"
), game.awayTeam.ID = c("88", "86", "110"), game.awayTeam.City = c("Detroit",
"Cleveland", "New Orleans"), game.awayTeam.Name = c("Pistons",
"Cavaliers", "Pelicans"), game.awayTeam.Abbreviation = c("DET",
"CLE", "NOP"), game.homeTeam.ID = c("91", "89", "101"), game.homeTeam.City = c("Atlanta",
"Chicago", "Golden State"), game.homeTeam.Name = c("Hawks",
"Bulls", "Warriors"), game.homeTeam.Abbreviation = c("ATL",
"CHI", "GSW"), quarterSummary.quarter = list(structure(list(
`@number` = c("1", "2", "3", "4"), awayScore = c("25",
"23", "34", "24"), homeScore = c("25", "18", "23", "28"
)), .Names = c("@number", "awayScore", "homeScore"), class = "data.frame", row.names = c(NA,
4L)), structure(list(`@number` = c("1", "2", "3", "4"), awayScore = c("17",
"23", "28", "27"), homeScore = c("26", "20", "25", "26")), .Names = c("@number",
"awayScore", "homeScore"), class = "data.frame", row.names = c(NA,
4L)), structure(list(`@number` = c("1", "2", "3", "4"), awayScore = c("35",
"14", "26", "20"), homeScore = c("39", "20", "35", "17")), .Names = c("@number",
"awayScore", "homeScore"), class = "data.frame", row.names = c(NA,
4L)))), .Names = c("isUnplayed", "isInProgress", "isCompleted",
"awayScore", "homeScore", "game.ID", "game.date", "game.time",
"game.location", "game.awayTeam.ID", "game.awayTeam.City", "game.awayTeam.Name",
"game.awayTeam.Abbreviation", "game.homeTeam.ID", "game.homeTeam.City",
"game.homeTeam.Name", "game.homeTeam.Abbreviation", "quarterSummary.quarter"
), class = "data.frame", row.names = c(NA, 3L))
由于这些列的 class 都是字符型,API 返回这个数据框之后处理起来很麻烦。我想出了一个更新各列 class 的变通办法,如下所示:
# Workaround: round-trip the data frame through a CSV file so that
# read_csv() re-infers each column's type when the file is read back.
# Side effect: 'mydf.csv' is left behind on disk.
write_csv(mydf, 'mydf.csv')
# Overwrite mydf with the re-read version of the same file.
mydf <- read_csv('mydf.csv')
通过先写出 CSV,再用 read_csv() 把它重新读入,数据框各列的类型就会被更新。不幸的是,这样会在我的目录中留下一个我并不想要的 CSV 文件。有没有办法在不实际写出 CSV 文件的情况下,把 R 数据框各列的类型更新为 read_csv() 会推断出的列类型?
感谢任何帮助!
可以试试下面的代码。type.convert() 会把字符向量适当地转换为逻辑型、整数型、数值型、复数型或因子。
# Re-type every character column of `df` in place using base R only.
#
# vapply() always yields a logical vector (one entry per column), even for a
# zero-column data frame, where sapply() would return list() and break which().
is_chr <- vapply(df, is.character, logical(1))
# List-style indexing (df[is_chr]) keeps a data frame even when only one
# column matches; df[, indx] would drop to a bare vector and lapply() would
# then iterate over cells instead of columns.
# as.is = TRUE leaves non-convertible strings as character rather than
# factor, so no factor -> character pass is needed afterwards, columns that
# were already factors are left untouched, and the "'as.is' should be
# specified" warning from R >= 4.0 is avoided.
df[is_chr] <- lapply(df[is_chr], type.convert, as.is = TRUE)
如果您只是想让 readr 猜测列类型,则无需先写出再读入数据,可以直接使用 readr::type_convert():
# Demo: turn Sepal.Width into character, let readr::type_convert() re-guess
# the column types, then print the resulting structure.
str(
  readr::type_convert(
    dplyr::mutate(iris, Sepal.Width = as.character(Sepal.Width))
  )
)
比较:
# Baseline for comparison: the same character conversion of Sepal.Width,
# printed without passing through readr::type_convert().
str(dplyr::mutate(iris, Sepal.Width = as.character(Sepal.Width)))
关于 library(readr) 和 R 中的 read_csv() 函数,我最喜欢的一点是它几乎总能把数据各列的类型设置为正确的 class。但是,我目前在 R 中使用的一个 API 返回的数据框中,所有列都是字符(character)类型,即使其中的数据显然是数字。以下面这个包含一些体育比赛数据的数据框为例:
dput(mydf)
structure(list(isUnplayed = c("false", "false", "false"), isInProgress =
c("false", "false", "false"), isCompleted = c("true", "true", "true"), awayScore = c("106",
"95", "95"), homeScore = c("94", "97", "111"), game.ID = c("31176",
"31177", "31178"), game.date = c("2015-10-27", "2015-10-27",
"2015-10-27"), game.time = c("8:00PM", "8:00PM", "10:30PM"),
game.location = c("Philips Arena", "United Center", "Oracle Arena"
), game.awayTeam.ID = c("88", "86", "110"), game.awayTeam.City = c("Detroit",
"Cleveland", "New Orleans"), game.awayTeam.Name = c("Pistons",
"Cavaliers", "Pelicans"), game.awayTeam.Abbreviation = c("DET",
"CLE", "NOP"), game.homeTeam.ID = c("91", "89", "101"), game.homeTeam.City = c("Atlanta",
"Chicago", "Golden State"), game.homeTeam.Name = c("Hawks",
"Bulls", "Warriors"), game.homeTeam.Abbreviation = c("ATL",
"CHI", "GSW"), quarterSummary.quarter = list(structure(list(
`@number` = c("1", "2", "3", "4"), awayScore = c("25",
"23", "34", "24"), homeScore = c("25", "18", "23", "28"
)), .Names = c("@number", "awayScore", "homeScore"), class = "data.frame", row.names = c(NA,
4L)), structure(list(`@number` = c("1", "2", "3", "4"), awayScore = c("17",
"23", "28", "27"), homeScore = c("26", "20", "25", "26")), .Names = c("@number",
"awayScore", "homeScore"), class = "data.frame", row.names = c(NA,
4L)), structure(list(`@number` = c("1", "2", "3", "4"), awayScore = c("35",
"14", "26", "20"), homeScore = c("39", "20", "35", "17")), .Names = c("@number",
"awayScore", "homeScore"), class = "data.frame", row.names = c(NA,
4L)))), .Names = c("isUnplayed", "isInProgress", "isCompleted",
"awayScore", "homeScore", "game.ID", "game.date", "game.time",
"game.location", "game.awayTeam.ID", "game.awayTeam.City", "game.awayTeam.Name",
"game.awayTeam.Abbreviation", "game.homeTeam.ID", "game.homeTeam.City",
"game.homeTeam.Name", "game.homeTeam.Abbreviation", "quarterSummary.quarter"
), class = "data.frame", row.names = c(NA, 3L))
由于这些列的 class 都是字符型,API 返回这个数据框之后处理起来很麻烦。我想出了一个更新各列 class 的变通办法,如下所示:
# Workaround: round-trip the data frame through a CSV file so that
# read_csv() re-infers each column's type when the file is read back.
# Side effect: 'mydf.csv' is left behind on disk.
write_csv(mydf, 'mydf.csv')
# Overwrite mydf with the re-read version of the same file.
mydf <- read_csv('mydf.csv')
通过先写出 CSV,再用 read_csv() 把它重新读入,数据框各列的类型就会被更新。不幸的是,这样会在我的目录中留下一个我并不想要的 CSV 文件。有没有办法在不实际写出 CSV 文件的情况下,把 R 数据框各列的类型更新为 read_csv() 会推断出的列类型?
感谢任何帮助!
可以试试下面的代码。type.convert() 会把字符向量适当地转换为逻辑型、整数型、数值型、复数型或因子。
# Re-type every character column of `df` in place using base R only.
#
# vapply() always yields a logical vector (one entry per column), even for a
# zero-column data frame, where sapply() would return list() and break which().
is_chr <- vapply(df, is.character, logical(1))
# List-style indexing (df[is_chr]) keeps a data frame even when only one
# column matches; df[, indx] would drop to a bare vector and lapply() would
# then iterate over cells instead of columns.
# as.is = TRUE leaves non-convertible strings as character rather than
# factor, so no factor -> character pass is needed afterwards, columns that
# were already factors are left untouched, and the "'as.is' should be
# specified" warning from R >= 4.0 is avoided.
df[is_chr] <- lapply(df[is_chr], type.convert, as.is = TRUE)
如果您只是想让 readr 猜测列类型,则无需先写出再读入数据,可以直接使用 readr::type_convert():
# Demo: turn Sepal.Width into character, let readr::type_convert() re-guess
# the column types, then print the resulting structure.
str(
  readr::type_convert(
    dplyr::mutate(iris, Sepal.Width = as.character(Sepal.Width))
  )
)
比较:
# Baseline for comparison: the same character conversion of Sepal.Width,
# printed without passing through readr::type_convert().
str(dplyr::mutate(iris, Sepal.Width = as.character(Sepal.Width)))