有没有办法将数据集的列与 R 中的另一个数据集行匹配
Is there a way to match the columns of a dataset with another dataset rows in R
我有这两个数据集:
weather_data<- structure(list(`0` = c(20150310L, 20150310L, 20150310L, 20150310L,
20150310L, 20150310L, 20150310L, 20150310L, 20150310L, 20150310L,
20150310L), `0` = c(200L, 205L, 210L, 215L, 220L, 225L, 230L,
235L, 240L, 245L, 250L), `1` = c(2.96667242050171, 4.08907222747803,
3.34765760600567, 2.70875519514084, 4.04301458597183, 3.277907371521,
1.19018948078156, 8.01013946533203, 3.17771905660629, 4.08452725410461,
6.84288668632507), `2` = c(3.66441679000854, 3.8880712389946,
5.4497694671154, 1.65072797238827, 1.22804968059063, 2.80022418498993,
2.09879478812218, 7.83757948875427, 3.06818120181561, 3.02766072750092,
2.10350251197815), `3` = c(0, 3.77445685863495, 0.66255909204483,
0.979387938976288, 1.84569960832596, 1.28901898860931, 1.75025188922882,
3.86244678497314, 2.02710714936256, 1.35766339302063, 5.03071141242981
), `4` = c(0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
), `5` = c(0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
), `6` = c(0, 0, 0, 0.255719035863876, 1.27965438365936, 2.14472162723541,
1.15851271152496, 4.7572363615036, 2.20618277788162, 0.508674263954163,
6.12584185600281), `7` = c(0, 0, 0.858640074729919, 1.97637134790421,
1.29098010063171, 3.4027111530304, 0.723613187670708, 3.21064579486847,
2.03270196914673, 2.11455392837524, 1.08344982564449), `8` = c(0,
0, 0, 0, 0, 2.42211884260178, 0, 0, 0, 4.46288549900055, 0)), row.names = 85:95, class = "data.frame")
和
sensors<- structure(list(Agency.Station.ID = c("MI064E000.7U", "MI064E002.0U",
"MI064E003.2U", "MI064E004.6U", "MI064E004.6V", "MI064E005.6U",
"MI064E007.1U", "MI064E008.5U"), ColumnNum = 1:8), row.names = c(NA,
8L), class = "data.frame")
我想创建一个新的数据框,该数据框是通过将传感器中 ColumnNum 中的数字与天气数据中的相应列相匹配而创建的,并保留天气数据中列 (0,0) 的日期和时间。
我尝试了以下代码
lst1 <- lapply(split(sensors$ColumnNum, sensors$Agency.Station.ID),
function(i)cbind(weather_data[1:2], weather_data[i]))
#adding new columns for each dataframe
colnames <- c("Date2","Time2","Precipitation")
lst1<- lapply(lst1, setNames, colnames)
#adding the sensors name for each data frame
lst1 <- Map(cbind, lst1, Agency.Station.ID = names(lst1))
#making a data frame for all the sensors
k <- do.call("rbind", lst1)
remove(lst1, sensors, colnames, weather_data)
结果应该是这样的:
k <- structure(list(Date2 = c(20150309L, 20150309L, 20150309L, 20150309L,
20150309L, 20150309L, 20150309L, 20150309L, 20150309L, 20150309L
), Time2 = c(1900L, 1905L, 1910L, 1915L, 1920L, 1925L, 1930L,
1935L, 1940L, 1945L), Precipitation = c(20150309, 20150309, 20150309,
20150309, 20150309, 20150309, 20150309, 20150309, 20150309, 20150309
), Agency.Station.ID = c("MI064E000.7U", "MI064E000.7U", "MI064E000.7U",
"MI064E000.7U", "MI064E000.7U", "MI064E000.7U", "MI064E000.7U",
"MI064E000.7U", "MI064E000.7U", "MI064E000.7U")), row.names = c(NA,
-10L), class = "data.frame")
Agency.Station.ID 在这个输出示例中并不重要,因为它不是来自我上面的数据,但我无法准确获得降水量。它正在获取日期。任何人都可以帮助表达所有的感激和感激吗?
我认为如果您先重命名天气数据的列,那将是最简单的。 (顺便说一下,你不应该有多个同名的列。)然后你可以从宽数据框切换到长数据框。
weather_data<- structure(list(`0` = c(20150310L, 20150310L, 20150310L, 20150310L,
20150310L, 20150310L, 20150310L, 20150310L, 20150310L, 20150310L,
20150310L), `0` = c(200L, 205L, 210L, 215L, 220L, 225L, 230L,
235L, 240L, 245L, 250L), `1` = c(2.96667242050171, 4.08907222747803,
3.34765760600567, 2.70875519514084, 4.04301458597183, 3.277907371521,
1.19018948078156, 8.01013946533203, 3.17771905660629, 4.08452725410461,
6.84288668632507), `2` = c(3.66441679000854, 3.8880712389946,
5.4497694671154, 1.65072797238827, 1.22804968059063, 2.80022418498993,
2.09879478812218, 7.83757948875427, 3.06818120181561, 3.02766072750092,
2.10350251197815), `3` = c(0, 3.77445685863495, 0.66255909204483,
0.979387938976288, 1.84569960832596, 1.28901898860931, 1.75025188922882,
3.86244678497314, 2.02710714936256, 1.35766339302063, 5.03071141242981
), `4` = c(0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
), `5` = c(0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
), `6` = c(0, 0, 0, 0.255719035863876, 1.27965438365936, 2.14472162723541,
1.15851271152496, 4.7572363615036, 2.20618277788162, 0.508674263954163,
6.12584185600281), `7` = c(0, 0, 0.858640074729919, 1.97637134790421,
1.29098010063171, 3.4027111530304, 0.723613187670708, 3.21064579486847,
2.03270196914673, 2.11455392837524, 1.08344982564449), `8` = c(0,
0, 0, 0, 0, 2.42211884260178, 0, 0, 0, 4.46288549900055, 0)), row.names = 85:95, class = "data.frame")
sensors<- structure(list(Agency.Station.ID = c("MI064E000.7U", "MI064E002.0U",
"MI064E003.2U", "MI064E004.6U", "MI064E004.6V", "MI064E005.6U",
"MI064E007.1U", "MI064E008.5U"), ColumnNum = 1:8), row.names = c(NA,
8L), class = "data.frame")
names(weather_data)[1:2] <- c("Date2","Time2")
# make sure the names match up with the correct columns
names(weather_data)[-(1:2)] <- sensors[sensors$ColumnNum[order(names(weather_data)[-(1:2)])], "Agency.Station.ID"]
tidyr::pivot_longer(weather_data, -c(1,2), names_to = "Agency.Station.ID", values_to = "Precipitation")
#> # A tibble: 88 × 4
#> Date2 Time2 Agency.Station.ID Precipitation
#> <int> <int> <chr> <dbl>
#> 1 20150310 200 MI064E000.7U 2.97
#> 2 20150310 200 MI064E002.0U 3.66
#> 3 20150310 200 MI064E003.2U 0
#> 4 20150310 200 MI064E004.6U 0
#> 5 20150310 200 MI064E004.6V 0
#> 6 20150310 200 MI064E005.6U 0
#> 7 20150310 200 MI064E007.1U 0
#> 8 20150310 200 MI064E008.5U 0
#> 9 20150310 205 MI064E000.7U 4.09
#> 10 20150310 205 MI064E002.0U 3.89
#> # … with 78 more rows
由 reprex package (v2.0.1)
创建于 2022-05-31
我有这两个数据集:
weather_data<- structure(list(`0` = c(20150310L, 20150310L, 20150310L, 20150310L,
20150310L, 20150310L, 20150310L, 20150310L, 20150310L, 20150310L,
20150310L), `0` = c(200L, 205L, 210L, 215L, 220L, 225L, 230L,
235L, 240L, 245L, 250L), `1` = c(2.96667242050171, 4.08907222747803,
3.34765760600567, 2.70875519514084, 4.04301458597183, 3.277907371521,
1.19018948078156, 8.01013946533203, 3.17771905660629, 4.08452725410461,
6.84288668632507), `2` = c(3.66441679000854, 3.8880712389946,
5.4497694671154, 1.65072797238827, 1.22804968059063, 2.80022418498993,
2.09879478812218, 7.83757948875427, 3.06818120181561, 3.02766072750092,
2.10350251197815), `3` = c(0, 3.77445685863495, 0.66255909204483,
0.979387938976288, 1.84569960832596, 1.28901898860931, 1.75025188922882,
3.86244678497314, 2.02710714936256, 1.35766339302063, 5.03071141242981
), `4` = c(0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
), `5` = c(0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
), `6` = c(0, 0, 0, 0.255719035863876, 1.27965438365936, 2.14472162723541,
1.15851271152496, 4.7572363615036, 2.20618277788162, 0.508674263954163,
6.12584185600281), `7` = c(0, 0, 0.858640074729919, 1.97637134790421,
1.29098010063171, 3.4027111530304, 0.723613187670708, 3.21064579486847,
2.03270196914673, 2.11455392837524, 1.08344982564449), `8` = c(0,
0, 0, 0, 0, 2.42211884260178, 0, 0, 0, 4.46288549900055, 0)), row.names = 85:95, class = "data.frame")
和
sensors<- structure(list(Agency.Station.ID = c("MI064E000.7U", "MI064E002.0U",
"MI064E003.2U", "MI064E004.6U", "MI064E004.6V", "MI064E005.6U",
"MI064E007.1U", "MI064E008.5U"), ColumnNum = 1:8), row.names = c(NA,
8L), class = "data.frame")
我想创建一个新的数据框,该数据框是通过将传感器中 ColumnNum 中的数字与天气数据中的相应列相匹配而创建的,并保留天气数据中列 (0,0) 的日期和时间。 我尝试了以下代码
lst1 <- lapply(split(sensors$ColumnNum, sensors$Agency.Station.ID),
function(i)cbind(weather_data[1:2], weather_data[i]))
#adding new columns for each dataframe
colnames <- c("Date2","Time2","Precipitation")
lst1<- lapply(lst1, setNames, colnames)
#adding the sensors name for each data frame
lst1 <- Map(cbind, lst1, Agency.Station.ID = names(lst1))
#making a data frame for all the sensors
k <- do.call("rbind", lst1)
remove(lst1, sensors, colnames, weather_data)
结果应该是这样的:
k <- structure(list(Date2 = c(20150309L, 20150309L, 20150309L, 20150309L,
20150309L, 20150309L, 20150309L, 20150309L, 20150309L, 20150309L
), Time2 = c(1900L, 1905L, 1910L, 1915L, 1920L, 1925L, 1930L,
1935L, 1940L, 1945L), Precipitation = c(20150309, 20150309, 20150309,
20150309, 20150309, 20150309, 20150309, 20150309, 20150309, 20150309
), Agency.Station.ID = c("MI064E000.7U", "MI064E000.7U", "MI064E000.7U",
"MI064E000.7U", "MI064E000.7U", "MI064E000.7U", "MI064E000.7U",
"MI064E000.7U", "MI064E000.7U", "MI064E000.7U")), row.names = c(NA,
-10L), class = "data.frame")
Agency.Station.ID 在这个输出示例中并不重要,因为它不是来自我上面的数据,但我无法准确获得降水量。它正在获取日期。任何人都可以帮助表达所有的感激和感激吗?
我认为如果您先重命名天气数据的列,那将是最简单的。 (顺便说一下,你不应该有多个同名的列。)然后你可以从宽数据框切换到长数据框。
weather_data<- structure(list(`0` = c(20150310L, 20150310L, 20150310L, 20150310L,
20150310L, 20150310L, 20150310L, 20150310L, 20150310L, 20150310L,
20150310L), `0` = c(200L, 205L, 210L, 215L, 220L, 225L, 230L,
235L, 240L, 245L, 250L), `1` = c(2.96667242050171, 4.08907222747803,
3.34765760600567, 2.70875519514084, 4.04301458597183, 3.277907371521,
1.19018948078156, 8.01013946533203, 3.17771905660629, 4.08452725410461,
6.84288668632507), `2` = c(3.66441679000854, 3.8880712389946,
5.4497694671154, 1.65072797238827, 1.22804968059063, 2.80022418498993,
2.09879478812218, 7.83757948875427, 3.06818120181561, 3.02766072750092,
2.10350251197815), `3` = c(0, 3.77445685863495, 0.66255909204483,
0.979387938976288, 1.84569960832596, 1.28901898860931, 1.75025188922882,
3.86244678497314, 2.02710714936256, 1.35766339302063, 5.03071141242981
), `4` = c(0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
), `5` = c(0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
), `6` = c(0, 0, 0, 0.255719035863876, 1.27965438365936, 2.14472162723541,
1.15851271152496, 4.7572363615036, 2.20618277788162, 0.508674263954163,
6.12584185600281), `7` = c(0, 0, 0.858640074729919, 1.97637134790421,
1.29098010063171, 3.4027111530304, 0.723613187670708, 3.21064579486847,
2.03270196914673, 2.11455392837524, 1.08344982564449), `8` = c(0,
0, 0, 0, 0, 2.42211884260178, 0, 0, 0, 4.46288549900055, 0)), row.names = 85:95, class = "data.frame")
sensors<- structure(list(Agency.Station.ID = c("MI064E000.7U", "MI064E002.0U",
"MI064E003.2U", "MI064E004.6U", "MI064E004.6V", "MI064E005.6U",
"MI064E007.1U", "MI064E008.5U"), ColumnNum = 1:8), row.names = c(NA,
8L), class = "data.frame")
names(weather_data)[1:2] <- c("Date2","Time2")
# make sure the names match up with the correct columns
names(weather_data)[-(1:2)] <- sensors[sensors$ColumnNum[order(names(weather_data)[-(1:2)])], "Agency.Station.ID"]
tidyr::pivot_longer(weather_data, -c(1,2), names_to = "Agency.Station.ID", values_to = "Precipitation")
#> # A tibble: 88 × 4
#> Date2 Time2 Agency.Station.ID Precipitation
#> <int> <int> <chr> <dbl>
#> 1 20150310 200 MI064E000.7U 2.97
#> 2 20150310 200 MI064E002.0U 3.66
#> 3 20150310 200 MI064E003.2U 0
#> 4 20150310 200 MI064E004.6U 0
#> 5 20150310 200 MI064E004.6V 0
#> 6 20150310 200 MI064E005.6U 0
#> 7 20150310 200 MI064E007.1U 0
#> 8 20150310 200 MI064E008.5U 0
#> 9 20150310 205 MI064E000.7U 4.09
#> 10 20150310 205 MI064E002.0U 3.89
#> # … with 78 more rows
由 reprex package (v2.0.1)
创建于 2022-05-31