有没有办法在 R 中将一个数据集的列与另一个数据集的行相匹配

Question

我有这两个数据集：

# Wide table of readings: 11 rows, two leading columns that are both named
# `0` (described in the question as date and time), followed by one numeric
# column per sensor number, named `1` .. `8`.
weather_data <- structure(
  list(
    `0` = rep(20150310L, times = 11L),
    `0` = c(200L, 205L, 210L, 215L, 220L, 225L, 230L, 235L, 240L, 245L, 250L),
    `1` = c(
      2.96667242050171, 4.08907222747803, 3.34765760600567, 2.70875519514084,
      4.04301458597183, 3.277907371521, 1.19018948078156, 8.01013946533203,
      3.17771905660629, 4.08452725410461, 6.84288668632507
    ),
    `2` = c(
      3.66441679000854, 3.8880712389946, 5.4497694671154, 1.65072797238827,
      1.22804968059063, 2.80022418498993, 2.09879478812218, 7.83757948875427,
      3.06818120181561, 3.02766072750092, 2.10350251197815
    ),
    `3` = c(
      0, 3.77445685863495, 0.66255909204483, 0.979387938976288,
      1.84569960832596, 1.28901898860931, 1.75025188922882, 3.86244678497314,
      2.02710714936256, 1.35766339302063, 5.03071141242981
    ),
    `4` = c(
      0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
      4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
    ),
    `5` = c(
      0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
      4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
    ),
    `6` = c(
      0, 0, 0, 0.255719035863876, 1.27965438365936, 2.14472162723541,
      1.15851271152496, 4.7572363615036, 2.20618277788162, 0.508674263954163,
      6.12584185600281
    ),
    `7` = c(
      0, 0, 0.858640074729919, 1.97637134790421, 1.29098010063171,
      3.4027111530304, 0.723613187670708, 3.21064579486847, 2.03270196914673,
      2.11455392837524, 1.08344982564449
    ),
    `8` = c(0, 0, 0, 0, 0, 2.42211884260178, 0, 0, 0, 4.46288549900055, 0)
  ),
  row.names = 85:95,
  class = "data.frame"
)

和

# Lookup table: one row per station, with the sensor number (`ColumnNum`)
# that identifies the station's column in `weather_data`.
sensors <- structure(
  list(
    Agency.Station.ID = c(
      "MI064E000.7U", "MI064E002.0U", "MI064E003.2U", "MI064E004.6U",
      "MI064E004.6V", "MI064E005.6U", "MI064E007.1U", "MI064E008.5U"
    ),
    ColumnNum = 1:8
  ),
  row.names = c(NA, 8L),
  class = "data.frame"
)

我想创建一个新的数据框：把 sensors 中 ColumnNum 的编号与 weather_data 中同名的列对应起来，并保留 weather_data 前两列（列名均为 0）中的日期和时间。我尝试了以下代码：

# Build one long data frame of precipitation readings, one slice per sensor.
#
# The first two columns of `weather_data` are the date and the time; the
# sensor columns that follow are named after the sensor number ("1", "2", ...).
# `ColumnNum` must therefore be looked up by column *name*. Indexing by
# position (`weather_data[i]`) is off by two: sensor 1 received the date
# column and sensor 2 the time column.
lst1 <- lapply(
  split(sensors$ColumnNum, sensors$Agency.Station.ID),
  function(i) cbind(weather_data[1:2], weather_data[as.character(i)])
)

# Give every per-sensor data frame the same three column names.
# (Stored as `col_names` so that base::colnames() is not masked.)
col_names <- c("Date2", "Time2", "Precipitation")
lst1 <- lapply(lst1, setNames, col_names)
# Tag each data frame with the station it belongs to.
lst1 <- Map(cbind, lst1, Agency.Station.ID = names(lst1))
# Stack all stations into a single data frame.
k <- do.call("rbind", lst1)
# rbind() builds row names from the list names and the original row labels;
# reset them to a plain 1..n sequence.
rownames(k) <- NULL
remove(lst1, sensors, col_names, weather_data)

结果应该是这样的：

    # Illustrative output from the question: 10 rows for a single station.
    # Note that Precipitation carries the same values as Date2 here.
    k <- structure(
      list(
        Date2 = rep(20150309L, times = 10L),
        Time2 = c(
          1900L, 1905L, 1910L, 1915L, 1920L,
          1925L, 1930L, 1935L, 1940L, 1945L
        ),
        Precipitation = rep(20150309, times = 10L),
        Agency.Station.ID = rep("MI064E000.7U", times = 10L)
      ),
      row.names = c(NA, -10L),
      class = "data.frame"
    )

Agency.Station.ID 在这个输出示例中并不重要，因为它并非来自上面的数据；问题在于我无法正确取得降水量——Precipitation 列得到的是日期。有人能帮忙吗？非常感谢！

Answer 1

我认为如果您先重命名天气数据的列，那将是最简单的。（顺便说一下，你不应该有多个同名的列。）然后你可以从宽数据框切换到长数据框。

# Same readings as in the question: 11 rows, two leading columns both named
# `0`, then one numeric column per sensor number (`1` .. `8`).
weather_data <- structure(
  list(
    `0` = rep(20150310L, times = 11L),
    `0` = c(200L, 205L, 210L, 215L, 220L, 225L, 230L, 235L, 240L, 245L, 250L),
    `1` = c(
      2.96667242050171, 4.08907222747803, 3.34765760600567, 2.70875519514084,
      4.04301458597183, 3.277907371521, 1.19018948078156, 8.01013946533203,
      3.17771905660629, 4.08452725410461, 6.84288668632507
    ),
    `2` = c(
      3.66441679000854, 3.8880712389946, 5.4497694671154, 1.65072797238827,
      1.22804968059063, 2.80022418498993, 2.09879478812218, 7.83757948875427,
      3.06818120181561, 3.02766072750092, 2.10350251197815
    ),
    `3` = c(
      0, 3.77445685863495, 0.66255909204483, 0.979387938976288,
      1.84569960832596, 1.28901898860931, 1.75025188922882, 3.86244678497314,
      2.02710714936256, 1.35766339302063, 5.03071141242981
    ),
    `4` = c(
      0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
      4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
    ),
    `5` = c(
      0, 0, 0, 1.86805146932602, 0, 2.28361964225769, 1.64410877227783,
      4.72498226165771, 0.665152907371521, 3.52564084529877, 5.99745666980743
    ),
    `6` = c(
      0, 0, 0, 0.255719035863876, 1.27965438365936, 2.14472162723541,
      1.15851271152496, 4.7572363615036, 2.20618277788162, 0.508674263954163,
      6.12584185600281
    ),
    `7` = c(
      0, 0, 0.858640074729919, 1.97637134790421, 1.29098010063171,
      3.4027111530304, 0.723613187670708, 3.21064579486847, 2.03270196914673,
      2.11455392837524, 1.08344982564449
    ),
    `8` = c(0, 0, 0, 0, 0, 2.42211884260178, 0, 0, 0, 4.46288549900055, 0)
  ),
  row.names = 85:95,
  class = "data.frame"
)
# Station lookup table: `ColumnNum` is the sensor number that names the
# station's column in `weather_data`.
sensors <- structure(
  list(
    Agency.Station.ID = c(
      "MI064E000.7U", "MI064E002.0U", "MI064E003.2U", "MI064E004.6U",
      "MI064E004.6V", "MI064E005.6U", "MI064E007.1U", "MI064E008.5U"
    ),
    ColumnNum = 1:8
  ),
  row.names = c(NA, 8L),
  class = "data.frame"
)

# The first two columns hold the date and the time of each reading.
names(weather_data)[1:2] <- c("Date2", "Time2")

# Rename every remaining column to the station whose ColumnNum equals the
# column's current name. match() is a true lookup; order() only gave the
# right answer while the columns were already sorted "1", "2", ... and
# `sensors` was sorted by ColumnNum.
sensor_cols <- names(weather_data)[-(1:2)]
station_idx <- match(sensor_cols, sensors$ColumnNum)
# Fail loudly rather than silently creating NA column names.
stopifnot(!anyNA(station_idx))
names(weather_data)[-(1:2)] <- sensors$Agency.Station.ID[station_idx]

# Reshape from wide (one column per station) to long (one row per reading).
tidyr::pivot_longer(
  weather_data,
  cols = -c(Date2, Time2),
  names_to = "Agency.Station.ID",
  values_to = "Precipitation"
)
#> # A tibble: 88 × 4
#>       Date2 Time2 Agency.Station.ID Precipitation
#>       <int> <int> <chr>                     <dbl>
#>  1 20150310   200 MI064E000.7U               2.97
#>  2 20150310   200 MI064E002.0U               3.66
#>  3 20150310   200 MI064E003.2U               0   
#>  4 20150310   200 MI064E004.6U               0   
#>  5 20150310   200 MI064E004.6V               0   
#>  6 20150310   200 MI064E005.6U               0   
#>  7 20150310   200 MI064E007.1U               0   
#>  8 20150310   200 MI064E008.5U               0   
#>  9 20150310   205 MI064E000.7U               4.09
#> 10 20150310   205 MI064E002.0U               3.89
#> # … with 78 more rows

创建于 2022-05-31，由 reprex 包 (v2.0.1) 生成

有没有办法在 R 中将一个数据集的列与另一个数据集的行相匹配

Is there a way to match the columns of a dataset with another dataset rows in R

merge

r

list

lapply

tidyverse