inner_join R 中数据列表的函数(使用循环)(如 excel 中的 vlookup)

inner_join function from datalist in R (using loop)(like vlookup in excel)

我有1000个县名数据。 (ok_field) 此外,还有 1 到 10 天的天气数据。 (m) 这个数据是一个数据表。

(数据量太大,请以下面的数据为例,不胜感激!)

ok_field<-structure(list(state = c("oklahoma", "oklahoma", "oklahoma", 
                         "oklahoma", "oklahoma", "oklahoma", "oklahoma", "oklahoma", "oklahoma", 
                         "oklahoma"), county = c("Texas", "Texas", "Texas", "Texas", "Cimarron", 
                                                 "Cimarron", "Texas", "Texas", "Texas", "Texas")), row.names = c(NA, 
                                                                                                                 10L), class = "data.frame")

> ok_field
      state   county
1  oklahoma    Texas
2  oklahoma    Texas
3  oklahoma    Texas
4  oklahoma    Texas
5  oklahoma Cimarron
6  oklahoma Cimarron
7  oklahoma    Texas
8  oklahoma    Texas
9  oklahoma    Texas
10 oklahoma    Texas

m <- list(`1` = structure(list(DAY = c(15, 15, 15, 15, 15, 15, 15, 
                                       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
                                       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
                                       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
                                       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
                                       15, 15, 15, 15, 15, 15), county = c("Adair", "Alfalfa", "Atoka", 
                                                                           "Beaver", "Beckham", "Blaine", "Bryan", "Caddo", "Canadian", 
                                                                           "Carter", "Cherokee", "Choctaw", "Cimarron", "Cleveland", "Coal", 
                                                                           "Comanche", "Cotton", "Craig", "Creek", "Custer", "Delaware", 
                                                                           "Dewey", "Ellis", "Garfield", "Garvin", "Grady", "Grant", "Greer", 
                                                                           "Harmon", "Harper", "Haskell", "Hughes", "Jackson", "Jefferson", 
                                                                           "Johnston", "Kay", "Kingfisher", "Kiowa", "Latimer", "Le Flore", 
                                                                           "Lincoln", "Logan", "Love", "Major", "Marshall", "Mayes", "McClain", 
                                                                           "McCurtain", "McIntosh", "Murray", "Muskogee", "Noble", "Nowata", 
                                                                           "Okfuskee", "Oklahoma", "Okmulgee", "Osage", "Ottawa", "Pawnee", 
                                                                           "Payne", "Pittsburg", "Pontotoc", "Pottawatomie", "Pushmataha", 
                                                                           "Roger Mills", "Rogers", "Seminole", "Sequoyah", "Stephens", 
                                                                           "Texas", "Tillman", "Tulsa", "Wagoner", "Washington", "Washita", 
                                                                           "Woods", "Woodward"), TAVG_C = c(27.6888888888889, 31.1388888888889, 
                                                                                                            28.6777777777778, 30.2027777777778, 28.8111111111111, 30.25, 
                                                                                                            28.1111111111111, 29.4851851851852, 29.3055555555556, 28.7972222222222, 
                                                                                                            27.8805555555556, 29.1722222222222, 26.8166666666667, 28.8444444444444, 
                                                                                                            28.9222222222222, 29.1388888888889, 30.0722222222222, 27.4222222222222, 
                                                                                                            28.1611111111111, 29.8638888888889, 28.1277777777778, 29.55, 
                                                                                                            28.9888888888889, 29.4166666666667, 28.4666666666667, 29.212962962963, 
                                                                                                            29.9888888888889, 29.7888888888889, 29.9611111111111, 31.8777777777778, 
                                                                                                            28.3833333333333, 27.7, 29.3, 29.8277777777778, 28.1055555555556, 
                                                                                                            29.0027777777778, 31.3444444444444, 30.2666666666667, 29.1111111111111, 
                                                                                                            28.4805555555556, 28.0777777777778, 29.2361111111111, 29.7888888888889, 
                                                                                                            30.7777777777778, 28.7055555555556, 27.9388888888889, 27.7388888888889, 
                                                                                                            28.3111111111111, 29.15, 27.6, 28.4055555555556, 29.3666666666667, 
                                                                                                            28.6555555555556, 28.3, 28.4416666666667, 28.1666666666667, 27.9083333333333, 
                                                                                                            28.2888888888889, 28.6888888888889, 28.4069444444444, 28.5944444444444, 
                                                                                                            28.2222222222222, 28.4777777777778, 28.3259259259259, 28.0055555555556, 
                                                                                                            28.2138888888889, 27.6, 28.75, 29.0777777777778, 29.4555555555556, 
                                                                                                            30.9111111111111, 29.4166666666667, 28.4222222222222, 27.9, 30.0333333333333, 
                                                                                                            30.6944444444444, 30.1907407407407)), row.names = c(NA, -77L), class = "data.frame"), 
          `2` = structure(list(DAY = c(16, 16, 16, 16, 16, 16, 16, 
                                       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
                                       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
                                       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
                                       16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
                                       16, 16, 16, 16, 16, 16, 16, 16, 16, 16), county = c("Adair", 
                                                                                           "Alfalfa", "Atoka", "Beaver", "Beckham", "Blaine", "Bryan", 
                                                                                           "Caddo", "Canadian", "Carter", "Cherokee", "Choctaw", "Cimarron", 
                                                                                           "Cleveland", "Coal", "Comanche", "Cotton", "Craig", "Creek", 
                                                                                           "Custer", "Delaware", "Dewey", "Ellis", "Garfield", "Garvin", 
                                                                                           "Grady", "Grant", "Greer", "Harmon", "Harper", "Haskell", 
                                                                                           "Hughes", "Jackson", "Jefferson", "Johnston", "Kay", "Kingfisher", 
                                                                                           "Kiowa", "Latimer", "Le Flore", "Lincoln", "Logan", "Love", 
                                                                                           "Major", "Marshall", "Mayes", "McClain", "McCurtain", "McIntosh", 
                                                                                           "Murray", "Muskogee", "Noble", "Nowata", "Okfuskee", "Oklahoma", 
                                                                                           "Okmulgee", "Osage", "Ottawa", "Pawnee", "Payne", "Pittsburg", 
                                                                                           "Pontotoc", "Pottawatomie", "Pushmataha", "Roger Mills", 
                                                                                           "Rogers", "Seminole", "Sequoyah", "Stephens", "Texas", "Tillman", 
                                                                                           "Tulsa", "Wagoner", "Washington", "Washita", "Woods", "Woodward"
                                       ), TAVG_C = c(27.75, 30.0333333333333, 28.65, 27.1083333333333, 
                                                     28.6583333333333, 29.1722222222222, 28.2277777777778, 28.5814814814815, 
                                                     28.5055555555556, 28.4083333333333, 27.9833333333333, 29.2388888888889, 
                                                     22.9194444444444, 28.2333333333333, 29.0333333333333, 28.3833333333333, 
                                                     29.3944444444444, 27.2166666666667, 27.425, 28.9861111111111, 
                                                     27.9166666666667, 28.8083333333333, 27.65, 28.5833333333333, 
                                                     28.1388888888889, 28.5759259259259, 29.2444444444444, 29.6611111111111, 
                                                     29.9444444444444, 29.8444444444444, 28.4222222222222, 27.7, 
                                                     29.5777777777778, 29.2833333333333, 28.0388888888889, 28.3805555555556, 
                                                     30.2833333333333, 29.6722222222222, 29.15, 28.9416666666667, 
                                                     27.85, 28.5638888888889, 29.2944444444444, 29.6694444444444, 
                                                     28.6111111111111, 27.9111111111111, 27.2277777777778, 28.7277777777778, 
                                                     29.1, 27.6055555555556, 28.4916666666667, 28.7555555555556, 
                                                     28.5166666666667, 28.2055555555556, 27.8138888888889, 28.0444444444444, 
                                                     27.7361111111111, 28.1666666666667, 28.2944444444444, 27.9152777777778, 
                                                     28.6638888888889, 27.9527777777778, 28.2611111111111, 28.9777777777778, 
                                                     27.2888888888889, 28.2527777777778, 27.35, 28.9055555555556, 
                                                     28.4944444444445, 24.8185185185185, 30.4555555555556, 29.4138888888889, 
                                                     28.6333333333333, 27.9388888888889, 29.35, 29.4777777777778, 
                                                     29.3074074074074)), row.names = c(NA, -77L), class = "data.frame"))

>m
$`1`
   DAY       county   TAVG_C
1   15        Adair 27.68889
2   15      Alfalfa 31.13889
3   15        Atoka 28.67778
4   15       Beaver 30.20278
5   15      Beckham 28.81111
6   15       Blaine 30.25000
7   15        Bryan 28.11111
8   15        Caddo 29.48519
9   15     Canadian 29.30556
10  15       Carter 28.79722
11  15     Cherokee 27.88056
12  15      Choctaw 29.17222
13  15     Cimarron 26.81667
14  15    Cleveland 28.84444
15  15         Coal 28.92222
16  15     Comanche 29.13889
17  15       Cotton 30.07222
18  15        Craig 27.42222
19  15        Creek 28.16111
20  15       Custer 29.86389
21  15     Delaware 28.12778
22  15        Dewey 29.55000
23  15        Ellis 28.98889
24  15     Garfield 29.41667
25  15       Garvin 28.46667
26  15        Grady 29.21296
27  15        Grant 29.98889
28  15        Greer 29.78889
29  15       Harmon 29.96111
30  15       Harper 31.87778
31  15      Haskell 28.38333
32  15       Hughes 27.70000
33  15      Jackson 29.30000
34  15    Jefferson 29.82778
35  15     Johnston 28.10556
36  15          Kay 29.00278
37  15   Kingfisher 31.34444
38  15        Kiowa 30.26667
39  15      Latimer 29.11111
40  15     Le Flore 28.48056
41  15      Lincoln 28.07778
42  15        Logan 29.23611
43  15         Love 29.78889
44  15        Major 30.77778
45  15     Marshall 28.70556
46  15        Mayes 27.93889
47  15      McClain 27.73889
48  15    McCurtain 28.31111
49  15     McIntosh 29.15000
50  15       Murray 27.60000
51  15     Muskogee 28.40556
52  15        Noble 29.36667
53  15       Nowata 28.65556
54  15     Okfuskee 28.30000
55  15     Oklahoma 28.44167
56  15     Okmulgee 28.16667
57  15        Osage 27.90833
58  15       Ottawa 28.28889
59  15       Pawnee 28.68889
60  15        Payne 28.40694
61  15    Pittsburg 28.59444
62  15     Pontotoc 28.22222
63  15 Pottawatomie 28.47778
64  15   Pushmataha 28.32593
65  15  Roger Mills 28.00556
66  15       Rogers 28.21389
67  15     Seminole 27.60000
68  15     Sequoyah 28.75000
69  15     Stephens 29.07778
70  15        Texas 29.45556
71  15      Tillman 30.91111
72  15        Tulsa 29.41667
73  15      Wagoner 28.42222
74  15   Washington 27.90000
75  15      Washita 30.03333
76  15        Woods 30.69444
77  15     Woodward 30.19074

$`2`
   DAY       county   TAVG_C
1   16        Adair 27.75000
2   16      Alfalfa 30.03333
3   16        Atoka 28.65000
4   16       Beaver 27.10833
5   16      Beckham 28.65833
6   16       Blaine 29.17222
7   16        Bryan 28.22778
8   16        Caddo 28.58148
9   16     Canadian 28.50556
10  16       Carter 28.40833
11  16     Cherokee 27.98333
12  16      Choctaw 29.23889
13  16     Cimarron 22.91944
14  16    Cleveland 28.23333
15  16         Coal 29.03333
16  16     Comanche 28.38333
17  16       Cotton 29.39444
18  16        Craig 27.21667
19  16        Creek 27.42500
20  16       Custer 28.98611
21  16     Delaware 27.91667
22  16        Dewey 28.80833
23  16        Ellis 27.65000
24  16     Garfield 28.58333
25  16       Garvin 28.13889
26  16        Grady 28.57593
27  16        Grant 29.24444
28  16        Greer 29.66111
29  16       Harmon 29.94444
30  16       Harper 29.84444
31  16      Haskell 28.42222
32  16       Hughes 27.70000
33  16      Jackson 29.57778
34  16    Jefferson 29.28333
35  16     Johnston 28.03889
36  16          Kay 28.38056
37  16   Kingfisher 30.28333
38  16        Kiowa 29.67222
39  16      Latimer 29.15000
40  16     Le Flore 28.94167
41  16      Lincoln 27.85000
42  16        Logan 28.56389
43  16         Love 29.29444
44  16        Major 29.66944
45  16     Marshall 28.61111
46  16        Mayes 27.91111
47  16      McClain 27.22778
48  16    McCurtain 28.72778
49  16     McIntosh 29.10000
50  16       Murray 27.60556
51  16     Muskogee 28.49167
52  16        Noble 28.75556
53  16       Nowata 28.51667
54  16     Okfuskee 28.20556
55  16     Oklahoma 27.81389
56  16     Okmulgee 28.04444
57  16        Osage 27.73611
58  16       Ottawa 28.16667
59  16       Pawnee 28.29444
60  16        Payne 27.91528
61  16    Pittsburg 28.66389
62  16     Pontotoc 27.95278
63  16 Pottawatomie 28.26111
64  16   Pushmataha 28.97778
65  16  Roger Mills 27.28889
66  16       Rogers 28.25278
67  16     Seminole 27.35000
68  16     Sequoyah 28.90556
69  16     Stephens 28.49444
70  16        Texas 24.81852
71  16      Tillman 30.45556
72  16        Tulsa 29.41389
73  16      Wagoner 28.63333
74  16   Washington 27.93889
75  16      Washita 29.35000
76  16        Woods 29.47778
77  16     Woodward 29.30741

和Excel中的vlookup一样,想把符合县​​名的天气数据按日期合并。 我可以使用此代码每 1 天合并一次,但我想使用循环合并 1-10 天的天气数据。

z <- inner_join(ok_field, m$`1`,by="county",copy=TRUE)
county DAY TAVG_C
Woods 15 30.69444444
Alfalfa 15 31.13888889
Alfalfa 15 31.13888889
Grant 15 29.98888889
Alfalfa 15 31.13888889
Major 15 30.77777778

然而,我想要的table如下

county TAVG_C_15 TAVG_C_16
Woods 30.69444444 24.81852
Alfalfa 31.13888889 22.91944
Alfalfa 31.13888889 24.81852
Grant 29.98888889 22.91944
Alfalfa 31.13888889 22.91944
Major 30.77777778 24.81852

我试过的代码是下面的代码。

1.

look <- data.frame(matrix(nrow=(length(field$Id)),ncol=nrow(A)))
for (i in 1:nrow(A)){
  look[,i]<-ok_field[list(m[i]), on="county", nomatch=0]
}
for (i in 1:nrow(A)){
  look[,i] <- inner_join(ok_field, (m[i]),by="county",copy=TRUE)
  }

如能帮助我,不胜感激!

这使用一个循环来填充您想要的数据框。

ok_field=data.frame(county=c("Woods", "Alfalfa", "Grant", "Major"))
for (i in 1:length(m)) {
  ok_field=inner_join(ok_field, pivot_wider(m[[i]], names_from=DAY, names_prefix="TAVG_C", values_from=TAVG_C), by="county")
}

   county TAVG_C_15 TAVG_C_16
1   Woods  30.69444  29.47778
2 Alfalfa  31.13889  30.03333
3   Grant  29.98889  29.24444
4   Major  30.77778  29.66944

进一步编辑

使用 purrr 轻松完成。

  • 第 1 步:更改列名称
  • 第 2 步:删除 day
  • 第三步:最后应用合并
  • step-4:用ok_field连接数据相对容易。跳过那个代码。 (提示:使用 merge
#1
m <- map(m, ~setNames(.x, paste(colnames(.x), c("", "", .x[1,1]), sep = "_")))
#2
m <- map(m, ~ .x[2:3])
#3
purrr::reduce(m, ~merge(.x, .y, by = "county_"))
  county_ TAVG_C_15 TAVG_C_16  TAVG_C_17 TAVG_C_18 TAVG_C_19
1   Adair 0.4512674 0.6363238 0.81542154 0.6032407 0.5054940
2 Alfalfa 0.7837798 0.7013460 0.98298687 0.5674534 0.8675381
3   Atoka 0.7096822 0.6404389 0.02726706 0.8200528 0.9581816
4  Beaver 0.3817443 0.2666797 0.83749040 0.2515713 0.5456977

#创建的样本数据(较小的一个)

set.seed(2021)
m <- list(`1` = structure(list(DAY = rep(15,4), 
                               county = c("Adair", "Alfalfa", "Atoka", "Beaver"), 
                               TAVG_C = runif(4)), 
                          row.names = c(NA, -4L), 
                          class = "data.frame"),
          `2` = structure(list(DAY = rep(16,4), 
                               county = c("Adair", "Alfalfa", "Atoka", "Beaver"), 
                               TAVG_C = runif(4)), 
                          row.names = c(NA, -4L), 
                          class = "data.frame"),
          `3` = structure(list(DAY = rep(17,4), 
                               county = c("Adair", "Alfalfa", "Atoka", "Beaver"), 
                               TAVG_C = runif(4)), 
                          row.names = c(NA, -4L), 
                          class = "data.frame"),
          `4` = structure(list(DAY = rep(18,4), 
                               county = c("Adair", "Alfalfa", "Atoka", "Beaver"), 
                               TAVG_C = runif(4)), 
                          row.names = c(NA, -4L), 
                          class = "data.frame"),
          `5` = structure(list(DAY = rep(19,4), 
                               county = c("Adair", "Alfalfa", "Atoka", "Beaver"), 
                               TAVG_C = runif(4)), 
                          row.names = c(NA, -4L), 
                          class = "data.frame"))

Edit 使用此语法,将按预期加入您的数据库。但是,您只需更改第一天(在列表中)的 colname。

purrr::reduce(m, ~merge(.x, .y, by = "county", suffixes = paste0("_", c(.x[1, (length(.x)-1)], .y[1,(length(.y)-2)]))))

         county DAY_Adair TAVG_C_Adair DAY_16 TAVG_C_16
1         Adair        15     27.68889     16  27.75000
2       Alfalfa        15     31.13889     16  30.03333
3         Atoka        15     28.67778     16  28.65000
4        Beaver        15     30.20278     16  27.10833
5       Beckham        15     28.81111     16  28.65833
6        Blaine        15     30.25000     16  29.17222
7         Bryan        15     28.11111     16  28.22778
8         Caddo        15     29.48519     16  28.58148
9      Canadian        15     29.30556     16  28.50556
10       Carter        15     28.79722     16  28.40833
11     Cherokee        15     27.88056     16  27.98333
12      Choctaw        15     29.17222     16  29.23889
13     Cimarron        15     26.81667     16  22.91944
14    Cleveland        15     28.84444     16  28.23333
15         Coal        15     28.92222     16  29.03333
16     Comanche        15     29.13889     16  28.38333
17       Cotton        15     30.07222     16  29.39444
18        Craig        15     27.42222     16  27.21667
19        Creek        15     28.16111     16  27.42500
20       Custer        15     29.86389     16  28.98611
21     Delaware        15     28.12778     16  27.91667
22        Dewey        15     29.55000     16  28.80833
23        Ellis        15     28.98889     16  27.65000
24     Garfield        15     29.41667     16  28.58333
25       Garvin        15     28.46667     16  28.13889
26        Grady        15     29.21296     16  28.57593
27        Grant        15     29.98889     16  29.24444
28        Greer        15     29.78889     16  29.66111
29       Harmon        15     29.96111     16  29.94444
30       Harper        15     31.87778     16  29.84444
31      Haskell        15     28.38333     16  28.42222
32       Hughes        15     27.70000     16  27.70000
33      Jackson        15     29.30000     16  29.57778
34    Jefferson        15     29.82778     16  29.28333
35     Johnston        15     28.10556     16  28.03889
36          Kay        15     29.00278     16  28.38056
37   Kingfisher        15     31.34444     16  30.28333
38        Kiowa        15     30.26667     16  29.67222
39      Latimer        15     29.11111     16  29.15000
40     Le Flore        15     28.48056     16  28.94167
41      Lincoln        15     28.07778     16  27.85000
42        Logan        15     29.23611     16  28.56389
43         Love        15     29.78889     16  29.29444
44        Major        15     30.77778     16  29.66944
45     Marshall        15     28.70556     16  28.61111
46        Mayes        15     27.93889     16  27.91111
47      McClain        15     27.73889     16  27.22778
48    McCurtain        15     28.31111     16  28.72778
49     McIntosh        15     29.15000     16  29.10000
50       Murray        15     27.60000     16  27.60556
51     Muskogee        15     28.40556     16  28.49167
52        Noble        15     29.36667     16  28.75556
53       Nowata        15     28.65556     16  28.51667
54     Okfuskee        15     28.30000     16  28.20556
55     Oklahoma        15     28.44167     16  27.81389
56     Okmulgee        15     28.16667     16  28.04444
57        Osage        15     27.90833     16  27.73611
58       Ottawa        15     28.28889     16  28.16667
59       Pawnee        15     28.68889     16  28.29444
60        Payne        15     28.40694     16  27.91528
61    Pittsburg        15     28.59444     16  28.66389
62     Pontotoc        15     28.22222     16  27.95278
63 Pottawatomie        15     28.47778     16  28.26111
64   Pushmataha        15     28.32593     16  28.97778
65  Roger Mills        15     28.00556     16  27.28889
66       Rogers        15     28.21389     16  28.25278
67     Seminole        15     27.60000     16  27.35000
68     Sequoyah        15     28.75000     16  28.90556
69     Stephens        15     29.07778     16  28.49444
70        Texas        15     29.45556     16  24.81852
71      Tillman        15     30.91111     16  30.45556
72        Tulsa        15     29.41667     16  29.41389
73      Wagoner        15     28.42222     16  28.63333
74   Washington        15     27.90000     16  27.93889
75      Washita        15     30.03333     16  29.35000
76        Woods        15     30.69444     16  29.47778
77     Woodward        15     30.19074     16  29.30741