使用 R 重塑名称存根

using R for reshape with name stubs

我必须将一个用 Stata 编写的旧 do 文件重写到 R 中。我受困于 reshape 命令,希望能得到一些帮助。

数据如下所示:

dat <- structure(list(country_num = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 15, 
                                  15, 15, 15, 15, 15, 15), country = c("Germany", "Germany", "Germany", 
                                                                       "Germany", "Germany", "Germany", "Germany", "Germany", "Germany", 
                                                                       "United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom", 
                                                                       "United Kingdom", "United Kingdom", "United Kingdom"), parlgov_id = c(558, 
                                                                                                                                             808, 543, 791, 772, 1180, 1537, 865, 2253, 1556, 773, 659, 1272, 
                                                                                                                                             1284, 467, 1250), persparty_id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 
                                                                                                                                                                                1, 2, 3, 4, 5, 6, 7), parlgovname = c("Sozialdemokratische Partei Deutschlands", 
                                                                                                                                                                                                                      "Christlich Demokratische Union", "Freie Demokratische Partei", 
                                                                                                                                                                                                                      "Die Linke / PDS", "Bündnis 90 / Die Grünen", "Christlich Soziale Union", 
                                                                                                                                                                                                                      "Nationaldemokratische Partei Deutschlands", "Piratenpartei Deutschland", 
                                                                                                                                                                                                                      "Alternative für Deutschland", "Labour Party", "Conservatives", 
                                                                                                                                                                                                                      "Liberals", "United Kingdom Independence Party", "Scottish National Party <U+0096> Pàrtaidh Nàiseanta na h-Alba", 
                                                                                                                                                                                                                      "Green Party", "British National Party"), abrev = c("SPD", "CDU", 
                                                                                                                                                                                                                                                                          "FDP", "Li/PDS", "B90/Gru", "CSU", "NPD", "Pi", "AfD", "Lab", 
                                                                                                                                                                                                                                                                          "Con", "Lib", "UKIP", "SNP", "GP", "BNP"), el_date1 = structure(c(1127001600, 
                                                                                                                                                                                                                                                                                                                                            1127001600, 1127001600, 1127001600, 1127001600, 1127001600, 1127001600, 
                                                                                                                                                                                                                                                                                                                                            NA, NA, 1115251200, 1115251200, 1115251200, 1115251200, 1115251200, 
                                                                                                                                                                                                                                                                                                                                            1115251200, 1115251200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
                  pname1 = c("Social Democratic Party of Germany", "Christian Democratic Union", 
                             "Free Democratic Party", "PDS", "Alliance 90 / Greens", "Christian Social Union", 
                             "National Democratic Party", NA, NA, "Labour", "Conservatives", 
                             "Liberals", "United Kingdom Independence Party", "Scottish National Party", 
                             "Green Party", "British National Party"), vote1 = c(34.2000007629395, 
                                                                                 27.7999992370605, 9.80000019073486, 8.69999980926514, 8.10000038146973, 
                                                                                 7.40000009536743, 1.60000002384186, NA, NA, 35.2000007629395, 
                                                                                 32.4000015258789, 22, 2.20000004768372, 1.5, 1, 0.7), not_run2 = c(NA, 
                                                                                                                                                    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
                                                                                 ), new2 = c(NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, 
                                                                                             NA, NA, NA, NA), disbanded2 = c(NA_real_, NA_real_, NA_real_, 
                                                                                                                             NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
                                                                                                                             NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
                                                                                                                             NA_real_), splinter2 = c(NA_real_, NA_real_, NA_real_, NA_real_, 
                                                                                                                                                      NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
                                                                                                                                                      NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
                                                                                                                             ), merged2 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                                                                                                                            NA, NA, NA, NA, NA), joint2 = c(NA_real_, NA_real_, NA_real_, 
                                                                                                                                                                            NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
                                                                                                                                                                            NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
                                                                                                                                                                            NA_real_), merged_new_name2 = c(NA, NA, NA, 1, NA, NA, NA, 
                                                                                                                                                                                                            NA, NA, NA, NA, NA, NA, NA, NA, NA), joint_new_name2 = c(NA_real_, 
                                                                                                                                                                                                                                                                     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
                                                                                                                                                                                                                                                                     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
                                                                                                                                                                                                                                                                     NA_real_, NA_real_, NA_real_), not_coded2 = c(NA, NA, NA, 
                                                                                                                                                                                                                                                                                                                   NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), note2 = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                                  NA, NA, "PDS and part of the SPD", NA, NA, NA, NA, NA, NA, 
                                                                                                                                                                                                                                                                                                                                                                                  NA, NA, NA, NA, NA, NA), el_date2 = structure(c(1254009600, 
                                                                                                                                                                                                                                                                                                                                                                                                                                  1254009600, 1254009600, 1254009600, 1254009600, 1254009600, 
                                                                                                                                                                                                                                                                                                                                                                                                                                  1254009600, 1254009600, NA, 1273104000, 1273104000, 1273104000, 
                                                                                                                                                                                                                                                                                                                                                                                                                                  1273104000, 1273104000, 1273104000, 1273104000), class = c("POSIXct", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             "POSIXt"), tzone = "UTC"), pname2 = c("Social Democratic Party of Germany", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   "Christian Democratic Union", "Free Democratic Party", "Die Linke", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   "Alliance 90 / Greens", "Christian Social Union", "National Democratic Party", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   "German Pirate Party", NA, "Labour", "Conservatives", "Liberals", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   "United Kingdom Independence Party", "Scottish National Party", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   "Green Party", "British National Party"), vote2 = c(23, 27.2999992370605, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       14.6000003814697, 11.8999996185303, 10.6999998092651, 6.5, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       1.5, 2, NA, 29, 36.0999984741211, 23, 3.09999990463257, 1.70000004768372, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       1, 1.89999997615814)), row.names = c(NA, -16L), class = c("tbl_df", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 "tbl", "data.frame"))

抱歉代码太长了,我知道它看起来很难看,这就是重点。我必须将它重塑成类似的东西:

    dat <- structure(list(country_num = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15), country = c("Germany", "Germany", "Germany", 
"Germany", "Germany", "Germany", "Germany", "Germany", "Germany", 
"Germany", "Germany", "Germany", "Germany", "Germany", "Germany", 
"Germany", "Germany", "Germany", "United Kingdom", "United Kingdom", 
"United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom", 
"United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom", 
"United Kingdom", "United Kingdom", "United Kingdom", "United Kingdom"
), persparty_id = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 
8, 8, 9, 9, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7), parlgov_id = c(558, 
558, 808, 808, 543, 543, 791, 791, 772, 772, 1180, 1180, 1537, 
1537, 865, 865, 2253, 2253, 1556, 1556, 773, 773, 659, 659, 1272, 
1272, 1284, 1284, 467, 467, 1250, 1250), parlgovname = c("Sozialdemokratische Partei Deutschlands", 
"Sozialdemokratische Partei Deutschlands", "Christlich Demokratische Union", 
"Christlich Demokratische Union", "Freie Demokratische Partei", 
"Freie Demokratische Partei", "Die Linke / PDS", "Die Linke / PDS", 
"Bündnis 90 / Die Grünen", "Bündnis 90 / Die Grünen", "Christlich Soziale Union", 
"Christlich Soziale Union", "Nationaldemokratische Partei Deutschlands", 
"Nationaldemokratische Partei Deutschlands", "Piratenpartei Deutschland", 
"Piratenpartei Deutschland", "Alternative für Deutschland", "Alternative für Deutschland", 
"Labour Party", "Labour Party", "Conservatives", "Conservatives", 
"Liberals", "Liberals", "United Kingdom Independence Party", 
"United Kingdom Independence Party", "Scottish National Party <U+0096> Pàrtaidh Nàiseanta na h-Alba", 
"Scottish National Party <U+0096> Pàrtaidh Nàiseanta na h-Alba", 
"Green Party", "Green Party", "British National Party", "British National Party"
), abrev = c("SPD", "SPD", "CDU", "CDU", "FDP", "FDP", "Li/PDS", 
"Li/PDS", "B90/Gru", "B90/Gru", "CSU", "CSU", "NPD", "NPD", "Pi", 
"Pi", "AfD", "AfD", "Lab", "Lab", "Con", "Con", "Lib", "Lib", 
"UKIP", "UKIP", "SNP", "SNP", "GP", "GP", "BNP", "BNP"), long_id = c(1, 
2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 
1, 2, 1, 2, 1, 2, 1, 2, 1, 2), not_run = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), new = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), disbanded = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    splinter = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), merged = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), joint = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), merged_new_name = c(NA, NA, NA, NA, NA, NA, NA, 1, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA), joint_new_name = c(NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), not_coded = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), note = c(NA, NA, NA, NA, NA, 
    NA, NA, "PDS and part of the SPD", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), el_date = structure(c(1127001600, 1254009600, 
    1127001600, 1254009600, 1127001600, 1254009600, 1127001600, 
    1254009600, 1127001600, 1254009600, 1127001600, 1254009600, 
    1127001600, 1254009600, NA, 1254009600, NA, NA, 1115251200, 
    1273104000, 1115251200, 1273104000, 1115251200, 1273104000, 
    1115251200, 1273104000, 1115251200, 1273104000, 1115251200, 
    1273104000, 1115251200, 1273104000), class = c("POSIXct", 
    "POSIXt"), tzone = "UTC"), pname = c("Social Democratic Party of Germany", 
    "Social Democratic Party of Germany", "Christian Democratic Union", 
    "Christian Democratic Union", "Free Democratic Party", "Free Democratic Party", 
    "PDS", "Die Linke", "Alliance 90 / Greens", "Alliance 90 / Greens", 
    "Christian Social Union", "Christian Social Union", "National Democratic Party", 
    "National Democratic Party", NA, "German Pirate Party", NA, 
    NA, "Labour", "Labour", "Conservatives", "Conservatives", 
    "Liberals", "Liberals", "United Kingdom Independence Party", 
    "United Kingdom Independence Party", "Scottish National Party", 
    "Scottish National Party", "Green Party", "Green Party", 
    "British National Party", "British National Party"), vote = c(34.2000007629395, 
    23, 27.7999992370605, 27.2999992370605, 9.80000019073486, 
    14.6000003814697, 8.69999980926514, 11.8999996185303, 8.10000038146973, 
    10.6999998092651, 7.40000009536743, 6.5, 1.60000002384186, 
    1.5, NA, 2, NA, NA, 35.2000007629395, 29, 32.4000015258789, 
    36.0999984741211, 22, 23, 2.20000004768372, 3.09999990463257, 
    1.5, 1.70000004768372, 1, 1, 0.7, 1.89999997615814)), row.names = c(NA, 
-32L), class = c("tbl_df", "tbl", "data.frame"))

在 stata 中,代码为:

reshape long el_date pname vote not_run new disbanded splinter merged joint merged_new_name joint_new_name not_coded note, i(country_num country persparty_id parlgov_id parlgovname abrev) j(long_id)

在 R 中,我尝试了以下方法:

cols_to_reshape <- colnames(dat)[7:74]

long <- dat %>% 
  gather(variable, value, cols_to_reshape)

我也试过类似的东西:

 gather(dfwide, variable, value, starts_with("stub"))

基于this suggestion,但显然starts_with不适用于矢量,所以我有点卡住了。如果有任何帮助,我将不胜感激!

这应该可以解决问题:

dat %>% 
  pivot_longer(cols = el_date1:vote2,
               names_pattern = "^(\w+)([12])$",
               names_to = c(".value", "long_id"))

这使用了 tidyr 中的新 pivot_longer 函数。它从列名中提取名称(\w+)和数字([12]),并将它们分别存储为列名和long_id