在 R 中将汇总数据重塑为垂直整齐的数据

Question

我从 Excel 文件中复制了一份汇总数据，其 dput() 输出如下：

# Raw sheet as copied from Excel (dput output).
# Rows 1-2 are embedded header rows (city code, then unit label); rows 3-5 are
# the yearly observations. Because of those header rows every measurement
# column is stored as a factor rather than as a number.
df <- structure(
  list(
    date = c(NA, NA, 2018L, 2019L, 2020L),
    # Area columns (unit:m2), one per city: bj, sh, tj.
    area = structure(
      c(4L, 5L, 3L, 1L, 2L),
      .Label = c("1573.98", "1574.95", "1580.86", "bj", "unit:m2"),
      class = "factor"
    ),
    X = structure(
      c(4L, 5L, 3L, 2L, 1L),
      .Label = c("1831.15", "1871.61", "1927.95", "sh", "unit:m2"),
      class = "factor"
    ),
    X.1 = structure(
      c(4L, 5L, 3L, 2L, 1L),
      .Label = c("519.82", "529.47", "532.24", "tj", "unit:m2"),
      class = "factor"
    ),
    # Price columns (unit:dollar), one per city: bj, sh, tj.
    price = structure(
      c(4L, 5L, 1L, 3L, 2L),
      .Label = c("20.67", "4.69", "7.49", "bj", "unit:dollar"),
      class = "factor"
    ),
    X.2 = structure(
      c(4L, 5L, 3L, 2L, 1L),
      .Label = c("19.34", "21.99", "34.6", "sh", "unit:dollar"),
      class = "factor"
    ),
    X.3 = structure(
      c(4L, 5L, 3L, 2L, 1L),
      .Label = c("0.65", "2.76", "2.96", "tj", "unit:dollar"),
      class = "factor"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -5L)
)

怎样才能把这份汇总数据重塑为如下所示的纵向整洁（tidy）数据？

# Desired tidy (long) layout: one row per (city, date, type) observation with
# the measurement in the numeric column `values`. Area rows come first, then
# price rows; within each type rows are ordered by date and then by city.
structure(
  list(
    city = structure(
      c(
        1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
        1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L
      ),
      .Label = c("bj", "sh", "tj"),
      class = "factor"
    ),
    date = c(
      2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L,
      2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L
    ),
    type = structure(
      c(
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
      ),
      .Label = c("area", "price"),
      class = "factor"
    ),
    values = c(
      1580.86, 1927.95, 532.24,
      1573.98, 1871.61, 529.47,
      1574.95, 1831.15, 519.82,
      20.67, 34.6, 2.96,
      7.49, 21.99, 2.76,
      4.69, 19.34, 0.65
    )
  ),
  class = "data.frame",
  row.names = c(NA, -18L)
)

谢谢。

Answer 1

library(tidyverse)

# Drop the two embedded header rows (city codes and unit labels); only the
# yearly observations remain.
df <- df[-c(1:2), ]

# Give every measurement column a unique "<city>_<type>" name so the city and
# the measurement type can be split apart on "_" when reshaping.
names(df) <- c(
  "date",
  "bj_area", "sh_area", "tj_area",
  "bj_price", "sh_price", "tj_price"
)

# The header rows forced the measurement columns to be read as factors (or as
# character). Convert via character so the printed numbers are recovered, not
# the internal factor level codes.
df[-1] <- lapply(df[-1], function(col) as.numeric(as.character(col)))

# Stack all measurement columns in one pass, splitting each column name into
# `city` and `type`, then order the rows like the layout requested in the
# question: by type, then date, then city.
tidy_df <- df %>%
  pivot_longer(
    cols = -date,
    names_to = c("city", "type"),
    names_sep = "_",
    values_to = "values"
  ) %>%
  arrange(type, date, city) %>%
  select(city, date, type, values) %>%
  as.data.frame()

tidy_df
#>    city date  type  values
#> 1    bj 2018  area 1580.86
#> 2    sh 2018  area 1927.95
#> 3    tj 2018  area  532.24
#> 4    bj 2019  area 1573.98
#> 5    sh 2019  area 1871.61
#> 6    tj 2019  area  529.47
#> 7    bj 2020  area 1574.95
#> 8    sh 2020  area 1831.15
#> 9    tj 2020  area  519.82
#> 10   bj 2018 price   20.67
#> 11   sh 2018 price   34.60
#> 12   tj 2018 price    2.96
#> 13   bj 2019 price    7.49
#> 14   sh 2019 price   21.99
#> 15   tj 2019 price    2.76
#> 16   bj 2020 price    4.69
#> 17   sh 2020 price   19.34
#> 18   tj 2020 price    0.65

在 R 中将汇总数据重塑为垂直整齐的数据

Reshape summarized data into vertical and tidy one in R

r

reshape

dplyr