在 R 中将汇总数据重塑为垂直整齐的数据
Reshape summarized data into a vertical (long), tidy format in R
我从 Excel 文件中复制了一份汇总数据,并用 dput() 导出,
结果如下:
## Raw spreadsheet paste in dput() form: 5 rows x 7 columns.
## Rows 1-2 are header rows (city code bj/sh/tj, then unit:m2 / unit:dollar);
## rows 3-5 hold the figures for 2018, 2019 and 2020.
## Columns area, X, X.1 carry the area figures and price, X.2, X.3 the price
## figures, each in bj, sh, tj order. Every column except date is a factor, so
## the numbers are stored as level labels rather than as numeric values.
df <- structure(list(date = c(NA, NA, 2018L, 2019L, 2020L), area = structure(c(4L,
5L, 3L, 1L, 2L), .Label = c("1573.98", "1574.95", "1580.86",
"bj", "unit:m2"), class = "factor"), X = structure(c(4L, 5L,
3L, 2L, 1L), .Label = c("1831.15", "1871.61", "1927.95", "sh",
"unit:m2"), class = "factor"), X.1 = structure(c(4L, 5L, 3L,
2L, 1L), .Label = c("519.82", "529.47", "532.24", "tj", "unit:m2"
), class = "factor"), price = structure(c(4L, 5L, 1L, 3L, 2L), .Label = c("20.67",
"4.69", "7.49", "bj", "unit:dollar"), class = "factor"), X.2 = structure(c(4L,
5L, 3L, 2L, 1L), .Label = c("19.34", "21.99", "34.6", "sh", "unit:dollar"
), class = "factor"), X.3 = structure(c(4L, 5L, 3L, 2L, 1L), .Label = c("0.65",
"2.76", "2.96", "tj", "unit:dollar"), class = "factor")), class = "data.frame", row.names = c(NA,
-5L))
我怎样才能将它从汇总数据重塑为整洁的垂直数据,如下所示:
## Target layout: 18 rows, one per city / date / type combination.
## city and type are factors (levels bj, sh, tj and area, price), date is
## integer and values is numeric. Rows run type first, then date, then city.
structure(list(city = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("bj",
"sh", "tj"), class = "factor"), date = c(2018L, 2018L, 2018L,
2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2018L, 2018L, 2018L,
2019L, 2019L, 2019L, 2020L, 2020L, 2020L), type = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("area", "price"), class = "factor"), values = c(1580.86,
1927.95, 532.24, 1573.98, 1871.61, 529.47, 1574.95, 1831.15,
519.82, 20.67, 34.6, 2.96, 7.49, 21.99, 2.76, 4.69, 19.34, 0.65
)), class = "data.frame", row.names = c(NA, -18L))
## Long-format data frame with 18 rows and columns city (factor), date
## (integer), type (factor: area / price) and values (numeric).
structure(list(city = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("bj",
"sh", "tj"), class = "factor"), date = c(2018L, 2018L, 2018L,
2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2018L, 2018L, 2018L,
2019L, 2019L, 2019L, 2020L, 2020L, 2020L), type = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("area", "price"), class = "factor"), values = c(1580.86,
1927.95, 532.24, 1573.98, 1871.61, 529.47, 1574.95, 1831.15,
519.82, 20.67, 34.6, 2.96, 7.49, 21.99, 2.76, 4.69, 19.34, 0.65
)), class = "data.frame", row.names = c(NA, -18L))
谢谢。
library(tidyverse)
## Reshape the wide spreadsheet paste into one row per city / date / type.
df <- df[-(1:2), ] ## drop the two header rows (city codes and units)
## give every measure column a unique "<city>_<type>" name
names(df) <- c("date", "bj_area", "sh_area", "tj_area", "bj_price", "sh_price", "tj_price")
## The measure columns are factors because of the header rows. Go through
## as.character() so the printed numbers are recovered rather than the factor
## level codes, and so `values` ends up numeric instead of character.
measures <- df
measures[-1] <- lapply(measures[-1], function(col) as.numeric(as.character(col)))
## One pivot covers both area and price: each column name is split on "_" into
## city and type, replacing the two gather()/separate() pipelines and rbind().
## arrange() keeps the type / city / date row order; use
## arrange(type, date, city) for date-major order instead.
## as.data.frame() guarantees a plain data.frame with row names 1..n.
tidy_df <- measures %>%
  pivot_longer(-date, names_to = c("city", "type"), names_sep = "_", values_to = "values") %>%
  select(city, date, type, values) %>%
  arrange(type, city, date) %>%
  as.data.frame()
tidy_df
city date type values
1 bj 2018 area 1580.86
2 bj 2019 area 1573.98
3 bj 2020 area 1574.95
4 sh 2018 area 1927.95
5 sh 2019 area 1871.61
6 sh 2020 area 1831.15
7 tj 2018 area 532.24
8 tj 2019 area 529.47
9 tj 2020 area 519.82
10 bj 2018 price 20.67
11 bj 2019 price 7.49
12 bj 2020 price 4.69
13 sh 2018 price 34.6
14 sh 2019 price 21.99
15 sh 2020 price 19.34
16 tj 2018 price 2.96
17 tj 2019 price 2.76
18 tj 2020 price 0.65
我从 Excel 文件中复制了一份汇总数据,并用 dput() 导出,
结果如下:
## Raw spreadsheet paste in dput() form: 5 rows x 7 columns.
## Rows 1-2 are header rows (city code bj/sh/tj, then unit:m2 / unit:dollar);
## rows 3-5 hold the figures for 2018, 2019 and 2020.
## Columns area, X, X.1 carry the area figures and price, X.2, X.3 the price
## figures, each in bj, sh, tj order. Every column except date is a factor, so
## the numbers are stored as level labels rather than as numeric values.
df <- structure(list(date = c(NA, NA, 2018L, 2019L, 2020L), area = structure(c(4L,
5L, 3L, 1L, 2L), .Label = c("1573.98", "1574.95", "1580.86",
"bj", "unit:m2"), class = "factor"), X = structure(c(4L, 5L,
3L, 2L, 1L), .Label = c("1831.15", "1871.61", "1927.95", "sh",
"unit:m2"), class = "factor"), X.1 = structure(c(4L, 5L, 3L,
2L, 1L), .Label = c("519.82", "529.47", "532.24", "tj", "unit:m2"
), class = "factor"), price = structure(c(4L, 5L, 1L, 3L, 2L), .Label = c("20.67",
"4.69", "7.49", "bj", "unit:dollar"), class = "factor"), X.2 = structure(c(4L,
5L, 3L, 2L, 1L), .Label = c("19.34", "21.99", "34.6", "sh", "unit:dollar"
), class = "factor"), X.3 = structure(c(4L, 5L, 3L, 2L, 1L), .Label = c("0.65",
"2.76", "2.96", "tj", "unit:dollar"), class = "factor")), class = "data.frame", row.names = c(NA,
-5L))
我怎样才能将它从汇总数据重塑为整洁的垂直数据,如下所示:
## Target layout: 18 rows, one per city / date / type combination.
## city and type are factors (levels bj, sh, tj and area, price), date is
## integer and values is numeric. Rows run type first, then date, then city.
structure(list(city = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("bj",
"sh", "tj"), class = "factor"), date = c(2018L, 2018L, 2018L,
2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2018L, 2018L, 2018L,
2019L, 2019L, 2019L, 2020L, 2020L, 2020L), type = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("area", "price"), class = "factor"), values = c(1580.86,
1927.95, 532.24, 1573.98, 1871.61, 529.47, 1574.95, 1831.15,
519.82, 20.67, 34.6, 2.96, 7.49, 21.99, 2.76, 4.69, 19.34, 0.65
)), class = "data.frame", row.names = c(NA, -18L))
## Long-format data frame with 18 rows and columns city (factor), date
## (integer), type (factor: area / price) and values (numeric).
structure(list(city = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("bj",
"sh", "tj"), class = "factor"), date = c(2018L, 2018L, 2018L,
2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2018L, 2018L, 2018L,
2019L, 2019L, 2019L, 2020L, 2020L, 2020L), type = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("area", "price"), class = "factor"), values = c(1580.86,
1927.95, 532.24, 1573.98, 1871.61, 529.47, 1574.95, 1831.15,
519.82, 20.67, 34.6, 2.96, 7.49, 21.99, 2.76, 4.69, 19.34, 0.65
)), class = "data.frame", row.names = c(NA, -18L))
谢谢。
library(tidyverse)
## Reshape the wide spreadsheet paste into one row per city / date / type.
df <- df[-(1:2), ] ## drop the two header rows (city codes and units)
## give every measure column a unique "<city>_<type>" name
names(df) <- c("date", "bj_area", "sh_area", "tj_area", "bj_price", "sh_price", "tj_price")
## The measure columns are factors because of the header rows. Go through
## as.character() so the printed numbers are recovered rather than the factor
## level codes, and so `values` ends up numeric instead of character.
measures <- df
measures[-1] <- lapply(measures[-1], function(col) as.numeric(as.character(col)))
## One pivot covers both area and price: each column name is split on "_" into
## city and type, replacing the two gather()/separate() pipelines and rbind().
## arrange() keeps the type / city / date row order; use
## arrange(type, date, city) for date-major order instead.
## as.data.frame() guarantees a plain data.frame with row names 1..n.
tidy_df <- measures %>%
  pivot_longer(-date, names_to = c("city", "type"), names_sep = "_", values_to = "values") %>%
  select(city, date, type, values) %>%
  arrange(type, city, date) %>%
  as.data.frame()
tidy_df
city date type values
1 bj 2018 area 1580.86
2 bj 2019 area 1573.98
3 bj 2020 area 1574.95
4 sh 2018 area 1927.95
5 sh 2019 area 1871.61
6 sh 2020 area 1831.15
7 tj 2018 area 532.24
8 tj 2019 area 529.47
9 tj 2020 area 519.82
10 bj 2018 price 20.67
11 bj 2019 price 7.49
12 bj 2020 price 4.69
13 sh 2018 price 34.6
14 sh 2019 price 21.99
15 sh 2020 price 19.34
16 tj 2018 price 2.96
17 tj 2019 price 2.76
18 tj 2020 price 0.65