制作一个频率列表,将类别连接到该列表
Make a list out of frequencies, concatenating categories to that list
我正在尝试改编来自 onyambu 的解决方案:
新数据
# New input (dput output): one row per `rn`, one integer count column per
# "<interval> <direction>" pair (east/north/south/west), a `Sum_table_in`
# column, and a list-column `strata` holding one numeric vector per row.
dat_in_new <- structure(list(rn = c("Type_A", "Type_B"
), `[0,25) east` = c(1269L, 85L), `[0,25) north` = c(364L, 21L
), `[0,25) south` = c(1172L, 97L), `[0,25) west` = c(549L, 49L
), `[25,50) east` = c(0L, 0L), `[25,50) north` = c(0L, 0L), `[25,50) south` = c(0L,
0L), `[25,50) west` = c(0L, 0L), `[25,100) east` = c(579L, 220L
), `[25,100) north` = c(406L, 58L), `[25,100) south` = c(1048L,
316L), `[25,100) west` = c(764L, 131L), `[50,100) east` = c(0L,
0L), `[50,100) north` = c(0L, 0L), `[50,100) south` = c(0L, 0L
), `[50,100) west` = c(0L, 0L), `[100,250) east` = c(441L, 149L
), `[100,250) north` = c(224L, 45L), `[100,250) south` = c(521L,
247L), `[100,250) west` = c(770L, 124L), `[100,500) east` = c(0L,
0L), `[100,500) north` = c(0L, 0L), `[100,500) south` = c(0L,
0L), `[100,500) west` = c(0L, 0L), `[250,500) east` = c(232L,
172L), `[250,500) north` = c(207L, 40L), `[250,500) south` = c(202L,
148L), `[250,500) west` = c(457L, 153L), `[500,1000) east` = c(103L,
0L), `[500,1000) north` = c(185L, 0L), `[500,1000) south` = c(66L,
0L), `[500,1000) west` = c(200L, 0L), `[1000,1500) east` = c(0L,
0L), `[1000,1500) north` = c(0L, 0L), `[1000,1500) south` = c(0L,
0L), `[1000,1500) west` = c(0L, 0L), `[1500,3000) east` = c(0L,
0L), `[1500,3000) north` = c(0L, 0L), `[1500,3000) south` = c(0L,
0L), `[1500,3000) west` = c(0L, 0L), `[500,1000000] east` = c(0L,
288L), `[500,1000000] north` = c(0L, 120L), `[500,1000000] south` = c(0L,
229L), `[500,1000000] west` = c(0L, 175L), `[1000,1000000] east` = c(53L,
0L), `[1000,1000000] north` = c(82L, 0L), `[1000,1000000] south` = c(23L,
0L), `[1000,1000000] west` = c(63L, 0L), `[3000,1000000] east` = c(0L,
0L), `[3000,1000000] north` = c(0L, 0L), `[3000,1000000] south` = c(0L,
0L), `[3000,1000000] west` = c(0L, 0L), Sum_table_in = c(9980,
2867), strata = list(c(0, 25, 100, 250, 500, 1000, 1e+06), c(0,
25, 100, 250, 500, 1e+06))), row.names = c(NA, -2L), class = c("data.table",
"data.frame"))
旧数据和解决方案
library(dplyr)
# Old input (dput output): one row per `rn`, one integer count column per
# interval, a `Sum_bin` column, and a list-column `strata` holding one
# numeric vector per row.
dat_in <- structure(list(rn = c("Type_A", "Type_B"
), `[0,25)` = c(5L, 0L), `[25,50)` = c(0L, 0L), `[25,100)` = c(38L,
3L), `[50,100)` = c(0L, 0L), `[100,250)` = c(43L, 5L), `[100,500)` = c(0L,
0L), `[250,500)` = c(27L, 12L), `[500,1000)` = c(44L, 0L), `[1000,1500)` = c(0L,
0L), `[1500,3000)` = c(0L, 0L), `[500,1000000]` = c(0L, 53L),
`[1000,1000000]` = c(20L, 0L), `[3000,1000000]` = c(0L, 0L
), Sum_bin = c(177, 73), strata = list(c(0, 25, 100, 250,
500, 1000, 1e+06), c(0, 25, 100, 250, 500, 1e+06))), row.names = c(NA,
-2L), class = c("data.table", "data.frame"))
# For each `rn`, build a list of per-bin frequencies followed by their total.
# Only bins whose lower and upper bounds both occur in that row's `strata`
# are kept; column names without an "a,b" pair (e.g. `Sum_bin`) produce NA
# bounds and are removed by the same filter.
dat_in %>%
  # pivot_longer() lives in tidyr, which this snippet does not attach, so it
  # is namespaced like the extract() call below.
  tidyr::pivot_longer(-c(rn, strata)) %>%
  # Backslashes must be doubled inside an R string literal; a single "\d" is
  # rejected by the parser as an unrecognized escape.
  tidyr::extract(name, c("lower", "upper"), "(\\d+),(\\d+)", convert = TRUE) %>%
  group_by(rn) %>%
  # `strata` is a list-column; within one `rn` group every row carries the
  # same vector, so the first element is representative.
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  group_by(upper, .add = TRUE) %>%
  # "drop_last" peels off the `upper` grouping, leaving the data grouped by
  # `rn` for the next two steps.
  summarise(freq = sum(value), .groups = "drop_last") %>%
  # Append one extra row per `rn` holding the total of its frequencies.
  group_modify(~ add_row(., freq = sum(.$freq))) %>%
  summarise(freq = list(freq))
问题
对于新数据,由于包含 east、north、south 和 west 四个方向的值,数值更多了。应将这些值连接起来,形成一长串频率列表,每个 rn 一个。
开始可以修改如下:
# Reshape the wide table to long form and split each column name such as
# "[0,25) east" into converted `lower` / `upper` bounds plus a trailing
# `rest` label (the direction word after the closing bracket).
part1 <- dat_in_new %>%
  # pivot_longer() lives in tidyr; namespaced like the extract() call below.
  tidyr::pivot_longer(-c(rn, strata)) %>%
  # Backslashes are doubled because this is an R string literal; a single
  # "\d" is rejected by the parser as an unrecognized escape.
  tidyr::extract(
    name,
    c("lower", "upper", "rest"),
    "(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)",
    convert = TRUE
  ) %>%
  # NOTE(review): `is_all_na` is not defined in this snippet; presumably a
  # predicate that is TRUE for columns containing only NA -- confirm it exists.
  select(-where(is_all_na))
但是我卡在了下面的部分,因为我不太理解 group_by(rn) 之后的几行,尤其是 .groups = 'drop_last' 部分(link)。我通常会逐行运行代码来查看发生了什么,但对于这个解决方案,这样做并没有太大帮助:
# Per `rn`: keep bins whose bounds both occur in that row's `strata`, sum the
# values per `upper` bound, append the total, and collect the result into a
# list. The direction column is not a grouping variable here, so values from
# all directions are added together within each bin.
part2 <- part1 %>% # start from the reshaped `part1`; `part2` does not exist yet
  group_by(rn) %>%
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  group_by(upper, .add = TRUE) %>%
  # "drop_last" peels off the `upper` grouping, leaving the data grouped by
  # `rn` for the next two steps.
  summarise(freq = sum(value), .groups = "drop_last") %>%
  # Append one extra row per `rn` holding the total of its frequencies.
  group_modify(~ add_row(., freq = sum(.$freq))) %>%
  summarise(freq = list(freq))
目前代码可以运行,但我最终得到的仍是旧结果。我应该如何调整此代码以纳入所有值?
我们可以按如下方式使用 pivot_longer:
library(dplyr)
library(tidyr)
# For each (`rn`, `direction`) pair, build a list of per-bin frequencies
# followed by their total.
dat_in_new %>%
  pivot_longer(
    cols = -c(rn, strata, Sum_table_in),
    names_to = c("lower", "upper", "direction"),
    # Splits names like "[0,25) east" or "[500,1000000] east" into the two
    # bounds and the direction; `[\\)\\]]` accepts either closing bracket.
    # Backslashes are doubled because this is an R string literal.
    names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
    values_drop_na = TRUE
  ) %>%
  # The captured name pieces arrive as character; convert the bounds to
  # numbers so they can be matched against `strata`.
  type.convert(as.is = TRUE) %>%
  group_by(rn, direction) %>%
  # `strata` is a list-column; within one group every row carries the same
  # vector, so the first element is representative.
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  group_by(upper, .add = TRUE) %>%
  # "drop_last" peels off `upper`, leaving groups of (`rn`, `direction`).
  summarise(freq = sum(value), .groups = "drop_last") %>%
  # Append one extra row per group holding the total of its frequencies.
  group_modify(~ add_row(., freq = sum(.$freq))) %>%
  summarise(freq = list(freq), .groups = "drop")
-输出
# A tibble: 8 × 3
rn direction freq
<chr> <chr> <list>
1 Type_A east <int [7]>
2 Type_A north <int [7]>
3 Type_A south <int [7]>
4 Type_A west <int [7]>
5 Type_B east <int [6]>
6 Type_B north <int [6]>
7 Type_B south <int [6]>
8 Type_B west <int [6]>
如果我们只想按 'rn' 汇总:
# For each `rn`, build one long list: the per-bin frequencies of every
# direction, each direction's run followed by its own total.
dat_in_new %>%
  pivot_longer(
    cols = -c(rn, strata, Sum_table_in),
    names_to = c("lower", "upper", "direction"),
    # Splits names like "[0,25) east" or "[500,1000000] east" into the two
    # bounds and the direction; `[\\)\\]]` accepts either closing bracket.
    # Backslashes are doubled because this is an R string literal.
    names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
    values_drop_na = TRUE
  ) %>%
  # The captured name pieces arrive as character; convert the bounds to
  # numbers so they can be matched against `strata`.
  type.convert(as.is = TRUE) %>%
  group_by(rn, direction) %>%
  # `strata` is a list-column; within one group every row carries the same
  # vector, so the first element is representative.
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  group_by(upper, .add = TRUE) %>%
  # "drop_last" peels off `upper`, leaving groups of (`rn`, `direction`).
  summarise(freq = sum(value), .groups = "drop_last") %>%
  # Append one extra row per (`rn`, `direction`) group with its total.
  group_modify(~ add_row(., freq = sum(.$freq))) %>%
  # Regroup by `rn` alone so the per-direction runs are concatenated.
  group_by(rn) %>%
  summarise(freq = list(freq), .groups = "drop")
-输出
# A tibble: 2 × 2
rn freq
<chr> <list>
1 Type_A <int [28]>
2 Type_B <int [24]>
我正在尝试改编一个已有的解决方案(见下文“旧数据和解决方案”):
新数据
# New input (dput output): one row per `rn`, one integer count column per
# "<interval> <direction>" pair (east/north/south/west), a `Sum_table_in`
# column, and a list-column `strata` holding one numeric vector per row.
dat_in_new <- structure(list(rn = c("Type_A", "Type_B"
), `[0,25) east` = c(1269L, 85L), `[0,25) north` = c(364L, 21L
), `[0,25) south` = c(1172L, 97L), `[0,25) west` = c(549L, 49L
), `[25,50) east` = c(0L, 0L), `[25,50) north` = c(0L, 0L), `[25,50) south` = c(0L,
0L), `[25,50) west` = c(0L, 0L), `[25,100) east` = c(579L, 220L
), `[25,100) north` = c(406L, 58L), `[25,100) south` = c(1048L,
316L), `[25,100) west` = c(764L, 131L), `[50,100) east` = c(0L,
0L), `[50,100) north` = c(0L, 0L), `[50,100) south` = c(0L, 0L
), `[50,100) west` = c(0L, 0L), `[100,250) east` = c(441L, 149L
), `[100,250) north` = c(224L, 45L), `[100,250) south` = c(521L,
247L), `[100,250) west` = c(770L, 124L), `[100,500) east` = c(0L,
0L), `[100,500) north` = c(0L, 0L), `[100,500) south` = c(0L,
0L), `[100,500) west` = c(0L, 0L), `[250,500) east` = c(232L,
172L), `[250,500) north` = c(207L, 40L), `[250,500) south` = c(202L,
148L), `[250,500) west` = c(457L, 153L), `[500,1000) east` = c(103L,
0L), `[500,1000) north` = c(185L, 0L), `[500,1000) south` = c(66L,
0L), `[500,1000) west` = c(200L, 0L), `[1000,1500) east` = c(0L,
0L), `[1000,1500) north` = c(0L, 0L), `[1000,1500) south` = c(0L,
0L), `[1000,1500) west` = c(0L, 0L), `[1500,3000) east` = c(0L,
0L), `[1500,3000) north` = c(0L, 0L), `[1500,3000) south` = c(0L,
0L), `[1500,3000) west` = c(0L, 0L), `[500,1000000] east` = c(0L,
288L), `[500,1000000] north` = c(0L, 120L), `[500,1000000] south` = c(0L,
229L), `[500,1000000] west` = c(0L, 175L), `[1000,1000000] east` = c(53L,
0L), `[1000,1000000] north` = c(82L, 0L), `[1000,1000000] south` = c(23L,
0L), `[1000,1000000] west` = c(63L, 0L), `[3000,1000000] east` = c(0L,
0L), `[3000,1000000] north` = c(0L, 0L), `[3000,1000000] south` = c(0L,
0L), `[3000,1000000] west` = c(0L, 0L), Sum_table_in = c(9980,
2867), strata = list(c(0, 25, 100, 250, 500, 1000, 1e+06), c(0,
25, 100, 250, 500, 1e+06))), row.names = c(NA, -2L), class = c("data.table",
"data.frame"))
旧数据和解决方案
library(dplyr)
# Old input (dput output): one row per `rn`, one integer count column per
# interval, a `Sum_bin` column, and a list-column `strata` holding one
# numeric vector per row.
dat_in <- structure(list(rn = c("Type_A", "Type_B"
), `[0,25)` = c(5L, 0L), `[25,50)` = c(0L, 0L), `[25,100)` = c(38L,
3L), `[50,100)` = c(0L, 0L), `[100,250)` = c(43L, 5L), `[100,500)` = c(0L,
0L), `[250,500)` = c(27L, 12L), `[500,1000)` = c(44L, 0L), `[1000,1500)` = c(0L,
0L), `[1500,3000)` = c(0L, 0L), `[500,1000000]` = c(0L, 53L),
`[1000,1000000]` = c(20L, 0L), `[3000,1000000]` = c(0L, 0L
), Sum_bin = c(177, 73), strata = list(c(0, 25, 100, 250,
500, 1000, 1e+06), c(0, 25, 100, 250, 500, 1e+06))), row.names = c(NA,
-2L), class = c("data.table", "data.frame"))
# For each `rn`, build a list of per-bin frequencies followed by their total.
# Only bins whose lower and upper bounds both occur in that row's `strata`
# are kept; column names without an "a,b" pair (e.g. `Sum_bin`) produce NA
# bounds and are removed by the same filter.
dat_in %>%
  # pivot_longer() lives in tidyr, which this snippet does not attach, so it
  # is namespaced like the extract() call below.
  tidyr::pivot_longer(-c(rn, strata)) %>%
  # Backslashes must be doubled inside an R string literal; a single "\d" is
  # rejected by the parser as an unrecognized escape.
  tidyr::extract(name, c("lower", "upper"), "(\\d+),(\\d+)", convert = TRUE) %>%
  group_by(rn) %>%
  # `strata` is a list-column; within one `rn` group every row carries the
  # same vector, so the first element is representative.
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  group_by(upper, .add = TRUE) %>%
  # "drop_last" peels off the `upper` grouping, leaving the data grouped by
  # `rn` for the next two steps.
  summarise(freq = sum(value), .groups = "drop_last") %>%
  # Append one extra row per `rn` holding the total of its frequencies.
  group_modify(~ add_row(., freq = sum(.$freq))) %>%
  summarise(freq = list(freq))
问题
对于新数据,由于包含 east、north、south 和 west 四个方向的值,数值更多了。应将这些值连接起来,形成一长串频率列表,每个 rn 一个。
开始可以修改如下:
# Reshape the wide table to long form and split each column name such as
# "[0,25) east" into converted `lower` / `upper` bounds plus a trailing
# `rest` label (the direction word after the closing bracket).
part1 <- dat_in_new %>%
  # pivot_longer() lives in tidyr; namespaced like the extract() call below.
  tidyr::pivot_longer(-c(rn, strata)) %>%
  # Backslashes are doubled because this is an R string literal; a single
  # "\d" is rejected by the parser as an unrecognized escape.
  tidyr::extract(
    name,
    c("lower", "upper", "rest"),
    "(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)",
    convert = TRUE
  ) %>%
  # NOTE(review): `is_all_na` is not defined in this snippet; presumably a
  # predicate that is TRUE for columns containing only NA -- confirm it exists.
  select(-where(is_all_na))
但是我卡在了下面的部分,因为我不太理解 group_by(rn) 之后的几行,尤其是 .groups = 'drop_last' 部分(link)。我通常会逐行运行代码来查看发生了什么,但对于这个解决方案,这样做并没有太大帮助:
# Per `rn`: keep bins whose bounds both occur in that row's `strata`, sum the
# values per `upper` bound, append the total, and collect the result into a
# list. The direction column is not a grouping variable here, so values from
# all directions are added together within each bin.
part2 <- part1 %>% # start from the reshaped `part1`; `part2` does not exist yet
  group_by(rn) %>%
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  group_by(upper, .add = TRUE) %>%
  # "drop_last" peels off the `upper` grouping, leaving the data grouped by
  # `rn` for the next two steps.
  summarise(freq = sum(value), .groups = "drop_last") %>%
  # Append one extra row per `rn` holding the total of its frequencies.
  group_modify(~ add_row(., freq = sum(.$freq))) %>%
  summarise(freq = list(freq))
目前代码可以运行,但我最终得到的仍是旧结果。我应该如何调整此代码以纳入所有值?
我们可以按如下方式使用 pivot_longer:
library(dplyr)
library(tidyr)
# For each (`rn`, `direction`) pair, build a list of per-bin frequencies
# followed by their total.
dat_in_new %>%
  pivot_longer(
    cols = -c(rn, strata, Sum_table_in),
    names_to = c("lower", "upper", "direction"),
    # Splits names like "[0,25) east" or "[500,1000000] east" into the two
    # bounds and the direction; `[\\)\\]]` accepts either closing bracket.
    # Backslashes are doubled because this is an R string literal.
    names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
    values_drop_na = TRUE
  ) %>%
  # The captured name pieces arrive as character; convert the bounds to
  # numbers so they can be matched against `strata`.
  type.convert(as.is = TRUE) %>%
  group_by(rn, direction) %>%
  # `strata` is a list-column; within one group every row carries the same
  # vector, so the first element is representative.
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  group_by(upper, .add = TRUE) %>%
  # "drop_last" peels off `upper`, leaving groups of (`rn`, `direction`).
  summarise(freq = sum(value), .groups = "drop_last") %>%
  # Append one extra row per group holding the total of its frequencies.
  group_modify(~ add_row(., freq = sum(.$freq))) %>%
  summarise(freq = list(freq), .groups = "drop")
-输出
# A tibble: 8 × 3
rn direction freq
<chr> <chr> <list>
1 Type_A east <int [7]>
2 Type_A north <int [7]>
3 Type_A south <int [7]>
4 Type_A west <int [7]>
5 Type_B east <int [6]>
6 Type_B north <int [6]>
7 Type_B south <int [6]>
8 Type_B west <int [6]>
如果我们只想按 'rn' 汇总:
# For each `rn`, build one long list: the per-bin frequencies of every
# direction, each direction's run followed by its own total.
dat_in_new %>%
  pivot_longer(
    cols = -c(rn, strata, Sum_table_in),
    names_to = c("lower", "upper", "direction"),
    # Splits names like "[0,25) east" or "[500,1000000] east" into the two
    # bounds and the direction; `[\\)\\]]` accepts either closing bracket.
    # Backslashes are doubled because this is an R string literal.
    names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
    values_drop_na = TRUE
  ) %>%
  # The captured name pieces arrive as character; convert the bounds to
  # numbers so they can be matched against `strata`.
  type.convert(as.is = TRUE) %>%
  group_by(rn, direction) %>%
  # `strata` is a list-column; within one group every row carries the same
  # vector, so the first element is representative.
  filter(lower %in% strata[[1]] & upper %in% strata[[1]]) %>%
  group_by(upper, .add = TRUE) %>%
  # "drop_last" peels off `upper`, leaving groups of (`rn`, `direction`).
  summarise(freq = sum(value), .groups = "drop_last") %>%
  # Append one extra row per (`rn`, `direction`) group with its total.
  group_modify(~ add_row(., freq = sum(.$freq))) %>%
  # Regroup by `rn` alone so the per-direction runs are concatenated.
  group_by(rn) %>%
  summarise(freq = list(freq), .groups = "drop")
-输出
# A tibble: 2 × 2
rn freq
<chr> <list>
1 Type_A <int [28]>
2 Type_B <int [24]>