使用 dplyr 的 mutate 为分组后的行添加子分组列
add column with mutate dplyr for grouped rows + subgrouping
这是我的最小示例,是按单词拆分的书籍数据集的简化版本。
# Minimal example data in dput()-style form: 57 rows, one word per row, for
# three books (19 rows each, `word_id` running 1-19 within every book).
# The object carries the classes of a grouped tibble whose stored `groups`
# attribute groups by `word`.
# It is bound to `df` because the snippets that follow read `df`.
df <- structure(list(word = c("in", "großer", "erregung", "umstehen",
"bauersleute", "knechte", "und", "mägde", "das", "gehöft",
"des", "servaz", "amareller", "bauers", "im", "hemmernmoos",
"und", "besprechen", "den", "einleitung", "lieber", "leser",
"weißt", "du", "was", "das", "wort", "greenhorn", "bedeutet",
"eine", "höchst", "ärgerliche", "und", "despektierliche", "bezeichnung",
"für", "denjenigen", "auf", "zum", "alm", "öhi", "hinauf",
"vom", "freundlichen", "dorfe", "maienfeld", "führt", "ein",
"fußweg", "durch", "grüne", "baumreiche", "fluren", "bis",
"zum", "fuße", "der"), word_id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L), book = c("bergrichters",
"bergrichters", "bergrichters", "bergrichters", "bergrichters",
"bergrichters", "bergrichters", "bergrichters", "bergrichters",
"bergrichters", "bergrichters", "bergrichters", "bergrichters",
"bergrichters", "bergrichters", "bergrichters", "bergrichters",
"bergrichters", "bergrichters", "winnetou", "winnetou", "winnetou",
"winnetou", "winnetou", "winnetou", "winnetou", "winnetou", "winnetou",
"winnetou", "winnetou", "winnetou", "winnetou", "winnetou", "winnetou",
"winnetou", "winnetou", "winnetou", "winnetou", "heidilehr",
"heidilehr", "heidilehr", "heidilehr", "heidilehr", "heidilehr",
"heidilehr", "heidilehr", "heidilehr", "heidilehr", "heidilehr",
"heidilehr", "heidilehr", "heidilehr", "heidilehr", "heidilehr",
"heidilehr", "heidilehr", "heidilehr")), row.names = c(NA, -57L
), groups = structure(list(word = c("alm", "amareller", "ärgerliche",
"auf", "bauers", "bauersleute", "baumreiche", "bedeutet", "besprechen",
"bezeichnung", "bis", "das", "den", "denjenigen", "der", "des",
"despektierliche", "dorfe", "du", "durch", "ein", "eine", "einleitung",
"erregung", "fluren", "freundlichen", "führt", "für", "fuße",
"fußweg", "gehöft", "greenhorn", "großer", "grüne", "hemmernmoos",
"hinauf", "höchst", "im", "in", "knechte", "leser", "lieber",
"mägde", "maienfeld", "öhi", "servaz", "umstehen", "und", "vom",
"was", "weißt", "wort", "zum"), .rows = structure(list(40L,
13L, 32L, 38L, 14L, 5L, 52L, 29L, 18L, 35L, 54L, c(9L, 26L
), 19L, 37L, 57L, 11L, 34L, 45L, 24L, 50L, 48L, 30L, 20L,
3L, 53L, 44L, 47L, 36L, 56L, 49L, 10L, 28L, 2L, 51L, 16L,
42L, 31L, 15L, 1L, 6L, 22L, 21L, 8L, 46L, 41L, 12L, 4L, c(7L,
17L, 33L), 43L, 25L, 23L, 27L, c(39L, 55L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 53L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
因为这些书没有章节,为了便于分析,我想添加一个名为 'section' 的"伪"章节列:先按书分组(group_by),再根据每本书的实际行数按比例把它拆分成 10 块(即每本书 10 节),各节依次编号为 1 到 10。
使用 dplyr 我找不到解决方案,我不知道如何实现。有什么建议么?
谢谢!
一种方法是利用 cut() 将每本书(book)分成 10 个部分。
library(dplyr)

# Assign every row of each book to one of 10 consecutive sections.
# Within each book, row_number() yields the row positions 1..n; cut() with
# `breaks = 10` splits that range into 10 equal-width intervals, and
# `labels = FALSE` returns the interval index (1-10) as an integer rather
# than a factor.
# ungroup() drops the per-book grouping afterwards so that later verbs are
# not silently evaluated book by book.
df %>%
  group_by(book) %>%
  mutate(section = cut(row_number(), breaks = 10, labels = FALSE)) %>%
  ungroup()
# word word_id book section
# <chr> <int> <chr> <int>
# 1 in 1 bergrichters 1
# 2 großer 2 bergrichters 1
# 3 erregung 3 bergrichters 2
# 4 umstehen 4 bergrichters 2
# 5 bauersleute 5 bergrichters 3
# 6 knechte 6 bergrichters 3
# 7 und 7 bergrichters 4
# 8 mägde 8 bergrichters 4
# 9 das 9 bergrichters 5
#10 gehöft 10 bergrichters 5
# … with 47 more rows
这也可以在 base R 中使用 ave() 完成:
# Base R equivalent: ave() applies the function to `word_id` separately for
# each `book` and puts the results back in the original row order.
# Only the length of each per-book chunk matters: its positions 1..n are cut
# into 10 equal-width intervals and the interval index (1-10) is stored in
# `section`.
df$section <- ave(
  df$word_id,
  df$book,
  FUN = function(ids) cut(seq_along(ids), breaks = 10, labels = FALSE)
)
这是我的最小示例,是按单词拆分的书籍数据集的简化版本。
# Minimal example data in dput()-style form: 57 rows, one word per row, for
# three books (19 rows each, `word_id` running 1-19 within every book).
# The object carries the classes of a grouped tibble whose stored `groups`
# attribute groups by `word`.
# It is bound to `df` because the snippets that follow read `df`.
df <- structure(list(word = c("in", "großer", "erregung", "umstehen",
"bauersleute", "knechte", "und", "mägde", "das", "gehöft",
"des", "servaz", "amareller", "bauers", "im", "hemmernmoos",
"und", "besprechen", "den", "einleitung", "lieber", "leser",
"weißt", "du", "was", "das", "wort", "greenhorn", "bedeutet",
"eine", "höchst", "ärgerliche", "und", "despektierliche", "bezeichnung",
"für", "denjenigen", "auf", "zum", "alm", "öhi", "hinauf",
"vom", "freundlichen", "dorfe", "maienfeld", "führt", "ein",
"fußweg", "durch", "grüne", "baumreiche", "fluren", "bis",
"zum", "fuße", "der"), word_id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L), book = c("bergrichters",
"bergrichters", "bergrichters", "bergrichters", "bergrichters",
"bergrichters", "bergrichters", "bergrichters", "bergrichters",
"bergrichters", "bergrichters", "bergrichters", "bergrichters",
"bergrichters", "bergrichters", "bergrichters", "bergrichters",
"bergrichters", "bergrichters", "winnetou", "winnetou", "winnetou",
"winnetou", "winnetou", "winnetou", "winnetou", "winnetou", "winnetou",
"winnetou", "winnetou", "winnetou", "winnetou", "winnetou", "winnetou",
"winnetou", "winnetou", "winnetou", "winnetou", "heidilehr",
"heidilehr", "heidilehr", "heidilehr", "heidilehr", "heidilehr",
"heidilehr", "heidilehr", "heidilehr", "heidilehr", "heidilehr",
"heidilehr", "heidilehr", "heidilehr", "heidilehr", "heidilehr",
"heidilehr", "heidilehr", "heidilehr")), row.names = c(NA, -57L
), groups = structure(list(word = c("alm", "amareller", "ärgerliche",
"auf", "bauers", "bauersleute", "baumreiche", "bedeutet", "besprechen",
"bezeichnung", "bis", "das", "den", "denjenigen", "der", "des",
"despektierliche", "dorfe", "du", "durch", "ein", "eine", "einleitung",
"erregung", "fluren", "freundlichen", "führt", "für", "fuße",
"fußweg", "gehöft", "greenhorn", "großer", "grüne", "hemmernmoos",
"hinauf", "höchst", "im", "in", "knechte", "leser", "lieber",
"mägde", "maienfeld", "öhi", "servaz", "umstehen", "und", "vom",
"was", "weißt", "wort", "zum"), .rows = structure(list(40L,
13L, 32L, 38L, 14L, 5L, 52L, 29L, 18L, 35L, 54L, c(9L, 26L
), 19L, 37L, 57L, 11L, 34L, 45L, 24L, 50L, 48L, 30L, 20L,
3L, 53L, 44L, 47L, 36L, 56L, 49L, 10L, 28L, 2L, 51L, 16L,
42L, 31L, 15L, 1L, 6L, 22L, 21L, 8L, 46L, 41L, 12L, 4L, c(7L,
17L, 33L), 43L, 25L, 23L, 27L, c(39L, 55L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 53L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
因为这些书没有章节,为了便于分析,我想添加一个名为 'section' 的"伪"章节列:先按书分组(group_by),再根据每本书的实际行数按比例把它拆分成 10 块(即每本书 10 节),各节依次编号为 1 到 10。
使用 dplyr 我找不到解决方案,我不知道如何实现。有什么建议么? 谢谢!
一种方法是利用 cut() 将每本书(book)分成 10 个部分。
library(dplyr)

# Assign every row of each book to one of 10 consecutive sections.
# Within each book, row_number() yields the row positions 1..n; cut() with
# `breaks = 10` splits that range into 10 equal-width intervals, and
# `labels = FALSE` returns the interval index (1-10) as an integer rather
# than a factor.
# ungroup() drops the per-book grouping afterwards so that later verbs are
# not silently evaluated book by book.
df %>%
  group_by(book) %>%
  mutate(section = cut(row_number(), breaks = 10, labels = FALSE)) %>%
  ungroup()
# word word_id book section
# <chr> <int> <chr> <int>
# 1 in 1 bergrichters 1
# 2 großer 2 bergrichters 1
# 3 erregung 3 bergrichters 2
# 4 umstehen 4 bergrichters 2
# 5 bauersleute 5 bergrichters 3
# 6 knechte 6 bergrichters 3
# 7 und 7 bergrichters 4
# 8 mägde 8 bergrichters 4
# 9 das 9 bergrichters 5
#10 gehöft 10 bergrichters 5
# … with 47 more rows
这也可以在 base R 中使用 ave() 完成:
# Base R equivalent: ave() applies the function to `word_id` separately for
# each `book` and puts the results back in the original row order.
# Only the length of each per-book chunk matters: its positions 1..n are cut
# into 10 equal-width intervals and the interval index (1-10) is stored in
# `section`.
df$section <- ave(
  df$word_id,
  df$book,
  FUN = function(ids) cut(seq_along(ids), breaks = 10, labels = FALSE)
)