如何在 R 中根据字符列的值进行分组并生成新列
How to group a column with character values in a new column in r
我有一个包含国家列的数据集,我想创建一个新列并将国家分为以下几类(第一世界、第二世界、第三世界)国家。
我是 R 的新手,我发现很难找到处理字符的合适函数!
我的数据集包含这样的国家,我有三个带有国家列表的向量,如下所示:
# Print the `Country name` column; the commented lines below are its output.
nt_final_table$`Country name`
#[1] "Finland" "Denmark" "Switzerland"
#[4] "Iceland" "Netherlands" "Norway"
#[7] "Sweden" "Luxembourg" "New Zealand"
#[10] "Austria" "Australia" "Israel"
# Reference vectors: the countries that belong to each category.
first_world_countries <- c(
  "Australia", "Austria", "Belgium", "Canada", "Denmark", "France", "Germany",
  "Greece", "Iceland", "Ireland", "Israel", "Italy", "Japan", "Luxembourg",
  "Netherlands", "New Zealand", "Norway", "Portugal", "South Korea", "Spain",
  "Sweden", "Switzerland", "Turkey", "United Kingdom", "USA"
)
Second_world_countries <- c(
  "Albania", "Armenia", "Azerbaijan", "Belarus", "Bosnia and Herzegovina",
  "Bulgaria", "China", "Croatia", "Cuba", "Czech Republic", "EastGermany",
  "Estonia", "Georgia", "Hungary", "Kazakhstan", "Kyrgyzstan", "Laos", "Poland",
  "Romania", "Russia", "Serbia", "Slovakia", "Slovenia", "Tajikistan",
  "Turkmenistan", "Ukraine", "Uzbekistan", "Vietnam"
)
# c() is required to build the vector: a bare parenthesised, comma-separated
# list of strings does not parse in R.
Third_world_countries <- c("Somalia", "Niger", "South Sudan")
我想要一个包含以下值的新列:
基于国家名称列的第一世界、第二世界、第三世界
如有任何帮助,我将不胜感激!
谢谢!
您可以通过以下 2 种方式完成此操作。
使用 dplyr 包
您可以使用 dplyr 包中的 case_when 来执行此操作。
library(dplyr)
# Sample data: a one-column data frame holding the countries to classify.
country_name <- c(
  "Finland", "Denmark", "Switzerland", "Iceland", "Netherlands", "Norway",
  "Sweden", "Luxembourg", "New Zealand", "Austria", "Australia", "Israel"
)
nt_final_table <- data.frame(country_name)

# Reference vectors: the countries that belong to each category.
first_world_countries <- c(
  "Australia", "Austria", "Belgium", "Canada", "Denmark", "France", "Germany",
  "Greece", "Iceland", "Ireland", "Israel", "Italy", "Japan", "Luxembourg",
  "Netherlands", "New Zealand", "Norway", "Portugal", "South Korea", "Spain",
  "Sweden", "Switzerland", "Turkey", "United Kingdom", "USA"
)
second_world_countries <- c(
  "Albania", "Armenia", "Azerbaijan", "Belarus", "Bosnia and Herzegovina",
  "Bulgaria", "China", "Croatia", "Cuba", "Czech Republic", "EastGermany",
  "Estonia", "Georgia", "Hungary", "Kazakhstan", "Kyrgyzstan", "Laos", "Poland",
  "Romania", "Russia", "Serbia", "Slovakia", "Slovenia", "Tajikistan",
  "Turkmenistan", "Ukraine", "Uzbekistan", "Vietnam"
)
third_world_countries <- c("Somalia", "Niger", "South Sudan")

# Add a `category` column: each row gets the label of the first reference
# vector that contains its country; anything unmatched becomes "Not listed".
nt_final_table_categorized <- mutate(
  nt_final_table,
  category = case_when(
    country_name %in% first_world_countries ~ "First",
    country_name %in% second_world_countries ~ "Second",
    country_name %in% third_world_countries ~ "Third",
    TRUE ~ "Not listed"
  )
)
nt_final_table_categorized
示例输出
country_name category
1 Finland Not listed
2 Denmark First
3 Switzerland First
4 Iceland First
5 Netherlands First
6 Norway First
7 Sweden First
8 Luxembourg First
9 New Zealand First
10 Austria First
11 Australia First
12 Israel First
使用 base R
在 base R 中,我们可以创建一个列出国家及其类别的数据框,然后使用 merge 对这两个数据框执行左连接(left join)。
# Sample data: a one-column data frame holding the countries to classify.
country_name <- c(
  "Finland", "Denmark", "Switzerland", "Iceland", "Netherlands", "Norway",
  "Sweden", "Luxembourg", "New Zealand", "Austria", "Australia", "Israel"
)
nt_final_table <- data.frame(country_name)

# Reference vectors: the countries that belong to each category.
first_world_countries <- c(
  "Australia", "Austria", "Belgium", "Canada", "Denmark", "France", "Germany",
  "Greece", "Iceland", "Ireland", "Israel", "Italy", "Japan", "Luxembourg",
  "Netherlands", "New Zealand", "Norway", "Portugal", "South Korea", "Spain",
  "Sweden", "Switzerland", "Turkey", "United Kingdom", "USA"
)
second_world_countries <- c(
  "Albania", "Armenia", "Azerbaijan", "Belarus", "Bosnia and Herzegovina",
  "Bulgaria", "China", "Croatia", "Cuba", "Czech Republic", "EastGermany",
  "Estonia", "Georgia", "Hungary", "Kazakhstan", "Kyrgyzstan", "Laos", "Poland",
  "Romania", "Russia", "Serbia", "Slovakia", "Slovenia", "Tajikistan",
  "Turkmenistan", "Ukraine", "Uzbekistan", "Vietnam"
)
third_world_countries <- c("Somalia", "Niger", "South Sudan")

# Lookup table: every known country next to its category label. The labels are
# repeated once per country so both columns line up element by element.
country_name <- c(
  first_world_countries,
  second_world_countries,
  third_world_countries
)
categories <- rep(
  c("First", "Second", "Third"),
  times = c(
    length(first_world_countries),
    length(second_world_countries),
    length(third_world_countries)
  )
)
all_countries_categorised <- data.frame(country_name, categories)

# Left join: all.x = TRUE keeps every row of nt_final_table, leaving NA in
# `categories` for countries that are missing from the lookup table.
nt_final_table_categorized <- merge(
  nt_final_table,
  all_countries_categorised,
  by = "country_name",
  all.x = TRUE
)
nt_final_table_categorized
示例输出
country_name categories
1 Australia First
2 Austria First
3 Denmark First
4 Finland <NA>
5 Iceland First
6 Israel First
7 Luxembourg First
8 Netherlands First
9 New Zealand First
10 Norway First
11 Sweden First
12 Switzerland First
我有一个包含国家列的数据集,我想创建一个新列并将国家分为以下几类(第一世界、第二世界、第三世界)国家。 我是 R 的新手,我发现很难找到处理字符的合适函数!
我的数据集包含这样的国家,我有三个带有国家列表的向量,如下所示:
# Print the `Country name` column; the commented lines below are its output.
nt_final_table$`Country name`
#[1] "Finland" "Denmark" "Switzerland"
#[4] "Iceland" "Netherlands" "Norway"
#[7] "Sweden" "Luxembourg" "New Zealand"
#[10] "Austria" "Australia" "Israel"
# Reference vectors: the countries that belong to each category.
first_world_countries <- c(
  "Australia", "Austria", "Belgium", "Canada", "Denmark", "France", "Germany",
  "Greece", "Iceland", "Ireland", "Israel", "Italy", "Japan", "Luxembourg",
  "Netherlands", "New Zealand", "Norway", "Portugal", "South Korea", "Spain",
  "Sweden", "Switzerland", "Turkey", "United Kingdom", "USA"
)
Second_world_countries <- c(
  "Albania", "Armenia", "Azerbaijan", "Belarus", "Bosnia and Herzegovina",
  "Bulgaria", "China", "Croatia", "Cuba", "Czech Republic", "EastGermany",
  "Estonia", "Georgia", "Hungary", "Kazakhstan", "Kyrgyzstan", "Laos", "Poland",
  "Romania", "Russia", "Serbia", "Slovakia", "Slovenia", "Tajikistan",
  "Turkmenistan", "Ukraine", "Uzbekistan", "Vietnam"
)
# c() is required to build the vector: a bare parenthesised, comma-separated
# list of strings does not parse in R.
Third_world_countries <- c("Somalia", "Niger", "South Sudan")
我想要一个包含以下值的新列: 基于国家名称列的第一世界、第二世界、第三世界
如有任何帮助,我将不胜感激! 谢谢!
您可以通过以下 2 种方式完成此操作。
使用 dplyr 包
您可以使用 dplyr 包中的 case_when 来执行此操作。
library(dplyr)
# Sample data: a one-column data frame holding the countries to classify.
country_name <- c(
  "Finland", "Denmark", "Switzerland", "Iceland", "Netherlands", "Norway",
  "Sweden", "Luxembourg", "New Zealand", "Austria", "Australia", "Israel"
)
nt_final_table <- data.frame(country_name)

# Reference vectors: the countries that belong to each category.
first_world_countries <- c(
  "Australia", "Austria", "Belgium", "Canada", "Denmark", "France", "Germany",
  "Greece", "Iceland", "Ireland", "Israel", "Italy", "Japan", "Luxembourg",
  "Netherlands", "New Zealand", "Norway", "Portugal", "South Korea", "Spain",
  "Sweden", "Switzerland", "Turkey", "United Kingdom", "USA"
)
second_world_countries <- c(
  "Albania", "Armenia", "Azerbaijan", "Belarus", "Bosnia and Herzegovina",
  "Bulgaria", "China", "Croatia", "Cuba", "Czech Republic", "EastGermany",
  "Estonia", "Georgia", "Hungary", "Kazakhstan", "Kyrgyzstan", "Laos", "Poland",
  "Romania", "Russia", "Serbia", "Slovakia", "Slovenia", "Tajikistan",
  "Turkmenistan", "Ukraine", "Uzbekistan", "Vietnam"
)
third_world_countries <- c("Somalia", "Niger", "South Sudan")

# Add a `category` column: each row gets the label of the first reference
# vector that contains its country; anything unmatched becomes "Not listed".
nt_final_table_categorized <- mutate(
  nt_final_table,
  category = case_when(
    country_name %in% first_world_countries ~ "First",
    country_name %in% second_world_countries ~ "Second",
    country_name %in% third_world_countries ~ "Third",
    TRUE ~ "Not listed"
  )
)
nt_final_table_categorized
示例输出
country_name category
1 Finland Not listed
2 Denmark First
3 Switzerland First
4 Iceland First
5 Netherlands First
6 Norway First
7 Sweden First
8 Luxembourg First
9 New Zealand First
10 Austria First
11 Australia First
12 Israel First
使用 base R
在 base R 中,我们可以创建一个列出国家及其类别的数据框,然后使用 merge 对这两个数据框执行左连接(left join)。
# Sample data: a one-column data frame holding the countries to classify.
country_name <- c(
  "Finland", "Denmark", "Switzerland", "Iceland", "Netherlands", "Norway",
  "Sweden", "Luxembourg", "New Zealand", "Austria", "Australia", "Israel"
)
nt_final_table <- data.frame(country_name)

# Reference vectors: the countries that belong to each category.
first_world_countries <- c(
  "Australia", "Austria", "Belgium", "Canada", "Denmark", "France", "Germany",
  "Greece", "Iceland", "Ireland", "Israel", "Italy", "Japan", "Luxembourg",
  "Netherlands", "New Zealand", "Norway", "Portugal", "South Korea", "Spain",
  "Sweden", "Switzerland", "Turkey", "United Kingdom", "USA"
)
second_world_countries <- c(
  "Albania", "Armenia", "Azerbaijan", "Belarus", "Bosnia and Herzegovina",
  "Bulgaria", "China", "Croatia", "Cuba", "Czech Republic", "EastGermany",
  "Estonia", "Georgia", "Hungary", "Kazakhstan", "Kyrgyzstan", "Laos", "Poland",
  "Romania", "Russia", "Serbia", "Slovakia", "Slovenia", "Tajikistan",
  "Turkmenistan", "Ukraine", "Uzbekistan", "Vietnam"
)
third_world_countries <- c("Somalia", "Niger", "South Sudan")

# Lookup table: every known country next to its category label. The labels are
# repeated once per country so both columns line up element by element.
country_name <- c(
  first_world_countries,
  second_world_countries,
  third_world_countries
)
categories <- rep(
  c("First", "Second", "Third"),
  times = c(
    length(first_world_countries),
    length(second_world_countries),
    length(third_world_countries)
  )
)
all_countries_categorised <- data.frame(country_name, categories)

# Left join: all.x = TRUE keeps every row of nt_final_table, leaving NA in
# `categories` for countries that are missing from the lookup table.
nt_final_table_categorized <- merge(
  nt_final_table,
  all_countries_categorised,
  by = "country_name",
  all.x = TRUE
)
nt_final_table_categorized
示例输出
country_name categories
1 Australia First
2 Austria First
3 Denmark First
4 Finland <NA>
5 Iceland First
6 Israel First
7 Luxembourg First
8 Netherlands First
9 New Zealand First
10 Norway First
11 Sweden First
12 Switzerland First