如何在 R 中根据某一列的值进行 pivot_wider(长表转宽表)
How to pivot wider in R on one column value
下面是示例数据以及我目前所做的处理。我之前尝试过其他方法,但现在有一个想法也许能让事情更简单一些。期望的结果列在底部。我想要的是:当 smb 列的值为 Total 时,以此为依据进行 pivot_wider(转为宽表)。smb 有五个可能的取值:1、2、3、4 和 Total。我希望得到一个新列 smb.total,其中包含每个 smb/year/qtr/area 组合的总计。我尝试过在 pivot_wider 语句(见底部)之前加一个过滤器(filter)。
library(readxl)
library(dplyr)
library(stringr)
library(tidyverse)
library(gt)
# Sample data: 32 records = 2 areas x 2 quarters x 8 records each.
employment <- c(
  1, 45, 125, 130, 165, 260, 600, 601,
  2, 46, 127, 132, 167, 265, 601, 602,
  50, 61, 110, 121, 170, 305, 55, 603,
  52, 66, 112, 123, 172, 310, 604, 605
)
# The same eight size codes repeat for every area/quarter combination.
small <- rep(c(1, 1, 2, 2, 3, 4, NA, NA), times = 4)
# First 16 records belong to area 1, the remaining 16 to area 3.
area <- rep(c(1, 3), each = 16)
year <- rep(2020, times = 32)
# Within each area: eight records for quarter 1, then eight for quarter 2.
qtr <- rep(rep(c(1, 2), each = 8), times = 2)
# `smb` is created here as a numeric placeholder column filled with 0.
smbtest <- data.frame(employment, small, area, year, qtr, smb = 0)
# Assign each record to a size band based on its employment.
# case_when() uses the first condition that is TRUE, so once negative values
# are ruled out each band only needs its upper bound. Negative, missing, or
# >= 100000 employment ends up as NA.
smbtest <- smbtest %>%
  mutate(
    smb = case_when(
      employment < 0 ~ NA_character_,
      employment < 100 ~ "1",
      employment < 150 ~ "2",
      employment < 250 ~ "3",
      employment < 500 ~ "4",
      employment < 100000 ~ "Total"
    )
  )
# One row per area / period / size band with summed employment and a row
# count (worksites). summarise() drops only the last grouping level (smb), so
# the cumulative sums below run across the bands within each area/period.
smbsummary2 <- smbtest %>%
  mutate(period = paste0(year, "q", qtr)) %>%
  group_by(area, period, smb) %>%
  summarise(
    employment = sum(employment),
    worksites = n(),
    .groups = "drop_last"
  ) %>%
  mutate(across(c(employment, worksites), cumsum))
# For every area / size band, compare each row with the preceding row of the
# same group: previous employment, absolute change, and relative change
# formatted as a percentage string with two decimals.
smbsummary2 <- smbsummary2 %>%
  group_by(area, smb) %>%
  mutate(
    empprevyear = lag(employment),
    empprevyearpp = employment - empprevyear,
    empprevyearpct = scales::percent(
      (employment / empprevyear) - 1,
      accuracy = 0.01
    )
  )
area period smb employment worksites smb.Total
1 2020q1 1 46 2 1927
1 2020q1 2 301 4 1927
1 2020q1 3 466 5 1927
1 2020q1 4 726 6 1927
1 2020q1 Total 1927 8 1927
# Add the "Total" employment of each area/period to every row of that
# area/period as `smb.Total`.
#
# The original attempt could not run: `filter()` was not piped into
# `pivot_wider()`, and it filtered on `small`, a column that no longer exists
# after the `summarise()` step (the size band lives in `smb`). Filtering would
# also have discarded the rows for bands 1-4, whereas the expected output
# keeps them. A grouped mutate broadcasts the total to every row instead.
smbsummary2 <- smbsummary2 %>%
  group_by(area, period) %>%
  # match() gives NA (rather than an error) if a group has no "Total" row.
  mutate(smb.Total = employment[match("Total", smb)]) %>%
  ungroup()
请告诉我我理解得对不对。
您想要的 smb.total 是什么的总计?是 employment(就业)变量吗?
如果是,
在您的对象“smbsummary2”中使用此代码:
# Attach to every record the total employment of its smb/year/qtr/area group.
# sum(employment) is used because the total is meant to be of the employment
# variable; n() would only count the rows in each group.
smbsummary2 <- smbtest %>%
  relocate(smb, year, qtr, area, small, employment) %>%
  group_by(smb, year, qtr, area) %>%
  mutate(smb.total = sum(employment)) %>%
  # Drop the grouping so later verbs are not silently applied per group.
  ungroup()
如果不是,您能解释得更清楚一些吗?
也许这段代码可以解决您的问题:
# Sample data: 32 records = 2 areas x 2 quarters x 8 records each.
employment <- c(
  1, 45, 125, 130, 165, 260, 600, 601,
  2, 46, 127, 132, 167, 265, 601, 602,
  50, 61, 110, 121, 170, 305, 55, 603,
  52, 66, 112, 123, 172, 310, 604, 605
)
# The same eight size codes repeat for every area/quarter combination.
small <- rep(c(1, 1, 2, 2, 3, 4, NA, NA), times = 4)
# First 16 records belong to area 1, the remaining 16 to area 3.
area <- rep(c(1, 3), each = 16)
year <- rep(2020, times = 32)
# Within each area: eight records for quarter 1, then eight for quarter 2.
qtr <- rep(rep(c(1, 2), each = 8), times = 2)
# Assemble the sample tibble. `smb` starts as a numeric placeholder (0) and is
# then replaced by the character size band derived from employment.
smbtest <- tibble(employment, small, area, year, qtr, smb = 0)

# case_when() uses the first condition that is TRUE, so once negative values
# are ruled out each band only needs its upper bound. Negative, missing, or
# >= 100000 employment ends up as NA. Finally the columns are reordered.
smbtest <- smbtest %>%
  mutate(
    smb = case_when(
      employment < 0 ~ NA_character_,
      employment < 100 ~ "1",
      employment < 150 ~ "2",
      employment < 250 ~ "3",
      employment < 500 ~ "4",
      employment < 100000 ~ "Total"
    )
  ) %>%
  relocate(smb, year, qtr, area, small, employment)
# One row per area / period / size band with summed employment and a row
# count (worksites), then cumulative sums across the bands of each
# area/period.
smbsummary2 <- smbtest %>%
  mutate(period = paste0(year, "q", qtr)) %>%
  group_by(area, period, smb) %>%
  summarise(
    employment = sum(employment),
    worksites = n(),
    # State the regrouping explicitly: the cumsum() below depends on the
    # result still being grouped by area and period, and this also silences
    # summarise()'s "has grouped output by" message.
    .groups = "drop_last"
  ) %>%
  mutate(
    employment = cumsum(employment),
    worksites = cumsum(worksites)
  )
# Express each band's employment as a percentage of the "Total" row of the
# same area/period. The result is printed, not assigned.
smbsummary2 %>%
  group_by(area, period) %>%
  mutate(
    # match() picks the position of the "Total" row and gives NA when a group
    # has none, so such groups get NA instead of making mutate() fail on a
    # zero-length denominator.
    `employ/period (%)` = employment / employment[match("Total", smb)] * 100
  )
可能不是最佳答案,但对于您的数据,我认为它很有效。
如果不是请告诉我。
干得好!
下面是示例数据以及我目前所做的处理。我之前尝试过其他方法,但现在有一个想法也许能让事情更简单一些。期望的结果列在底部。我想要的是:当 smb 列的值为 Total 时,以此为依据进行 pivot_wider(转为宽表)。smb 有五个可能的取值:1、2、3、4 和 Total。我希望得到一个新列 smb.total,其中包含每个 smb/year/qtr/area 组合的总计。我尝试过在 pivot_wider 语句(见底部)之前加一个过滤器(filter)。
library(readxl)
library(dplyr)
library(stringr)
library(tidyverse)
library(gt)
# Sample data: 32 records = 2 areas x 2 quarters x 8 records each.
employment <- c(
  1, 45, 125, 130, 165, 260, 600, 601,
  2, 46, 127, 132, 167, 265, 601, 602,
  50, 61, 110, 121, 170, 305, 55, 603,
  52, 66, 112, 123, 172, 310, 604, 605
)
# The same eight size codes repeat for every area/quarter combination.
small <- rep(c(1, 1, 2, 2, 3, 4, NA, NA), times = 4)
# First 16 records belong to area 1, the remaining 16 to area 3.
area <- rep(c(1, 3), each = 16)
year <- rep(2020, times = 32)
# Within each area: eight records for quarter 1, then eight for quarter 2.
qtr <- rep(rep(c(1, 2), each = 8), times = 2)
# `smb` is created here as a numeric placeholder column filled with 0.
smbtest <- data.frame(employment, small, area, year, qtr, smb = 0)
# Assign each record to a size band based on its employment.
# case_when() uses the first condition that is TRUE, so once negative values
# are ruled out each band only needs its upper bound. Negative, missing, or
# >= 100000 employment ends up as NA.
smbtest <- smbtest %>%
  mutate(
    smb = case_when(
      employment < 0 ~ NA_character_,
      employment < 100 ~ "1",
      employment < 150 ~ "2",
      employment < 250 ~ "3",
      employment < 500 ~ "4",
      employment < 100000 ~ "Total"
    )
  )
# One row per area / period / size band with summed employment and a row
# count (worksites). summarise() drops only the last grouping level (smb), so
# the cumulative sums below run across the bands within each area/period.
smbsummary2 <- smbtest %>%
  mutate(period = paste0(year, "q", qtr)) %>%
  group_by(area, period, smb) %>%
  summarise(
    employment = sum(employment),
    worksites = n(),
    .groups = "drop_last"
  ) %>%
  mutate(across(c(employment, worksites), cumsum))
# For every area / size band, compare each row with the preceding row of the
# same group: previous employment, absolute change, and relative change
# formatted as a percentage string with two decimals.
smbsummary2 <- smbsummary2 %>%
  group_by(area, smb) %>%
  mutate(
    empprevyear = lag(employment),
    empprevyearpp = employment - empprevyear,
    empprevyearpct = scales::percent(
      (employment / empprevyear) - 1,
      accuracy = 0.01
    )
  )
area period smb employment worksites smb.Total
1 2020q1 1 46 2 1927
1 2020q1 2 301 4 1927
1 2020q1 3 466 5 1927
1 2020q1 4 726 6 1927
1 2020q1 Total 1927 8 1927
# Add the "Total" employment of each area/period to every row of that
# area/period as `smb.Total`.
#
# The original attempt could not run: `filter()` was not piped into
# `pivot_wider()`, and it filtered on `small`, a column that no longer exists
# after the `summarise()` step (the size band lives in `smb`). Filtering would
# also have discarded the rows for bands 1-4, whereas the expected output
# keeps them. A grouped mutate broadcasts the total to every row instead.
smbsummary2 <- smbsummary2 %>%
  group_by(area, period) %>%
  # match() gives NA (rather than an error) if a group has no "Total" row.
  mutate(smb.Total = employment[match("Total", smb)]) %>%
  ungroup()
请告诉我我理解得对不对。
您想要的 smb.total 是什么的总计?是 employment(就业)变量吗?如果是,请在您的对象“smbsummary2”上使用以下代码:
# Attach to every record the total employment of its smb/year/qtr/area group.
# sum(employment) is used because the total is meant to be of the employment
# variable; n() would only count the rows in each group.
smbsummary2 <- smbtest %>%
  relocate(smb, year, qtr, area, small, employment) %>%
  group_by(smb, year, qtr, area) %>%
  mutate(smb.total = sum(employment)) %>%
  # Drop the grouping so later verbs are not silently applied per group.
  ungroup()
如果不是,您能解释得更清楚一些吗?
也许这段代码可以解决您的问题:
# Sample data: 32 records = 2 areas x 2 quarters x 8 records each.
employment <- c(
  1, 45, 125, 130, 165, 260, 600, 601,
  2, 46, 127, 132, 167, 265, 601, 602,
  50, 61, 110, 121, 170, 305, 55, 603,
  52, 66, 112, 123, 172, 310, 604, 605
)
# The same eight size codes repeat for every area/quarter combination.
small <- rep(c(1, 1, 2, 2, 3, 4, NA, NA), times = 4)
# First 16 records belong to area 1, the remaining 16 to area 3.
area <- rep(c(1, 3), each = 16)
year <- rep(2020, times = 32)
# Within each area: eight records for quarter 1, then eight for quarter 2.
qtr <- rep(rep(c(1, 2), each = 8), times = 2)
# Assemble the sample tibble. `smb` starts as a numeric placeholder (0) and is
# then replaced by the character size band derived from employment.
smbtest <- tibble(employment, small, area, year, qtr, smb = 0)

# case_when() uses the first condition that is TRUE, so once negative values
# are ruled out each band only needs its upper bound. Negative, missing, or
# >= 100000 employment ends up as NA. Finally the columns are reordered.
smbtest <- smbtest %>%
  mutate(
    smb = case_when(
      employment < 0 ~ NA_character_,
      employment < 100 ~ "1",
      employment < 150 ~ "2",
      employment < 250 ~ "3",
      employment < 500 ~ "4",
      employment < 100000 ~ "Total"
    )
  ) %>%
  relocate(smb, year, qtr, area, small, employment)
# One row per area / period / size band with summed employment and a row
# count (worksites), then cumulative sums across the bands of each
# area/period.
smbsummary2 <- smbtest %>%
  mutate(period = paste0(year, "q", qtr)) %>%
  group_by(area, period, smb) %>%
  summarise(
    employment = sum(employment),
    worksites = n(),
    # State the regrouping explicitly: the cumsum() below depends on the
    # result still being grouped by area and period, and this also silences
    # summarise()'s "has grouped output by" message.
    .groups = "drop_last"
  ) %>%
  mutate(
    employment = cumsum(employment),
    worksites = cumsum(worksites)
  )
# Express each band's employment as a percentage of the "Total" row of the
# same area/period. The result is printed, not assigned.
smbsummary2 %>%
  group_by(area, period) %>%
  mutate(
    # match() picks the position of the "Total" row and gives NA when a group
    # has none, so such groups get NA instead of making mutate() fail on a
    # zero-length denominator.
    `employ/period (%)` = employment / employment[match("Total", smb)] * 100
  )
可能不是最佳答案,但对于您的数据,我认为它很有效。 如果不是请告诉我。
干得好!