每个值每年的百分比
Percentages per year per value
我的数据
Chemical date concentration limit
A 01-01-2016 0.2 0.01
A 01-02-2016 0.2 0.01
A 01-01-2017 0.005 0.01
A 01-02-2017 0.2 0.01
B 01-01-2016 0.3 0.1
B 01-02-2016 0.05 0.1
B 01-01-2017 0.2 0.1
B 01-02-2017 0.2 0.1
C 01-01-2016 1.2 1
C 01-02-2016 0.8 1
C 01-01-2017 0.9 1
C 01-02-2017 0.9 1
我想显示每年超过限值的每种化学品的百分比(注意每个限值都不同)。所以我想得到这样的东西
Year A B C
2016 100% 50% 50%
2017 50% 100% 0
我已经有了计算每种化学物质每年超过次数的代码,但是在计算百分比时我弄错了。
下面是我用来计算超标次数的代码:
library(tidyverse)
# Count, for every chemical and calendar year, how many measurements reach or
# exceed that chemical's limit, then spread the counts to one column per
# chemical (year/chemical combinations with no rows are filled with 0).
# NOTE(review): format(date, "%Y") presumably expects `date` to be a Date
# column -- confirm against how `data` is loaded.
counts <-
  data %>%
  group_by(Chemical, grp = format(date, format = '%Y')) %>%
  summarise(tot_exceed = sum(concentration >= limit)) %>%  # each TRUE adds 1
  spread(Chemical, tot_exceed, fill = 0)
于是我得到了
Year A B C
2016 2 1 1
2017 1 2 0
对于百分比,我试过了。
# Attempt at the percentage of measurements per chemical and year that reach
# or exceed the limit.
# NOTE(review): `countconc` is created with mutate(), so inside summarise() it
# is still a per-row column rather than a single value per group; this is the
# likely reason the result is not the expected one-value-per-group percentage.
percentage_exceed<- data %>%
group_by(Chemical, grp = format(date, format = '%Y')) %>%
mutate(exceed = concentration >= limit, countconc = length(concentration))
# NOTE(review): the pipe below starts its own line; R would treat the previous
# line as a complete statement, so this looks like a line-wrap artifact.
%>%
summarise(percent = (sum(exceed)/countconc)*100) %>%
spread(Chemical, percent, fill = 0)
但是我没有得到我想要的结果。你能帮帮我吗?
使用 tidyverse:
library(tidyverse)
library(lubridate)
# Percentage of measurements per chemical and year that reach or exceed the
# chemical's limit, one column per chemical.
# The share is the mean of the TRUE/FALSE exceedance flag within each
# (Chemical, yr) group, i.e. exceedances divided by the number of measurements
# in that group. Dividing by max(tot_exceed) instead would scale against the
# chemical's highest yearly count and report 100 for C in 2016 rather than 50.
# NOTE(review): mdy() assumes month-day-year text; use dmy() if the dates are
# day-month-year -- confirm against the data source.
data %>%
  mutate(yr = year(mdy(date))) %>%
  group_by(Chemical, yr) %>%
  summarise(proc = mean(concentration >= limit) * 100) %>%
  ungroup() %>%
  spread(Chemical, proc)
# A tibble: 2 x 4
#      yr     A     B     C
#   <dbl> <dbl> <dbl> <dbl>
# 1  2016   100    50    50
# 2  2017    50   100     0
# Example data: one row per measurement with the chemical, the sampling date
# (as text), the measured concentration and that chemical's limit.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
dt <- read.table(text = "
Chemical date concentration limit
A 01-01-2016 0.2 0.01
A 01-02-2016 0.2 0.01
A 01-01-2017 0.005 0.01
A 01-02-2017 0.2 0.01
B 01-01-2016 0.3 0.1
B 01-02-2016 0.05 0.1
B 01-01-2017 0.2 0.1
B 01-02-2017 0.2 0.1
C 01-01-2016 1.2 1
C 01-02-2016 0.8 1
C 01-01-2017 0.9 1
C 01-02-2017 0.9 1
", header = TRUE)
library(tidyverse)
library(lubridate)
# Share of measurements per year and chemical that reach or exceed the limit,
# formatted as text with a trailing "%" and spread to one column per chemical.
dt %>%
  mutate(year = year(dmy(date))) %>%
  group_by(year, Chemical) %>%
  # exceedances divided by the number of rows in the group, as a percentage
  summarise(Prc = paste0(sum(concentration >= limit) / n() * 100, "%")) %>%
  ungroup() %>%
  spread(Chemical, Prc)
# # A tibble: 2 x 4
# year A B C
# <dbl> <chr> <chr> <chr>
# 1 2016 100% 50% 50%
# 2 2017 50% 100% 0%
你的方法非常好,你只需要用mean
替换sum
并乘以100:
# Percentage of measurements per chemical and year at or above the limit.
# The mean of a logical vector is the fraction of TRUE values, so multiplying
# it by 100 gives the percentage directly.
data %>%
  group_by(Chemical, grp = format(date, format = '%Y')) %>%
  mutate(over_limit = concentration >= limit) %>%
  summarise(pct_exceed = 100 * mean(over_limit)) %>%
  spread(Chemical, pct_exceed, fill = 0)
# A tibble: 2 x 4
# grp A B C
# <chr> <dbl> <dbl> <dbl>
# 1 2016 100 50 50
# 2 2017 50 100 0
您尝试的那一行代码
summarise(percent = (sum(exceed)/countconc) * 100)
几乎是对的:问题在于 countconc
是一整列,而不是 summarise 所需要的单个值。因此,既然它在每个组内本来就是一个常数列,您可以这样写,例如,
summarise(percent = (sum(exceed)/countconc[1]) * 100)
但是考虑到之前的那一行,
mutate(exceed = concentration >= limit, countconc = length(concentration))
归根结底只是在求平均值(mean),所以我们又回到了我回答开头的代码。
另请注意,使用 lubridate
您可以将第一行写为
data %>% group_by(Chemical, Year = year(date)) %>%
一些非常简洁但可能不是您想要的格式
# Long-format percentage of measurements per chemical and year that reach or
# exceed the limit. `>=` is used so a measurement equal to the limit is
# counted the same way as in the wide-format pipeline built on
# `concentration >= limit`.
# NOTE(review): year(date) presumably expects `date` to be a Date (or
# date-time) column -- confirm.
data %>%
  group_by(Chemical, Year = year(date)) %>%
  summarise(Percentage = mean(concentration >= limit) * 100)
# A tibble: 6 x 3
# Groups: Chemical [?]
# Chemical Year Percentage
# <fct> <dbl> <dbl>
# 1 A 2016 100
# 2 A 2017 50
# 3 B 2016 50
# 4 B 2017 100
# 5 C 2016 50
# 6 C 2017 0
使用 tidyverse
和 reshape2
你可以:
# Percentage of measurements above the limit per year and chemical, reshaped
# with dcast() to one column per chemical.
df %>%
  # the year is taken as characters 7-10 of the date text (e.g. "01-01-2016")
  mutate(date = str_sub(as.character(date), 7, 10)) %>%
  group_by(date, Chemical) %>%
  # sum(..., na.rm = TRUE) counts only TRUE comparisons; subsetting with a
  # logical vector that contains NA would keep an NA element and inflate the
  # count of exceedances.
  summarise(temp = sum(concentration > limit, na.rm = TRUE) / n() * 100) %>%
  dcast(date ~ Chemical, value.var = "temp")
date A B C
1 2016 100 50 50
2 2017 50 100 0
或者仅 tidyverse
使用 spread()
# Percentage of measurements above the limit per year and chemical, spread to
# one column per chemical (missing combinations are filled with 0).
df %>%
  # the year is taken as characters 7-10 of the date text (e.g. "01-01-2016")
  mutate(date = str_sub(as.character(date), 7, 10)) %>%
  group_by(date, Chemical) %>%
  # sum(..., na.rm = TRUE) counts only TRUE comparisons; subsetting with a
  # logical vector that contains NA would keep an NA element and inflate the
  # count of exceedances.
  summarise(temp = sum(concentration > limit, na.rm = TRUE) / n() * 100) %>%
  spread(Chemical, temp, fill = 0)
我的数据
Chemical date concentration limit
A 01-01-2016 0.2 0.01
A 01-02-2016 0.2 0.01
A 01-01-2017 0.005 0.01
A 01-02-2017 0.2 0.01
B 01-01-2016 0.3 0.1
B 01-02-2016 0.05 0.1
B 01-01-2017 0.2 0.1
B 01-02-2017 0.2 0.1
C 01-01-2016 1.2 1
C 01-02-2016 0.8 1
C 01-01-2017 0.9 1
C 01-02-2017 0.9 1
我想显示每年超过限值的每种化学品的百分比(注意每个限值都不同)。所以我想得到这样的东西
Year A B C
2016 100% 50% 50%
2017 50% 100% 0
我已经有了计算每种化学物质每年超过次数的代码,但是在计算百分比时我弄错了。
下面是我用来计算超标次数的代码:
library(tidyverse)
# Count, for every chemical and calendar year, how many measurements reach or
# exceed that chemical's limit, then spread the counts to one column per
# chemical (year/chemical combinations with no rows are filled with 0).
# NOTE(review): format(date, "%Y") presumably expects `date` to be a Date
# column -- confirm against how `data` is loaded.
counts <-
  data %>%
  group_by(Chemical, grp = format(date, format = '%Y')) %>%
  summarise(tot_exceed = sum(concentration >= limit)) %>%  # each TRUE adds 1
  spread(Chemical, tot_exceed, fill = 0)
于是我得到了
Year A B C
2016 2 1 1
2017 1 2 0
对于百分比,我试过了。
# Attempt at the percentage of measurements per chemical and year that reach
# or exceed the limit.
# NOTE(review): `countconc` is created with mutate(), so inside summarise() it
# is still a per-row column rather than a single value per group; this is the
# likely reason the result is not the expected one-value-per-group percentage.
percentage_exceed<- data %>%
group_by(Chemical, grp = format(date, format = '%Y')) %>%
mutate(exceed = concentration >= limit, countconc = length(concentration))
# NOTE(review): the pipe below starts its own line; R would treat the previous
# line as a complete statement, so this looks like a line-wrap artifact.
%>%
summarise(percent = (sum(exceed)/countconc)*100) %>%
spread(Chemical, percent, fill = 0)
但是我没有得到我想要的结果。你能帮帮我吗?
使用 tidyverse:
library(tidyverse)
library(lubridate)
# Percentage of measurements per chemical and year that reach or exceed the
# chemical's limit, one column per chemical.
# The share is the mean of the TRUE/FALSE exceedance flag within each
# (Chemical, yr) group, i.e. exceedances divided by the number of measurements
# in that group. Dividing by max(tot_exceed) instead would scale against the
# chemical's highest yearly count and report 100 for C in 2016 rather than 50.
# NOTE(review): mdy() assumes month-day-year text; use dmy() if the dates are
# day-month-year -- confirm against the data source.
data %>%
  mutate(yr = year(mdy(date))) %>%
  group_by(Chemical, yr) %>%
  summarise(proc = mean(concentration >= limit) * 100) %>%
  ungroup() %>%
  spread(Chemical, proc)
# A tibble: 2 x 4
#      yr     A     B     C
#   <dbl> <dbl> <dbl> <dbl>
# 1  2016   100    50    50
# 2  2017    50   100     0
# Example data: one row per measurement with the chemical, the sampling date
# (as text), the measured concentration and that chemical's limit.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
dt <- read.table(text = "
Chemical date concentration limit
A 01-01-2016 0.2 0.01
A 01-02-2016 0.2 0.01
A 01-01-2017 0.005 0.01
A 01-02-2017 0.2 0.01
B 01-01-2016 0.3 0.1
B 01-02-2016 0.05 0.1
B 01-01-2017 0.2 0.1
B 01-02-2017 0.2 0.1
C 01-01-2016 1.2 1
C 01-02-2016 0.8 1
C 01-01-2017 0.9 1
C 01-02-2017 0.9 1
", header = TRUE)
library(tidyverse)
library(lubridate)
# Share of measurements per year and chemical that reach or exceed the limit,
# formatted as text with a trailing "%" and spread to one column per chemical.
dt %>%
  mutate(year = year(dmy(date))) %>%
  group_by(year, Chemical) %>%
  # exceedances divided by the number of rows in the group, as a percentage
  summarise(Prc = paste0(sum(concentration >= limit) / n() * 100, "%")) %>%
  ungroup() %>%
  spread(Chemical, Prc)
# # A tibble: 2 x 4
# year A B C
# <dbl> <chr> <chr> <chr>
# 1 2016 100% 50% 50%
# 2 2017 50% 100% 0%
你的方法非常好,你只需要用mean
替换sum
并乘以100:
# Percentage of measurements per chemical and year at or above the limit.
# The mean of a logical vector is the fraction of TRUE values, so multiplying
# it by 100 gives the percentage directly.
data %>%
  group_by(Chemical, grp = format(date, format = '%Y')) %>%
  mutate(over_limit = concentration >= limit) %>%
  summarise(pct_exceed = 100 * mean(over_limit)) %>%
  spread(Chemical, pct_exceed, fill = 0)
# A tibble: 2 x 4
# grp A B C
# <chr> <dbl> <dbl> <dbl>
# 1 2016 100 50 50
# 2 2017 50 100 0
您尝试的那一行代码
summarise(percent = (sum(exceed)/countconc) * 100)
几乎是对的:问题在于 countconc
是一整列,而不是 summarise 所需要的单个值。因此,既然它在每个组内本来就是一个常数列,您可以这样写,例如,
summarise(percent = (sum(exceed)/countconc[1]) * 100)
但是考虑到之前的那一行,
mutate(exceed = concentration >= limit, countconc = length(concentration))
归根结底只是在求平均值(mean),所以我们又回到了我回答开头的代码。
另请注意,使用 lubridate
您可以将第一行写为
data %>% group_by(Chemical, Year = year(date)) %>%
一些非常简洁但可能不是您想要的格式
# Long-format percentage of measurements per chemical and year that reach or
# exceed the limit. `>=` is used so a measurement equal to the limit is
# counted the same way as in the wide-format pipeline built on
# `concentration >= limit`.
# NOTE(review): year(date) presumably expects `date` to be a Date (or
# date-time) column -- confirm.
data %>%
  group_by(Chemical, Year = year(date)) %>%
  summarise(Percentage = mean(concentration >= limit) * 100)
# A tibble: 6 x 3
# Groups: Chemical [?]
# Chemical Year Percentage
# <fct> <dbl> <dbl>
# 1 A 2016 100
# 2 A 2017 50
# 3 B 2016 50
# 4 B 2017 100
# 5 C 2016 50
# 6 C 2017 0
使用 tidyverse
和 reshape2
你可以:
# Percentage of measurements above the limit per year and chemical, reshaped
# with dcast() to one column per chemical.
df %>%
  # the year is taken as characters 7-10 of the date text (e.g. "01-01-2016")
  mutate(date = str_sub(as.character(date), 7, 10)) %>%
  group_by(date, Chemical) %>%
  # sum(..., na.rm = TRUE) counts only TRUE comparisons; subsetting with a
  # logical vector that contains NA would keep an NA element and inflate the
  # count of exceedances.
  summarise(temp = sum(concentration > limit, na.rm = TRUE) / n() * 100) %>%
  dcast(date ~ Chemical, value.var = "temp")
date A B C
1 2016 100 50 50
2 2017 50 100 0
或者仅 tidyverse
使用 spread()
# Percentage of measurements above the limit per year and chemical, spread to
# one column per chemical (missing combinations are filled with 0).
df %>%
  # the year is taken as characters 7-10 of the date text (e.g. "01-01-2016")
  mutate(date = str_sub(as.character(date), 7, 10)) %>%
  group_by(date, Chemical) %>%
  # sum(..., na.rm = TRUE) counts only TRUE comparisons; subsetting with a
  # logical vector that contains NA would keep an NA element and inflate the
  # count of exceedances.
  summarise(temp = sum(concentration > limit, na.rm = TRUE) / n() * 100) %>%
  spread(Chemical, temp, fill = 0)