根据特定列更改 Dataframe 中的值
Changing Values in Dataframe based on Specific Columns
我目前有一个dataframe consisting of lower and upper bounds of confidence intervals
and point estimates. I want to create a new dataframe so I can plot the variables where point estimates are not equal to zero. For each value of n
, is it possible to change the values of the upper and lower bounds to na如果点估计值是0
?例如,在 n = 205
时提供的数据框中, y_pe = 0
所以我想将相应的 y_lo
和 y_up
更改为 NA
.
X1_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_up <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_pe <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_up <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_pe <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
y_lo <- c(-24.71177, -25.13779, -16.19142, -15.63819 ,-15.42051 ,-16.11342, -17.10603 ,-18.00848 ,-19.59877, -12.91438)
y_up <- c(14.074116, 14.051209, 13.417954, 12.187319 ,13.602022, 12.943939, 1.317839 ,11.891103, 15.165398, 1.365459)
y_pe <- c(-2.984101, -2.867680, -2.695838 ,-2.583140, -2.416878 , 0.000000, 0.000000 , 0.000000 , 0.000000, 0.000000)
test.df <- cbind(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)
提前致谢。
在i
中指定逻辑向量,在j
中指定列名,然后将'y_pe'为0的列赋值给NA
test.df[test.df[, "y_pe"] == 0, c("y_lo", "y_up")] <- NA
如果数据是data.frame
,想应用到除第一列('n')以外的所有列组,那么我们可以split
成一组data.frames 并进行替换
nm1 <- sub("_.*", "", colnames(test.df)[-1])
out <- do.call(cbind, unname(lapply(split.default(test.df[-1], nm1),
function(x) {
i1 <- endsWith(names(x), "pe")
x[!x[,i1], !i1] <- NA
x})))
out
# x_lo x_up x_pe X1_lo X1_up X1_pe y_lo y_up y_pe
#1 NA NA 0 NA NA 0 -24.71177 14.07412 -2.984101
#2 NA NA 0 NA NA 0 -25.13779 14.05121 -2.867680
#3 NA NA 0 NA NA 0 -16.19142 13.41795 -2.695838
#4 NA NA 0 NA NA 0 -15.63819 12.18732 -2.583140
#5 NA NA 0 NA NA 0 -15.42051 13.60202 -2.416878
#6 NA NA 0 NA NA 0 NA NA 0.000000
#7 NA NA 0 NA NA 0 NA NA 0.000000
#8 NA NA 0 NA NA 0 NA NA 0.000000
#9 NA NA 0 NA NA 0 NA NA 0.000000
#10 NA NA 0 NA NA 0 NA NA 0.000000
test.df[names(out)] <- out
test.df
# n X1_lo X1_up x_lo x_up y_lo y_up X1_pe x_pe y_pe
#1 205 NA NA NA NA -24.71177 14.07412 0 0 -2.984101
#2 205 NA NA NA NA -25.13779 14.05121 0 0 -2.867680
#3 205 NA NA NA NA -16.19142 13.41795 0 0 -2.695838
#4 205 NA NA NA NA -15.63819 12.18732 0 0 -2.583140
#5 205 NA NA NA NA -15.42051 13.60202 0 0 -2.416878
#6 205 NA NA NA NA NA NA 0 0 0.000000
#7 205 NA NA NA NA NA NA 0 0 0.000000
#8 205 NA NA NA NA NA NA 0 0 0.000000
#9 205 NA NA NA NA NA NA 0 0 0.000000
#10 205 NA NA NA NA NA NA 0 0 0.000000
或使用tidyverse
library(dplyr)
library(tidyr)
library(stringr)
test.df %>%
mutate(rn = row_number()) %>%
pivot_longer(cols = -c(n, rn), names_sep="_",
names_to = c("group", ".value")) %>%
mutate_at(vars(lo, up), ~ replace(., pe == 0, NA)) %>%
pivot_wider(names_from = "group", values_from = c('lo', 'up', 'pe'),
names_repair = ~ str_replace(., "(.*)_(.*)", "\2_\1")) %>%
select(-rn)
# A tibble: 10 x 10
# n X1_lo x_lo y_lo X1_pe x_pe y_pe X1_up x_up y_up
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 205 NA NA -24.7 0 0 -2.98 NA NA 14.1
# 2 205 NA NA -25.1 0 0 -2.87 NA NA 14.1
# 3 205 NA NA -16.2 0 0 -2.70 NA NA 13.4
# 4 205 NA NA -15.6 0 0 -2.58 NA NA 12.2
# 5 205 NA NA -15.4 0 0 -2.42 NA NA 13.6
# 6 205 NA NA NA 0 0 0 NA NA NA
# 7 205 NA NA NA 0 0 0 NA NA NA
# 8 205 NA NA NA 0 0 0 NA NA NA
# 9 205 NA NA NA 0 0 0 NA NA NA
#10 205 NA NA NA 0 0 0 NA NA NA
数据
test.df <- data.frame(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)
我更喜欢 akrun 的回答,但这里有一个使用 tidyr 的替代方法 mutate
:
library(tidyr)
test.df %>% mutate(y_up = ifelse(y_pe == 0, NA, y_up),
y_lo = ifelse(y_pe == 0, NA, y_lo))
我目前有一个dataframe consisting of lower and upper bounds of confidence intervals
and point estimates. I want to create a new dataframe so I can plot the variables where point estimates are not equal to zero. For each value of n
, is it possible to change the values of the upper and lower bounds to na如果点估计值是0
?例如,在 n = 205
时提供的数据框中, y_pe = 0
所以我想将相应的 y_lo
和 y_up
更改为 NA
.
X1_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_up <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
X1_pe <-c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_lo <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_up <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
x_pe <- c(0 ,0 ,0 ,0 ,0 ,0 ,0 ,0, 0 ,0)
y_lo <- c(-24.71177, -25.13779, -16.19142, -15.63819 ,-15.42051 ,-16.11342, -17.10603 ,-18.00848 ,-19.59877, -12.91438)
y_up <- c(14.074116, 14.051209, 13.417954, 12.187319 ,13.602022, 12.943939, 1.317839 ,11.891103, 15.165398, 1.365459)
y_pe <- c(-2.984101, -2.867680, -2.695838 ,-2.583140, -2.416878 , 0.000000, 0.000000 , 0.000000 , 0.000000, 0.000000)
test.df <- cbind(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)
提前致谢。
在i
中指定逻辑向量,在j
中指定列名,然后将'y_pe'为0的列赋值给NA
test.df[test.df[, "y_pe"] == 0, c("y_lo", "y_up")] <- NA
如果数据是data.frame
,想应用到除第一列('n')以外的所有列组,那么我们可以split
成一组data.frames 并进行替换
nm1 <- sub("_.*", "", colnames(test.df)[-1])
out <- do.call(cbind, unname(lapply(split.default(test.df[-1], nm1),
function(x) {
i1 <- endsWith(names(x), "pe")
x[!x[,i1], !i1] <- NA
x})))
out
# x_lo x_up x_pe X1_lo X1_up X1_pe y_lo y_up y_pe
#1 NA NA 0 NA NA 0 -24.71177 14.07412 -2.984101
#2 NA NA 0 NA NA 0 -25.13779 14.05121 -2.867680
#3 NA NA 0 NA NA 0 -16.19142 13.41795 -2.695838
#4 NA NA 0 NA NA 0 -15.63819 12.18732 -2.583140
#5 NA NA 0 NA NA 0 -15.42051 13.60202 -2.416878
#6 NA NA 0 NA NA 0 NA NA 0.000000
#7 NA NA 0 NA NA 0 NA NA 0.000000
#8 NA NA 0 NA NA 0 NA NA 0.000000
#9 NA NA 0 NA NA 0 NA NA 0.000000
#10 NA NA 0 NA NA 0 NA NA 0.000000
test.df[names(out)] <- out
test.df
# n X1_lo X1_up x_lo x_up y_lo y_up X1_pe x_pe y_pe
#1 205 NA NA NA NA -24.71177 14.07412 0 0 -2.984101
#2 205 NA NA NA NA -25.13779 14.05121 0 0 -2.867680
#3 205 NA NA NA NA -16.19142 13.41795 0 0 -2.695838
#4 205 NA NA NA NA -15.63819 12.18732 0 0 -2.583140
#5 205 NA NA NA NA -15.42051 13.60202 0 0 -2.416878
#6 205 NA NA NA NA NA NA 0 0 0.000000
#7 205 NA NA NA NA NA NA 0 0 0.000000
#8 205 NA NA NA NA NA NA 0 0 0.000000
#9 205 NA NA NA NA NA NA 0 0 0.000000
#10 205 NA NA NA NA NA NA 0 0 0.000000
或使用tidyverse
library(dplyr)
library(tidyr)
library(stringr)
test.df %>%
mutate(rn = row_number()) %>%
pivot_longer(cols = -c(n, rn), names_sep="_",
names_to = c("group", ".value")) %>%
mutate_at(vars(lo, up), ~ replace(., pe == 0, NA)) %>%
pivot_wider(names_from = "group", values_from = c('lo', 'up', 'pe'),
names_repair = ~ str_replace(., "(.*)_(.*)", "\2_\1")) %>%
select(-rn)
# A tibble: 10 x 10
# n X1_lo x_lo y_lo X1_pe x_pe y_pe X1_up x_up y_up
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 205 NA NA -24.7 0 0 -2.98 NA NA 14.1
# 2 205 NA NA -25.1 0 0 -2.87 NA NA 14.1
# 3 205 NA NA -16.2 0 0 -2.70 NA NA 13.4
# 4 205 NA NA -15.6 0 0 -2.58 NA NA 12.2
# 5 205 NA NA -15.4 0 0 -2.42 NA NA 13.6
# 6 205 NA NA NA 0 0 0 NA NA NA
# 7 205 NA NA NA 0 0 0 NA NA NA
# 8 205 NA NA NA 0 0 0 NA NA NA
# 9 205 NA NA NA 0 0 0 NA NA NA
#10 205 NA NA NA 0 0 0 NA NA NA
数据
test.df <- data.frame(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)
我更喜欢 akrun 的回答,但这里有一个使用 tidyr 的替代方法 mutate
:
library(tidyr)
test.df %>% mutate(y_up = ifelse(y_pe == 0, NA, y_up),
y_lo = ifelse(y_pe == 0, NA, y_lo))