根据特定列更改 Dataframe 中的值

Question

我目前有一个数据框，其中包含置信区间的下界、上界以及点估计值。我想创建一个新的数据框，以便只绘制点估计值不等于零的变量。对于每个 n 值，如果点估计值为 0，能否把对应的上界和下界改为 NA？例如，在所提供的 n = 205 的数据框中，有些行的 y_pe = 0，因此我想把这些行对应的 y_lo 和 y_up 改为 NA。

# Example data: lower bound (_lo), upper bound (_up) and point estimate (_pe)
# for three variables (X1, x, y), ten rows each.

# `n` was referenced by cbind() below but never defined, which made the
# example fail with "object 'n' not found". The question text and the printed
# results show n = 205 on all ten rows.
n <- rep(205, 10)

X1_lo <- rep(0, 10)
X1_up <- rep(0, 10)
X1_pe <- rep(0, 10)
x_lo <- rep(0, 10)
x_up <- rep(0, 10)
x_pe <- rep(0, 10)
y_lo <- c(
  -24.71177, -25.13779, -16.19142, -15.63819, -15.42051,
  -16.11342, -17.10603, -18.00848, -19.59877, -12.91438
)
y_up <- c(
  14.074116, 14.051209, 13.417954, 12.187319, 13.602022,
  12.943939, 1.317839, 11.891103, 15.165398, 1.365459
)
y_pe <- c(
  -2.984101, -2.867680, -2.695838, -2.583140, -2.416878,
  0.000000, 0.000000, 0.000000, 0.000000, 0.000000
)

# cbind() on plain vectors yields a numeric matrix whose column names are the
# argument names.
test.df <- cbind(n, X1_lo, X1_up, x_lo, x_up, y_lo, y_up, X1_pe, x_pe, y_pe)

提前致谢。

Answer 1

在 i 中指定逻辑向量（行索引），在 j 中指定列名，然后把 'y_pe' 为 0 的那些行中的这些列赋值为 NA

# Rows whose y point estimate is zero get NA in both y bounds.
zero_pe <- test.df[, "y_pe"] == 0
test.df[zero_pe, c("y_lo", "y_up")] <- NA

如果数据是 data.frame，并且想把同样的处理应用到除第一列（'n'）以外的所有列组，那么可以按列名前缀把它 split 成一组 data.frame，再分别进行替换

# Variable prefix of every column except the first (`n`): the text before the
# first "_", e.g. "X1_lo" -> "X1".
nm1 <- sub("_.*", "", colnames(test.df)[-1])

# For one group of columns sharing a prefix, set the non-"pe" columns to NA in
# the rows where the "pe" column is zero.
mask_bounds <- function(grp) {
  is_pe <- endsWith(names(grp), "pe")
  # `!` on a numeric column is TRUE exactly where the value is 0.
  zero_rows <- !grp[, is_pe]
  grp[zero_rows, !is_pe] <- NA
  grp
}

# Split the columns by prefix, mask each group, and bind the groups back
# together; unname() stops cbind from prefixing columns with the group name.
groups <- split.default(test.df[-1], nm1)
out <- do.call(cbind, unname(lapply(groups, mask_bounds)))
out
#   x_lo x_up x_pe X1_lo X1_up X1_pe      y_lo     y_up      y_pe
#1    NA   NA    0    NA    NA     0 -24.71177 14.07412 -2.984101
#2    NA   NA    0    NA    NA     0 -25.13779 14.05121 -2.867680
#3    NA   NA    0    NA    NA     0 -16.19142 13.41795 -2.695838
#4    NA   NA    0    NA    NA     0 -15.63819 12.18732 -2.583140
#5    NA   NA    0    NA    NA     0 -15.42051 13.60202 -2.416878
#6    NA   NA    0    NA    NA     0        NA       NA  0.000000
#7    NA   NA    0    NA    NA     0        NA       NA  0.000000
#8    NA   NA    0    NA    NA     0        NA       NA  0.000000
#9    NA   NA    0    NA    NA     0        NA       NA  0.000000
#10   NA   NA    0    NA    NA     0        NA       NA  0.000000

# Overwrite the matching columns of test.df by name; `n` and the original
# column order of test.df are left as they were.
test.df[colnames(out)] <- out
test.df
#     n X1_lo X1_up x_lo x_up      y_lo     y_up X1_pe x_pe      y_pe
#1  205    NA    NA   NA   NA -24.71177 14.07412     0    0 -2.984101
#2  205    NA    NA   NA   NA -25.13779 14.05121     0    0 -2.867680
#3  205    NA    NA   NA   NA -16.19142 13.41795     0    0 -2.695838
#4  205    NA    NA   NA   NA -15.63819 12.18732     0    0 -2.583140
#5  205    NA    NA   NA   NA -15.42051 13.60202     0    0 -2.416878
#6  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#7  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#8  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#9  205    NA    NA   NA   NA        NA       NA     0    0  0.000000
#10 205    NA    NA   NA   NA        NA       NA     0    0  0.000000

或使用tidyverse

library(dplyr)
library(tidyr)
library(stringr)
# Reshape to one row per (original row, variable group), blank the bounds
# where the point estimate is zero, then reshape back to the wide layout.
test.df %>%
  mutate(rn = row_number()) %>% # row id so pivot_wider can rebuild each row
  pivot_longer(
    cols = -c(n, rn),
    names_sep = "_",
    names_to = c("group", ".value")
  ) %>%
  mutate(across(c(lo, up), ~ replace(.x, pe == 0, NA))) %>%
  pivot_wider(
    names_from = "group",
    values_from = c("lo", "up", "pe"),
    # pivot_wider names the columns "<value>_<group>" (e.g. "lo_X1"); swap
    # the parts back to "X1_lo". The backslashes must be doubled in an R
    # string, otherwise "\2" is parsed as an octal character escape.
    names_repair = ~ str_replace(., "(.*)_(.*)", "\\2_\\1")
  ) %>%
  select(-rn)
# A tibble: 10 x 10
#       n X1_lo  x_lo  y_lo X1_pe  x_pe  y_pe X1_up  x_up  y_up
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1   205    NA    NA -24.7     0     0 -2.98    NA    NA  14.1
# 2   205    NA    NA -25.1     0     0 -2.87    NA    NA  14.1
# 3   205    NA    NA -16.2     0     0 -2.70    NA    NA  13.4
# 4   205    NA    NA -15.6     0     0 -2.58    NA    NA  12.2
# 5   205    NA    NA -15.4     0     0 -2.42    NA    NA  13.6
# 6   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 7   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 8   205    NA    NA  NA       0     0  0       NA    NA  NA  
# 9   205    NA    NA  NA       0     0  0       NA    NA  NA  
#10   205    NA    NA  NA       0     0  0       NA    NA  NA

数据

# Same columns as the question's data, built as a data.frame (not a matrix).
test.df <- data.frame(
  n,
  X1_lo, X1_up,
  x_lo, x_up,
  y_lo, y_up,
  X1_pe, x_pe, y_pe
)

Answer 2

我更喜欢 akrun 的回答，但这里有一个使用 dplyr 的 mutate 的替代方法：

library(dplyr) # mutate() is exported by dplyr, not tidyr

# Set each y bound to NA wherever the y point estimate is exactly zero.
test.df %>%
  mutate(
    y_up = ifelse(y_pe == 0, NA, y_up),
    y_lo = ifelse(y_pe == 0, NA, y_lo)
  )

根据特定列更改 Dataframe 中的值

Changing Values in Dataframe based on Specific Columns

r

tidy

dataframe

na

dplyr

数据