从现有对计算几个新变量并根据 R 中的其他变量标准化新变量值
Calculating several new variables from existing pairs and standardising new variable values against other variables in R
我想从变量对中创建新的 [word]_c 变量,从 variable_a 中减去 variable_b,但是因为有 50 对,所以能够这样做而不必写出每个名字。
一旦我有了 [word]_c 列,我想标准化 [word]_c 和 V[word]Q.[number] 列,以便可以比较它们。我知道每个 [word]_a 和 [word]_b 列都是 1-100 的数字,每个 V[word]Q.[number] 列是 1-9 的数字。
例如,从:
Word_b Word_a Six_b Six_a Flute_b Flute_a VWordQ.13 VSixQ.22 VFluteQ.7
<chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
60 1 1 30 1 1 6.53 5.14 6.68
70 10 3 50 50 10 NA NA 5.60
51 31 1 48 52 1 5.60 5.95 NA
对此(加上 V 变量):
Word_b Word_a Word_c Six_b Six_a Six_c Flute_b Flute_a Flute_c ...
60 1 -50 1 30 29 1 1 0 ...
70 10 -60 3 50 47 50 10 -40 ...
51 31 -20 1 48 47 52 1 -51 ...
...然后仅标准化 _c 和 V 列。
(列的顺序对我来说并不重要)
示例数据:
structure(list(Word_b = c("60", "70", "51", "73", "13",
"60", "30"), Word_a = c("1", "10", "31", "30", "22", "5",
"30"), Six_b = c("1", "3", "1", "0", "0", "0", "40"), Six_a = c("30",
"50", "48", "41", "35", "0", "65"), Flute_b = c("1", "50",
"52", "50", "45", "80", "30"), Flute_a = c("1", "10", "1",
"0", "0", "0", "3"), VWordQ.13 = c(6.53, NA, 5.6, 5.6, 5.21,
5.44, 6), VSixQ.22 = c(5.14, NA, 5.95, 3.25, 3.24, 3, 3),
VFluteQ.7 = c(6.68, NA, 5.6, 6.68, 6.92, NA, 6.68)), row.names = c(NA,
-7L), class = c("tbl_df", "tbl", "data.frame"))
第一部分任务完成。
library(tidyverse)
df = structure(list(Word_b = c("60", "70", "51", "73", "13",
"60", "30"), Word_a = c("1", "10", "31", "30", "22", "5",
"30"), Six_b = c("1", "3", "1", "0", "0", "0", "40"), Six_a = c("30",
"50", "48", "41", "35", "0", "65"), Flute_b = c("1", "50",
"52", "50", "45", "80", "30"), Flute_a = c("1", "10", "1",
"0", "0", "0", "3"), VWordQ.13 = c(6.53, NA, 5.6, 5.6, 5.21,
5.44, 6), VSixQ.22 = c(5.14, NA, 5.95, 3.25, 3.24, 3, 3),
VFluteQ.7 = c(6.68, NA, 5.6, 6.68, 6.92, NA, 6.68)), row.names = c(NA,
-7L), class = c("tbl_df", "tbl", "data.frame"))
df = df %>% type.convert(as.is = TRUE)
for(name in names(df) %>% str_match("(^.*)_([a,b])") %>% .[,2] %>% .[!is.na(.)] %>% unique()){
df=df %>% mutate(!!as.name(paste0(name,"_c")) :=
!!as.name(paste0(name,"_a")) -
!!as.name(paste0(name,"_b")))
}
df
输出
# A tibble: 7 x 12
Word_b Word_a Six_b Six_a Flute_b Flute_a VWordQ.13 VSixQ.22 VFluteQ.7 Word_c Six_c Flute_c
<int> <int> <int> <int> <int> <int> <dbl> <dbl> <dbl> <int> <int> <int>
1 60 1 1 30 1 1 6.53 5.14 6.68 -59 29 0
2 70 10 3 50 50 10 NA NA NA -60 47 -40
3 51 31 1 48 52 1 5.6 5.95 5.6 -20 47 -51
4 73 30 0 41 50 0 5.6 3.25 6.68 -43 41 -50
5 13 22 0 35 45 0 5.21 3.24 6.92 9 35 -45
6 60 5 0 0 80 0 5.44 3 NA -55 0 -80
7 30 30 40 65 30 3 6 3 6.68 0 25 -27
但我不明白这是什么意思只对 _c 和 V 列进行标准化。
小更新
可以这样做
for(name in names(df) %>% str_match("(^.*)_([a,b])") %>% .[,2] %>% .[!is.na(.)] %>% unique()){
df=df %>% mutate(!!as.name(paste0(name,"_c")) := scale(
!!as.name(paste0(name,"_a")) -
!!as.name(paste0(name,"_b")))[,1])
}
df %>% mutate_at(vars(contains(".")), ~scale(.x)[,1])
输出
# A tibble: 7 x 12
Word_b Word_a Six_b Six_a Flute_b Flute_a VWordQ.13 VSixQ.22 VFluteQ.7 Word_c Six_c Flute_c
<int> <int> <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 60 1 1 30 1 1 1.70 0.944 0.323 -0.915 -0.182 1.71
2 70 10 3 50 50 10 NA NA NA -0.949 0.912 0.0759
3 51 31 1 48 52 1 -0.277 1.58 -1.75 0.435 0.912 -0.374
4 73 30 0 41 50 0 -0.277 -0.531 0.323 -0.361 0.547 -0.333
5 13 22 0 35 45 0 -1.11 -0.538 0.784 1.44 0.182 -0.128
6 60 5 0 0 80 0 -0.618 -0.726 NA -0.776 -1.95 -1.56
7 30 30 40 65 30 3 0.575 -0.726 0.323 1.13 -0.426 0.607
希望这就是你的意思。
我想从变量对中创建新的 [word]_c 变量,从 variable_a 中减去 variable_b,但是因为有 50 对,所以能够这样做而不必写出每个名字。
一旦我有了 [word]_c 列,我想标准化 [word]_c 和 V[word]Q.[number] 列,以便可以比较它们。我知道每个 [word]_a 和 [word]_b 列都是 1-100 的数字,每个 V[word]Q.[number] 列是 1-9 的数字。
例如,从:
Word_b Word_a Six_b Six_a Flute_b Flute_a VWordQ.13 VSixQ.22 VFluteQ.7
<chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
60 1 1 30 1 1 6.53 5.14 6.68
70 10 3 50 50 10 NA NA 5.60
51 31 1 48 52 1 5.60 5.95 NA
对此(加上 V 变量):
Word_b Word_a Word_c Six_b Six_a Six_c Flute_b Flute_a Flute_c ...
60 1 -50 1 30 29 1 1 0 ...
70 10 -60 3 50 47 50 10 -40 ...
51 31 -20 1 48 47 52 1 -51 ...
...然后仅标准化 _c 和 V 列。
(列的顺序对我来说并不重要)
示例数据:
structure(list(Word_b = c("60", "70", "51", "73", "13",
"60", "30"), Word_a = c("1", "10", "31", "30", "22", "5",
"30"), Six_b = c("1", "3", "1", "0", "0", "0", "40"), Six_a = c("30",
"50", "48", "41", "35", "0", "65"), Flute_b = c("1", "50",
"52", "50", "45", "80", "30"), Flute_a = c("1", "10", "1",
"0", "0", "0", "3"), VWordQ.13 = c(6.53, NA, 5.6, 5.6, 5.21,
5.44, 6), VSixQ.22 = c(5.14, NA, 5.95, 3.25, 3.24, 3, 3),
VFluteQ.7 = c(6.68, NA, 5.6, 6.68, 6.92, NA, 6.68)), row.names = c(NA,
-7L), class = c("tbl_df", "tbl", "data.frame"))
第一部分任务完成。
library(tidyverse)
df = structure(list(Word_b = c("60", "70", "51", "73", "13",
"60", "30"), Word_a = c("1", "10", "31", "30", "22", "5",
"30"), Six_b = c("1", "3", "1", "0", "0", "0", "40"), Six_a = c("30",
"50", "48", "41", "35", "0", "65"), Flute_b = c("1", "50",
"52", "50", "45", "80", "30"), Flute_a = c("1", "10", "1",
"0", "0", "0", "3"), VWordQ.13 = c(6.53, NA, 5.6, 5.6, 5.21,
5.44, 6), VSixQ.22 = c(5.14, NA, 5.95, 3.25, 3.24, 3, 3),
VFluteQ.7 = c(6.68, NA, 5.6, 6.68, 6.92, NA, 6.68)), row.names = c(NA,
-7L), class = c("tbl_df", "tbl", "data.frame"))
df = df %>% type.convert(as.is = TRUE)
for(name in names(df) %>% str_match("(^.*)_([a,b])") %>% .[,2] %>% .[!is.na(.)] %>% unique()){
df=df %>% mutate(!!as.name(paste0(name,"_c")) :=
!!as.name(paste0(name,"_a")) -
!!as.name(paste0(name,"_b")))
}
df
输出
# A tibble: 7 x 12
Word_b Word_a Six_b Six_a Flute_b Flute_a VWordQ.13 VSixQ.22 VFluteQ.7 Word_c Six_c Flute_c
<int> <int> <int> <int> <int> <int> <dbl> <dbl> <dbl> <int> <int> <int>
1 60 1 1 30 1 1 6.53 5.14 6.68 -59 29 0
2 70 10 3 50 50 10 NA NA NA -60 47 -40
3 51 31 1 48 52 1 5.6 5.95 5.6 -20 47 -51
4 73 30 0 41 50 0 5.6 3.25 6.68 -43 41 -50
5 13 22 0 35 45 0 5.21 3.24 6.92 9 35 -45
6 60 5 0 0 80 0 5.44 3 NA -55 0 -80
7 30 30 40 65 30 3 6 3 6.68 0 25 -27
但我不明白这是什么意思只对 _c 和 V 列进行标准化。
小更新
可以这样做
for(name in names(df) %>% str_match("(^.*)_([a,b])") %>% .[,2] %>% .[!is.na(.)] %>% unique()){
df=df %>% mutate(!!as.name(paste0(name,"_c")) := scale(
!!as.name(paste0(name,"_a")) -
!!as.name(paste0(name,"_b")))[,1])
}
df %>% mutate_at(vars(contains(".")), ~scale(.x)[,1])
输出
# A tibble: 7 x 12
Word_b Word_a Six_b Six_a Flute_b Flute_a VWordQ.13 VSixQ.22 VFluteQ.7 Word_c Six_c Flute_c
<int> <int> <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 60 1 1 30 1 1 1.70 0.944 0.323 -0.915 -0.182 1.71
2 70 10 3 50 50 10 NA NA NA -0.949 0.912 0.0759
3 51 31 1 48 52 1 -0.277 1.58 -1.75 0.435 0.912 -0.374
4 73 30 0 41 50 0 -0.277 -0.531 0.323 -0.361 0.547 -0.333
5 13 22 0 35 45 0 -1.11 -0.538 0.784 1.44 0.182 -0.128
6 60 5 0 0 80 0 -0.618 -0.726 NA -0.776 -1.95 -1.56
7 30 30 40 65 30 3 0.575 -0.726 0.323 1.13 -0.426 0.607
希望这就是你的意思。