按组对数据框运行 corrplot
Run corrplot to a data frame by group
我有一个数据框,其中的列包含若干定量变量和一个定性(分组)变量。
数据框与这个结构相同:
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
我想按组对数据应用 corrplot 函数(来自 corrplot 包)。
有人能帮帮我吗?
我尝试按照 user20650 下面的建议进行操作,结果如下:
这是我的数据框的尾部:
structure(list(group = structure(c(4L, 4L, 4L, 4L, 4L, 4L), .Label = c("brooksi",
"copianullum", "fulbrighti", "paratrygonyi"), class = "factor"),
total_length = c(17, 25, 15, 9, 22, 25), max_w = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), n_prog = c(NA, NA, NA, NA, 482L, 432L), ceph_pedun_L = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), bothrid_L = c(NA, 870, NA, NA, NA, NA), bothrid_W = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), n_loculi = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), n_transv_septa = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), stalk_L = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), stalk_W = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), prog_max_W = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), term_seg_L = c(500L,
NA, 400L, 420L, NA, NA), term_seg_L.1 = c(360L, NA, 220L,
230L, NA, NA), ratio_term_seg = c(1.39, NA, 1.82, 1.83, NA,
NA), term_seg_SA = c(1800, NA, 880, 966, NA, NA), pore_pst_mrgn = c(360L,
NA, 260L, 300L, NA, NA), percent_.prog_L = c(72L, NA, 65L,
71L, NA, NA), n_progl_LgrW = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), n_mat_segs = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), n_testes = c(NA, 6L, 6L, 5L, NA, NA), testes_L = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), testes_W = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), length_tst_field = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), term_c_sac_L = c(150L,
NA, 105L, 125L, NA, NA), term_c_sac_W = c(125L, NA, 75L,
95L, NA, NA), ovary_L = c(255L, NA, 140L, 135L, NA, NA),
Ov_ratio_prog = c(51, NA, 35, 32.1, NA, NA), OV_max_W = c(240,
NA, 125, 140, NA, NA)), .Names = c("group", "total_length",
"max_w", "n_prog", "ceph_pedun_L", "bothrid_L", "bothrid_W",
"n_loculi", "n_transv_septa", "stalk_L", "stalk_W", "prog_max_W",
"term_seg_L", "term_seg_L.1", "ratio_term_seg", "term_seg_SA",
"pore_pst_mrgn", "percent_.prog_L", "n_progl_LgrW", "n_mat_segs",
"n_testes", "testes_L", "testes_W", "length_tst_field", "term_c_sac_L",
"term_c_sac_W", "ovary_L", "Ov_ratio_prog", "OV_max_W"), row.names = 563:568, class = "data.frame")
我试着用这段代码按照你说的做:
# Asker's attempt: for every distinct value of data$group, take that group's
# rows, drop the "group" column, compute the correlation matrix of the
# remaining columns and pass it to corrplot().
# NOTE(review): cor() is called with its default NA handling here, and the
# sample data contains many NA values, so the resulting matrix likely contains
# NA -- which matches the "missing value where TRUE/FALSE needed" error that
# corrplot() reports below.
for(i in unique(data$group)) {
corrplot(cor(data[data$group==i, -match("group", names(data))]))
}
但是我得到了这个错误:
Error in if (min(corr) < -1 - .Machine$double.eps || max(corr) > 1 + .Machine$double.eps) { :
missing value where TRUE/FALSE needed
升级评论
您需要针对分组变量的每个水平,分别计算各定量变量之间的相关性,然后对每个得到的相关矩阵应用 corrplot。
使用 iris 数据集:
# Draw one correlation plot per level of the grouping variable (iris example).
# corrplot() comes from the corrplot package, so it has to be attached first.
library(corrplot)

# Stack the plots in a 3 x 1 layout (iris has three species). par() returns
# the previous settings, which are kept so they can be restored afterwards.
old_par <- par(mfrow = c(3, 1))

# loop through the grouping variable
for (i in unique(iris$Species)) {
  # iris$Species == i keeps only the rows of the current group;
  # -match("Species", names(iris)) drops the grouping column so that only the
  # quantitative variables enter the correlation.
  group_cor <- cor(iris[iris$Species == i, -match("Species", names(iris))])

  # NOTE: if the data contain NA values, cor() returns NA with its default
  # `use = "everything"`; consider `use = "pairwise.complete.obs"` and drop
  # columns that are entirely NA before plotting.

  # Label each panel with its group; the top margin leaves room for the title.
  corrplot(group_cor, title = i, mar = c(0, 0, 2, 0))
}

# Restore the previous graphics settings so later plots are not affected.
par(old_par)
其中,`iris$Species==i` 用于取出属于分组变量当前水平的数据行;`-match("Species", names(iris))` 用于删除分组变量所在的列,使其不参与相关性计算。
我有一个数据框,其中的列包含若干定量变量和一个定性(分组)变量。
数据框与这个结构相同:
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
我想按组对数据应用 corrplot 函数(来自 corrplot 包)。
有人能帮帮我吗?
我尝试按照 user20650 下面的建议进行操作,结果如下:
这是我的数据框的尾部:
structure(list(group = structure(c(4L, 4L, 4L, 4L, 4L, 4L), .Label = c("brooksi",
"copianullum", "fulbrighti", "paratrygonyi"), class = "factor"),
total_length = c(17, 25, 15, 9, 22, 25), max_w = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), n_prog = c(NA, NA, NA, NA, 482L, 432L), ceph_pedun_L = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), bothrid_L = c(NA, 870, NA, NA, NA, NA), bothrid_W = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), n_loculi = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), n_transv_septa = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), stalk_L = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), stalk_W = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), prog_max_W = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), term_seg_L = c(500L,
NA, 400L, 420L, NA, NA), term_seg_L.1 = c(360L, NA, 220L,
230L, NA, NA), ratio_term_seg = c(1.39, NA, 1.82, 1.83, NA,
NA), term_seg_SA = c(1800, NA, 880, 966, NA, NA), pore_pst_mrgn = c(360L,
NA, 260L, 300L, NA, NA), percent_.prog_L = c(72L, NA, 65L,
71L, NA, NA), n_progl_LgrW = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), n_mat_segs = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), n_testes = c(NA, 6L, 6L, 5L, NA, NA), testes_L = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), testes_W = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), length_tst_field = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), term_c_sac_L = c(150L,
NA, 105L, 125L, NA, NA), term_c_sac_W = c(125L, NA, 75L,
95L, NA, NA), ovary_L = c(255L, NA, 140L, 135L, NA, NA),
Ov_ratio_prog = c(51, NA, 35, 32.1, NA, NA), OV_max_W = c(240,
NA, 125, 140, NA, NA)), .Names = c("group", "total_length",
"max_w", "n_prog", "ceph_pedun_L", "bothrid_L", "bothrid_W",
"n_loculi", "n_transv_septa", "stalk_L", "stalk_W", "prog_max_W",
"term_seg_L", "term_seg_L.1", "ratio_term_seg", "term_seg_SA",
"pore_pst_mrgn", "percent_.prog_L", "n_progl_LgrW", "n_mat_segs",
"n_testes", "testes_L", "testes_W", "length_tst_field", "term_c_sac_L",
"term_c_sac_W", "ovary_L", "Ov_ratio_prog", "OV_max_W"), row.names = 563:568, class = "data.frame")
我试着用这段代码按照你说的做:
# Asker's attempt: for every distinct value of data$group, take that group's
# rows, drop the "group" column, compute the correlation matrix of the
# remaining columns and pass it to corrplot().
# NOTE(review): cor() is called with its default NA handling here, and the
# sample data contains many NA values, so the resulting matrix likely contains
# NA -- which matches the "missing value where TRUE/FALSE needed" error that
# corrplot() reports below.
for(i in unique(data$group)) {
corrplot(cor(data[data$group==i, -match("group", names(data))]))
}
但是我得到了这个错误:
Error in if (min(corr) < -1 - .Machine$double.eps || max(corr) > 1 + .Machine$double.eps) { :
missing value where TRUE/FALSE needed
升级评论
您需要针对分组变量的每个水平,分别计算各定量变量之间的相关性,然后对每个得到的相关矩阵应用 corrplot。
使用 iris 数据集:
# Draw one correlation plot per level of the grouping variable (iris example).
# corrplot() comes from the corrplot package, so it has to be attached first.
library(corrplot)

# Stack the plots in a 3 x 1 layout (iris has three species). par() returns
# the previous settings, which are kept so they can be restored afterwards.
old_par <- par(mfrow = c(3, 1))

# loop through the grouping variable
for (i in unique(iris$Species)) {
  # iris$Species == i keeps only the rows of the current group;
  # -match("Species", names(iris)) drops the grouping column so that only the
  # quantitative variables enter the correlation.
  group_cor <- cor(iris[iris$Species == i, -match("Species", names(iris))])

  # NOTE: if the data contain NA values, cor() returns NA with its default
  # `use = "everything"`; consider `use = "pairwise.complete.obs"` and drop
  # columns that are entirely NA before plotting.

  # Label each panel with its group; the top margin leaves room for the title.
  corrplot(group_cor, title = i, mar = c(0, 0, 2, 0))
}

# Restore the previous graphics settings so later plots are not affected.
par(old_par)
其中,`iris$Species==i` 用于取出属于分组变量当前水平的数据行;`-match("Species", names(iris))` 用于删除分组变量所在的列,使其不参与相关性计算。