如何根据数据框单元格名称对 PCA 图进行颜色编码?
How to colour code a PCA plot based on the data frame cell names?
# Simulate a 100-gene x 10-sample count matrix: five wild-type ("wt") and
# five knock-out ("ko") samples.
data.matrix <- matrix(nrow = 100, ncol = 10)
colnames(data.matrix) <- c(
  paste0("wt", 1:5),
  paste0("ko", 1:5)
)
rownames(data.matrix) <- paste0("gene", 1:100)
for (i in seq_len(nrow(data.matrix))) {
  # Each gene draws one random Poisson mean per group, then five counts.
  wt.values <- rpois(5, lambda = sample(x = 10:1000, size = 1))
  ko.values <- rpois(5, lambda = sample(x = 10:1000, size = 1))
  data.matrix[i, ] <- c(wt.values, ko.values)
}
head(data.matrix)
dim(data.matrix)

# The matrix is transposed so that samples are the rows passed to prcomp().
# `scale.` is spelled out in full instead of relying on partial matching.
pca <- prcomp(t(data.matrix), scale. = TRUE)

# Percentage of total variance carried by each principal component.
# The axis labels below need this; it was previously used without being
# defined.
pca.var <- pca$sdev^2
pca.var.per <- round(pca.var / sum(pca.var) * 100, 1)

# Install ggplot2 only if it is missing (the original call was misspelled
# as `intall.packages`), then load it.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# One row per sample with its coordinates on the first two components.
pca.data <- data.frame(
  Sample = rownames(pca$x),
  X = pca$x[, 1],
  Y = pca$x[, 2]
)
pca.data

ggplot(data = pca.data, aes(x = X, y = Y, label = Sample)) +
  geom_text() +
  xlab(paste0("PC1 - ", pca.var.per[1], "%")) +
  ylab(paste0("PC2 - ", pca.var.per[2], "%")) +
  theme_bw() +
  ggtitle("My PCA Graph")
上面的代码给出了这个最终的主成分分析图:
如何将wt点和ko点变成彩色点? (即,将所有“wt”点更改为蓝点,将所有“ko”点更改为红点)
编辑:问题在我最初的回答后发生了变化,更新后的答案见底部。
你可以用 `substr()` 取出 `Sample` 的第二个字符,然后把它传给 `col`。这是一个例子:
library(ggplot2)
library(dplyr)

# Toy data: the second character of each Sample name ("1" or "2") is the
# group used for colouring.
example_data <- data.frame(
  Sample = c("A1.1", "H2.1", "F2.1", "B1.1", "C1.1", "S2.1", "J2.1", "K1.1"),
  X = rnorm(n = 8),
  Y = rnorm(n = 8)
)

example_data %>%
  # New column holding the group character extracted from the sample name.
  mutate(prop = substr(Sample, 2, 2)) %>%
  ggplot(aes(x = X, y = Y, label = Sample, col = prop)) +
  geom_text() +
  xlab("PC1 - ") +
  # This was a second xlab() call, which overwrote the PC1 label and left
  # the y axis unlabelled.
  ylab("PC2 - ") +
  # Map each group value to a fixed colour.
  scale_color_manual(values = c("1" = "blue", "2" = "red")) +
  theme_bw() +
  # Hide the legend.
  theme(legend.position = "none")
由 reprex 包 (v0.3.0) 于 2021-06-07 创建
对于已编辑的问题,下面展示如何绘制点而不是文本:只需将 `geom_text()` 换成 `geom_point()`。
# Same colour coding as the text version, drawn with points: geom_point()
# takes the place of geom_text().
example_data %>%
  # Derive the group from the second character of the sample name.
  mutate(prop = substr(Sample, start = 2, stop = 2)) %>%
  ggplot(mapping = aes(x = X, y = Y, label = Sample, colour = prop)) +
  geom_point() +
  labs(x = "PC1 - ", y = "PC2 - ") +
  scale_colour_manual(values = c("1" = "blue", "2" = "red")) +
  theme_bw() +
  theme(legend.position = "none")
由 reprex 包 (v0.3.0) 于 2021-06-07 创建
# Simulate a 100-gene x 10-sample count matrix: five wild-type ("wt") and
# five knock-out ("ko") samples.
data.matrix <- matrix(nrow = 100, ncol = 10)
colnames(data.matrix) <- c(
  paste0("wt", 1:5),
  paste0("ko", 1:5)
)
rownames(data.matrix) <- paste0("gene", 1:100)
for (i in seq_len(nrow(data.matrix))) {
  # Each gene draws one random Poisson mean per group, then five counts.
  wt.values <- rpois(5, lambda = sample(x = 10:1000, size = 1))
  ko.values <- rpois(5, lambda = sample(x = 10:1000, size = 1))
  data.matrix[i, ] <- c(wt.values, ko.values)
}
head(data.matrix)
dim(data.matrix)

# The matrix is transposed so that samples are the rows passed to prcomp().
# `scale.` is spelled out in full instead of relying on partial matching.
pca <- prcomp(t(data.matrix), scale. = TRUE)

# Percentage of total variance carried by each principal component.
# The axis labels below need this; it was previously used without being
# defined.
pca.var <- pca$sdev^2
pca.var.per <- round(pca.var / sum(pca.var) * 100, 1)

# Install ggplot2 only if it is missing (the original call was misspelled
# as `intall.packages`), then load it.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# One row per sample with its coordinates on the first two components.
pca.data <- data.frame(
  Sample = rownames(pca$x),
  X = pca$x[, 1],
  Y = pca$x[, 2]
)
pca.data

ggplot(data = pca.data, aes(x = X, y = Y, label = Sample)) +
  geom_text() +
  xlab(paste0("PC1 - ", pca.var.per[1], "%")) +
  ylab(paste0("PC2 - ", pca.var.per[2], "%")) +
  theme_bw() +
  ggtitle("My PCA Graph")
上面的代码给出了这个最终的主成分分析图:
如何将wt点和ko点变成彩色点? (即,将所有“wt”点更改为蓝点,将所有“ko”点更改为红点)
编辑:问题在我最初的回答后发生了变化,更新后的答案见底部。
你可以用 `substr()` 取出 `Sample` 的第二个字符,然后把它传给 `col`。这是一个例子:
library(ggplot2)
library(dplyr)

# Toy data: the second character of each Sample name ("1" or "2") is the
# group used for colouring.
example_data <- data.frame(
  Sample = c("A1.1", "H2.1", "F2.1", "B1.1", "C1.1", "S2.1", "J2.1", "K1.1"),
  X = rnorm(n = 8),
  Y = rnorm(n = 8)
)

example_data %>%
  # New column holding the group character extracted from the sample name.
  mutate(prop = substr(Sample, 2, 2)) %>%
  ggplot(aes(x = X, y = Y, label = Sample, col = prop)) +
  geom_text() +
  xlab("PC1 - ") +
  # This was a second xlab() call, which overwrote the PC1 label and left
  # the y axis unlabelled.
  ylab("PC2 - ") +
  # Map each group value to a fixed colour.
  scale_color_manual(values = c("1" = "blue", "2" = "red")) +
  theme_bw() +
  # Hide the legend.
  theme(legend.position = "none")
由 reprex 包 (v0.3.0) 于 2021-06-07 创建
对于已编辑的问题,下面展示如何绘制点而不是文本:只需将 `geom_text()` 换成 `geom_point()`。
# Same colour coding as the text version, drawn with points: geom_point()
# takes the place of geom_text().
example_data %>%
  # Derive the group from the second character of the sample name.
  mutate(prop = substr(Sample, start = 2, stop = 2)) %>%
  ggplot(mapping = aes(x = X, y = Y, label = Sample, colour = prop)) +
  geom_point() +
  labs(x = "PC1 - ", y = "PC2 - ") +
  scale_colour_manual(values = c("1" = "blue", "2" = "red")) +
  theme_bw() +
  theme(legend.position = "none")
由 reprex 包 (v0.3.0) 于 2021-06-07 创建