在 R 中查找数据框里另一列的值差异最大的行
Find rows in dataframe with the biggest difference in values in another column, R
我正在尝试查找值(`Action` 列)差异最大的两个名字(`First Name` 列)。有谁知道该怎么做吗?提前致谢!
# Example data: a name column followed by four numeric columns, one row
# per person. Built with structure() so the tibble classes are attached
# without needing the tibble package to construct it.
data <- structure(
  list(
    `First Name` = c("Till", "Roland", "Otmar", "Christoph", "Bianca"),
    Action = c(2, 1, 2, 1, 5),
    Reflection = c(6, 7, 6, 7, 3),
    Flexibility_Thinking = c(2, 3, 3, 1, 6),
    Structure = c(6, 4, 4, 7, 2)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)
下面是一种使用 base R 的方法:
# Absolute difference in Action between every pair of rows
# (square matrix: entry [i, j] is |Action[i] - Action[j]|)
mat <- abs(outer(data$Action, data$Action, FUN = "-"))

# Largest difference seen by each row
max_values <- apply(mat, MARGIN = 1, FUN = max)

# Position of the row that produces that largest difference;
# which.max() reports the first position when several are tied
max_index <- apply(mat, MARGIN = 1, FUN = which.max)

# One row per person: their name, the name of the person they differ
# from the most, and the size of that difference
result <- cbind(
  data[1],
  biggest_diff = data[["First Name"]][max_index],
  diff = max_values
)
result
#  First Name biggest_diff diff
#1       Till       Bianca    3
#2     Roland       Bianca    4
#3      Otmar       Bianca    3
#4  Christoph       Bianca    4
#5     Bianca       Roland    4

# Sort by difference, largest first, and keep the top two rows
head(result[order(-result$diff), ], n = 2)
#  First Name biggest_diff diff
#2     Roland       Bianca    4
#4  Christoph       Bianca    4
您可以使用 `dist`,并通过 `method` 参数选择距离的计算方式(`euclidean`、`maximum`、`manhattan`、`canberra`、`binary` 或 `minkowski`)。
# Pairwise distances between the Action values, as a full square matrix
x <- as.matrix(dist(data$Action))
# Zero out the diagonal and upper triangle so each pair appears once.
# The mask is derived from the distance matrix itself rather than from
# diag(data$Action): diag() on a length-one vector returns an identity
# matrix of that size, which would not conform to the distance matrix.
x <- x * lower.tri(x)
# Row/column positions holding the maximum distance, mapped to names;
# each row of the printed matrix is one pair
matrix(data[["First Name"]][which(x == max(x), arr.ind = TRUE)], ncol = 2)
#     [,1]     [,2]
#[1,] "Bianca" "Roland"
#[2,] "Bianca" "Christoph"
或者同时使用多个列:
# Pairwise distances between rows using every column except the first
# (the name column), as a full square matrix
x <- as.matrix(dist(data[-1]))
# Zero out the diagonal and upper triangle so each pair appears once.
# The mask is derived from the distance matrix itself rather than from
# diag(data$Action), which depends on a column unrelated to the mask and
# returns an identity matrix of the wrong size for a length-one vector.
x <- x * lower.tri(x)
# Row/column positions holding the maximum distance, mapped to names
matrix(data[["First Name"]][which(x == max(x), arr.ind = TRUE)], ncol = 2)
#     [,1]     [,2]
#[1,] "Bianca" "Christoph"
我正在尝试查找值(`Action` 列)差异最大的两个名字(`First Name` 列)。有谁知道该怎么做吗?提前致谢!
# Example data: a name column followed by four numeric columns, one row
# per person. Built with structure() so the tibble classes are attached
# without needing the tibble package to construct it.
data <- structure(
  list(
    `First Name` = c("Till", "Roland", "Otmar", "Christoph", "Bianca"),
    Action = c(2, 1, 2, 1, 5),
    Reflection = c(6, 7, 6, 7, 3),
    Flexibility_Thinking = c(2, 3, 3, 1, 6),
    Structure = c(6, 4, 4, 7, 2)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)
下面是一种使用 base R 的方法:
# Absolute difference in Action between every pair of rows
# (square matrix: entry [i, j] is |Action[i] - Action[j]|)
mat <- abs(outer(data$Action, data$Action, FUN = "-"))

# Largest difference seen by each row
max_values <- apply(mat, MARGIN = 1, FUN = max)

# Position of the row that produces that largest difference;
# which.max() reports the first position when several are tied
max_index <- apply(mat, MARGIN = 1, FUN = which.max)

# One row per person: their name, the name of the person they differ
# from the most, and the size of that difference
result <- cbind(
  data[1],
  biggest_diff = data[["First Name"]][max_index],
  diff = max_values
)
result
#  First Name biggest_diff diff
#1       Till       Bianca    3
#2     Roland       Bianca    4
#3      Otmar       Bianca    3
#4  Christoph       Bianca    4
#5     Bianca       Roland    4

# Sort by difference, largest first, and keep the top two rows
head(result[order(-result$diff), ], n = 2)
#  First Name biggest_diff diff
#2     Roland       Bianca    4
#4  Christoph       Bianca    4
您可以使用 `dist`,并通过 `method` 参数选择距离的计算方式(`euclidean`、`maximum`、`manhattan`、`canberra`、`binary` 或 `minkowski`)。
# Pairwise distances between the Action values, as a full square matrix
x <- as.matrix(dist(data$Action))
# Zero out the diagonal and upper triangle so each pair appears once.
# The mask is derived from the distance matrix itself rather than from
# diag(data$Action): diag() on a length-one vector returns an identity
# matrix of that size, which would not conform to the distance matrix.
x <- x * lower.tri(x)
# Row/column positions holding the maximum distance, mapped to names;
# each row of the printed matrix is one pair
matrix(data[["First Name"]][which(x == max(x), arr.ind = TRUE)], ncol = 2)
#     [,1]     [,2]
#[1,] "Bianca" "Roland"
#[2,] "Bianca" "Christoph"
或者同时使用多个列:
# Pairwise distances between rows using every column except the first
# (the name column), as a full square matrix
x <- as.matrix(dist(data[-1]))
# Zero out the diagonal and upper triangle so each pair appears once.
# The mask is derived from the distance matrix itself rather than from
# diag(data$Action), which depends on a column unrelated to the mask and
# returns an identity matrix of the wrong size for a length-one vector.
x <- x * lower.tri(x)
# Row/column positions holding the maximum distance, mapped to names
matrix(data[["First Name"]][which(x == max(x), arr.ind = TRUE)], ncol = 2)
#     [,1]     [,2]
#[1,] "Bianca" "Christoph"