在 R 数据框中查找另一列取值差异最大的行

Find rows in dataframe with the biggest difference in values in another column, R

我正在尝试找出 `Action` 列取值差异最大的两个名字(`First Name` 列)。有谁知道该怎么做吗?提前致谢!

# Example data: five people, one name column and four numeric columns.
# Built with structure() so the object keeps its tibble classes and
# automatic row names.
data <- structure(
  list(
    `First Name` = c("Till", "Roland", "Otmar", "Christoph", "Bianca"),
    Action = c(2, 1, 2, 1, 5),
    Reflection = c(6, 7, 6, 7, 3),
    Flexibility_Thinking = c(2, 3, 3, 1, 6),
    Structure = c(6, 4, 4, 7, 2)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

下面是一种使用基础 R(base R)的方法:

# Absolute difference in Action between every pair of rows
mat <- with(data, abs(outer(Action, Action, FUN = "-")))
# For each row, the position of the row it differs from the most
# (which.max() keeps the first position when several rows tie)
max_index <- apply(mat, MARGIN = 1, FUN = which.max)
# For each row, the size of that largest difference
max_values <- apply(mat, MARGIN = 1, FUN = max)
# One row per person: own name, name of the most different person,
# and the difference between the two
result <- cbind(
  data[1],
  biggest_diff = data[["First Name"]][max_index],
  diff = max_values
)

result

#  First Name biggest_diff diff
#1       Till       Bianca    3
#2     Roland       Bianca    4
#3      Otmar       Bianca    3
#4  Christoph       Bianca    4
#5     Bianca       Roland    4

# Sort by difference, largest first, and keep the first two rows
head(result[order(result$diff, decreasing = TRUE), ], n = 2L)

#  First Name biggest_diff diff
#2     Roland       Bianca    4
#4  Christoph       Bianca    4

您也可以使用 dist,并通过 method 参数选择距离的计算方式(euclidean、maximum、manhattan、canberra、binary、minkowski)。

# Pairwise distances between the Action values (Euclidean by default,
# which for a single column is the absolute difference)
x <- as.matrix(dist(data$Action))
# Zero out the diagonal and upper triangle so each pair appears only once.
# The mask is taken from x itself: diag() on a length-one vector would
# return an identity matrix of that size instead of a 1 x 1 matrix.
x[!lower.tri(x)] <- 0
# Row/column positions of the largest distance, mapped to names;
# each row of the printed matrix is one pair
matrix(data[["First Name"]][which(x == max(x), arr.ind = TRUE)], ncol = 2)
#     [,1]     [,2]       
#[1,] "Bianca" "Roland"   
#[2,] "Bianca" "Christoph"

或者同时基于多个列计算:

# Pairwise Euclidean distances between rows over every column except the
# first (the name column)
x <- as.matrix(dist(data[-1]))
# Zero out the diagonal and upper triangle so each pair appears only once.
# The mask is taken from x itself, so it always has matching dimensions
# and does not depend on any single column.
x[!lower.tri(x)] <- 0
# Row/column positions of the largest distance, mapped to names
matrix(data[["First Name"]][which(x == max(x), arr.ind = TRUE)], ncol = 2)
#[1,] "Bianca" "Christoph"