如何在 R 中按行对数据框中的数值数据进行排名?
How to rank numeric data by rows in a dataframe in r?
我有一个大约有 5000 列的数据框。这是数据框的片段
# Example data: three numeric columns, with missing values in b and c.
df <- data.frame(
  a = c(13, 17, 19, 7, 9),
  b = c(1, 3, 50, NA, 3),
  c = c(NA, NA, NA, NA, 9)
)
我想按行(w.r.t. rows)对数据框各单元格的值进行排名。
预期输出
# Expected result: within each row the largest value gets rank 1,
# tied values share a rank, and NA cells stay NA.
df <- data.frame(
  a = c(1, 1, 2, 1, 1),
  b = c(2, 2, 1, NA, 2),
  c = c(NA, NA, NA, NA, 1)
)
我们可以使用 `pmap` 遍历每一行(与 `rowwise` 相比会更快),并对每一行应用 `dense_rank`:
library(purrr)
library(dplyr)
# pmap_dfr() calls the function once per row, passing each column's value
# as a named argument; the named rank vectors are row-bound into a tibble.
pmap_dfr(df, function(...) {
  row_values <- c(...)
  # Negate so the largest value in the row receives rank 1.
  setNames(dense_rank(-row_values), names(row_values))
})
输出:
# A tibble: 5 x 3
a b c
<int> <int> <int>
1 1 2 NA
2 1 2 NA
3 2 1 NA
4 1 NA NA
5 1 2 1
或者,更快的选择可能是使用 `collapse` 包中的 `dapply`(配合 `data.table` 的 `frank`):
library(collapse)
library(data.table)
# Rank within each row (MARGIN = 1) using data.table::frank().
# frank() ranks in ascending order, so the data is negated first to give
# the largest value in each row rank 1, matching the expected output.
# ties.method = "dense" leaves no gaps after ties; na.last = "keep"
# leaves NA cells as NA.
dapply(-df, MARGIN = 1, FUN = frank, ties.method = "dense", na.last = "keep")
#>   a  b  c
#> 1 1  2 NA
#> 2 1  2 NA
#> 3 2  1 NA
#> 4 1 NA NA
#> 5 1  2  1
# Example data: three numeric columns, with missing values in b and c.
df <- data.frame(
  a = c(13, 17, 19, 7, 9),
  b = c(1, 3, 50, NA, 3),
  c = c(NA, NA, NA, NA, 9)
)
# Negate so the largest value in each row gets rank 1. apply() returns one
# column per input row, so the result is laid out columns-by-rows.
apply(-df, 1, function(row) rank(row, na.last = "keep", ties.method = "min"))
#> [,1] [,2] [,3] [,4] [,5]
#> a 1 1 2 1 1
#> b 2 2 1 NA 3
#> c NA NA NA NA 1
对结果进行转置,使其行和列与原数据框一致:
# Transpose the per-row ranks so rows and columns line up with df again.
t(apply(-df, 1, function(row) rank(row, na.last = "keep", ties.method = "min")))
#> a b c
#> [1,] 1 2 NA
#> [2,] 1 2 NA
#> [3,] 2 1 NA
#> [4,] 1 NA NA
#> [5,] 1 3 1
注意:这里对并列值(ties)的处理与您预期的输出不同。例如第五行,`ties.method = "min"` 在并列之后会跳过名次,得到 1、3、1,而不是预期的 1、2、1。
# Example data: three numeric columns, with missing values in b and c.
df <- data.frame(
  a = c(13, 17, 19, 7, 9),
  b = c(1, 3, 50, NA, 3),
  c = c(NA, NA, NA, NA, 9)
)
library(tidyverse)
# For each row, compute dense ranks of the negated values (largest = 1,
# NA stays NA) and store them as a list-column.
# c_across(everything()) covers every column, so the code does not need
# editing when the data frame has more columns than a, b and c.
out <- df %>%
  rowwise() %>%
  transmute(res = list(dense_rank(-c_across(everything())))) %>%
  ungroup() %>%
  # The rank vectors are unnamed; current tidyr requires names_sep to
  # generate column names (res_1, res_2, ...) for unnamed elements.
  unnest_wider(res, names_sep = "_")
# Restore the original column names on the widened result.
names(out) <- names(df)
out
#> # A tibble: 5 x 3
#> a b c
#> <int> <int> <int>
#> 1 1 2 NA
#> 2 1 2 NA
#> 3 2 1 NA
#> 4 1 NA NA
#> 5 1 2 1
由 reprex 包 (v2.0.1) 于 2021-09-20 创建
我有一个大约有 5000 列的数据框。这是数据框的片段
# Example data: three numeric columns, with missing values in b and c.
df <- data.frame(
  a = c(13, 17, 19, 7, 9),
  b = c(1, 3, 50, NA, 3),
  c = c(NA, NA, NA, NA, 9)
)
我想按行(w.r.t. rows)对数据框各单元格的值进行排名。
预期输出
# Expected result: within each row the largest value gets rank 1,
# tied values share a rank, and NA cells stay NA.
df <- data.frame(
  a = c(1, 1, 2, 1, 1),
  b = c(2, 2, 1, NA, 2),
  c = c(NA, NA, NA, NA, 1)
)
我们可以使用 `pmap` 遍历每一行(与 `rowwise` 相比会更快),并对每一行应用 `dense_rank`:
library(purrr)
library(dplyr)
# pmap_dfr() calls the function once per row, passing each column's value
# as a named argument; the named rank vectors are row-bound into a tibble.
pmap_dfr(df, function(...) {
  row_values <- c(...)
  # Negate so the largest value in the row receives rank 1.
  setNames(dense_rank(-row_values), names(row_values))
})
输出:
# A tibble: 5 x 3
a b c
<int> <int> <int>
1 1 2 NA
2 1 2 NA
3 2 1 NA
4 1 NA NA
5 1 2 1
或者,更快的选择可能是使用 `collapse` 包中的 `dapply`(配合 `data.table` 的 `frank`):
library(collapse)
library(data.table)
# Rank within each row (MARGIN = 1) using data.table::frank().
# frank() ranks in ascending order, so the data is negated first to give
# the largest value in each row rank 1, matching the expected output.
# ties.method = "dense" leaves no gaps after ties; na.last = "keep"
# leaves NA cells as NA.
dapply(-df, MARGIN = 1, FUN = frank, ties.method = "dense", na.last = "keep")
#>   a  b  c
#> 1 1  2 NA
#> 2 1  2 NA
#> 3 2  1 NA
#> 4 1 NA NA
#> 5 1  2  1
# Example data: three numeric columns, with missing values in b and c.
df <- data.frame(
  a = c(13, 17, 19, 7, 9),
  b = c(1, 3, 50, NA, 3),
  c = c(NA, NA, NA, NA, 9)
)
# Negate so the largest value in each row gets rank 1. apply() returns one
# column per input row, so the result is laid out columns-by-rows.
apply(-df, 1, function(row) rank(row, na.last = "keep", ties.method = "min"))
#> [,1] [,2] [,3] [,4] [,5]
#> a 1 1 2 1 1
#> b 2 2 1 NA 3
#> c NA NA NA NA 1
对结果进行转置,使其行和列与原数据框一致:
# Transpose the per-row ranks so rows and columns line up with df again.
t(apply(-df, 1, function(row) rank(row, na.last = "keep", ties.method = "min")))
#> a b c
#> [1,] 1 2 NA
#> [2,] 1 2 NA
#> [3,] 2 1 NA
#> [4,] 1 NA NA
#> [5,] 1 3 1
注意:这里对并列值(ties)的处理与您预期的输出不同。例如第五行,`ties.method = "min"` 在并列之后会跳过名次,得到 1、3、1,而不是预期的 1、2、1。
# Example data: three numeric columns, with missing values in b and c.
df <- data.frame(
  a = c(13, 17, 19, 7, 9),
  b = c(1, 3, 50, NA, 3),
  c = c(NA, NA, NA, NA, 9)
)
library(tidyverse)
# For each row, compute dense ranks of the negated values (largest = 1,
# NA stays NA) and store them as a list-column.
# c_across(everything()) covers every column, so the code does not need
# editing when the data frame has more columns than a, b and c.
out <- df %>%
  rowwise() %>%
  transmute(res = list(dense_rank(-c_across(everything())))) %>%
  ungroup() %>%
  # The rank vectors are unnamed; current tidyr requires names_sep to
  # generate column names (res_1, res_2, ...) for unnamed elements.
  unnest_wider(res, names_sep = "_")
# Restore the original column names on the widened result.
names(out) <- names(df)
out
#> # A tibble: 5 x 3
#> a b c
#> <int> <int> <int>
#> 1 1 2 NA
#> 2 1 2 NA
#> 3 2 1 NA
#> 4 1 NA NA
#> 5 1 2 1
由 reprex 包 (v2.0.1) 于 2021-09-20 创建