按某一列对 data.table 排序来创建新列(排名)不起作用
creating a new column with the order of the data table by certain column does not work
我有以下 data.table
:
library(data.table)

# Example data: five variables (var1..var5), each measured for two sources
# (A and B), with one numeric `effect` per (variable, src) pair.
#
# Built with data.table() rather than a dput()-style structure() call: a
# data.table assembled via structure() has no valid internal self-reference,
# so the first `:=` on it emits an "Invalid .internal.selfref" warning and
# operates on a shallow copy.
dt <- data.table(
  variable = rep(paste0("var", 1:5), each = 2L),
  src = rep(c("A", "B"), times = 5L),
  effect = c(
    0.00479503189634099, 0.00520945223353099, 0.0414826536175954,
    0.0534395645078372, 0.00321202935447758, 0.00426201973099257,
    0.0207942299790097, 0.0268992334286609, 0.304833654398739,
    0.291310303107859
  )
)
我想得到 effect 在每个 src 分组内的排名。也就是说,对于每个 src,variable 为 var5 的那一行的 rank 应该等于 1(因为在每个 src 中,var5 的 effect 最高)。所以我运行:
# Attempt from the question. order() returns, per src group, the row positions
# that would sort `effect` in decreasing order -- i.e. a sort permutation, not
# each row's rank -- so the stored values are positions, not rankings.
dt[, rank := order(effect, decreasing = TRUE), by = "src"]
但是我得到的结果是:
> dt
variable src effect rank
1: var1 A 0.0047950319 5
2: var1 B 0.0052094522 5
3: var2 A 0.0414826536 2
4: var2 B 0.0534395645 2
5: var3 A 0.0032120294 4
6: var3 B 0.0042620197 4
7: var4 A 0.0207942300 1
8: var4 B 0.0268992334 1
9: var5 A 0.3048336544 3
10: var5 B 0.2913103031 3
知道为什么会这样吗?
使用dplyr
:
# dplyr provides %>%, group_by(), mutate(), dense_rank() and desc().
library(dplyr)

# Rank `effect` within each src, largest first (rank 1 = highest effect).
# dense_rank() leaves no gaps between ranks when there are ties.
# The result is still grouped by src.
dt %>%
  group_by(src) %>%
  mutate(rank = dense_rank(desc(effect)))
# A tibble: 10 x 4
# Groups: src [2]
variable src effect rank
<chr> <chr> <dbl> <int>
1 var1 A 0.00480 4
2 var1 B 0.00521 4
3 var2 A 0.0415 2
4 var2 B 0.0534 2
5 var3 A 0.00321 5
6 var3 B 0.00426 5
7 var4 A 0.0208 3
8 var4 B 0.0269 3
9 var5 A 0.305 1
10 var5 B 0.291 1
或 data.table
:
# Pure data.table version: rank `effect` within each src, largest first.
# frank() is data.table's fast rank; negating `effect` gives descending order
# without relying on dplyr::desc(). Adds the column by reference.
dt[, rank := frank(-effect), by = src]
variable src effect rank
1: var1 A 0.004795032 4
2: var1 B 0.005209452 4
3: var2 A 0.041482654 2
4: var2 B 0.053439565 2
5: var3 A 0.003212029 5
6: var3 B 0.004262020 5
7: var4 A 0.020794230 3
8: var4 B 0.026899233 3
9: var5 A 0.304833654 1
10: var5 B 0.291310303 1
另一种 data.table 写法:
# Sort dt in place by src, then by decreasing effect. Note that this reorders
# the rows of dt itself.
setorder(dt, src, -effect)
# After sorting, the position of a row within its src group is its rank.
# The trailing [] prints the updated table.
dt[, rank := seq_len(.N), by = src][]
我有以下 data.table
:
library(data.table)

# Example data: five variables (var1..var5), each measured for two sources
# (A and B), with one numeric `effect` per (variable, src) pair.
#
# Built with data.table() rather than a dput()-style structure() call: a
# data.table assembled via structure() has no valid internal self-reference,
# so the first `:=` on it emits an "Invalid .internal.selfref" warning and
# operates on a shallow copy.
dt <- data.table(
  variable = rep(paste0("var", 1:5), each = 2L),
  src = rep(c("A", "B"), times = 5L),
  effect = c(
    0.00479503189634099, 0.00520945223353099, 0.0414826536175954,
    0.0534395645078372, 0.00321202935447758, 0.00426201973099257,
    0.0207942299790097, 0.0268992334286609, 0.304833654398739,
    0.291310303107859
  )
)
我想得到 effect 在每个 src 分组内的排名。也就是说,对于每个 src,variable 为 var5 的那一行的 rank 应该等于 1(因为在每个 src 中,var5 的 effect 最高)。所以我运行:
# Attempt from the question. order() returns, per src group, the row positions
# that would sort `effect` in decreasing order -- i.e. a sort permutation, not
# each row's rank -- so the stored values are positions, not rankings.
dt[, rank := order(effect, decreasing = TRUE), by = "src"]
但是我得到的结果是:
> dt
variable src effect rank
1: var1 A 0.0047950319 5
2: var1 B 0.0052094522 5
3: var2 A 0.0414826536 2
4: var2 B 0.0534395645 2
5: var3 A 0.0032120294 4
6: var3 B 0.0042620197 4
7: var4 A 0.0207942300 1
8: var4 B 0.0268992334 1
9: var5 A 0.3048336544 3
10: var5 B 0.2913103031 3
知道为什么会这样吗?
使用dplyr
:
# dplyr provides %>%, group_by(), mutate(), dense_rank() and desc().
library(dplyr)

# Rank `effect` within each src, largest first (rank 1 = highest effect).
# dense_rank() leaves no gaps between ranks when there are ties.
# The result is still grouped by src.
dt %>%
  group_by(src) %>%
  mutate(rank = dense_rank(desc(effect)))
# A tibble: 10 x 4
# Groups: src [2]
variable src effect rank
<chr> <chr> <dbl> <int>
1 var1 A 0.00480 4
2 var1 B 0.00521 4
3 var2 A 0.0415 2
4 var2 B 0.0534 2
5 var3 A 0.00321 5
6 var3 B 0.00426 5
7 var4 A 0.0208 3
8 var4 B 0.0269 3
9 var5 A 0.305 1
10 var5 B 0.291 1
或 data.table
:
# Pure data.table version: rank `effect` within each src, largest first.
# frank() is data.table's fast rank; negating `effect` gives descending order
# without relying on dplyr::desc(). Adds the column by reference.
dt[, rank := frank(-effect), by = src]
variable src effect rank
1: var1 A 0.004795032 4
2: var1 B 0.005209452 4
3: var2 A 0.041482654 2
4: var2 B 0.053439565 2
5: var3 A 0.003212029 5
6: var3 B 0.004262020 5
7: var4 A 0.020794230 3
8: var4 B 0.026899233 3
9: var5 A 0.304833654 1
10: var5 B 0.291310303 1
另一种 data.table 写法:
# Sort dt in place by src, then by decreasing effect. Note that this reorders
# the rows of dt itself.
setorder(dt, src, -effect)
# After sorting, the position of a row within its src group is its rank.
# The trailing [] prints the updated table.
dt[, rank := seq_len(.N), by = src][]