R - 根据其他列查找三个最高值
R - Find three top values based on other column
我有一个数据框(也可以是 data.table),其中记录了各植被类型(第一列 "type")在多个站点(其余列 "a" 到 "l")上所占的比例。它的样子如下:
# Example data: one row per vegetation type (17 rows), one column per site.
#   type   - factor whose 17 levels are the vegetation type names.
#   a .. l - one column per site; the values are stored as character strings,
#            so they need as.numeric() before any numeric sorting/comparison.
dat <- structure(list(type = structure(1:17, class = "factor", .Label = c("not_vegetated",
"needleleaf_evergreen_temperate_tree", "needleleaf_evergreen_boreal_tree",
"needleleaf_deciduous_boreal_tree", "broadleaf_evergreen_tropical_tree",
"broadleaf_evergreen_temperate_tree", "broadleaf_deciduous_tropical_tree",
"broadleaf_deciduous_temperate_tree", "broadleaf_deciduous_boreal_tree",
"broadleaf_evergreen_shrub", "broadleaf_deciduous_temperate_shrub",
"broadleaf_deciduous_boreal_shrub", "c3_arctic_grass", "c3_non.arctic_grass",
"c4_grass", "c3_crop", "c3_irrigated")), a = c("0", "1.55", "0",
"0", "0", "0", "0", "11.59", "0", "0", "0", "0", "0", "31.5",
"0.26", "52.29", "0"), b = c("0", "8.27", "0.02", "0", "0", "0",
"0", "35.5", "0.05", "0", "0.04", "0", "0", "26.02", "0", "15.7",
"0"), c = c("1.42", "7.55", "0", "0", "0", "0", "0", "14.24",
"2.38", "0", "0.06", "0", "0", "31.79", "0", "36.56", "0"), d = c("0",
"13.87", "3.97", "0", "0", "0", "0", "51.66", "7.68", "0", "0.07",
"0", "0", "8.18", "0", "10.23", "0"), e = c("0", "16.23", "0.24",
"0", "0", "0", "0", "67.15", "2.12", "0", "0", "0", "0", "6.52",
"0.1", "4.68", "0"), f = c("0.26", "6.29", "60.98", "0", "0",
"0", "0", "3.72", "10.49", "0", "0", "0.3", "2.45", "3.61", "0",
"3.9", "0"), g = c("0.11", "38.16", "10.9", "0", "0", "0", "0",
"31.72", "13.53", "0", "0", "0", "0", "0.68", "0", "0.9", "0"
), h = c("0", "10.42", "0.42", "0", "0", "0", "0", "55.44", "1.49",
"0", "0", "0", "0", "16.54", "0", "13.33", "0"), i = c("0", "1.39",
"0", "0", "0", "0", "0", "11.56", "0", "0", "0", "0", "0", "33.94",
"0", "49.26", "0"), j = c("0.45", "16.48", "0", "0", "0", "0",
"0", "40.02", "0", "0", "0.02", "0", "0", "17.53", "0", "10.51",
"0"), k = c("6.02", "2.78", "0", "0", "0", "0", "0", "1.27",
"2.51", "0", "0.03", "0", "0", "18.29", "0", "69.1", "0"), l = c("6.13",
"22.77", "2.72", "0", "0", "0", "0", "22.69", "3.85", "0", "0.06",
"0", "0", "1.25", "0", "1.53", "0")), .Names = c("type",
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"), row.names = c(NA,
-17L), class = "data.frame")
我需要做的是找出每个站点占比最高的三种植被类型,并将其汇总到类似下面的数据框(或 data.table)中(为简洁起见,仅显示前两个站点):
a b
c3_crop (52.29) broadleaf_deciduous_temperate_tree (35.5)
c3_non.arctic_grass (31.5) c3_non.arctic_grass (26.02)
broadleaf_deciduous_temperate_tree (11.59) c3_crop (15.7)
关于如何做到这一点的任何提示?
这是一种可行的方法。如果我没有理解错你的问题,你想为每个站点选出数值最高的三种植被类型。首先要把数据转换为长格式,把字符型的值转换为数值,然后按 site
和 value
对数据进行排序。接着按 site
分组,并在每个组中取前三行。
library(dplyr)
library(tidyr)
# Top three vegetation types per site (tidyverse version).
# Steps: wide -> long, convert the character values to numeric, sort each site
# by descending value, then keep exactly the first three rows of every site.
#
# Why not gather() + top_n(3):
#   * gather() is superseded by pivot_longer().
#   * top_n(3) without `wt` silently ranks by the last column (and prints
#     "Selecting by value"); it also keeps ties, so a site could yield more
#     than three rows. slice_head() after the sort always returns three.
#   * ungroup() avoids leaking the grouping into later steps.
pivot_longer(dat, cols = -type, names_to = "site", values_to = "value") %>%
  mutate(value = as.numeric(value)) %>%
  arrange(site, desc(value)) %>%
  group_by(site) %>%
  slice_head(n = 3) %>%
  ungroup()
type site value
<fct> <chr> <dbl>
1 c3_crop a 52.3
2 c3_non.arctic_grass a 31.5
3 broadleaf_deciduous_temperate_tree a 11.6
4 broadleaf_deciduous_temperate_tree b 35.5
5 c3_non.arctic_grass b 26.0
6 c3_crop b 15.7
7 c3_crop c 36.6
8 c3_non.arctic_grass c 31.8
9 broadleaf_deciduous_temperate_tree c 14.2
10 broadleaf_deciduous_temperate_tree d 51.7
# ... with 26 more rows
考虑将数据从宽格式重塑为长格式,并将 by
与 order
和 head(...,3)
结合使用,全部只用基础 R(base R)即可完成。下面的 by
会返回一个按站点命名的数据框列表。
# RESHAPE DATA
# Every column except `type` is a site. Derive the site names and the number
# of long-format rows from `dat` itself instead of hard-coding letters[1:12],
# 2:13 and an arbitrary 1:1000 row-name pool (which fails once the long table
# needs more than 1000 rows).
site_cols <- setdiff(names(dat), "type")
rdat <- reshape(dat, timevar = "site", times = site_cols,
                varying = list(site_cols), v.names = "value",
                new.row.names = seq_len(nrow(dat) * length(site_cols)),
                direction = "long")
# The site columns are character vectors; convert before numeric ordering.
rdat$value <- as.numeric(rdat$value)
# ORDER DATA
# Ascending by site, descending by value within each site.
rdat <- rdat[order(rdat$site, -rdat$value), ]
# SUBSET BY SITE, RETURN TOP 3 ROWS
by(rdat, rdat$site, FUN = function(site_rows) head(site_rows, 3))
# rdat$site: a
# type site value id
# 16 c3_crop a 52.29 16
# 14 c3_non.arctic_grass a 31.50 14
# 8 broadleaf_deciduous_temperate_tree a 11.59 8
# --------------------------------------------------------------------------------
# rdat$site: b
# type site value id
# 25 broadleaf_deciduous_temperate_tree b 35.50 8
# 31 c3_non.arctic_grass b 26.02 14
# 33 c3_crop b 15.70 16
# --------------------------------------------------------------------------------
# rdat$site: c
# type site value id
# 50 c3_crop c 36.56 16
# 48 c3_non.arctic_grass c 31.79 14
# 42 broadleaf_deciduous_temperate_tree c 14.24 8
# ...
我有一个数据框(也可以是 data.table),其中记录了各植被类型(第一列 "type")在多个站点(其余列 "a" 到 "l")上所占的比例。它的样子如下:
# Example data: one row per vegetation type (17 rows), one column per site.
#   type   - factor whose 17 levels are the vegetation type names.
#   a .. l - one column per site; the values are stored as character strings,
#            so they need as.numeric() before any numeric sorting/comparison.
dat <- structure(list(type = structure(1:17, class = "factor", .Label = c("not_vegetated",
"needleleaf_evergreen_temperate_tree", "needleleaf_evergreen_boreal_tree",
"needleleaf_deciduous_boreal_tree", "broadleaf_evergreen_tropical_tree",
"broadleaf_evergreen_temperate_tree", "broadleaf_deciduous_tropical_tree",
"broadleaf_deciduous_temperate_tree", "broadleaf_deciduous_boreal_tree",
"broadleaf_evergreen_shrub", "broadleaf_deciduous_temperate_shrub",
"broadleaf_deciduous_boreal_shrub", "c3_arctic_grass", "c3_non.arctic_grass",
"c4_grass", "c3_crop", "c3_irrigated")), a = c("0", "1.55", "0",
"0", "0", "0", "0", "11.59", "0", "0", "0", "0", "0", "31.5",
"0.26", "52.29", "0"), b = c("0", "8.27", "0.02", "0", "0", "0",
"0", "35.5", "0.05", "0", "0.04", "0", "0", "26.02", "0", "15.7",
"0"), c = c("1.42", "7.55", "0", "0", "0", "0", "0", "14.24",
"2.38", "0", "0.06", "0", "0", "31.79", "0", "36.56", "0"), d = c("0",
"13.87", "3.97", "0", "0", "0", "0", "51.66", "7.68", "0", "0.07",
"0", "0", "8.18", "0", "10.23", "0"), e = c("0", "16.23", "0.24",
"0", "0", "0", "0", "67.15", "2.12", "0", "0", "0", "0", "6.52",
"0.1", "4.68", "0"), f = c("0.26", "6.29", "60.98", "0", "0",
"0", "0", "3.72", "10.49", "0", "0", "0.3", "2.45", "3.61", "0",
"3.9", "0"), g = c("0.11", "38.16", "10.9", "0", "0", "0", "0",
"31.72", "13.53", "0", "0", "0", "0", "0.68", "0", "0.9", "0"
), h = c("0", "10.42", "0.42", "0", "0", "0", "0", "55.44", "1.49",
"0", "0", "0", "0", "16.54", "0", "13.33", "0"), i = c("0", "1.39",
"0", "0", "0", "0", "0", "11.56", "0", "0", "0", "0", "0", "33.94",
"0", "49.26", "0"), j = c("0.45", "16.48", "0", "0", "0", "0",
"0", "40.02", "0", "0", "0.02", "0", "0", "17.53", "0", "10.51",
"0"), k = c("6.02", "2.78", "0", "0", "0", "0", "0", "1.27",
"2.51", "0", "0.03", "0", "0", "18.29", "0", "69.1", "0"), l = c("6.13",
"22.77", "2.72", "0", "0", "0", "0", "22.69", "3.85", "0", "0.06",
"0", "0", "1.25", "0", "1.53", "0")), .Names = c("type",
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"), row.names = c(NA,
-17L), class = "data.frame")
我需要做的是找出每个站点占比最高的三种植被类型,并将其汇总到类似下面的数据框(或 data.table)中(为简洁起见,仅显示前两个站点):
a b
c3_crop (52.29) broadleaf_deciduous_temperate_tree (35.5)
c3_non.arctic_grass (31.5) c3_non.arctic_grass (26.02)
broadleaf_deciduous_temperate_tree (11.59) c3_crop (15.7)
关于如何做到这一点的任何提示?
这是一种可行的方法。如果我没有理解错你的问题,你想为每个站点选出数值最高的三种植被类型。首先要把数据转换为长格式,把字符型的值转换为数值,然后按 site
和 value
对数据进行排序。接着按 site
分组,并在每个组中取前三行。
library(dplyr)
library(tidyr)
# Top three vegetation types per site (tidyverse version).
# Steps: wide -> long, convert the character values to numeric, sort each site
# by descending value, then keep exactly the first three rows of every site.
#
# Why not gather() + top_n(3):
#   * gather() is superseded by pivot_longer().
#   * top_n(3) without `wt` silently ranks by the last column (and prints
#     "Selecting by value"); it also keeps ties, so a site could yield more
#     than three rows. slice_head() after the sort always returns three.
#   * ungroup() avoids leaking the grouping into later steps.
pivot_longer(dat, cols = -type, names_to = "site", values_to = "value") %>%
  mutate(value = as.numeric(value)) %>%
  arrange(site, desc(value)) %>%
  group_by(site) %>%
  slice_head(n = 3) %>%
  ungroup()
type site value
<fct> <chr> <dbl>
1 c3_crop a 52.3
2 c3_non.arctic_grass a 31.5
3 broadleaf_deciduous_temperate_tree a 11.6
4 broadleaf_deciduous_temperate_tree b 35.5
5 c3_non.arctic_grass b 26.0
6 c3_crop b 15.7
7 c3_crop c 36.6
8 c3_non.arctic_grass c 31.8
9 broadleaf_deciduous_temperate_tree c 14.2
10 broadleaf_deciduous_temperate_tree d 51.7
# ... with 26 more rows
考虑将数据从宽格式重塑为长格式,并将 by
与 order
和 head(...,3)
结合使用,全部只用基础 R(base R)即可完成。下面的 by
会返回一个按站点命名的数据框列表。
# RESHAPE DATA
# Every column except `type` is a site. Derive the site names and the number
# of long-format rows from `dat` itself instead of hard-coding letters[1:12],
# 2:13 and an arbitrary 1:1000 row-name pool (which fails once the long table
# needs more than 1000 rows).
site_cols <- setdiff(names(dat), "type")
rdat <- reshape(dat, timevar = "site", times = site_cols,
                varying = list(site_cols), v.names = "value",
                new.row.names = seq_len(nrow(dat) * length(site_cols)),
                direction = "long")
# The site columns are character vectors; convert before numeric ordering.
rdat$value <- as.numeric(rdat$value)
# ORDER DATA
# Ascending by site, descending by value within each site.
rdat <- rdat[order(rdat$site, -rdat$value), ]
# SUBSET BY SITE, RETURN TOP 3 ROWS
by(rdat, rdat$site, FUN = function(site_rows) head(site_rows, 3))
# rdat$site: a
# type site value id
# 16 c3_crop a 52.29 16
# 14 c3_non.arctic_grass a 31.50 14
# 8 broadleaf_deciduous_temperate_tree a 11.59 8
# --------------------------------------------------------------------------------
# rdat$site: b
# type site value id
# 25 broadleaf_deciduous_temperate_tree b 35.50 8
# 31 c3_non.arctic_grass b 26.02 14
# 33 c3_crop b 15.70 16
# --------------------------------------------------------------------------------
# rdat$site: c
# type site value id
# 50 c3_crop c 36.56 16
# 48 c3_non.arctic_grass c 31.79 14
# 42 broadleaf_deciduous_temperate_tree c 14.24 8
# ...