R - 识别数据框列中的公共元素
R - Identify common elements in data frame columns
如何识别此数据框的所有列(不包括 NA
)共有的元素。我怎样才能做到这一点?我尝试了一些使用 intersect
和 unique
的方法但没有成功。
# Example data: a 47-row data frame with five factor columns
# (cloudiness, humidity, precipitation, temperature, wind) whose values are
# string labels. `cloudiness` has no missing values; `humidity`,
# `precipitation` and `wind` end with runs of NA (12, 6 and 8 respectively);
# `temperature` is entirely NA and has no levels.
# NOTE(review): the literal looks like dput() output -- confirm before
# hand-editing any of the integer codes or labels.
df <- structure(list(cloudiness = structure(1:47, .Label = c("ACCESS1-0",
"ACCESS1-3", "BNU-ESM", "CCSM4", "CESM1-BGC", "CESM1-CAM5", "CESM1-CAM5-1-FV2",
"CESM1-FASTCHEM", "CESM1-WACCM", "CMCC-CESM", "CMCC-CM", "CMCC-CMS",
"CNRM-CM5", "CNRM-CM5-2", "CSIRO-Mk3-6-0", "CanESM2", "FGOALS-g2",
"FIO-ESM", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M", "GISS-E2-H",
"GISS-E2-H-CC", "GISS-E2-R", "GISS-E2-R-CC", "HadCM3", "HadGEM2-AO",
"HadGEM2-CC", "HadGEM2-ES", "IPSL-CM5A-LR", "IPSL-CM5A-MR", "IPSL-CM5B-LR",
"MIROC-ESM", "MIROC-ESM-CHEM", "MIROC4h", "MIROC5", "MPI-ESM-LR",
"MPI-ESM-MR", "MPI-ESM-P", "MRI-CGCM3", "MRI-ESM1", "NorESM1-M",
"NorESM1-ME", "bcc-csm1-1", "bcc-csm1-1-m", "concat", "inmcm4"
), class = "factor"), humidity = structure(c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), .Label = c("ACCESS1-0", "ACCESS1-3", "BNU-ESM", "CCSM4",
"CESM1-BGC", "CESM1-CAM5", "CESM1-FASTCHEM", "CESM1-WACCM", "CNRM-CM5",
"CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
"GISS-E2-H", "GISS-E2-H-CC", "GISS-E2-R", "GISS-E2-R-CC", "HadCM3",
"HadGEM2-AO", "HadGEM2-CC", "HadGEM2-ES", "IPSL-CM5A-MR", "IPSL-CM5B-LR",
"MIROC-ESM", "MIROC-ESM-CHEM", "MIROC4h", "MIROC5", "MRI-CGCM3",
"MRI-ESM1", "NorESM1-M", "NorESM1-ME", "bcc-csm1-1", "bcc-csm1-1-m",
"inmcm4"), class = "factor"), precipitation = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
NA, NA, NA, NA, NA, NA), .Label = c("BNU-ESM", "CCSM4", "CESM1-BGC",
"CESM1-CAM5", "CESM1-FASTCHEM", "CESM1-WACCM", "CMCC-CESM", "CMCC-CMS",
"CNRM-CM5-2", "CanCM4", "CanESM2", "FGOALS-g2", "FIO-ESM", "GFDL-CM2p1",
"GFDL-CM3", "GFDL-ESM2M", "GISS-E2-H", "GISS-E2-H-CC", "GISS-E2-R",
"GISS-E2-R-CC", "HadCM3", "HadGEM2-AO", "HadGEM2-CC", "HadGEM2-ES",
"IPSL-CM5A-LR", "IPSL-CM5A-MR", "IPSL-CM5B-LR", "MIROC-ESM",
"MIROC-ESM-CHEM", "MIROC4h", "MIROC5", "MPI-ESM-LR", "MPI-ESM-MR",
"MPI-ESM-P", "MRI-CGCM3", "MRI-ESM1", "NorESM1-M", "NorESM1-ME",
"bcc-csm1-1", "bcc-csm1-1-m", "inmcm4"), class = "factor"), temperature = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_), .Label = character(0), class = "factor"), wind = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, NA, NA,
NA, NA, NA, NA, NA, NA), .Label = c("ACCESS1-0", "ACCESS1-3",
"BNU-ESM", "CMCC-CESM", "CMCC-CM", "CMCC-CMS", "CNRM-CM5", "CNRM-CM5-2",
"CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM2p1", "GFDL-CM3", "GFDL-ESM2G",
"GFDL-ESM2M", "GISS-E2-H", "GISS-E2-H-CC", "GISS-E2-R", "GISS-E2-R-CC",
"HadCM3", "HadGEM2-AO", "HadGEM2-CC", "HadGEM2-ES", "IPSL-CM5A-LR",
"IPSL-CM5A-MR", "IPSL-CM5B-LR", "MIROC-ESM", "MIROC-ESM-CHEM",
"MIROC4h", "MIROC5", "MPI-ESM-LR", "MPI-ESM-MR", "MPI-ESM-P",
"MRI-CGCM3", "MRI-ESM1", "NorESM1-M", "NorESM1-ME", "bcc-csm1-1",
"bcc-csm1-1-m", "inmcm4"), class = "factor")), .Names = c("cloudiness",
"humidity", "precipitation", "temperature", "wind"), row.names = c(NA,
-47L), class = "data.frame")
您可以先删除全部为 NA 的列(即 df[colSums(!is.na(df)) != 0]),然后再尝试使用 Reduce 和 intersect:
# Find the elements shared by every column that holds at least one value.
# 1. colSums(!is.na(df)) != 0 keeps only columns that are not entirely NA.
# 2. Each remaining column has its NAs stripped, so NA can never be reported
#    as a "common element" even if every retained column contains some NAs.
# 3. Reduce() folds intersect() pairwise across the cleaned columns.
Reduce(
  intersect,
  lapply(
    df[colSums(!is.na(df)) != 0],
    function(col) col[!is.na(col)]
  )
)
#[1] "BNU-ESM" "CanESM2" "GFDL-CM3" "GFDL-ESM2M"
#[5] "GISS-E2-H" "GISS-E2-H-CC" "GISS-E2-R" "GISS-E2-R-CC"
#[9] "HadCM3" "HadGEM2-AO" "HadGEM2-CC" "HadGEM2-ES"
#[13] "IPSL-CM5A-MR" "IPSL-CM5B-LR" "MIROC-ESM" "MIROC-ESM-CHEM"
#[17] "MIROC4h" "MIROC5" "MRI-CGCM3" "MRI-ESM1"
#[21] "NorESM1-M" "NorESM1-ME" "bcc-csm1-1" "bcc-csm1-1-m"
#[25] "inmcm4"
如何识别此数据框的所有列(不包括 NA
)共有的元素。我怎样才能做到这一点?我尝试了一些使用 intersect
和 unique
的方法但没有成功。
# Example data: a 47-row data frame with five factor columns
# (cloudiness, humidity, precipitation, temperature, wind) whose values are
# string labels. `cloudiness` has no missing values; `humidity`,
# `precipitation` and `wind` end with runs of NA (12, 6 and 8 respectively);
# `temperature` is entirely NA and has no levels.
# NOTE(review): the literal looks like dput() output -- confirm before
# hand-editing any of the integer codes or labels.
df <- structure(list(cloudiness = structure(1:47, .Label = c("ACCESS1-0",
"ACCESS1-3", "BNU-ESM", "CCSM4", "CESM1-BGC", "CESM1-CAM5", "CESM1-CAM5-1-FV2",
"CESM1-FASTCHEM", "CESM1-WACCM", "CMCC-CESM", "CMCC-CM", "CMCC-CMS",
"CNRM-CM5", "CNRM-CM5-2", "CSIRO-Mk3-6-0", "CanESM2", "FGOALS-g2",
"FIO-ESM", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M", "GISS-E2-H",
"GISS-E2-H-CC", "GISS-E2-R", "GISS-E2-R-CC", "HadCM3", "HadGEM2-AO",
"HadGEM2-CC", "HadGEM2-ES", "IPSL-CM5A-LR", "IPSL-CM5A-MR", "IPSL-CM5B-LR",
"MIROC-ESM", "MIROC-ESM-CHEM", "MIROC4h", "MIROC5", "MPI-ESM-LR",
"MPI-ESM-MR", "MPI-ESM-P", "MRI-CGCM3", "MRI-ESM1", "NorESM1-M",
"NorESM1-ME", "bcc-csm1-1", "bcc-csm1-1-m", "concat", "inmcm4"
), class = "factor"), humidity = structure(c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), .Label = c("ACCESS1-0", "ACCESS1-3", "BNU-ESM", "CCSM4",
"CESM1-BGC", "CESM1-CAM5", "CESM1-FASTCHEM", "CESM1-WACCM", "CNRM-CM5",
"CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
"GISS-E2-H", "GISS-E2-H-CC", "GISS-E2-R", "GISS-E2-R-CC", "HadCM3",
"HadGEM2-AO", "HadGEM2-CC", "HadGEM2-ES", "IPSL-CM5A-MR", "IPSL-CM5B-LR",
"MIROC-ESM", "MIROC-ESM-CHEM", "MIROC4h", "MIROC5", "MRI-CGCM3",
"MRI-ESM1", "NorESM1-M", "NorESM1-ME", "bcc-csm1-1", "bcc-csm1-1-m",
"inmcm4"), class = "factor"), precipitation = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
NA, NA, NA, NA, NA, NA), .Label = c("BNU-ESM", "CCSM4", "CESM1-BGC",
"CESM1-CAM5", "CESM1-FASTCHEM", "CESM1-WACCM", "CMCC-CESM", "CMCC-CMS",
"CNRM-CM5-2", "CanCM4", "CanESM2", "FGOALS-g2", "FIO-ESM", "GFDL-CM2p1",
"GFDL-CM3", "GFDL-ESM2M", "GISS-E2-H", "GISS-E2-H-CC", "GISS-E2-R",
"GISS-E2-R-CC", "HadCM3", "HadGEM2-AO", "HadGEM2-CC", "HadGEM2-ES",
"IPSL-CM5A-LR", "IPSL-CM5A-MR", "IPSL-CM5B-LR", "MIROC-ESM",
"MIROC-ESM-CHEM", "MIROC4h", "MIROC5", "MPI-ESM-LR", "MPI-ESM-MR",
"MPI-ESM-P", "MRI-CGCM3", "MRI-ESM1", "NorESM1-M", "NorESM1-ME",
"bcc-csm1-1", "bcc-csm1-1-m", "inmcm4"), class = "factor"), temperature = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_), .Label = character(0), class = "factor"), wind = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, NA, NA,
NA, NA, NA, NA, NA, NA), .Label = c("ACCESS1-0", "ACCESS1-3",
"BNU-ESM", "CMCC-CESM", "CMCC-CM", "CMCC-CMS", "CNRM-CM5", "CNRM-CM5-2",
"CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM2p1", "GFDL-CM3", "GFDL-ESM2G",
"GFDL-ESM2M", "GISS-E2-H", "GISS-E2-H-CC", "GISS-E2-R", "GISS-E2-R-CC",
"HadCM3", "HadGEM2-AO", "HadGEM2-CC", "HadGEM2-ES", "IPSL-CM5A-LR",
"IPSL-CM5A-MR", "IPSL-CM5B-LR", "MIROC-ESM", "MIROC-ESM-CHEM",
"MIROC4h", "MIROC5", "MPI-ESM-LR", "MPI-ESM-MR", "MPI-ESM-P",
"MRI-CGCM3", "MRI-ESM1", "NorESM1-M", "NorESM1-ME", "bcc-csm1-1",
"bcc-csm1-1-m", "inmcm4"), class = "factor")), .Names = c("cloudiness",
"humidity", "precipitation", "temperature", "wind"), row.names = c(NA,
-47L), class = "data.frame")
您可以先删除全部为 NA 的列(即 df[colSums(!is.na(df)) != 0]),然后再尝试使用 Reduce 和 intersect:
# Find the elements shared by every column that holds at least one value.
# 1. colSums(!is.na(df)) != 0 keeps only columns that are not entirely NA.
# 2. Each remaining column has its NAs stripped, so NA can never be reported
#    as a "common element" even if every retained column contains some NAs.
# 3. Reduce() folds intersect() pairwise across the cleaned columns.
Reduce(
  intersect,
  lapply(
    df[colSums(!is.na(df)) != 0],
    function(col) col[!is.na(col)]
  )
)
#[1] "BNU-ESM" "CanESM2" "GFDL-CM3" "GFDL-ESM2M"
#[5] "GISS-E2-H" "GISS-E2-H-CC" "GISS-E2-R" "GISS-E2-R-CC"
#[9] "HadCM3" "HadGEM2-AO" "HadGEM2-CC" "HadGEM2-ES"
#[13] "IPSL-CM5A-MR" "IPSL-CM5B-LR" "MIROC-ESM" "MIROC-ESM-CHEM"
#[17] "MIROC4h" "MIROC5" "MRI-CGCM3" "MRI-ESM1"
#[21] "NorESM1-M" "NorESM1-ME" "bcc-csm1-1" "bcc-csm1-1-m"
#[25] "inmcm4"