在 R 中对多列使用聚集函数时出错 "Attribute not identical"

Error "Attribute not identical" in using gather function for the multi column in R

我正在使用以下数据框。

df2<-final.data%>% gather(Hospital,Attendance,contains("Attendance"))
df2 %>% spread(Hospital, Attendance)
    > dput(final.data[0:2,])
    structure(list(RoyalPerth.Attendance = c(235, 209), RoyalPerth.Admissions = c(99, 
97), RoyalPerth.Tri1 = c("8", "N/A"), RoyalPerth.Tri2 = c(33, 
41), RoyalPerth.Tri3 = c(89, 73), RoyalPerth.Tri4 = c(85, 80), 
    RoyalPert

h.Tri5 = c("20", "14"), Fremantle.Attendance = c(155, 
    145), Fremantle.Admissions = c(70, 56), Fremantle.Tri1 = c("N/A", 
    "N/A"), Fremantle.Tri2 = c(25, 22), Fremantle.Tri3 = c(67, 
    51), Fremantle.Tri4 = c(54, 47), Fremantle.Tri5 = c(9, 24
    ), PrincessMargaret.Attendance = c(252, 219), PrincessMargaret.Admissions = c(59, 
    47), PrincessMargaret.Tri1 = c("N/A", "N/A"), PrincessMargaret.Tri2 = c("13", 
    "14"), PrincessMargaret.Tri3 = c(75, 61), PrincessMargaret.Tri4 = c(159, 
    139), PrincessMargaret.Tri5 = c("4", "4"), KingEdward.Attendance = c(52, 
    43), KingEdward.Admissions = c("6", "7"), KingEdward.Tri1 = c("N/A", 
    "N/A"), KingEdward.Tri2 = c("N/A", "N/A"), KingEdward.Tri3 = c("7", 
    "N/A"), KingEdward.Tri4 = c(20, 25), KingEdward.Tri5 = c("25", 
    "17"), SirCharles.Attendance = c(209, 184), SirCharles.Admissions = c(109, 
    112), SirCharles.Tri1 = c("N/A", "N/A"), SirCharles.Tri2 = c(42, 
    43), SirCharles.Tri3 = c(108, 73), SirCharles.Tri4 = c(47, 
    61), SirCharles.Tri5 = c("11", "5"), Armadale.Attendance = c(166, 
    175), Armadale.Admissions = c(19, 25), Armadale.Tri1 = c("N/A", 
    "N/A"), Armadale.Tri2 = c(16, 26), Armadale.Tri3 = c(62, 
    73), Armadale.Tri4 = c(79, 55), Armadale.Tri5 = c("9", "19"
    ), Swan.Attendance = c(133, 129), Swan.Admissions = c(17, 
    25), Swan.Tri1 = c("N/A", "N/A"), Swan.Tri2 = c(29, 25), 
    Swan.Tri3 = c(59, 57), Swan.Tri4 = c(42, 43), Swan.Tri5 = c("N/A", 
    "4"), Rockingham.Attendance = c(155, 145), Rockingham.Admissions = c("10", 
    "24"), Rockingham.Tri1 = c("N/A", "N/A"), Rockingham.Tri2 = c(12, 
    26), Rockingham.Tri3 = c(51, 45), Rockingham.Tri4 = c(81, 
    65), Rockingham.Tri5 = c("11", "8"), Joondalup.Attendance = c(267, 
    241), Joondalup.Admissions = c(73, 81), Joondalup.Tri1 = c("N/A", 
    "N/A"), Joondalup.Tri2 = c(27, 23), Joondalup.Tri3 = c(75, 
    78), Joondalup.Tri4 = c(151, 133), Joondalup.Tri5 = c("12", 
    "7")), row.names = 1:2, class = "data.frame")

错误:

警告信息: 测量变量之间的属性不相同; 他们将被删除

我试过以下方法: hospital.dataset<-gather(hospital,triage,sum,Tri1:Tri5) 收集分类 在数据集上使用 cbind 之后。

我想使用 gather 将其转换为长数据集。

 dput(hospital.dataset[1:2,])
structure(list(Date = structure(c(-714598, -714597), class = "Date"), [enter image description here][1]
    Attendance = c(235, 209), Admissions = c(99, 97), Hospital = structure(c(1L, 
    1L), .Label = c("RoyalPerth Hospital", "Fremantle Hospital", 
    "Princess Margaret Hospital", "KingEdward Hospital", "SirCharles Hospital", 
    "Armadale Hospital", "Swan Hospital", "Rockingham Hospital", 
    "Joondalup Hospital"), class = "factor"), triage = c("Tri1", 
    "Tri1"), sum = c(8, 0)), row.names = 1:2, class = "data.frame")

像这样。

提前致谢。

Expected Dataframe

注:这个方案感觉很费力。所以请考虑可能有更优雅的方法可用。

此数据的一个问题是您想要“宽”值(AttendanceAdmissions)与您想要“长”值(Tri1Tri1Tri2,等等)。

这个解决方案在整个数据框上使用pivot_longer(注意:pivot_longer是新的gather语法),然后separate拉出医院名称来自特定数据字段。

然后它分成两个数据帧,将pivot_wider应用于Attendance/Admissions列,然后重新加入。

library(tidyverse)

final_data_long <- final.data.raw %>% 
  mutate_all(as.character) %>%
  mutate(row_n = row_number()) %>%
  pivot_longer(-row_n, names_to = "field", values_to = "value") %>%
  separate(field, into = c("hospital", "category")) 

attend_admit <- final_data_long %>%
  filter(str_detect(category, "Attendance|Admissions"))

triage <- final_data_long %>% anti_join(attend_admit)

attend_admit_long <- 
  attend_admit %>%
  group_by(row_n) %>%
  pivot_wider(id_cols = c(row_n, hospital), names_from = category, 
              values_from = value)

triage %>% 
  inner_join(attend_admit_long, by = c("row_n", "hospital")) %>%
  arrange(hospital) %>%
  select(-row_n)

输出

# A tibble: 90 x 5
   hospital category value Attendance Admissions
   <chr>    <chr>    <chr> <chr>      <chr>     
 1 Armadale Tri1     N/A   166        19        
 2 Armadale Tri2     16    166        19        
 3 Armadale Tri3     62    166        19        
 4 Armadale Tri4     79    166        19        
 5 Armadale Tri5     9     166        19        
 6 Armadale Tri1     N/A   175        25        
 7 Armadale Tri2     26    175        25        
 8 Armadale Tri3     73    175        25        
 9 Armadale Tri4     55    175        25        
10 Armadale Tri5     19    175        25        
# … with 80 more rows

数据
*我无法让 OP 的 dput 工作,这里的版本可以是 copy/pasted:

final.data.raw <- structure(
  list(RoyalPerth.Attendance = c(235, 209), RoyalPerth.Admissions = c(99, 97), RoyalPerth.Tri1 = c("8", "N/A"), 
       RoyalPerth.Tri2 = c(33, 41), RoyalPerth.Tri3 = c(89, 73), RoyalPerth.Tri4 = c(85, 80), 
       RoyalPerth.Tri5 = c("20", "14"), Fremantle.Attendance = c(155, 145), Fremantle.Admissions = c(70, 56), 
       Fremantle.Tri1 = c("N/A", "N/A"), Fremantle.Tri2 = c(25, 22), Fremantle.Tri3 = c(67, 51), 
       Fremantle.Tri4 = c(54, 47), Fremantle.Tri5 = c(9, 24), PrincessMargaret.Attendance = c(252, 219), 
       PrincessMargaret.Admissions = c(59,47), PrincessMargaret.Tri1 = c("N/A", "N/A"), PrincessMargaret.Tri2 = c("13", "14"), 
       PrincessMargaret.Tri3 = c(75, 61), PrincessMargaret.Tri4 = c(159, 139), PrincessMargaret.Tri5 = c("4", "4"), 
       KingEdward.Attendance = c(52, 43), KingEdward.Admissions = c("6", "7"), KingEdward.Tri1 = c("N/A", "N/A"), 
       KingEdward.Tri2 = c("N/A", "N/A"), KingEdward.Tri3 = c("7", "N/A"), KingEdward.Tri4 = c(20, 25), 
       KingEdward.Tri5 = c("25", "17"), SirCharles.Attendance = c(209, 184), SirCharles.Admissions = c(109, 112), 
       SirCharles.Tri1 = c("N/A", "N/A"), SirCharles.Tri2 = c(42, 43), SirCharles.Tri3 = c(108, 73), 
       SirCharles.Tri4 = c(47, 61), SirCharles.Tri5 = c("11", "5"), Armadale.Attendance = c(166, 175), 
       Armadale.Admissions = c(19, 25), Armadale.Tri1 = c("N/A", "N/A"), Armadale.Tri2 = c(16, 26), 
       Armadale.Tri3 = c(62, 73), Armadale.Tri4 = c(79, 55), Armadale.Tri5 = c("9", "19"), 
       Swan.Attendance = c(133, 129), Swan.Admissions = c(17, 25), Swan.Tri1 = c("N/A", "N/A"), 
       Swan.Tri2 = c(29, 25), Swan.Tri3 = c(59, 57), Swan.Tri4 = c(42, 43), 
       Swan.Tri5 = c("N/A", "4"), Rockingham.Attendance = c(155, 145), Rockingham.Admissions = c("10", "24"), 
       Rockingham.Tri1 = c("N/A", "N/A"), Rockingham.Tri2 = c(12, 26), Rockingham.Tri3 = c(51, 45), 
       Rockingham.Tri4 = c(81, 65), Rockingham.Tri5 = c("11", "8"), Joondalup.Attendance = c(267, 241), 
       Joondalup.Admissions = c(73, 81), Joondalup.Tri1 = c("N/A", "N/A"), Joondalup.Tri2 = c(27, 23), 
       Joondalup.Tri3 = c(75, 78), Joondalup.Tri4 = c(151, 133), Joondalup.Tri5 = c("12", "7")), 
  row.names = 1:2, class = "data.frame")