按行将重复值设为缺失值(用于二级邻居)

Rowwise duplicate to missing for second degree neighbors

我可能只是没有找到正确的搜索词。我想做的是:如果某个值在同一行中已经出现过,就把后面重复出现的那个值删除(设置为 NA)。

从 df 开始,我想到达 df2。

# Example input: one row per region with its id, five first-degree neighbour
# slots (nbr_*) and five second-degree neighbour slots (scdnbr_*).
# Each region is written as one row vector and stacked with rbind(), so no
# transpose or row-name reset is needed.
df <- setNames(
  as.data.frame(rbind(
    c("Ashanti", "Brong Ahafo", "Central", "Eastern", "Western", NA,
      "Ashanti", "Eastern", "Northern", "Volta", "Western"),
    c("Brong Ahafo", "Ashanti", "Eastern", "Northern", "Volta", "Western",
      "Brong Ahafo", "Central", "Eastern", "Western", NA)
  )),
  c("id", paste0("nbr_", 1:5), paste0("scdnbr_", 1:5))
)

# Expected result: same layout as df, but every value that already occurred
# earlier in its row has been replaced by NA.
# Rows are stacked with rbind() instead of transposing a column-wise frame.
df2 <- setNames(
  as.data.frame(rbind(
    c("Ashanti", "Brong Ahafo", "Central", "Eastern", "Western", NA,
      NA, NA, "Northern", "Volta", NA),
    c("Brong Ahafo", "Ashanti", "Eastern", "Northern", "Volta", "Western",
      NA, "Central", NA, NA, NA)
  )),
  c("id", paste0("nbr_", 1:5), paste0("scdnbr_", 1:5))
)

这部分可能没有必要说明,但实际的应用背景是:使用 poly2nb 命令获取加纳境内各地区的二阶相邻区域。

# Load the packages used by the neighbour computation through pacman.
pacman::p_load(
  "spdep", "sp", "expp",
  "raster", "dplyr", "tidyr"
)

# Fetch the GADM level-1 data set for country code GHA (Ghana).
ghana <- getData(
  "GADM",
  country = "GHA",
  level = 1
)


# first degree neighbors
# Build the neighbour list for the regions, labelled by region name, then
# turn it into a long table with the columns id and id_neigh used below.
nb <- poly2nb(ghana, row.names = ghana$NAME_1)
nb <- neighborsDataFrame(nb)

# Number each region's neighbours 1..k and widen to one row per region with
# columns nbr_1 ... nbr_k (regions with fewer neighbours get NA).
# pivot_wider() replaces the superseded spread(); ungroup() drops the
# per-id grouping so it does not leak into later steps.
nb <- nb %>%
  group_by(id) %>%
  mutate(nbr = row_number()) %>%
  ungroup() %>%
  pivot_wider(
    names_from = nbr,
    values_from = id_neigh,
    names_prefix = "nbr_"
  )

# second degree neighbors
# The script never defines `nb2`; the wide first-degree table built in the
# previous step is `nb`, so that is the table used here.

# Copy of the first-degree table whose neighbour columns are relabelled as
# second-degree columns, so they do not clash after the join.
nb2_2 <- nb %>%
  rename(
    scdnbr_1 = nbr_1,
    scdnbr_2 = nbr_2,
    scdnbr_3 = nbr_3,
    scdnbr_4 = nbr_4,
    scdnbr_5 = nbr_5
  )

# For every region, attach the neighbours of its first neighbour (nbr_1)
# by matching nbr_1 against the id column of the relabelled copy.
nb3 <- nb %>%
  left_join(nb2_2, by = c("nbr_1" = "id"))

然后我会继续为剩下的四个一级邻居加入二级邻居。 但在那一步之前,我想实现我上面描述的(如 df 到 df2)。

谢谢大家!

# Row-wise de-duplication: set to NA every cell whose value already appears
# further left in the same row.
# duplicated(as.list(df)) compares whole columns, which matches this goal
# only when df has exactly one row. Building the mask row by row gives the
# same result for a single row and also handles data frames with several rows.
cells <- as.matrix(df)
repeat_mask <- matrix(
  vapply(
    seq_len(nrow(cells)),
    function(i) duplicated(cells[i, ]),
    logical(ncol(cells))
  ),
  nrow = nrow(cells),
  byrow = TRUE
)
is.na(df) <- repeat_mask
df
       id       nbr_1   nbr_2   nbr_3   nbr_4 nbr_5 scdnbr_1 scdnbr_2 scdnbr_3 scdnbr_4
1 Ashanti Brong Ahafo Central Eastern Western    NA       NA       NA Northern    Volta
  scdnbr_5
1       NA

要获得所需的输出,我们可以这样做:

# Walk over the rows of df; inside each row, overwrite every value that has
# already occurred earlier in that row with NA. apply() returns the rows as
# columns, so the result is transposed back.
df1 <- t(apply(
  df,
  MARGIN = 1,
  FUN = function(row_values) {
    row_values[duplicated(row_values)] <- NA
    row_values
  }
))

# Derive one label per column of df1 from the cell values.
# Steps:
#   1. Reshape df1 to long form (columns name / value).
#   2. Group by value so that row_number() - 1 counts how many times the
#      same value has been seen before (0 for the first occurrence).
#   3. Wrap every value as "X.<value>."; an NA value becomes "X.NA." here.
#   4. Repeated NA values (counter > 0) become "NA..<counter>", the first NA
#      is turned back into a real NA.
#   5. Drop the counter, replace the first space in each label with a dot,
#      and reshape back to wide form keyed by the original column names.
# Base sub() with fixed = TRUE is used for step 5 instead of str_replace():
# the visible script never attaches stringr, and for a literal single-space
# pattern the first-match replacement is the same.
x <- df1 %>%
  as_tibble() %>%
  pivot_longer(
    everything()
  ) %>%
  group_by(value) %>%
  mutate(id = row_number()-1,
         value = paste0("X.",value,"."),
         value = ifelse(value == "X.NA." & id > 0, paste0(NA, "..", id), value),
         value = ifelse(value == "X.NA.", NA, value)) %>%
  select(-id) %>%
  mutate(value = sub(" ", ".", value, fixed = TRUE)) %>%
  pivot_wider(
    names_from = name,
    values_from = value
  )

# Use the labels held in x as the column names of df1.
# NOTE(review): x is the result of a pivot_wider() pipeline, so it looks like
# a one-row table rather than a plain character vector — confirm the
# coercion done by `colnames<-` yields the intended names.
colnames(df1) <- x

df1
     X.Ashanti. X.Brong.Ahafo. X.Central. X.Eastern. X.Western. <NA> NA..1 NA..2 X.Northern. X.Volta. NA..3
[1,] "Ashanti"  "Brong Ahafo"  "Central"  "Eastern"  "Western"  NA   NA    NA    "Northern"  "Volta"  NA