在 sapply 中使用 rep 根据另一个向量拉伸一个向量
Using rep inside sapply to stretch a vector according to another vector
我想生成一个由边组成的 data.frame。当多条边终止于同一个节点时,就会出现问题。边由向量 from 和 to 定义。
# Data
# Vertex lookup table: every label is paired with an integer id.
vertices <- data.frame(
  id = seq_len(3),
  label = letters[1:3],
  stringsAsFactors = FALSE
)
# Target label of every edge.
to <- letters[1:3]
# Source labels, exactly one per edge.
from1 <- c("c", "a", "b")
# Source labels where the last entry packs several sources into one
# comma-separated string.
from2 <- c("c", "a", "a,b,c")
我尝试了什么:
# Attempt 1
# Build an edge table from label vectors by replacing every label with the id
# stored in the global `vertices` table.
#
# from, to: character vectors of vertex labels, one label per edge.
# Returns a data.frame with the columns `from` and `to` holding vertex ids.
create_edges_1 <- function(from, to) {
  # id(s) of the vertices whose label equals `lbl`
  label_to_id <- function(lbl) {
    vertices$id[vertices$label == lbl]
  }
  data.frame(
    from = sapply(from, label_to_id),
    to = sapply(to, label_to_id),
    stringsAsFactors = FALSE
  )
}
这适用于例如 create_edges_1(from1, to)
,输出是:
from to
c 3 1
a 1 2
b 2 3
但是,对于例如 from2 这样的输入,这种尝试就失败了。
所以我尝试了以下方法:
# Attempt 2
# Attempt 2: split comma-separated labels and try to repeat ("stretch") the
# target ids so that they line up with the split source ids.
# NOTE(review): both assignments below read the global `from2`; the `from`
# argument is never used, so the result does not depend on what the caller
# passes as `from`.
create_edges_2 <- function(from, to) {
# The inner anonymous function receives one target id `x` at a time, while
# `times` is the whole vector of per-entry label counts of from2. rep() needs
# `times` to have length 1 or length(x), which is why this call stops with
# "invalid 'times' argument" as soon as from2 has more than one entry.
to <- sapply(unlist(sapply(strsplit(to, ","), function(x){vertices$id[vertices$label == x]})), function(x){rep(x, sapply(strsplit(from2, ","), length))})
# Source ids: split every from2 entry on commas and look the labels up.
# NOTE(review): for a multi-label `x`, `vertices$label == x` compares
# element-wise (with recycling) rather than testing membership - verify
# before reusing this lookup.
from <- unlist(sapply(strsplit(from2, ","), function(x){vertices$id[vertices$label == x]}))
data.frame(from = from, to = to, stringsAsFactors = FALSE)
}
我的想法是:对于每个有多条边终止的节点,相应地“拉伸”(stretch)to。但是 create_edges_2(from1, to) 和 create_edges_2(from2, to) 都抛出错误:
Error in rep(x, sapply(strsplit(from2, ","), length)) :
invalid 'times' argument
我在 sapply
语句中做错了什么?
create_edges_2(from2, to)
的预期输出是:
from to
3 1
1 2
1 3
2 3
3 3
这里有一个方法:
# Attempt 3
library(dplyr)
# Translate each target label into its vertex id.
to <- sapply(to, function(x) vertices$id[vertices$label == x])
# Split every entry of from2 on commas once and reuse the pieces below.
from_parts <- strsplit(from2, ",")
# All source labels as one flat character vector.
from0 <- as.character(unlist(from_parts))
# Number of source labels per entry, i.e. how often each target id has to be
# repeated.
lengths0 <- lengths(from_parts)
# Stretch `to`: repeat the i-th target id once per source label of entry i.
# seq_along() keeps the loop from running when there are no entries (1:0
# would iterate over c(1, 0)), and the pieces are collected in a preallocated
# list instead of growing a vector on every iteration.
to_parts <- vector("list", length(lengths0))
for (i in seq_along(lengths0)) {
  to_parts[[i]] <- rep(to[i], lengths0[i])
}
to0 <- unlist(to_parts)
# Translate the flattened source labels into vertex ids.
from <- sapply(from0, function(x) vertices$id[vertices$label == x])
edges <- data.frame(from = from, to = to0, stringsAsFactors = FALSE)
edges
按要求给出此结果:
from to
1 3 1
2 1 2
3 1 3
4 2 3
5 3 3
思路是用逗号分隔符拆分 from,并记录每个元素拆分后的长度,以便据此“拉伸”(stretch)每个节点。这里用一个 for 循环来完成这一步。
您可以为此使用联接或 match
# Break each from2 entry into its comma-separated labels (one list item per
# entry).
f2 <- strsplit(from2, ",")
# One row per individual source label; every target label is repeated as many
# times as its entry has source labels, which keeps both columns aligned.
df <- data.frame(
  from = unlist(f2),
  to = rep(to, times = lengths(f2)),
  stringsAsFactors = FALSE
)
使用 match:
library(tidyverse)
# Replace the labels in every column of df by the corresponding vertex id;
# match() gives the position of each label within vertices$label.
map_dfc(df, function(labels) vertices$id[match(labels, vertices$label)])
# # A tibble: 5 x 2
# from to
# <int> <int>
# 1 3 1
# 2 1 2
# 3 1 3
# 4 2 3
# 5 3 3
使用联接:
library(dplyr)
# Look up the id of every `from` label, then of every `to` label. Both joins
# contribute a column named `id`, so the second join suffixes them as id.x
# (id of `from`) and id.y (id of `to`).
df %>%
  inner_join(vertices, by = c(from = "label")) %>%
  inner_join(vertices, by = c(to = "label")) %>%
  # Keep only the suffixed id columns. The dot is escaped and the pattern is
  # anchored, so only names ending in ".x" or ".y" are selected; select()
  # replaces the superseded select_at(vars(...)) form.
  select(matches("\\.[xy]$"))
# id.x id.y
# 1 3 1
# 2 1 2
# 3 1 3
# 4 2 3
# 5 3 3
我想生成一个由边组成的 data.frame。当多条边终止于同一个节点时,就会出现问题。边由向量 from 和 to 定义。
# Data
# Vertex lookup table: every label is paired with an integer id.
vertices <- data.frame(
  id = seq_len(3),
  label = letters[1:3],
  stringsAsFactors = FALSE
)
# Target label of every edge.
to <- letters[1:3]
# Source labels, exactly one per edge.
from1 <- c("c", "a", "b")
# Source labels where the last entry packs several sources into one
# comma-separated string.
from2 <- c("c", "a", "a,b,c")
我尝试了什么:
# Attempt 1
# Build an edge table from label vectors by replacing every label with the id
# stored in the global `vertices` table.
#
# from, to: character vectors of vertex labels, one label per edge.
# Returns a data.frame with the columns `from` and `to` holding vertex ids.
create_edges_1 <- function(from, to) {
  # id(s) of the vertices whose label equals `lbl`
  label_to_id <- function(lbl) {
    vertices$id[vertices$label == lbl]
  }
  data.frame(
    from = sapply(from, label_to_id),
    to = sapply(to, label_to_id),
    stringsAsFactors = FALSE
  )
}
这适用于例如 create_edges_1(from1, to)
,输出是:
from to
c 3 1
a 1 2
b 2 3
但是,对于例如 from2 这样的输入,这种尝试就失败了。
所以我尝试了以下方法:
# Attempt 2
# Attempt 2: split comma-separated labels and try to repeat ("stretch") the
# target ids so that they line up with the split source ids.
# NOTE(review): both assignments below read the global `from2`; the `from`
# argument is never used, so the result does not depend on what the caller
# passes as `from`.
create_edges_2 <- function(from, to) {
# The inner anonymous function receives one target id `x` at a time, while
# `times` is the whole vector of per-entry label counts of from2. rep() needs
# `times` to have length 1 or length(x), which is why this call stops with
# "invalid 'times' argument" as soon as from2 has more than one entry.
to <- sapply(unlist(sapply(strsplit(to, ","), function(x){vertices$id[vertices$label == x]})), function(x){rep(x, sapply(strsplit(from2, ","), length))})
# Source ids: split every from2 entry on commas and look the labels up.
# NOTE(review): for a multi-label `x`, `vertices$label == x` compares
# element-wise (with recycling) rather than testing membership - verify
# before reusing this lookup.
from <- unlist(sapply(strsplit(from2, ","), function(x){vertices$id[vertices$label == x]}))
data.frame(from = from, to = to, stringsAsFactors = FALSE)
}
我的想法是:对于每个有多条边终止的节点,相应地“拉伸”(stretch)to。但是 create_edges_2(from1, to) 和 create_edges_2(from2, to) 都抛出错误:
Error in rep(x, sapply(strsplit(from2, ","), length)) : invalid 'times' argument
我在 sapply
语句中做错了什么?
create_edges_2(from2, to)
的预期输出是:
from to
3 1
1 2
1 3
2 3
3 3
这里有一个方法:
# Attempt 3
library(dplyr)
# Translate each target label into its vertex id.
to <- sapply(to, function(x) vertices$id[vertices$label == x])
# Split every entry of from2 on commas once and reuse the pieces below.
from_parts <- strsplit(from2, ",")
# All source labels as one flat character vector.
from0 <- as.character(unlist(from_parts))
# Number of source labels per entry, i.e. how often each target id has to be
# repeated.
lengths0 <- lengths(from_parts)
# Stretch `to`: repeat the i-th target id once per source label of entry i.
# seq_along() keeps the loop from running when there are no entries (1:0
# would iterate over c(1, 0)), and the pieces are collected in a preallocated
# list instead of growing a vector on every iteration.
to_parts <- vector("list", length(lengths0))
for (i in seq_along(lengths0)) {
  to_parts[[i]] <- rep(to[i], lengths0[i])
}
to0 <- unlist(to_parts)
# Translate the flattened source labels into vertex ids.
from <- sapply(from0, function(x) vertices$id[vertices$label == x])
edges <- data.frame(from = from, to = to0, stringsAsFactors = FALSE)
edges
按要求给出此结果:
from to
1 3 1
2 1 2
3 1 3
4 2 3
5 3 3
思路是用逗号分隔符拆分 from,并记录每个元素拆分后的长度,以便据此“拉伸”(stretch)每个节点。这里用一个 for 循环来完成这一步。
您可以为此使用联接或 match
# Break each from2 entry into its comma-separated labels (one list item per
# entry).
f2 <- strsplit(from2, ",")
# One row per individual source label; every target label is repeated as many
# times as its entry has source labels, which keeps both columns aligned.
df <- data.frame(
  from = unlist(f2),
  to = rep(to, times = lengths(f2)),
  stringsAsFactors = FALSE
)
使用 match:
library(tidyverse)
# Replace the labels in every column of df by the corresponding vertex id;
# match() gives the position of each label within vertices$label.
map_dfc(df, function(labels) vertices$id[match(labels, vertices$label)])
# # A tibble: 5 x 2
# from to
# <int> <int>
# 1 3 1
# 2 1 2
# 3 1 3
# 4 2 3
# 5 3 3
使用联接:
library(dplyr)
# Look up the id of every `from` label, then of every `to` label. Both joins
# contribute a column named `id`, so the second join suffixes them as id.x
# (id of `from`) and id.y (id of `to`).
df %>%
  inner_join(vertices, by = c(from = "label")) %>%
  inner_join(vertices, by = c(to = "label")) %>%
  # Keep only the suffixed id columns. The dot is escaped and the pattern is
  # anchored, so only names ending in ".x" or ".y" are selected; select()
  # replaces the superseded select_at(vars(...)) form.
  select(matches("\\.[xy]$"))
# id.x id.y
# 1 3 1
# 2 1 2
# 3 1 3
# 4 2 3
# 5 3 3