根据属性组件匹配列表元素
Match list elements based on attribute component
我有一个数据集,我将其分成两个列表 int1
和 int2
。
library(lubridate)
library(tidyverse)
library(purrr)
date <- rep_len(seq(dmy("01-01-2011"), dmy("01-01-2013"), by = "days"), 300)
ID <- rep(c("A","B", "C"), 300)
df <- data.frame(date = date,
x = runif(length(date), min = 60000, max = 80000),
y = runif(length(date), min = 800000, max = 900000),
ID)
df$month <- month(df$date)
df$year <- year(df$date)
# Create first list
int1 <- df %>%
mutate(new = floor_date(date, '10 day')) %>%
mutate(new = if_else(day(new) == 31, new - days(10), new)) %>%
group_by(ID, new) %>%
filter(month == "1") %>%
group_split()
# Create second list
int2 <- df %>%
mutate(new = floor_date(date, '10 day')) %>%
mutate(new = if_else(day(new) == 31, new - days(10), new)) %>%
group_by(ID, new) %>%
filter(month == "2") %>%
group_split()
names(int1) <- sapply(int1, function(x) paste(x$ID[1],
x$year[1], sep = '_'))
names(int2) <- sapply(int2, function(x) paste(x$ID[1],
x$year[1], sep = '_'))
然后我为每个列表分配一个属性 (match
)。我创建了一个函数 check
来更轻松地获取此属性。我为这个例子从一个列表中删除了一些元素。
int1 <- int1[-c(3,6)]
# Convenience function to grab the attributes for you
check <- function(x) {
return(attr(x, "match"))
}
# Add an attribute to hold the attributes of each list element
attr(int1, "match") <- data.frame(id = sapply(int1, function(x) paste(x$ID[1])),
interval_start_date = sapply(int1, function(x) paste(x$new[1]))
)
# Check the attributes
check(int1)
# Add an attribute "tab" to hold the attributes of each list element
attr(int2, "match") <- data.frame(id = sapply(int2, function(x) paste(x$ID[1])),
interval_start_date = sapply(int2, function(x) paste(x$new[1]))
)
# Check the attributes
check(int2)
我想根据我添加的属性删除不在另一个元素中的元素。具体来说,我想删除任何不具有相同 interval_start_date
和 ID
关联的内容。对于 interval_start_date
,只有年份和日期必须匹配,因为两个列表之间的月份很可能不同。在这种情况下,我希望 int2
匹配 int1
。关于如何做到这一点的任何想法?如果可能,首选基本 r 方法。
# Expected results
expected_int2 <- list(int2[[1]], int2[[2]], int2[[3]], int2[[4]], int2[[5]],
int2[[6]], int2[[7]])
names(expected_int2) <- sapply(int1, function(x) paste(x$ID[1],
x$year[1], sep = "_"))
我们可以在 paste
ing 'id' 和 format
ted 'interval_start_date' 之后创建一个 %in%
的索引,即删除 'month' 部分
i1 <- with(check(int2), paste(id, format(as.Date(interval_start_date),
"%Y-%d"))) %in% with(check(int1), paste(id,
format(as.Date(interval_start_date), "%Y-%d")))
> which(i1)
[1] 1 2 4 5 7 8 9
out <- int2[i1]
我有一个数据集,我将其分成两个列表 int1
和 int2
。
library(lubridate)
library(tidyverse)
library(purrr)
date <- rep_len(seq(dmy("01-01-2011"), dmy("01-01-2013"), by = "days"), 300)
ID <- rep(c("A","B", "C"), 300)
df <- data.frame(date = date,
x = runif(length(date), min = 60000, max = 80000),
y = runif(length(date), min = 800000, max = 900000),
ID)
df$month <- month(df$date)
df$year <- year(df$date)
# Create first list
int1 <- df %>%
mutate(new = floor_date(date, '10 day')) %>%
mutate(new = if_else(day(new) == 31, new - days(10), new)) %>%
group_by(ID, new) %>%
filter(month == "1") %>%
group_split()
# Create second list
int2 <- df %>%
mutate(new = floor_date(date, '10 day')) %>%
mutate(new = if_else(day(new) == 31, new - days(10), new)) %>%
group_by(ID, new) %>%
filter(month == "2") %>%
group_split()
names(int1) <- sapply(int1, function(x) paste(x$ID[1],
x$year[1], sep = '_'))
names(int2) <- sapply(int2, function(x) paste(x$ID[1],
x$year[1], sep = '_'))
然后我为每个列表分配一个属性 (match
)。我创建了一个函数 check
来更轻松地获取此属性。我为这个例子从一个列表中删除了一些元素。
int1 <- int1[-c(3,6)]
# Convenience function to grab the attributes for you
check <- function(x) {
return(attr(x, "match"))
}
# Add an attribute to hold the attributes of each list element
attr(int1, "match") <- data.frame(id = sapply(int1, function(x) paste(x$ID[1])),
interval_start_date = sapply(int1, function(x) paste(x$new[1]))
)
# Check the attributes
check(int1)
# Add an attribute "tab" to hold the attributes of each list element
attr(int2, "match") <- data.frame(id = sapply(int2, function(x) paste(x$ID[1])),
interval_start_date = sapply(int2, function(x) paste(x$new[1]))
)
# Check the attributes
check(int2)
我想根据我添加的属性删除不在另一个元素中的元素。具体来说,我想删除任何不具有相同 interval_start_date
和 ID
关联的内容。对于 interval_start_date
,只有年份和日期必须匹配,因为两个列表之间的月份很可能不同。在这种情况下,我希望 int2
匹配 int1
。关于如何做到这一点的任何想法?如果可能,首选基本 r 方法。
# Expected results
expected_int2 <- list(int2[[1]], int2[[2]], int2[[3]], int2[[4]], int2[[5]],
int2[[6]], int2[[7]])
names(expected_int2) <- sapply(int1, function(x) paste(x$ID[1],
x$year[1], sep = "_"))
我们可以在 paste
ing 'id' 和 format
ted 'interval_start_date' 之后创建一个 %in%
的索引,即删除 'month' 部分
i1 <- with(check(int2), paste(id, format(as.Date(interval_start_date),
"%Y-%d"))) %in% with(check(int1), paste(id,
format(as.Date(interval_start_date), "%Y-%d")))
> which(i1)
[1] 1 2 4 5 7 8 9
out <- int2[i1]