将向量的最小值排列并匹配到下一个向量中的对应值
Arrange and match minimum value of vector to corresponding value in the next vector
我有一个 dataframe，它按每个 id 的最小值（从高到低）排序。我想实现的是：让每个 id 的最小值与下一个 id 中对应值的位置对齐，依此类推。另外我创建了一个新向量 Time，即 1:nrow(df)。
我已经手动完成了这个,它看起来像这样:
我想知道如何使这个过程自动化?
这是我的数据示例:
# Example data: one decreasing MA_vol series per R_id. The groups have
# different lengths (14, 15, 17, 23 and 17 observations).
MA_vol <- c(
  0.2486667, 0.2463333, 0.2426667, 0.2423333, 0.2376667, 0.2323333,
  0.2270000, 0.2246667, 0.2216667, 0.2203333, 0.2183333, 0.2126667,
  0.2076667, 0.2060000
)
R_id <- rep(15, length(MA_vol))
df1 <- data.frame(R_id, MA_vol)

MA_vol <- c(
  0.2073333, 0.2053333, 0.2013333, 0.1993333, 0.1973333, 0.1970000,
  0.1966667, 0.1946667, 0.1920000, 0.1890000, 0.1883333, 0.1866667,
  0.1843333, 0.1823333, 0.1810000
)
R_id <- rep(13, length(MA_vol))
df2 <- data.frame(R_id, MA_vol)

MA_vol <- c(
  0.2016667, 0.1996667, 0.1980000, 0.1970000, 0.1963333, 0.1956667,
  0.1930000, 0.1913333, 0.1900000, 0.1893333, 0.1890000, 0.1863333,
  0.1853333, 0.1820000, 0.1800000, 0.1780000, 0.1763333
)
R_id <- rep(4, length(MA_vol))
df3 <- data.frame(R_id, MA_vol)

MA_vol <- c(
  0.2180000, 0.2146667, 0.2126667, 0.2103333, 0.2070000, 0.2040000,
  0.2010000, 0.1993333, 0.1956667, 0.1950000, 0.1926667, 0.1920000,
  0.1896667, 0.1890000, 0.1856667, 0.1830000, 0.1786667, 0.1763333,
  0.1733333, 0.1720000, 0.1700000, 0.1686667, 0.1670000
)
R_id <- rep(8, length(MA_vol))
df4 <- data.frame(R_id, MA_vol)

MA_vol <- c(
  0.2096667, 0.2063333, 0.2030000, 0.1993333, 0.1953333, 0.1916667,
  0.1880000, 0.1870000, 0.1850000, 0.1830000, 0.1783333, 0.1753333,
  0.1726667, 0.1716667, 0.1673333, 0.1666667, 0.1656667
)
R_id <- rep(2, length(MA_vol))
df5 <- data.frame(R_id, MA_vol)

# Stack the groups with base rbind() so this snippet has no package
# dependency; all five data frames share the same two columns.
df <- rbind(df1, df2, df3, df4, df5)
感谢您的帮助!
这可行,但可能有更好的方法。由于每个组的大小都不同,这使事情变得复杂。
library(tidyverse)
# Nest the data: one row per R_id, with that group's rows stored in the
# `data` list-column. Rows are sorted by MA_vol before nesting.
df1 <- df %>%
  arrange(MA_vol) %>%
  group_by(R_id) %>%
  group_nest()

# Build one line plot per group. x is the position of each observation within
# its group, and the x-axis label carries the group name.
df2 <- map(
  seq_len(nrow(df1)),
  function(i) {
    ggplot(df1$data[[i]], aes(x = seq_along(MA_vol), y = MA_vol)) +
      geom_path() +
      xlab(paste0("Group ", as.character(df1$R_id[[i]])))
  }
)

# Stack the plots in a single column for comparison. Passing the whole list
# via `plotlist` avoids hard-coding the number of groups.
cowplot::plot_grid(plotlist = df2, ncol = 1)
# Order the recessions by their minimum MA_vol, from highest to lowest.
R_minvalues <- df %>%
  group_by(R_id) %>% # group by recession id
  slice(which.min(MA_vol)) %>% # keep the row holding each group's minimum
  ungroup() %>% # do not leave the result grouped
  select(R_id, MA_vol)
x <- R_minvalues[order(-R_minvalues$MA_vol), ] # highest minimum first
R_id_order <- as.numeric(x$R_id)

# Reorder the long data frame so that the groups follow R_id_order.
MRC_DF <- df %>%
  arrange(match(R_id, R_id_order)) %>% # sort rows by position in R_id_order
  transform(t = seq_len(nrow(df))) %>% # t (time) index over the whole df
  select(t, R_id, MA_vol)

# The wide table's column names are character, so convert the ordering too.
R_order_chr <- as.character(R_id_order)

# Reshape to wide format (one column per R_id) and put the columns in order.
# dcast()/setDT() are namespace-qualified because the visible script only
# attaches tidyverse, not data.table.
MRC_DF_wide <- data.table::dcast(
  data.table::setDT(MRC_DF),
  t ~ R_id,
  value.var = "MA_vol"
) %>%
  select(all_of(R_order_chr))

# Prefix the numeric ids with "R" so the column names are syntactically valid.
colnames(MRC_DF_wide) <- paste0("R", colnames(MRC_DF_wide))
# Align `.y` against `.x` so that the element of `.y` closest to the minimum
# of `.x` lands on the same position as that minimum.
#
# .x: reference vector; NAs are ignored when locating its minimum.
# .y: vector to align; its NAs are dropped before alignment.
# Returns `.y` (without its NAs) padded with NA on both sides to length(.x).
# Stops with an error when the aligned `.y` would start before the first or
# end after the last position of `.x`.
matching.strip.fn <- function(.x, .y) {
  .y <- .y[!is.na(.y)]
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  target <- min(.x, na.rm = TRUE)
  lead <- which.min(.x) - which.min(abs(target - .y))
  trail <- length(.x) - lead - length(.y)
  # `lead` has length zero when either vector holds no non-NA values.
  if (length(lead) != 1L || lead < 0L || trail < 0L) {
    stop("`.y` cannot be aligned within the length of `.x`", call. = FALSE)
  }
  c(rep(NA, lead), .y, rep(NA, trail))
}
# Walk across the columns from left to right, aligning each column against
# the previously aligned one with matching.strip.fn().
MRC_DF_wide <- MRC_DF_wide %>%
  accumulate(matching.strip.fn) %>%
  set_names(names(MRC_DF_wide)) %>%
  as.data.frame()

# Drop rows in which every column is NA. drop = FALSE keeps a data frame even
# when there is only a single column.
MRC_DF_wide <- MRC_DF_wide[
  rowSums(is.na(MRC_DF_wide)) != ncol(MRC_DF_wide), ,
  drop = FALSE
]

# Add a fresh time index and move it to the first column.
MRC_DF_wide <- MRC_DF_wide %>%
  transform(t = seq_len(nrow(MRC_DF_wide))) %>%
  select(t, everything())
我有一个 dataframe，它按每个 id 的最小值（从高到低）排序。我想实现的是：让每个 id 的最小值与下一个 id 中对应值的位置对齐，依此类推。另外我创建了一个新向量 Time，即 1:nrow(df)。
我已经手动完成了这个,它看起来像这样:
我想知道如何使这个过程自动化?
这是我的数据示例:
# Example data: one decreasing MA_vol series per R_id. The groups have
# different lengths (14, 15, 17, 23 and 17 observations).
MA_vol <- c(
  0.2486667, 0.2463333, 0.2426667, 0.2423333, 0.2376667, 0.2323333,
  0.2270000, 0.2246667, 0.2216667, 0.2203333, 0.2183333, 0.2126667,
  0.2076667, 0.2060000
)
R_id <- rep(15, length(MA_vol))
df1 <- data.frame(R_id, MA_vol)

MA_vol <- c(
  0.2073333, 0.2053333, 0.2013333, 0.1993333, 0.1973333, 0.1970000,
  0.1966667, 0.1946667, 0.1920000, 0.1890000, 0.1883333, 0.1866667,
  0.1843333, 0.1823333, 0.1810000
)
R_id <- rep(13, length(MA_vol))
df2 <- data.frame(R_id, MA_vol)

MA_vol <- c(
  0.2016667, 0.1996667, 0.1980000, 0.1970000, 0.1963333, 0.1956667,
  0.1930000, 0.1913333, 0.1900000, 0.1893333, 0.1890000, 0.1863333,
  0.1853333, 0.1820000, 0.1800000, 0.1780000, 0.1763333
)
R_id <- rep(4, length(MA_vol))
df3 <- data.frame(R_id, MA_vol)

MA_vol <- c(
  0.2180000, 0.2146667, 0.2126667, 0.2103333, 0.2070000, 0.2040000,
  0.2010000, 0.1993333, 0.1956667, 0.1950000, 0.1926667, 0.1920000,
  0.1896667, 0.1890000, 0.1856667, 0.1830000, 0.1786667, 0.1763333,
  0.1733333, 0.1720000, 0.1700000, 0.1686667, 0.1670000
)
R_id <- rep(8, length(MA_vol))
df4 <- data.frame(R_id, MA_vol)

MA_vol <- c(
  0.2096667, 0.2063333, 0.2030000, 0.1993333, 0.1953333, 0.1916667,
  0.1880000, 0.1870000, 0.1850000, 0.1830000, 0.1783333, 0.1753333,
  0.1726667, 0.1716667, 0.1673333, 0.1666667, 0.1656667
)
R_id <- rep(2, length(MA_vol))
df5 <- data.frame(R_id, MA_vol)

# Stack the groups with base rbind() so this snippet has no package
# dependency; all five data frames share the same two columns.
df <- rbind(df1, df2, df3, df4, df5)
感谢您的帮助!
这可行,但可能有更好的方法。由于每个组的大小都不同,这使事情变得复杂。
library(tidyverse)
# Nest the data: one row per R_id, with that group's rows stored in the
# `data` list-column. Rows are sorted by MA_vol before nesting.
df1 <- df %>%
  arrange(MA_vol) %>%
  group_by(R_id) %>%
  group_nest()

# Build one line plot per group. x is the position of each observation within
# its group, and the x-axis label carries the group name.
df2 <- map(
  seq_len(nrow(df1)),
  function(i) {
    ggplot(df1$data[[i]], aes(x = seq_along(MA_vol), y = MA_vol)) +
      geom_path() +
      xlab(paste0("Group ", as.character(df1$R_id[[i]])))
  }
)

# Stack the plots in a single column for comparison. Passing the whole list
# via `plotlist` avoids hard-coding the number of groups.
cowplot::plot_grid(plotlist = df2, ncol = 1)
# Order the recessions by their minimum MA_vol, from highest to lowest.
R_minvalues <- df %>%
  group_by(R_id) %>% # group by recession id
  slice(which.min(MA_vol)) %>% # keep the row holding each group's minimum
  ungroup() %>% # do not leave the result grouped
  select(R_id, MA_vol)
x <- R_minvalues[order(-R_minvalues$MA_vol), ] # highest minimum first
R_id_order <- as.numeric(x$R_id)

# Reorder the long data frame so that the groups follow R_id_order.
MRC_DF <- df %>%
  arrange(match(R_id, R_id_order)) %>% # sort rows by position in R_id_order
  transform(t = seq_len(nrow(df))) %>% # t (time) index over the whole df
  select(t, R_id, MA_vol)

# The wide table's column names are character, so convert the ordering too.
R_order_chr <- as.character(R_id_order)

# Reshape to wide format (one column per R_id) and put the columns in order.
# dcast()/setDT() are namespace-qualified because the visible script only
# attaches tidyverse, not data.table.
MRC_DF_wide <- data.table::dcast(
  data.table::setDT(MRC_DF),
  t ~ R_id,
  value.var = "MA_vol"
) %>%
  select(all_of(R_order_chr))

# Prefix the numeric ids with "R" so the column names are syntactically valid.
colnames(MRC_DF_wide) <- paste0("R", colnames(MRC_DF_wide))
# Align `.y` against `.x` so that the element of `.y` closest to the minimum
# of `.x` lands on the same position as that minimum.
#
# .x: reference vector; NAs are ignored when locating its minimum.
# .y: vector to align; its NAs are dropped before alignment.
# Returns `.y` (without its NAs) padded with NA on both sides to length(.x).
# Stops with an error when the aligned `.y` would start before the first or
# end after the last position of `.x`.
matching.strip.fn <- function(.x, .y) {
  .y <- .y[!is.na(.y)]
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  target <- min(.x, na.rm = TRUE)
  lead <- which.min(.x) - which.min(abs(target - .y))
  trail <- length(.x) - lead - length(.y)
  # `lead` has length zero when either vector holds no non-NA values.
  if (length(lead) != 1L || lead < 0L || trail < 0L) {
    stop("`.y` cannot be aligned within the length of `.x`", call. = FALSE)
  }
  c(rep(NA, lead), .y, rep(NA, trail))
}
# Walk across the columns from left to right, aligning each column against
# the previously aligned one with matching.strip.fn().
MRC_DF_wide <- MRC_DF_wide %>%
  accumulate(matching.strip.fn) %>%
  set_names(names(MRC_DF_wide)) %>%
  as.data.frame()

# Drop rows in which every column is NA. drop = FALSE keeps a data frame even
# when there is only a single column.
MRC_DF_wide <- MRC_DF_wide[
  rowSums(is.na(MRC_DF_wide)) != ncol(MRC_DF_wide), ,
  drop = FALSE
]

# Add a fresh time index and move it to the first column.
MRC_DF_wide <- MRC_DF_wide %>%
  transform(t = seq_len(nrow(MRC_DF_wide))) %>%
  select(t, everything())