如何根据来自另一个数据框的多次计算创建具有多列的数据框?
How to create a dataframe with multiple columns based off multiple calculations from another dataframe?
我有一个数据框
df = structure(list(Date_Time_GMT_3 = structure(c(1625141700, 1625142600,
1625143500, 1625144400, 1625145300, 1625146200), class = c("POSIXct",
"POSIXt"), tzone = "EST"), X20822244_27LH_U_Stationary = c(22.525,
22.525, 22.429, 22.429, 22.429, 22.429), X20822244_27LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874232_B2LH_U_Stationary = c(25.805,
25.902, 25.902, 25.902, 25.902, 26), X20874232_B2LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874286_X1LH_U_Stationary = c(24.835,
24.835, 24.835, 24.835, 24.835, 24.835), X20874286_X1LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X2322844_X1LH_AIR = c(21.282,
21.664, 21.76, 21.855, 21.855, 22.142), X20817728_X3LH_U_Stationary = c(25.222,
25.125, 25.125, 25.125, 25.028, 24.931), X20817728_X3LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676884_X3LH_U_StationaryCompare = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20819743_X6LH_U_Stationary = c(24.931,
24.931, 24.931, 24.835, 24.931, 24.835), X20819743_X6LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20819831_8LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676900_50LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822214_73LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676887_44LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822223_46LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874231_56LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874287_86LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874298_71LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874309_51LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676887_X2LH_S = c(24.641,
24.545, 24.545, 24.545, 24.545, 24.545), X20819831_11LH_S = c(24.641,
24.641, 24.641, 24.641, 24.545, 24.545), X20822214_X4LH_S = c(25.61,
25.61, 25.61, 25.61, 25.61, 25.61), X20822223_B3LH_S = c(24.931,
24.835, 24.738, 24.641, 24.545, 24.545), X20874231_62LH_S = c(24.931,
24.835, 24.835, 24.835, 24.835, 24.738), X20874298_B5LH_S = c(25.319,
25.319, 25.319, 25.319, 25.319, 25.416), X20874309_B4LH_S = c(26.781,
26.683, 26.683, 26.683, 26.683, 26.683), X20676900_X5LH_S = c(24.158,
24.158, 24.158, 24.158, 24.158, 24.158), X20874287_58LH_U = c(22.525,
22.429, 22.429, 22.429, 22.429, 22.429), X20676900_54LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20819831_B1LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822214_76LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874231_31LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874298_55LHA_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822223_55LHB_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874287_70LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874309_69LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X10694645_20LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676900_57LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20819831_3LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822223_4LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874231_79LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874287_67LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874298_47LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874309_74LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676887_49LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_)), row.names = c(NA,
6L), class = "data.frame")
对于每一列,我想判断每个点与其前一个点之差的绝对值是否大于 0.5,并创建一个新列来标记这一结果。因此,在我提供的数据框中,每一列之后都会有一个与之对应、名称中带有 diff 的新列。
我已经尝试了几段代码,但不确定如何对所有列进行这一计算。以下是我尝试过的一些例子:
# Attempt 1: for each column position i from 2 to length(df), try to add a
# column named "diff<i>.".
# The value passed to diff() is the literal integer sequence 2:48, so no column
# of df is read and the right-hand side always has 47 elements (46 differences
# plus the leading FALSE), whatever the number of rows in df.
# NOTE(review): return(df) is not inside any function in this snippet; if this
# runs at top level, return() itself raises an error.
for (i in c(2:length(df))){
df[[paste0("diff", i, ".")]] =
as.integer(c(FALSE, abs(diff(#Just using column numbers
2:48)) > 0.50))
return(df)
}
# Attempt 2: same loop, but diff() is given the whole data frame slice
# df[,2:48] instead of a single numeric column, and the slice does not depend
# on the loop index i.
# NOTE(review): this is presumably the call behind the "non-numeric argument to
# binary operator" error - TODO confirm.
# NOTE(review): return(df) is not inside any function in this snippet; if this
# runs at top level, return() itself raises an error.
for (i in c(2:length(df))){
df[[paste0("diff", i, ".")]] =
as.integer(c(FALSE, abs(diff(#Indexing
df[,2:48])) > 0.50))
return(df)
}
# Attempt 3: same loop, but diff() is applied to the loop index i itself (a
# single number), so the values stored in column i of df are never read.
# NOTE(review): return(df) is not inside any function in this snippet; if this
# runs at top level, return() itself raises an error.
for (i in c(2:length(df))){
df[[paste0("diff", i, ".")]] =
as.integer(c(FALSE, abs(diff(#Using i
i)) > 0.50))
return(df)
}
但它们都不起作用。我不断收到以下错误:
Error in r[i1] - r[-length(r):-(length(r) - lag + 1L)] :
non-numeric argument to binary operator
知道怎么做吗?
您在找这样的东西吗?
library(tidyverse)
# Absolute difference between each element and the element before it.
#
# x: a numeric vector.
# Returns a numeric vector of the same length as x; the first element is NA
# because it has no predecessor.
#
# The previous value is built with base R indexing rather than an unqualified
# lag(): lag() only shifts values when dplyr is attached, and otherwise
# resolves to stats::lag(), which leaves the values in place (so every
# difference would silently be 0).
diff_fx <- function(x) {
  previous <- c(NA, head(x, -1L))
  abs(x - previous)
}
# Add one "diff_<column>" column per measurement column, holding the absolute
# change from the previous row as computed by diff_fx(). The timestamp column
# Date_Time_GMT_3 is excluded from the calculation but kept in the result.
# `.names` uses the documented `{.col}` glue placeholder.
test <- df %>%
  mutate(
    across(
      -"Date_Time_GMT_3",
      diff_fx,
      .names = "diff_{.col}"
    )
  )
仅查看名称以 “diff” 开头的列的输出:
# Print a compact overview of only the columns whose names start with "diff".
test %>%
  select(starts_with("diff")) %>%
  glimpse()
Rows: 6
Columns: 47
$ diff_X20822244_27LH_U_Stationary <dbl> NA, 0.000, 0.096, 0.000, 0.000, 0.000
$ diff_X20822244_27LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874232_B2LH_U_Stationary <dbl> NA, 0.097, 0.000, 0.000, 0.000, 0.098
$ diff_X20874232_B2LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874286_X1LH_U_Stationary <dbl> NA, 0, 0, 0, 0, 0
$ diff_X20874286_X1LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X2322844_X1LH_AIR <dbl> NA, 0.382, 0.096, 0.095, 0.000, 0.287
$ diff_X20817728_X3LH_U_Stationary <dbl> NA, 0.097, 0.000, 0.000, 0.097, 0.097
$ diff_X20817728_X3LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676884_X3LH_U_StationaryCompare <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20819743_X6LH_U_Stationary <dbl> NA, 0.000, 0.000, 0.096, 0.096, 0.096
$ diff_X20819743_X6LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20819831_8LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676900_50LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822214_73LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676887_44LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822223_46LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874231_56LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874287_86LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874298_71LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874309_51LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676887_X2LH_S <dbl> NA, 0.096, 0.000, 0.000, 0.000, 0.000
$ diff_X20819831_11LH_S <dbl> NA, 0.000, 0.000, 0.000, 0.096, 0.000
$ diff_X20822214_X4LH_S <dbl> NA, 0, 0, 0, 0, 0
$ diff_X20822223_B3LH_S <dbl> NA, 0.096, 0.097, 0.097, 0.096, 0.000
$ diff_X20874231_62LH_S <dbl> NA, 0.096, 0.000, 0.000, 0.000, 0.097
$ diff_X20874298_B5LH_S <dbl> NA, 0.000, 0.000, 0.000, 0.000, 0.097
$ diff_X20874309_B4LH_S <dbl> NA, 0.098, 0.000, 0.000, 0.000, 0.000
$ diff_X20676900_X5LH_S <dbl> NA, 0, 0, 0, 0, 0
$ diff_X20874287_58LH_U <dbl> NA, 0.096, 0.000, 0.000, 0.000, 0.000
$ diff_X20676900_54LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20819831_B1LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822214_76LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874231_31LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874298_55LHA_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822223_55LHB_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874287_70LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874309_69LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X10694645_20LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676900_57LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20819831_3LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822223_4LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874231_79LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874287_67LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874298_47LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874309_74LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676887_49LH_S <dbl> NA, NA, NA, NA, NA, NA
我有一个数据框
df = structure(list(Date_Time_GMT_3 = structure(c(1625141700, 1625142600,
1625143500, 1625144400, 1625145300, 1625146200), class = c("POSIXct",
"POSIXt"), tzone = "EST"), X20822244_27LH_U_Stationary = c(22.525,
22.525, 22.429, 22.429, 22.429, 22.429), X20822244_27LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874232_B2LH_U_Stationary = c(25.805,
25.902, 25.902, 25.902, 25.902, 26), X20874232_B2LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874286_X1LH_U_Stationary = c(24.835,
24.835, 24.835, 24.835, 24.835, 24.835), X20874286_X1LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X2322844_X1LH_AIR = c(21.282,
21.664, 21.76, 21.855, 21.855, 22.142), X20817728_X3LH_U_Stationary = c(25.222,
25.125, 25.125, 25.125, 25.028, 24.931), X20817728_X3LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676884_X3LH_U_StationaryCompare = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20819743_X6LH_U_Stationary = c(24.931,
24.931, 24.931, 24.835, 24.931, 24.835), X20819743_X6LH_S_Stationary = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20819831_8LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676900_50LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822214_73LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676887_44LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822223_46LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874231_56LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874287_86LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874298_71LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874309_51LH_U = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676887_X2LH_S = c(24.641,
24.545, 24.545, 24.545, 24.545, 24.545), X20819831_11LH_S = c(24.641,
24.641, 24.641, 24.641, 24.545, 24.545), X20822214_X4LH_S = c(25.61,
25.61, 25.61, 25.61, 25.61, 25.61), X20822223_B3LH_S = c(24.931,
24.835, 24.738, 24.641, 24.545, 24.545), X20874231_62LH_S = c(24.931,
24.835, 24.835, 24.835, 24.835, 24.738), X20874298_B5LH_S = c(25.319,
25.319, 25.319, 25.319, 25.319, 25.416), X20874309_B4LH_S = c(26.781,
26.683, 26.683, 26.683, 26.683, 26.683), X20676900_X5LH_S = c(24.158,
24.158, 24.158, 24.158, 24.158, 24.158), X20874287_58LH_U = c(22.525,
22.429, 22.429, 22.429, 22.429, 22.429), X20676900_54LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20819831_B1LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822214_76LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874231_31LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874298_55LHA_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822223_55LHB_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874287_70LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874309_69LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X10694645_20LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676900_57LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20819831_3LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20822223_4LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874231_79LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874287_67LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874298_47LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20874309_74LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), X20676887_49LH_S = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_)), row.names = c(NA,
6L), class = "data.frame")
对于每一列,我想判断每个点与其前一个点之差的绝对值是否大于 0.5,并创建一个新列来标记这一结果。因此,在我提供的数据框中,每一列之后都会有一个与之对应、名称中带有 diff 的新列。
我已经尝试了几段代码,但不确定如何对所有列进行这一计算。以下是我尝试过的一些例子:
# Attempt 1: for each column position i from 2 to length(df), try to add a
# column named "diff<i>.".
# The value passed to diff() is the literal integer sequence 2:48, so no column
# of df is read and the right-hand side always has 47 elements (46 differences
# plus the leading FALSE), whatever the number of rows in df.
# NOTE(review): return(df) is not inside any function in this snippet; if this
# runs at top level, return() itself raises an error.
for (i in c(2:length(df))){
df[[paste0("diff", i, ".")]] =
as.integer(c(FALSE, abs(diff(#Just using column numbers
2:48)) > 0.50))
return(df)
}
# Attempt 2: same loop, but diff() is given the whole data frame slice
# df[,2:48] instead of a single numeric column, and the slice does not depend
# on the loop index i.
# NOTE(review): this is presumably the call behind the "non-numeric argument to
# binary operator" error - TODO confirm.
# NOTE(review): return(df) is not inside any function in this snippet; if this
# runs at top level, return() itself raises an error.
for (i in c(2:length(df))){
df[[paste0("diff", i, ".")]] =
as.integer(c(FALSE, abs(diff(#Indexing
df[,2:48])) > 0.50))
return(df)
}
# Attempt 3: same loop, but diff() is applied to the loop index i itself (a
# single number), so the values stored in column i of df are never read.
# NOTE(review): return(df) is not inside any function in this snippet; if this
# runs at top level, return() itself raises an error.
for (i in c(2:length(df))){
df[[paste0("diff", i, ".")]] =
as.integer(c(FALSE, abs(diff(#Using i
i)) > 0.50))
return(df)
}
但它们都不起作用。我不断收到以下错误:
Error in r[i1] - r[-length(r):-(length(r) - lag + 1L)] :
non-numeric argument to binary operator
知道怎么做吗?
您在找这样的东西吗?
library(tidyverse)
# Absolute difference between each element and the element before it.
#
# x: a numeric vector.
# Returns a numeric vector of the same length as x; the first element is NA
# because it has no predecessor.
#
# The previous value is built with base R indexing rather than an unqualified
# lag(): lag() only shifts values when dplyr is attached, and otherwise
# resolves to stats::lag(), which leaves the values in place (so every
# difference would silently be 0).
diff_fx <- function(x) {
  previous <- c(NA, head(x, -1L))
  abs(x - previous)
}
# Add one "diff_<column>" column per measurement column, holding the absolute
# change from the previous row as computed by diff_fx(). The timestamp column
# Date_Time_GMT_3 is excluded from the calculation but kept in the result.
# `.names` uses the documented `{.col}` glue placeholder.
test <- df %>%
  mutate(
    across(
      -"Date_Time_GMT_3",
      diff_fx,
      .names = "diff_{.col}"
    )
  )
仅查看名称以 “diff” 开头的列的输出:
# Print a compact overview of only the columns whose names start with "diff".
test %>%
  select(starts_with("diff")) %>%
  glimpse()
Rows: 6
Columns: 47
$ diff_X20822244_27LH_U_Stationary <dbl> NA, 0.000, 0.096, 0.000, 0.000, 0.000
$ diff_X20822244_27LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874232_B2LH_U_Stationary <dbl> NA, 0.097, 0.000, 0.000, 0.000, 0.098
$ diff_X20874232_B2LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874286_X1LH_U_Stationary <dbl> NA, 0, 0, 0, 0, 0
$ diff_X20874286_X1LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X2322844_X1LH_AIR <dbl> NA, 0.382, 0.096, 0.095, 0.000, 0.287
$ diff_X20817728_X3LH_U_Stationary <dbl> NA, 0.097, 0.000, 0.000, 0.097, 0.097
$ diff_X20817728_X3LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676884_X3LH_U_StationaryCompare <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20819743_X6LH_U_Stationary <dbl> NA, 0.000, 0.000, 0.096, 0.096, 0.096
$ diff_X20819743_X6LH_S_Stationary <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20819831_8LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676900_50LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822214_73LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676887_44LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822223_46LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874231_56LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874287_86LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874298_71LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874309_51LH_U <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676887_X2LH_S <dbl> NA, 0.096, 0.000, 0.000, 0.000, 0.000
$ diff_X20819831_11LH_S <dbl> NA, 0.000, 0.000, 0.000, 0.096, 0.000
$ diff_X20822214_X4LH_S <dbl> NA, 0, 0, 0, 0, 0
$ diff_X20822223_B3LH_S <dbl> NA, 0.096, 0.097, 0.097, 0.096, 0.000
$ diff_X20874231_62LH_S <dbl> NA, 0.096, 0.000, 0.000, 0.000, 0.097
$ diff_X20874298_B5LH_S <dbl> NA, 0.000, 0.000, 0.000, 0.000, 0.097
$ diff_X20874309_B4LH_S <dbl> NA, 0.098, 0.000, 0.000, 0.000, 0.000
$ diff_X20676900_X5LH_S <dbl> NA, 0, 0, 0, 0, 0
$ diff_X20874287_58LH_U <dbl> NA, 0.096, 0.000, 0.000, 0.000, 0.000
$ diff_X20676900_54LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20819831_B1LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822214_76LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874231_31LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874298_55LHA_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822223_55LHB_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874287_70LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874309_69LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X10694645_20LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676900_57LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20819831_3LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20822223_4LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874231_79LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874287_67LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874298_47LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20874309_74LH_S <dbl> NA, NA, NA, NA, NA, NA
$ diff_X20676887_49LH_S <dbl> NA, NA, NA, NA, NA, NA