如何计算 15 天分组的平均值、中值、最大值和最小值,而 R 中没有异常值影响
How to Calculate mean, median, max and min values for group by 15 days without outlier effect in R
我有各种机器的数据,看起来类似于下面输入样本数据中给出的图片
Input Sample
我需要做的是根据每 15 天创建的组计算平均值、中位数、最大值和最小值,例如:第 1 组 = 7 月 1-15 日,第 2 组 = 7 月 15-31 日,等等。但是虽然计算数据不应该考虑离群值
输出应该是这样的
Output Sample
我首先使用以下代码将分钟级数据聚合到天级别
DipDay = aggregate(Dipminute2[,c(4:18)], by=list(Dipminute2$Date), sum)
DipDay[,c(2:9)] = round(DipDay[,c(2:9)]*9*38/100000,2)
DipDay[,c(10:16)] = round(DipDay[,c(10:16)]*12*38/100000,2)
DipDay<-DipDay%>%rename("Date"="Group.1")
DipDay$Month = month(ymd(DipDay$Date) ,label = TRUE)
DipDay$MonthNumber = month(ymd(DipDay$Date))
DipDay$Day = day(ymd(DipDay$Date))
DipDay$Biweek = as.factor(ifelse(DipDay$Day < 16, "1st_15_days", "last_15_days"))
DipDay$MonthBiweek = paste(DipDay$Month, "_", DipDay$Biweek)
Dip = DipDay[c(21,2:16)]
BiweekDip = aggregate(DipDay[c(2:16,18)], by=list(Biweek=DipDay$MonthBiweek), FUN=median)
BiweekDip = BiweekDip%>%arrange((BiweekDip$MonthNumber))
BiweekDipT= as.data.frame(t(BiweekDip[,c(1:16)]))
BiweekDipT= janitor::row_to_names(BiweekDipT,row_number = 1)
write.csv(BiweekDipT,"Fornightly Dipping Median Count.csv")
前 10 行的示例输入数据如下所示
structure(list(Timestamp = structure(c(1593714765.03054, 1593714824.99918,
1593714884.96776, 1593714945.01468, 1593715005.01454, 1593715064.98314,
1593715125.03002, 1593715185.18608, 1593715244.99822, 1593715304.96657
), class = c("POSIXct", "POSIXt"), tzone = "Asia/Kolkata"), Date = structure(c(18446,
18446, 18446, 18446, 18446, 18446, 18446, 18446, 18446, 18446
), class = "Date"), Time = c("00:02", "00:03", "00:04", "00:05",
"00:06", "00:07", "00:08", "00:09", "00:10", "00:11"), `HCM 18` = c(7,
8, 7, 8, 7, 7, 8, 7, 8, 7), `HCM 19` = c(8, 8, 8, 8, 8, 7, 8,
8, 8, 8), `HCM 20` = c(7, 7, 7, 7, 8, 7, 4, 8, 7, 7), `HCM 21` = c(8,
7, 8, 8, 8, 7, 8, 8, 8, 8), `HCM 22` = c(8, 7, 8, 7, 7, 8, 7,
8, 7, 7), `HCM 23` = c(8, 7, 8, 8, 8, 7, 8, 8, 8, 8), `HCM 24` = c(7,
7, 7, 6, 7, 7, 7, 6, 4, 0), `HCM 25` = c(8, 8, 8, 7, 8, 8, 8,
7, 8, 8), `HCM 54` = c(7, 8, 8, 7, 6, 8, 7, 8, 8, 7), `HCM 55` = c(8,
7, 8, 7, 8, 8, 7, 8, 8, 7), `HCM 56` = c(8, 8, 8, 7, 8, 8, 7,
8, 8, 7), `HCM 57` = c(8, 8, 7, 8, 8, 7, 8, 8, 7, 8), `HCM 58` = c(8,
8, 7, 8, 7, 8, 7, 8, 7, 8), `HCM 59` = c(8, 1, 6, 8, 8, 7, 8,
8, 7, 8), `HCM 60` = c(8, 7, 8, 8, 7, 8, 8, 7, 8, 8)), row.names = c(NA,
10L), class = "data.frame")
尝试:
library(dplyr)
result <- df %>%
#Get month name
mutate(month = format(Timestamp, '%b'),
#get day of the month
day = as.integer(format(Timestamp, '%d')),
#assign labels based on dat of the month
day = ifelse(day > 15, 'last_15', 'first_15')) %>%
#combine month name and labels
tidyr::unite(Biweek, month, day) %>%
#For each 15 days
group_by(Biweek) %>%
#Change outliers to NA
mutate(across(`HCM 18`:`HCM 60`, ~replace(., . < quantile(., 0.01) |
. > quantile(., 0.99), NA))) %>%
#Calculate min, median, mean, max for each column for every 15 days.
summarise(across(`HCM 18`:`HCM 60`, list(min = ~min(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
mean = ~mean(., na.rm = TRUE),
max = ~max(., na.rm = TRUE))))
我有各种机器的数据,看起来类似于下面输入样本数据中给出的图片
Input Sample
我需要做的是根据每 15 天创建的组计算平均值、中位数、最大值和最小值,例如:第 1 组 = 7 月 1-15 日,第 2 组 = 7 月 15-31 日,等等。但是虽然计算数据不应该考虑离群值
输出应该是这样的 Output Sample
我首先使用以下代码将分钟级数据聚合到天级别
DipDay = aggregate(Dipminute2[,c(4:18)], by=list(Dipminute2$Date), sum)
DipDay[,c(2:9)] = round(DipDay[,c(2:9)]*9*38/100000,2)
DipDay[,c(10:16)] = round(DipDay[,c(10:16)]*12*38/100000,2)
DipDay<-DipDay%>%rename("Date"="Group.1")
DipDay$Month = month(ymd(DipDay$Date) ,label = TRUE)
DipDay$MonthNumber = month(ymd(DipDay$Date))
DipDay$Day = day(ymd(DipDay$Date))
DipDay$Biweek = as.factor(ifelse(DipDay$Day < 16, "1st_15_days", "last_15_days"))
DipDay$MonthBiweek = paste(DipDay$Month, "_", DipDay$Biweek)
Dip = DipDay[c(21,2:16)]
BiweekDip = aggregate(DipDay[c(2:16,18)], by=list(Biweek=DipDay$MonthBiweek), FUN=median)
BiweekDip = BiweekDip%>%arrange((BiweekDip$MonthNumber))
BiweekDipT= as.data.frame(t(BiweekDip[,c(1:16)]))
BiweekDipT= janitor::row_to_names(BiweekDipT,row_number = 1)
write.csv(BiweekDipT,"Fornightly Dipping Median Count.csv")
前 10 行的示例输入数据如下所示
structure(list(Timestamp = structure(c(1593714765.03054, 1593714824.99918,
1593714884.96776, 1593714945.01468, 1593715005.01454, 1593715064.98314,
1593715125.03002, 1593715185.18608, 1593715244.99822, 1593715304.96657
), class = c("POSIXct", "POSIXt"), tzone = "Asia/Kolkata"), Date = structure(c(18446,
18446, 18446, 18446, 18446, 18446, 18446, 18446, 18446, 18446
), class = "Date"), Time = c("00:02", "00:03", "00:04", "00:05",
"00:06", "00:07", "00:08", "00:09", "00:10", "00:11"), `HCM 18` = c(7,
8, 7, 8, 7, 7, 8, 7, 8, 7), `HCM 19` = c(8, 8, 8, 8, 8, 7, 8,
8, 8, 8), `HCM 20` = c(7, 7, 7, 7, 8, 7, 4, 8, 7, 7), `HCM 21` = c(8,
7, 8, 8, 8, 7, 8, 8, 8, 8), `HCM 22` = c(8, 7, 8, 7, 7, 8, 7,
8, 7, 7), `HCM 23` = c(8, 7, 8, 8, 8, 7, 8, 8, 8, 8), `HCM 24` = c(7,
7, 7, 6, 7, 7, 7, 6, 4, 0), `HCM 25` = c(8, 8, 8, 7, 8, 8, 8,
7, 8, 8), `HCM 54` = c(7, 8, 8, 7, 6, 8, 7, 8, 8, 7), `HCM 55` = c(8,
7, 8, 7, 8, 8, 7, 8, 8, 7), `HCM 56` = c(8, 8, 8, 7, 8, 8, 7,
8, 8, 7), `HCM 57` = c(8, 8, 7, 8, 8, 7, 8, 8, 7, 8), `HCM 58` = c(8,
8, 7, 8, 7, 8, 7, 8, 7, 8), `HCM 59` = c(8, 1, 6, 8, 8, 7, 8,
8, 7, 8), `HCM 60` = c(8, 7, 8, 8, 7, 8, 8, 7, 8, 8)), row.names = c(NA,
10L), class = "data.frame")
尝试:
library(dplyr)
result <- df %>%
#Get month name
mutate(month = format(Timestamp, '%b'),
#get day of the month
day = as.integer(format(Timestamp, '%d')),
#assign labels based on dat of the month
day = ifelse(day > 15, 'last_15', 'first_15')) %>%
#combine month name and labels
tidyr::unite(Biweek, month, day) %>%
#For each 15 days
group_by(Biweek) %>%
#Change outliers to NA
mutate(across(`HCM 18`:`HCM 60`, ~replace(., . < quantile(., 0.01) |
. > quantile(., 0.99), NA))) %>%
#Calculate min, median, mean, max for each column for every 15 days.
summarise(across(`HCM 18`:`HCM 60`, list(min = ~min(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
mean = ~mean(., na.rm = TRUE),
max = ~max(., na.rm = TRUE))))