仅显示 R 中的最高异常值
Display only the top outliers in R
是否可以在箱线图上方显示数据,即单独的异常值?
代码:
# Read the two CSV inputs: the discharge records and the charge records.
data <- read.csv("discharges.csv")
charges <- read.csv("tempcharges.csv")

# Join both data frames on their shared "Enc" column.
cdata <- merge(data, charges, by = "Enc")

# Derive the vectors used for plotting.
# NOTE(review): mdy() and month() are presumably lubridate's; this snippet
# does not load that package itself -- confirm it is attached beforehand.
aaa <- mdy(cdata$discharge_date)
dates <- format(aaa, "%b%y")
charge <- cdata$TotalCharge
e <- cdata$Enc

# Notched boxplots of charge against discharge date, grouped by month(aaa),
# with "%b%y" date labels on the x axis and the y axis limited to 0-60000.
plots <- ggplot(cdata, aes(x = aaa, y = charge, group = month(aaa))) +
  geom_boxplot(notch = TRUE, na.rm = TRUE) +
  scale_x_date(labels = function(z) format(z, format = "%b%y")) +
  ylim(0, 60000) +
  labs(
    title = "INPATIENT CHARGE DATA TREND",
    x = "Period Data",
    y = "Charges"
  )
plots
您可以先筛选出异常值(即超出 1.5 倍 IQR 范围的值),然后只绘制这些异常值。例如:
library(dplyr)
library(magrittr)
library(ggplot2)
library(ggrepel)
# Demo data: 100 standard-normal draws, each tagged with its row number.
dat <- data.frame(row = seq_len(100), value = rnorm(100))

# Boxplot (Tukey) rule: a point is an outlier when it lies more than
# 1.5 * IQR below the first quartile or above the third quartile.
# The fences are measured from the quartiles rather than from zero so the
# rule stays valid for data that is not centred on 0.
quartiles <- quantile(dat$value, probs = c(0.25, 0.75), names = FALSE)
iqr <- IQR(dat$value)
lower_fence <- quartiles[1] - 1.5 * iqr
upper_fence <- quartiles[2] + 1.5 * iqr
outliers <- dat %>% filter(value < lower_fence | value > upper_fence)

# Plot only the outliers, labelling each point with its row number.
# A small sample may contain no outliers at all, so skip plotting when
# there is nothing to show.
if (nrow(outliers) == 0) {
  message("No points fall outside the 1.5 * IQR fences.")
} else {
  # print() is explicit because the plot is built inside a braced branch.
  print(
    ggplot(outliers, aes(x = 0, y = value)) +
      geom_point() +
      # hjust and size are constants, so they are set outside aes().
      geom_text(aes(label = row), hjust = -2, size = 3)
  )
}
是否可以在箱线图上方显示数据,即单独的异常值?
代码:
# Read the two CSV inputs: the discharge records and the charge records.
data <- read.csv("discharges.csv")
charges <- read.csv("tempcharges.csv")

# Join both data frames on their shared "Enc" column.
cdata <- merge(data, charges, by = "Enc")

# Derive the vectors used for plotting.
# NOTE(review): mdy() and month() are presumably lubridate's; this snippet
# does not load that package itself -- confirm it is attached beforehand.
aaa <- mdy(cdata$discharge_date)
dates <- format(aaa, "%b%y")
charge <- cdata$TotalCharge
e <- cdata$Enc

# Notched boxplots of charge against discharge date, grouped by month(aaa),
# with "%b%y" date labels on the x axis and the y axis limited to 0-60000.
plots <- ggplot(cdata, aes(x = aaa, y = charge, group = month(aaa))) +
  geom_boxplot(notch = TRUE, na.rm = TRUE) +
  scale_x_date(labels = function(z) format(z, format = "%b%y")) +
  ylim(0, 60000) +
  labs(
    title = "INPATIENT CHARGE DATA TREND",
    x = "Period Data",
    y = "Charges"
  )
plots
您可以先筛选出异常值(即超出 1.5 倍 IQR 范围的值),然后只绘制这些异常值。例如:
library(dplyr)
library(magrittr)
library(ggplot2)
library(ggrepel)
# Demo data: 100 standard-normal draws, each tagged with its row number.
dat <- data.frame(row = seq_len(100), value = rnorm(100))

# Boxplot (Tukey) rule: a point is an outlier when it lies more than
# 1.5 * IQR below the first quartile or above the third quartile.
# The fences are measured from the quartiles rather than from zero so the
# rule stays valid for data that is not centred on 0.
quartiles <- quantile(dat$value, probs = c(0.25, 0.75), names = FALSE)
iqr <- IQR(dat$value)
lower_fence <- quartiles[1] - 1.5 * iqr
upper_fence <- quartiles[2] + 1.5 * iqr
outliers <- dat %>% filter(value < lower_fence | value > upper_fence)

# Plot only the outliers, labelling each point with its row number.
# A small sample may contain no outliers at all, so skip plotting when
# there is nothing to show.
if (nrow(outliers) == 0) {
  message("No points fall outside the 1.5 * IQR fences.")
} else {
  # print() is explicit because the plot is built inside a braced branch.
  print(
    ggplot(outliers, aes(x = 0, y = value)) +
      geom_point() +
      # hjust and size are constants, so they are set outside aes().
      geom_text(aes(label = row), hjust = -2, size = 3)
  )
}