如何计算真阳性率?

How to calculate true positive rate?

我已经建立了一个预测航班是否晚点到达的模型。我希望得到在误报率(假阳性率)为 50% 时的真阳性率。我可以在绘制的 ROC 曲线中看到这一点,但我想准确计算该值,而不仅仅是从图中读取它。有人知道怎么做吗?

library(modelr)
library(dplyr)
library(sparklyr)
library(ggplot2)
library(nycflights13)
# Load the flights data set into the workspace.
data(flights)

# Select the "Rounding" sampler and fix the seed so that random draws made
# from R are reproducible between runs.
RNGkind(sample.kind = "Rounding")
set.seed(42)

# Binary target: 1 when arr_delay exceeds 30, 0 otherwise
# (NA wherever arr_delay itself is missing).
flights <- flights %>%
  mutate(late_arrival = ifelse(arr_delay > 30, 1, 0))

# Make sure a local Spark distribution is available and connect to it.
spark_install()
sc <- spark_connect(master = "local")

# Copy the flights data into Spark under the table name "flights".
flights_tbl <- copy_to(sc, flights, "flights")

# Drop every row that contains a missing value. na.omit() receives the table
# as its only argument: piping the table in AND naming it again would hand it
# over a second time as an extra positional argument.
flights_tbl <- na.omit(flights_tbl)

# Keep only the response and the predictor columns, then split the rows at
# random with weights 0.75 (train) and 0.25 (test).
partition <- sdf_random_split(
  select(flights_tbl, late_arrival, carrier, dep_delay, month, year),
  train = 0.75,
  test = 0.25
)

# Named pieces of the split, used for fitting and for evaluation.
train_tbl <- partition[["train"]]
test_tbl <- partition[["test"]]

# ---- Model ----
# Logistic regression of late_arrival on carrier, departure delay, month and
# year, fitted on the training split.
ml_formula <- late_arrival ~ carrier + dep_delay + month + year
ml_log <- train_tbl %>%
  ml_logistic_regression(ml_formula)
ml_log

# Score the test split and pull the predictions into local R memory.
pred_lr <- ml_predict(ml_log, test_tbl) %>% collect()

# p1 = second entry of each row's `probability` element, which is presumably
# the probability of late_arrival == 1 (verify against the ml_predict output).
# Extracting per row with vapply() fails loudly if an element does not
# provide a single numeric value in position 2, instead of silently
# misaligning rows the way a flattened every-second-value index would.
pred_lr$p1 <- vapply(pred_lr$probability, function(p) p[[2]], numeric(1))

# ---- ROC curve ----
# get_roc() is not defined in this snippet; presumably a helper supplied
# elsewhere. It is called with the true labels (L) and the predicted
# probabilities (f); the plot below relies on its result having FPR and TPR
# columns.
ROC_lr <- get_roc(L = pred_lr$late_arrival, f = pred_lr$p1)

# Plot TPR against FPR as a single line with a legend entry.
ggplot(ROC_lr, aes(x = FPR, y = TPR)) +
  geom_line(aes(col = "my prediction")) +
  labs(
    title = "ROC curve of my prediction",
    subtitle = "logistic regression to predict late arrivals based on carrier, departure delay, month, and year"
  )

我不熟悉 get_roc(),但您当然可以打印出 ROC_lr 以获得一些附近的值:

# Print the ROC table so the rows whose FPR lies near the target value can be
# read off directly.
print(ROC_lr)

但您也可以尝试另一个包:pROC 包提供了 coords() 函数,可以计算 ROC 曲线上某一点的值:

library(pROC)

# Toy data for illustration only: ten 0/1 class labels and the integers
# 1..10 as the corresponding scores.
labels <- c(0, 1, 0, 1, 0, 0, 0, 1, 0, 0)
scores <- seq_len(10)

# pROC::roc() takes the place of get_roc(): it builds the ROC object from the
# true labels (response) and the scores (predictor).
# NOTE(review): no `levels`/`direction` are given, so pROC chooses them
# itself; confirm the chosen direction matches your model's scores.
roc <- roc(response = labels, predictor = scores)

# Target false positive rate for which the TPR is wanted.
fpr <- 0.5

# coords() is queried by specificity, which equals 1 - FPR; the returned
# coordinates include the matching sensitivity (= TPR).
fpr50 <- coords(roc, x = 1 - fpr, input = "specificity")

# Extract the TPR from the coordinates.
tpr <- fpr50$sensitivity

tpr
#[1] 0.6666667