在不使用 Hmisc 的情况下在 R 中进行外推
Extrapolating in R without using Hmisc
上一个问题的答案:extrapolate in R for a time-series data 由于 R 版本不适合我。
我有一个数据框 NEI_othertier1_long
,看起来类似于:
state pollutant Sector Fuel description year value
AK Ammonia Refining Diesel industrial 2008 1.18
AK Ammonia Refining Diesel industrial 2009 NA
AK Ammonia Refining Diesel industrial 2010 NA
AK Ammonia Refining Diesel industrial 2011 5.76
AK Ammonia Refining Diesel industrial 2012 NA
AK Ammonia Refining Diesel industrial 2013 NA
AK Ammonia Refining Diesel industrial 2014 5.83
AK Ammonia Refining Diesel industrial 2015 NA
AK Ammonia Refining Diesel industrial 2016 NA
AK Ammonia Refining Diesel industrial 2017 8.96
AK Ammonia Refining Diesel industrial 2018 NA
AK Ammonia Refining Diesel industrial 2019 NA
我有 2008 年、2011 年、2014 年和 2017 年的值。我已经能够使用以下代码成功地对 2009-2016 年进行线性插值:
NEI_othertier1_long %>%
dplyr::mutate( value = na.approx(value, na.rm = FALSE, rule = 2) ) -> NEI_othertier1_interpolated
但是插值将 2017 年的值向前推到 2018 年和 2019 年。我想线性推断前几年的 2018 年和 2019 年的值。
我有 R 版本 3.5.2(无法更新),所以无法安装 latticeExtra
,Hmisc
依赖于使用 approxExtrap
函数。
感谢任何帮助!
dput(head(NEI_othertier1_long)) structure(list(state = c("AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK"),
pollutant = c("Ammonia", "Ammonia", "Ammonia", "Ammonia",
"Ammonia", "Ammonia", "Ammonia", "Ammonia", "Ammonia",
"Ammonia", "Ammonia", "Ammonia"), CEDS_Sector = c("1A1b_Pet-refining",
"1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining",
"1A1b_Pet-refining", "1A1b_Pet-refining",
"1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining",
"1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining"), CEDS_Fuel = c("diesel_oil",
"diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil",
"diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil"
), tier1_description = c("FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL",
"FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL",
"FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL",
"FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL",
"FUEL COMB. INDUSTRIAL",
"FUEL COMB. INDUSTRIAL"), unit = c("TON", "TON", "TON", "TON",
"TON", "TON", "TON", "TON", "TON",
"TON", "TON", "TON"), year = 2008:2019, emissions = c(1.18, NA, NA,
5.76, NA, NA, 5.83, NA, NA, 8.96, NA, NA)), row.names = c(NA, -12L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(state = "AK",
pollutant = "Ammonia", CEDS_Sector = "1A1b_Pet-refining",
CEDS_Fuel = "diesel_oil", tier1_description = "FUEL COMB. INDUSTRIAL",
unit = "TON", .rows = list(1:12)), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
approxExtrap
只是 approx
的包装器,因此您只需复制函数定义并使用它即可。
NEI_othertier1_long %>% dplyr::mutate(x = approxExtrap(year, value, year, na.rm = TRUE)$y)
如果找不到,这里是approxExtrap
:
approxExtrap <- function (x, y, xout, method = "linear", n = 50, rule = 2, f = 0,
ties = "ordered", na.rm = FALSE)
{
if (is.list(x)) {
y <- x[[2]]
x <- x[[1]]
}
if (na.rm) {
d <- !is.na(x + y)
x <- x[d]
y <- y[d]
}
d <- !duplicated(x)
x <- x[d]
y <- y[d]
d <- order(x)
x <- x[d]
y <- y[d]
w <- approx(x, y, xout = xout, method = method, n = n, rule = 2,
f = f, ties = ties)$y
r <- range(x)
d <- xout < r[1]
if (any(is.na(d)))
stop("NAs not allowed in xout")
if (any(d))
w[d] <- (y[2] - y[1])/(x[2] - x[1]) * (xout[d] - x[1]) +
y[1]
d <- xout > r[2]
n <- length(y)
if (any(d))
w[d] <- (y[n] - y[n - 1])/(x[n] - x[n - 1]) * (xout[d] -
x[n - 1]) + y[n - 1]
list(x = xout, y = w)
}
上一个问题的答案:extrapolate in R for a time-series data 由于 R 版本不适合我。
我有一个数据框 NEI_othertier1_long
,看起来类似于:
state pollutant Sector Fuel description year value
AK Ammonia Refining Diesel industrial 2008 1.18
AK Ammonia Refining Diesel industrial 2009 NA
AK Ammonia Refining Diesel industrial 2010 NA
AK Ammonia Refining Diesel industrial 2011 5.76
AK Ammonia Refining Diesel industrial 2012 NA
AK Ammonia Refining Diesel industrial 2013 NA
AK Ammonia Refining Diesel industrial 2014 5.83
AK Ammonia Refining Diesel industrial 2015 NA
AK Ammonia Refining Diesel industrial 2016 NA
AK Ammonia Refining Diesel industrial 2017 8.96
AK Ammonia Refining Diesel industrial 2018 NA
AK Ammonia Refining Diesel industrial 2019 NA
我有 2008 年、2011 年、2014 年和 2017 年的值。我已经能够使用以下代码成功地对 2009-2016 年进行线性插值:
NEI_othertier1_long %>%
dplyr::mutate( value = na.approx(value, na.rm = FALSE, rule = 2) ) -> NEI_othertier1_interpolated
但是插值将 2017 年的值向前推到 2018 年和 2019 年。我想线性推断前几年的 2018 年和 2019 年的值。
我有 R 版本 3.5.2(无法更新),所以无法安装 latticeExtra
,Hmisc
依赖于使用 approxExtrap
函数。
感谢任何帮助!
dput(head(NEI_othertier1_long)) structure(list(state = c("AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK"), pollutant = c("Ammonia", "Ammonia", "Ammonia", "Ammonia", "Ammonia", "Ammonia", "Ammonia", "Ammonia", "Ammonia", "Ammonia", "Ammonia", "Ammonia"), CEDS_Sector = c("1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining", "1A1b_Pet-refining"), CEDS_Fuel = c("diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil", "diesel_oil" ), tier1_description = c("FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL", "FUEL COMB. INDUSTRIAL"), unit = c("TON", "TON", "TON", "TON", "TON", "TON", "TON", "TON", "TON", "TON", "TON", "TON"), year = 2008:2019, emissions = c(1.18, NA, NA, 5.76, NA, NA, 5.83, NA, NA, 8.96, NA, NA)), row.names = c(NA, -12L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(state = "AK", pollutant = "Ammonia", CEDS_Sector = "1A1b_Pet-refining", CEDS_Fuel = "diesel_oil", tier1_description = "FUEL COMB. INDUSTRIAL", unit = "TON", .rows = list(1:12)), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
approxExtrap
只是 approx
的包装器,因此您只需复制函数定义并使用它即可。
NEI_othertier1_long %>% dplyr::mutate(x = approxExtrap(year, value, year, na.rm = TRUE)$y)
如果找不到,这里是approxExtrap
:
approxExtrap <- function (x, y, xout, method = "linear", n = 50, rule = 2, f = 0,
ties = "ordered", na.rm = FALSE)
{
if (is.list(x)) {
y <- x[[2]]
x <- x[[1]]
}
if (na.rm) {
d <- !is.na(x + y)
x <- x[d]
y <- y[d]
}
d <- !duplicated(x)
x <- x[d]
y <- y[d]
d <- order(x)
x <- x[d]
y <- y[d]
w <- approx(x, y, xout = xout, method = method, n = n, rule = 2,
f = f, ties = ties)$y
r <- range(x)
d <- xout < r[1]
if (any(is.na(d)))
stop("NAs not allowed in xout")
if (any(d))
w[d] <- (y[2] - y[1])/(x[2] - x[1]) * (xout[d] - x[1]) +
y[1]
d <- xout > r[2]
n <- length(y)
if (any(d))
w[d] <- (y[n] - y[n - 1])/(x[n] - x[n - 1]) * (xout[d] -
x[n - 1]) + y[n - 1]
list(x = xout, y = w)
}