使用键(对照表)替换整个 data.table 中的值
Using a key to replace values across a whole data.table
我有一个很大的 data.table,如下所示:
V1 V2 V3 V4 V5 V6 V7 V8 V9
1: XS0285400197 TR.IssuerRating F1 F1 F1 F1 F1 F1 F1
2: XS0041971275 TR.IssuerRating AAA AAA AAA AAA F1 F1 AAA
3: XS0043098127 TR.IssuerRating WD WD WD WD WD WD WD
# Reproducible definition of the example table shown above: a data.frame with
# three rows and nine character columns (V1-V2 identifiers, V3-V9 ratings).
structure(list(V1 = c("XS0285400197", "XS0041971275", "XS0043098127"
), V2 = c("TR.IssuerRating", "TR.IssuerRating", "TR.IssuerRating"
), V3 = c("F1", "AAA", "WD"), V4 = c("F1", "AAA", "WD"), V5 = c("F1",
"AAA", "WD"), V6 = c("F1", "AAA", "WD"), V7 = c("F1", "F1", "WD"
), V8 = c("F1", "F1", "WD"), V9 = c("F1", "AAA", "WD")), class = "data.frame", row.names = c(NA,
-3L))
实际的 data.table 要大得多,这只是一个例子。此外,我有一张键表(对照表),想用其中的数字替换评级(此处为 F1、AAA 和 WD)。
Rating CreditQuality
1: F1 2
2: AAA 1
3: WD 6
4: (P)B2 6
5: (P)Ba1 4
6: (P)Ba2 5
# Reproducible definition of the key shown above: a data.frame with six rows
# mapping each Rating string to an integer CreditQuality.
structure(list(Rating = c("F1", "AAA", "WD", "(P)B2", "(P)Ba1",
"(P)Ba2"), CreditQuality = c(2L, 1L, 6L, 6L, 4L, 5L)), class = "data.frame", row.names = c(NA,
-6L))
我想用键表中为每个评级指定的 CreditQuality 来替换这些评级。也就是说,值为 F1 的单元格将变为 2,值为 WD 的单元格将变为 6,依此类推。新的 table 应该如下所示:
V1 V2 V3 V4 V5 V6 V7 V8 V9
1: XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
2: XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
3: XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
我试过使用 match 和 mapvalues,但是 match 似乎只适用于单个列,而 mapvalues 只适用于原子向量,不适用于 data.table。有些人遇到过类似的问题,但他们大多只需要替换单个列中的值,而我想替换 data.table 中多个列的值。
您可以使用 melt 和 dcast:
# Recode every rating by reshaping the rating columns to long form, joining
# the key on "Rating", and reshaping back to wide form.
library(data.table)

# as.data.table() lets the snippet run whether `df` and `rating` are plain
# data.frames or already data.tables; the `on =` join requires data.tables.
# The join keeps every melted row, so ratings missing from the key become NA.
# Note: dcast() returns the rows ordered by V1 and V2, not in input order.
dcast(
  as.data.table(rating)[
    # `id.vars` is spelled out; `id =` only worked through partial matching.
    melt(as.data.table(df), id.vars = c("V1", "V2"), value.name = "Rating"),
    on = "Rating"
  ],
  V1 + V2 ~ variable,
  value.var = "CreditQuality"
)
输出:
V1 V2 V3 V4 V5 V6 V7 V8 V9
1: XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
2: XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
3: XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
注意:我假设您的源数据是 df,评级对照数据是 rating。我看到您的数据已经是 data.table 类。
您可以使用 dplyr 和 across。
library(dplyr)

# Define input data
df <- data.frame(
  V1 = c("XS0285400197", "XS0041971275", "XS0043098127"),
  V2 = c("TR.IssuerRating", "TR.IssuerRating", "TR.IssuerRating"),
  V3 = c("F1", "AAA", "WD"),
  V4 = c("F1", "AAA", "WD"),
  V5 = c("F1", "AAA", "WD"),
  V6 = c("F1", "AAA", "WD"),
  V7 = c("F1", "F1", "WD"),
  V8 = c("F1", "F1", "WD"),
  V9 = c("F1", "AAA", "WD"),
  stringsAsFactors = FALSE
)

lookup <- data.frame(
  Rating = c("F1", "AAA", "WD", "(P)B2", "(P)Ba1", "(P)Ba2"),
  CreditQuality = c(2, 1, 6, 6, 4, 5),
  stringsAsFactors = FALSE
)

# Make a look up vector: names are the ratings, values the credit qualities
lookup_vec <- setNames(lookup$CreditQuality, lookup$Rating)

# Use dplyr across to apply the look up to every column except the two
# identifier columns. Selecting by name avoids seq(3, ncol), which counts
# down (3, 2) when the frame has fewer than three columns.
# as.character() makes factor columns index by label rather than by integer
# code; unname() stops the rating labels from sticking to the new columns.
df_mod <- df %>%
  mutate(across(-c(V1, V2), ~ unname(lookup_vec[as.character(.x)])))

# View
df_mod
# V1 V2 V3 V4 V5 V6 V7 V8 V9
# 1 XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
# 2 XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
# 3 XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
# Example table: V1-V2 identify the instrument, V3-V9 hold rating strings.
df <- data.frame(
  V1 = c("XS0285400197", "XS0041971275", "XS0043098127"),
  V2 = c("TR.IssuerRating", "TR.IssuerRating", "TR.IssuerRating"),
  V3 = c("F1", "AAA", "WD"),
  V4 = c("F1", "AAA", "WD"),
  V5 = c("F1", "AAA", "WD"),
  V6 = c("F1", "AAA", "WD"),
  V7 = c("F1", "F1", "WD"),
  V8 = c("F1", "F1", "WD"),
  V9 = c("F1", "AAA", "WD"),
  stringsAsFactors = FALSE
)

# Key: one integer CreditQuality per Rating string.
rating <- data.frame(
  Rating = c("F1", "AAA", "WD", "(P)B2", "(P)Ba1", "(P)Ba2"),
  CreditQuality = c(2L, 1L, 6L, 6L, 4L, 5L),
  stringsAsFactors = FALSE
)

df
#> V1 V2 V3 V4 V5 V6 V7 V8 V9
#> 1 XS0285400197 TR.IssuerRating F1 F1 F1 F1 F1 F1 F1
#> 2 XS0041971275 TR.IssuerRating AAA AAA AAA AAA F1 F1 AAA
#> 3 XS0043098127 TR.IssuerRating WD WD WD WD WD WD WD
# tidyverse: for each of V3..V9, find every cell's position in rating$Rating
# and take the CreditQuality at that position (NA when there is no match).
library(tidyverse)
df %>%
  mutate(
    across(
      V3:V9,
      function(col) rating$CreditQuality[match(col, table = rating$Rating)]
    )
  )
#> V1 V2 V3 V4 V5 V6 V7 V8 V9
#> 1 XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
#> 2 XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
#> 3 XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
# base
# Recode columns 3..9 in place: match() gives each cell's row in the key and
# that row's CreditQuality replaces the cell (NA when there is no match).
# lapply() always yields a list of columns; sapply() would instead collapse the
# result to a matrix, a plain vector (one-row input) or a list (empty input).
df[3:9] <- lapply(
  df[3:9],
  function(x) rating$CreditQuality[match(x, table = rating$Rating)]
)
df
#> V1 V2 V3 V4 V5 V6 V7 V8 V9
#> 1 XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
#> 2 XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
#> 3 XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
由 reprex 包 (v2.0.1) 创建于 2022-06-01
在 base R 中:
# Named lookup vector: names are the ratings, values the credit qualities.
lut <- with(B, setNames(CreditQuality, Rating))
# Names of the rating columns to recode.
vars <- paste0("V", 3:9)
# Index by character so factor columns are looked up by label rather than by
# integer code; unname() stops the rating labels from sticking to the result.
A[vars] <- lapply(A[vars], \(x) unname(lut[as.character(x)]))
# V1 V2 V3 V4 V5 V6 V7 V8 V9
# 1 XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
# 2 XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
# 3 XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
data.table 中的相同逻辑:
# Convert A to a data.table by reference, then overwrite the columns named in
# `vars` with the values that `lut` holds for their contents.
setDT(A)
A[, (vars) := lapply(.SD, function(col) lut[col]),
  .SDcols = vars]
数据
# Example table: V1-V2 identify the instrument, V3-V9 hold rating strings.
A <- data.frame(
  V1 = c("XS0285400197", "XS0041971275", "XS0043098127"),
  V2 = c("TR.IssuerRating", "TR.IssuerRating", "TR.IssuerRating"),
  V3 = c("F1", "AAA", "WD"),
  V4 = c("F1", "AAA", "WD"),
  V5 = c("F1", "AAA", "WD"),
  V6 = c("F1", "AAA", "WD"),
  V7 = c("F1", "F1", "WD"),
  V8 = c("F1", "F1", "WD"),
  V9 = c("F1", "AAA", "WD"),
  stringsAsFactors = FALSE
)
# Key: one integer CreditQuality per Rating string.
B <- data.frame(
  Rating = c("F1", "AAA", "WD", "(P)B2", "(P)Ba1", "(P)Ba2"),
  CreditQuality = c(2L, 1L, 6L, 6L, 4L, 5L),
  stringsAsFactors = FALSE
)
我有一个很大的 data.table,如下所示:
V1 V2 V3 V4 V5 V6 V7 V8 V9
1: XS0285400197 TR.IssuerRating F1 F1 F1 F1 F1 F1 F1
2: XS0041971275 TR.IssuerRating AAA AAA AAA AAA F1 F1 AAA
3: XS0043098127 TR.IssuerRating WD WD WD WD WD WD WD
# Reproducible definition of the example table shown above: a data.frame with
# three rows and nine character columns (V1-V2 identifiers, V3-V9 ratings).
structure(list(V1 = c("XS0285400197", "XS0041971275", "XS0043098127"
), V2 = c("TR.IssuerRating", "TR.IssuerRating", "TR.IssuerRating"
), V3 = c("F1", "AAA", "WD"), V4 = c("F1", "AAA", "WD"), V5 = c("F1",
"AAA", "WD"), V6 = c("F1", "AAA", "WD"), V7 = c("F1", "F1", "WD"
), V8 = c("F1", "F1", "WD"), V9 = c("F1", "AAA", "WD")), class = "data.frame", row.names = c(NA,
-3L))
实际的 data.table 要大得多,这只是一个例子。此外,我有一张键表(对照表),想用其中的数字替换评级(此处为 F1、AAA 和 WD)。
Rating CreditQuality
1: F1 2
2: AAA 1
3: WD 6
4: (P)B2 6
5: (P)Ba1 4
6: (P)Ba2 5
# Reproducible definition of the key shown above: a data.frame with six rows
# mapping each Rating string to an integer CreditQuality.
structure(list(Rating = c("F1", "AAA", "WD", "(P)B2", "(P)Ba1",
"(P)Ba2"), CreditQuality = c(2L, 1L, 6L, 6L, 4L, 5L)), class = "data.frame", row.names = c(NA,
-6L))
我想用键表中为每个评级指定的 CreditQuality 来替换这些评级。也就是说,值为 F1 的单元格将变为 2,值为 WD 的单元格将变为 6,依此类推。新的 table 应该如下所示:
V1 V2 V3 V4 V5 V6 V7 V8 V9
1: XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
2: XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
3: XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
我试过使用 match 和 mapvalues,但是 match 似乎只适用于单个列,而 mapvalues 只适用于原子向量,不适用于 data.table。有些人遇到过类似的问题,但他们大多只需要替换单个列中的值,而我想替换 data.table 中多个列的值。
您可以使用 melt 和 dcast:
# Recode every rating by reshaping the rating columns to long form, joining
# the key on "Rating", and reshaping back to wide form.
library(data.table)

# as.data.table() lets the snippet run whether `df` and `rating` are plain
# data.frames or already data.tables; the `on =` join requires data.tables.
# The join keeps every melted row, so ratings missing from the key become NA.
# Note: dcast() returns the rows ordered by V1 and V2, not in input order.
dcast(
  as.data.table(rating)[
    # `id.vars` is spelled out; `id =` only worked through partial matching.
    melt(as.data.table(df), id.vars = c("V1", "V2"), value.name = "Rating"),
    on = "Rating"
  ],
  V1 + V2 ~ variable,
  value.var = "CreditQuality"
)
输出:
V1 V2 V3 V4 V5 V6 V7 V8 V9
1: XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
2: XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
3: XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
注意:我假设您的源数据是 df,评级对照数据是 rating。我看到您的数据已经是 data.table 类。
您可以使用 dplyr 和 across。
library(dplyr)

# Define input data
df <- data.frame(
  V1 = c("XS0285400197", "XS0041971275", "XS0043098127"),
  V2 = c("TR.IssuerRating", "TR.IssuerRating", "TR.IssuerRating"),
  V3 = c("F1", "AAA", "WD"),
  V4 = c("F1", "AAA", "WD"),
  V5 = c("F1", "AAA", "WD"),
  V6 = c("F1", "AAA", "WD"),
  V7 = c("F1", "F1", "WD"),
  V8 = c("F1", "F1", "WD"),
  V9 = c("F1", "AAA", "WD"),
  stringsAsFactors = FALSE
)

lookup <- data.frame(
  Rating = c("F1", "AAA", "WD", "(P)B2", "(P)Ba1", "(P)Ba2"),
  CreditQuality = c(2, 1, 6, 6, 4, 5),
  stringsAsFactors = FALSE
)

# Make a look up vector: names are the ratings, values the credit qualities
lookup_vec <- setNames(lookup$CreditQuality, lookup$Rating)

# Use dplyr across to apply the look up to every column except the two
# identifier columns. Selecting by name avoids seq(3, ncol), which counts
# down (3, 2) when the frame has fewer than three columns.
# as.character() makes factor columns index by label rather than by integer
# code; unname() stops the rating labels from sticking to the new columns.
df_mod <- df %>%
  mutate(across(-c(V1, V2), ~ unname(lookup_vec[as.character(.x)])))

# View
df_mod
# V1 V2 V3 V4 V5 V6 V7 V8 V9
# 1 XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
# 2 XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
# 3 XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
# Example table: V1-V2 identify the instrument, V3-V9 hold rating strings.
df <- data.frame(
  V1 = c("XS0285400197", "XS0041971275", "XS0043098127"),
  V2 = c("TR.IssuerRating", "TR.IssuerRating", "TR.IssuerRating"),
  V3 = c("F1", "AAA", "WD"),
  V4 = c("F1", "AAA", "WD"),
  V5 = c("F1", "AAA", "WD"),
  V6 = c("F1", "AAA", "WD"),
  V7 = c("F1", "F1", "WD"),
  V8 = c("F1", "F1", "WD"),
  V9 = c("F1", "AAA", "WD"),
  stringsAsFactors = FALSE
)

# Key: one integer CreditQuality per Rating string.
rating <- data.frame(
  Rating = c("F1", "AAA", "WD", "(P)B2", "(P)Ba1", "(P)Ba2"),
  CreditQuality = c(2L, 1L, 6L, 6L, 4L, 5L),
  stringsAsFactors = FALSE
)

df
#> V1 V2 V3 V4 V5 V6 V7 V8 V9
#> 1 XS0285400197 TR.IssuerRating F1 F1 F1 F1 F1 F1 F1
#> 2 XS0041971275 TR.IssuerRating AAA AAA AAA AAA F1 F1 AAA
#> 3 XS0043098127 TR.IssuerRating WD WD WD WD WD WD WD
# tidyverse: for each of V3..V9, find every cell's position in rating$Rating
# and take the CreditQuality at that position (NA when there is no match).
library(tidyverse)
df %>%
  mutate(
    across(
      V3:V9,
      function(col) rating$CreditQuality[match(col, table = rating$Rating)]
    )
  )
#> V1 V2 V3 V4 V5 V6 V7 V8 V9
#> 1 XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
#> 2 XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
#> 3 XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
# base
# Recode columns 3..9 in place: match() gives each cell's row in the key and
# that row's CreditQuality replaces the cell (NA when there is no match).
# lapply() always yields a list of columns; sapply() would instead collapse the
# result to a matrix, a plain vector (one-row input) or a list (empty input).
df[3:9] <- lapply(
  df[3:9],
  function(x) rating$CreditQuality[match(x, table = rating$Rating)]
)
df
#> V1 V2 V3 V4 V5 V6 V7 V8 V9
#> 1 XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
#> 2 XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
#> 3 XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
由 reprex 包 (v2.0.1) 创建于 2022-06-01
在 base R 中:
# Named lookup vector: names are the ratings, values the credit qualities.
lut <- with(B, setNames(CreditQuality, Rating))
# Names of the rating columns to recode.
vars <- paste0("V", 3:9)
# Index by character so factor columns are looked up by label rather than by
# integer code; unname() stops the rating labels from sticking to the result.
A[vars] <- lapply(A[vars], \(x) unname(lut[as.character(x)]))
# V1 V2 V3 V4 V5 V6 V7 V8 V9
# 1 XS0285400197 TR.IssuerRating 2 2 2 2 2 2 2
# 2 XS0041971275 TR.IssuerRating 1 1 1 1 2 2 1
# 3 XS0043098127 TR.IssuerRating 6 6 6 6 6 6 6
data.table 中的相同逻辑:
# Convert A to a data.table by reference, then overwrite the columns named in
# `vars` with the values that `lut` holds for their contents.
setDT(A)
A[, (vars) := lapply(.SD, function(col) lut[col]),
  .SDcols = vars]
数据
# Example table: V1-V2 identify the instrument, V3-V9 hold rating strings.
A <- data.frame(
  V1 = c("XS0285400197", "XS0041971275", "XS0043098127"),
  V2 = c("TR.IssuerRating", "TR.IssuerRating", "TR.IssuerRating"),
  V3 = c("F1", "AAA", "WD"),
  V4 = c("F1", "AAA", "WD"),
  V5 = c("F1", "AAA", "WD"),
  V6 = c("F1", "AAA", "WD"),
  V7 = c("F1", "F1", "WD"),
  V8 = c("F1", "F1", "WD"),
  V9 = c("F1", "AAA", "WD"),
  stringsAsFactors = FALSE
)
# Key: one integer CreditQuality per Rating string.
B <- data.frame(
  Rating = c("F1", "AAA", "WD", "(P)B2", "(P)Ba1", "(P)Ba2"),
  CreditQuality = c(2L, 1L, 6L, 6L, 4L, 5L),
  stringsAsFactors = FALSE
)