如何在 R 中将列名的编码更改为 UTF-8
How to change the encoding of column names to UTF-8 in R
我有一个数据框,其列名(colnames)是 UTF-8 编码的俄语。我用 purrr 做了一些线性建模,并用 broom::tidy() 查看了系数(coefficients),结果得到一个列名编码混杂的数据框,无法将其传递给 girafe():它会报错并崩溃。
我试过 `stringi::stri_enc_toutf8(colnames(df))`:没有用。
`Encoding(colnames(df)) <- "UTF-8"` 也没有帮助。
# Fit one linear model per shop, unnest the tidied coefficients, and report
# how R marks the encoding of every column name in the resulting table.
rem %>%
  # Total units sold per shop, shop type and date
  group_by(МАГАЗИН, `ТИП МАГАЗИНА`, Дата) %>%
  summarise(`Количество, шт` = sum(`Количество, шт`, na.rm = TRUE)) %>%
  select(МАГАЗИН, `ТИП МАГАЗИНА`, `Количество, шт`, Дата) %>%
  # One nested data frame per shop
  group_by(`ТИП МАГАЗИНА`, МАГАЗИН) %>%
  nest() %>%
  mutate(
    lm = map(data, ~lm(formula = `Количество, шт` ~ Дата, data = .x)),
    fit = map(lm, tidy)
  ) %>%
  unnest(fit) %>%
  filter(term != "(Intercept)") %>%
  # Declared encoding of each column name
  colnames() %>%
  Encoding()
[1] "UTF-8" "UTF-8" "unknown" "unknown" "unknown" "unknown" "unknown"
>
然后,当我绘制它时,我得到了这个:
# Interactive coefficient plot: fits one linear model per shop
# (`Количество, шт` ~ Дата), keeps the slope term and renders it with ggiraph.
# `rem` and the dplyr / tidyr / purrr / broom / ggplot2 / ggiraph functions
# are expected to be available in the session already.
girafe_options(
  girafe(
    code = print(
      rem %>%
        group_by(МАГАЗИН, `ТИП МАГАЗИНА`, Дата) %>%
        summarise(`Количество, шт` = sum(`Количество, шт`, na.rm = TRUE)) %>%
        select(МАГАЗИН, `ТИП МАГАЗИНА`, `Количество, шт`, Дата) %>%
        group_by(`ТИП МАГАЗИНА`, МАГАЗИН) %>%
        nest() %>%
        mutate(
          lm = map(data, ~lm(formula = `Количество, шт` ~ Дата, data = .x)),
          fit = map(lm, tidy)
        ) %>%
        unnest(fit) %>%
        # Drop the intercept so only the slope over time remains
        filter(term != "(Intercept)") %>%
        mutate(term = "Дата") %>%
        ggplot(aes(
          x = reorder(МАГАЗИН, estimate), y = estimate,
          col = `ТИП МАГАЗИНА`, shape = `ТИП МАГАЗИНА`
        )) +
        # NOTE: this Cyrillic tooltip text is left exactly as in the failing
        # run. In a non-UTF-8 native locale it appears to be what girafe()
        # cannot parse; wrapping the paste() call in enc2utf8() avoids that.
        geom_point_interactive(aes(tooltip = paste(
          "Изменение: ", round(estimate, 6), "<br>",
          "Среднеквадратическое отклонение: ", round(std.error, 6), "<br>",
          "ВВероятность случайного изменения: ", round(p.value, 6)
        ))) +
        # Error bars centred on the estimate: estimate +/- 1.96 standard
        # errors. The previous bounds added `estimate` a second time, so the
        # bars were not centred on the plotted point.
        geom_errorbar(aes(
          x = reorder(МАГАЗИН, estimate),
          ymin = estimate - 1.96 * std.error,
          ymax = estimate + 1.96 * std.error
        )) +
        geom_segment(aes(y = 0, yend = estimate, xend = МАГАЗИН)) +
        geom_hline(yintercept = 0, col = "black", size = 0.4, linetype = "dashed") +
        coord_flip() +
        theme_light() +
        theme(text = element_text(size = 16)) +
        theme(axis.text.x = element_text(angle = 90, vjust = 1)) +
        theme(
          legend.position = 'bottom',
          legend.justification = 'left',
          legend.direction = 'horizontal'
        ) +
        labs(
          title = "Выручка и количество проданных товаров Реми и Экономыча",
          subtitle = "по номенклатуре",
          y = "",
          x = "Номенклатура"
        )
    ),
    height_svg = 2, width_svg = 16
  ),
  opts_tooltip(use_fill = TRUE),
  opts_zoom(max = 5)
)
Error in doc_parse_file(con, encoding = encoding, as_html = as_html, options = options) :
Input is not proper UTF-8, indicate encoding !
Bytes: 0xC8 0xE7 0xEC 0xE5 [9]
在模拟数据集上:
# Simulated stand-in for the real data: monthly dates from 2010-01 to 2018-12
# and three simulated ARIMA series stacked into one long data frame.
x <- seq(as.Date("2010-01-01"), as.Date("2018-12-01"), by = "months")
y <- c(
  arima.sim(
    model = list(order = c(2, 1, 1), ar = c(1.5, -0.75), ma = 15),
    n = 107, sd = 15
  ),
  arima.sim(
    model = list(order = c(2, 1, 1), ar = c(1.5, -.95), ma = 11),
    n = 107, sd = 15
  ),
  arima.sim(
    model = list(order = c(4, 0, 1), ar = c(0.8, -.75, 0.6, 0.3), ma = 32),
    n = 108, sd = 10
  )
)
# Dates are recycled once per group; each group label covers 108 rows
df <- data.frame(
  Дата = rep(x, 3),
  y = y,
  Группа = rep(c("G1", "G2", "G3"), each = 108)
)
一切正常,但所有编码都相同
# Show how stringi marks the encoding of each column name of df
stringi::stri_enc_mark(colnames(df))
[1] "native" "ASCII" "native"
如何更改编码以便绘制它?
我想我已经缩小了错误的范围:虽然不确定原因,但问题似乎出在 geom_point_interactive() 的 tooltip 中的西里尔字母上。下面是一个例子:
# Minimal reproducible example: a Cyrillic tooltip literal under a Russian
# locale makes girafe() fail with the UTF-8 parsing error shown at the end.
library(ggplot2)
library(ggiraph)
# Switch to a Russian locale; the output below shows code page 1251 in effect
Sys.setlocale(locale = "Russian")
#> [1] "LC_COLLATE=Russian_Russia.1251;LC_CTYPE=Russian_Russia.1251;LC_MONETARY=Russian_Russia.1251;LC_NUMERIC=C;LC_TIME=Russian_Russia.1251"
# Fix the RNG seed so the simulated series are reproducible
set.seed(42)
# Monthly dates from January 2010 to December 2018
x <- seq.Date(as.Date("2010-01-01"), as.Date("2018-12-01"), "months")
# Three simulated ARIMA series concatenated into one vector
y <- c(
arima.sim(model = list(order = c(2, 1, 1), ar = c(1.5, -0.75), ma = 15), n = 107, sd = 15),
arima.sim(model = list(order = c(2, 1, 1), ar = c(1.5, -.95), ma = 11), n = 107, sd = 15),
arima.sim(model = list(order = c(4, 0, 1), ar = c(0.8, -.75, 0.6, 0.3), ma = 32), n = 108, sd = 10)
)
# Long data frame with Cyrillic column names; each group label spans 108 rows
df <- data.frame(Дата = rep(x, 3), y = y, Группа = rep(c("G1", "G2", "G3"), each = 108))
# Base line plot, one colour per group
p <- ggplot(df, aes(Дата, y, colour = Группа)) + geom_line()
# Adding an interactive point layer with a Cyrillic tooltip triggers the error
girafe(ggobj = p + geom_point_interactive(aes(tooltip = "Изменение")))
#> Error in doc_parse_file(con, encoding = encoding, as_html = as_html, options = options): Input is not proper UTF-8, indicate encoding !
#> Bytes: 0xC8 0xE7 0xEC 0xE5 [9]
将 tooltip 包裹在 enc2utf8() 中应该可以修复它:
# Same plot as the failing call, but the tooltip text is converted to UTF-8
# with enc2utf8() before it is handed to girafe()
girafe(ggobj = p + geom_point_interactive(aes(tooltip = enc2utf8("Изменение"))))
由 reprex package (v0.3.0.9000) 创建于 2019-07-05
我有一个数据框,其列名(colnames)是 UTF-8 编码的俄语。我用 purrr 做了一些线性建模,并用 broom::tidy() 查看了系数(coefficients),结果得到一个列名编码混杂的数据框,无法将其传递给 girafe():它会报错并崩溃。
我试过 `stringi::stri_enc_toutf8(colnames(df))`:没有用。
`Encoding(colnames(df)) <- "UTF-8"` 也没有帮助。
# Fit one linear model per shop, unnest the tidied coefficients, and report
# how R marks the encoding of every column name in the resulting table.
rem %>%
  # Total units sold per shop, shop type and date
  group_by(МАГАЗИН, `ТИП МАГАЗИНА`, Дата) %>%
  summarise(`Количество, шт` = sum(`Количество, шт`, na.rm = TRUE)) %>%
  select(МАГАЗИН, `ТИП МАГАЗИНА`, `Количество, шт`, Дата) %>%
  # One nested data frame per shop
  group_by(`ТИП МАГАЗИНА`, МАГАЗИН) %>%
  nest() %>%
  mutate(
    lm = map(data, ~lm(formula = `Количество, шт` ~ Дата, data = .x)),
    fit = map(lm, tidy)
  ) %>%
  unnest(fit) %>%
  filter(term != "(Intercept)") %>%
  # Declared encoding of each column name
  colnames() %>%
  Encoding()
[1] "UTF-8" "UTF-8" "unknown" "unknown" "unknown" "unknown" "unknown"
>
然后,当我绘制它时,我得到了这个:
# Interactive coefficient plot: fits one linear model per shop
# (`Количество, шт` ~ Дата), keeps the slope term and renders it with ggiraph.
# `rem` and the dplyr / tidyr / purrr / broom / ggplot2 / ggiraph functions
# are expected to be available in the session already.
girafe_options(
  girafe(
    code = print(
      rem %>%
        group_by(МАГАЗИН, `ТИП МАГАЗИНА`, Дата) %>%
        summarise(`Количество, шт` = sum(`Количество, шт`, na.rm = TRUE)) %>%
        select(МАГАЗИН, `ТИП МАГАЗИНА`, `Количество, шт`, Дата) %>%
        group_by(`ТИП МАГАЗИНА`, МАГАЗИН) %>%
        nest() %>%
        mutate(
          lm = map(data, ~lm(formula = `Количество, шт` ~ Дата, data = .x)),
          fit = map(lm, tidy)
        ) %>%
        unnest(fit) %>%
        # Drop the intercept so only the slope over time remains
        filter(term != "(Intercept)") %>%
        mutate(term = "Дата") %>%
        ggplot(aes(
          x = reorder(МАГАЗИН, estimate), y = estimate,
          col = `ТИП МАГАЗИНА`, shape = `ТИП МАГАЗИНА`
        )) +
        # NOTE: this Cyrillic tooltip text is left exactly as in the failing
        # run. In a non-UTF-8 native locale it appears to be what girafe()
        # cannot parse; wrapping the paste() call in enc2utf8() avoids that.
        geom_point_interactive(aes(tooltip = paste(
          "Изменение: ", round(estimate, 6), "<br>",
          "Среднеквадратическое отклонение: ", round(std.error, 6), "<br>",
          "ВВероятность случайного изменения: ", round(p.value, 6)
        ))) +
        # Error bars centred on the estimate: estimate +/- 1.96 standard
        # errors. The previous bounds added `estimate` a second time, so the
        # bars were not centred on the plotted point.
        geom_errorbar(aes(
          x = reorder(МАГАЗИН, estimate),
          ymin = estimate - 1.96 * std.error,
          ymax = estimate + 1.96 * std.error
        )) +
        geom_segment(aes(y = 0, yend = estimate, xend = МАГАЗИН)) +
        geom_hline(yintercept = 0, col = "black", size = 0.4, linetype = "dashed") +
        coord_flip() +
        theme_light() +
        theme(text = element_text(size = 16)) +
        theme(axis.text.x = element_text(angle = 90, vjust = 1)) +
        theme(
          legend.position = 'bottom',
          legend.justification = 'left',
          legend.direction = 'horizontal'
        ) +
        labs(
          title = "Выручка и количество проданных товаров Реми и Экономыча",
          subtitle = "по номенклатуре",
          y = "",
          x = "Номенклатура"
        )
    ),
    height_svg = 2, width_svg = 16
  ),
  opts_tooltip(use_fill = TRUE),
  opts_zoom(max = 5)
)
Error in doc_parse_file(con, encoding = encoding, as_html = as_html, options = options) :
Input is not proper UTF-8, indicate encoding !
Bytes: 0xC8 0xE7 0xEC 0xE5 [9]
在模拟数据集上:
# Simulated stand-in for the real data: monthly dates from 2010-01 to 2018-12
# and three simulated ARIMA series stacked into one long data frame.
x <- seq(as.Date("2010-01-01"), as.Date("2018-12-01"), by = "months")
y <- c(
  arima.sim(
    model = list(order = c(2, 1, 1), ar = c(1.5, -0.75), ma = 15),
    n = 107, sd = 15
  ),
  arima.sim(
    model = list(order = c(2, 1, 1), ar = c(1.5, -.95), ma = 11),
    n = 107, sd = 15
  ),
  arima.sim(
    model = list(order = c(4, 0, 1), ar = c(0.8, -.75, 0.6, 0.3), ma = 32),
    n = 108, sd = 10
  )
)
# Dates are recycled once per group; each group label covers 108 rows
df <- data.frame(
  Дата = rep(x, 3),
  y = y,
  Группа = rep(c("G1", "G2", "G3"), each = 108)
)
一切正常,但所有编码都相同
# Show how stringi marks the encoding of each column name of df
stringi::stri_enc_mark(colnames(df))
[1] "native" "ASCII" "native"
如何更改编码以便绘制它?
我想我已经缩小了错误的范围:虽然不确定原因,但问题似乎出在 geom_point_interactive() 的 tooltip 中的西里尔字母上。下面是一个例子:
# Minimal reproducible example: a Cyrillic tooltip literal under a Russian
# locale makes girafe() fail with the UTF-8 parsing error shown at the end.
library(ggplot2)
library(ggiraph)
# Switch to a Russian locale; the output below shows code page 1251 in effect
Sys.setlocale(locale = "Russian")
#> [1] "LC_COLLATE=Russian_Russia.1251;LC_CTYPE=Russian_Russia.1251;LC_MONETARY=Russian_Russia.1251;LC_NUMERIC=C;LC_TIME=Russian_Russia.1251"
# Fix the RNG seed so the simulated series are reproducible
set.seed(42)
# Monthly dates from January 2010 to December 2018
x <- seq.Date(as.Date("2010-01-01"), as.Date("2018-12-01"), "months")
# Three simulated ARIMA series concatenated into one vector
y <- c(
arima.sim(model = list(order = c(2, 1, 1), ar = c(1.5, -0.75), ma = 15), n = 107, sd = 15),
arima.sim(model = list(order = c(2, 1, 1), ar = c(1.5, -.95), ma = 11), n = 107, sd = 15),
arima.sim(model = list(order = c(4, 0, 1), ar = c(0.8, -.75, 0.6, 0.3), ma = 32), n = 108, sd = 10)
)
# Long data frame with Cyrillic column names; each group label spans 108 rows
df <- data.frame(Дата = rep(x, 3), y = y, Группа = rep(c("G1", "G2", "G3"), each = 108))
# Base line plot, one colour per group
p <- ggplot(df, aes(Дата, y, colour = Группа)) + geom_line()
# Adding an interactive point layer with a Cyrillic tooltip triggers the error
girafe(ggobj = p + geom_point_interactive(aes(tooltip = "Изменение")))
#> Error in doc_parse_file(con, encoding = encoding, as_html = as_html, options = options): Input is not proper UTF-8, indicate encoding !
#> Bytes: 0xC8 0xE7 0xEC 0xE5 [9]
将 tooltip 包裹在 enc2utf8() 中应该可以修复它:
# Same plot as the failing call, but the tooltip text is converted to UTF-8
# with enc2utf8() before it is handed to girafe()
girafe(ggobj = p + geom_point_interactive(aes(tooltip = enc2utf8("Изменение"))))
由 reprex package (v0.3.0.9000) 创建于 2019-07-05