词云 "Error in if (min.freq > max(freq)) min.freq <- 0"
wordcloud "Error in if (min.freq > max(freq)) min.freq <- 0"
我不确定出了什么问题:
t1 <- read.csv("vac_sec_cloud.csv")
library(tm)
library(wordcloud)
df <- data.frame(t1, stringsAsFactors = FALSE)
wordcloud(words = df$Words, freq = df$AC)
Error in if (min.freq > max(freq)) min.freq <- 0 :
missing value where TRUE/FALSE needed
数据框有一列 Words,里面都是单词,后面是若干列各不相同的频率。
做了一些检查:
is.integer(df$AC)
[1] TRUE
is.character(df$Words)
[1] FALSE
所以我做了:
df$Words <- as.character(df$Words)
is.character(df$Words)
[1] TRUE
wordcloud(words = df$Words, freq = df$AC)
Error in if (min.freq > max(freq)) min.freq <- 0 :
missing value where TRUE/FALSE needed
我试过使用 min.freq = 100,或 max.words = 1000,以及两者同时使用,但都没有用。
如果我省略 freq= 参数,就可以生成词云,但这样就失去了这个数据框的意义——不同的列有不同的频率。到底哪里出了问题?
此外,顺便说一句,我使用了 stringsAsFactors = FALSE,但 Words 仍然是一个因子(factor)?
以下是更多信息:
dput(head(df))
structure(list(Words = c("will", "experience", "role", "team",
"business", "work"), AC = c(2431L, 1800L, 1664L, 1216L, 1428L,
1048L), AU = c(1934L, 2016L, 1011L, 1039L, 770L, 1250L), AV = c(1009L,
1109L, 649L, 478L, 590L, 537L), CH = c(909L, 462L, 488L, 572L,
59L, 599L), CN = c(2661L, 1800L, 1105L, 1011L, 786L, 1489L),
CU = c(2827L, 1632L, 1391L, 1777L, 936L, 1413L), CY = c(1058L,
1156L, 591L, 629L, 828L, 503L), DE = c(758L, 864L, 473L,
448L, 494L, 475L), DI = c(1360L, 949L, 534L, 669L, 490L,
807L), ED = c(1714L, 1024L, 735L, 563L, 310L, 877L), EL = c(1827L,
1778L, 861L, 873L, 524L, 848L), EN = c(3466L, 3036L, 2012L,
1447L, 997L, 1889L), FI = c(2460L, 2208L, 1438L, 1509L, 2253L,
1046L), GR = c(2606L, 451L, 1346L, 807L, 1738L, 661L), HR = c(1570L,
1307L, 820L, 806L, 1102L, 734L), HS = c(238L, 141L, 105L,
88L, 65L, 126L), IN = c(1868L, 1344L, 961L, 1065L, 1498L,
599L), IT = c(4288L, 4377L, 2087L, 2311L, 2402L, 1949L),
LO = c(2260L, 1413L, 954L, 905L, 751L, 1281L), LW = c(2046L,
1067L, 1119L, 1163L, 595L, 957L), MA = c(2429L, 1776L, 1267L,
1273L, 1077L, 1191L), MD = c(1263L, 1270L, 717L, 805L, 493L,
818L), ME = c(777L, 611L, 367L, 359L, 488L, 365L), MI = c(128L,
70L, 50L, 11L, 6L, 44L), MK = c(2989L, 2461L, 1616L, 1761L,
1456L, 1287L), OG = c(383L, 176L, 79L, 83L, 98L, 118L), OT = c(2879L,
2280L, 1497L, 1408L, 911L, 1315L), PO = c(777L, 400L, 362L,
347L, 224L, 422L), PS = c(1089L, 807L, 601L, 427L, 442L,
463L), PU = c(1410L, 1178L, 724L, 603L, 683L, 483L), RC = c(2692L,
970L, 1130L, 1152L, 1812L, 1520L), RT = c(2837L, 1721L, 1467L,
2048L, 1512L, 1141L), SC = c(1417L, 864L, 662L, 496L, 326L,
605L), SE = c(2364L, 1368L, 1368L, 1348L, 1027L, 1227L),
SM = c(4519L, 2328L, 2434L, 1849L, 3791L, 1428L), TE = c(1122L,
1359L, 655L, 737L, 981L, 674L), TR = c(332L, 224L, 174L,
191L, 180L, 159L), TV = c(1823L, 1346L, 758L, 1721L, 813L,
1028L), UT = c(714L, 436L, 326L, 287L, 360L, 338L), Grand.Total = c(71234L,
51579L, 36598L, 36282L, 35296L, 33714L)), .Names = c("Words",
"AC", "AU", "AV", "CH", "CN", "CU", "CY", "DE", "DI", "ED", "EL",
"EN", "FI", "GR", "HR", "HS", "IN", "IT", "LO", "LW", "MA", "MD",
"ME", "MI", "MK", "OG", "OT", "PO", "PS", "PU", "RC", "RT", "SC",
"SE", "SM", "TE", "TR", "TV", "UT", "Grand.Total"), row.names = c(NA,
6L), class = "data.frame")
尝试以不同的方式读取 csv:
t1 <- read.csv("vac_sec_cloud.csv", stringsAsFactors = FALSE)
我认为问题是当你创建 df 时你已经有了 "factor-issue".
当您将 data.frame 减少到 1000 行时就可以正常工作。
无论如何,先对数据框排序并缩减行数可能更明智。
感谢您的帮助。
我遇到了同样的问题。这些值必须来自 colSums,带有名称,但不能有任何 NA 值。当我删除 NA 时,词云就起作用了。例如:
poorNA <- poor[complete.cases(poor$Rating1termfrequency),]
Poor1 <- as.data.frame(t(poorNA$Rating1termfrequency))
colnames(Poor1) <- poorNA$term
freqPoor <- colSums(as.matrix(Poor1))
wordcloud(names(freqPoor), freqPoor, min.freq=20,
colors=brewer.pal(6,'Dark2'))
我不确定出了什么问题:
t1 <- read.csv("vac_sec_cloud.csv")
library(tm)
library(wordcloud)
df <- data.frame(t1, stringsAsFactors = FALSE)
wordcloud(words = df$Words, freq = df$AC)
Error in if (min.freq > max(freq)) min.freq <- 0 :
missing value where TRUE/FALSE needed
数据框有一列 Words,里面都是单词,后面是若干列各不相同的频率。
做了一些检查:
is.integer(df$AC)
[1] TRUE
is.character(df$Words)
[1] FALSE
所以我做了:
df$Words <- as.character(df$Words)
is.character(df$Words)
[1] TRUE
wordcloud(words = df$Words, freq = df$AC)
Error in if (min.freq > max(freq)) min.freq <- 0 :
missing value where TRUE/FALSE needed
我试过使用 min.freq = 100,或 max.words = 1000,以及两者同时使用,但都没有用。
如果我省略 freq= 参数,就可以生成词云,但这样就失去了这个数据框的意义——不同的列有不同的频率。到底哪里出了问题?
此外,顺便说一句,我使用了 stringsAsFactors = FALSE,但 Words 仍然是一个因子(factor)?
以下是更多信息:
dput(head(df))
structure(list(Words = c("will", "experience", "role", "team",
"business", "work"), AC = c(2431L, 1800L, 1664L, 1216L, 1428L,
1048L), AU = c(1934L, 2016L, 1011L, 1039L, 770L, 1250L), AV = c(1009L,
1109L, 649L, 478L, 590L, 537L), CH = c(909L, 462L, 488L, 572L,
59L, 599L), CN = c(2661L, 1800L, 1105L, 1011L, 786L, 1489L),
CU = c(2827L, 1632L, 1391L, 1777L, 936L, 1413L), CY = c(1058L,
1156L, 591L, 629L, 828L, 503L), DE = c(758L, 864L, 473L,
448L, 494L, 475L), DI = c(1360L, 949L, 534L, 669L, 490L,
807L), ED = c(1714L, 1024L, 735L, 563L, 310L, 877L), EL = c(1827L,
1778L, 861L, 873L, 524L, 848L), EN = c(3466L, 3036L, 2012L,
1447L, 997L, 1889L), FI = c(2460L, 2208L, 1438L, 1509L, 2253L,
1046L), GR = c(2606L, 451L, 1346L, 807L, 1738L, 661L), HR = c(1570L,
1307L, 820L, 806L, 1102L, 734L), HS = c(238L, 141L, 105L,
88L, 65L, 126L), IN = c(1868L, 1344L, 961L, 1065L, 1498L,
599L), IT = c(4288L, 4377L, 2087L, 2311L, 2402L, 1949L),
LO = c(2260L, 1413L, 954L, 905L, 751L, 1281L), LW = c(2046L,
1067L, 1119L, 1163L, 595L, 957L), MA = c(2429L, 1776L, 1267L,
1273L, 1077L, 1191L), MD = c(1263L, 1270L, 717L, 805L, 493L,
818L), ME = c(777L, 611L, 367L, 359L, 488L, 365L), MI = c(128L,
70L, 50L, 11L, 6L, 44L), MK = c(2989L, 2461L, 1616L, 1761L,
1456L, 1287L), OG = c(383L, 176L, 79L, 83L, 98L, 118L), OT = c(2879L,
2280L, 1497L, 1408L, 911L, 1315L), PO = c(777L, 400L, 362L,
347L, 224L, 422L), PS = c(1089L, 807L, 601L, 427L, 442L,
463L), PU = c(1410L, 1178L, 724L, 603L, 683L, 483L), RC = c(2692L,
970L, 1130L, 1152L, 1812L, 1520L), RT = c(2837L, 1721L, 1467L,
2048L, 1512L, 1141L), SC = c(1417L, 864L, 662L, 496L, 326L,
605L), SE = c(2364L, 1368L, 1368L, 1348L, 1027L, 1227L),
SM = c(4519L, 2328L, 2434L, 1849L, 3791L, 1428L), TE = c(1122L,
1359L, 655L, 737L, 981L, 674L), TR = c(332L, 224L, 174L,
191L, 180L, 159L), TV = c(1823L, 1346L, 758L, 1721L, 813L,
1028L), UT = c(714L, 436L, 326L, 287L, 360L, 338L), Grand.Total = c(71234L,
51579L, 36598L, 36282L, 35296L, 33714L)), .Names = c("Words",
"AC", "AU", "AV", "CH", "CN", "CU", "CY", "DE", "DI", "ED", "EL",
"EN", "FI", "GR", "HR", "HS", "IN", "IT", "LO", "LW", "MA", "MD",
"ME", "MI", "MK", "OG", "OT", "PO", "PS", "PU", "RC", "RT", "SC",
"SE", "SM", "TE", "TR", "TV", "UT", "Grand.Total"), row.names = c(NA,
6L), class = "data.frame")
尝试以不同的方式读取 csv:
t1 <- read.csv("vac_sec_cloud.csv", stringsAsFactors = FALSE)
我认为问题是当你创建 df 时你已经有了 "factor-issue".
当您将 data.frame 减少到 1000 行时就可以正常工作。
无论如何,先对数据框排序并缩减行数可能更明智。
感谢您的帮助。
我遇到了同样的问题。这些值必须来自 colSums,带有名称,但不能有任何 NA 值。当我删除 NA 时,词云就起作用了。例如:
poorNA <- poor[complete.cases(poor$Rating1termfrequency),]
Poor1 <- as.data.frame(t(poorNA$Rating1termfrequency))
colnames(Poor1) <- poorNA$term
freqPoor <- colSums(as.matrix(Poor1))
wordcloud(names(freqPoor), freqPoor, min.freq=20,
colors=brewer.pal(6,'Dark2'))