R: "Error: unexpected string constant in" with read_fwf()
R: "Error: unexpected string constant in" with read_fwf()
我正在尝试使用 read_fwf() 将美国人口普查局(U.S. Census Bureau)提供的固定宽度文件读入 R。我一直在列名列表的同一个位置收到错误。我曾多次尝试更改该位置的列名,但 R 仍然不断抛出错误。我重新启动 R 并开启了新会话,但仍然收到同样的错误消息。在列名列表中,第 39 项似乎有问题。在下面代码所示的一次尝试中,我更改了第 39 个位置的名称(有时也更改了第 38 个位置的名称)。代码块中的第一条语句使用的是原始列名。在该语句中,第 39 个名称是 "cbsac",但错误消息在该处只打印出 `... "cbsa", "`。它与第 38 位的名称 "cbsa" 很相似,但列表其他位置也有许多连续出现的相似名称,却不会出错。我不知道这说明了什么。"cbsac" 在 R 中是否有什么我不知道的特殊含义?
library(readr)
> tf <- read_fwf("D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1", fwf_widths( c(6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5, 2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5, 2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5, 2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18), c("fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno", "region", "division", "state", "county", "countycc", "countysc", "cousub", "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp", "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc", "cbsa", "cbsac", "metdiv", "csa", "necta", "nectasc", "nectadiv" "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur", "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd", "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name", "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc", "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens", "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114", "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved")))
Error: unexpected string constant in ""tract", "blkgrp", "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc", "cbsa", ""
> tf <- read_fwf("D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1", fwf_widths( c(6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5, 2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5, 2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5, 2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18), c("fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno", "region", "division", "state", "county", "countycc", "countysc", "cousub", "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp", "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc", "BCas", "CBsac", "metdiv", "csa", "necta", "nectasc", "nectadiv" "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur", "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd", "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name", "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc", "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens", "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114", "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved")))
Error: unexpected string constant in ""tract", "blkgrp", "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc", "BCas", ""
> sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17763)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] readr_1.3.1
loaded via a namespace (and not attached):
[1] compiler_3.6.1 backports_1.1.5 R6_2.4.0 hms_0.5.1
[5] pillar_1.4.2 tibble_2.1.3 Rcpp_1.0.2 crayon_1.3.4
[9] vctrs_0.2.0 zeallot_0.1.0 pkgconfig_2.0.3 rlang_0.4.0
此链接指向包含源文件的 zip 压缩包。该文件是 "orgeo2010.sf1"。需要说明的是,这个压缩包有点大,对此深表歉意。
这能解决您的问题吗?
# Field widths (in characters) for the fixed-width file read below, listed in
# file order. There must be exactly one width per column name in `vars`.
widths <- c(
  6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5,
  2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5,
  2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5,
  2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18
)

# Column names, in the same order as `widths`. Every element must be separated
# by a comma: two adjacent string literals without one are a parse error
# ("unexpected string constant").
vars <- c(
  "fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno",
  "region", "division", "state", "county", "countycc", "countysc", "cousub",
  "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp",
  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp",
  "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp",
  "anrc", "anrccc", "cbsa", "cbsac", "metdiv", "csa", "necta", "nectasc",
  "nectadiv", "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur",
  "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd",
  "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name",
  "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc",
  "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens",
  "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114",
  "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc",
  "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved"
)

# Fail early with a clear message if the two vectors ever get out of sync,
# instead of silently mislabelling columns.
stopifnot(length(widths) == length(vars))

# Read the file, naming the columns as part of the column specification.
# Requires readr to be attached (library(readr)).
td <- read_fwf(
  "D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1",
  fwf_widths(widths, col_names = vars)
)
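出现 "unexpected string constant"(意外的字符串常量)错误,是因为字符向量没有被正确定义:您在 "nectadiv" 和 "cnecta" 之间漏掉了一个逗号。错误消息中显示的上下文是被截断的,因此看起来像是 "cbsa" 附近出了问题,但实际出错的位置在它之后。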
我正在尝试使用 read_fwf() 将美国人口普查局(U.S. Census Bureau)提供的固定宽度文件读入 R。我一直在列名列表的同一个位置收到错误。我曾多次尝试更改该位置的列名,但 R 仍然不断抛出错误。我重新启动 R 并开启了新会话,但仍然收到同样的错误消息。在列名列表中,第 39 项似乎有问题。在下面代码所示的一次尝试中,我更改了第 39 个位置的名称(有时也更改了第 38 个位置的名称)。代码块中的第一条语句使用的是原始列名。在该语句中,第 39 个名称是 "cbsac",但错误消息在该处只打印出 `... "cbsa", "`。它与第 38 位的名称 "cbsa" 很相似,但列表其他位置也有许多连续出现的相似名称,却不会出错。我不知道这说明了什么。"cbsac" 在 R 中是否有什么我不知道的特殊含义?
library(readr)
> tf <- read_fwf("D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1", fwf_widths( c(6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5, 2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5, 2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5, 2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18), c("fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno", "region", "division", "state", "county", "countycc", "countysc", "cousub", "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp", "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc", "cbsa", "cbsac", "metdiv", "csa", "necta", "nectasc", "nectadiv" "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur", "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd", "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name", "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc", "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens", "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114", "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved")))
Error: unexpected string constant in ""tract", "blkgrp", "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc", "cbsa", ""
> tf <- read_fwf("D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1", fwf_widths( c(6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5, 2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5, 2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5, 2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18), c("fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno", "region", "division", "state", "county", "countycc", "countysc", "cousub", "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp", "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc", "BCas", "CBsac", "metdiv", "csa", "necta", "nectasc", "nectadiv" "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur", "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd", "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name", "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc", "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens", "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114", "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved")))
Error: unexpected string constant in ""tract", "blkgrp", "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc", "BCas", ""
> sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17763)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] readr_1.3.1
loaded via a namespace (and not attached):
[1] compiler_3.6.1 backports_1.1.5 R6_2.4.0 hms_0.5.1
[5] pillar_1.4.2 tibble_2.1.3 Rcpp_1.0.2 crayon_1.3.4
[9] vctrs_0.2.0 zeallot_0.1.0 pkgconfig_2.0.3 rlang_0.4.0
此链接指向包含源文件的 zip 压缩包。该文件是 "orgeo2010.sf1"。需要说明的是,这个压缩包有点大,对此深表歉意。
这能解决您的问题吗?
# Field widths (in characters) for the fixed-width file read below, listed in
# file order. There must be exactly one width per column name in `vars`.
widths <- c(
  6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5,
  2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5,
  2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5,
  2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18
)

# Column names, in the same order as `widths`. Every element must be separated
# by a comma: two adjacent string literals without one are a parse error
# ("unexpected string constant").
vars <- c(
  "fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno",
  "region", "division", "state", "county", "countycc", "countysc", "cousub",
  "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp",
  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp",
  "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp",
  "anrc", "anrccc", "cbsa", "cbsac", "metdiv", "csa", "necta", "nectasc",
  "nectadiv", "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur",
  "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd",
  "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name",
  "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc",
  "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens",
  "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114",
  "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc",
  "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved"
)

# Fail early with a clear message if the two vectors ever get out of sync,
# instead of silently mislabelling columns.
stopifnot(length(widths) == length(vars))

# Read the file, naming the columns as part of the column specification.
# Requires readr to be attached (library(readr)).
td <- read_fwf(
  "D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1",
  fwf_widths(widths, col_names = vars)
)
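出现 "unexpected string constant"(意外的字符串常量)错误,是因为字符向量没有被正确定义:您在 "nectadiv" 和 "cnecta" 之间漏掉了一个逗号。错误消息中显示的上下文是被截断的,因此看起来像是 "cbsa" 附近出了问题,但实际出错的位置在它之后。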