R: "Error: unexpected string constant in" with read_fwf()

R: "Error: unexpected string constant in" with read_fwf()

我正在尝试使用 read_fwf() 将美国人口普查局(U.S. Census Bureau)提供的固定宽度文件读入 R。我总是在列名列表的同一个位置收到错误。我曾多次尝试更改该位置的列名,但 R 仍然不断抛出错误。我重新启动 R 开启了一个新会话,但还是收到同样的错误消息。看起来列名列表中的第 39 项有问题。在下面代码所示的一次尝试中,我更改了第 39 个位置的名称,有时也更改第 38 个位置的名称。代码块中的第一条语句使用的是原始列名。在该语句中,第 39 个名称是 "cbsac",但错误信息在这个位置只打印出一个孤立的引号(即以 `"cbsa", "` 结尾)。它和第 38 个位置的名称 "cbsa" 很相似,但列表其他部分也有很多相邻且非常相似的名称,却没有引发错误。我不明白这说明了什么。"cbsac" 在 R 中是否有我不知道的特殊含义?

library(readr)

> tf <- read_fwf("D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1", fwf_widths( c(6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5, 2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5, 2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5, 2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8, 8, 8, 8, 8,  8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18), c("fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno", "region", "division", "state", "county", "countycc", "countysc", "cousub",  "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp",  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc",  "cbsa", "cbsac", "metdiv", "csa", "necta", "nectasc", "nectadiv" "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur", "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd", "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name", "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc", "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens", "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114", "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved")))
Error: unexpected string constant in ""tract", "blkgrp",  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc",  "cbsa", ""
> tf <- read_fwf("D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1", fwf_widths( c(6, 2, 3, 2, 3, 2, 7, 1, 1, 2, 3, 2, 2, 5, 2, 2, 5, 2, 2, 6, 1, 4, 2, 5, 2, 2, 4, 5, 2, 1, 3, 5, 2, 6, 1, 5, 2, 5, 2, 5, 3, 5, 2, 5, 3, 1, 1, 5, 2, 1, 1, 2, 3, 3, 6, 1, 3, 5, 5, 2, 5, 5, 5, 14, 14, 90, 1, 1, 9, 9, 11, 12, 2, 1, 6, 5, 8, 8, 8, 8, 8, 8,  8, 8, 8, 2, 2, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 1, 1, 5, 18), c("fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn", "logrecno", "region", "division", "state", "county", "countycc", "countysc", "cousub",  "cousubcc", "cousubsc", "place", "placecc", "placesc", "tract", "blkgrp",  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc",  "BCas", "CBsac", "metdiv", "csa", "necta", "nectasc", "nectadiv" "cnecta", "cbsapci", "nectapci", "ua", "uasc", "uatype", "ur", "cd", "sldu", "sldl", "vtd", "vtdi", "reserve2", "zcta5", "submcd", "submcdcc", "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name", "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon", "lsadc", "partflag", "reserve3", "uga", "statens", "countyns", "cousubns", "placens", "concitns", "aianhhns", "aitsns", "anrcns", "submcdns", "cd113", "cd114", "cd115", "sldu2", "sldu3", "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc", "cnectasc", "memi", "nmemi", "puma", "reserved")))
Error: unexpected string constant in ""tract", "blkgrp",  "block", "iuc", "concit", "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli", "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc", "anrccc",  "BCas", ""

> sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17763)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] readr_1.3.1

loaded via a namespace (and not attached):
[1] compiler_3.6.1  backports_1.1.5 R6_2.4.0        hms_0.5.1      
[5] pillar_1.4.2    tibble_2.1.3    Rcpp_1.0.2      crayon_1.3.4   
[9] vctrs_0.2.0     zeallot_0.1.0   pkgconfig_2.0.3 rlang_0.4.0    

此链接指向包含源文件的 zip 压缩包。该文件是 "orgeo2010.sf1"。需要说明的是,这个压缩包有点大,对此我很抱歉。

这能解决您的问题吗?

# Column widths (in characters) of the fixed-width record, in file order.
# 101 fields in total; laid out ten per line so positions are easy to count.
widths <- c(
  6, 2, 3, 2, 3, 2, 7, 1, 1, 2,
  3, 2, 2, 5, 2, 2, 5, 2, 2, 6,
  1, 4, 2, 5, 2, 2, 4, 5, 2, 1,
  3, 5, 2, 6, 1, 5, 2, 5, 2, 5,
  3, 5, 2, 5, 3, 1, 1, 5, 2, 1,
  1, 2, 3, 3, 6, 1, 3, 5, 5, 2,
  5, 5, 5, 14, 14, 90, 1, 1, 9, 9,
  11, 12, 2, 1, 6, 5, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 2, 2, 2, 3, 3,
  3, 3, 3, 3, 2, 2, 2, 1, 1, 5,
  18
)

# Column names for the fixed-width record, one per entry of `widths`
# (101 names, same order as the fields). Laid out six per line.
vars <- c(
  "fileid", "stusab", "sumlev", "geocomp", "chariter", "cifsn",
  "logrecno", "region", "division", "state", "county", "countycc",
  "countysc", "cousub", "cousubcc", "cousubsc", "place", "placecc",
  "placesc", "tract", "blkgrp", "block", "iuc", "concit",
  "concitcc", "concitsc", "aianhh", "aianhhfp", "aianhhcc", "aihhtli",
  "aitsce", "aits", "aitscc", "ttract", "tblkgrp", "anrc",
  "anrccc", "cbsa", "cbsac", "metdiv", "csa", "necta",
  "nectasc", "nectadiv", "cnecta", "cbsapci", "nectapci", "ua",
  "uasc", "uatype", "ur", "cd", "sldu", "sldl",
  "vtd", "vtdi", "reserve2", "zcta5", "submcd", "submcdcc",
  "sdelem", "sdsec", "sduni", "arealand", "areawatr", "name",
  "funcstat", "gcuni", "pop100", "hu100", "intptlat", "intptlon",
  "lsadc", "partflag", "reserve3", "uga", "statens", "countyns",
  "cousubns", "placens", "concitns", "aianhhns", "aitsns", "anrcns",
  "submcdns", "cd113", "cd114", "cd115", "sldu2", "sldu3",
  "sldu4", "sldl2", "sldl3", "sldl4", "aianhhsc", "csasc",
  "cnectasc", "memi", "nmemi", "puma", "reserved"
)

# Read the fixed-width file using the field widths and column names defined
# above. The original call was missing the closing parenthesis of read_fwf(),
# which made the parser run on into the following statement and fail.
# Guard against a widths/names mismatch before touching the file.
stopifnot(length(widths) == length(vars))

# Passing the names through fwf_widths() labels the columns at read time, so
# no separate `names(td) <- vars` step is needed afterwards.
td <- read_fwf(
  "D:/projects_and_data/data/PostgreSQL/data/data/or2010.sf1/orgeo2010.sf1",
  fwf_widths(widths, col_names = vars)
)

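出现 "unexpected string constant"(意外的字符串常量)错误,是因为字符向量没有正确定义:在 "nectadiv" 和 "cnecta" 之间漏掉了一个逗号。错误信息中显示的文本只是被截断的上下文,并不表示问题出在 "cbsac" 这个名称上。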