R:读取并解析 Json
R: read and parse Json
如果 R 不是完成这项工作的合适工具,那也无妨,但我相信它应该能够胜任。
我正在调用 API,然后将结果转储到 Postman json reader。然后我得到如下结果:
"results": [
{
"personUuid": "***",
"synopsis": {
"fullName": "***",
"headline": "***",
"location": "***",
"image": "***",
"skills": [
"*",
"*",
"*",
"*.",
"*"
],
"phoneNumbers": [
"***",
"***"
],
"emailAddresses": [
"***"
],
"networks": [
{
"name": "linkedin",
"url": "***",
"type": "canonicalUrl",
"lastAccessed": null
},
{
"name": "***",
"url": "***",
"type": "cvUrl",
"lastAccessed": "*"
},
{
"name": "*",
"url": "***",
"type": "cvUrl",
"lastAccessed": "*"
}
]
}
},
{
首先,我不确定如何将其导入 R,因为我主要处理的是 csv。我看到过其他问题里有人使用 Json 包直接调用 URL,但这对我正在做的事情不起作用,所以我想知道如何读取其中包含 json 的 csv 文件。
我用过:
x <- fromJSON(file="Z:/json.csv")
但也许有更好的方法。完成后 json 看起来更像:
...$results[[9]]$synopsis$emailAddresses
[1] "***" "***"
[3] "***" "***"
$results[[9]]$synopsis$networks...
然后,我希望把每个结果的 headline 和电子邮件地址存入一个 data.table。
我试过了:
str_extract_all(x, 'emailAddresses*$')
我原以为 * 会匹配 emailAddresses 和 $ 之间的所有内容(包括换行符等),但这并不起作用。我还发现,即使让 * 起了作用,extract 也不会提取出 * 所代表的内容。
例如:
> y <- 'some text. email "oli@oli.o" other text'
> y
[1] "some text. email \"oli@oli.o\" other text"
> str_extract_all(y, 'email \"*"')
[[1]]
[1] "email \""
第 2 部分:
下面的答案有效,但是如果我直接调用 api:
body ='{"start": 0,"count": 105,...}'
x <- POST(url="https://live.*.me/api/v3/person", body=body, add_headers(Accept="application/json", 'Content-Type'="application/json", Authorization = "id=*, apiKey=*"))
y <- content(x)
然后使用
fromJSON(y, flatten=TRUE)$results[c("synopsis.headline",
"synopsis.emailAddresses")]
无效。我尝试了以下方法:
z <- NULL
zz <- NULL
for(i in 1:y$count){
z=rbind(z,data.table(job = y$results[[i]]$synopsis$headline))
}
for(i in 1:y$count){
zz=rbind(zz,data.table(job = y$results[[i]]$synopsis$emailAddresses))
}
df <- cbind(z,zz)
然而,在返回的 JSON 列表中,有些人有多个电子邮件地址。因此上面的方法只记录了每个人的第一个电子邮件地址。我如何将多个电子邮件地址保存为一个向量(而不是多列)?
其他测试数据可能会有帮助。
考虑:
library(jsonlite)
library(dplyr)
json_data = "{\"results\": [\n {\n\"personUuid\": \"***\",\n\"synopsis\": {\n\"fullName\": \"***\",\n\"headline\": \"***\",\n\"location\": \"***\",\n\"image\": \"***\",\n\"skills\": [\n\"*\",\n\"*\",\n\"*\",\n\"*.\",\n\"*\"\n],\n\"phoneNumbers\": [\n\"***\",\n\"***\"\n],\n\"emailAddresses\": [\n\"***\"\n],\n\"networks\": [\n{\n \"name\": \"linkedin\",\n \"url\": \"***\",\n \"type\": \"canonicalUrl\",\n \"lastAccessed\": null\n},\n {\n \"name\": \"***\",\n \"url\": \"***\",\n \"type\": \"cvUrl\",\n \"lastAccessed\": \"*\"\n },\n {\n \"name\": \"*\",\n \"url\": \"***\",\n \"type\": \"cvUrl\",\n \"lastAccessed\": \"*\"\n }\n ]\n}\n}]}"
(df <- jsonlite::fromJSON(json_data, simplifyDataFrame = TRUE, flatten = TRUE))
#> $results
#> personUuid synopsis.fullName synopsis.headline synopsis.location
#> 1 *** *** *** ***
#> synopsis.image synopsis.skills synopsis.phoneNumbers
#> 1 *** *, *, *, *., * ***, ***
#> synopsis.emailAddresses
#> 1 ***
#> synopsis.networks
#> 1 linkedin, ***, *, ***, ***, ***, canonicalUrl, cvUrl, cvUrl, NA, *, *
df$results %>%
select(headline = synopsis.headline, emails = synopsis.emailAddresses)
#> headline emails
#> 1 *** ***
更新 1:
要从 URL 读取 json,只需将指向 json 数据的 URL 字符串传给 fromJSON 函数:
library(jsonlite)
url <- 'http://you.url.com/data.json'
# in this case we pass an URL to the fromJSON function instead of the actual content we want to parse
fromJSON(url, flatten=TRUE)$results[c("synopsis.headline", "synopsis.emailAddresses")]
// end UPDATE 1
您还可以将 flatten 参数传递给 fromJSON,然后使用 'results' 数据框。
fromJSON(json.data, flatten=TRUE)$results[c("synopsis.headline",
"synopsis.emailAddresses")]
synopsis.headline synopsis.emailAddresses
1 *** jane.doe@boo.com
2 *** john.doe@foo.com
以下是我定义 json.data 的方式;请注意,我特意在您的示例 json 输入中多添加了 1 条记录。
json.data <- '{
"results":[
{
"personUuid":"***",
"synopsis":{
"fullName":"***",
"headline":"***",
"location":"***",
"image":"***",
"skills":[
"*",
"*",
"*",
"*.",
"*"
],
"phoneNumbers":[
"***",
"***"
],
"emailAddresses":[
"jane.doe@boo.com"
],
"networks":[
{
"name":"linkedin",
"url":"***",
"type":"canonicalUrl",
"lastAccessed":null
},
{
"name":"***",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
},
{
"name":"*",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
}
]
}
},
{
"personUuid":"***",
"synopsis":{
"fullName":"***",
"headline":"***",
"location":"***",
"image":"***",
"skills":[
"*",
"*",
"*",
"*.",
"*"
],
"phoneNumbers":[
"***",
"***"
],
"emailAddresses":[
"john.doe@foo.com"
],
"networks":[
{
"name":"linkedin",
"url":"***",
"type":"canonicalUrl",
"lastAccessed":null
},
{
"name":"***",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
},
{
"name":"*",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
}
]
}
}
]
}'
如果 R 不是完成这项工作的合适工具,那也无妨,但我相信它应该能够胜任。
我正在调用 API,然后将结果转储到 Postman json reader。然后我得到如下结果:
"results": [
{
"personUuid": "***",
"synopsis": {
"fullName": "***",
"headline": "***",
"location": "***",
"image": "***",
"skills": [
"*",
"*",
"*",
"*.",
"*"
],
"phoneNumbers": [
"***",
"***"
],
"emailAddresses": [
"***"
],
"networks": [
{
"name": "linkedin",
"url": "***",
"type": "canonicalUrl",
"lastAccessed": null
},
{
"name": "***",
"url": "***",
"type": "cvUrl",
"lastAccessed": "*"
},
{
"name": "*",
"url": "***",
"type": "cvUrl",
"lastAccessed": "*"
}
]
}
},
{
首先,我不确定如何将其导入 R,因为我主要处理的是 csv。我看到过其他问题里有人使用 Json 包直接调用 URL,但这对我正在做的事情不起作用,所以我想知道如何读取其中包含 json 的 csv 文件。
我用过:
x <- fromJSON(file="Z:/json.csv")
但也许有更好的方法。完成后 json 看起来更像:
...$results[[9]]$synopsis$emailAddresses
[1] "***" "***"
[3] "***" "***"
$results[[9]]$synopsis$networks...
然后,我希望把每个结果的 headline 和电子邮件地址存入一个 data.table。
我试过了:
str_extract_all(x, 'emailAddresses*$')
我原以为 * 会匹配 emailAddresses 和 $ 之间的所有内容(包括换行符等),但这并不起作用。我还发现,即使让 * 起了作用,extract 也不会提取出 * 所代表的内容。
例如:
> y <- 'some text. email "oli@oli.o" other text'
> y
[1] "some text. email \"oli@oli.o\" other text"
> str_extract_all(y, 'email \"*"')
[[1]]
[1] "email \""
第 2 部分:
下面的答案有效,但是如果我直接调用 api:
body ='{"start": 0,"count": 105,...}'
x <- POST(url="https://live.*.me/api/v3/person", body=body, add_headers(Accept="application/json", 'Content-Type'="application/json", Authorization = "id=*, apiKey=*"))
y <- content(x)
然后使用
fromJSON(y, flatten=TRUE)$results[c("synopsis.headline",
"synopsis.emailAddresses")]
无效。我尝试了以下方法:
z <- NULL
zz <- NULL
for(i in 1:y$count){
z=rbind(z,data.table(job = y$results[[i]]$synopsis$headline))
}
for(i in 1:y$count){
zz=rbind(zz,data.table(job = y$results[[i]]$synopsis$emailAddresses))
}
df <- cbind(z,zz)
然而,在返回的 JSON 列表中,有些人有多个电子邮件地址。因此上面的方法只记录了每个人的第一个电子邮件地址。我如何将多个电子邮件地址保存为一个向量(而不是多列)?
其他测试数据可能会有帮助。
考虑:
library(jsonlite)
library(dplyr)
json_data = "{\"results\": [\n {\n\"personUuid\": \"***\",\n\"synopsis\": {\n\"fullName\": \"***\",\n\"headline\": \"***\",\n\"location\": \"***\",\n\"image\": \"***\",\n\"skills\": [\n\"*\",\n\"*\",\n\"*\",\n\"*.\",\n\"*\"\n],\n\"phoneNumbers\": [\n\"***\",\n\"***\"\n],\n\"emailAddresses\": [\n\"***\"\n],\n\"networks\": [\n{\n \"name\": \"linkedin\",\n \"url\": \"***\",\n \"type\": \"canonicalUrl\",\n \"lastAccessed\": null\n},\n {\n \"name\": \"***\",\n \"url\": \"***\",\n \"type\": \"cvUrl\",\n \"lastAccessed\": \"*\"\n },\n {\n \"name\": \"*\",\n \"url\": \"***\",\n \"type\": \"cvUrl\",\n \"lastAccessed\": \"*\"\n }\n ]\n}\n}]}"
(df <- jsonlite::fromJSON(json_data, simplifyDataFrame = TRUE, flatten = TRUE))
#> $results
#> personUuid synopsis.fullName synopsis.headline synopsis.location
#> 1 *** *** *** ***
#> synopsis.image synopsis.skills synopsis.phoneNumbers
#> 1 *** *, *, *, *., * ***, ***
#> synopsis.emailAddresses
#> 1 ***
#> synopsis.networks
#> 1 linkedin, ***, *, ***, ***, ***, canonicalUrl, cvUrl, cvUrl, NA, *, *
df$results %>%
select(headline = synopsis.headline, emails = synopsis.emailAddresses)
#> headline emails
#> 1 *** ***
更新 1: 要从 URL 读取 json,只需将指向 json 数据的 URL 字符串传给 fromJSON 函数:
library(jsonlite)
url <- 'http://you.url.com/data.json'
# in this case we pass an URL to the fromJSON function instead of the actual content we want to parse
fromJSON(url, flatten=TRUE)$results[c("synopsis.headline", "synopsis.emailAddresses")]
// end UPDATE 1
您还可以将 flatten 参数传递给 fromJSON,然后使用 'results' 数据框。
fromJSON(json.data, flatten=TRUE)$results[c("synopsis.headline",
"synopsis.emailAddresses")]
synopsis.headline synopsis.emailAddresses
1 *** jane.doe@boo.com
2 *** john.doe@foo.com
以下是我定义 json.data 的方式;请注意,我特意在您的示例 json 输入中多添加了 1 条记录。
json.data <- '{
"results":[
{
"personUuid":"***",
"synopsis":{
"fullName":"***",
"headline":"***",
"location":"***",
"image":"***",
"skills":[
"*",
"*",
"*",
"*.",
"*"
],
"phoneNumbers":[
"***",
"***"
],
"emailAddresses":[
"jane.doe@boo.com"
],
"networks":[
{
"name":"linkedin",
"url":"***",
"type":"canonicalUrl",
"lastAccessed":null
},
{
"name":"***",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
},
{
"name":"*",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
}
]
}
},
{
"personUuid":"***",
"synopsis":{
"fullName":"***",
"headline":"***",
"location":"***",
"image":"***",
"skills":[
"*",
"*",
"*",
"*.",
"*"
],
"phoneNumbers":[
"***",
"***"
],
"emailAddresses":[
"john.doe@foo.com"
],
"networks":[
{
"name":"linkedin",
"url":"***",
"type":"canonicalUrl",
"lastAccessed":null
},
{
"name":"***",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
},
{
"name":"*",
"url":"***",
"type":"cvUrl",
"lastAccessed":"*"
}
]
}
}
]
}'