取消嵌套带有花括号的 JSON 数据
Unnesting JSON with curly braces
我正在尝试取消嵌套存储在花括号中的一些 JSON 数据。从原始 txt 文件中,我可以使用 jsonlite 将数据集展平,如下所示:
library(jsonlite)
library(tidyverse)

# Read the raw text file into a character vector, one element per line.
data_full <- read_lines("mytxtfile.txt")

# The questions are stored in line 1 of 6, so only that line is parsed.
data_questions <- fromJSON(data_full[1], flatten = TRUE)

# Keep just the test_part label and the responses column; the responses are
# still JSON-encoded strings at this point.
data_questions_simple <- data_questions[c("test_part", "responses")]
这会生成一个数据集,其中我实际需要的数据仍嵌套在花括号中,如下所示:
data_questions_simple$responses
[1] NA
[2] "{\"Q0\":5,\"Q1\":1,\"Q2\":2,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":4,\"Q7\":5,\"Q8\":3,\"Q9\":2}"
[3] "{\"Q0\":3,\"Q1\":3,\"Q2\":3,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":2}"
[4] "{\"Q0\":4,\"Q1\":3,\"Q2\":4,\"Q3\":4,\"Q4\":3,\"Q5\":2,\"Q6\":2,\"Q7\":4,\"Q8\":3,\"Q9\":3,\"Q10\":4,\"Q11\":3,\"Q12\":\"\"}"
[5] "{\"Q0\":2,\"Q1\":2,\"Q2\":2,\"Q3\":3,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":0,\"Q8\":2}"
[6] "{\"Q0\":2,\"Q1\":1,\"Q2\":4,\"Q3\":4,\"Q4\":4,\"Q5\":4,\"Q6\":3,\"Q7\":3,\"Q8\":4,\"Q9\":3,\"Q10\":3,\"Q11\":4,\"Q12\":4,\"Q13\":2,\"Q14\":3,\"Q15\":3,\"Q16\":2,\"Q17\":3}"
我无法将嵌套在这些花括号中的数据展开 - 有没有什么方法可以把 test_part 标签保留在问题响应旁边的列中?或者是否需要把每一行拆分成多个数据框,以确保每个问题与相应的测试部分关联?
原始数据如下:
[{"rt":7988.9000000059605,"stimulus":"<p>Click the button below to start the task.</p>","response":0,"trial_type":"html-button-response","trial_index":0,"time_elapsed":7991,"internal_node_id":"0.0-0.0"},{"rt":77266.30000001192,"responses":"{\"Q0\":5,\"Q1\":1,\"Q2\":2,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":4,\"Q7\":5,\"Q8\":3,\"Q9\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9]","test_part":"1","trial_type":"survey-likert-tabulate","trial_index":1,"time_elapsed":85264,"internal_node_id":"0.0-1.0-0.0"},{"rt":53696.5,"responses":"{\"Q0\":3,\"Q1\":3,\"Q2\":3,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7]","test_part":"2","trial_type":"survey-likert-tabulate","trial_index":2,"time_elapsed":138966,"internal_node_id":"0.0-1.0-1.0"},{"rt":73241.90000000596,"responses":"{\"Q0\":4,\"Q1\":3,\"Q2\":4,\"Q3\":4,\"Q4\":3,\"Q5\":2,\"Q6\":2,\"Q7\":4,\"Q8\":3,\"Q9\":3,\"Q10\":4,\"Q11\":3,\"Q12\":\"\"}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9,10,11,12]","test_part":"3","trial_type":"survey-likert-tabulate","trial_index":3,"time_elapsed":212215,"internal_node_id":"0.0-1.0-2.0"},{"rt":52712.59999999404,"responses":"{\"Q0\":2,\"Q1\":2,\"Q2\":2,\"Q3\":3,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":0,\"Q8\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8]","test_part":"4","trial_type":"survey-likert-tabulate","trial_index":4,"time_elapsed":264933,"internal_node_id":"0.0-1.0-3.0"},{"rt":85120.5,"responses":"{\"Q0\":2,\"Q1\":1,\"Q2\":4,\"Q3\":4,\"Q4\":4,\"Q5\":4,\"Q6\":3,\"Q7\":3,\"Q8\":4,\"Q9\":3,\"Q10\":3,\"Q11\":4,\"Q12\":4,\"Q13\":2,\"Q14\":3,\"Q15\":3,\"Q16\":2,\"Q17\":3}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]","test_part":"5","trial_type":"survey-likert-tabulate","trial_index":5,"time_elapsed":350062,"internal_node_id":"0.0-1.0-4.0"}]
非常感谢任何帮助!
您可以再次使用 fromJSON 函数来解析嵌套在花括号中的数据,并将结果存储在列表中。例如,像这样:
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(tidyverse)
library(jsonlite)

# Path to the JSON file to parse.
path_to_json <- "../Downloads/test_json.json"
json_string <- readr::read_lines(path_to_json)

# simplifyVector = FALSE keeps the parsed result as a plain nested list
# (one element per trial) instead of collapsing it into a data frame.
data_parsed <- jsonlite::fromJSON(json_string, simplifyVector = FALSE)

# The `responses` field of a trial is itself a JSON string, so it is parsed a
# second time. Trials without that field are passed through unchanged.
data_responses <- map(data_parsed, function(trial) {
  # `[[` matches the name exactly; `$` would allow partial name matching.
  raw <- trial[["responses"]]
  if (is.null(raw) || is.na(raw)) {
    return(raw)
  }
  jsonlite::fromJSON(raw, simplifyVector = FALSE)
})

# Collect the test_part label of every trial as a character vector (NA for
# trials without a label) so it can be used directly as the list names.
test_parts <- map_chr(data_parsed, function(trial) {
  part <- trial[["test_part"]]
  if (is.null(part) || is.na(part)) {
    NA_character_
  } else {
    as.character(part)
  }
})

names(data_responses) <- test_parts
data_responses
需要检查 NULL/NA 来处理已解析 JSON 的第一个元素,该元素不包含名称为“responses”或“test_part”的字段。
我正在尝试取消嵌套存储在花括号中的一些 JSON 数据。从原始 txt 文件中,我可以使用 jsonlite 将数据集展平,如下所示:
library(jsonlite)
library(tidyverse)

# Read the raw text file into a character vector, one element per line.
data_full <- read_lines("mytxtfile.txt")

# The questions are stored in line 1 of 6, so only that line is parsed.
data_questions <- fromJSON(data_full[1], flatten = TRUE)

# Keep just the test_part label and the responses column; the responses are
# still JSON-encoded strings at this point.
data_questions_simple <- data_questions[c("test_part", "responses")]
这会生成一个数据集,其中我实际需要的数据仍嵌套在花括号中,如下所示:
data_questions_simple$responses
[1] NA
[2] "{\"Q0\":5,\"Q1\":1,\"Q2\":2,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":4,\"Q7\":5,\"Q8\":3,\"Q9\":2}"
[3] "{\"Q0\":3,\"Q1\":3,\"Q2\":3,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":2}"
[4] "{\"Q0\":4,\"Q1\":3,\"Q2\":4,\"Q3\":4,\"Q4\":3,\"Q5\":2,\"Q6\":2,\"Q7\":4,\"Q8\":3,\"Q9\":3,\"Q10\":4,\"Q11\":3,\"Q12\":\"\"}"
[5] "{\"Q0\":2,\"Q1\":2,\"Q2\":2,\"Q3\":3,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":0,\"Q8\":2}"
[6] "{\"Q0\":2,\"Q1\":1,\"Q2\":4,\"Q3\":4,\"Q4\":4,\"Q5\":4,\"Q6\":3,\"Q7\":3,\"Q8\":4,\"Q9\":3,\"Q10\":3,\"Q11\":4,\"Q12\":4,\"Q13\":2,\"Q14\":3,\"Q15\":3,\"Q16\":2,\"Q17\":3}"
我无法将嵌套在这些花括号中的数据展开 - 有没有什么方法可以把 test_part 标签保留在问题响应旁边的列中?或者是否需要把每一行拆分成多个数据框,以确保每个问题与相应的测试部分关联?
原始数据如下:
[{"rt":7988.9000000059605,"stimulus":"<p>Click the button below to start the task.</p>","response":0,"trial_type":"html-button-response","trial_index":0,"time_elapsed":7991,"internal_node_id":"0.0-0.0"},{"rt":77266.30000001192,"responses":"{\"Q0\":5,\"Q1\":1,\"Q2\":2,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":4,\"Q7\":5,\"Q8\":3,\"Q9\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9]","test_part":"1","trial_type":"survey-likert-tabulate","trial_index":1,"time_elapsed":85264,"internal_node_id":"0.0-1.0-0.0"},{"rt":53696.5,"responses":"{\"Q0\":3,\"Q1\":3,\"Q2\":3,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7]","test_part":"2","trial_type":"survey-likert-tabulate","trial_index":2,"time_elapsed":138966,"internal_node_id":"0.0-1.0-1.0"},{"rt":73241.90000000596,"responses":"{\"Q0\":4,\"Q1\":3,\"Q2\":4,\"Q3\":4,\"Q4\":3,\"Q5\":2,\"Q6\":2,\"Q7\":4,\"Q8\":3,\"Q9\":3,\"Q10\":4,\"Q11\":3,\"Q12\":\"\"}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9,10,11,12]","test_part":"3","trial_type":"survey-likert-tabulate","trial_index":3,"time_elapsed":212215,"internal_node_id":"0.0-1.0-2.0"},{"rt":52712.59999999404,"responses":"{\"Q0\":2,\"Q1\":2,\"Q2\":2,\"Q3\":3,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":0,\"Q8\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8]","test_part":"4","trial_type":"survey-likert-tabulate","trial_index":4,"time_elapsed":264933,"internal_node_id":"0.0-1.0-3.0"},{"rt":85120.5,"responses":"{\"Q0\":2,\"Q1\":1,\"Q2\":4,\"Q3\":4,\"Q4\":4,\"Q5\":4,\"Q6\":3,\"Q7\":3,\"Q8\":4,\"Q9\":3,\"Q10\":3,\"Q11\":4,\"Q12\":4,\"Q13\":2,\"Q14\":3,\"Q15\":3,\"Q16\":2,\"Q17\":3}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]","test_part":"5","trial_type":"survey-likert-tabulate","trial_index":5,"time_elapsed":350062,"internal_node_id":"0.0-1.0-4.0"}]
非常感谢任何帮助!
您可以再次使用 fromJSON 函数来解析嵌套在花括号中的数据,并将结果存储在列表中。例如,像这样:
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(tidyverse)
library(jsonlite)

# Path to the JSON file to parse.
path_to_json <- "../Downloads/test_json.json"
json_string <- readr::read_lines(path_to_json)

# simplifyVector = FALSE keeps the parsed result as a plain nested list
# (one element per trial) instead of collapsing it into a data frame.
data_parsed <- jsonlite::fromJSON(json_string, simplifyVector = FALSE)

# The `responses` field of a trial is itself a JSON string, so it is parsed a
# second time. Trials without that field are passed through unchanged.
data_responses <- map(data_parsed, function(trial) {
  # `[[` matches the name exactly; `$` would allow partial name matching.
  raw <- trial[["responses"]]
  if (is.null(raw) || is.na(raw)) {
    return(raw)
  }
  jsonlite::fromJSON(raw, simplifyVector = FALSE)
})

# Collect the test_part label of every trial as a character vector (NA for
# trials without a label) so it can be used directly as the list names.
test_parts <- map_chr(data_parsed, function(trial) {
  part <- trial[["test_part"]]
  if (is.null(part) || is.na(part)) {
    NA_character_
  } else {
    as.character(part)
  }
})

names(data_responses) <- test_parts
data_responses
需要检查 NULL/NA 来处理已解析 JSON 的第一个元素,该元素不包含名称为“responses”或“test_part”的字段。