使用花括号取消嵌套 JSON

Unnesting JSON with curly braces

我正在尝试取消嵌套存储在花括号中的一些 JSON 数据。从原始 txt 文件中,我可以使用 jsonlite 将数据集展平,如下所示:

library(jsonlite)
library(tidyverse)

# Load the raw export as a character vector, one element per line of the file.
data_full <- read_lines("mytxtfile.txt")

# The questionnaire trials are stored in line 1 of 6; parse that line and
# flatten nested structures into plain columns.
data_questions <- fromJSON(data_full[[1]], flatten = TRUE)

# Keep the test-part label together with the (still JSON-encoded) responses.
data_questions_simple <- select(data_questions, test_part, responses)

这会生成一个数据集,其中我实际需要的数据仍嵌套在花括号中,如下所示:

# Inspect the responses column: each entry is either NA or a JSON object
# still encoded as a single string (see the printed output below).
data_questions_simple$responses

[1] NA                                                                                                                                                                           
[2] "{\"Q0\":5,\"Q1\":1,\"Q2\":2,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":4,\"Q7\":5,\"Q8\":3,\"Q9\":2}"                                                                                
[3] "{\"Q0\":3,\"Q1\":3,\"Q2\":3,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":2}"                                                                                                  
[4] "{\"Q0\":4,\"Q1\":3,\"Q2\":4,\"Q3\":4,\"Q4\":3,\"Q5\":2,\"Q6\":2,\"Q7\":4,\"Q8\":3,\"Q9\":3,\"Q10\":4,\"Q11\":3,\"Q12\":\"\"}"                                               
[5] "{\"Q0\":2,\"Q1\":2,\"Q2\":2,\"Q3\":3,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":0,\"Q8\":2}"                                                                                         
[6] "{\"Q0\":2,\"Q1\":1,\"Q2\":4,\"Q3\":4,\"Q4\":4,\"Q5\":4,\"Q6\":3,\"Q7\":3,\"Q8\":4,\"Q9\":3,\"Q10\":3,\"Q11\":4,\"Q12\":4,\"Q13\":2,\"Q14\":3,\"Q15\":3,\"Q16\":2,\"Q17\":3}"

我无法将这些花括号内的数据(即仍以 JSON 字符串形式存储的回答)展开成单独的值。有没有什么方法可以在展开后,把 test_part 标签保留在与问题回答相邻的一列中?还是说必须把每一行拆分成单独的数据框,才能确保每个问题与对应的测试部分关联起来?

原始数据如下:

[{"rt":7988.9000000059605,"stimulus":"<p>Click the button below to start the task.</p>","response":0,"trial_type":"html-button-response","trial_index":0,"time_elapsed":7991,"internal_node_id":"0.0-0.0"},{"rt":77266.30000001192,"responses":"{\"Q0\":5,\"Q1\":1,\"Q2\":2,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":4,\"Q7\":5,\"Q8\":3,\"Q9\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9]","test_part":"1","trial_type":"survey-likert-tabulate","trial_index":1,"time_elapsed":85264,"internal_node_id":"0.0-1.0-0.0"},{"rt":53696.5,"responses":"{\"Q0\":3,\"Q1\":3,\"Q2\":3,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7]","test_part":"2","trial_type":"survey-likert-tabulate","trial_index":2,"time_elapsed":138966,"internal_node_id":"0.0-1.0-1.0"},{"rt":73241.90000000596,"responses":"{\"Q0\":4,\"Q1\":3,\"Q2\":4,\"Q3\":4,\"Q4\":3,\"Q5\":2,\"Q6\":2,\"Q7\":4,\"Q8\":3,\"Q9\":3,\"Q10\":4,\"Q11\":3,\"Q12\":\"\"}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9,10,11,12]","test_part":"3","trial_type":"survey-likert-tabulate","trial_index":3,"time_elapsed":212215,"internal_node_id":"0.0-1.0-2.0"},{"rt":52712.59999999404,"responses":"{\"Q0\":2,\"Q1\":2,\"Q2\":2,\"Q3\":3,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":0,\"Q8\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8]","test_part":"4","trial_type":"survey-likert-tabulate","trial_index":4,"time_elapsed":264933,"internal_node_id":"0.0-1.0-3.0"},{"rt":85120.5,"responses":"{\"Q0\":2,\"Q1\":1,\"Q2\":4,\"Q3\":4,\"Q4\":4,\"Q5\":4,\"Q6\":3,\"Q7\":3,\"Q8\":4,\"Q9\":3,\"Q10\":3,\"Q11\":4,\"Q12\":4,\"Q13\":2,\"Q14\":3,\"Q15\":3,\"Q16\":2,\"Q17\":3}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]","test_part":"5","trial_type":"survey-likert-tabulate","trial_index":5,"time_elapsed":350062,"internal_node_id":"0.0-1.0-4.0"}]

非常感谢任何帮助!

您可以对每个 responses 字符串再次调用 `fromJSON` 函数,解析花括号中的数据,并将结果存储在一个以 test_part 命名的列表中。例如,像这样:

# Parse a jsPsych-style export whose "responses" field is itself a JSON string,
# and return the decoded responses as a list named by each trial's test_part.

# library() fails loudly if a package is missing; require() would only
# return FALSE and let the script break later with a confusing error.
library(tidyverse)
library(jsonlite)

path_to_json <- "../Downloads/test_json.json"

# NOTE: fromJSON() expects a single JSON document. If the file stores several
# documents (one per line), pass only the relevant line, e.g. json_string[1].
json_string <- readr::read_lines(path_to_json)

# simplifyVector = FALSE keeps every trial as a plain list, so trials with
# different fields (the first one has no "responses"/"test_part") stay intact.
data_parsed <- jsonlite::fromJSON(json_string, simplifyVector = FALSE)

# Decode the nested JSON string of each trial; trials without responses are
# passed through unchanged (NULL/NA).
data_responses <- map(data_parsed, function(trial) {
  # `[[` matches the field name exactly; `$` would allow partial matching.
  raw <- trial[["responses"]]
  if (is.null(raw) || is.na(raw)) {
    return(raw)
  }
  jsonlite::fromJSON(raw, simplifyVector = FALSE)
})

# One label per trial as a character vector; NA where the trial has no
# test_part, so the result can be used directly as names.
test_parts <- map_chr(data_parsed, function(trial) {
  part <- trial[["test_part"]]
  if (is.null(part) || is.na(part)) {
    NA_character_
  } else {
    as.character(part)
  }
})

names(data_responses) <- test_parts

data_responses

需要检查 NULL/NA 来处理已解析 JSON 的第一个元素,该元素不包含名称为“responses”或“test_part”的字段。