在 R 中整理(结构化)JSON 数据
structuring JSON data in R
我是 JSON 数据的新手,在尝试把数据整理成 R 中常用的单个合并数据框(data.frame)时遇到了一些麻烦。下面是示例数据:
{
"id": "rub_al_khali",
"conversion_px": 0.0395882818685669,
"n_surfaces": 4,
"lithic_contours": [
{
"surface_id": 0,
"classification": "Ventral",
"total_area_px": 530565.5,
"total_area": 831.5,
"max_breadth": 22.4,
"max_length": 54,
"polygon_count": 7,
"scar_count": 0,
"percentage_detected_scars": 0,
"scar_contours": []
},
{
"surface_id": 1,
"classification": "Dorsal",
"total_area_px": 530503.5,
"total_area": 831.4,
"max_breadth": 22.4,
"max_length": 54,
"polygon_count": 7,
"scar_count": 4,
"percentage_detected_scars": 0.62,
"scar_contours": [
{
"scar_id": 0,
"total_area_px": 129337,
"total_area": 202.7,
"max_breadth": 10.3,
"max_length": 41.7,
"percentage_of_surface": 0.24,
"scar_angle": 1.85,
"polygon_count": 5
},
{
"scar_id": 1,
"total_area_px": 100130,
"total_area": 156.9,
"max_breadth": 7.2,
"max_length": 43,
"percentage_of_surface": 0.19,
"scar_angle": 357.36,
"polygon_count": 4
},
{
"scar_id": 2,
"total_area_px": 93162,
"total_area": 146,
"max_breadth": 6.5,
"max_length": 41.4,
"percentage_of_surface": 0.18,
"scar_angle": 5.01,
"polygon_count": 4
},
{
"scar_id": 3,
"total_area_px": 6148.5,
"total_area": 9.6,
"max_breadth": 4,
"max_length": 7.1,
"percentage_of_surface": 0.01,
"scar_angle": "NaN",
"polygon_count": 9
}
]
},
{
"surface_id": 2,
"classification": "Lateral",
"total_area_px": 176204,
"total_area": 276.2,
"max_breadth": 8.6,
"max_length": 54.2,
"polygon_count": 3,
"scar_count": 2,
"percentage_detected_scars": 0.33,
"scar_contours": [
{
"scar_id": 0,
"total_area_px": 44605,
"total_area": 69.9,
"max_breadth": 5,
"max_length": 50,
"percentage_of_surface": 0.25,
"scar_angle": "NaN",
"polygon_count": 3
},
{
"scar_id": 1,
"total_area_px": 12877,
"total_area": 20.2,
"max_breadth": 1.5,
"max_length": 22.3,
"percentage_of_surface": 0.07,
"scar_angle": "NaN",
"polygon_count": 2
}
]
},
{
"surface_id": 3,
"classification": "Platform",
"total_area_px": 55252.5,
"total_area": 86.6,
"max_breadth": 20.3,
"max_length": 6.6,
"polygon_count": 5,
"scar_count": 1,
"percentage_detected_scars": 0.42,
"scar_contours": [
{
"scar_id": 0,
"total_area_px": 23298.5,
"total_area": 36.5,
"max_breadth": 15,
"max_length": 4.1,
"percentage_of_surface": 0.42,
"scar_angle": "NaN",
"polygon_count": 4
}
]
}
]
}
到目前为止,我已经使用 jsonlite 并设置 flatten = TRUE 将数据导入 R:
library(jsonlite)
dta <- fromJSON("~/rub_al_khali.json", flatten = TRUE)
虽然这已经让我完成了一半,但得到的结果还不是一个真正合并、完整的 data.frame。我认为问题可能出在 dta$lithic_contours 上。非常感谢任何帮助!
jsonlite::fromJSON() 返回的是一个列表,而其中的元素 lithic_contours 本身就是一个 data.frame。只需从列表中取出该元素即可得到 data.frame:
# Subset the list on lithic_contours with $ ...
df <- jsonlite::fromJSON(<file>, flatten = TRUE)$lithic_contours
# ... and it's already a data.frame
class(df)
#> [1] "data.frame"
# Turning into a tibble for better printing
tibble::as_tibble(df)
#> # A tibble: 4 × 10
#> surface_id classification total_area_px total_area max_breadth max_length
#> <int> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 0 Ventral 530566. 832. 22.4 54
#> 2 1 Dorsal 530504. 831. 22.4 54
#> 3 2 Lateral 176204 276. 8.6 54.2
#> 4 3 Platform 55252. 86.6 20.3 6.6
#> # … with 4 more variables: polygon_count <int>, scar_count <int>,
#> # percentage_detected_scars <dbl>, scar_contours <list>
于 2022-04-04 由 reprex package (v2.0.1) 创建
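更新:展开(unnest)列表列
数据框的 scar_contours 列是一个列表列(list-column)。这种格式在分析时其实通常很方便,但如果你想把它展开成普通列,可以使用函数 tidyr::unnest()。注意:scar_contours 为空的行(例如 Ventral 表面)在展开后默认会被丢弃,因此下面的结果只有 7 行;如需保留这些行,可以设置 keep_empty = TRUE: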
library(tidyr)
df %>% unnest(scar_contours, names_repair = "minimal")
#> # A tibble: 7 × 17
#> surface_id classification total_area_px total_area max_breadth max_length
#> <int> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 1 Dorsal 530504. 831. 22.4 54
#> 2 1 Dorsal 530504. 831. 22.4 54
#> 3 1 Dorsal 530504. 831. 22.4 54
#> 4 1 Dorsal 530504. 831. 22.4 54
#> 5 2 Lateral 176204 276. 8.6 54.2
#> 6 2 Lateral 176204 276. 8.6 54.2
#> 7 3 Platform 55252. 86.6 20.3 6.6
#> # … with 11 more variables: polygon_count <int>, scar_count <int>,
#> # percentage_detected_scars <dbl>, scar_id <int>, total_area_px <dbl>,
#> # total_area <dbl>, max_breadth <dbl>, max_length <dbl>,
#> # percentage_of_surface <dbl>, scar_angle <dbl>, polygon_count <int>
我是 JSON 数据的新手,在尝试把数据整理成 R 中常用的单个合并数据框(data.frame)时遇到了一些麻烦。下面是示例数据:
{
"id": "rub_al_khali",
"conversion_px": 0.0395882818685669,
"n_surfaces": 4,
"lithic_contours": [
{
"surface_id": 0,
"classification": "Ventral",
"total_area_px": 530565.5,
"total_area": 831.5,
"max_breadth": 22.4,
"max_length": 54,
"polygon_count": 7,
"scar_count": 0,
"percentage_detected_scars": 0,
"scar_contours": []
},
{
"surface_id": 1,
"classification": "Dorsal",
"total_area_px": 530503.5,
"total_area": 831.4,
"max_breadth": 22.4,
"max_length": 54,
"polygon_count": 7,
"scar_count": 4,
"percentage_detected_scars": 0.62,
"scar_contours": [
{
"scar_id": 0,
"total_area_px": 129337,
"total_area": 202.7,
"max_breadth": 10.3,
"max_length": 41.7,
"percentage_of_surface": 0.24,
"scar_angle": 1.85,
"polygon_count": 5
},
{
"scar_id": 1,
"total_area_px": 100130,
"total_area": 156.9,
"max_breadth": 7.2,
"max_length": 43,
"percentage_of_surface": 0.19,
"scar_angle": 357.36,
"polygon_count": 4
},
{
"scar_id": 2,
"total_area_px": 93162,
"total_area": 146,
"max_breadth": 6.5,
"max_length": 41.4,
"percentage_of_surface": 0.18,
"scar_angle": 5.01,
"polygon_count": 4
},
{
"scar_id": 3,
"total_area_px": 6148.5,
"total_area": 9.6,
"max_breadth": 4,
"max_length": 7.1,
"percentage_of_surface": 0.01,
"scar_angle": "NaN",
"polygon_count": 9
}
]
},
{
"surface_id": 2,
"classification": "Lateral",
"total_area_px": 176204,
"total_area": 276.2,
"max_breadth": 8.6,
"max_length": 54.2,
"polygon_count": 3,
"scar_count": 2,
"percentage_detected_scars": 0.33,
"scar_contours": [
{
"scar_id": 0,
"total_area_px": 44605,
"total_area": 69.9,
"max_breadth": 5,
"max_length": 50,
"percentage_of_surface": 0.25,
"scar_angle": "NaN",
"polygon_count": 3
},
{
"scar_id": 1,
"total_area_px": 12877,
"total_area": 20.2,
"max_breadth": 1.5,
"max_length": 22.3,
"percentage_of_surface": 0.07,
"scar_angle": "NaN",
"polygon_count": 2
}
]
},
{
"surface_id": 3,
"classification": "Platform",
"total_area_px": 55252.5,
"total_area": 86.6,
"max_breadth": 20.3,
"max_length": 6.6,
"polygon_count": 5,
"scar_count": 1,
"percentage_detected_scars": 0.42,
"scar_contours": [
{
"scar_id": 0,
"total_area_px": 23298.5,
"total_area": 36.5,
"max_breadth": 15,
"max_length": 4.1,
"percentage_of_surface": 0.42,
"scar_angle": "NaN",
"polygon_count": 4
}
]
}
]
}
到目前为止,我已经使用 jsonlite 并设置 flatten = TRUE 将数据导入 R:
library(jsonlite)
dta <- fromJSON("~/rub_al_khali.json", flatten = TRUE)
虽然这已经让我完成了一半,但得到的结果还不是一个真正合并、完整的 data.frame。我认为问题可能出在 dta$lithic_contours 上。非常感谢任何帮助!
jsonlite::fromJSON() 返回的是一个列表,而其中的元素 lithic_contours 本身就是一个 data.frame。只需从列表中取出该元素即可得到 data.frame:
# Subset the list on lithic_contours with $ ...
df <- jsonlite::fromJSON(<file>, flatten = TRUE)$lithic_contours
# ... and it's already a data.frame
class(df)
#> [1] "data.frame"
# Turning into a tibble for better printing
tibble::as_tibble(df)
#> # A tibble: 4 × 10
#> surface_id classification total_area_px total_area max_breadth max_length
#> <int> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 0 Ventral 530566. 832. 22.4 54
#> 2 1 Dorsal 530504. 831. 22.4 54
#> 3 2 Lateral 176204 276. 8.6 54.2
#> 4 3 Platform 55252. 86.6 20.3 6.6
#> # … with 4 more variables: polygon_count <int>, scar_count <int>,
#> # percentage_detected_scars <dbl>, scar_contours <list>
于 2022-04-04 由 reprex package (v2.0.1) 创建
更新:展开(unnest)列表列
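数据框的 scar_contours 列是一个列表列(list-column)。这种格式在分析时其实通常很方便,但如果你想把它展开成普通列,可以使用函数 tidyr::unnest()。注意:scar_contours 为空的行(例如 Ventral 表面)在展开后默认会被丢弃,因此下面的结果只有 7 行;如需保留这些行,可以设置 keep_empty = TRUE: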
library(tidyr)
df %>% unnest(scar_contours, names_repair = "minimal")
#> # A tibble: 7 × 17
#> surface_id classification total_area_px total_area max_breadth max_length
#> <int> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 1 Dorsal 530504. 831. 22.4 54
#> 2 1 Dorsal 530504. 831. 22.4 54
#> 3 1 Dorsal 530504. 831. 22.4 54
#> 4 1 Dorsal 530504. 831. 22.4 54
#> 5 2 Lateral 176204 276. 8.6 54.2
#> 6 2 Lateral 176204 276. 8.6 54.2
#> 7 3 Platform 55252. 86.6 20.3 6.6
#> # … with 11 more variables: polygon_count <int>, scar_count <int>,
#> # percentage_detected_scars <dbl>, scar_id <int>, total_area_px <dbl>,
#> # total_area <dbl>, max_breadth <dbl>, max_length <dbl>,
#> # percentage_of_surface <dbl>, scar_angle <dbl>, polygon_count <int>