使用 R 的 RJSONIO 包将 JSON 文件转换为数据框
Convert a JSON file to a data frame using the RJSONIO package in R
我是 R 的新手,在将 json 文件转换为数据帧时遇到问题。我有 json 文件,如下所示:
# Sample input: a JSON array of four records, each carrying a nested "cols" object
json_file <- '[{"id": "abc", "model": "honda", "date": "20190604", "cols": {"action": 15, "values": 18, "not": 29}},
{"id": "abc", "model": "honda", "date": "20190604", "cols": {"hello": 14, "hi": 85, "wow": 14}},
{"id": "mno", "model": "ford", "date": "20190604", "cols": {"yesterday": 21, "today": 21, "tomorrow": 29}},
{"id": "mno", "model": "ford", "date": "20190604", "cols": {"docs": 25, "ok": 87, "none": 42}}]'
我想将上面的 json 文件转换为以下格式的数据帧:
预期结果
df =
id model date cols values_cols
abc honda 20190604 action 15
abc honda 20190604 values 18
abc honda 20190604 not 29
abc honda 20190604 hello 14
abc honda 20190604 hi 85
abc honda 20190604 wow 14
mno ford 20190604 yesterday 21
mno ford 20190604 today 21
mno ford 20190604 tomorrow 29
mno ford 20190604 docs 25
mno ford 20190604 ok 87
mno ford 20190604 none 42
我的解决方案
# library() stops with an error if RJSONIO is not installed, whereas
# require() would only return FALSE and let the script fail later.
library(RJSONIO)
# Parse the JSON string; the result shown below is not yet in the desired long format.
df <- fromJSON(json_file)
我的结果
id model date cols id.1 model.1 date.1 cols.1 id.2 model.2 date.2 cols.2 id.3 model.3 date.3 cols.3
action abc honda 20190604 15 abc honda 20190604 14 mno ford 20190604 21 mno ford 20190604 25
values abc honda 20190604 18 abc honda 20190604 85 mno ford 20190604 21 mno ford 20190604 87
not abc honda 20190604 29 abc honda 20190604 14 mno ford 20190604 29 mno ford 20190604 42
这是不正确的,因为它采用了应该作为列名的索引。
这是你想要的吗?
> library(jsonlite)
> library(tidyverse)
>
> # Sample input: a JSON array of records, each with a nested "cols" object
> json_file <- '[
+ {"id": "abc",
+ "model": "honda",
+ "date": "20190604",
+ "cols": {"action": 15, "values": 18, "not": 29}},
+ {"id": "abc",
+ "model": "honda",
+ "date": "20190604",
+ "cols": {"hello": 14, "hi": 85, "wow": 14}},
+ {"id": "mno",
+ "model": "ford",
+ "date": "20190604",
+ "cols": {"yesterday": 21, "today": 21, "tomorrow": 29}},
+ {"id": "mno",
+ "model": "ford",
+ "date": "20190604",
+ "cols": {"docs": 25, "ok": 87, "none": 42}}]'
>
> # Parse into a data frame; 'cols' comes back as a nested data frame
> df <- fromJSON(json_file)
>
> # need to convert the internal dataframes in 'cols' to vectors
> # Build one long tibble per record (row of df) and row-bind them.
> # Iterate over rows with seq_len(nrow(df)): seq_along(df) counts COLUMNS
> # and only matched here because this data has 4 rows and 4 columns.
> x <- map_df(seq_len(nrow(df)),
+ ~tibble(id = df$id[.x],
+ model = df$model[.x],
+ date = df$date[.x],
+ cols = names(df$cols),
+ values = sapply(df$cols, '[', .x)
+ )
+ )
>
> ## remove NAs (keys of 'cols' that do not belong to the current record)
> x[complete.cases(x), ]
# A tibble: 12 x 5
id model date cols values
<chr> <chr> <chr> <chr> <int>
1 abc honda 20190604 action 15
2 abc honda 20190604 values 18
3 abc honda 20190604 not 29
4 abc honda 20190604 hello 14
5 abc honda 20190604 hi 85
6 abc honda 20190604 wow 14
7 mno ford 20190604 yesterday 21
8 mno ford 20190604 today 21
9 mno ford 20190604 tomorrow 29
10 mno ford 20190604 docs 25
11 mno ford 20190604 ok 87
12 mno ford 20190604 none 42
>
我是 R 的新手,在将 json 文件转换为数据帧时遇到问题。我有 json 文件,如下所示:
# Sample input: a JSON array of four records, each carrying a nested "cols" object
json_file <- '[{"id": "abc", "model": "honda", "date": "20190604", "cols": {"action": 15, "values": 18, "not": 29}},
{"id": "abc", "model": "honda", "date": "20190604", "cols": {"hello": 14, "hi": 85, "wow": 14}},
{"id": "mno", "model": "ford", "date": "20190604", "cols": {"yesterday": 21, "today": 21, "tomorrow": 29}},
{"id": "mno", "model": "ford", "date": "20190604", "cols": {"docs": 25, "ok": 87, "none": 42}}]'
我想将上面的 json 文件转换为以下格式的数据帧:
预期结果
df =
id model date cols values_cols
abc honda 20190604 action 15
abc honda 20190604 values 18
abc honda 20190604 not 29
abc honda 20190604 hello 14
abc honda 20190604 hi 85
abc honda 20190604 wow 14
mno ford 20190604 yesterday 21
mno ford 20190604 today 21
mno ford 20190604 tomorrow 29
mno ford 20190604 docs 25
mno ford 20190604 ok 87
mno ford 20190604 none 42
我的解决方案
# library() stops with an error if RJSONIO is not installed, whereas
# require() would only return FALSE and let the script fail later.
library(RJSONIO)
# Parse the JSON string; the result shown below is not yet in the desired long format.
df <- fromJSON(json_file)
我的结果
id model date cols id.1 model.1 date.1 cols.1 id.2 model.2 date.2 cols.2 id.3 model.3 date.3 cols.3
action abc honda 20190604 15 abc honda 20190604 14 mno ford 20190604 21 mno ford 20190604 25
values abc honda 20190604 18 abc honda 20190604 85 mno ford 20190604 21 mno ford 20190604 87
not abc honda 20190604 29 abc honda 20190604 14 mno ford 20190604 29 mno ford 20190604 42
这是不正确的,因为它采用了应该作为列名的索引。
这是你想要的吗?
> library(jsonlite)
> library(tidyverse)
>
> # Sample input: a JSON array of records, each with a nested "cols" object
> json_file <- '[
+ {"id": "abc",
+ "model": "honda",
+ "date": "20190604",
+ "cols": {"action": 15, "values": 18, "not": 29}},
+ {"id": "abc",
+ "model": "honda",
+ "date": "20190604",
+ "cols": {"hello": 14, "hi": 85, "wow": 14}},
+ {"id": "mno",
+ "model": "ford",
+ "date": "20190604",
+ "cols": {"yesterday": 21, "today": 21, "tomorrow": 29}},
+ {"id": "mno",
+ "model": "ford",
+ "date": "20190604",
+ "cols": {"docs": 25, "ok": 87, "none": 42}}]'
>
> # Parse into a data frame; 'cols' comes back as a nested data frame
> df <- fromJSON(json_file)
>
> # need to convert the internal dataframes in 'cols' to vectors
> # Build one long tibble per record (row of df) and row-bind them.
> # Iterate over rows with seq_len(nrow(df)): seq_along(df) counts COLUMNS
> # and only matched here because this data has 4 rows and 4 columns.
> x <- map_df(seq_len(nrow(df)),
+ ~tibble(id = df$id[.x],
+ model = df$model[.x],
+ date = df$date[.x],
+ cols = names(df$cols),
+ values = sapply(df$cols, '[', .x)
+ )
+ )
>
> ## remove NAs (keys of 'cols' that do not belong to the current record)
> x[complete.cases(x), ]
# A tibble: 12 x 5
id model date cols values
<chr> <chr> <chr> <chr> <int>
1 abc honda 20190604 action 15
2 abc honda 20190604 values 18
3 abc honda 20190604 not 29
4 abc honda 20190604 hello 14
5 abc honda 20190604 hi 85
6 abc honda 20190604 wow 14
7 mno ford 20190604 yesterday 21
8 mno ford 20190604 today 21
9 mno ford 20190604 tomorrow 29
10 mno ford 20190604 docs 25
11 mno ford 20190604 ok 87
12 mno ford 20190604 none 42
>