如何扁平化reddit评论数据结构?
How to flatten the reddit comment data structure?
我正在尝试使用 normalise
api 来展平 reddit 评论数据结构。
例如下面的调用:
https://www.reddit.com/r/reactjs/comments/506gca/what_backend_and_db_are_you_using_with_react/.json
会给你嵌套结构。如下所示:
[{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": {
"kind": "Listing",
"data": {
"modhash": "",
"children": [
{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": {
"kind": "Listing",
"data": {
"modhash": "",
"children": [
{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": {
"kind": "Listing",
"data": {
"modhash": "",
"children": [
{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": "",
"user_reports": [],
"saved": false,
"id": "d724wpm",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "bbennett36",
"parent_id": "t1_d724qxh",
"score": 7,
"approved_by": null,
"controversiality": 0,
"body": "I don't think everyone here is only doing front-end lol",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>I don&#39;t think everyone here is only doing front-end lol</p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d724wpm",
"score_hidden": false,
"stickied": false,
"created": 1472554709,
"author_flair_text": null,
"created_utc": 1472525909,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": 7
}
}
],
"after": null,
"before": null
}
},
"user_reports": [],
"saved": false,
"id": "d724qxh",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "nicholasxuu",
"parent_id": "t1_d71oyfk",
"score": -10,
"approved_by": null,
"controversiality": 0,
"body": "why even bother asking on this sub? asking front-end experts what backend stack they like.",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>why even bother asking on this sub? asking front-end experts what backend stack they like.</p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d724qxh",
"score_hidden": false,
"stickied": false,
"created": 1472554467,
"author_flair_text": null,
"created_utc": 1472525667,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": -10
}
}
],
"after": null,
"before": null
}
},
"user_reports": [],
"saved": false,
"id": "d71oyfk",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "bbennett36",
"parent_id": "t1_d71nyho",
"score": 5,
"approved_by": null,
"controversiality": 0,
"body": "I just want to see what stacks people are using out there.",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>I just want to see what stacks people are using out there.</p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d71oyfk",
"score_hidden": false,
"stickied": false,
"created": 1472531897,
"author_flair_text": null,
"created_utc": 1472503097,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": 5
}
},
{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": "",
"user_reports": [],
"saved": false,
"id": "d720vlw",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "zorlan",
"parent_id": "t1_d71nyho",
"score": 1,
"approved_by": null,
"controversiality": 0,
"body": "This. The database should be so far removed that it doesn't affect React one iota. I would even go so far as to say this post is off topic. ",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>This. The database should be so far removed that it doesn&#39;t affect React one iota. I would even go so far as to say this post is off topic. </p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d720vlw",
"score_hidden": false,
"stickied": false,
"created": 1472548717,
"author_flair_text": null,
"created_utc": 1472519917,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": 1
}
}
],
"after": null,
"before": null
}
},
"user_reports": [],
"saved": false,
"id": "d71nyho",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "shanestillwell",
"parent_id": "t3_506gca",
"score": 6,
"approved_by": null,
"controversiality": 0,
"body": "Why does it matter? React should be a few layers removed from any database and it's quite agnostic about the backend. Heck, React itself doesn't even know how to talk to the backend. ",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>Why does it matter? React should be a few layers removed from any database and it&#39;s quite agnostic about the backend. Heck, React itself doesn&#39;t even know how to talk to the backend. </p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d71nyho",
"score_hidden": false,
"stickied": false,
"created": 1472530637,
"author_flair_text": null,
"created_utc": 1472501837,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": 6
}
}, {...},{...}]
准确地说,data.children 数组中的每个元素都有一个 replies 键,它又可以包含 data,而 data 又可以包含 replies,依此类推。
我想看看如何展平这个递归结构;在更新 replies 时,要在 reducer 中正确地遍历这个结构变得越来越困难。(在显示时这很容易,因为可以边渲染边遍历。)
任何帮助都会很棒!
我不熟悉 reddit API,但是这样的东西怎么样?
// this should be handled more explicitly
/**
 * Unwraps nested wrapper objects until the innermost payload is reached.
 *
 * At each step the truthy `data` property is followed if there is one,
 * otherwise the truthy `children` property; when neither is truthy the
 * current value is returned unchanged.
 *
 * @param {*} input - Value to unwrap. Must not be null/undefined, since its
 *   properties are read directly.
 * @returns {*} The first value reached that has no truthy `data` or `children`.
 */
function normalize( input ) {
  let node = input;
  while ( true ) {
    // `data` takes precedence; `children` is only consulted when `data` is falsy.
    const inner = node.data || node.children;
    if ( !inner ) return node;
    node = inner;
  }
}
/**
 * Flattens a comment and all of its nested replies into one lookup table
 * keyed by comment `id`.
 *
 * Each entry's `replies` becomes an array of the ids of its direct replies.
 * A comment whose `replies` is falsy (e.g. the empty string) is stored as-is.
 * Descendants are inserted before their parent, so the comment passed in is
 * the last key of the result.
 *
 * The input tree is NOT modified: a comment that has replies is stored as a
 * shallow copy carrying the id list, so the caller's nested structure stays
 * intact (important when the input comes from application state).
 *
 * @param {Object} input - An already-unwrapped comment object with an `id`
 *   and an optional `replies` wrapper. Assumes `normalize( input.replies )`
 *   yields an array of child wrappers — TODO confirm for other payloads.
 * @returns {Object} Map of comment id to flattened comment.
 */
function build( input ) {
  const result = {};
  let entry = input;
  if( input.replies ) {
    const replies = normalize( input.replies ).map( ( child ) => normalize( child ) );
    replies.forEach( ( reply ) => {
      // Merge the reply's whole subtree first so children precede parents.
      Object.assign( result, build( reply ) );
    } );
    // Copy rather than assign to `input.replies`, which would rewrite the
    // caller's tree in place.
    entry = Object.assign( {}, input, {
      replies: replies.map( ( reply ) => reply.id )
    } );
  }
  result[ input.id ] = entry;
  return result;
}
如果您的示例存储在 x
中,build( normalize( x[0] ) );
应该给您:
{
"d724wpm": {
// ...
"replies": "",
// ...
"id": "d724wpm",
// ...
"body": "I don't think everyone here is only doing front-end lol"
// ...
},
"d724qxh": {
// ...
"replies": [
"d724wpm"
],
// ...
"id": "d724qxh",
// ...
"body": "why even bother asking on this sub? asking front-end experts what backend stack they like."
// ...
},
"d71oyfk": {
// ...
"replies": [
"d724qxh"
],
// ...
"id": "d71oyfk",
// ...
"body": "I just want to see what stacks people are using out there."
// ...
},
"d720vlw": {
// ...
"replies": "",
// ...
"id": "d720vlw",
// ...
"body": "This. The database should be so far removed that it doesn't affect React one iota. I would even go so far as to say this post is off topic. "
// ...
},
"d71nyho": {
// ...
"replies": [
"d71oyfk",
"d720vlw"
],
// ...
"id": "d71nyho",
// ...
"body": "Why does it matter? React should be a few layers removed from any database and it's quite agnostic about the backend. Heck, React itself doesn't even know how to talk to the backend. "
// ...
}
}
我正在尝试使用 normalise
api 来展平 reddit 评论数据结构。
例如下面的调用:
https://www.reddit.com/r/reactjs/comments/506gca/what_backend_and_db_are_you_using_with_react/.json
会给你嵌套结构。如下所示:
[{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": {
"kind": "Listing",
"data": {
"modhash": "",
"children": [
{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": {
"kind": "Listing",
"data": {
"modhash": "",
"children": [
{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": {
"kind": "Listing",
"data": {
"modhash": "",
"children": [
{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": "",
"user_reports": [],
"saved": false,
"id": "d724wpm",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "bbennett36",
"parent_id": "t1_d724qxh",
"score": 7,
"approved_by": null,
"controversiality": 0,
"body": "I don't think everyone here is only doing front-end lol",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>I don&#39;t think everyone here is only doing front-end lol</p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d724wpm",
"score_hidden": false,
"stickied": false,
"created": 1472554709,
"author_flair_text": null,
"created_utc": 1472525909,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": 7
}
}
],
"after": null,
"before": null
}
},
"user_reports": [],
"saved": false,
"id": "d724qxh",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "nicholasxuu",
"parent_id": "t1_d71oyfk",
"score": -10,
"approved_by": null,
"controversiality": 0,
"body": "why even bother asking on this sub? asking front-end experts what backend stack they like.",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>why even bother asking on this sub? asking front-end experts what backend stack they like.</p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d724qxh",
"score_hidden": false,
"stickied": false,
"created": 1472554467,
"author_flair_text": null,
"created_utc": 1472525667,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": -10
}
}
],
"after": null,
"before": null
}
},
"user_reports": [],
"saved": false,
"id": "d71oyfk",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "bbennett36",
"parent_id": "t1_d71nyho",
"score": 5,
"approved_by": null,
"controversiality": 0,
"body": "I just want to see what stacks people are using out there.",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>I just want to see what stacks people are using out there.</p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d71oyfk",
"score_hidden": false,
"stickied": false,
"created": 1472531897,
"author_flair_text": null,
"created_utc": 1472503097,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": 5
}
},
{
"kind": "t1",
"data": {
"subreddit_id": "t5_2zldd",
"banned_by": null,
"removal_reason": null,
"link_id": "t3_506gca",
"likes": null,
"replies": "",
"user_reports": [],
"saved": false,
"id": "d720vlw",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "zorlan",
"parent_id": "t1_d71nyho",
"score": 1,
"approved_by": null,
"controversiality": 0,
"body": "This. The database should be so far removed that it doesn't affect React one iota. I would even go so far as to say this post is off topic. ",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>This. The database should be so far removed that it doesn&#39;t affect React one iota. I would even go so far as to say this post is off topic. </p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d720vlw",
"score_hidden": false,
"stickied": false,
"created": 1472548717,
"author_flair_text": null,
"created_utc": 1472519917,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": 1
}
}
],
"after": null,
"before": null
}
},
"user_reports": [],
"saved": false,
"id": "d71nyho",
"gilded": 0,
"archived": false,
"report_reasons": null,
"author": "shanestillwell",
"parent_id": "t3_506gca",
"score": 6,
"approved_by": null,
"controversiality": 0,
"body": "Why does it matter? React should be a few layers removed from any database and it's quite agnostic about the backend. Heck, React itself doesn't even know how to talk to the backend. ",
"edited": false,
"author_flair_css_class": null,
"downs": 0,
"body_html": "<div class=\"md\"><p>Why does it matter? React should be a few layers removed from any database and it&#39;s quite agnostic about the backend. Heck, React itself doesn&#39;t even know how to talk to the backend. </p>\n</div>",
"subreddit": "reactjs",
"name": "t1_d71nyho",
"score_hidden": false,
"stickied": false,
"created": 1472530637,
"author_flair_text": null,
"created_utc": 1472501837,
"distinguished": null,
"mod_reports": [],
"num_reports": null,
"ups": 6
}
}, {...},{...}]
准确地说,data.children 数组中的每个元素都有一个 replies 键,它又可以包含 data,而 data 又可以包含 replies,依此类推。
我想看看如何展平这个递归结构;在更新 replies 时,要在 reducer 中正确地遍历这个结构变得越来越困难。(在显示时这很容易,因为可以边渲染边遍历。)
任何帮助都会很棒!
我不熟悉 reddit API,但是这样的东西怎么样?
// this should be handled more explicitly
/**
 * Unwraps nested wrapper objects until the innermost payload is reached.
 *
 * At each step the truthy `data` property is followed if there is one,
 * otherwise the truthy `children` property; when neither is truthy the
 * current value is returned unchanged.
 *
 * @param {*} input - Value to unwrap. Must not be null/undefined, since its
 *   properties are read directly.
 * @returns {*} The first value reached that has no truthy `data` or `children`.
 */
function normalize( input ) {
  let node = input;
  while ( true ) {
    // `data` takes precedence; `children` is only consulted when `data` is falsy.
    const inner = node.data || node.children;
    if ( !inner ) return node;
    node = inner;
  }
}
/**
 * Flattens a comment and all of its nested replies into one lookup table
 * keyed by comment `id`.
 *
 * Each entry's `replies` becomes an array of the ids of its direct replies.
 * A comment whose `replies` is falsy (e.g. the empty string) is stored as-is.
 * Descendants are inserted before their parent, so the comment passed in is
 * the last key of the result.
 *
 * The input tree is NOT modified: a comment that has replies is stored as a
 * shallow copy carrying the id list, so the caller's nested structure stays
 * intact (important when the input comes from application state).
 *
 * @param {Object} input - An already-unwrapped comment object with an `id`
 *   and an optional `replies` wrapper. Assumes `normalize( input.replies )`
 *   yields an array of child wrappers — TODO confirm for other payloads.
 * @returns {Object} Map of comment id to flattened comment.
 */
function build( input ) {
  const result = {};
  let entry = input;
  if( input.replies ) {
    const replies = normalize( input.replies ).map( ( child ) => normalize( child ) );
    replies.forEach( ( reply ) => {
      // Merge the reply's whole subtree first so children precede parents.
      Object.assign( result, build( reply ) );
    } );
    // Copy rather than assign to `input.replies`, which would rewrite the
    // caller's tree in place.
    entry = Object.assign( {}, input, {
      replies: replies.map( ( reply ) => reply.id )
    } );
  }
  result[ input.id ] = entry;
  return result;
}
如果您的示例存储在 x
中,build( normalize( x[0] ) );
应该给您:
{
"d724wpm": {
// ...
"replies": "",
// ...
"id": "d724wpm",
// ...
"body": "I don't think everyone here is only doing front-end lol"
// ...
},
"d724qxh": {
// ...
"replies": [
"d724wpm"
],
// ...
"id": "d724qxh",
// ...
"body": "why even bother asking on this sub? asking front-end experts what backend stack they like."
// ...
},
"d71oyfk": {
// ...
"replies": [
"d724qxh"
],
// ...
"id": "d71oyfk",
// ...
"body": "I just want to see what stacks people are using out there."
// ...
},
"d720vlw": {
// ...
"replies": "",
// ...
"id": "d720vlw",
// ...
"body": "This. The database should be so far removed that it doesn't affect React one iota. I would even go so far as to say this post is off topic. "
// ...
},
"d71nyho": {
// ...
"replies": [
"d71oyfk",
"d720vlw"
],
// ...
"id": "d71nyho",
// ...
"body": "Why does it matter? React should be a few layers removed from any database and it's quite agnostic about the backend. Heck, React itself doesn't even know how to talk to the backend. "
// ...
}
}