从 json 中提取数据,使用 for 循环追加并保存为 CSV
Extract data from json, append using for loop and save as CSV
我已经使用 Tweepy 提取了 102 位政客的 100 位追随者的 id
、username
和 name
。数据存储在名为 pol_followers
的 JSON 文件中。现在我想附加 id
和 username
并使用下面的函数将其保存为 CSV 文件。但是,当使用最后一行 append_followers_to_csv(pol_followers, "pol_followers.csv")
中的函数时,我在底部看到了错误。
# Structure of pol_followers. The full pol_followers is much longer...
print(json.dumps(pol_followers, indent=4, sort_keys=True)) # see json data structure
[
{
"data": [
{
"id": "1464206217807601666",
"name": "terry alex",
"username": "terryal51850644"
},
{
"id": "1479032154394968064",
"name": "Charles Williams",
"username": "Charles99924770"
},
{
"id": "2526015770",
"name": "LISA P",
"username": "LISAP0910"
},
{
"id": "2957692520",
"name": "fayaz ahmad",
"username": "ahmadfayaz202"
}
],
"meta": {
"next_token": "F6HS7IU5SRGHEZZZ",
"result_count": 100
}
},
{
"data": [
{
"id": "2482703136",
"name": "HieuVu",
"username": "sachieuhaihanh"
},
{
"id": "580882148",
"name": "Maxine D. Harmon",
"username": "maxxximd"
},
{
"id": "1478867472841334787",
"name": "RBPsych1",
"username": "RBPsych1"
# Create file
csv_follower_file = open("pol_followers.csv", "a", newline="", encoding='utf-8')
csv_follower_writer = csv.writer(csv_follower_file)
# Create headers for the data I want to save. I only want to save these columns in my dataset
csv_follower_writer.writerow(
['id', 'username'])
csv_follower_file.close()\
def append_followers_to_csv(pol_followers, csv_follower_file):
# A counter variable
global follower_id, username
counter = 0
# Open OR create the target CSV file
csv_follower_file = open(csv_follower_file, "a", newline="", encoding='utf-8')
csv_follower_writer = csv.writer(csv_follower_file)
for ids in pol_followers['data']:
# 1. follower ID
follower_id = ids['id']
# 2. follower username
username = ids['username']
# Assemble all data in a list
ress = [follower_id, username]
# Append the result to the CSV file
csv_follower_writer.writerow(ress)
counter += 1
# When done, close the CSV file
csvFile.close()
# Print the number of tweets for this iteration
print("# of Tweets added from this response: ", counter)
append_followers_to_csv(pol_followers, "pol_followers.csv") # Save tweet data in a csv file
File "<input>", line 1, in <module>
File "<input>", line 11, in append_followers_to_csv
TypeError: list indices must be integers or slices, not str
您似乎已将 JSON 对象包装在列表中,因此您得到的不是 JSON 的 'data' 位,而是第 'data' 位在 append_followers_to_csv
函数中迭代时列表的元素,这在 python 中是做不到的。尝试删除 JSON 周围的方括号或将其设为 for ids in pol_followers[0]['data']
.
您只是缺少额外的循环,如下所示:
for each_dict in pol_followers:
for ids in each_dict['data']:
follower_id = ids['id']
username = ids['username']
我已经使用 Tweepy 提取了 102 位政客的 100 位追随者的 id
、username
和 name
。数据存储在名为 pol_followers
的 JSON 文件中。现在我想附加 id
和 username
并使用下面的函数将其保存为 CSV 文件。但是,当使用最后一行 append_followers_to_csv(pol_followers, "pol_followers.csv")
中的函数时,我在底部看到了错误。
# Structure of pol_followers. The full pol_followers is much longer...
print(json.dumps(pol_followers, indent=4, sort_keys=True)) # see json data structure
[
{
"data": [
{
"id": "1464206217807601666",
"name": "terry alex",
"username": "terryal51850644"
},
{
"id": "1479032154394968064",
"name": "Charles Williams",
"username": "Charles99924770"
},
{
"id": "2526015770",
"name": "LISA P",
"username": "LISAP0910"
},
{
"id": "2957692520",
"name": "fayaz ahmad",
"username": "ahmadfayaz202"
}
],
"meta": {
"next_token": "F6HS7IU5SRGHEZZZ",
"result_count": 100
}
},
{
"data": [
{
"id": "2482703136",
"name": "HieuVu",
"username": "sachieuhaihanh"
},
{
"id": "580882148",
"name": "Maxine D. Harmon",
"username": "maxxximd"
},
{
"id": "1478867472841334787",
"name": "RBPsych1",
"username": "RBPsych1"
# Create file
csv_follower_file = open("pol_followers.csv", "a", newline="", encoding='utf-8')
csv_follower_writer = csv.writer(csv_follower_file)
# Create headers for the data I want to save. I only want to save these columns in my dataset
csv_follower_writer.writerow(
['id', 'username'])
csv_follower_file.close()\
def append_followers_to_csv(pol_followers, csv_follower_file):
# A counter variable
global follower_id, username
counter = 0
# Open OR create the target CSV file
csv_follower_file = open(csv_follower_file, "a", newline="", encoding='utf-8')
csv_follower_writer = csv.writer(csv_follower_file)
for ids in pol_followers['data']:
# 1. follower ID
follower_id = ids['id']
# 2. follower username
username = ids['username']
# Assemble all data in a list
ress = [follower_id, username]
# Append the result to the CSV file
csv_follower_writer.writerow(ress)
counter += 1
# When done, close the CSV file
csvFile.close()
# Print the number of tweets for this iteration
print("# of Tweets added from this response: ", counter)
append_followers_to_csv(pol_followers, "pol_followers.csv") # Save tweet data in a csv file
File "<input>", line 1, in <module>
File "<input>", line 11, in append_followers_to_csv
TypeError: list indices must be integers or slices, not str
您似乎已将 JSON 对象包装在列表中,因此您得到的不是 JSON 的 'data' 位,而是第 'data' 位在 append_followers_to_csv
函数中迭代时列表的元素,这在 python 中是做不到的。尝试删除 JSON 周围的方括号或将其设为 for ids in pol_followers[0]['data']
.
您只是缺少额外的循环,如下所示:
for each_dict in pol_followers:
for ids in each_dict['data']:
follower_id = ids['id']
username = ids['username']