如何让 TikTok 抓取工具抓取数据并将结果写入 CSV?
How can I make TikTok scraper scrape and add to CSV
我正在构建一个抓取工具:先抓取 TikTok 的 hashtag,再从该 hashtag 的结果中获取用户名,然后逐个抓取这些用户名对应的用户信息,最后希望把这些信息整理到一个 csv 文件中。下面是我目前写出的代码,但它没有按我预期的那样工作。我是初学者,正在尝试学习一门新语言。我遗漏了什么,又做错了什么?
import requests
import json
import pandas as pd
# Asker's original attempt, kept exactly as posted. Intended flow: fetch a
# hashtag feed, collect the usernames in it, look each one up, and export the
# collected fields to a CSV file. The NOTE(review) comments mark the lines
# that keep it from working.
# scrape hashtag
url = "https://api.tikapi.io/public/hashtag?count=30&id=9261"
payload={}
headers = {
'X-API-KEY': 'xxxxxx'
}
response = requests.request("GET", url, headers=headers, data=payload)
hashtag_response = response.text
hashtag_json = json.loads (hashtag_response)
# write data to hashtag json file
results_json = open("data.json", "w")
L = [response.text]
results_json.writelines(L)
results_json.close()
# list
influencer = []
followerCount = []
bioLink = []
signature = []
# NOTE(review): this reads 'uniqueId' from the top level of the parsed
# response. As shown here the loop body also has no indentation (possibly
# lost in formatting), which Python rejects.
for uniqueId in hashtag_json ['uniqueId']:
influencer.append(uniqueId)
# scrape influencer username
# NOTE(review): this is a plain string, not an f-string, so the literal text
# "{influencer}" is sent; `influencer` is also the whole list, not one name.
url = "https://api.tikapi.io/public/check?username={influencer}"
payload={}
headers = {
'X-API-KEY': 'xxxxx'
}
influencerresponse = requests.request("GET", url, headers=headers, data=payload)
infl_response = influencerresponse.text
influencer_json = json.loads (infl_response)
# write data to influencer json file
results_json = open("infl_data.json", "w")
I = [influencerresponse.text]
results_json.writelines(I)
results_json.close()
# NOTE(review): the subscript is a single tuple key
# ('followerCount', 'bioLink', 'signature'), and the loop targets
# `followerCount` / `bioLink` reuse the names of the result lists defined
# above, rebinding them.
for followerCount, bioLink in influencer_json ['followerCount','bioLink','signature']:
# NOTE(review): after that rebinding these appends no longer target the
# result lists; `signature.append(signature)` appends the list to itself.
followerCount.append(followerCount)
bioLink.append(bioLink)
signature.append(signature)
# create csv file of results
influencer_df = pd.DataFrame({
'Influencer' : influencer,
'Follower Count' : followerCount,
'Link' : bioLink,
'Signature' : signature,
})
influencer_df.to_csv('/Users/john/Repos/TikTok/influencer.csv', index=False)
你这部分写错了:
for uniqueId in hashtag_json ['uniqueId']:
influencer.append(uniqueId)
应该是
influencer.append(hashtag_json["itemList"][0]['author']['uniqueId'])
这部分
for followerCount, bioLink in influencer_json ['followerCount','bioLink','signature']:
应该是
followerCount.append(influencer_json['userInfo']['stats']['followerCount'])
bioLink.append(influencer_json['userInfo']['user']['bioLink']['link'])
signature.append(influencer_json['userInfo']['user']['signature'])
完整的修正代码如下:
import requests
import json
import pandas as pd
# Scrape a TikTok hashtag feed through TikAPI, look up the profile of every
# author that appears in it, and write one CSV row per author.
#
# Side effects: writes data.json (raw hashtag response), infl_data.json
# (a JSON array with one parsed profile response per author) and the CSV.

# Without a timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 30

headers = {
    'X-API-KEY': 'xxxx'
}

# scrape hashtag
url = "https://api.tikapi.io/public/hashtag?count=30&id=9261"
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail on an HTTP error status here instead of parsing an error body and
# hitting a confusing KeyError further down.
response.raise_for_status()
hashtag_json = response.json()

# write data to hashtag json file
# The encoding is explicit because the platform default may not be able to
# represent every character in the response text.
with open("data.json", "w", encoding="utf-8") as results_json:
    results_json.write(response.text)

# Collect every author of the feed once, in feed order, in case the same
# author appears in more than one item.
usernames = []
for item in hashtag_json["itemList"]:
    uniqueId = item['author']['uniqueId']
    if uniqueId not in usernames:
        usernames.append(uniqueId)

# list
influencer = []
followerCount = []
bioLink = []
signature = []
influencer_responses = []

# scrape influencer username
for username in usernames:
    # Passing the name through `params` lets requests URL-encode it.
    influencerresponse = requests.get(
        "https://api.tikapi.io/public/check",
        params={'username': username},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    influencerresponse.raise_for_status()
    influencer_json = influencerresponse.json()
    influencer_responses.append(influencer_json)

    user = influencer_json['userInfo']['user']
    influencer.append(username)
    followerCount.append(influencer_json['userInfo']['stats']['followerCount'])
    # NOTE(review): assumes 'bioLink' can be missing or null for accounts
    # without a link - confirm against a real response. Falls back to an
    # empty cell instead of raising.
    bioLink.append((user.get('bioLink') or {}).get('link', ''))
    signature.append(user['signature'])

# write data to influencer json file
with open("infl_data.json", "w", encoding="utf-8") as results_json:
    json.dump(influencer_responses, results_json, ensure_ascii=False, indent=2)

# create csv file of results
# All four lists grow together inside the loop, so the columns stay aligned.
influencer_df = pd.DataFrame({
    'Influencer' : influencer,
    'Follower Count' : followerCount,
    'Link' : bioLink,
    'Signature' : signature,
})
influencer_df.to_csv('/Users/john/Repos/TikTok/influencer.csv', index=False)
我正在构建一个抓取工具:先抓取 TikTok 的 hashtag,再从该 hashtag 的结果中获取用户名,然后逐个抓取这些用户名对应的用户信息,最后希望把这些信息整理到一个 csv 文件中。下面是我目前写出的代码,但它没有按我预期的那样工作。我是初学者,正在尝试学习一门新语言。我遗漏了什么,又做错了什么?
import requests
import json
import pandas as pd
# Asker's original attempt, kept exactly as posted. Intended flow: fetch a
# hashtag feed, collect the usernames in it, look each one up, and export the
# collected fields to a CSV file. The NOTE(review) comments mark the lines
# that keep it from working.
# scrape hashtag
url = "https://api.tikapi.io/public/hashtag?count=30&id=9261"
payload={}
headers = {
'X-API-KEY': 'xxxxxx'
}
response = requests.request("GET", url, headers=headers, data=payload)
hashtag_response = response.text
hashtag_json = json.loads (hashtag_response)
# write data to hashtag json file
results_json = open("data.json", "w")
L = [response.text]
results_json.writelines(L)
results_json.close()
# list
influencer = []
followerCount = []
bioLink = []
signature = []
# NOTE(review): this reads 'uniqueId' from the top level of the parsed
# response. As shown here the loop body also has no indentation (possibly
# lost in formatting), which Python rejects.
for uniqueId in hashtag_json ['uniqueId']:
influencer.append(uniqueId)
# scrape influencer username
# NOTE(review): this is a plain string, not an f-string, so the literal text
# "{influencer}" is sent; `influencer` is also the whole list, not one name.
url = "https://api.tikapi.io/public/check?username={influencer}"
payload={}
headers = {
'X-API-KEY': 'xxxxx'
}
influencerresponse = requests.request("GET", url, headers=headers, data=payload)
infl_response = influencerresponse.text
influencer_json = json.loads (infl_response)
# write data to influencer json file
results_json = open("infl_data.json", "w")
I = [influencerresponse.text]
results_json.writelines(I)
results_json.close()
# NOTE(review): the subscript is a single tuple key
# ('followerCount', 'bioLink', 'signature'), and the loop targets
# `followerCount` / `bioLink` reuse the names of the result lists defined
# above, rebinding them.
for followerCount, bioLink in influencer_json ['followerCount','bioLink','signature']:
# NOTE(review): after that rebinding these appends no longer target the
# result lists; `signature.append(signature)` appends the list to itself.
followerCount.append(followerCount)
bioLink.append(bioLink)
signature.append(signature)
# create csv file of results
influencer_df = pd.DataFrame({
'Influencer' : influencer,
'Follower Count' : followerCount,
'Link' : bioLink,
'Signature' : signature,
})
influencer_df.to_csv('/Users/john/Repos/TikTok/influencer.csv', index=False)
你这部分写错了:
for uniqueId in hashtag_json ['uniqueId']:
influencer.append(uniqueId)
应该是
influencer.append(hashtag_json["itemList"][0]['author']['uniqueId'])
这部分
for followerCount, bioLink in influencer_json ['followerCount','bioLink','signature']:
应该是
followerCount.append(influencer_json['userInfo']['stats']['followerCount'])
bioLink.append(influencer_json['userInfo']['user']['bioLink']['link'])
signature.append(influencer_json['userInfo']['user']['signature'])
完整的修正代码如下:
import requests
import json
import pandas as pd
# Scrape a TikTok hashtag feed through TikAPI, look up the profile of every
# author that appears in it, and write one CSV row per author.
#
# Side effects: writes data.json (raw hashtag response), infl_data.json
# (a JSON array with one parsed profile response per author) and the CSV.

# Without a timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 30

headers = {
    'X-API-KEY': 'xxxx'
}

# scrape hashtag
url = "https://api.tikapi.io/public/hashtag?count=30&id=9261"
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail on an HTTP error status here instead of parsing an error body and
# hitting a confusing KeyError further down.
response.raise_for_status()
hashtag_json = response.json()

# write data to hashtag json file
# The encoding is explicit because the platform default may not be able to
# represent every character in the response text.
with open("data.json", "w", encoding="utf-8") as results_json:
    results_json.write(response.text)

# Collect every author of the feed once, in feed order, in case the same
# author appears in more than one item.
usernames = []
for item in hashtag_json["itemList"]:
    uniqueId = item['author']['uniqueId']
    if uniqueId not in usernames:
        usernames.append(uniqueId)

# list
influencer = []
followerCount = []
bioLink = []
signature = []
influencer_responses = []

# scrape influencer username
for username in usernames:
    # Passing the name through `params` lets requests URL-encode it.
    influencerresponse = requests.get(
        "https://api.tikapi.io/public/check",
        params={'username': username},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    influencerresponse.raise_for_status()
    influencer_json = influencerresponse.json()
    influencer_responses.append(influencer_json)

    user = influencer_json['userInfo']['user']
    influencer.append(username)
    followerCount.append(influencer_json['userInfo']['stats']['followerCount'])
    # NOTE(review): assumes 'bioLink' can be missing or null for accounts
    # without a link - confirm against a real response. Falls back to an
    # empty cell instead of raising.
    bioLink.append((user.get('bioLink') or {}).get('link', ''))
    signature.append(user['signature'])

# write data to influencer json file
with open("infl_data.json", "w", encoding="utf-8") as results_json:
    json.dump(influencer_responses, results_json, ensure_ascii=False, indent=2)

# create csv file of results
# All four lists grow together inside the loop, so the columns stay aligned.
influencer_df = pd.DataFrame({
    'Influencer' : influencer,
    'Follower Count' : followerCount,
    'Link' : bioLink,
    'Signature' : signature,
})
influencer_df.to_csv('/Users/john/Repos/TikTok/influencer.csv', index=False)