TheGuardian API - 脚本崩溃
TheGuardian API - Script crashes
import json
import requests
from os import makedirs
from os.path import join, exists
from datetime import date, timedelta
ARTICLES_DIR = join('tempdata', 'articles')
makedirs(ARTICLES_DIR, exist_ok=True)
API_ENDPOINT = 'http://content.guardianapis.com/search'
my_params = {
'q': 'coronavirus,stock,covid',
'sectionID': 'business',
'from-date': "2019-01-01",
'to-date': "2020-09-30",
'order-by': "newest",
'show-fields': 'all',
'page-size': 300,
'api-key': '### my cryptic key ###'
}
# day iteration from here:
#
start_date = date(2019, 1, 1)
end_date = date(2020,9, 30)
dayrange = range((end_date - start_date).days + 1)
for daycount in dayrange:
dt = start_date + timedelta(days=daycount)
datestr = dt.strftime('%Y-%m-%d')
fname = join(ARTICLES_DIR, datestr + '.json')
if not exists(fname):
# then let's download it
print("Downloading", datestr)
all_results = []
my_params['from-date'] = datestr
my_params['to-date'] = datestr
current_page = 1
total_pages = 1
while current_page <= total_pages:
print("...page", current_page)
my_params['page'] = current_page
resp = requests.get(API_ENDPOINT, my_params)
data = resp.json()
all_results.extend(data['response']['results'])
# if there is more than one page
current_page += 1
total_pages = data['response']['pages']
with open(fname, 'w') as f:
print("Writing to", fname)
# re-serialize it for pretty indentation
f.write(json.dumps(all_results, indent=2))
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-18-f04b4f0fe9ed> in <module>
49 resp = requests.get(API_ENDPOINT, my_params)
50 data = resp.json()
---> 51 all_results.extend(data['response']['results'])
52 # if there is more than one page
53 current_page += 1
KeyError: 'results'
Same error occurs for 'pages'
At first there was no issues and was able to run it. Download crashed after 2020-03-24. Since then can't get the code running again.
我指的是第 51 和 54 行。至少在这一点上代码崩溃了。
不知道如何摆脱这个问题。有什么想法吗?
了解错误消息将是第一步 - 它抱怨缺少密钥。检查 data['response']['results']
是否存在(提示:它不存在)并检查 data['response']
的结构到底是什么。
幸运的是,可以使用 api 参数 'test'
所以我们可以帮助使用该密钥:
my_params = {
'q': 'coronavirus,stock,covid',
'sectionID': 'business',
'from-date': "2019-01-01",
'to-date': "2020-09-30",
'order-by': "newest",
'show-fields': 'all',
'page-size': 300,
'api-key': 'test' # test key for that API
}
在 运行,我得到同样的异常,检查 data['response']
并得到:
让我们看看给定了哪些参数,好吗?
my_params = {
'q': 'coronavirus,stock,covid',
'sectionID': 'business',
'from-date': "2019-01-01",
'to-date': "2020-09-30",
'order-by': "newest",
'show-fields': 'all',
'page-size': 300, # TOO BIG
'api-key': 'test'
}
将其固定为 200,您将得到
Downloading 2019-01-01
...page 1
Writing to tempdata\articles19-01-01.json
Downloading 2019-01-02
...page 1
Writing to tempdata\articles19-01-02.json
Downloading 2019-01-03
...page 1
Writing to tempdata\articles19-01-03.json
Downloading 2019-01-04
...page 1
Writing to tempdata\articles19-01-04.json
Downloading 2019-01-05
[snipp]
import json
import requests
from os import makedirs
from os.path import join, exists
from datetime import date, timedelta
ARTICLES_DIR = join('tempdata', 'articles')
makedirs(ARTICLES_DIR, exist_ok=True)
API_ENDPOINT = 'http://content.guardianapis.com/search'
my_params = {
'q': 'coronavirus,stock,covid',
'sectionID': 'business',
'from-date': "2019-01-01",
'to-date': "2020-09-30",
'order-by': "newest",
'show-fields': 'all',
'page-size': 300,
'api-key': '### my cryptic key ###'
}
# day iteration from here:
#
start_date = date(2019, 1, 1)
end_date = date(2020,9, 30)
dayrange = range((end_date - start_date).days + 1)
for daycount in dayrange:
dt = start_date + timedelta(days=daycount)
datestr = dt.strftime('%Y-%m-%d')
fname = join(ARTICLES_DIR, datestr + '.json')
if not exists(fname):
# then let's download it
print("Downloading", datestr)
all_results = []
my_params['from-date'] = datestr
my_params['to-date'] = datestr
current_page = 1
total_pages = 1
while current_page <= total_pages:
print("...page", current_page)
my_params['page'] = current_page
resp = requests.get(API_ENDPOINT, my_params)
data = resp.json()
all_results.extend(data['response']['results'])
# if there is more than one page
current_page += 1
total_pages = data['response']['pages']
with open(fname, 'w') as f:
print("Writing to", fname)
# re-serialize it for pretty indentation
f.write(json.dumps(all_results, indent=2))
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-18-f04b4f0fe9ed> in <module>
49 resp = requests.get(API_ENDPOINT, my_params)
50 data = resp.json()
---> 51 all_results.extend(data['response']['results'])
52 # if there is more than one page
53 current_page += 1
KeyError: 'results'
Same error occurs for 'pages'
At first there was no issues and was able to run it. Download crashed after 2020-03-24. Since then can't get the code running again.
我指的是第 51 和 54 行。至少在这一点上代码崩溃了。 不知道如何摆脱这个问题。有什么想法吗?
了解错误消息将是第一步 - 它抱怨缺少密钥。检查 data['response']['results']
是否存在(提示:它不存在)并检查 data['response']
的结构到底是什么。
幸运的是,可以使用 api 参数 'test'
所以我们可以帮助使用该密钥:
my_params = {
'q': 'coronavirus,stock,covid',
'sectionID': 'business',
'from-date': "2019-01-01",
'to-date': "2020-09-30",
'order-by': "newest",
'show-fields': 'all',
'page-size': 300,
'api-key': 'test' # test key for that API
}
在 运行,我得到同样的异常,检查 data['response']
并得到:
让我们看看给定了哪些参数,好吗?
my_params = {
'q': 'coronavirus,stock,covid',
'sectionID': 'business',
'from-date': "2019-01-01",
'to-date': "2020-09-30",
'order-by': "newest",
'show-fields': 'all',
'page-size': 300, # TOO BIG
'api-key': 'test'
}
将其固定为 200,您将得到
Downloading 2019-01-01
...page 1
Writing to tempdata\articles19-01-01.json
Downloading 2019-01-02
...page 1
Writing to tempdata\articles19-01-02.json
Downloading 2019-01-03
...page 1
Writing to tempdata\articles19-01-03.json
Downloading 2019-01-04
...page 1
Writing to tempdata\articles19-01-04.json
Downloading 2019-01-05
[snipp]