将 python 字典(或 JSON?)保存为 CSV
Saving python dictionary (or JSON?) as CSV
我一直在尝试将 Google Search Console API 的输出保存为 CSV 文件。最初,我使用 sys.stdout 来保存从他们提供的示例代码中打印的内容。但是,在第三次左右的尝试中,我开始收到此错误:
File "C:\python39\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\uff1a' in position 13: character maps to <undefined>
之后我尝试改用 Pandas 的 to_csv 函数。结果不是我所希望的,但至少更接近:
> ,rows,responseAggregationType
0,"{'keys': ['amp pwa'], 'clicks': 1, 'impressions': 4, 'ctr': 0.25, 'position': 7.25}",byProperty
1,"{'keys': ['convert desktop site to mobile'], 'clicks': 1, 'impressions': 2, 'ctr': 0.5, 'position': 1.5}",byProperty
我是 python 的新手,但我认为这与 API 的输出不完全是标准的字典对象格式有关。
我也尝试过使用 csv.write 函数(我在来这里之前删除了该代码,所以没有示例),但遇到了与使用 sys.stdout 时相同的编码错误。
下面这段代码能完全按照我的需要打印输出,我只需要把这些输出保存成可以在电子表格中打开的文件。
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
import argparse
import sys
from googleapiclient import sample_tools
# Positional command-line flags. The parser is built without its own -h/--help
# option and is handed to sample_tools.init() through ``parents`` in main().
argparser = argparse.ArgumentParser(add_help=False)
for _flag_name, _flag_help in (
    ('property_uri',
     'Site or app URI to query data for (including trailing slash).'),
    ('start_date',
     'Start date of the requested date range in YYYY-MM-DD format.'),
    ('end_date',
     'End date of the requested date range in YYYY-MM-DD format.'),
):
    argparser.add_argument(_flag_name, type=str, help=_flag_help)
def main(argv):
    """Query Search Console for the top queries and print them as a table.

    Args:
      argv: The command-line argument list, forwarded unchanged to
        sample_tools.init() together with the module-level ``argparser``.
    """
    service, flags = sample_tools.init(
        argv, 'searchconsole', 'v1', __doc__, __file__, parents=[argparser],
        scope='https://www.googleapis.com/auth/webmasters.readonly')

    # Ask for at most 10 rows, grouped by the 'query' dimension, within the
    # requested date range. No explicit ordering is requested here.
    request = {
        'startDate': flags.start_date,
        'endDate': flags.end_date,
        'dimensions': ['query'],
        'rowLimit': 10
    }
    response = execute_request(service, flags.property_uri, request)
    print_table(response, 'Top Queries')
def execute_request(service, property_uri, request):
    """Executes a searchAnalytics.query request.

    Args:
      service: The searchconsole service to use when executing the query.
      property_uri: The site or app URI to request data for.
      request: The request body to be executed.

    Returns:
      The complete response object produced by ``execute()``. Callers in this
      file treat it as a mapping whose optional 'rows' entry holds the result
      rows, so it is not just the row list.
    """
    return service.searchanalytics().query(
        siteUrl=property_uri, body=request).execute()
def print_table(response, title):
    """Prints out a response as a fixed-width table.

    Each row contains key(s), clicks, impressions, CTR, and average position.

    Args:
      response: The server response to be printed as a table. If it has no
        'rows' entry, 'Empty response' is printed instead of a table.
      title: The title of the table.
    """
    print('\n --' + title + ':')

    if 'rows' not in response:
        print('Empty response')
        return

    # One left-aligned 20-character column for the keys, then four
    # right-aligned 20-character columns for the metrics.
    row_format = '{:<20}' + '{:>20}' * 4
    print(row_format.format('Keys', 'Clicks', 'Impressions', 'CTR', 'Position'))
    for row in response['rows']:
        # Keys are returned only if one or more dimensions are requested.
        # The joined value is already text, so no encode/decode round-trip
        # is needed before formatting it.
        keys = u','.join(row.get('keys', []))
        print(row_format.format(
            keys, row['clicks'], row['impressions'], row['ctr'], row['position']))
# Run the query only when this file is executed as a script.
if __name__ == '__main__':
    main(sys.argv)
这是我想要的输出,但以逗号分隔:
Keys Clicks Impressions CTR Position
amp pwa 1 4 0.25 7.25
convert desktop site to mobile 1 2 0.5 1.5
下面是仅打印结果对象的结果:
{'rows': [{'keys': ['amp pwa'], 'clicks': 1, 'impressions': 4, 'ctr': 0.25, 'position': 7.25}, {'keys': ['convert desktop site to mobile'], 'clicks': 1, 'impressions': 2, 'ctr': 0.5, 'position': 1.5}], 'responseAggregationType': 'byProperty'}
我希望我提供了足够的信息,在提问之前我尝试了这里和其他网站上推荐的所有解决方案。它看起来像是一个格式奇怪的 json/dictionary 对象。
非常感谢任何帮助。
更新,解决方案:
调整后的输出代码为:
import csv

# Save the Search Console result as CSV. ``response`` is the object returned
# by execute_request(). newline='' leaves line endings to the csv module.
with open("out.csv", "w", encoding="utf8", newline='') as f:
    writer = csv.writer(f)
    headers = ["Keys", "Clicks", "Impressions", "CTR", "Position"]
    writer.writerow(headers)
    # A response without a 'rows' entry yields a header-only file instead of
    # raising KeyError.
    for row in response.get('rows', []):
        # Keys are returned only if one or more dimensions are requested.
        # The joined value is already text, so no encode/decode round-trip
        # is needed; the file's encoding handles non-ASCII characters.
        keys = u','.join(row.get('keys', []))
        writer.writerow([keys, row['clicks'], row['impressions'], row['ctr'], row['position']])
问题可能出在输出文件的编码上。
看起来您从响应中获得的行是一系列类似 dict 的对象,所以这应该可行:
import csv

# Write the result rows to out.csv. ``rows`` is the list stored under the
# response's 'rows' entry. newline="" is what the csv module documentation
# asks for on files passed to csv.writer; without it, extra blank lines can
# appear between rows on Windows.
with open("out.csv", "w", encoding="utf8", newline="") as f:
    writer = csv.writer(f)
    headers = ["Keys", "Clicks", "Impressions", "CTR", "Position"]
    writer.writerow(headers)
    for row in rows:
        writer.writerow(
            [
                # 'keys' is optional, so fall back to an empty cell.
                ", ".join(row.get("keys", [])),
                row["clicks"],
                row["impressions"],
                row["ctr"],
                # The response key is spelled 'position'.
                row["position"],
            ]
        )
writer 对象接受一些参数来控制输出 csv 中的行分隔符和引号。查看 module docs 了解详情。
我一直在尝试将 Google Search Console API 的输出保存为 CSV 文件。最初,我使用 sys.stdout 来保存从他们提供的示例代码中打印的内容。但是,在第三次左右的尝试中,我开始收到此错误:
File "C:\python39\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\uff1a' in position 13: character maps to <undefined>
之后我尝试改用 Pandas 的 to_csv 函数。结果不是我所希望的,但至少更接近:
> ,rows,responseAggregationType
0,"{'keys': ['amp pwa'], 'clicks': 1, 'impressions': 4, 'ctr': 0.25, 'position': 7.25}",byProperty
1,"{'keys': ['convert desktop site to mobile'], 'clicks': 1, 'impressions': 2, 'ctr': 0.5, 'position': 1.5}",byProperty
我是 python 的新手,但我认为这与 API 的输出不完全是标准的字典对象格式有关。
我也尝试过使用 csv.write 函数(我在来这里之前删除了该代码,所以没有示例),但遇到了与使用 sys.stdout 时相同的编码错误。
下面这段代码能完全按照我的需要打印输出,我只需要把这些输出保存成可以在电子表格中打开的文件。
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
import argparse
import sys
from googleapiclient import sample_tools
# Positional command-line flags. The parser is built without its own -h/--help
# option and is handed to sample_tools.init() through ``parents`` in main().
argparser = argparse.ArgumentParser(add_help=False)
for _flag_name, _flag_help in (
    ('property_uri',
     'Site or app URI to query data for (including trailing slash).'),
    ('start_date',
     'Start date of the requested date range in YYYY-MM-DD format.'),
    ('end_date',
     'End date of the requested date range in YYYY-MM-DD format.'),
):
    argparser.add_argument(_flag_name, type=str, help=_flag_help)
def main(argv):
    """Query Search Console for the top queries and print them as a table.

    Args:
      argv: The command-line argument list, forwarded unchanged to
        sample_tools.init() together with the module-level ``argparser``.
    """
    service, flags = sample_tools.init(
        argv, 'searchconsole', 'v1', __doc__, __file__, parents=[argparser],
        scope='https://www.googleapis.com/auth/webmasters.readonly')

    # Ask for at most 10 rows, grouped by the 'query' dimension, within the
    # requested date range. No explicit ordering is requested here.
    request = {
        'startDate': flags.start_date,
        'endDate': flags.end_date,
        'dimensions': ['query'],
        'rowLimit': 10
    }
    response = execute_request(service, flags.property_uri, request)
    print_table(response, 'Top Queries')
def execute_request(service, property_uri, request):
    """Executes a searchAnalytics.query request.

    Args:
      service: The searchconsole service to use when executing the query.
      property_uri: The site or app URI to request data for.
      request: The request body to be executed.

    Returns:
      The complete response object produced by ``execute()``. Callers in this
      file treat it as a mapping whose optional 'rows' entry holds the result
      rows, so it is not just the row list.
    """
    return service.searchanalytics().query(
        siteUrl=property_uri, body=request).execute()
def print_table(response, title):
    """Prints out a response as a fixed-width table.

    Each row contains key(s), clicks, impressions, CTR, and average position.

    Args:
      response: The server response to be printed as a table. If it has no
        'rows' entry, 'Empty response' is printed instead of a table.
      title: The title of the table.
    """
    print('\n --' + title + ':')

    if 'rows' not in response:
        print('Empty response')
        return

    # One left-aligned 20-character column for the keys, then four
    # right-aligned 20-character columns for the metrics.
    row_format = '{:<20}' + '{:>20}' * 4
    print(row_format.format('Keys', 'Clicks', 'Impressions', 'CTR', 'Position'))
    for row in response['rows']:
        # Keys are returned only if one or more dimensions are requested.
        # The joined value is already text, so no encode/decode round-trip
        # is needed before formatting it.
        keys = u','.join(row.get('keys', []))
        print(row_format.format(
            keys, row['clicks'], row['impressions'], row['ctr'], row['position']))
# Run the query only when this file is executed as a script.
if __name__ == '__main__':
    main(sys.argv)
这是我想要的输出,但以逗号分隔:
Keys Clicks Impressions CTR Position
amp pwa 1 4 0.25 7.25
convert desktop site to mobile 1 2 0.5 1.5
下面是仅打印结果对象的结果:
{'rows': [{'keys': ['amp pwa'], 'clicks': 1, 'impressions': 4, 'ctr': 0.25, 'position': 7.25}, {'keys': ['convert desktop site to mobile'], 'clicks': 1, 'impressions': 2, 'ctr': 0.5, 'position': 1.5}], 'responseAggregationType': 'byProperty'}
我希望我提供了足够的信息,在提问之前我尝试了这里和其他网站上推荐的所有解决方案。它看起来像是一个格式奇怪的 json/dictionary 对象。
非常感谢任何帮助。
更新,解决方案:
调整后的输出代码为:
import csv

# Save the Search Console result as CSV. ``response`` is the object returned
# by execute_request(). newline='' leaves line endings to the csv module.
with open("out.csv", "w", encoding="utf8", newline='') as f:
    writer = csv.writer(f)
    headers = ["Keys", "Clicks", "Impressions", "CTR", "Position"]
    writer.writerow(headers)
    # A response without a 'rows' entry yields a header-only file instead of
    # raising KeyError.
    for row in response.get('rows', []):
        # Keys are returned only if one or more dimensions are requested.
        # The joined value is already text, so no encode/decode round-trip
        # is needed; the file's encoding handles non-ASCII characters.
        keys = u','.join(row.get('keys', []))
        writer.writerow([keys, row['clicks'], row['impressions'], row['ctr'], row['position']])
问题可能出在输出文件的编码上。
看起来您从响应中获得的行是一系列类似 dict 的对象,所以这应该可行:
import csv

# Write the result rows to out.csv. ``rows`` is the list stored under the
# response's 'rows' entry. newline="" is what the csv module documentation
# asks for on files passed to csv.writer; without it, extra blank lines can
# appear between rows on Windows.
with open("out.csv", "w", encoding="utf8", newline="") as f:
    writer = csv.writer(f)
    headers = ["Keys", "Clicks", "Impressions", "CTR", "Position"]
    writer.writerow(headers)
    for row in rows:
        writer.writerow(
            [
                # 'keys' is optional, so fall back to an empty cell.
                ", ".join(row.get("keys", [])),
                row["clicks"],
                row["impressions"],
                row["ctr"],
                # The response key is spelled 'position'.
                row["position"],
            ]
        )
writer 对象接受一些参数来控制输出 csv 中的行分隔符和引号。查看 module docs 了解详情。