将数据转换为 json 格式
Convert data to JSON format
import enum
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
# Browser-like request headers, sent with the page request below.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.3"
}

PRODUCT_URL = "https://www.fleetpride.com/parts/otr-brake-drum-otr1601b"

# JavaScript assignment that precedes the embedded product JSON object.
PRODUCT_DATA_MARKER = 'CCRZ.detailData.jsonProductData = {"'


def extract_product_json(script_text):
    """Return the product data embedded in *script_text*, or None.

    The text is searched for the last occurrence of
    ``CCRZ.detailData.jsonProductData = {"``; the JSON object that starts
    at that opening brace is decoded and returned.

    Args:
        script_text: Text of a ``<script>`` element (or any string).

    Returns:
        The decoded JSON object, or ``None`` when the marker is absent.

    Raises:
        json.JSONDecodeError: If the text after the marker is not valid JSON.
    """
    marker_pos = script_text.rfind(PRODUCT_DATA_MARKER)
    if marker_pos == -1:
        return None
    # The marker itself ends with '{"', so the first brace at or after the
    # marker position is the start of the JSON object.
    json_start = script_text.index("{", marker_pos)
    # raw_decode stops at the end of the first complete JSON value, so a
    # '};' sequence inside a string value cannot truncate the payload the
    # way splitting on '};' would.
    product_data, _end = json.JSONDecoder().raw_decode(script_text, json_start)
    return product_data


if __name__ == "__main__":
    # Pass the headers defined above and bound the wait with a timeout.
    r = requests.get(PRODUCT_URL, headers=headers, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html5lib")

    req_json = None
    for table in soup.find_all("script"):
        req_json = extract_product_json(str(table))
        if req_json is not None:
            break

    if req_json is None:
        # Fail with a clear message instead of an opaque JSON parse error.
        raise ValueError("jsonProductData was not found in any <script> tag")

    print(req_json)
这是我运行代码时显示的输出:
我希望得到如下所示的 JSON 格式的输出。如果可能的话,能否转换成这种格式?你能告诉我如何把它转换成下面这样的 JSON 形式吗?非常感谢:
{
"mediaWrappers": {
"Alternate Images": [{
"uri": "https://www.fleetpride.com/imagesns/PDPF/OTR1601B-Webb copy.jpg",
"mediaName": "https://www.fleetpride.com/imagesns/PDPF/OTR1601B-Webb copy.jpg",
"sourceType": "URI",
"URI": "https://www.fleetpride.com/imagesns/PDPF/OTR1601B-Webb copy.jpg",
"startDate": "2018-05-23",
"sequence": 2,
"productMediaId": "OTR-OTR1601B-Alternate Images-2",
"mediaType": "Alternate Images",
"locale": "en_US",
"endDate": "2099-12-31",
"enabled": true,
"altMessage": "OTR OTR OTR1601B",
"sfdcName": "406524",
"sfid": "a8B1W000000c4a1UAA",
"product": "a8G1W000000Y7DfUAK"
},
您可以在 json.loads()
之后添加以下代码。
# Write the parsed data to req_json.json, pretty-printed with a 4-space
# indent. The explicit encoding keeps the file independent of the
# platform's locale default.
with open("req_json.json", "w", encoding="utf-8") as f:
    json.dump(req_json, f, indent=4)
两个输出都是 JSON 格式。您需要的是美化输出(pretty printing)。请阅读 json.dumps() 的文档及其选项,您需要使用 indent 选项。
...
# Serialize the parsed data to a JSON string; indent=4 pretty-prints it
# with a 4-space indent per nesting level.
pretty = json.dumps(req_json, indent=4)
print(pretty)
import enum
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
# Browser-like request headers, sent with the page request below.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.3"
}

PRODUCT_URL = "https://www.fleetpride.com/parts/otr-brake-drum-otr1601b"

# JavaScript assignment that precedes the embedded product JSON object.
PRODUCT_DATA_MARKER = 'CCRZ.detailData.jsonProductData = {"'


def extract_product_json(script_text):
    """Return the product data embedded in *script_text*, or None.

    The text is searched for the last occurrence of
    ``CCRZ.detailData.jsonProductData = {"``; the JSON object that starts
    at that opening brace is decoded and returned.

    Args:
        script_text: Text of a ``<script>`` element (or any string).

    Returns:
        The decoded JSON object, or ``None`` when the marker is absent.

    Raises:
        json.JSONDecodeError: If the text after the marker is not valid JSON.
    """
    marker_pos = script_text.rfind(PRODUCT_DATA_MARKER)
    if marker_pos == -1:
        return None
    # The marker itself ends with '{"', so the first brace at or after the
    # marker position is the start of the JSON object.
    json_start = script_text.index("{", marker_pos)
    # raw_decode stops at the end of the first complete JSON value, so a
    # '};' sequence inside a string value cannot truncate the payload the
    # way splitting on '};' would.
    product_data, _end = json.JSONDecoder().raw_decode(script_text, json_start)
    return product_data


if __name__ == "__main__":
    # Pass the headers defined above and bound the wait with a timeout.
    r = requests.get(PRODUCT_URL, headers=headers, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html5lib")

    req_json = None
    for table in soup.find_all("script"):
        req_json = extract_product_json(str(table))
        if req_json is not None:
            break

    if req_json is None:
        # Fail with a clear message instead of an opaque JSON parse error.
        raise ValueError("jsonProductData was not found in any <script> tag")

    print(req_json)
这是我运行代码时显示的输出:
我希望得到如下所示的 JSON 格式的输出。如果可能的话,能否转换成这种格式?你能告诉我如何把它转换成下面这样的 JSON 形式吗?非常感谢:
{
"mediaWrappers": {
"Alternate Images": [{
"uri": "https://www.fleetpride.com/imagesns/PDPF/OTR1601B-Webb copy.jpg",
"mediaName": "https://www.fleetpride.com/imagesns/PDPF/OTR1601B-Webb copy.jpg",
"sourceType": "URI",
"URI": "https://www.fleetpride.com/imagesns/PDPF/OTR1601B-Webb copy.jpg",
"startDate": "2018-05-23",
"sequence": 2,
"productMediaId": "OTR-OTR1601B-Alternate Images-2",
"mediaType": "Alternate Images",
"locale": "en_US",
"endDate": "2099-12-31",
"enabled": true,
"altMessage": "OTR OTR OTR1601B",
"sfdcName": "406524",
"sfid": "a8B1W000000c4a1UAA",
"product": "a8G1W000000Y7DfUAK"
},
您可以在 json.loads()
之后添加以下代码。
# Write the parsed data to req_json.json, pretty-printed with a 4-space
# indent. The explicit encoding keeps the file independent of the
# platform's locale default.
with open("req_json.json", "w", encoding="utf-8") as f:
    json.dump(req_json, f, indent=4)
两个输出都是 JSON 格式。您需要的是美化输出(pretty printing)。请阅读 json.dumps() 的文档及其选项,您需要使用 indent 选项。
...
# Serialize the parsed data to a JSON string; indent=4 pretty-prints it
# with a 4-space indent per nesting level.
pretty = json.dumps(req_json, indent=4)
print(pretty)