我需要帮助来修复抓取 graphql API
I need help to fix scraping graphql API
我能够通过 Google Dev Tools - Networking 将 graphql 查询粘贴到 Insomnia(复制 url bash)以发出有效的 python 请求。现在,提供商方面发生了一些变化。现在我连运行curl都失眠了。我只得到响应 400。
在我以前的代码中,我收到错误消息,我自己无法解决。
我会很高兴有一个可行的解决方案。
我目前工作的编码器是:
import requests
import json
def scrape_digitec():
url = "https://www.digitec.ch/api/graphql"
headers = {
"authority": "www.digitec.ch",
"accept": "application/json",
"accept-language": "de-CH",
"cache-control": "no-cache",
"content-type": "application/json",
"origin": "https://www.digitec.ch",
"pragma": "no-cache",
"referer": "https://www.digitec.ch/search?q=bang%20olufsen",
"sec-ch-ua": '"Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
"x-dg-country": "ch",
"x-dg-mandator": "406802",
"x-dg-portal": "25",
"x-dg-testgroup": "Default"
}
search = 'lg'
offset = '0'
payload = '{"query":"query ENTER_SEARCH(\t$query: String!\t$sortOrder: ProductSort\t$limit: Int = 9\t$offset: Int = 0\t$filters: [SearchFilter]\t$include: [String!]\t$exclude: [String!]\t$searchQueryId: String\t$siteId: String) {\tsearch(\t\tquery: $query\t\tfilters: $filters\t\tsearchQueryId: $searchQueryId\t\tsiteId: $siteId\t) {\t\tproducts(limit: $limit, offset: $offset, sortOrder: $sortOrder) {\t\t\ttotal\t\t\thasMore\t\t\tnextOffset\t\t\tresults {\t\t\t\t...ProductSearchResult\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tfilters(include: $include, exclude: $exclude) {\t\t\tproduct {\t\t\t\tidentifier\t\t\t\tname\t\t\t\tfilterType\t\t\t\tscore\t\t\t\ttooltip {\t\t\t\t\t...FilterTooltipResult\t\t\t\t\t__typename\t\t\t\t}\t\t\t\t...CheckboxSearchFilterResult\t\t\t\t...RangeSearchFilterResult\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tmagazinePages(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tauthors(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tdiscussions(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tquestions(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tratings(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tproductTypes(limit: 24) {\t\t\ttotal\t\t\tresults {\t\t\t\tid\t\t\t\tname\t\t\t\tprimarySynonyms\t\t\t\tisVisible\t\t\t\tdescription\t\t\t\tmetaDescription\t\t\t\timageUrl\t\t\t\tsearchScore\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tbrands(limit: 24) {\t\t\ttotal\t\t\tresults {\t\t\t\tid\t\t\t\ttitle\t\t\t\tsearchScore\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\thelp(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\thasMore\t\t\tresults {\t\t\t\tsearchScore\t\t\t\ttitle\t\t\t\tid\t\t\t\turl\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\t_meta {\t\t\tqueryInfo {\t\t\t\tcorrectedQuery\t\t\t\tdidYouMeanQuery\t\t\t\tlastProductSearchPass\t\t\t\texecutedSearchTerm\t\t\t\ttestGroup\t\t\t\tisManagedQuery\t\t\t\tisRerankedQuery\t\t\t\t__typename\t\t\t}\t\t\tredirectionUrl\t\t\tportalReferral {\t\t\t\tproductCount\t\t\t\tportalName\t\t\t\turl\t\t\t\tproductImageUrls\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\t__typename\t}}fragment ProductSearchResult on ProductSearchResultItem {\tsearchScore\tmandatorSpecificData {\t\t...ProductMandatorSpecific\t\t__typename\t}\tproduct {\t\t...ProductMandatorIndependent\t\t__typename\t}\toffer {\t\t...ProductOffer\t\t__typename\t}\t__typename}fragment FilterTooltipResult on FilterTooltip {\ttext\tmoreInformationLink\t__typename}fragment CheckboxSearchFilterResult on CheckboxSearchFilter {\toptions {\t\tidentifier\t\tname\t\tproductCount\t\tscore\t\treferenceValue {\t\t\tvalue\t\t\tunit {\t\t\t\tabbreviation\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tpreferredValue {\t\t\tvalue\t\t\tunit {\t\t\t\tabbreviation\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\ttooltip {\t\t\t...FilterTooltipResult\t\t\t__typename\t\t}\t\t__typename\t}\t__typename}fragment RangeSearchFilterResult on RangeSearchFilter {\treferenceMin\tpreferredMin\treferenceMax\tpreferredMax\treferenceStepSize\tpreferredStepSize\trangeMergeInfo {\t\tisBottomMerged\t\tisTopMerged\t\t__typename\t}\treferenceUnit {\t\tabbreviation\t\t__typename\t}\tpreferredUnit {\t\tabbreviation\t\t__typename\t}\trangeFilterDataPoint {\t\t...RangeFilterDataPointResult\t\t__typename\t}\t__typename}fragment ProductMandatorSpecific on MandatorSpecificData {\tisBestseller\tisDeleted\tshowroomSites\tsectorIds\t__typename}fragment ProductMandatorIndependent on ProductV2 {\tid\tproductId\tname\tnameProperties\tproductTypeId\tproductTypeName\tbrandId\tbrandName\taverageRating\ttotalRatings\ttotalQuestions\tisProductSet\timages {\t\turl\t\theight\t\twidth\t\t__typename\t}\tenergyEfficiency {\t\tenergyEfficiencyColorType\t\tenergyEfficiencyLabelText\t\tenergyEfficiencyLabelSigns\t\tenergyEfficiencyImage {\t\t\turl\t\t\theight\t\t\twidth\t\t\t__typename\t\t}\t\t__typename\t}\tseo {\t\tseoProductTypeName\t\tseoNameProperties\t\tproductGroups {\t\t\tproductGroup1\t\t\tproductGroup2\t\t\tproductGroup3\t\t\tproductGroup4\t\t\t__typename\t\t}\t\tgtin\t\t__typename\t}\thasVariants\tsmallDimensions\tbasePrice {\t\tpriceFactor\t\tvalue\t\t__typename\t}\t__typename}fragment ProductOffer on OfferV2 {\tid\tproductId\tofferId\tshopOfferId\tprice {\t\tamountIncl\t\tamountExcl\t\tcurrency\t\tfraction\t\t__typename\t}\tdeliveryOptions {\t\tmail {\t\t\tclassification\t\t\tfutureReleaseDate\t\t\t__typename\t\t}\t\tpickup {\t\t\tsiteId\t\t\tclassification\t\t\tfutureReleaseDate\t\t\t__typename\t\t}\t\tdetailsProvider {\t\t\tproductId\t\t\tofferId\t\t\tquantity\t\t\ttype\t\t\t__typename\t\t}\t\t__typename\t}\tlabel\ttype\tvolumeDiscountPrices {\t\tminAmount\t\tprice {\t\t\tamountIncl\t\t\tamountExcl\t\t\tcurrency\t\t\t__typename\t\t}\t\tisDefault\t\t__typename\t}\tsalesInformation {\t\tnumberOfItems\t\tnumberOfItemsSold\t\tisEndingSoon\t\tvalidFrom\t\t__typename\t}\tincentiveText\tisIncentiveCashback\tisNew\tisSalesPromotion\thideInProductDiscovery\tcanAddToBasket\thidePrice\tinsteadOfPrice {\t\ttype\t\tprice {\t\t\tamountIncl\t\t\tamountExcl\t\t\tcurrency\t\t\tfraction\t\t\t__typename\t\t}\t\t__typename\t}\tminOrderQuantity\t__typename}fragment RangeFilterDataPointResult on RangeFilterDataPoint {\tcount\treferenceValue {\t\tvalue\t\tunit {\t\t\tabbreviation\t\t\t__typename\t\t}\t\t__typename\t}\tpreferredValue {\t\tvalue\t\tunit {\t\t\tabbreviation\t\t\t__typename\t\t}\t\t__typename\t}\t__typename}\",\"variables\":{\"limit\":100,\"offset\":'+offset+',\"query\":\"'+search+'\",\"filters\":[],\"sortOrder\":null,\"include\":[\"bra\",\"pt\",\"pr\"],\"exclude\":[\"off\"],\"searchQueryId\":\"4ce81461-09e2-4f7a-bb9a-8f6f8503fdc4\",\"siteId\":null},\"operationName\":\"ENTER_SEARCH\"}'
response = requests.request("POST", url, data=payload, headers=headers)
print(response)
data = response.json()
print(json.dumps(data, indent=2))
print(json.dumps(data))
if __name__ == '__main__':
scrape_digitec()
您需要将您的负载格式化为 json 格式(python 的 dictionary/lists),然后使用 json
参数,而不是 data
:
import requests
import json
def scrape_digitec():
url = "https://www.digitec.ch/api/graphql"
headers = {
"authority": "www.digitec.ch",
"accept": "application/json",
"accept-language": "de-CH",
"cache-control": "no-cache",
"content-type": "application/json",
"origin": "https://www.digitec.ch",
"pragma": "no-cache",
"referer": "https://www.digitec.ch/search?q=bang%20olufsen",
"sec-ch-ua": '"Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
"x-dg-country": "ch",
"x-dg-mandator": "406802",
"x-dg-portal": "25",
"x-dg-testgroup": "Default"
}
search = 'lg'
offset = 0
payload = [{
"operationName":"ENTER_SEARCH",
"variables":{
"limit":24,
"offset":offset,
"query":search,
"filters":[],
#"sortOrder":null,
"include":["bra","pt","pr","off"],
"searchQueryId":"e1b620fc-bf9c-41c6-85c0-cc49e5d12e25",
#"siteId":null},
},
"query":"query ENTER_SEARCH($query: String!, $sortOrder: ProductSort, $limit: Int = 9, $offset: Int = 0, $filters: [SearchFilter], $include: [String!], $exclude: [String!], $searchQueryId: String, $siteId: String) {\n search(\n query: $query\n filters: $filters\n searchQueryId: $searchQueryId\n siteId: $siteId\n ) {\n products(limit: $limit, offset: $offset, sortOrder: $sortOrder) {\n total\n hasMore\n nextOffset\n results {\n ...ProductSearchResult\n __typename\n }\n __typename\n }\n filters(include: $include, exclude: $exclude) {\n product {\n identifier\n name\n filterType\n score\n tooltip {\n ...FilterTooltipResult\n __typename\n }\n ...CheckboxSearchFilterResult\n ...RangeSearchFilterResult\n __typename\n }\n __typename\n }\n magazinePages(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n authors(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n discussions(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n questions(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n ratings(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n productTypes(limit: 24) {\n total\n results {\n id\n name\n primarySynonyms\n isVisible\n description\n metaDescription\n imageUrl\n searchScore\n __typename\n }\n __typename\n }\n brands(limit: 24) {\n total\n results {\n id\n title\n searchScore\n __typename\n }\n __typename\n }\n _meta {\n queryInfo {\n correctedQuery\n didYouMeanQuery\n lastProductSearchPass\n executedSearchTerm\n testGroup\n isManagedQuery\n isRerankedQuery\n __typename\n }\n redirectionUrl\n portalReferral {\n productCount\n portalName\n url\n productImageUrls\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment ProductSearchResult on ProductSearchResultItem {\n searchScore\n mandatorSpecificData {\n ...ProductMandatorSpecific\n __typename\n }\n product {\n ...ProductMandatorIndependent\n __typename\n }\n offer {\n ...ProductOffer\n __typename\n }\n __typename\n}\n\nfragment FilterTooltipResult on FilterTooltip {\n text\n moreInformationLink\n __typename\n}\n\nfragment CheckboxSearchFilterResult on CheckboxSearchFilter {\n options {\n identifier\n name\n productCount\n score\n referenceValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n preferredValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n tooltip {\n ...FilterTooltipResult\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment RangeSearchFilterResult on RangeSearchFilter {\n referenceMin\n preferredMin\n referenceMax\n preferredMax\n referenceStepSize\n preferredStepSize\n rangeMergeInfo {\n isBottomMerged\n isTopMerged\n __typename\n }\n referenceUnit {\n abbreviation\n __typename\n }\n preferredUnit {\n abbreviation\n __typename\n }\n rangeFilterDataPoint {\n ...RangeFilterDataPointResult\n __typename\n }\n __typename\n}\n\nfragment ProductMandatorSpecific on MandatorSpecificData {\n isBestseller\n isDeleted\n showroomSites\n sectorIds\n __typename\n}\n\nfragment ProductMandatorIndependent on ProductV2 {\n id\n productId\n name\n nameProperties\n productTypeId\n productTypeName\n brandId\n brandName\n averageRating\n totalRatings\n totalQuestions\n isProductSet\n images {\n url\n height\n width\n __typename\n }\n energyEfficiency {\n energyEfficiencyColorType\n energyEfficiencyLabelText\n energyEfficiencyLabelSigns\n energyEfficiencyImage {\n url\n height\n width\n __typename\n }\n __typename\n }\n seo {\n seoProductTypeName\n seoNameProperties\n productGroups {\n productGroup1\n productGroup2\n productGroup3\n productGroup4\n __typename\n }\n gtin\n __typename\n }\n hasVariants\n smallDimensions\n basePrice {\n priceFactor\n value\n __typename\n }\n __typename\n}\n\nfragment ProductOffer on OfferV2 {\n id\n productId\n offerId\n shopOfferId\n price {\n amountIncl\n amountExcl\n currency\n fraction\n __typename\n }\n deliveryOptions {\n mail {\n classification\n futureReleaseDate\n __typename\n }\n pickup {\n siteId\n classification\n futureReleaseDate\n __typename\n }\n detailsProvider {\n productId\n offerId\n quantity\n type\n __typename\n }\n __typename\n }\n label\n type\n volumeDiscountPrices {\n minAmount\n price {\n amountIncl\n amountExcl\n currency\n __typename\n }\n isDefault\n __typename\n }\n salesInformation {\n numberOfItems\n numberOfItemsSold\n isEndingSoon\n validFrom\n __typename\n }\n incentiveText\n isIncentiveCashback\n isNew\n isSalesPromotion\n hideInProductDiscovery\n canAddToBasket\n hidePrice\n insteadOfPrice {\n type\n price {\n amountIncl\n amountExcl\n currency\n fraction\n __typename\n }\n __typename\n }\n minOrderQuantity\n __typename\n}\n\nfragment RangeFilterDataPointResult on RangeFilterDataPoint {\n count\n referenceValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n preferredValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n __typename\n}\n"}]
response = requests.post(url, json=payload, headers=headers)
print(response)
data = response.json()
print(json.dumps(data, indent=2))
print(json.dumps(data))
if __name__ == '__main__':
scrape_digitec()
我能够通过 Google Dev Tools - Networking 将 graphql 查询粘贴到 Insomnia(复制 url bash)以发出有效的 python 请求。现在,提供商方面发生了一些变化。现在我连运行curl都失眠了。我只得到响应 400。 在我以前的代码中,我收到错误消息,我自己无法解决。 我会很高兴有一个可行的解决方案。
我目前工作的编码器是:
import requests
import json
def scrape_digitec():
url = "https://www.digitec.ch/api/graphql"
headers = {
"authority": "www.digitec.ch",
"accept": "application/json",
"accept-language": "de-CH",
"cache-control": "no-cache",
"content-type": "application/json",
"origin": "https://www.digitec.ch",
"pragma": "no-cache",
"referer": "https://www.digitec.ch/search?q=bang%20olufsen",
"sec-ch-ua": '"Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
"x-dg-country": "ch",
"x-dg-mandator": "406802",
"x-dg-portal": "25",
"x-dg-testgroup": "Default"
}
search = 'lg'
offset = '0'
payload = '{"query":"query ENTER_SEARCH(\t$query: String!\t$sortOrder: ProductSort\t$limit: Int = 9\t$offset: Int = 0\t$filters: [SearchFilter]\t$include: [String!]\t$exclude: [String!]\t$searchQueryId: String\t$siteId: String) {\tsearch(\t\tquery: $query\t\tfilters: $filters\t\tsearchQueryId: $searchQueryId\t\tsiteId: $siteId\t) {\t\tproducts(limit: $limit, offset: $offset, sortOrder: $sortOrder) {\t\t\ttotal\t\t\thasMore\t\t\tnextOffset\t\t\tresults {\t\t\t\t...ProductSearchResult\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tfilters(include: $include, exclude: $exclude) {\t\t\tproduct {\t\t\t\tidentifier\t\t\t\tname\t\t\t\tfilterType\t\t\t\tscore\t\t\t\ttooltip {\t\t\t\t\t...FilterTooltipResult\t\t\t\t\t__typename\t\t\t\t}\t\t\t\t...CheckboxSearchFilterResult\t\t\t\t...RangeSearchFilterResult\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tmagazinePages(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tauthors(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tdiscussions(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tquestions(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tratings(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tproductTypes(limit: 24) {\t\t\ttotal\t\t\tresults {\t\t\t\tid\t\t\t\tname\t\t\t\tprimarySynonyms\t\t\t\tisVisible\t\t\t\tdescription\t\t\t\tmetaDescription\t\t\t\timageUrl\t\t\t\tsearchScore\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tbrands(limit: 24) {\t\t\ttotal\t\t\tresults {\t\t\t\tid\t\t\t\ttitle\t\t\t\tsearchScore\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\thelp(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\thasMore\t\t\tresults {\t\t\t\tsearchScore\t\t\t\ttitle\t\t\t\tid\t\t\t\turl\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\t_meta {\t\t\tqueryInfo {\t\t\t\tcorrectedQuery\t\t\t\tdidYouMeanQuery\t\t\t\tlastProductSearchPass\t\t\t\texecutedSearchTerm\t\t\t\ttestGroup\t\t\t\tisManagedQuery\t\t\t\tisRerankedQuery\t\t\t\t__typename\t\t\t}\t\t\tredirectionUrl\t\t\tportalReferral {\t\t\t\tproductCount\t\t\t\tportalName\t\t\t\turl\t\t\t\tproductImageUrls\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\t__typename\t}}fragment ProductSearchResult on ProductSearchResultItem {\tsearchScore\tmandatorSpecificData {\t\t...ProductMandatorSpecific\t\t__typename\t}\tproduct {\t\t...ProductMandatorIndependent\t\t__typename\t}\toffer {\t\t...ProductOffer\t\t__typename\t}\t__typename}fragment FilterTooltipResult on FilterTooltip {\ttext\tmoreInformationLink\t__typename}fragment CheckboxSearchFilterResult on CheckboxSearchFilter {\toptions {\t\tidentifier\t\tname\t\tproductCount\t\tscore\t\treferenceValue {\t\t\tvalue\t\t\tunit {\t\t\t\tabbreviation\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tpreferredValue {\t\t\tvalue\t\t\tunit {\t\t\t\tabbreviation\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\ttooltip {\t\t\t...FilterTooltipResult\t\t\t__typename\t\t}\t\t__typename\t}\t__typename}fragment RangeSearchFilterResult on RangeSearchFilter {\treferenceMin\tpreferredMin\treferenceMax\tpreferredMax\treferenceStepSize\tpreferredStepSize\trangeMergeInfo {\t\tisBottomMerged\t\tisTopMerged\t\t__typename\t}\treferenceUnit {\t\tabbreviation\t\t__typename\t}\tpreferredUnit {\t\tabbreviation\t\t__typename\t}\trangeFilterDataPoint {\t\t...RangeFilterDataPointResult\t\t__typename\t}\t__typename}fragment ProductMandatorSpecific on MandatorSpecificData {\tisBestseller\tisDeleted\tshowroomSites\tsectorIds\t__typename}fragment ProductMandatorIndependent on ProductV2 {\tid\tproductId\tname\tnameProperties\tproductTypeId\tproductTypeName\tbrandId\tbrandName\taverageRating\ttotalRatings\ttotalQuestions\tisProductSet\timages {\t\turl\t\theight\t\twidth\t\t__typename\t}\tenergyEfficiency {\t\tenergyEfficiencyColorType\t\tenergyEfficiencyLabelText\t\tenergyEfficiencyLabelSigns\t\tenergyEfficiencyImage {\t\t\turl\t\t\theight\t\t\twidth\t\t\t__typename\t\t}\t\t__typename\t}\tseo {\t\tseoProductTypeName\t\tseoNameProperties\t\tproductGroups {\t\t\tproductGroup1\t\t\tproductGroup2\t\t\tproductGroup3\t\t\tproductGroup4\t\t\t__typename\t\t}\t\tgtin\t\t__typename\t}\thasVariants\tsmallDimensions\tbasePrice {\t\tpriceFactor\t\tvalue\t\t__typename\t}\t__typename}fragment ProductOffer on OfferV2 {\tid\tproductId\tofferId\tshopOfferId\tprice {\t\tamountIncl\t\tamountExcl\t\tcurrency\t\tfraction\t\t__typename\t}\tdeliveryOptions {\t\tmail {\t\t\tclassification\t\t\tfutureReleaseDate\t\t\t__typename\t\t}\t\tpickup {\t\t\tsiteId\t\t\tclassification\t\t\tfutureReleaseDate\t\t\t__typename\t\t}\t\tdetailsProvider {\t\t\tproductId\t\t\tofferId\t\t\tquantity\t\t\ttype\t\t\t__typename\t\t}\t\t__typename\t}\tlabel\ttype\tvolumeDiscountPrices {\t\tminAmount\t\tprice {\t\t\tamountIncl\t\t\tamountExcl\t\t\tcurrency\t\t\t__typename\t\t}\t\tisDefault\t\t__typename\t}\tsalesInformation {\t\tnumberOfItems\t\tnumberOfItemsSold\t\tisEndingSoon\t\tvalidFrom\t\t__typename\t}\tincentiveText\tisIncentiveCashback\tisNew\tisSalesPromotion\thideInProductDiscovery\tcanAddToBasket\thidePrice\tinsteadOfPrice {\t\ttype\t\tprice {\t\t\tamountIncl\t\t\tamountExcl\t\t\tcurrency\t\t\tfraction\t\t\t__typename\t\t}\t\t__typename\t}\tminOrderQuantity\t__typename}fragment RangeFilterDataPointResult on RangeFilterDataPoint {\tcount\treferenceValue {\t\tvalue\t\tunit {\t\t\tabbreviation\t\t\t__typename\t\t}\t\t__typename\t}\tpreferredValue {\t\tvalue\t\tunit {\t\t\tabbreviation\t\t\t__typename\t\t}\t\t__typename\t}\t__typename}\",\"variables\":{\"limit\":100,\"offset\":'+offset+',\"query\":\"'+search+'\",\"filters\":[],\"sortOrder\":null,\"include\":[\"bra\",\"pt\",\"pr\"],\"exclude\":[\"off\"],\"searchQueryId\":\"4ce81461-09e2-4f7a-bb9a-8f6f8503fdc4\",\"siteId\":null},\"operationName\":\"ENTER_SEARCH\"}'
response = requests.request("POST", url, data=payload, headers=headers)
print(response)
data = response.json()
print(json.dumps(data, indent=2))
print(json.dumps(data))
if __name__ == '__main__':
scrape_digitec()
您需要将您的负载格式化为 json 格式(python 的 dictionary/lists),然后使用 json
参数,而不是 data
:
import requests
import json
def scrape_digitec():
url = "https://www.digitec.ch/api/graphql"
headers = {
"authority": "www.digitec.ch",
"accept": "application/json",
"accept-language": "de-CH",
"cache-control": "no-cache",
"content-type": "application/json",
"origin": "https://www.digitec.ch",
"pragma": "no-cache",
"referer": "https://www.digitec.ch/search?q=bang%20olufsen",
"sec-ch-ua": '"Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
"x-dg-country": "ch",
"x-dg-mandator": "406802",
"x-dg-portal": "25",
"x-dg-testgroup": "Default"
}
search = 'lg'
offset = 0
payload = [{
"operationName":"ENTER_SEARCH",
"variables":{
"limit":24,
"offset":offset,
"query":search,
"filters":[],
#"sortOrder":null,
"include":["bra","pt","pr","off"],
"searchQueryId":"e1b620fc-bf9c-41c6-85c0-cc49e5d12e25",
#"siteId":null},
},
"query":"query ENTER_SEARCH($query: String!, $sortOrder: ProductSort, $limit: Int = 9, $offset: Int = 0, $filters: [SearchFilter], $include: [String!], $exclude: [String!], $searchQueryId: String, $siteId: String) {\n search(\n query: $query\n filters: $filters\n searchQueryId: $searchQueryId\n siteId: $siteId\n ) {\n products(limit: $limit, offset: $offset, sortOrder: $sortOrder) {\n total\n hasMore\n nextOffset\n results {\n ...ProductSearchResult\n __typename\n }\n __typename\n }\n filters(include: $include, exclude: $exclude) {\n product {\n identifier\n name\n filterType\n score\n tooltip {\n ...FilterTooltipResult\n __typename\n }\n ...CheckboxSearchFilterResult\n ...RangeSearchFilterResult\n __typename\n }\n __typename\n }\n magazinePages(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n authors(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n discussions(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n questions(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n ratings(limit: 3) {\n ids {\n id\n score\n __typename\n }\n total\n __typename\n }\n productTypes(limit: 24) {\n total\n results {\n id\n name\n primarySynonyms\n isVisible\n description\n metaDescription\n imageUrl\n searchScore\n __typename\n }\n __typename\n }\n brands(limit: 24) {\n total\n results {\n id\n title\n searchScore\n __typename\n }\n __typename\n }\n _meta {\n queryInfo {\n correctedQuery\n didYouMeanQuery\n lastProductSearchPass\n executedSearchTerm\n testGroup\n isManagedQuery\n isRerankedQuery\n __typename\n }\n redirectionUrl\n portalReferral {\n productCount\n portalName\n url\n productImageUrls\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment ProductSearchResult on ProductSearchResultItem {\n searchScore\n mandatorSpecificData {\n ...ProductMandatorSpecific\n __typename\n }\n product {\n ...ProductMandatorIndependent\n __typename\n }\n offer {\n ...ProductOffer\n __typename\n }\n __typename\n}\n\nfragment FilterTooltipResult on FilterTooltip {\n text\n moreInformationLink\n __typename\n}\n\nfragment CheckboxSearchFilterResult on CheckboxSearchFilter {\n options {\n identifier\n name\n productCount\n score\n referenceValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n preferredValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n tooltip {\n ...FilterTooltipResult\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment RangeSearchFilterResult on RangeSearchFilter {\n referenceMin\n preferredMin\n referenceMax\n preferredMax\n referenceStepSize\n preferredStepSize\n rangeMergeInfo {\n isBottomMerged\n isTopMerged\n __typename\n }\n referenceUnit {\n abbreviation\n __typename\n }\n preferredUnit {\n abbreviation\n __typename\n }\n rangeFilterDataPoint {\n ...RangeFilterDataPointResult\n __typename\n }\n __typename\n}\n\nfragment ProductMandatorSpecific on MandatorSpecificData {\n isBestseller\n isDeleted\n showroomSites\n sectorIds\n __typename\n}\n\nfragment ProductMandatorIndependent on ProductV2 {\n id\n productId\n name\n nameProperties\n productTypeId\n productTypeName\n brandId\n brandName\n averageRating\n totalRatings\n totalQuestions\n isProductSet\n images {\n url\n height\n width\n __typename\n }\n energyEfficiency {\n energyEfficiencyColorType\n energyEfficiencyLabelText\n energyEfficiencyLabelSigns\n energyEfficiencyImage {\n url\n height\n width\n __typename\n }\n __typename\n }\n seo {\n seoProductTypeName\n seoNameProperties\n productGroups {\n productGroup1\n productGroup2\n productGroup3\n productGroup4\n __typename\n }\n gtin\n __typename\n }\n hasVariants\n smallDimensions\n basePrice {\n priceFactor\n value\n __typename\n }\n __typename\n}\n\nfragment ProductOffer on OfferV2 {\n id\n productId\n offerId\n shopOfferId\n price {\n amountIncl\n amountExcl\n currency\n fraction\n __typename\n }\n deliveryOptions {\n mail {\n classification\n futureReleaseDate\n __typename\n }\n pickup {\n siteId\n classification\n futureReleaseDate\n __typename\n }\n detailsProvider {\n productId\n offerId\n quantity\n type\n __typename\n }\n __typename\n }\n label\n type\n volumeDiscountPrices {\n minAmount\n price {\n amountIncl\n amountExcl\n currency\n __typename\n }\n isDefault\n __typename\n }\n salesInformation {\n numberOfItems\n numberOfItemsSold\n isEndingSoon\n validFrom\n __typename\n }\n incentiveText\n isIncentiveCashback\n isNew\n isSalesPromotion\n hideInProductDiscovery\n canAddToBasket\n hidePrice\n insteadOfPrice {\n type\n price {\n amountIncl\n amountExcl\n currency\n fraction\n __typename\n }\n __typename\n }\n minOrderQuantity\n __typename\n}\n\nfragment RangeFilterDataPointResult on RangeFilterDataPoint {\n count\n referenceValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n preferredValue {\n value\n unit {\n abbreviation\n __typename\n }\n __typename\n }\n __typename\n}\n"}]
response = requests.post(url, json=payload, headers=headers)
print(response)
data = response.json()
print(json.dumps(data, indent=2))
print(json.dumps(data))
if __name__ == '__main__':
scrape_digitec()