我需要帮助修复对 GraphQL API 的抓取

I need help to fix scraping graphql API

我之前能够通过 Google 开发者工具的 Network 面板,把 GraphQL 查询(Copy as cURL (bash))粘贴到 Insomnia 中,从而生成可用的 Python 请求代码。现在提供方那边发生了一些变化,我甚至无法在 Insomnia 中运行这条 curl 命令,只得到 400 响应。我之前的代码也会报错,而我自己无法解决。如果能有一个可行的解决方案,我会非常高兴。

我目前使用的代码是:

import requests
import json

def scrape_digitec() -> None:
    """POST a hand-assembled GraphQL search request to digitec.ch and print the reply.

    The request body is built as one JSON text by string concatenation, with
    the search term and the offset spliced into the ``variables`` object. The
    body is sent with ``data=``, i.e. exactly as written, and the decoded JSON
    response is printed twice (indented, then compact).

    Returns:
        None. The result is only printed.
    """
    url = "https://www.digitec.ch/api/graphql"
    # Browser-like header set (Chrome 100 user agent, sec-* fetch metadata)
    # plus the site-specific x-dg-* headers.
    # NOTE(review): presumably copied from a captured browser request; which
    # of these the server actually requires is not visible here.
    headers = {
        "authority": "www.digitec.ch",
        "accept": "application/json",
        "accept-language": "de-CH",
        "cache-control": "no-cache",
        "content-type": "application/json",
        "origin": "https://www.digitec.ch",
        "pragma": "no-cache",
        "referer": "https://www.digitec.ch/search?q=bang%20olufsen",
        "sec-ch-ua": '"Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
        "x-dg-country": "ch",
        "x-dg-mandator": "406802",
        "x-dg-portal": "25",
        "x-dg-testgroup": "Default"
    }

    # Both values are concatenated into the JSON text below, which is why the
    # offset is kept as a string rather than an int.
    search = 'lg'
    offset = '0'
    # Hand-written JSON body: the GraphQL document, its variables and the
    # operation name.
    # NOTE(review): this is a normal (non-raw) Python literal, so every \t in
    # it becomes a real tab character inside the JSON "query" string. JSON
    # does not allow unescaped control characters in strings, so a strict
    # server-side parser may reject the body - possibly the source of the
    # HTTP 400; confirm.
    # NOTE(review): `search` is inserted without JSON escaping, so a term
    # containing a double quote or backslash would corrupt the body.
    # NOTE(review): the literal is wrapped over several physical lines here;
    # a single-quoted Python string has to stay on one line to parse.
    payload = '{"query":"query ENTER_SEARCH(\t$query: String!\t$sortOrder: ProductSort\t$limit: Int = 9\t$offset: Int = 0\t$filters: [SearchFilter]\t$include: [String!]\t$exclude: [String!]\t$searchQueryId: String\t$siteId: String) {\tsearch(\t\tquery: $query\t\tfilters: $filters\t\tsearchQueryId: $searchQueryId\t\tsiteId: $siteId\t) {\t\tproducts(limit: $limit, offset: $offset, sortOrder: $sortOrder) {\t\t\ttotal\t\t\thasMore\t\t\tnextOffset\t\t\tresults {\t\t\t\t...ProductSearchResult\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tfilters(include: $include, exclude: $exclude) {\t\t\tproduct {\t\t\t\tidentifier\t\t\t\tname\t\t\t\tfilterType\t\t\t\tscore\t\t\t\ttooltip {\t\t\t\t\t...FilterTooltipResult\t\t\t\t\t__typename\t\t\t\t}\t\t\t\t...CheckboxSearchFilterResult\t\t\t\t...RangeSearchFilterResult\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tmagazinePages(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tauthors(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tdiscussions(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tquestions(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tratings(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\t__typename\t\t}\t\tproductTypes(limit: 24) {\t\t\ttotal\t\t\tresults {\t\t\t\tid\t\t\t\tname\t\t\t\tprimarySynonyms\t\t\t\tisVisible\t\t\t\tdescription\t\t\t\tmetaDescription\t\t\t\timageUrl\t\t\t\tsearchScore\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tbrands(limit: 24) {\t\t\ttotal\t\t\tresults {\t\t\t\tid\t\t\t\ttitle\t\t\t\tsearchScore\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\thelp(limit: 3) {\t\t\tids {\t\t\t\tid\t\t\t\tscore\t\t\t\t__typename\t\t\t}\t\t\ttotal\t\t\thasMore\t\t\tresults 
{\t\t\t\tsearchScore\t\t\t\ttitle\t\t\t\tid\t\t\t\turl\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\t_meta {\t\t\tqueryInfo {\t\t\t\tcorrectedQuery\t\t\t\tdidYouMeanQuery\t\t\t\tlastProductSearchPass\t\t\t\texecutedSearchTerm\t\t\t\ttestGroup\t\t\t\tisManagedQuery\t\t\t\tisRerankedQuery\t\t\t\t__typename\t\t\t}\t\t\tredirectionUrl\t\t\tportalReferral {\t\t\t\tproductCount\t\t\t\tportalName\t\t\t\turl\t\t\t\tproductImageUrls\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\t__typename\t}}fragment ProductSearchResult on ProductSearchResultItem {\tsearchScore\tmandatorSpecificData {\t\t...ProductMandatorSpecific\t\t__typename\t}\tproduct {\t\t...ProductMandatorIndependent\t\t__typename\t}\toffer {\t\t...ProductOffer\t\t__typename\t}\t__typename}fragment FilterTooltipResult on FilterTooltip {\ttext\tmoreInformationLink\t__typename}fragment CheckboxSearchFilterResult on CheckboxSearchFilter {\toptions {\t\tidentifier\t\tname\t\tproductCount\t\tscore\t\treferenceValue {\t\t\tvalue\t\t\tunit {\t\t\t\tabbreviation\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\tpreferredValue {\t\t\tvalue\t\t\tunit {\t\t\t\tabbreviation\t\t\t\t__typename\t\t\t}\t\t\t__typename\t\t}\t\ttooltip {\t\t\t...FilterTooltipResult\t\t\t__typename\t\t}\t\t__typename\t}\t__typename}fragment RangeSearchFilterResult on RangeSearchFilter {\treferenceMin\tpreferredMin\treferenceMax\tpreferredMax\treferenceStepSize\tpreferredStepSize\trangeMergeInfo {\t\tisBottomMerged\t\tisTopMerged\t\t__typename\t}\treferenceUnit {\t\tabbreviation\t\t__typename\t}\tpreferredUnit {\t\tabbreviation\t\t__typename\t}\trangeFilterDataPoint {\t\t...RangeFilterDataPointResult\t\t__typename\t}\t__typename}fragment ProductMandatorSpecific on MandatorSpecificData {\tisBestseller\tisDeleted\tshowroomSites\tsectorIds\t__typename}fragment ProductMandatorIndependent on ProductV2 
{\tid\tproductId\tname\tnameProperties\tproductTypeId\tproductTypeName\tbrandId\tbrandName\taverageRating\ttotalRatings\ttotalQuestions\tisProductSet\timages {\t\turl\t\theight\t\twidth\t\t__typename\t}\tenergyEfficiency {\t\tenergyEfficiencyColorType\t\tenergyEfficiencyLabelText\t\tenergyEfficiencyLabelSigns\t\tenergyEfficiencyImage {\t\t\turl\t\t\theight\t\t\twidth\t\t\t__typename\t\t}\t\t__typename\t}\tseo {\t\tseoProductTypeName\t\tseoNameProperties\t\tproductGroups {\t\t\tproductGroup1\t\t\tproductGroup2\t\t\tproductGroup3\t\t\tproductGroup4\t\t\t__typename\t\t}\t\tgtin\t\t__typename\t}\thasVariants\tsmallDimensions\tbasePrice {\t\tpriceFactor\t\tvalue\t\t__typename\t}\t__typename}fragment ProductOffer on OfferV2 {\tid\tproductId\tofferId\tshopOfferId\tprice {\t\tamountIncl\t\tamountExcl\t\tcurrency\t\tfraction\t\t__typename\t}\tdeliveryOptions {\t\tmail {\t\t\tclassification\t\t\tfutureReleaseDate\t\t\t__typename\t\t}\t\tpickup {\t\t\tsiteId\t\t\tclassification\t\t\tfutureReleaseDate\t\t\t__typename\t\t}\t\tdetailsProvider {\t\t\tproductId\t\t\tofferId\t\t\tquantity\t\t\ttype\t\t\t__typename\t\t}\t\t__typename\t}\tlabel\ttype\tvolumeDiscountPrices {\t\tminAmount\t\tprice {\t\t\tamountIncl\t\t\tamountExcl\t\t\tcurrency\t\t\t__typename\t\t}\t\tisDefault\t\t__typename\t}\tsalesInformation {\t\tnumberOfItems\t\tnumberOfItemsSold\t\tisEndingSoon\t\tvalidFrom\t\t__typename\t}\tincentiveText\tisIncentiveCashback\tisNew\tisSalesPromotion\thideInProductDiscovery\tcanAddToBasket\thidePrice\tinsteadOfPrice {\t\ttype\t\tprice {\t\t\tamountIncl\t\t\tamountExcl\t\t\tcurrency\t\t\tfraction\t\t\t__typename\t\t}\t\t__typename\t}\tminOrderQuantity\t__typename}fragment RangeFilterDataPointResult on RangeFilterDataPoint {\tcount\treferenceValue {\t\tvalue\t\tunit {\t\t\tabbreviation\t\t\t__typename\t\t}\t\t__typename\t}\tpreferredValue {\t\tvalue\t\tunit 
{\t\t\tabbreviation\t\t\t__typename\t\t}\t\t__typename\t}\t__typename}\",\"variables\":{\"limit\":100,\"offset\":'+offset+',\"query\":\"'+search+'\",\"filters\":[],\"sortOrder\":null,\"include\":[\"bra\",\"pt\",\"pr\"],\"exclude\":[\"off\"],\"searchQueryId\":\"4ce81461-09e2-4f7a-bb9a-8f6f8503fdc4\",\"siteId\":null},\"operationName\":\"ENTER_SEARCH\"}'

    # data= transmits the string unchanged; requests does no JSON encoding or
    # validation of it.
    response = requests.request("POST", url, data=payload, headers=headers)
    print(response)
    # Decode the response body as JSON, then dump it pretty-printed and compact.
    data = response.json()
    print(json.dumps(data, indent=2))
    print(json.dumps(data))

# Script entry point: run the scraper only when executed directly, not on import.
if __name__ == "__main__":
    scrape_digitec()

您需要把负载写成 JSON 结构(即 Python 的字典/列表),然后通过 json 参数而不是 data 参数传递:

import requests
import json

def scrape_digitec(search='lg', offset=0, limit=24, *, timeout=30):
    """Run the digitec.ch ``ENTER_SEARCH`` GraphQL query and print the result.

    The body is built from Python dicts/lists and passed via ``json=`` so that
    ``requests`` serialises it; search terms containing quotes or backslashes
    are therefore escaped correctly.

    Args:
        search: Search term sent as the ``query`` variable.
        offset: Value of the ``offset`` variable.
        limit: Value of the ``limit`` variable.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without it a stalled connection would block indefinitely.

    Returns:
        The decoded JSON response. The request is sent as a one-element list,
        so the reply is whatever the server returns for that batch.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status. The
            response body is printed first so the rejection can be diagnosed.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://www.digitec.ch/api/graphql"
    headers = {
        "authority": "www.digitec.ch",
        "accept": "application/json",
        "accept-language": "de-CH",
        "cache-control": "no-cache",
        "content-type": "application/json",
        "origin": "https://www.digitec.ch",
        "pragma": "no-cache",
        "referer": "https://www.digitec.ch/search?q=bang%20olufsen",
        "sec-ch-ua": '"Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
        "x-dg-country": "ch",
        "x-dg-mandator": "406802",
        "x-dg-portal": "25",
        "x-dg-testgroup": "Default",
    }

    # The GraphQL document, split into adjacent literals (one per top-level
    # selection / fragment) so that every literal stays on a single physical
    # line. Python joins adjacent literals at compile time, so the value is
    # one continuous string.
    query = (
        "query ENTER_SEARCH($query: String!, $sortOrder: ProductSort, $limit: Int = 9, $offset: Int = 0, $filters: [SearchFilter], $include: [String!], $exclude: [String!], $searchQueryId: String, $siteId: String) {\n"
        "  search(\n    query: $query\n    filters: $filters\n    searchQueryId: $searchQueryId\n    siteId: $siteId\n  ) {\n"
        "    products(limit: $limit, offset: $offset, sortOrder: $sortOrder) {\n      total\n      hasMore\n      nextOffset\n      results {\n        ...ProductSearchResult\n        __typename\n      }\n      __typename\n    }\n"
        "    filters(include: $include, exclude: $exclude) {\n      product {\n        identifier\n        name\n        filterType\n        score\n        tooltip {\n          ...FilterTooltipResult\n          __typename\n        }\n        ...CheckboxSearchFilterResult\n        ...RangeSearchFilterResult\n        __typename\n      }\n      __typename\n    }\n"
        "    magazinePages(limit: 3) {\n      ids {\n        id\n        score\n        __typename\n      }\n      total\n      __typename\n    }\n"
        "    authors(limit: 3) {\n      ids {\n        id\n        score\n        __typename\n      }\n      total\n      __typename\n    }\n"
        "    discussions(limit: 3) {\n      ids {\n        id\n        score\n        __typename\n      }\n      total\n      __typename\n    }\n"
        "    questions(limit: 3) {\n      ids {\n        id\n        score\n        __typename\n      }\n      total\n      __typename\n    }\n"
        "    ratings(limit: 3) {\n      ids {\n        id\n        score\n        __typename\n      }\n      total\n      __typename\n    }\n"
        "    productTypes(limit: 24) {\n      total\n      results {\n        id\n        name\n        primarySynonyms\n        isVisible\n        description\n        metaDescription\n        imageUrl\n        searchScore\n        __typename\n      }\n      __typename\n    }\n"
        "    brands(limit: 24) {\n      total\n      results {\n        id\n        title\n        searchScore\n        __typename\n      }\n      __typename\n    }\n"
        "    _meta {\n      queryInfo {\n        correctedQuery\n        didYouMeanQuery\n        lastProductSearchPass\n        executedSearchTerm\n        testGroup\n        isManagedQuery\n        isRerankedQuery\n        __typename\n      }\n      redirectionUrl\n      portalReferral {\n        productCount\n        portalName\n        url\n        productImageUrls\n        __typename\n      }\n      __typename\n    }\n"
        "    __typename\n  }\n}\n\n"
        "fragment ProductSearchResult on ProductSearchResultItem {\n  searchScore\n  mandatorSpecificData {\n    ...ProductMandatorSpecific\n    __typename\n  }\n  product {\n    ...ProductMandatorIndependent\n    __typename\n  }\n  offer {\n    ...ProductOffer\n    __typename\n  }\n  __typename\n}\n\n"
        "fragment FilterTooltipResult on FilterTooltip {\n  text\n  moreInformationLink\n  __typename\n}\n\n"
        "fragment CheckboxSearchFilterResult on CheckboxSearchFilter {\n  options {\n    identifier\n    name\n    productCount\n    score\n    referenceValue {\n      value\n      unit {\n        abbreviation\n        __typename\n      }\n      __typename\n    }\n    preferredValue {\n      value\n      unit {\n        abbreviation\n        __typename\n      }\n      __typename\n    }\n    tooltip {\n      ...FilterTooltipResult\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n\n"
        "fragment RangeSearchFilterResult on RangeSearchFilter {\n  referenceMin\n  preferredMin\n  referenceMax\n  preferredMax\n  referenceStepSize\n  preferredStepSize\n  rangeMergeInfo {\n    isBottomMerged\n    isTopMerged\n    __typename\n  }\n  referenceUnit {\n    abbreviation\n    __typename\n  }\n  preferredUnit {\n    abbreviation\n    __typename\n  }\n  rangeFilterDataPoint {\n    ...RangeFilterDataPointResult\n    __typename\n  }\n  __typename\n}\n\n"
        "fragment ProductMandatorSpecific on MandatorSpecificData {\n  isBestseller\n  isDeleted\n  showroomSites\n  sectorIds\n  __typename\n}\n\n"
        "fragment ProductMandatorIndependent on ProductV2 {\n  id\n  productId\n  name\n  nameProperties\n  productTypeId\n  productTypeName\n  brandId\n  brandName\n  averageRating\n  totalRatings\n  totalQuestions\n  isProductSet\n  images {\n    url\n    height\n    width\n    __typename\n  }\n  energyEfficiency {\n    energyEfficiencyColorType\n    energyEfficiencyLabelText\n    energyEfficiencyLabelSigns\n    energyEfficiencyImage {\n      url\n      height\n      width\n      __typename\n    }\n    __typename\n  }\n  seo {\n    seoProductTypeName\n    seoNameProperties\n    productGroups {\n      productGroup1\n      productGroup2\n      productGroup3\n      productGroup4\n      __typename\n    }\n    gtin\n    __typename\n  }\n  hasVariants\n  smallDimensions\n  basePrice {\n    priceFactor\n    value\n    __typename\n  }\n  __typename\n}\n\n"
        "fragment ProductOffer on OfferV2 {\n  id\n  productId\n  offerId\n  shopOfferId\n  price {\n    amountIncl\n    amountExcl\n    currency\n    fraction\n    __typename\n  }\n  deliveryOptions {\n    mail {\n      classification\n      futureReleaseDate\n      __typename\n    }\n    pickup {\n      siteId\n      classification\n      futureReleaseDate\n      __typename\n    }\n    detailsProvider {\n      productId\n      offerId\n      quantity\n      type\n      __typename\n    }\n    __typename\n  }\n  label\n  type\n  volumeDiscountPrices {\n    minAmount\n    price {\n      amountIncl\n      amountExcl\n      currency\n      __typename\n    }\n    isDefault\n    __typename\n  }\n  salesInformation {\n    numberOfItems\n    numberOfItemsSold\n    isEndingSoon\n    validFrom\n    __typename\n  }\n  incentiveText\n  isIncentiveCashback\n  isNew\n  isSalesPromotion\n  hideInProductDiscovery\n  canAddToBasket\n  hidePrice\n  insteadOfPrice {\n    type\n    price {\n      amountIncl\n      amountExcl\n      currency\n      fraction\n      __typename\n    }\n    __typename\n  }\n  minOrderQuantity\n  __typename\n}\n\n"
        "fragment RangeFilterDataPointResult on RangeFilterDataPoint {\n  count\n  referenceValue {\n    value\n    unit {\n      abbreviation\n      __typename\n    }\n    __typename\n  }\n  preferredValue {\n    value\n    unit {\n      abbreviation\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n"
    )

    # sortOrder, exclude and siteId are declared by the query but deliberately
    # left out of the variables, exactly as in the request this was taken from.
    variables = {
        "limit": limit,
        "offset": offset,
        "query": search,
        "filters": [],
        "include": ["bra", "pt", "pr", "off"],
        "searchQueryId": "e1b620fc-bf9c-41c6-85c0-cc49e5d12e25",
    }

    # The operation is wrapped in a list, i.e. sent as a batch of one.
    payload = [{
        "operationName": "ENTER_SEARCH",
        "variables": variables,
        "query": query,
    }]

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    print(response)
    if not response.ok:
        # Show the body before failing; calling .json() on an error page would
        # otherwise hide the real problem behind a decode error.
        print(response.text)
        response.raise_for_status()

    data = response.json()
    print(json.dumps(data, indent=2))
    print(json.dumps(data))
    return data


# Script entry point: run the scraper only when executed directly, not on import.
if __name__ == "__main__":
    scrape_digitec()