使用 Mechanize 抓取网页

Scraping with Mechanize

我遇到了一个问题:mechanize 得到的响应与浏览器得到的不一样。我正在尝试从下面这个网页获取价格,该网页支持通过预先填好参数的 URL 直接访问。

将商品添加到购物车

http://store.nike.com/us/services/jcartService?callback=nike_Cart_hanleJCartResponse&action=addItem&lang_locale=en_US&country=US&catalogId=1&productId=463712&price=00.0&siteId=null&line1=Nike+Air+Max+1+Ultra+Moire&line2=Men%27s+Shoe&passcode=null&sizeType=null&skuAndSize=10661133%3A10&qty=1&rt=json&view=3&skuId=10661133&displaySize=14&_=142655682313

我有的是:

import mechanize
import urllib
import cookielib
import BeautifulSoup
import html2text

# Pre-filled "add item to cart" request. The query string asks for a JSON
# reply (rt=json) wrapped in the callback named by the `callback` parameter.
url='http://store.nike.com/us/services/jcartService?callback=nike_Cart_hanleJCartResponse&action=addItem&lang_locale=en_US&country=US&catalogId=1&productId=463712&price=00.0&siteId=null&line1=Nike+Air+Max+1+Ultra+Moire&line2=Men%27s+Shoe&passcode=null&sizeType=null&skuAndSize=10661133%3A10&qty=1&rt=json&view=3&skuId=10661133&displaySize=14&_=142655682313'

# Browser with an explicit (initially empty) cookie jar attached.
br = mechanize.Browser()
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)
# Handler toggles: equiv, redirect and robots handling are switched on;
# gzip and referer handling are switched off.
br.set_handle_equiv(True)
br.set_handle_gzip(False)
br.set_handle_redirect(True)
br.set_handle_referer(False)
br.set_handle_robots(True)
# Refresh handling is installed with max_time=1.
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
# Replace the default request headers with a single custom User-agent value.
br.addheaders = [('User-agent', 'Chrome')]

# NOTE(review): the cart URL is the only page this script requests, and it is
# opened twice -- the first response is discarded and only the second one is
# read. Because the URL carries action=addItem, this presumably submits the
# add-to-cart request twice; confirm whether that is intended.
br.open(url)
pageText=br.open(url).read()
print pageText

然后我打算做一些基本的字符串解析来获取价格。问题是,当我抓取页面时,我得到了这个:

print pageText

nike_Cart_hanleJCartResponse({
    "exceptions": [{
        "property": "catalogRefId",
        "errorcode": "noItemsToAddInStock",
        "message": "Sorry, Ѿ, this item isn't available anymore because other shoppers added it to their carts first. But if they don't check out in the next few minutes, you've still got a chance. Please try again shortly. [Code: 420N-00000000]"
    }],
    "status": "failure",
    "order": {
        "id": "dummy",
        "objType": "order",
        "itemQuantity": 0
    }
});

而它本应像在浏览器中那样返回如下内容:

nike_Cart_hanleJCartResponse({
    "status" :"success","order" :{
        "id" :"O1014750586",
        "objType" :"order",
        "itemQuantity" :1,
        "priceInfo" :{
            "currencyFormat" :"$0.00",
            "currency" :"USD",
            "amount" :"75.0",
            ....
}]}]}});

我查看了 lxml,但对如何着手感到很困惑。无法正确抓取此页面吗?

如有任何帮助,我们将不胜感激。提前致谢!

首先导航到主商店页面,以便您获得正确的 cookie。然后导航到所需的 URL:

import mechanize

# Store landing page. It is requested first so that the browser object holds
# the site's cookies before the cart service is called.
store_url = "http://store.nike.com"

# Pre-filled "add item to cart" request, split across adjacent literals for
# readability; the concatenated value is the single original URL.
cart_url = (
    "http://store.nike.com/us/services/jcartService"
    "?callback=nike_Cart_hanleJCartResponse&action=addItem"
    "&lang_locale=en_US&country=US&catalogId=1&productId=463712"
    "&price=00.0&siteId=null"
    "&line1=Nike+Air+Max+1+Ultra+Moire&line2=Men%27s+Shoe"
    "&passcode=null&sizeType=null&skuAndSize=10661133%3A10"
    "&qty=1&rt=json&view=3&skuId=10661133&displaySize=14"
    "&_=142655682313"
)

br = mechanize.Browser()

# The body of the landing page is not needed; the request is made only for
# its effect on the browser's cookie state.
br.open(store_url)

# Same browser object, so the cart request is sent with those cookies.
response = br.open(cart_url)
data = response.read()
print(data)

输出

nike_Cart_hanleJCartResponse({"status" :"success","order" :{"id" :"O1014976420","objType" :"order","itemQuantity" :1,"priceInfo" :{"currencyFormat" :"$0.00","currency" :"USD","amount" :"75.0","isDiscounted" :false,"formattedAmount" :"$75.00","subTotal" :75.0,"formattedSubTotal" :"$75.00","discountAmount" :0.0,"formattedDiscountAmount" :"$0.00","tax" :0.0,"rawSubtotal" :75.0,"formattedRawSubtotal" :"$75.00","formattedTax" :"$0.00","shipping" :0.0,"formattedShipping" :"$0.00","rawShipping" :0.0,"formattedRawShipping" :"$0.00","giftWrap" :0.0,"formattedGiftWrap" :"$0.00","total" :75.0,"formattedTotal" :"$75.00"},"originOfOrder" :"0","transient" :false,"shippingGroups" :[{"id" :"SG1022772456","objType" :"shippingGroup","priceInfo" :{"currencyFormat" :"$0.00","currency" :"USD","amount" :"0.0","isDiscounted" :false,"formattedAmount" :"$0.00","rawShipping" :0.0,"formattedRawShipping" :"$0.00"},"shippingMethod" :"Ground Service","description" :"me","shippingMethodDisplay" :"Standard","commerceItems" :[{"id" :"CI1022253932","objType" :"commerceItem","priceInfo" :{"currencyFormat" :"$0.00","currency" :"USD","amount" :"75.0","isDiscounted" :false,"formattedAmount" :"$75.00","listPrice" :75.0,"formattedListPrice" :"$75.00","salePrice" :0.0,"formattedSalePrice" :"$0.00","onSale" :false,"rawTotalPrice" :75.0,"formattedRawTotalPrice" :"$75.00","giftWrapPrice" :0.0,"formattedGiftWrapPrice" :"$0.00"},"validForCountry" :true,"commerceItemClassType" :"default","quantity" :1,"catalogRefId" :"10661133","catalogId" :"1","product" :{"id" :"463712","view" :"short","displayName" :"Nike Roshe Run Men's Shoe","description" :"Nike Roshe Run Men's Shoe","colorNumber" :"010","activeIndicator" :"ACTIVE","type" :"nikeProduct","styleNumber" :"511881","salePrice" :"75.0","listPrice" :"75.0","employeePrice" :"45.0","onSale" :false,"currency" :"USD","currencyFormat" :"$0.00","formattedListPrice" :"$75.00","formattedSalePrice" :"$75.00","colorDescription" 
:"Black/Sail/Anthracite","prdGroupId" :"943980","nikeType" :"FOOTWEAR","dynamicAttributes" :{"includepromo" :"GIFTS2014","nikeidmatchstyle" :"704691","width" :"Regular","nikeidmatch" :"true","publishdate" :"05/11/2010","nikeidmatchproductid" :"1094199","divisioncode" :"20","productdisplayorder" :"7","simplecolor" :"BLACK","quantitylimit" :"2","giftwrap" :"true","modelType" :"FOOTWEAR","swatchcolorhex" :"000000","gender" :"Men","classid" :"120001"}},"shipTo" :null,"giftMessage" :null,"giftMessageType" :0,"itemSource" :"SC","sizeType" :"","displaySize" :"14","sizeDescription" :"14","eanNumber" :"00675911199978","colorNumber" :"010","colorDescription" :"Black/Sail/Anthracite"}]}]}});