使用 Python Selenium 从 Javascript 代码获取数据
Get Data from Javascript codes with Python Selenium
我正在尝试使用 Selenium 从网站获取 JavaScript 数据,但一直无法实现我想要的效果。下面是该 JavaScript 代码的屏幕截图。我想从这段代码中提取每个变体的条形码编号和变体值(例如 36 码、37 码)。我怎样才能最好地做到这一点?感谢您的帮助。
Web page javascript code
编辑 1:
Javascript代码:
window.__PRODUCT_DETAIL_APP_INITIAL_STATE__={"product":{"attributes":[],"alternativeVariants":[],"variants":[{"attributeId":338,"attributeName":"Beden","attributeType":"Size","attributeValue":"36","stamps":[{"type":1,"text":" KARGO BEDAVA"},{"type":3,"text":"%35"}],"price":{"profitMargin":0,"discountedPrice":{"text":"143,99 TL","value":143.99},"sellingPrice":{"text":"159,99 TL","value":159.99},"originalPrice":{"text":"219,99 TL","value":219.99},"currency":"TRY"},"fulfilmentType":"mp","attributeBeautifiedValue":"36","isWinner":true,"listingId":"cb32c614f54395f199628ef7066ab102","stock":null,"sellable":true,"availableForClaim":true,"barcode":"100406115","itemNumber":118134976,"discountedPriceInfo":"Sepette %10 İndirim","hasCollectable":false,"unitInfo":{"unitPrice":false,"unitPriceText":false},"rushDeliveryMerchantListingExist":false}],"otherMerchants":[],"campaign":{"id":591963,"name":"Kadın Giyimde Avantajlı Ürünler - Sepette %10 İndirim","startDate":"2021-11-30T14:30:05","endDate":"2021-12-07T05:00:00","isMultipleSupplied":false,"stockTypeId":7,"url":"/kadin-giyimde-avantajli-urunler---sepette--10-indirim/butikdetay/591963","showTimer":false},"category":{"id":103718,"name":"Klasik Topuklu Ayakkabı","hierarchy":"Ayakkabı/Topuklu Ayakkabı/Klasik Topuklu Ayakkabı","refundable":true,"beautifiedName":"klasik-topuklu-ayakkabi","isVASEnabled":false},"brand":{"isVirtual":false,"beautifiedName":"laminta","id":205748,"name":"LAMİNTA","path":"/laminta-x-b205748"},"color":"GÜMÜŞ ŞEFFAF","metaBrand":{"id":205748,"name":"LAMİNTA","beautifiedName":"","isVirtual":false,"path":""},"showVariants":true,"showSexualContent":true,"brandCategoryBanners":[],"allVariants":[{"itemNumber":118134976,"value":"36","inStock":true,"currency":"TRY","barcode":"100406115","price":143.99},{"itemNumber":118134979,"value":"37","inStock":true,"currency":"TRY","barcode":"100406116","price":143.99},{"itemNumber":118134975,"value":"38","inStock":true,"currency":"TRY","barcode":"100406117","price":143.99},{"itemNumber":118134983,"value":"39","inStock":true,"currency":"TRY","barcode":"100406118","price":143.99},{"itemNumber":118123516,"value":"40","inStock":true,"currency":"TRY","barcode":"100406119","price":143.99}]
我的简单代码如下。用下面的代码我只能得到一个条形码,无法为每个变体分别提取条形码和其他信息。
def barkodBul():
    """Return the first barcode found in the page's inline <script> text.

    Walks every <script> element of the current page, takes the first
    "{"-delimited chunk of its innerHTML that mentions "barcode", and reads
    the value of the first comma-separated field containing "barcode" by
    splitting that field on ":".

    Returns:
        The barcode value with double quotes removed, or 0 when no script
        containing the text "barcode" is found.

    NOTE: only the first match is returned, so this cannot list the barcode
    of every variant.
    """
    # Cheap pre-check: skip the DOM walk entirely if the page never
    # mentions a barcode.
    if 'barcode' not in driver.page_source:
        return 0

    for script in driver.find_elements_by_tag_name("script"):
        inner_html = script.get_property('innerHTML')
        if not re.search('barcode', inner_html):
            continue

        # First object-like chunk that mentions a barcode.
        chunk = [part for part in inner_html.split("{") if "barcode" in part][0]
        for field in chunk.split(","):
            if 'barcode' in field:
                # field looks like '"barcode":"100406115"'; keep the value.
                raw_value = field.split(":")[1]
                print(raw_value)
                barkod = raw_value.replace('"', '')
                print(barkod)
                return barkod
    return 0
我想做的事情:
36 BarcodeFor36
37 BarcodeFor37 等
编辑 2 - 解决方案:谢谢 esqew
def barkodBul2():
    """Print "<attributeValue> <barcode>" for each entry in product.variants.

    Sleeps 3 seconds, then waits up to 30 seconds for an element with class
    "pr-new-br" to become visible, reads the
    window.__PRODUCT_DETAIL_APP_INITIAL_STATE__ object through the driver,
    and prints one line per variant.
    """
    time.sleep(3)
    timeout = 30
    WebDriverWait(driver, timeout).until(
        EC.visibility_of_element_located((By.CLASS_NAME, "pr-new-br"))
    )
    # execute_script returns the JS object already converted to Python
    # dicts/lists, so no string parsing is needed.
    product_detail = driver.execute_script(
        'return window.__PRODUCT_DETAIL_APP_INITIAL_STATE__'
    )
    for variant in product_detail['product']['variants']:
        print(variant['attributeValue'] + " " + variant['barcode'])
Output:
36 100406115
37 100406116
38 100406117
39 100406118
40 100406119
当数据结构良好地作为对象时,为什么要为此使用 RegExp?直接用 Selenium 提取内容,然后从那里访问你想要的数据:
def barkodBul():
    """Print "<value> <barcode>" for each entry in product.allVariants.

    Reads the window.__PRODUCT_DETAIL_APP_INITIAL_STATE__ object directly
    through the driver instead of parsing the script text, then prints one
    line per variant.
    """
    # execute_script returns the JS object already converted to Python
    # dicts/lists.
    product_detail = driver.execute_script(
        'return window.__PRODUCT_DETAIL_APP_INITIAL_STATE__'
    )
    for variant in product_detail['product']['allVariants']:
        print(variant['value'] + " " + variant['barcode'])
我正在尝试使用 Selenium 从网站获取 JavaScript 数据,但一直无法实现我想要的效果。下面是该 JavaScript 代码的屏幕截图。我想从这段代码中提取每个变体的条形码编号和变体值(例如 36 码、37 码)。我怎样才能最好地做到这一点?感谢您的帮助。
Web page javascript code
编辑 1:
Javascript代码:
window.__PRODUCT_DETAIL_APP_INITIAL_STATE__={"product":{"attributes":[],"alternativeVariants":[],"variants":[{"attributeId":338,"attributeName":"Beden","attributeType":"Size","attributeValue":"36","stamps":[{"type":1,"text":" KARGO BEDAVA"},{"type":3,"text":"%35"}],"price":{"profitMargin":0,"discountedPrice":{"text":"143,99 TL","value":143.99},"sellingPrice":{"text":"159,99 TL","value":159.99},"originalPrice":{"text":"219,99 TL","value":219.99},"currency":"TRY"},"fulfilmentType":"mp","attributeBeautifiedValue":"36","isWinner":true,"listingId":"cb32c614f54395f199628ef7066ab102","stock":null,"sellable":true,"availableForClaim":true,"barcode":"100406115","itemNumber":118134976,"discountedPriceInfo":"Sepette %10 İndirim","hasCollectable":false,"unitInfo":{"unitPrice":false,"unitPriceText":false},"rushDeliveryMerchantListingExist":false}],"otherMerchants":[],"campaign":{"id":591963,"name":"Kadın Giyimde Avantajlı Ürünler - Sepette %10 İndirim","startDate":"2021-11-30T14:30:05","endDate":"2021-12-07T05:00:00","isMultipleSupplied":false,"stockTypeId":7,"url":"/kadin-giyimde-avantajli-urunler---sepette--10-indirim/butikdetay/591963","showTimer":false},"category":{"id":103718,"name":"Klasik Topuklu Ayakkabı","hierarchy":"Ayakkabı/Topuklu Ayakkabı/Klasik Topuklu Ayakkabı","refundable":true,"beautifiedName":"klasik-topuklu-ayakkabi","isVASEnabled":false},"brand":{"isVirtual":false,"beautifiedName":"laminta","id":205748,"name":"LAMİNTA","path":"/laminta-x-b205748"},"color":"GÜMÜŞ ŞEFFAF","metaBrand":{"id":205748,"name":"LAMİNTA","beautifiedName":"","isVirtual":false,"path":""},"showVariants":true,"showSexualContent":true,"brandCategoryBanners":[],"allVariants":[{"itemNumber":118134976,"value":"36","inStock":true,"currency":"TRY","barcode":"100406115","price":143.99},{"itemNumber":118134979,"value":"37","inStock":true,"currency":"TRY","barcode":"100406116","price":143.99},{"itemNumber":118134975,"value":"38","inStock":true,"currency":"TRY","barcode":"100406117","price":143.99},{"itemNumber":118134983,"value":"39","inStock":true,"currency":"TRY","barcode":"100406118","price":143.99},{"itemNumber":118123516,"value":"40","inStock":true,"currency":"TRY","barcode":"100406119","price":143.99}]
我的简单代码如下。用下面的代码我只能得到一个条形码,无法为每个变体分别提取条形码和其他信息。
def barkodBul():
    """Return the first barcode found in the page's inline <script> text.

    Walks every <script> element of the current page, takes the first
    "{"-delimited chunk of its innerHTML that mentions "barcode", and reads
    the value of the first comma-separated field containing "barcode" by
    splitting that field on ":".

    Returns:
        The barcode value with double quotes removed, or 0 when no script
        containing the text "barcode" is found.

    NOTE: only the first match is returned, so this cannot list the barcode
    of every variant.
    """
    # Cheap pre-check: skip the DOM walk entirely if the page never
    # mentions a barcode.
    if 'barcode' not in driver.page_source:
        return 0

    for script in driver.find_elements_by_tag_name("script"):
        inner_html = script.get_property('innerHTML')
        if not re.search('barcode', inner_html):
            continue

        # First object-like chunk that mentions a barcode.
        chunk = [part for part in inner_html.split("{") if "barcode" in part][0]
        for field in chunk.split(","):
            if 'barcode' in field:
                # field looks like '"barcode":"100406115"'; keep the value.
                raw_value = field.split(":")[1]
                print(raw_value)
                barkod = raw_value.replace('"', '')
                print(barkod)
                return barkod
    return 0
我想做的事情: 36 BarcodeFor36,37 BarcodeFor37 等
编辑 2 - 解决方案:谢谢 esqew
def barkodBul2():
    """Print "<attributeValue> <barcode>" for each entry in product.variants.

    Sleeps 3 seconds, then waits up to 30 seconds for an element with class
    "pr-new-br" to become visible, reads the
    window.__PRODUCT_DETAIL_APP_INITIAL_STATE__ object through the driver,
    and prints one line per variant.
    """
    time.sleep(3)
    timeout = 30
    WebDriverWait(driver, timeout).until(
        EC.visibility_of_element_located((By.CLASS_NAME, "pr-new-br"))
    )
    # execute_script returns the JS object already converted to Python
    # dicts/lists, so no string parsing is needed.
    product_detail = driver.execute_script(
        'return window.__PRODUCT_DETAIL_APP_INITIAL_STATE__'
    )
    for variant in product_detail['product']['variants']:
        print(variant['attributeValue'] + " " + variant['barcode'])
Output:
36 100406115
37 100406116
38 100406117
39 100406118
40 100406119
当数据结构良好地作为对象时,为什么要为此使用 RegExp?直接用 Selenium 提取内容,然后从那里访问你想要的数据:
def barkodBul():
    """Print "<value> <barcode>" for each entry in product.allVariants.

    Reads the window.__PRODUCT_DETAIL_APP_INITIAL_STATE__ object directly
    through the driver instead of parsing the script text, then prints one
    line per variant.
    """
    # execute_script returns the JS object already converted to Python
    # dicts/lists.
    product_detail = driver.execute_script(
        'return window.__PRODUCT_DETAIL_APP_INITIAL_STATE__'
    )
    for variant in product_detail['product']['allVariants']:
        print(variant['value'] + " " + variant['barcode'])