Python Beautifulsoup 在 span 中抓取了错误的数据
Python Beautifulsoup grabbed the wrong data inside spans
我需要从这个 URL 中获取一些额外的数据:https://bscscan.com/tx/0x86d46163d35626df273bbed53cbdba26959d8ab7eb536af0f526dfa937c75dde。我尝试抓取的部分数据返回了错误的结果:
from bs4 import BeautifulSoup
from urllib import request
from urllib.request import Request, urlopen
# Download one BscScan transaction page and print a few of its summary fields.
url = "https://bscscan.com/tx/0x86d46163d35626df273bbed53cbdba26959d8ab7eb536af0f526dfa937c75dde"
headers = {"User-Agent": "Mozilla/5.0"}
req = Request(url, headers=headers)
html = urlopen(req).read()
soup = BeautifulSoup(html, "html.parser")

# Each value is the text of the first <span> matching the given class string
# or id.
blockconf = soup.find(
    "span",
    class_="u-label u-label--xs u-label--badge-in u-label--secondary ml-1",
).text
fromaddr = soup.find("span", id="spanFromAdd").text
toaddr = soup.find(
    "span",
    class_="hash-tag text-truncate hash-tag-custom-from tooltip-address",
).text
transcount = soup.find(
    "span", class_="badge badge-pill badge-secondary align-midle"
).text
val = soup.find(
    "span",
    class_="u-label u-label--value u-label--secondary text-dark rounded mr-1",
).text
transfee = soup.find("span", id="ContentPlaceHolder1_spanTxFee").text

# The first line hands label and value to print() as two arguments, so print()
# adds its own separator after the label's trailing space.
print("Block Number: ", blockconf)
for label, value in (
    ("From Address: ", fromaddr),
    ("To Address: ", toaddr),
    ("Transfer Count: ", transcount),
    ("Value: ", val),
    ("Transaction Fee: ", transfee),
):
    print(label + value)
当前输出:
Block Number: 792 Block Confirmations #-- wrong data
From Address: 0xf3b1c9f3bbc351d90c7a539b26d789af57c869f7
To Address: 0x53a01b184df7ac3c5f839c65fd120c693dda274c #-- wrong data
Transfer Count: 8
Value: 0 BNB
Transaction Fee: 0.00529533 BNB (.60)
想要的输出:
Block Number: 9428747 792 Block Confirmations #-- wanted data
From Address: 0xf3b1c9f3bbc351d90c7a539b26d789af57c869f7
To Address: PancakeSwap: Router v2 #-- wanted data
Transfer Count: 8
Value: 0 BNB
Transaction Fee: 0.00529533 BNB (.60)
试试下面的代码(注意:"To Address" 的值 (PancakeSwap: Router v2) 是从 "Interacted With (To):" 这一行中抓取的):
from bs4 import BeautifulSoup
from urllib import request
from urllib.request import Request, urlopen
# Download one BscScan transaction page and print a few of its summary fields.
url = "https://bscscan.com/tx/0x86d46163d35626df273bbed53cbdba26959d8ab7eb536af0f526dfa937c75dde"
headers = {"User-Agent": "Mozilla/5.0"}
req = Request(url, headers=headers)
# Use the response as a context manager so the connection is closed as soon
# as the body has been read, even if read() raises.
with urlopen(req) as response:
    html = response.read()
soup = BeautifulSoup(html, "html.parser")

# The badge span only holds the confirmations count; the text of its parent
# element contains the block number followed by that badge.
blockconf = soup.find(
    "span",
    class_="u-label u-label--xs u-label--badge-in u-label--secondary ml-1",
).parent.text.strip()
fromaddr = soup.find("span", id="spanFromAdd").text
# Select the element with class "mr-1" that follows #contractCopy as a
# sibling; its text is the name tag, e.g. "(PancakeSwap: Router v2)".
toaddr = soup.select_one("#contractCopy ~ .mr-1").text
transcount = soup.find(
    "span", class_="badge badge-pill badge-secondary align-midle"
).text
val = soup.find(
    "span",
    class_="u-label u-label--value u-label--secondary text-dark rounded mr-1",
).text
transfee = soup.find("span", id="ContentPlaceHolder1_spanTxFee").text

# Build every line the same way so each label is followed by exactly one
# space (passing label and value as two print() arguments would add a second).
print(f"Block Number: {blockconf}")
print(f"From Address: {fromaddr}")
print(f"To Address: {toaddr}")
print(f"Transfer Count: {transcount}")
print(f"Value: {val}")
print(f"Transaction Fee: {transfee}")
打印:
Block Number: 9428747 3013 Block Confirmations
From Address: 0xf3b1c9f3bbc351d90c7a539b26d789af57c869f7
To Address: (PancakeSwap: Router v2)
Transfer Count: 8
Value: 0 BNB
Transaction Fee: 0.00529533 BNB (.60)
编辑:如果想得到 0x10ed43c718714eb63d5aa57b78b54704e256024e (PancakeSwap: Router v2) 这样的结果,
请使用下面的代码:
#...
# Join the text of #contractCopy and the text of its following ".mr-1"
# sibling with a single space.
toaddr = " ".join(
    (
        soup.select_one("#contractCopy").text,
        soup.select_one("#contractCopy ~ .mr-1").text,
    )
)
#...
我需要从这个 URL 中获取一些额外的数据:https://bscscan.com/tx/0x86d46163d35626df273bbed53cbdba26959d8ab7eb536af0f526dfa937c75dde。我尝试抓取的部分数据返回了错误的结果:
from bs4 import BeautifulSoup
from urllib import request
from urllib.request import Request, urlopen
# Download one BscScan transaction page and print a few of its summary fields.
url = "https://bscscan.com/tx/0x86d46163d35626df273bbed53cbdba26959d8ab7eb536af0f526dfa937c75dde"
headers = {"User-Agent": "Mozilla/5.0"}
req = Request(url, headers=headers)
html = urlopen(req).read()
soup = BeautifulSoup(html, "html.parser")

# Each value is the text of the first <span> matching the given class string
# or id.
blockconf = soup.find(
    "span",
    class_="u-label u-label--xs u-label--badge-in u-label--secondary ml-1",
).text
fromaddr = soup.find("span", id="spanFromAdd").text
toaddr = soup.find(
    "span",
    class_="hash-tag text-truncate hash-tag-custom-from tooltip-address",
).text
transcount = soup.find(
    "span", class_="badge badge-pill badge-secondary align-midle"
).text
val = soup.find(
    "span",
    class_="u-label u-label--value u-label--secondary text-dark rounded mr-1",
).text
transfee = soup.find("span", id="ContentPlaceHolder1_spanTxFee").text

# The first line hands label and value to print() as two arguments, so print()
# adds its own separator after the label's trailing space.
print("Block Number: ", blockconf)
for label, value in (
    ("From Address: ", fromaddr),
    ("To Address: ", toaddr),
    ("Transfer Count: ", transcount),
    ("Value: ", val),
    ("Transaction Fee: ", transfee),
):
    print(label + value)
当前输出:
Block Number: 792 Block Confirmations #-- wrong data
From Address: 0xf3b1c9f3bbc351d90c7a539b26d789af57c869f7
To Address: 0x53a01b184df7ac3c5f839c65fd120c693dda274c #-- wrong data
Transfer Count: 8
Value: 0 BNB
Transaction Fee: 0.00529533 BNB (.60)
想要的输出:
Block Number: 9428747 792 Block Confirmations #-- wanted data
From Address: 0xf3b1c9f3bbc351d90c7a539b26d789af57c869f7
To Address: PancakeSwap: Router v2 #-- wanted data
Transfer Count: 8
Value: 0 BNB
Transaction Fee: 0.00529533 BNB (.60)
试试下面的代码(注意:"To Address" 的值 (PancakeSwap: Router v2) 是从 "Interacted With (To):" 这一行中抓取的):
from bs4 import BeautifulSoup
from urllib import request
from urllib.request import Request, urlopen
# Download one BscScan transaction page and print a few of its summary fields.
url = "https://bscscan.com/tx/0x86d46163d35626df273bbed53cbdba26959d8ab7eb536af0f526dfa937c75dde"
headers = {"User-Agent": "Mozilla/5.0"}
req = Request(url, headers=headers)
# Use the response as a context manager so the connection is closed as soon
# as the body has been read, even if read() raises.
with urlopen(req) as response:
    html = response.read()
soup = BeautifulSoup(html, "html.parser")

# The badge span only holds the confirmations count; the text of its parent
# element contains the block number followed by that badge.
blockconf = soup.find(
    "span",
    class_="u-label u-label--xs u-label--badge-in u-label--secondary ml-1",
).parent.text.strip()
fromaddr = soup.find("span", id="spanFromAdd").text
# Select the element with class "mr-1" that follows #contractCopy as a
# sibling; its text is the name tag, e.g. "(PancakeSwap: Router v2)".
toaddr = soup.select_one("#contractCopy ~ .mr-1").text
transcount = soup.find(
    "span", class_="badge badge-pill badge-secondary align-midle"
).text
val = soup.find(
    "span",
    class_="u-label u-label--value u-label--secondary text-dark rounded mr-1",
).text
transfee = soup.find("span", id="ContentPlaceHolder1_spanTxFee").text

# Build every line the same way so each label is followed by exactly one
# space (passing label and value as two print() arguments would add a second).
print(f"Block Number: {blockconf}")
print(f"From Address: {fromaddr}")
print(f"To Address: {toaddr}")
print(f"Transfer Count: {transcount}")
print(f"Value: {val}")
print(f"Transaction Fee: {transfee}")
打印:
Block Number: 9428747 3013 Block Confirmations
From Address: 0xf3b1c9f3bbc351d90c7a539b26d789af57c869f7
To Address: (PancakeSwap: Router v2)
Transfer Count: 8
Value: 0 BNB
Transaction Fee: 0.00529533 BNB (.60)
编辑:如果想得到 0x10ed43c718714eb63d5aa57b78b54704e256024e (PancakeSwap: Router v2) 这样的结果,
请使用下面的代码:
#...
# Join the text of #contractCopy and the text of its following ".mr-1"
# sibling with a single space.
toaddr = " ".join(
    (
        soup.select_one("#contractCopy").text,
        soup.select_one("#contractCopy ~ .mr-1").text,
    )
)
#...