在 Python beautifulsoup 中提取 tr 值的 href link
Extracting the href link of a tr value in Python beautifulsoup
下面的代码片段有效。作为改进它的一部分,我想提取 href link 并将其添加到正在显示的数据列表中。
import requests
from bs4 import BeautifulSoup
from itertools import groupby
# Scrape the BscScan token-transfer table and print the transfers grouped by
# token name.
url = "https://bscscan.com/tokentxns"

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() surfaces an HTTP error instead of silently parsing an
# error page that contains no table rows.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

data = []
# "tr:has(td)" keeps only the rows that contain at least one <td> cell.
for tr in soup.select("tr:has(td)"):
    tds = [td.get_text(strip=True) for td in tr.select("td")]
    # Every row is expected to have exactly nine cells; the two "_" slots are
    # cells whose text is not used.
    _, txn_hash, tm, age, from_, _, to_, value, token = tds
    data.append((token, value, txn_hash))

# groupby() only merges adjacent items, so the rows must be sorted by the
# grouping key (the token name, element 0) first.
data.sort()
for _, g in groupby(data, lambda k: k[0]):
    g = list(map(list, g))
    # Blank the token name on every row but the first of its group so that the
    # name is printed once per group.
    for subl in g[1:]:
        subl[0] = ""
    for subl in g:
        print("{:<27} {:<27} {:<60}".format(*subl))
    # Empty line between token groups.
    print()
当前输出:
Wrapped BNB (WBNB) 0.013344799772136381 0xe45d252ffd82e6720ea1993f95670bb03f130fbe129800d0353e6a54b47f1ab1
0.01534839792812691 0x2a6519d13e3bed1b14724a3712a10ee902941c1de9c1fc23f81c438a6954e353
0.018368 0x9cc3beff8b8e70a265fca4f3c95eb5ef3ea01a8731241a22214f0c25e61effa3
CryptoBlades...(SKILL) 0.971749999999999991 0x885026cd5c6aa9788bc1ef37b4d94f185384d2fcf31a8f44e605bd597b41c9d8
0.971749999999999991 0xe0afde7005bde28039ee2ab9c9260df4feed43ec904870c796fa197a12ded1d4
0.971749999999999991 0xe94a019d6e473a5485bff9e3e732a9bb9f7e35d4d040cea1866463d771ffbd42
期望的改进:为代币名称添加 href 链接,输出列依次为 (link, token, value, txnhash):
https://bscscan.com/token/0xbb4cdb9cbd36b01bd1cbaebf2de08d9173bc095c Wrapped BNB (WBNB) 0.013344799772136381 0xe45d252ffd82e6720ea1993f95670bb03f130fbe129800d0353e6a54b47f1ab1
0.01534839792812691 0x2a6519d13e3bed1b14724a3712a10ee902941c1de9c1fc23f81c438a6954e353
0.018368 0x9cc3beff8b8e70a265fca4f3c95eb5ef3ea01a8731241a22214f0c25e61effa3
https://bscscan.com/token/0x154a9f9cbd3449ad22fdae23044319d6ef2a1fab CryptoBlades...(SKILL) 0.971749999999999991 0x885026cd5c6aa9788bc1ef37b4d94f185384d2fcf31a8f44e605bd597b41c9d8
0.971749999999999991 0xe0afde7005bde28039ee2ab9c9260df4feed43ec904870c796fa197a12ded1d4
0.971749999999999991 0xe94a019d6e473a5485bff9e3e732a9bb9f7e35d4d040cea1866463d771ffbd42
我不确定您打算如何调整打印格式,才能让这么多列显示得整齐,但您可以先把基础 URL 字符串定义为:
# Site root that is prepended to the href values read from the table cells
# (assumes those hrefs are site-relative paths such as "/token/0x..." -- TODO confirm).
base = 'https://bscscan.com'
然后将链接附加到数据中:
def _cell_link(row, column):
    """Return the absolute URL of the first link in a table cell.

    row is a <tr> element and column is the 1-based position of the <td>
    to inspect. Returns "" when that cell holds no <a href=...> element, so
    every tuple keeps the same length and stays sortable and printable.
    """
    anchor = row.select_one("td:nth-child({}) a[href]".format(column))
    # select_one() returns None when nothing matches; subscripting that
    # result directly would raise TypeError.
    if anchor is None:
        return ""
    return base + anchor["href"]


for tr in soup.select("tr:has(td)"):
    tds = [td.get_text(strip=True) for td in tr.select("td")]
    _, txn_hash, tm, age, from_, _, to_, value, token = tds
    # NOTE: the token name is now element 1 of each tuple (txn_hash is
    # element 0), so code that sorts or groups by k[0] has to be switched to
    # k[1] to keep grouping by token.
    data.append((
        txn_hash, token, value,
        _cell_link(tr, 2),  # hash_link
        _cell_link(tr, 5),  # from_link
        _cell_link(tr, 7),  # to_link
        _cell_link(tr, 9),  # token_link
    ))
下面的代码片段有效。作为改进它的一部分,我想提取 href link 并将其添加到正在显示的数据列表中。
import requests
from bs4 import BeautifulSoup
from itertools import groupby
# Scrape the BscScan token-transfer table and print the transfers grouped by
# token name.
url = "https://bscscan.com/tokentxns"

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() surfaces an HTTP error instead of silently parsing an
# error page that contains no table rows.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

data = []
# "tr:has(td)" keeps only the rows that contain at least one <td> cell.
for tr in soup.select("tr:has(td)"):
    tds = [td.get_text(strip=True) for td in tr.select("td")]
    # Every row is expected to have exactly nine cells; the two "_" slots are
    # cells whose text is not used.
    _, txn_hash, tm, age, from_, _, to_, value, token = tds
    data.append((token, value, txn_hash))

# groupby() only merges adjacent items, so the rows must be sorted by the
# grouping key (the token name, element 0) first.
data.sort()
for _, g in groupby(data, lambda k: k[0]):
    g = list(map(list, g))
    # Blank the token name on every row but the first of its group so that the
    # name is printed once per group.
    for subl in g[1:]:
        subl[0] = ""
    for subl in g:
        print("{:<27} {:<27} {:<60}".format(*subl))
    # Empty line between token groups.
    print()
当前输出:
Wrapped BNB (WBNB) 0.013344799772136381 0xe45d252ffd82e6720ea1993f95670bb03f130fbe129800d0353e6a54b47f1ab1
0.01534839792812691 0x2a6519d13e3bed1b14724a3712a10ee902941c1de9c1fc23f81c438a6954e353
0.018368 0x9cc3beff8b8e70a265fca4f3c95eb5ef3ea01a8731241a22214f0c25e61effa3
CryptoBlades...(SKILL) 0.971749999999999991 0x885026cd5c6aa9788bc1ef37b4d94f185384d2fcf31a8f44e605bd597b41c9d8
0.971749999999999991 0xe0afde7005bde28039ee2ab9c9260df4feed43ec904870c796fa197a12ded1d4
0.971749999999999991 0xe94a019d6e473a5485bff9e3e732a9bb9f7e35d4d040cea1866463d771ffbd42
期望的改进:为代币名称添加 href 链接,输出列依次为 (link, token, value, txnhash):
https://bscscan.com/token/0xbb4cdb9cbd36b01bd1cbaebf2de08d9173bc095c Wrapped BNB (WBNB) 0.013344799772136381 0xe45d252ffd82e6720ea1993f95670bb03f130fbe129800d0353e6a54b47f1ab1
0.01534839792812691 0x2a6519d13e3bed1b14724a3712a10ee902941c1de9c1fc23f81c438a6954e353
0.018368 0x9cc3beff8b8e70a265fca4f3c95eb5ef3ea01a8731241a22214f0c25e61effa3
https://bscscan.com/token/0x154a9f9cbd3449ad22fdae23044319d6ef2a1fab CryptoBlades...(SKILL) 0.971749999999999991 0x885026cd5c6aa9788bc1ef37b4d94f185384d2fcf31a8f44e605bd597b41c9d8
0.971749999999999991 0xe0afde7005bde28039ee2ab9c9260df4feed43ec904870c796fa197a12ded1d4
0.971749999999999991 0xe94a019d6e473a5485bff9e3e732a9bb9f7e35d4d040cea1866463d771ffbd42
我不确定您打算如何调整打印格式,才能让这么多列显示得整齐,但您可以先把基础 URL 字符串定义为:
# Site root that is prepended to the href values read from the table cells
# (assumes those hrefs are site-relative paths such as "/token/0x..." -- TODO confirm).
base = 'https://bscscan.com'
然后将链接附加到数据中:
def _cell_link(row, column):
    """Return the absolute URL of the first link in a table cell.

    row is a <tr> element and column is the 1-based position of the <td>
    to inspect. Returns "" when that cell holds no <a href=...> element, so
    every tuple keeps the same length and stays sortable and printable.
    """
    anchor = row.select_one("td:nth-child({}) a[href]".format(column))
    # select_one() returns None when nothing matches; subscripting that
    # result directly would raise TypeError.
    if anchor is None:
        return ""
    return base + anchor["href"]


for tr in soup.select("tr:has(td)"):
    tds = [td.get_text(strip=True) for td in tr.select("td")]
    _, txn_hash, tm, age, from_, _, to_, value, token = tds
    # NOTE: the token name is now element 1 of each tuple (txn_hash is
    # element 0), so code that sorts or groups by k[0] has to be switched to
    # k[1] to keep grouping by token.
    data.append((
        txn_hash, token, value,
        _cell_link(tr, 2),  # hash_link
        _cell_link(tr, 5),  # from_link
        _cell_link(tr, 7),  # to_link
        _cell_link(tr, 9),  # token_link
    ))