在 Python beautifulsoup 中提取 tr 值的 href link

Extracting the href link of a tr value in Python beautifulsoup

下面的代码片段可以正常运行。作为改进,我想提取 href 链接,并将其添加到所显示的数据列表中。

import requests
from bs4 import BeautifulSoup
from itertools import groupby

url = "https://bscscan.com/tokentxns"

# Download the page and hand the raw bytes to the HTML parser.
page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")

# Build one (token, value, txn_hash) tuple per table row that contains <td> cells.
data = []
for tr in soup.select("tr:has(td)"):
    cells = [td.get_text(strip=True) for td in tr.select("td")]
    # Exactly nine cell texts are expected; any other count raises ValueError.
    _, txn_hash, tm, age, from_, _, to_, value, token = cells
    data.append((token, value, txn_hash))

# groupby only merges adjacent items, so sort first (the token is the leading field).
data = sorted(data)
for _, group in groupby(data, key=lambda row: row[0]):
    for position, (tok, amount, tx_hash) in enumerate(group):
        # Show the token name only on the first line of each group.
        label = tok if position == 0 else ""
        print("{:<27} {:<27} {:<60}".format(label, amount, tx_hash))
    print()

当前输出:

    Wrapped BNB (WBNB)      0.013344799772136381        0xe45d252ffd82e6720ea1993f95670bb03f130fbe129800d0353e6a54b47f1ab1
                            0.01534839792812691         0x2a6519d13e3bed1b14724a3712a10ee902941c1de9c1fc23f81c438a6954e353
                            0.018368                    0x9cc3beff8b8e70a265fca4f3c95eb5ef3ea01a8731241a22214f0c25e61effa3
                            
    CryptoBlades...(SKILL)  0.971749999999999991        0x885026cd5c6aa9788bc1ef37b4d94f185384d2fcf31a8f44e605bd597b41c9d8
                            0.971749999999999991        0xe0afde7005bde28039ee2ab9c9260df4feed43ec904870c796fa197a12ded1d4
                            0.971749999999999991        0xe94a019d6e473a5485bff9e3e732a9bb9f7e35d4d040cea1866463d771ffbd42

需要改进:# 添加代币名称的 href link (link, token, values, txnhash)

    https://bscscan.com/token/0xbb4cdb9cbd36b01bd1cbaebf2de08d9173bc095c    Wrapped BNB (WBNB)      0.013344799772136381        0xe45d252ffd82e6720ea1993f95670bb03f130fbe129800d0353e6a54b47f1ab1
                                                                                                    0.01534839792812691         0x2a6519d13e3bed1b14724a3712a10ee902941c1de9c1fc23f81c438a6954e353
                                                                                                    0.018368                    0x9cc3beff8b8e70a265fca4f3c95eb5ef3ea01a8731241a22214f0c25e61effa3
                            
    https://bscscan.com/token/0x154a9f9cbd3449ad22fdae23044319d6ef2a1fab    CryptoBlades...(SKILL)  0.971749999999999991        0x885026cd5c6aa9788bc1ef37b4d94f185384d2fcf31a8f44e605bd597b41c9d8
                                                                                                    0.971749999999999991        0xe0afde7005bde28039ee2ab9c9260df4feed43ec904870c796fa197a12ded1d4
                                                                                                    0.971749999999999991        0xe94a019d6e473a5485bff9e3e732a9bb9f7e35d4d040cea1866463d771ffbd42

我不确定您打算如何调整打印格式,才能让这么多列排列得美观,但您可以先将基础 URL 字符串定义为:

# Site root that is prepended to each scraped href to build the full link.
base = 'https://bscscan.com'

然后将链接附加到数据中:

# CSS selectors for the anchors whose href is collected from each row,
# in the order the links are appended.
link_selectors = (
    'td:nth-child(2) a',  # hash_link
    'td:nth-child(5) a',  # from_link
    'td:nth-child(7) a',  # to_link
    'td:nth-child(9) a',  # token_link
)

# For every table row that contains <td> cells, store the hash, token and
# value texts followed by the four links, each prefixed with `base`.
for tr in soup.select("tr:has(td)"):
    cells = [td.get_text(strip=True) for td in tr.select("td")]
    _, txn_hash, tm, age, from_, _, to_, value, token = cells
    links = [base + tr.select_one(selector)['href'] for selector in link_selectors]
    data.append((txn_hash, token, value, *links))