格式化从下拉框中抓取的数据并将数据添加到 Python Beautifulsoup 中的结果中
Formatting grabbed data from dropdown boxes and adding data into the result in Python Beautifulsoup
我有以下代码,它可以部分运行,但结果显示非常混乱。我需要有关如何获取附加数据以及如何格式化输出的帮助。
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import re, random, ctypes
import requests, os
from time import sleep
import beepy as beep
from time import strftime
import datetime
# Pool of browser User-Agent values; one is chosen at random for the request.
# Each entry is the bare header value (not the text "header = {...}"), so it
# can be placed directly into a headers dict.
user_agent_list = [
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36',
]

# requests takes headers as a dict via the ``headers=`` keyword. The second
# positional argument of requests.get() is ``params`` (the query string), so
# passing the header there would never set the User-Agent.
header = {'User-Agent': random.choice(user_agent_list)}
line = "https://bscscan.com/address/0x639AD7c49EC616a64e074c21a58608C0d843A8a3"
contractpage = requests.get(line, headers=header)
ca = BeautifulSoup(contractpage.content, 'html.parser')

# get_text() returns all text nested under the matched element, labels and
# whitespace included, which is why the printed values span several lines.
contractcreator = ca.find(id='ContentPlaceHolder1_trContract').get_text()
tokenname = ca.find(id='ContentPlaceHolder1_tr_tokeninfo').get_text()
transcount = ca.find('p', class_='mr-2 mb-2').get_text()
tokencount = ca.find(id='ContentPlaceHolder1_tokenbalance').get_text()

print(contractcreator)
print("Token Name: ", tokenname)
print("Trans Count: ", transcount)
print("Token Count: ", tokencount)
当前输出:#-- 非常非常混乱,缺少一些预期数据
ContractCreator:
0x7ab96edb99e1faa06238609947792038520f1a3c at txn 0x51a8db6ac707dcd9644b5400b533c9bbe95243054c9c67e8a8aeeab38c7f7e79
Token Name:
TokenTracker:
TripCandy (CANDY)
Trans Count:
Latest 25 from a total of 2,878 transactions
Token Count:
Token:
6.10
3
Could not find any matches! Token display limit reached. Click to Show more
BEP-20 Tokens (3) Minereum BSC (MNEB)150,000 MNEB Neftipedia (NFT)1 NFT$0.01@0.0086TripCandy (CANDY)76,581.46551862 CANDY6.09@0.0095
Wanted Output: #-- 当我运行代码时提取的当前数据
ContractCreator: 0x7ab96edb99e1faa06238609947792038520f1a3c
Txn: 0x51a8db6ac707dcd9644b5400b533c9bbe95243054c9c67e8a8aeeab38c7f7e79
Token Name: TripCandy (CANDY)
Trans Count: 2,875
Balance: 0.498586644749540253 BNB #-- needed additional data
Tokens ValCount: 2.78 / 3 #-- needed additional data and formatting
Token List: BEP-20 Tokens (3) #-- data from the dropdown box
Minereum BSC (MNEB)150,000 MNEB
Neftipedia (NFT)1 - $0.01 @ 0.0086
TripCandy (CANDY)76,581.46551862 - 2.77 @ 0.0096
import requests
from bs4 import BeautifulSoup
from pprint import pp
# Default request headers; main() installs them on its requests session.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0',
}

# Addresses to look up; main() substitutes each one into its URL template.
adds = [
    '0x639AD7c49EC616a64e074c21a58608C0d843A8a3',
]
def main(url):
    """Fetch the page for every address in ``adds`` and pretty-print its fields.

    Args:
        url: URL template with a single ``{}`` placeholder; each entry of the
            module-level ``adds`` list is substituted into it.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with a 4xx/5xx status.
        IndexError, AttributeError: if the page markup does not contain the
            elements the selectors below expect.
    """
    with requests.Session() as req:
        # Set the headers once on the session so every request carries them.
        req.headers.update(headers)
        for add in adds:
            # requests has no default timeout; without one a stalled
            # connection would hang the script indefinitely.
            r = req.get(url.format(add), timeout=30)
            # Fail on HTTP errors here instead of on a confusing selector
            # error while parsing an error page.
            r.raise_for_status()
            soup = BeautifulSoup(r.text, 'lxml')
            goal = soup.select('div.card-body')
            # NOTE(review): the positional indices (goal[0], goal[1], the
            # fourth <a>) presumably match the overview and "more info" cards
            # of the address page -- confirm against the live markup.
            data = {
                'ContractCreator': goal[1].select_one('a.hash-tag').text,
                'Txn': goal[1].select('a.hash-tag')[1].text,
                'Token Name': goal[1].select('a')[3].text,
                'Trans Count': soup.select_one('p.mr-2 a').text,
                'Balance': goal[0].select_one('.col-md-8').get_text(strip=True),
                # Only the first two text fragments are kept, joined as "a / b".
                'Tokens ValCount': " / ".join(list(goal[0].select_one('.position-relative').stripped_strings)[:2]),
                'Token List': [x.get_text(strip=True) for x in soup.select('.list.list-unstyled strong, .list-name')]
            }
            pp(data)


main('https://bscscan.com/address/{}')
输出:
{'ContractCreator': '0x7ab96edb99e1faa06238609947792038520f1a3c',
'Txn': '0x51a8db6ac707dcd9644b5400b533c9bbe95243054c9c67e8a8aeeab38c7f7e79',
'Token Name': 'TripCandy (CANDY)',
'Trans Count': '2,880',
'Balance': '0.498586644749540253 BNB',
'Tokens ValCount': '7.19 / 3',
'Token List': ['BEP-20 Tokens',
'Minereum BSC (MNEB)',
'Neftipedia (NFT)',
'TripCandy (CANDY)']}
我有以下代码,它可以部分运行,但结果显示非常混乱。我需要有关如何获取附加数据以及如何格式化输出的帮助。
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import re, random, ctypes
import requests, os
from time import sleep
import beepy as beep
from time import strftime
import datetime
# Pool of browser User-Agent values; one is chosen at random for the request.
# Each entry is the bare header value (not the text "header = {...}"), so it
# can be placed directly into a headers dict.
user_agent_list = [
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36',
]

# requests takes headers as a dict via the ``headers=`` keyword. The second
# positional argument of requests.get() is ``params`` (the query string), so
# passing the header there would never set the User-Agent.
header = {'User-Agent': random.choice(user_agent_list)}
line = "https://bscscan.com/address/0x639AD7c49EC616a64e074c21a58608C0d843A8a3"
contractpage = requests.get(line, headers=header)
ca = BeautifulSoup(contractpage.content, 'html.parser')

# get_text() returns all text nested under the matched element, labels and
# whitespace included, which is why the printed values span several lines.
contractcreator = ca.find(id='ContentPlaceHolder1_trContract').get_text()
tokenname = ca.find(id='ContentPlaceHolder1_tr_tokeninfo').get_text()
transcount = ca.find('p', class_='mr-2 mb-2').get_text()
tokencount = ca.find(id='ContentPlaceHolder1_tokenbalance').get_text()

print(contractcreator)
print("Token Name: ", tokenname)
print("Trans Count: ", transcount)
print("Token Count: ", tokencount)
当前输出:#-- 非常非常混乱,缺少一些预期数据
ContractCreator:
0x7ab96edb99e1faa06238609947792038520f1a3c at txn 0x51a8db6ac707dcd9644b5400b533c9bbe95243054c9c67e8a8aeeab38c7f7e79
Token Name:
TokenTracker:
TripCandy (CANDY)
Trans Count:
Latest 25 from a total of 2,878 transactions
Token Count:
Token:
6.10
3
Could not find any matches! Token display limit reached. Click to Show more
BEP-20 Tokens (3) Minereum BSC (MNEB)150,000 MNEB Neftipedia (NFT)1 NFT$0.01@0.0086TripCandy (CANDY)76,581.46551862 CANDY6.09@0.0095
Wanted Output: #-- 当我运行代码时提取的当前数据
ContractCreator: 0x7ab96edb99e1faa06238609947792038520f1a3c
Txn: 0x51a8db6ac707dcd9644b5400b533c9bbe95243054c9c67e8a8aeeab38c7f7e79
Token Name: TripCandy (CANDY)
Trans Count: 2,875
Balance: 0.498586644749540253 BNB #-- needed additional data
Tokens ValCount: 2.78 / 3 #-- needed additional data and formatting
Token List: BEP-20 Tokens (3) #-- data from the dropdown box
Minereum BSC (MNEB)150,000 MNEB
Neftipedia (NFT)1 - $0.01 @ 0.0086
TripCandy (CANDY)76,581.46551862 - 2.77 @ 0.0096
import requests
from bs4 import BeautifulSoup
from pprint import pp
# Default request headers; main() installs them on its requests session.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0',
}

# Addresses to look up; main() substitutes each one into its URL template.
adds = [
    '0x639AD7c49EC616a64e074c21a58608C0d843A8a3',
]
def main(url):
    """Fetch the page for every address in ``adds`` and pretty-print its fields.

    Args:
        url: URL template with a single ``{}`` placeholder; each entry of the
            module-level ``adds`` list is substituted into it.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with a 4xx/5xx status.
        IndexError, AttributeError: if the page markup does not contain the
            elements the selectors below expect.
    """
    with requests.Session() as req:
        # Set the headers once on the session so every request carries them.
        req.headers.update(headers)
        for add in adds:
            # requests has no default timeout; without one a stalled
            # connection would hang the script indefinitely.
            r = req.get(url.format(add), timeout=30)
            # Fail on HTTP errors here instead of on a confusing selector
            # error while parsing an error page.
            r.raise_for_status()
            soup = BeautifulSoup(r.text, 'lxml')
            goal = soup.select('div.card-body')
            # NOTE(review): the positional indices (goal[0], goal[1], the
            # fourth <a>) presumably match the overview and "more info" cards
            # of the address page -- confirm against the live markup.
            data = {
                'ContractCreator': goal[1].select_one('a.hash-tag').text,
                'Txn': goal[1].select('a.hash-tag')[1].text,
                'Token Name': goal[1].select('a')[3].text,
                'Trans Count': soup.select_one('p.mr-2 a').text,
                'Balance': goal[0].select_one('.col-md-8').get_text(strip=True),
                # Only the first two text fragments are kept, joined as "a / b".
                'Tokens ValCount': " / ".join(list(goal[0].select_one('.position-relative').stripped_strings)[:2]),
                'Token List': [x.get_text(strip=True) for x in soup.select('.list.list-unstyled strong, .list-name')]
            }
            pp(data)


main('https://bscscan.com/address/{}')
输出:
{'ContractCreator': '0x7ab96edb99e1faa06238609947792038520f1a3c',
'Txn': '0x51a8db6ac707dcd9644b5400b533c9bbe95243054c9c67e8a8aeeab38c7f7e79',
'Token Name': 'TripCandy (CANDY)',
'Trans Count': '2,880',
'Balance': '0.498586644749540253 BNB',
'Tokens ValCount': '7.19 / 3',
'Token List': ['BEP-20 Tokens',
'Minereum BSC (MNEB)',
'Neftipedia (NFT)',
'TripCandy (CANDY)']}