Beautiful Soup:需要帮助查找多个标签
Beautiful Soup: need help finding multiple tags
from bs4 import BeautifulSoup
from lxml import etree
import requests
import re
# Page to scrape; the stock is selected by the query parameter code=AA.
URL = "https://csimarket.com/stocks/at_glance.php?code=AA"
# Request headers: a desktop-Chrome User-Agent string and an Accept-Language value.
HEADERS = ({'User-Agent':
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \
(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36', \
'Accept-Language': 'en-US, en;q=0.5'})
# Download the page and parse the response body with the "html.parser" backend.
webpage = requests.get(URL, headers=HEADERS)
soup = BeautifulSoup(webpage.content, "html.parser")
# Re-parse the serialized soup with lxml; `dom` is not used again in this snippet.
dom = etree.HTML(str(soup))
# Look up an element whose href equals this exact string, query string included.
# NOTE(review): per the bs4 docs find() returns None when nothing matches, so the
# lines below would raise AttributeError on a page whose href differs - confirm.
raw_html = soup.find(href="../Industry/Industry_Data.php?s=100")
# Remove the nested <span> from the tree so its text is left out of the output.
span = raw_html.find("span")
span.decompose()
# Print the remaining text of the matched element, stripped of outer whitespace.
print(raw_html.text.strip())
下面这行代码工作正常:raw_html = soup.find(href="../Industry/Industry_Data.php?s=100")
但当我浏览其他页面时,href 的查询参数部分会有所不同,例如 ../Industry/Industry_Data.php?s=1000
如何只按“../Industry/Industry_Data.php”这一部分进行搜索?
使用 CSS 选择器(css selectors)选取你的元素,并检查 <span> 是否存在:
# Match every <a> whose href contains the shared path, whatever query string
# follows it (the CSS `*=` attribute operator is a substring match).
for a in soup.select('a[href*="../Industry/Industry_Data.php"]'):
    # Not every matched link carries a nested <span>; drop it only when present
    # so that links without one do not raise AttributeError.
    if a.span:
        a.span.decompose()
    # Print the link text for every match, with or without a removed <span>.
    print(a.text.strip())
例子
from bs4 import BeautifulSoup
import requests
# Page to scrape; the stock is selected by the query parameter code=AA.
URL = "https://csimarket.com/stocks/at_glance.php?code=AA"

# Adjacent string literals are concatenated at compile time. This yields the
# same User-Agent value as a backslash continuation inside the literal, but
# stays correct if the lines are ever re-indented.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
    ),
    "Accept-Language": "en-US, en;q=0.5",
}

# Download the page and parse the response body with the "html.parser" backend.
webpage = requests.get(URL, headers=HEADERS)
soup = BeautifulSoup(webpage.content, "html.parser")

# Match every <a> whose href contains the shared path, whatever query string
# follows it (the CSS `*=` attribute operator is a substring match).
for a in soup.select('a[href*="../Industry/Industry_Data.php"]'):
    # Not every matched link carries a nested <span>; drop it only when present.
    if a.span:
        a.span.decompose()
    # Print the link text for every match, with or without a removed <span>.
    print(a.text.strip())
输出
Industries At a Glance
Basic Materials
Aluminum
Aluminum
Basic Materials
from bs4 import BeautifulSoup
from lxml import etree
import requests
import re
# Page to scrape; the stock is selected by the query parameter code=AA.
URL = "https://csimarket.com/stocks/at_glance.php?code=AA"
# Request headers: a desktop-Chrome User-Agent string and an Accept-Language value.
HEADERS = ({'User-Agent':
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \
(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36', \
'Accept-Language': 'en-US, en;q=0.5'})
# Download the page and parse the response body with the "html.parser" backend.
webpage = requests.get(URL, headers=HEADERS)
soup = BeautifulSoup(webpage.content, "html.parser")
# Re-parse the serialized soup with lxml; `dom` is not used again in this snippet.
dom = etree.HTML(str(soup))
# Look up an element whose href equals this exact string, query string included.
# NOTE(review): per the bs4 docs find() returns None when nothing matches, so the
# lines below would raise AttributeError on a page whose href differs - confirm.
raw_html = soup.find(href="../Industry/Industry_Data.php?s=100")
# Remove the nested <span> from the tree so its text is left out of the output.
span = raw_html.find("span")
span.decompose()
# Print the remaining text of the matched element, stripped of outer whitespace.
print(raw_html.text.strip())
下面这行代码工作正常:raw_html = soup.find(href="../Industry/Industry_Data.php?s=100")
但当我浏览其他页面时,href 的查询参数部分会有所不同,例如 ../Industry/Industry_Data.php?s=1000
如何只按“../Industry/Industry_Data.php”这一部分进行搜索?
使用 CSS 选择器(css selectors)选取你的元素,并检查 <span> 是否存在:
# Match every <a> whose href contains the shared path, whatever query string
# follows it (the CSS `*=` attribute operator is a substring match).
for a in soup.select('a[href*="../Industry/Industry_Data.php"]'):
    # Not every matched link carries a nested <span>; drop it only when present
    # so that links without one do not raise AttributeError.
    if a.span:
        a.span.decompose()
    # Print the link text for every match, with or without a removed <span>.
    print(a.text.strip())
例子
from bs4 import BeautifulSoup
import requests
# Page to scrape; the stock is selected by the query parameter code=AA.
URL = "https://csimarket.com/stocks/at_glance.php?code=AA"

# Adjacent string literals are concatenated at compile time. This yields the
# same User-Agent value as a backslash continuation inside the literal, but
# stays correct if the lines are ever re-indented.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
    ),
    "Accept-Language": "en-US, en;q=0.5",
}

# Download the page and parse the response body with the "html.parser" backend.
webpage = requests.get(URL, headers=HEADERS)
soup = BeautifulSoup(webpage.content, "html.parser")

# Match every <a> whose href contains the shared path, whatever query string
# follows it (the CSS `*=` attribute operator is a substring match).
for a in soup.select('a[href*="../Industry/Industry_Data.php"]'):
    # Not every matched link carries a nested <span>; drop it only when present.
    if a.span:
        a.span.decompose()
    # Print the link text for every match, with or without a removed <span>.
    print(a.text.strip())
输出
Industries At a Glance
Basic Materials
Aluminum
Aluminum
Basic Materials