Python 抓取循环
Python scraping loop
所以,我需要帮助,这是我的代码:
results = []
import re

# popup_linkz holds N detail-page links like this one:
# https://www.mercadopublico.cl/Procurement/Modules/RFB/DetailsAcquisition.aspx?qs=uEap3sWEgifS2G+m9xvYiA==
# Each of them is scraped in turn.
for i in popup_linkz:
    url = i  # the URL being scraped on this iteration
    response = requests.get(url)
    print('url:', response.url)
    #print('status:', response.status_code)
    soup = BeautifulSoup(response.content, "html.parser")
    # NOTE(review): re-creating the list here discards whatever was collected
    # for the previous URLs, so `results` never holds more than one page.
    results = []
    #json_res = json.loads(res.text)
    #print(json_res[0]['price'])
    item_1 = 'grvProducto_ctl02_lblCategoria'
    for line in soup.findAll('span', attrs={'id': 'grvProducto_ctl02_lblCategoria'}):
        results.append(line.text)
    # This actually gets the first code, but I don't know how to iterate over
    # the others. It also doesn't store every code: when I print, the values
    # are not stacked together, they are shown one at a time.
    print('id', results)
我正在尝试抓取这个示例 URL:https://www.mercadopublico.cl/Procurement/Modules/RFB/DetailsAcquisition.aspx?qs=uEap3sWEgifS2G+m9xvYiA==
实际上需要遍历的链接有 2 到 10,000 个。
This is the information I want to get here, but I can't get it.
我不确定如何使用它
# Collect the text of every <span> whose id is exactly this value.
for line in soup.findAll('span', attrs={'id': 'grvProducto_ctl02_lblCategoria'}):
    results.append(line.text)
使用相同的循环获取其他信息。
Underlying data of the page
你能给我讲讲吗?
尝试:
import requests
from bs4 import BeautifulSoup

url = "https://www.mercadopublico.cl/Procurement/Modules/RFB/DetailsAcquisition.aspx?qs=uEap3sWEgifS2G+m9xvYiA=="
soup = BeautifulSoup(requests.get(url).content, "html.parser")

# Page-level fields: each one is looked up by its fixed element id.
licitation_number = soup.select_one("#lblNumLicitacion").text
responsable = soup.select_one("#lblResponsable").text
ficha = soup.select_one("#lblFicha2Reclamo").text

# The `=` specifier prints "name='value'", so the variable names themselves
# are part of the output.
print(f"{licitation_number=}")
print(f"{responsable=}")
print(f"{ficha=}")
print("-" * 80)

# One iteration per element with class "borde_tabla00" inside #grvProducto.
# `[id$="..."]` matches on the END of the id, so the lookup does not depend on
# the per-row prefix (e.g. the "grvProducto_ctl02_" part of the id).
for t in soup.select("#grvProducto .borde_tabla00"):
    categoria = t.select_one('[id$="lblCategoria"]').text
    candidad = t.select_one('[id$="lblCantidad"]').text
    descripction = t.select_one('[id$="lblDescripcion"]').text
    print(f"{categoria=} {candidad=}")
    print(f"{descripction=}")
    print()
打印:
licitation_number='1549-5-LR22'
responsable='SERVICIO DE SALUD METROPOLITANA NORTE HOSPITAL SAN JOSE, Hospital San José'
ficha='107'
--------------------------------------------------------------------------------
categoria='42221501' candidad='130'
descripction='(226-2001) STENT CORONARIO DE CROMO COBALTO, LIBERADOR DE FÁRMACO EVEROLIMUS'
categoria='42221501' candidad='360'
descripction='(226-2002) STENT CORONARIO DE CROMO COBALTO, LIBERADOR DE FÁRMACO ZOTAROLIMUS'
categoria='42221501' candidad='120'
descripction='(226-2004) STENT CORONARIO DE CROMO COBALTO, LIBERADOR DE FÁRMACO SIROLIMUS, CON STRUT DE 0.80'
categoria='42221501' candidad='240'
descripction='(226-2003) STENT CORONARIO DE CROMO COBALTO, LIBERADOR DE FÁRMACO SIROLIMUS, CON STRUT DE 0.60'
所以,我需要帮助,这是我的代码:
results = []
import re

# popup_linkz holds N detail-page links like this one:
# https://www.mercadopublico.cl/Procurement/Modules/RFB/DetailsAcquisition.aspx?qs=uEap3sWEgifS2G+m9xvYiA==
# Each of them is scraped in turn.
for i in popup_linkz:
    url = i  # the URL being scraped on this iteration
    response = requests.get(url)
    print('url:', response.url)
    #print('status:', response.status_code)
    soup = BeautifulSoup(response.content, "html.parser")
    # NOTE(review): re-creating the list here discards whatever was collected
    # for the previous URLs, so `results` never holds more than one page.
    results = []
    #json_res = json.loads(res.text)
    #print(json_res[0]['price'])
    item_1 = 'grvProducto_ctl02_lblCategoria'
    for line in soup.findAll('span', attrs={'id': 'grvProducto_ctl02_lblCategoria'}):
        results.append(line.text)
    # This actually gets the first code, but I don't know how to iterate over
    # the others. It also doesn't store every code: when I print, the values
    # are not stacked together, they are shown one at a time.
    print('id', results)
我正在尝试抓取这个示例 URL:https://www.mercadopublico.cl/Procurement/Modules/RFB/DetailsAcquisition.aspx?qs=uEap3sWEgifS2G+m9xvYiA== 实际上需要遍历的链接有 2 到 10,000 个。
This is the information I want to get here, but I can't get it.
我不确定如何使用它
# Collect the text of every <span> whose id is exactly this value.
for line in soup.findAll('span', attrs={'id': 'grvProducto_ctl02_lblCategoria'}):
    results.append(line.text)
使用相同的循环获取其他信息。 Underlying data of the page
你能给我讲讲吗?
尝试:
import requests
from bs4 import BeautifulSoup

url = "https://www.mercadopublico.cl/Procurement/Modules/RFB/DetailsAcquisition.aspx?qs=uEap3sWEgifS2G+m9xvYiA=="
soup = BeautifulSoup(requests.get(url).content, "html.parser")

# Page-level fields: each one is looked up by its fixed element id.
licitation_number = soup.select_one("#lblNumLicitacion").text
responsable = soup.select_one("#lblResponsable").text
ficha = soup.select_one("#lblFicha2Reclamo").text

# The `=` specifier prints "name='value'", so the variable names themselves
# are part of the output.
print(f"{licitation_number=}")
print(f"{responsable=}")
print(f"{ficha=}")
print("-" * 80)

# One iteration per element with class "borde_tabla00" inside #grvProducto.
# `[id$="..."]` matches on the END of the id, so the lookup does not depend on
# the per-row prefix (e.g. the "grvProducto_ctl02_" part of the id).
for t in soup.select("#grvProducto .borde_tabla00"):
    categoria = t.select_one('[id$="lblCategoria"]').text
    candidad = t.select_one('[id$="lblCantidad"]').text
    descripction = t.select_one('[id$="lblDescripcion"]').text
    print(f"{categoria=} {candidad=}")
    print(f"{descripction=}")
    print()
打印:
licitation_number='1549-5-LR22'
responsable='SERVICIO DE SALUD METROPOLITANA NORTE HOSPITAL SAN JOSE, Hospital San José'
ficha='107'
--------------------------------------------------------------------------------
categoria='42221501' candidad='130'
descripction='(226-2001) STENT CORONARIO DE CROMO COBALTO, LIBERADOR DE FÁRMACO EVEROLIMUS'
categoria='42221501' candidad='360'
descripction='(226-2002) STENT CORONARIO DE CROMO COBALTO, LIBERADOR DE FÁRMACO ZOTAROLIMUS'
categoria='42221501' candidad='120'
descripction='(226-2004) STENT CORONARIO DE CROMO COBALTO, LIBERADOR DE FÁRMACO SIROLIMUS, CON STRUT DE 0.80'
categoria='42221501' candidad='240'
descripction='(226-2003) STENT CORONARIO DE CROMO COBALTO, LIBERADOR DE FÁRMACO SIROLIMUS, CON STRUT DE 0.60'