ajax 公交车页面的搜索和抓取价格 // 也许是 selenium?
ajax search and scraping prices of bus page // maybe selenium?
我正在尝试获取巴士页面上的路线价格
import requests
from bs4 import BeautifulSoup
import re
# Accumulator for scraped prices (never actually filled in this script).
popup_linkz = []
# NOTE(review): the original wrapped the function definitions below in
# `for i in range(1, 2, 1):` — a single-iteration loop that merely re-bound
# the defs once.  It was a no-op and has been removed.
def get_headers(session):
    """Prime *session* with the site's cookies by loading the purchase page.

    Returns the page HTML on HTTP 200; otherwise prints a diagnostic and
    returns None (the original fell off the end, returning None implicitly).
    """
    res = session.get("https://new.turbus.cl/turbuscl/inicio-compra")
    if res.status_code == 200:
        print("Got headers")
        return res.text
    print("Failed to get headers")
    return None
def search(session):
    """POST the search form and return the result HTML, or None on failure.

    NOTE(review): the origin/destination/date values are hard-coded and the
    endpoint is unconfirmed (see inline comment) — verify against the
    browser's network tab.
    """
    data = {
        'origenInputModal': 'Santiago',
        'destinoInputModal': 'Calama',
        'fechaRegreso': '03-04-2021',
        'fechaIda': '31-03-2021',
    }
    res = session.post(
        "https://new.turbus.cl/turbuscl/seleccion-itinerario",
        data=data)  # not sure if this is the search link
    if res.status_code == 200:
        print("Search succeeded")
        return res.text
    print("Search failed with error:", res.reason)
    print(res.text)
    return None
def get_popup_link(html):
    """Extract the first ticket price from a results page, or None if absent.

    Bug fixed: the original called ``t.find('[class$="ticket_price-value"]')``
    — ``Tag.find`` takes a tag *name*, not a CSS selector, so it returned
    None and ``.text`` raised AttributeError.  The matched ``div`` already
    carries the price text, so read it directly.  Also removed the duplicated
    ``{precio=}`` in the debug f-string.
    """
    soup = BeautifulSoup(html, "html.parser")
    for t in soup.find_all('div', {'class': 'ticket_price-value'}):
        precio = t.get_text(strip=True)
        #cantidad = t.select_one('[id$="lblCantidad"]').text
        #descripction = t.select_one('[id$="lblDescripcion"]').text
        print(f"{precio=}")
        return precio  # returns on the first hit, as the original's in-loop return did
    return None
def main():
    """Drive the scrape: warm up a session, run the search, print a price."""
    with requests.Session() as s:
        get_headers(s)
        page_html = search(s)
        price = get_popup_link(page_html)
        print(price)


main()
#a = popup_linkz
#print(a)
enter code here
这是linkhttps://new.turbus.cl/turbuscl/inicio-compra
所以现在我能够找到搜索的输入框,但不确定是否运行它。
我遇到这个错误ValueError: too many values to unpack (expected 2)
所以我不太确定我失败了什么。
你会尝试启发我以取得成功吗?
我一直在尝试所有的死法,并获得一种新的 selenium 方法来进行搜索....
我正在做的是正确的还是我的第一种方法更好?
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 29 21:04:05 2022
@author: christian marcos
"""
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 29 16:20:40 2022
@author: christian marcos
"""
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup as bs
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from pandas.io.html import read_html
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
# Select and fill the first field (origin city).
# NOTE(review): Selenium 4 removed ``find_element_by_xpath``; use
# ``find_element(By.XPATH, ...)`` — ``By`` is already imported above.
driver = wd.Chrome(r'C:\chromedriver.exe')  # raw string so the Windows path can never be misread as escapes
driver.maximize_window()
driver.get('https://new.turbus.cl/turbuscl/inicio-compra')
driver.implicitly_wait(20)
driver.find_element(By.XPATH, '//*[@id="origen"]').click()
wait = WebDriverWait(driver, 30)
# Pick the first city offered in the origin modal.
driver.implicitly_wait(10)
driver.find_element(By.XPATH, '//*[@id="modalOriginCity"]/div/div/div[2]/div[2]/ul/li[1]').click()
此致,
所需的post数据不同。在这种情况下,您需要:
{
"fechaSalidaTramo": "31/03/2022",
"mnemotecnicoCiudadOrigenTramo": "stgo",
"mnemotecnicoCiudadDestinoTramo": "aric",
"horaSalidaTramo": 0,
"horaSalidaTramoMaxima": 0,
"codigoLinea": 90,
"numeroViaje": 0,
"numeroCuentaCorrienteCliente": 0,
"codigoIdaRegreso": 1,
"cantidadAsientos": 1,
"numeroRegistros": 0
}
而 link 是 https://new.turbus.cl/turbuscl/recursos/vtwst76/web1
。
在 python 中,它看起来像这样:
import requests
# NOTE(review): HOST is unused and points at an unrelated site — presumably
# left over from another script; safe to remove once confirmed.
HOST = "https://nclt.gov.in/"
# Itinerary-search endpoint discovered via the browser's network tab.
LINK = "https://new.turbus.cl/turbuscl/recursos/vtwst76/web1"
# Raw JSON payload (Santiago "stgo" -> Arica "aric"); sent verbatim as the body.
DATA = '{"fechaSalidaTramo":"31/03/2022","mnemotecnicoCiudadOrigenTramo":"stgo","mnemotecnicoCiudadDestinoTramo":"aric","horaSalidaTramo":0,"horaSalidaTramoMaxima":0,"codigoLinea":90,"numeroViaje":0,"numeroCuentaCorrienteCliente":0,"codigoIdaRegreso":1,"cantidadAsientos":1,"numeroRegistros":0}'
# Content-Type is required because the body is raw JSON, not form data.
HEADERS = {
"Content-Type": "application/json",
}
def get_route(origin, destination):
    """POST the itinerary search and return the decoded JSON, or None.

    NOTE(review): *origin* and *destination* are currently ignored — the DATA
    payload hard-codes stgo -> aric.  TODO: substitute the city mnemonics
    into the payload before sending.
    """
    res = requests.post(LINK, data=DATA, headers=HEADERS)
    if res.status_code == 200:
        print("getting routes")
        return res.json()
    print(res)
    return None  # explicit: the original fell through and returned None silently
def main():
    """Run the route query and display whatever the API returned."""
    print(get_route("here", "there"))


if __name__ == "__main__":
    main()
我是如何找到答案的:
- 前往网站。
- 打开网络选项卡,这样我就可以看到请求了。
- 进行搜索,找到匹配的请求。
- 将请求复制为 curl 请求并将其导入 postman.
- 去掉headers,看看你做请求的时候会不会报错。重复直到你只需要 headers.
- 复制所需的 headers 和数据,并使用请求进行测试。
我正在尝试获取巴士页面上的路线价格
import requests
from bs4 import BeautifulSoup
import re
# Accumulator for scraped prices (never actually filled in this script).
popup_linkz = []
# NOTE(review): the original wrapped the function definitions below in
# `for i in range(1, 2, 1):` — a single-iteration loop that merely re-bound
# the defs once.  It was a no-op and has been removed.
def get_headers(session):
    """Prime *session* with the site's cookies by loading the purchase page.

    Returns the page HTML on HTTP 200; otherwise prints a diagnostic and
    returns None (the original fell off the end, returning None implicitly).
    """
    res = session.get("https://new.turbus.cl/turbuscl/inicio-compra")
    if res.status_code == 200:
        print("Got headers")
        return res.text
    print("Failed to get headers")
    return None
def search(session):
    """POST the search form and return the result HTML, or None on failure.

    NOTE(review): the origin/destination/date values are hard-coded and the
    endpoint is unconfirmed (see inline comment) — verify against the
    browser's network tab.
    """
    data = {
        'origenInputModal': 'Santiago',
        'destinoInputModal': 'Calama',
        'fechaRegreso': '03-04-2021',
        'fechaIda': '31-03-2021',
    }
    res = session.post(
        "https://new.turbus.cl/turbuscl/seleccion-itinerario",
        data=data)  # not sure if this is the search link
    if res.status_code == 200:
        print("Search succeeded")
        return res.text
    print("Search failed with error:", res.reason)
    print(res.text)
    return None
def get_popup_link(html):
    """Extract the first ticket price from a results page, or None if absent.

    Bug fixed: the original called ``t.find('[class$="ticket_price-value"]')``
    — ``Tag.find`` takes a tag *name*, not a CSS selector, so it returned
    None and ``.text`` raised AttributeError.  The matched ``div`` already
    carries the price text, so read it directly.  Also removed the duplicated
    ``{precio=}`` in the debug f-string.
    """
    soup = BeautifulSoup(html, "html.parser")
    for t in soup.find_all('div', {'class': 'ticket_price-value'}):
        precio = t.get_text(strip=True)
        #cantidad = t.select_one('[id$="lblCantidad"]').text
        #descripction = t.select_one('[id$="lblDescripcion"]').text
        print(f"{precio=}")
        return precio  # returns on the first hit, as the original's in-loop return did
    return None
def main():
    """Drive the scrape: warm up a session, run the search, print a price."""
    with requests.Session() as s:
        get_headers(s)
        page_html = search(s)
        price = get_popup_link(page_html)
        print(price)


main()
#a = popup_linkz
#print(a)
enter code here
这是linkhttps://new.turbus.cl/turbuscl/inicio-compra
所以现在我能够找到搜索的输入框,但不确定是否运行它。
我遇到这个错误ValueError: too many values to unpack (expected 2)
所以我不太确定我失败了什么。
你会尝试启发我以取得成功吗?
我一直在尝试所有的死法,并获得一种新的 selenium 方法来进行搜索....
我正在做的是正确的还是我的第一种方法更好?
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 29 21:04:05 2022
@author: christian marcos
"""
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 29 16:20:40 2022
@author: christian marcos
"""
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup as bs
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from pandas.io.html import read_html
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
# Select and fill the first field (origin city).
# NOTE(review): Selenium 4 removed ``find_element_by_xpath``; use
# ``find_element(By.XPATH, ...)`` — ``By`` is already imported above.
driver = wd.Chrome(r'C:\chromedriver.exe')  # raw string so the Windows path can never be misread as escapes
driver.maximize_window()
driver.get('https://new.turbus.cl/turbuscl/inicio-compra')
driver.implicitly_wait(20)
driver.find_element(By.XPATH, '//*[@id="origen"]').click()
wait = WebDriverWait(driver, 30)
# Pick the first city offered in the origin modal.
driver.implicitly_wait(10)
driver.find_element(By.XPATH, '//*[@id="modalOriginCity"]/div/div/div[2]/div[2]/ul/li[1]').click()
此致,
所需的post数据不同。在这种情况下,您需要:
{
"fechaSalidaTramo": "31/03/2022",
"mnemotecnicoCiudadOrigenTramo": "stgo",
"mnemotecnicoCiudadDestinoTramo": "aric",
"horaSalidaTramo": 0,
"horaSalidaTramoMaxima": 0,
"codigoLinea": 90,
"numeroViaje": 0,
"numeroCuentaCorrienteCliente": 0,
"codigoIdaRegreso": 1,
"cantidadAsientos": 1,
"numeroRegistros": 0
}
而 link 是 https://new.turbus.cl/turbuscl/recursos/vtwst76/web1
。
在 python 中,它看起来像这样:
import requests
# NOTE(review): HOST is unused and points at an unrelated site — presumably
# left over from another script; safe to remove once confirmed.
HOST = "https://nclt.gov.in/"
# Itinerary-search endpoint discovered via the browser's network tab.
LINK = "https://new.turbus.cl/turbuscl/recursos/vtwst76/web1"
# Raw JSON payload (Santiago "stgo" -> Arica "aric"); sent verbatim as the body.
DATA = '{"fechaSalidaTramo":"31/03/2022","mnemotecnicoCiudadOrigenTramo":"stgo","mnemotecnicoCiudadDestinoTramo":"aric","horaSalidaTramo":0,"horaSalidaTramoMaxima":0,"codigoLinea":90,"numeroViaje":0,"numeroCuentaCorrienteCliente":0,"codigoIdaRegreso":1,"cantidadAsientos":1,"numeroRegistros":0}'
# Content-Type is required because the body is raw JSON, not form data.
HEADERS = {
"Content-Type": "application/json",
}
def get_route(origin, destination):
    """POST the itinerary search and return the decoded JSON, or None.

    NOTE(review): *origin* and *destination* are currently ignored — the DATA
    payload hard-codes stgo -> aric.  TODO: substitute the city mnemonics
    into the payload before sending.
    """
    res = requests.post(LINK, data=DATA, headers=HEADERS)
    if res.status_code == 200:
        print("getting routes")
        return res.json()
    print(res)
    return None  # explicit: the original fell through and returned None silently
def main():
    """Run the route query and display whatever the API returned."""
    print(get_route("here", "there"))


if __name__ == "__main__":
    main()
我是如何找到答案的:
- 前往网站。
- 打开网络选项卡,这样我就可以看到请求了。
- 进行搜索,找到匹配的请求。
- 将请求复制为 curl 请求并将其导入 postman.
- 去掉headers,看看你做请求的时候会不会报错。重复直到你只需要 headers.
- 复制所需的 headers 和数据,并使用请求进行测试。