如何获取在 python 中不断变化的元素的文本
How to get text of an element that keeps changing in python
我只是想自动化一个像 Replika 这样的聊天机器人。在其中,新的聊天消息不断出现,但每条都带有全新的 xpath 和 id,因此我很难用 selenium 跟踪最新的聊天内容。我尝试过类似问题中列出的解决方案,但它们对我不起作用(或者是我做错了什么)。我刚刚开始使用 Selenium,所以对它了解不多。请帮帮我。我使用的是 Python 3.8.2。
代码如下:
# Logs in to Replika with Selenium, then scrapes the chat transcript from the
# page the logged-in browser has rendered.
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from urllib.request import urlopen

# Raw string: "\C" in a normal string is an invalid escape and only works by
# accident; r"..." makes the Windows path unambiguous.
browser = webdriver.Chrome(r"C:\Chromedriver\chromedriver.exe")
browser.get("https://my.replika.ai/")
time.sleep(3)

# Open the login page.
browser.find_element(By.XPATH, '//*[@id="root"]/div/div[1]/main/a[2]').click()
time.sleep(2)

### Login ###
browser.find_element(By.XPATH, '//*[@id="emailOrPhone"]').send_keys("gmail_id")
time.sleep(1)
browser.find_element(By.XPATH, '//*[@id="loginForm"]/button').click()
time.sleep(3)

### Password ###
# Bug fix: the element id is "login-password"; splitting the literal across
# two source lines inserted a newline into the id, so the lookup never matched.
browser.find_element(By.XPATH, '//*[@id="login-password"]').send_keys("gmail_password")
time.sleep(1)
browser.find_element(By.XPATH, '//*[@id="loginForm"]/button').click()
time.sleep(10)

### Accept the cookies ###
browser.find_element(By.XPATH, '//*[@id="root"]/div/div[1]/div[1]/button').click()
time.sleep(5)

### Getting the latest text ###
# Bug fix: urlopen("https://my.replika.ai/") downloads the raw HTML of a
# fresh, logged-out session, so the JavaScript-rendered chat markup is never
# in it (and `page` is undefined if the request fails). Parse the HTML the
# logged-in browser has already rendered instead.
soup = BeautifulSoup(browser.page_source, 'html.parser')
content = soup.find('div', {"id": "chat-messages"})
chat = ''
if content is not None:  # guard: container may not exist if the chat hasn't rendered
    for span in content.find_all('span'):
        chat = chat + ' ' + span.text
print(chat)
提前致谢。
# Answer: instead of chasing the ever-changing ids/xpaths of new messages,
# locate chat bubbles by their stable data-author attribute.
# Bug fix: `By` was used below but never imported.
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get("https://my.replika.ai/")
time.sleep(3)
browser.find_element(By.XPATH, '//*[@id="root"]/div/div[1]/main/a[2]').click()
time.sleep(2)

### Login ###
browser.find_element(By.XPATH, '//*[@id="emailOrPhone"]').send_keys("username")
time.sleep(1)
browser.find_element(By.XPATH, '//*[@id="loginForm"]/button').click()
time.sleep(5)

### Password ###
browser.find_element(By.XPATH, '//*[@id="login-password"]').send_keys("password")
time.sleep(1)
browser.find_element(By.XPATH, '//*[@id="loginForm"]/button').click()
time.sleep(3)

### Accept the cookies ###
browser.find_element(By.XPATH, '//*[@id="root"]/div/div[1]/div[1]/button').click()
time.sleep(5)

# Only chat messages carry a data-author attribute, so this locator survives
# the changing ids/xpaths of newly arriving messages.
messages = browser.find_elements(By.XPATH, "//*[@data-author]")
print([m.text for m in messages])
if messages:  # guard: avoid IndexError when no message has rendered yet
    print("last text : " + messages[-1].text)
只需使用定位器
browser.find_elements(By.XPATH, "//*[@data-author]")
这会找到所有具有 @data-author 属性的元素(只有聊天消息才有这个属性),然后访问 a[-1] 获取最后一个元素,调用 a[-1].text 从中获取文本。
我只是想自动化一个像 Replika 这样的聊天机器人。在其中,新的聊天消息不断出现,但每条都带有全新的 xpath 和 id,因此我很难用 selenium 跟踪最新的聊天内容。我尝试过类似问题中列出的解决方案,但它们对我不起作用。
代码如下:
# Logs in to Replika with Selenium, then scrapes the chat transcript from the
# page the logged-in browser has rendered.
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from urllib.request import urlopen

# Raw string: "\C" in a normal string is an invalid escape and only works by
# accident; r"..." makes the Windows path unambiguous.
browser = webdriver.Chrome(r"C:\Chromedriver\chromedriver.exe")
browser.get("https://my.replika.ai/")
time.sleep(3)

# Open the login page.
browser.find_element(By.XPATH, '//*[@id="root"]/div/div[1]/main/a[2]').click()
time.sleep(2)

### Login ###
browser.find_element(By.XPATH, '//*[@id="emailOrPhone"]').send_keys("gmail_id")
time.sleep(1)
browser.find_element(By.XPATH, '//*[@id="loginForm"]/button').click()
time.sleep(3)

### Password ###
# Bug fix: the element id is "login-password"; splitting the literal across
# two source lines inserted a newline into the id, so the lookup never matched.
browser.find_element(By.XPATH, '//*[@id="login-password"]').send_keys("gmail_password")
time.sleep(1)
browser.find_element(By.XPATH, '//*[@id="loginForm"]/button').click()
time.sleep(10)

### Accept the cookies ###
browser.find_element(By.XPATH, '//*[@id="root"]/div/div[1]/div[1]/button').click()
time.sleep(5)

### Getting the latest text ###
# Bug fix: urlopen("https://my.replika.ai/") downloads the raw HTML of a
# fresh, logged-out session, so the JavaScript-rendered chat markup is never
# in it (and `page` is undefined if the request fails). Parse the HTML the
# logged-in browser has already rendered instead.
soup = BeautifulSoup(browser.page_source, 'html.parser')
content = soup.find('div', {"id": "chat-messages"})
chat = ''
if content is not None:  # guard: container may not exist if the chat hasn't rendered
    for span in content.find_all('span'):
        chat = chat + ' ' + span.text
print(chat)
提前致谢。
# Answer: instead of chasing the ever-changing ids/xpaths of new messages,
# locate chat bubbles by their stable data-author attribute.
# Bug fix: `By` was used below but never imported.
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get("https://my.replika.ai/")
time.sleep(3)
browser.find_element(By.XPATH, '//*[@id="root"]/div/div[1]/main/a[2]').click()
time.sleep(2)

### Login ###
browser.find_element(By.XPATH, '//*[@id="emailOrPhone"]').send_keys("username")
time.sleep(1)
browser.find_element(By.XPATH, '//*[@id="loginForm"]/button').click()
time.sleep(5)

### Password ###
browser.find_element(By.XPATH, '//*[@id="login-password"]').send_keys("password")
time.sleep(1)
browser.find_element(By.XPATH, '//*[@id="loginForm"]/button').click()
time.sleep(3)

### Accept the cookies ###
browser.find_element(By.XPATH, '//*[@id="root"]/div/div[1]/div[1]/button').click()
time.sleep(5)

# Only chat messages carry a data-author attribute, so this locator survives
# the changing ids/xpaths of newly arriving messages.
messages = browser.find_elements(By.XPATH, "//*[@data-author]")
print([m.text for m in messages])
if messages:  # guard: avoid IndexError when no message has rendered yet
    print("last text : " + messages[-1].text)
只需使用定位器 browser.find_elements(By.XPATH, "//*[@data-author]")
这会找到所有具有 @data-author 属性的元素(只有聊天消息才有这个属性),然后访问 a[-1] 获取最后一个元素,调用 a[-1].text 从中获取文本。