按字段名从 table 中提取数据。 Xpath,python
Extract data from table by field name. Xpath, python
我想从此页面中提取数据https://mbasic.facebook.com/kristina.layus
有一个 table“住过的地方”有两行
Current city --- Moscow, Russia
Home town --- Saint Petersburg, Russia
我可以借助完整的 xpath 提取数据(提取的数据“俄罗斯莫斯科”):
/html/body/div/div/div[2]/div/div[1]/div[4]/div/div/div[1]/div/table/tbody/tr/td[2]/div/a
但我想借助 table 中的名称提取数据。我试过这个
//div[@id='living']//div[@title='Current City']//a/text()
但是收到错误
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//div[@id='living']//div[@title='Current City']//a/text()"}
(Session info: chrome=84.0.4147.89)
我的代码
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
class FacebookParser:
    """Selenium-driven helper that logs in to Facebook and reads profile fields.

    Creating an instance starts a Chrome session and immediately logs in
    through ``LOGIN_URL``. The ``get_*`` field readers operate on whatever
    page the driver is currently showing, so navigate first (for example
    with ``get_user_by_url``).
    """

    LOGIN_URL = 'https://www.facebook.com/login.php'

    # XPath of the element whose text holds the profile's display name.
    _NAME_XPATH = '//span/div/span/strong'

    def __init__(self, login, password):
        """Start Chrome and log in with the given credentials.

        Args:
            login: Value typed into the ``email`` field of the login form.
            password: Value typed into the ``pass`` field of the login form.
        """
        chrome_options = webdriver.ChromeOptions()
        # NOTE(review): value 2 presumably blocks notification prompts - confirm.
        prefs = {"profile.default_content_setting_values.notifications": 2}
        chrome_options.add_experimental_option("prefs", prefs)
        self.driver = webdriver.Chrome(chrome_options=chrome_options)
        self.wait = WebDriverWait(self.driver, 10)
        self.login(login, password)

    def login(self, login, password):
        """Open the login page, fill in the form and click the login button."""
        self.driver.get(self.LOGIN_URL)
        # wait for the login page to load
        self.wait.until(EC.visibility_of_element_located((By.ID, "email")))
        self.driver.find_element_by_id('email').send_keys(login)
        self.driver.find_element_by_id('pass').send_keys(password)
        self.driver.find_element_by_id('loginbutton').click()

    def get_user_by_id(self, id):
        """Navigate to the profile page of the user with the given id.

        The URL is built from the module-level ``BASIC_URL`` constant. The
        parameter name shadows the ``id`` builtin but is kept so existing
        keyword callers continue to work.
        """
        self.driver.get(BASIC_URL + 'profile.php?id=' + str(id))

    def get_user_by_url(self, url):
        """Navigate the driver to ``url``."""
        self.driver.get(url)

    def find_element_by_xpath_safe(self, path):
        """Return the first element matching ``path``, or ``None`` on failure.

        The lookup goes through this instance's own driver; the previous
        version reached for a module-level ``parser`` global, which only
        worked when the caller happened to use that exact variable name.
        """
        try:
            return self.driver.find_element_by_xpath(path)
        except Exception:
            # Best-effort lookup: any lookup failure is reported as "not
            # found". Unlike a bare ``except``, this lets KeyboardInterrupt
            # and SystemExit propagate.
            return None

    def _name_part(self, index):
        """Return the whitespace-separated word ``index`` of the display name.

        Returns ``""`` when the name element is missing. When the element is
        present but has too few words, a diagnostic is printed and ``""`` is
        returned.
        """
        res = self.find_element_by_xpath_safe(self._NAME_XPATH)
        if res is None:
            return ""
        vec = res.text.split()
        if len(vec) > index:
            return vec[index]
        print("Can't split {}".format(res.text))
        return ""

    def _text_at(self, path):
        """Return the text of the element at ``path``, or ``""`` if not found."""
        res = self.find_element_by_xpath_safe(path)
        if res is None:
            return ""
        return res.text

    def get_first_name(self):
        """Return the first word of the profile's display name, or ``""``."""
        return self._name_part(0)

    def get_second_name(self):
        """Return the second word of the profile's display name, or ``""``."""
        return self._name_part(1)

    def get_curr_city(self):
        """Return the text of the link at the current-city XPath, or ``""``."""
        return self._text_at('/html/body/div/div/div[2]/div/div[1]/div[4]/div/div/div[1]/div/table/tbody/tr/td[2]/div/a')

    def get_home_town(self):
        """Return the text of the link at the home-town XPath, or ``""``."""
        return self._text_at('/html/body/div/div/div[2]/div/div[1]/div[4]/div/div/div[2]/div/table/tbody/tr/td[2]/div/a')
#####################################
# Placeholder credentials and the base URL of the mobile-basic site.
LOGIN = '----.com'
PASSWORD = '----'
BASIC_URL = 'https://mbasic.facebook.com/'
#####################################
# Constructing the parser opens the browser and logs in.
parser = FacebookParser(LOGIN, PASSWORD)
# The profile page is requested twice in a row, exactly as the original
# script did.
for _ in range(2):
    parser.driver.get("https://mbasic.facebook.com/kristina.layus")
curr_city = parser.get_curr_city()
print(curr_city)
尝试在登录(loginbutton.click())和打开目标页面之间添加以下代码:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# Block until an element with id "mount_0_0" is present in the DOM;
# `wd` (the driver) and `DELAY` (the timeout) must be supplied by the caller.
login_wait = WebDriverWait(wd, DELAY)
login_wait.until(EC.presence_of_element_located((By.ID, "mount_0_0")))
此代码会一直等待登录过程完成,之后才会打开目标页面。
同时请检查您的 XPath 表达式:查看页面源代码可以发现,存在 id="living" 的 div 元素,但不存在带有属性 title="Current City" 的 div。
要打印文本 Moscow, Russia,您需要使用 WebDriverWait 等待 visibility_of_element_located(),
并且可以使用以下基于 XPath 的定位方式:
打印俄罗斯莫斯科:
# Open the profile, wait up to 20 seconds for the link that follows the
# "Current City" label to become visible, then print its text.
driver.get('https://mbasic.facebook.com/kristina.layus')
city_wait = WebDriverWait(driver, 20)
city_link = city_wait.until(EC.visibility_of_element_located((By.XPATH, "//span[text()='Current City']//following::td//a")))
print(city_link.text)
注意:您必须添加以下导入:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
参考资料
您可以在以下位置找到关于 NoSuchElementException 的一些相关讨论:
我想从此页面中提取数据https://mbasic.facebook.com/kristina.layus 有一个 table“住过的地方”有两行
Current city --- Moscow, Russia
Home town --- Saint Petersburg, Russia
我可以借助完整的 xpath 提取数据(提取的数据“俄罗斯莫斯科”):
/html/body/div/div/div[2]/div/div[1]/div[4]/div/div/div[1]/div/table/tbody/tr/td[2]/div/a
但我想借助 table 中的名称提取数据。我试过这个
//div[@id='living']//div[@title='Current City']//a/text()
但是收到错误
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//div[@id='living']//div[@title='Current City']//a/text()"}
(Session info: chrome=84.0.4147.89)
我的代码
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
class FacebookParser:
    """Selenium-driven helper that logs in to Facebook and reads profile fields.

    Creating an instance starts a Chrome session and immediately logs in
    through ``LOGIN_URL``. The ``get_*`` field readers operate on whatever
    page the driver is currently showing, so navigate first (for example
    with ``get_user_by_url``).
    """

    LOGIN_URL = 'https://www.facebook.com/login.php'

    # XPath of the element whose text holds the profile's display name.
    _NAME_XPATH = '//span/div/span/strong'

    def __init__(self, login, password):
        """Start Chrome and log in with the given credentials.

        Args:
            login: Value typed into the ``email`` field of the login form.
            password: Value typed into the ``pass`` field of the login form.
        """
        chrome_options = webdriver.ChromeOptions()
        # NOTE(review): value 2 presumably blocks notification prompts - confirm.
        prefs = {"profile.default_content_setting_values.notifications": 2}
        chrome_options.add_experimental_option("prefs", prefs)
        self.driver = webdriver.Chrome(chrome_options=chrome_options)
        self.wait = WebDriverWait(self.driver, 10)
        self.login(login, password)

    def login(self, login, password):
        """Open the login page, fill in the form and click the login button."""
        self.driver.get(self.LOGIN_URL)
        # wait for the login page to load
        self.wait.until(EC.visibility_of_element_located((By.ID, "email")))
        self.driver.find_element_by_id('email').send_keys(login)
        self.driver.find_element_by_id('pass').send_keys(password)
        self.driver.find_element_by_id('loginbutton').click()

    def get_user_by_id(self, id):
        """Navigate to the profile page of the user with the given id.

        The URL is built from the module-level ``BASIC_URL`` constant. The
        parameter name shadows the ``id`` builtin but is kept so existing
        keyword callers continue to work.
        """
        self.driver.get(BASIC_URL + 'profile.php?id=' + str(id))

    def get_user_by_url(self, url):
        """Navigate the driver to ``url``."""
        self.driver.get(url)

    def find_element_by_xpath_safe(self, path):
        """Return the first element matching ``path``, or ``None`` on failure.

        The lookup goes through this instance's own driver; the previous
        version reached for a module-level ``parser`` global, which only
        worked when the caller happened to use that exact variable name.
        """
        try:
            return self.driver.find_element_by_xpath(path)
        except Exception:
            # Best-effort lookup: any lookup failure is reported as "not
            # found". Unlike a bare ``except``, this lets KeyboardInterrupt
            # and SystemExit propagate.
            return None

    def _name_part(self, index):
        """Return the whitespace-separated word ``index`` of the display name.

        Returns ``""`` when the name element is missing. When the element is
        present but has too few words, a diagnostic is printed and ``""`` is
        returned.
        """
        res = self.find_element_by_xpath_safe(self._NAME_XPATH)
        if res is None:
            return ""
        vec = res.text.split()
        if len(vec) > index:
            return vec[index]
        print("Can't split {}".format(res.text))
        return ""

    def _text_at(self, path):
        """Return the text of the element at ``path``, or ``""`` if not found."""
        res = self.find_element_by_xpath_safe(path)
        if res is None:
            return ""
        return res.text

    def get_first_name(self):
        """Return the first word of the profile's display name, or ``""``."""
        return self._name_part(0)

    def get_second_name(self):
        """Return the second word of the profile's display name, or ``""``."""
        return self._name_part(1)

    def get_curr_city(self):
        """Return the text of the link at the current-city XPath, or ``""``."""
        return self._text_at('/html/body/div/div/div[2]/div/div[1]/div[4]/div/div/div[1]/div/table/tbody/tr/td[2]/div/a')

    def get_home_town(self):
        """Return the text of the link at the home-town XPath, or ``""``."""
        return self._text_at('/html/body/div/div/div[2]/div/div[1]/div[4]/div/div/div[2]/div/table/tbody/tr/td[2]/div/a')
#####################################
# Placeholder credentials and the base URL of the mobile-basic site.
LOGIN = '----.com'
PASSWORD = '----'
BASIC_URL = 'https://mbasic.facebook.com/'
#####################################
# Constructing the parser opens the browser and logs in.
parser = FacebookParser(LOGIN, PASSWORD)
# The profile page is requested twice in a row, exactly as the original
# script did.
for _ in range(2):
    parser.driver.get("https://mbasic.facebook.com/kristina.layus")
curr_city = parser.get_curr_city()
print(curr_city)
尝试在登录(loginbutton.click())和打开目标页面之间添加以下代码:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# Block until an element with id "mount_0_0" is present in the DOM;
# `wd` (the driver) and `DELAY` (the timeout) must be supplied by the caller.
login_wait = WebDriverWait(wd, DELAY)
login_wait.until(EC.presence_of_element_located((By.ID, "mount_0_0")))
此代码会一直等待登录过程完成,之后才会打开目标页面。
同时请检查您的 XPath 表达式:查看页面源代码可以发现,存在 id="living" 的 div 元素,但不存在带有属性 title="Current City" 的 div。
要打印文本 Moscow, Russia,您需要使用 WebDriverWait 等待 visibility_of_element_located(),
并且可以使用以下基于 XPath 的定位方式:
打印俄罗斯莫斯科:
# Open the profile page first; the two statements were fused onto a single
# line, which is not valid Python.
driver.get('https://mbasic.facebook.com/kristina.layus')
# Wait up to 20 seconds for the link that follows the "Current City" label
# to become visible, then print its text.
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//span[text()='Current City']//following::td//a"))).text)
注意:您必须添加以下导入:
# One import statement per line; the three statements were fused onto a
# single line, which is not valid Python.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
参考资料
您可以在以下位置找到关于