Selenium3 Python3 如何从属性 style="background-image: ur,' " 中获取 url 并且有一个 url"
Selenium3 Python3 how to get url from attribute style="background-image: ur,' " and there is a url"
如何从属性 url 中获取 url 我的意思是 url 他自己的风格? style="width: 433px; height: 510px; background-image: url(https://cs7056.vk.me/c635104/v635104607/1c316/ADzy-2WY8pw.jpg)" Selenium3 Python3 对你来说很简单!
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import re
import time
url = 'https://vk.com/uporols_you'
driver = webdriver.Firefox(executable_path='C:/Users/PANDEMIC/AppData/Local/Mozilla/geckodriver.exe')
def login(driver):
log_page = driver.get('https://login.vk.com/?act=login')
find_login_input = driver.find_element_by_id('login_form').find_element_by_id('email').send_keys('+77782303865')
find_password_input = driver.find_element_by_id('login_form').find_element_by_id('pass').send_keys('pass')
find_button = driver.find_element_by_xpath('//button[@id="login_button"]').click()
time.sleep(5)
def get_photo_from_page(driver):
driver.get(url)
try:
driver.find_element_by_class_name('popup_box_container').find_element_by_class_name('box_title_wrap').find_element_by_class_name('box_x_button').click()
except:
print('nope nothing')
for i in range(2):
scrol_down = driver.find_element_by_id('public_wall').find_element_by_id('wall_more_link').click()
time.sleep(2)
tut = []
#t = (a[@class="page_post_thumb_wrap image_cover page_post_thumb_last_column page_post_thumb_last_row"])
for ii in driver.find_elements_by_xpath('//a[@style]'):
o = ii.get_attribute('style')
print(o)
#soup = BeautifulSoup(htlm, 'lxml')
#im = soup.find_all('a', class_="'page_post_thumb_wrap image_cover page_post_thumb_last_column page_post_thumb_last_row'")
#print(htlm)
#for a in im:
# s = a.get('data-src_big').split('|')[0]
# tut.append(s)
#print(tut)
#for num, link in enumerate(tut, start=1):
# p = requests.get(link)
# out = open("img%s.jpg" % (num), 'wb')
# out.write(p.content)
# out.close()
def main():
login(driver)
get_photo_from_page(driver)
if __name__ == '__main__':
main()
在那种特殊情况下,您可以只解析您已经能够使用脚本收集的样式字符串。
只需将此函数添加到您的代码中:
def parse_style_attribute(style_string):
if 'background-image' in style_string:
style_string = style_string.split(' url("')[1].replace('");', '')
return style_string
return None
这是一个简单的字符串解析,如果字符串中有"background-image",则提取url,如果没有图像,则提取return None。
然后您可以在您的代码中使用它:
links = list()
for ii in driver.find_elements_by_xpath('//a[@style]'):
o = ii.get_attribute('style')
links.append(parse_style_attribute(o))
links = [link for link in links if link is not None]
如何从属性 url 中获取 url 我的意思是 url 他自己的风格? style="width: 433px; height: 510px; background-image: url(https://cs7056.vk.me/c635104/v635104607/1c316/ADzy-2WY8pw.jpg)" Selenium3 Python3 对你来说很简单!
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import re
import time
url = 'https://vk.com/uporols_you'
driver = webdriver.Firefox(executable_path='C:/Users/PANDEMIC/AppData/Local/Mozilla/geckodriver.exe')
def login(driver):
log_page = driver.get('https://login.vk.com/?act=login')
find_login_input = driver.find_element_by_id('login_form').find_element_by_id('email').send_keys('+77782303865')
find_password_input = driver.find_element_by_id('login_form').find_element_by_id('pass').send_keys('pass')
find_button = driver.find_element_by_xpath('//button[@id="login_button"]').click()
time.sleep(5)
def get_photo_from_page(driver):
driver.get(url)
try:
driver.find_element_by_class_name('popup_box_container').find_element_by_class_name('box_title_wrap').find_element_by_class_name('box_x_button').click()
except:
print('nope nothing')
for i in range(2):
scrol_down = driver.find_element_by_id('public_wall').find_element_by_id('wall_more_link').click()
time.sleep(2)
tut = []
#t = (a[@class="page_post_thumb_wrap image_cover page_post_thumb_last_column page_post_thumb_last_row"])
for ii in driver.find_elements_by_xpath('//a[@style]'):
o = ii.get_attribute('style')
print(o)
#soup = BeautifulSoup(htlm, 'lxml')
#im = soup.find_all('a', class_="'page_post_thumb_wrap image_cover page_post_thumb_last_column page_post_thumb_last_row'")
#print(htlm)
#for a in im:
# s = a.get('data-src_big').split('|')[0]
# tut.append(s)
#print(tut)
#for num, link in enumerate(tut, start=1):
# p = requests.get(link)
# out = open("img%s.jpg" % (num), 'wb')
# out.write(p.content)
# out.close()
def main():
login(driver)
get_photo_from_page(driver)
if __name__ == '__main__':
main()
在那种特殊情况下,您可以只解析您已经能够使用脚本收集的样式字符串。
只需将此函数添加到您的代码中:
def parse_style_attribute(style_string):
if 'background-image' in style_string:
style_string = style_string.split(' url("')[1].replace('");', '')
return style_string
return None
这是一个简单的字符串解析,如果字符串中有"background-image",则提取url,如果没有图像,则提取return None。
然后您可以在您的代码中使用它:
links = list()
for ii in driver.find_elements_by_xpath('//a[@style]'):
o = ii.get_attribute('style')
links.append(parse_style_attribute(o))
links = [link for link in links if link is not None]