在原始代码 python、selenium、re 中替换 url 时出现错误
I am getting error while replacing url in orginal code python,selenium,re
我将该代码中的 url 替换为我的 url
当我编译时得到如下所示的错误
第 66 行,在
current_file_name = re.search(r'https://player.hdflixcore.workers.dev//0://课程//账号%20Cracking%20--MrSihag//TN%20Cracking%20课程%20- -MrSihag/.+/(.+)', download_path, re.DOTALL).group(1)
AttributeError: 'NoneType' 对象没有属性 'group'
我想我在代码中使用了网站地址
在“current_file_name”中有一些额外的字母,如反斜杠
我不知道
我试着通过添加一些反斜杠来做同样的事情,但没有修复
但是当我 运行 原始代码时它工作正常
当我在我想要的网站上使用它时,它最终会出现上面提到的错误
下面是我编辑的代码
from selenium import webdriver
import time
import os
import shutil
import re
path = r'https://player.hdflixcore.workers.dev/0:/Courses/Account%20Cracking%20--MrSihag/TN%20Cracking%20Course%20--MrSihag/'
# For changing the download location for this browser temporarily
options = webdriver.ChromeOptions()
preferences = {"download.default_directory": r"C:\Users\shanid\Desktop\test", "safebrowsing.enabled": "false"}
options.add_experimental_option("prefs", preferences)
# Acquire the Course Link and Get all the directories
browser = webdriver.Chrome(chrome_options=options)
browser.get(r"https://player.hdflixcore.workers.dev/0:/Courses/Account%20Cracking%20--MrSihag/TN%20Cracking%20Course%20--MrSihag/")
time.sleep(2)
elements = browser.find_elements_by_css_selector(".mdui-text-truncate")
# loop for as many directories there are
for i in range(0, len(elements)):
print("deft")
# At each directory, it refreshes the page to update the webelements in the list, and returns the current directory that is being worked on
browser.get(path)
time.sleep(2)
elements = browser.find_elements_by_css_selector(".mdui-text-truncate")
element = elements[i]
# checks if the folder for the directory already exists
current_directory_name = element.text[11:].strip(" .")
current_folder_path = "C:\Users\shanid\Desktop\test\" + current_directory_name
if os.path.exists(current_folder_path):
pass
else:
os.mkdir(current_folder_path)
# Formatting what has been downloaded and sorted, and
print(current_directory_name, "------------------------------", sep="\n")
# moves on to the directory to get the page with the files
element.click()
# pausing for a few secs for the page to load, and running the same mechanism to get each file using the same method used in directory
time.sleep(3)
files = browser.find_elements_by_css_selector(".mdui-text-truncate")
for j in range(len(files)):
files = browser.find_elements_by_css_selector(".mdui-text-truncate")
_file = files[j]
# constants for some if statements
download = True
move = True
current_file_name = _file.text[17:].strip()
# If file exists, then pass over it, and don't do anything, and moveon to next file
if os.path.exists(current_folder_path + "\" + current_file_name):
pass
# If it doesnt exist, then depending on its extension, do specific actions with it
else:
# Downloads the mp4 files by clicking on it, and finding the input tag which contains the download link for vid in its value attribute
if ".mp4" in current_file_name:
_file.click()
time.sleep(2)
download_path = browser.find_element_by_css_selector("input").get_attribute("value")
current_file_name = re.search(r'https://player.hdflixcore.workers.dev//0://Courses//Account%20Cracking%20--MrSihag//TN%20Cracking%20Course%20--MrSihag/.+/(.+)', download_path, re.DOTALL).group(1)
# Checks if file exists again, incase the filename is different then the predicted filename orderly generated.
if os.path.exists(current_folder_path + "\" + current_file_name):
move = False
download = False
# returns to the previous page with the files
browser.back()
# self explanatory
elif ".html" in current_file_name:
download_path = path + current_directory_name + "/" + current_file_name
if os.path.exists(current_folder_path + "\" + current_file_name):
move = False
download = False
else:
# acquires the download location by going to the parent tag which is an a tag containing the link for html in its 'href' attribute
download_path = _file.find_element_by_xpath('..').get_attribute('href').replace(r"%5E", "^")
current_file_name = re.search(r'https://player.hdflixcore.workers.dev/0:/Courses/Account%20Cracking%20--MrSihag/TN%20Cracking%20Course%20--MrSihag/.+/(.+)', download_path, re.DOTALL).group(1).replace("%20", " ")
time.sleep(2)
current_file_path = "C:\Users\shanid\Desktop\test\" + current_file_name
# responsible for downloading it using a path, get allows downloading, by source links
if download:
browser.get(download_path)
# while the file doesn't exist/ it hasn't been downloaded yet, do nothing
while True:
if os.path.exists(current_file_path):
break
time.sleep(1)
# moves the file from the download spot to its own folder
if move:
shutil.move(current_file_path, current_folder_path + "\" + current_file_name)
print(current_file_name)
# formatter
print("------------------------------", "", sep="\n")
time.sleep(3)
下面是原代码
from selenium import webdriver
import time
import os
import shutil
import re
path = r'https://coursevania.courses.workers.dev/[coursevania.com]%20Udemy%20-%20Master%20the%20Coding%20Interview%20Data%20Structures%20+%20Algorithms/'
# For changing the download location for this browser temporarily
options = webdriver.ChromeOptions()
preferences = {"download.default_directory": r"E:\Utilities_and_Apps\Python\MY PROJECTS\Test data\Downloads", "safebrowsing.enabled": "false"}
options.add_experimental_option("prefs", preferences)
# Acquire the Course Link and Get all the directories
browser = webdriver.Chrome(chrome_options=options)
browser.get(r"https://coursevania.courses.workers.dev/[coursevania.com]%20Udemy%20-%20Master%20the%20Coding%20Interview%20Data%20Structures%20+%20Algorithms/")
time.sleep(2)
elements = browser.find_elements_by_css_selector(".mdui-text-truncate")
# loop for as many directories there are
for i in range(0, len(elements)):
# At each directory, it refreshes the page to update the webelements in the list, and returns the current directory that is being worked on
browser.get(path)
time.sleep(2)
elements = browser.find_elements_by_css_selector(".mdui-text-truncate")
element = elements[i]
# checks if the folder for the directory already exists
current_directory_name = element.text[11:].strip(" .")
current_folder_path = "E:\Utilities_and_Apps\Python\MY PROJECTS\Test data\Downloads\" + current_directory_name
if os.path.exists(current_folder_path):
pass
else:
os.mkdir(current_folder_path)
# Formatting what has been downloaded and sorted, and
print(current_directory_name, "------------------------------", sep="\n")
# moves on to the directory to get the page with the files
element.click()
# pausing for a few secs for the page to load, and running the same mechanism to get each file using the same method used in directory
time.sleep(3)
files = browser.find_elements_by_css_selector(".mdui-text-truncate")
for j in range(len(files)):
files = browser.find_elements_by_css_selector(".mdui-text-truncate")
_file = files[j]
# constants for some if statements
download = True
move = True
current_file_name = _file.text[17:].strip()
# If file exists, then pass over it, and don't do anything, and moveon to next file
if os.path.exists(current_folder_path + "\" + current_file_name):
pass
# If it doesnt exist, then depending on its extension, do specific actions with it
else:
# Downloads the mp4 files by clicking on it, and finding the input tag which contains the download link for vid in its value attribute
if ".mp4" in current_file_name:
_file.click()
time.sleep(2)
download_path = browser.find_element_by_css_selector("input").get_attribute("value")
current_file_name = re.search(r'https://coursevania.courses.workers.dev/\[coursevania.com\]%20Udemy%20-%20Master%20the%20Coding%20Interview%20Data%20Structures%20\+%20Algorithms/.+/(.+)', download_path, re.DOTALL).group(1)
# Checks if file exists again, incase the filename is different then the predicted filename orderly generated.
if os.path.exists(current_folder_path + "\" + current_file_name):
move = False
download = False
# returns to the previous page with the files
browser.back()
# self explanatory
elif ".html" in current_file_name:
download_path = path + current_directory_name + "/" + current_file_name
if os.path.exists(current_folder_path + "\" + current_file_name):
move = False
download = False
else:
# acquires the download location by going to the parent tag which is an a tag containing the link for html in its 'href' attribute
download_path = _file.find_element_by_xpath('..').get_attribute('href').replace(r"%5E", "^")
current_file_name = re.search(r'https://coursevania.courses.workers.dev/\[coursevania.com\]%20Udemy%20-%20Master%20the%20Coding%20Interview%20Data%20Structures%20\+%20Algorithms/.+/(.+)', download_path, re.DOTALL).group(1).replace("%20", " ")
time.sleep(2)
current_file_path = "E:\Utilities_and_Apps\Python\MY PROJECTS\Test data\Downloads\" + current_file_name
# responsible for downloading it using a path, get allows downloading, by source links
if download:
browser.get(download_path)
# while the file doesn't exist/ it hasn't been downloaded yet, do nothing
while True:
if os.path.exists(current_file_path):
break
time.sleep(1)
# moves the file from the download spot to its own folder
if move:
shutil.move(current_file_path, current_folder_path + "\" + current_file_name)
print(current_file_name)
# formatter
print("------------------------------", "", sep="\n")
time.sleep(3)
这段代码工作正常
时无法正常工作
我使用的网站是原始网站的克隆
我不知道为什么会出错
问题出在下页输入框的 CSS 选择器上。
https://player.hdflixcore.workers.dev/0:/Courses/Account%20Cracking%20--MrSihag/TN%20Cracking%20Course%20--MrSihag/01%20Course%20Introduction/1%20Course%20Introduction.mp4?a=view
页面上有2个输入框,所以CSS路径要写成"#content > div > div:nth-child(6) > input"
.
代码有问题。
download_path = browser.find_element_by_css_selector("input").get_attribute("value")
将替换为。
download_path = browser.find_element_by_css_selector("#content > div > div:nth-child(6) > input").get_attribute("value")
我将该代码中的 url 替换为我的 url 当我编译时得到如下所示的错误
第 66 行,在 current_file_name = re.search(r'https://player.hdflixcore.workers.dev//0://课程//账号%20Cracking%20--MrSihag//TN%20Cracking%20课程%20- -MrSihag/.+/(.+)', download_path, re.DOTALL).group(1) AttributeError: 'NoneType' 对象没有属性 'group'
我想我在代码中使用了网站地址
在“current_file_name”中有一些额外的字母,如反斜杠
我不知道
我试着通过添加一些反斜杠来做同样的事情,但没有修复
但是当我 运行 原始代码时它工作正常 当我在我想要的网站上使用它时,它最终会出现上面提到的错误
下面是我编辑的代码
from selenium import webdriver
import time
import os
import shutil
import re
path = r'https://player.hdflixcore.workers.dev/0:/Courses/Account%20Cracking%20--MrSihag/TN%20Cracking%20Course%20--MrSihag/'
# For changing the download location for this browser temporarily
options = webdriver.ChromeOptions()
preferences = {"download.default_directory": r"C:\Users\shanid\Desktop\test", "safebrowsing.enabled": "false"}
options.add_experimental_option("prefs", preferences)
# Acquire the Course Link and Get all the directories
browser = webdriver.Chrome(chrome_options=options)
browser.get(r"https://player.hdflixcore.workers.dev/0:/Courses/Account%20Cracking%20--MrSihag/TN%20Cracking%20Course%20--MrSihag/")
time.sleep(2)
elements = browser.find_elements_by_css_selector(".mdui-text-truncate")
# loop for as many directories there are
for i in range(0, len(elements)):
print("deft")
# At each directory, it refreshes the page to update the webelements in the list, and returns the current directory that is being worked on
browser.get(path)
time.sleep(2)
elements = browser.find_elements_by_css_selector(".mdui-text-truncate")
element = elements[i]
# checks if the folder for the directory already exists
current_directory_name = element.text[11:].strip(" .")
current_folder_path = "C:\Users\shanid\Desktop\test\" + current_directory_name
if os.path.exists(current_folder_path):
pass
else:
os.mkdir(current_folder_path)
# Formatting what has been downloaded and sorted, and
print(current_directory_name, "------------------------------", sep="\n")
# moves on to the directory to get the page with the files
element.click()
# pausing for a few secs for the page to load, and running the same mechanism to get each file using the same method used in directory
time.sleep(3)
files = browser.find_elements_by_css_selector(".mdui-text-truncate")
for j in range(len(files)):
files = browser.find_elements_by_css_selector(".mdui-text-truncate")
_file = files[j]
# constants for some if statements
download = True
move = True
current_file_name = _file.text[17:].strip()
# If file exists, then pass over it, and don't do anything, and moveon to next file
if os.path.exists(current_folder_path + "\" + current_file_name):
pass
# If it doesnt exist, then depending on its extension, do specific actions with it
else:
# Downloads the mp4 files by clicking on it, and finding the input tag which contains the download link for vid in its value attribute
if ".mp4" in current_file_name:
_file.click()
time.sleep(2)
download_path = browser.find_element_by_css_selector("input").get_attribute("value")
current_file_name = re.search(r'https://player.hdflixcore.workers.dev//0://Courses//Account%20Cracking%20--MrSihag//TN%20Cracking%20Course%20--MrSihag/.+/(.+)', download_path, re.DOTALL).group(1)
# Checks if file exists again, incase the filename is different then the predicted filename orderly generated.
if os.path.exists(current_folder_path + "\" + current_file_name):
move = False
download = False
# returns to the previous page with the files
browser.back()
# self explanatory
elif ".html" in current_file_name:
download_path = path + current_directory_name + "/" + current_file_name
if os.path.exists(current_folder_path + "\" + current_file_name):
move = False
download = False
else:
# acquires the download location by going to the parent tag which is an a tag containing the link for html in its 'href' attribute
download_path = _file.find_element_by_xpath('..').get_attribute('href').replace(r"%5E", "^")
current_file_name = re.search(r'https://player.hdflixcore.workers.dev/0:/Courses/Account%20Cracking%20--MrSihag/TN%20Cracking%20Course%20--MrSihag/.+/(.+)', download_path, re.DOTALL).group(1).replace("%20", " ")
time.sleep(2)
current_file_path = "C:\Users\shanid\Desktop\test\" + current_file_name
# responsible for downloading it using a path, get allows downloading, by source links
if download:
browser.get(download_path)
# while the file doesn't exist/ it hasn't been downloaded yet, do nothing
while True:
if os.path.exists(current_file_path):
break
time.sleep(1)
# moves the file from the download spot to its own folder
if move:
shutil.move(current_file_path, current_folder_path + "\" + current_file_name)
print(current_file_name)
# formatter
print("------------------------------", "", sep="\n")
time.sleep(3)
下面是原代码
from selenium import webdriver
import time
import os
import shutil
import re
path = r'https://coursevania.courses.workers.dev/[coursevania.com]%20Udemy%20-%20Master%20the%20Coding%20Interview%20Data%20Structures%20+%20Algorithms/'
# For changing the download location for this browser temporarily
options = webdriver.ChromeOptions()
preferences = {"download.default_directory": r"E:\Utilities_and_Apps\Python\MY PROJECTS\Test data\Downloads", "safebrowsing.enabled": "false"}
options.add_experimental_option("prefs", preferences)
# Acquire the Course Link and Get all the directories
browser = webdriver.Chrome(chrome_options=options)
browser.get(r"https://coursevania.courses.workers.dev/[coursevania.com]%20Udemy%20-%20Master%20the%20Coding%20Interview%20Data%20Structures%20+%20Algorithms/")
time.sleep(2)
elements = browser.find_elements_by_css_selector(".mdui-text-truncate")
# loop for as many directories there are
for i in range(0, len(elements)):
# At each directory, it refreshes the page to update the webelements in the list, and returns the current directory that is being worked on
browser.get(path)
time.sleep(2)
elements = browser.find_elements_by_css_selector(".mdui-text-truncate")
element = elements[i]
# checks if the folder for the directory already exists
current_directory_name = element.text[11:].strip(" .")
current_folder_path = "E:\Utilities_and_Apps\Python\MY PROJECTS\Test data\Downloads\" + current_directory_name
if os.path.exists(current_folder_path):
pass
else:
os.mkdir(current_folder_path)
# Formatting what has been downloaded and sorted, and
print(current_directory_name, "------------------------------", sep="\n")
# moves on to the directory to get the page with the files
element.click()
# pausing for a few secs for the page to load, and running the same mechanism to get each file using the same method used in directory
time.sleep(3)
files = browser.find_elements_by_css_selector(".mdui-text-truncate")
for j in range(len(files)):
files = browser.find_elements_by_css_selector(".mdui-text-truncate")
_file = files[j]
# constants for some if statements
download = True
move = True
current_file_name = _file.text[17:].strip()
# If file exists, then pass over it, and don't do anything, and moveon to next file
if os.path.exists(current_folder_path + "\" + current_file_name):
pass
# If it doesnt exist, then depending on its extension, do specific actions with it
else:
# Downloads the mp4 files by clicking on it, and finding the input tag which contains the download link for vid in its value attribute
if ".mp4" in current_file_name:
_file.click()
time.sleep(2)
download_path = browser.find_element_by_css_selector("input").get_attribute("value")
current_file_name = re.search(r'https://coursevania.courses.workers.dev/\[coursevania.com\]%20Udemy%20-%20Master%20the%20Coding%20Interview%20Data%20Structures%20\+%20Algorithms/.+/(.+)', download_path, re.DOTALL).group(1)
# Checks if file exists again, incase the filename is different then the predicted filename orderly generated.
if os.path.exists(current_folder_path + "\" + current_file_name):
move = False
download = False
# returns to the previous page with the files
browser.back()
# self explanatory
elif ".html" in current_file_name:
download_path = path + current_directory_name + "/" + current_file_name
if os.path.exists(current_folder_path + "\" + current_file_name):
move = False
download = False
else:
# acquires the download location by going to the parent tag which is an a tag containing the link for html in its 'href' attribute
download_path = _file.find_element_by_xpath('..').get_attribute('href').replace(r"%5E", "^")
current_file_name = re.search(r'https://coursevania.courses.workers.dev/\[coursevania.com\]%20Udemy%20-%20Master%20the%20Coding%20Interview%20Data%20Structures%20\+%20Algorithms/.+/(.+)', download_path, re.DOTALL).group(1).replace("%20", " ")
time.sleep(2)
current_file_path = "E:\Utilities_and_Apps\Python\MY PROJECTS\Test data\Downloads\" + current_file_name
# responsible for downloading it using a path, get allows downloading, by source links
if download:
browser.get(download_path)
# while the file doesn't exist/ it hasn't been downloaded yet, do nothing
while True:
if os.path.exists(current_file_path):
break
time.sleep(1)
# moves the file from the download spot to its own folder
if move:
shutil.move(current_file_path, current_folder_path + "\" + current_file_name)
print(current_file_name)
# formatter
print("------------------------------", "", sep="\n")
time.sleep(3)
这段代码工作正常
时无法正常工作我使用的网站是原始网站的克隆
我不知道为什么会出错
问题出在下页输入框的 CSS 选择器上。 https://player.hdflixcore.workers.dev/0:/Courses/Account%20Cracking%20--MrSihag/TN%20Cracking%20Course%20--MrSihag/01%20Course%20Introduction/1%20Course%20Introduction.mp4?a=view
页面上有2个输入框,所以CSS路径要写成"#content > div > div:nth-child(6) > input"
.
代码有问题。
download_path = browser.find_element_by_css_selector("input").get_attribute("value")
将替换为。
download_path = browser.find_element_by_css_selector("#content > div > div:nth-child(6) > input").get_attribute("value")