Python / MySQL / Selenium - 赋值前引用的局部变量
Python / MySQL / Selenium - Local variable referenced before assignment
我认为 MySQL 数据库存在问题,因为我有 运行 代码 没有 MySQL 代码和 Python/Selenium 工作正常。当 运行 代码的第一个迭代 for link in links:
是 运行 然后由于以下错误消息而中断:UnboundLocalError: local variable 'followers' referenced before assignment
谁能告诉我错误在哪里?我尝试了类似主题的几种解决方案,但均未成功。
输出:
Barbecue Records
Australia
https://d1fuks2cnuq5t9.cloudfront.net/i/2uQiBiM7jpSCowBXEOqfZdu9x9FCcTf9u7iZygAg.jpg
Tech House
House
Nu Disco
Techno
Deep House
Soundcloud N/A
https://www.facebook.com/barbecuerecords
https://www.beatport.com/label/barbecue-records/33621
https://www.barbecuerecords.com/
info@barbecuerecords.com
demo@barbecuerecords.com
https://labelsbase.net/barbecue-records
File "/Users/tom/Desktop/WebScraping/label_scraper.py", line 122, in <module>
scrapeLabels(country)
File "/Users/tom/Desktop/WebScraping/label_scraper.py", line 118, in scrapeLabels
values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
UnboundLocalError: local variable 'followers' referenced before assignment
数据库代码:
import mysql.connector
db = mysql.connector.connect(
host='localhost',
user='root',
passwd='',
database='Labels'
)
mycursor = db.cursor()
mycursor.execute('''
CREATE TABLE labels (
title VARCHAR(30) NOT NULL PRIMARY KEY,
country VARCHAR(30) NOT NULL,
image BLOB,
genre VARCHAR(30),
sc_link VARCHAR(50),
sc_followers SMALLINT,
fb_link VARCHAR(50),
bp_link VARCHAR(50),
website VARCHAR(50),
gen_email VARCHAR(50),
demo_email VARCHAR(50),
labelbase VARCHAR(50)
)''')
Python/Selenium代码-
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import pandas as pd
import time
import string
import mysql.connector
PATH = '/Applications/chromedriver'
driver = webdriver.Chrome(PATH)
wait = WebDriverWait(driver, 10)
links = []
countries = ['Australia','United+Kingdom','United+States','Spain','Portugal','France','Germany','Italy','Ireland']
db = mysql.connector.connect(
host='localhost',
user='root',
passwd='',
database='Labels')
mycursor = db.cursor()
def extractHrefs():
while True:
try:
time.sleep(2)
body = driver.find_element_by_xpath('/html/body/div[3]/div/div[2]')
cards = body.find_elements_by_class_name('label-card-logo-link')
for card in cards:
links.append(card.get_attribute('href'))
nextpage = driver.find_element_by_xpath('//a[text()="›"]')
driver.execute_script("arguments[0].click();", nextpage)
except:
break
def scrapeLabels(page):
url = 'https://labelsbase.net/?g=Techno%2C+Tech+House%2C+Deep+House%2C+House%2C+Progressive+House%2C+Electronica%2C+Breaks%2C+Downtempo&c={}'.format(page)
driver.get(url)
extractHrefs()
for link in links:
driver.get(link)
time.sleep(1)
# -- Title
try:
title = driver.find_element_by_class_name('label-name').text
print(title)
except:
print('Title N/A')
# -- Country
page = str(page).replace('+',' ')
print(page)
# -- Image
try:
image = driver.find_element_by_tag_name('img')
src = image.get_attribute('src')
print(src)
except:
print('Image N/A')
# -- Genres
try:
genres = driver.find_elements_by_css_selector("a[title*='More']")
for genre in genres:
genre = genre.text
print(genre)
except:
print('Genre N/A')
# -- Soundcloud link & followers
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-soundcloud fa-fw"]')
soundcloud = child.find_element_by_xpath('..').get_attribute('href')
followers = soundcloud.find_element_by_xpath('./following-sibling::span').text
print('Soundcloud Followers:',followers)
print(soundcloud)
except:
print('Soundcloud N/A')
# -- Facebook
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-facebook-official fa-fw"]')
facebook = child.find_element_by_xpath('..').get_attribute('href')
print(facebook)
except:
print('Facebook N/A')
# -- Beatport
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-cart-arrow-down fa-fw"]')
beatport = child.find_element_by_xpath('..').get_attribute('href')
print(beatport)
except:
print('Beatport N/A')
# -- Label Website
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-globe fa-fw"]')
website = child.find_element_by_xpath('..').get_attribute('href')
print(website)
except:
print('Label Website N/A')
# -- Emails
try:
email = driver.find_element_by_xpath("//a[contains(@href,'@')]").text
print(email)
try:
demo = driver.find_element_by_xpath("//a[contains(@href,'demo')]").text
print(demo)
except:
pass
except:
print('Email N/A')
# -- Labelbase Link
print(link)
sql = '''INSERT INTO labels (title, country, image, genre, sc_link, sc_followers, fb_link, bp_link, website, gen_email, demo_email, labelbase) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
mycursor.executemany(sql,values)
for country in countries:
scrapeLabels(country)
db.commit()
driver.quit()
如果您不在 try/except 子句中为变量“followers”赋值,会发生什么情况?
您的线路 values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
将不知道该怎么做,因为关注者没有价值。
我猜问题是:
在 for link in links:
循环内,在
内
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-soundcloud fa-fw"]')
soundcloud = child.find_element_by_xpath('..').get_attribute('href')
followers = soundcloud.find_element_by_xpath('./following-sibling::span').text
print('Soundcloud Followers:',followers)
print(soundcloud)
block,出问题了,所以抛出异常,没有到达
followers = soundcloud.find_element_by_xpath('./following-sibling::span').text
行,所以followers
变量还没有初始化。
但是在那之后你试图在
中使用这个变量
values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
行,因此 Python 无法识别此变量,因为它仍未初始化。
UPD
为了避免这种情况,可能最简单的方法是在使用它们的块/范围的开头定义所有这些变量,并将它们初始化为空字符串。
不管怎样,这只是我的一个建议。如下:
def scrapeLabels(page):
url = 'https://labelsbase.net/?g=Techno%2C+Tech+House%2C+Deep+House%2C+House%2C+Progressive+House%2C+Electronica%2C+Breaks%2C+Downtempo&c={}'.format(page)
driver.get(url)
extractHrefs()
for link in links:
title = ""
page = ""
src = ""
genre = ""
soundcloud = ""
followers = ""
facebook = ""
beatport = ""
website = ""
email = ""
demo = ""
driver.get(link)
time.sleep(1)
# -- Title
try:
title = driver.find_element_by_class_name('label-name').text
print(title)
except:
print('Title N/A')
# -- Country
page = str(page).replace('+',' ')
print(page)
# -- Image
try:
image = driver.find_element_by_tag_name('img')
src = image.get_attribute('src')
print(src)
except:
print('Image N/A')
# -- Genres
try:
genres = driver.find_elements_by_css_selector("a[title*='More']")
for genre in genres:
genre = genre.text
print(genre)
except:
print('Genre N/A')
# -- Soundcloud link & followers
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-soundcloud fa-fw"]')
soundcloud = child.find_element_by_xpath('..').get_attribute('href')
followers = soundcloud.find_element_by_xpath('./following-sibling::span').text
print('Soundcloud Followers:',followers)
print(soundcloud)
except:
print('Soundcloud N/A')
# -- Facebook
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-facebook-official fa-fw"]')
facebook = child.find_element_by_xpath('..').get_attribute('href')
print(facebook)
except:
print('Facebook N/A')
# -- Beatport
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-cart-arrow-down fa-fw"]')
beatport = child.find_element_by_xpath('..').get_attribute('href')
print(beatport)
except:
print('Beatport N/A')
# -- Label Website
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-globe fa-fw"]')
website = child.find_element_by_xpath('..').get_attribute('href')
print(website)
except:
print('Label Website N/A')
# -- Emails
try:
email = driver.find_element_by_xpath("//a[contains(@href,'@')]").text
print(email)
try:
demo = driver.find_element_by_xpath("//a[contains(@href,'demo')]").text
print(demo)
except:
pass
except:
print('Email N/A')
# -- Labelbase Link
print(link)
sql = '''INSERT INTO labels (title, country, image, genre, sc_link, sc_followers, fb_link, bp_link, website, gen_email, demo_email, labelbase) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
mycursor.executemany(sql,values)
您在 try/except
块内分配 followers
,这可能会失败并且实际上不会创建 followers
变量,如打印 Soundcloud N/A
的输出所示。
我认为 MySQL 数据库存在问题,因为我有 运行 代码 没有 MySQL 代码和 Python/Selenium 工作正常。当 运行 代码的第一个迭代 for link in links:
是 运行 然后由于以下错误消息而中断:UnboundLocalError: local variable 'followers' referenced before assignment
谁能告诉我错误在哪里?我尝试了类似主题的几种解决方案,但均未成功。
输出:
Barbecue Records
Australia
https://d1fuks2cnuq5t9.cloudfront.net/i/2uQiBiM7jpSCowBXEOqfZdu9x9FCcTf9u7iZygAg.jpg
Tech House
House
Nu Disco
Techno
Deep House
Soundcloud N/A
https://www.facebook.com/barbecuerecords
https://www.beatport.com/label/barbecue-records/33621
https://www.barbecuerecords.com/
info@barbecuerecords.com
demo@barbecuerecords.com
https://labelsbase.net/barbecue-records
File "/Users/tom/Desktop/WebScraping/label_scraper.py", line 122, in <module>
scrapeLabels(country)
File "/Users/tom/Desktop/WebScraping/label_scraper.py", line 118, in scrapeLabels
values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
UnboundLocalError: local variable 'followers' referenced before assignment
数据库代码:
import mysql.connector
db = mysql.connector.connect(
host='localhost',
user='root',
passwd='',
database='Labels'
)
mycursor = db.cursor()
mycursor.execute('''
CREATE TABLE labels (
title VARCHAR(30) NOT NULL PRIMARY KEY,
country VARCHAR(30) NOT NULL,
image BLOB,
genre VARCHAR(30),
sc_link VARCHAR(50),
sc_followers SMALLINT,
fb_link VARCHAR(50),
bp_link VARCHAR(50),
website VARCHAR(50),
gen_email VARCHAR(50),
demo_email VARCHAR(50),
labelbase VARCHAR(50)
)''')
Python/Selenium代码-
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import pandas as pd
import time
import string
import mysql.connector
PATH = '/Applications/chromedriver'
driver = webdriver.Chrome(PATH)
wait = WebDriverWait(driver, 10)
links = []
countries = ['Australia','United+Kingdom','United+States','Spain','Portugal','France','Germany','Italy','Ireland']
db = mysql.connector.connect(
host='localhost',
user='root',
passwd='',
database='Labels')
mycursor = db.cursor()
def extractHrefs():
while True:
try:
time.sleep(2)
body = driver.find_element_by_xpath('/html/body/div[3]/div/div[2]')
cards = body.find_elements_by_class_name('label-card-logo-link')
for card in cards:
links.append(card.get_attribute('href'))
nextpage = driver.find_element_by_xpath('//a[text()="›"]')
driver.execute_script("arguments[0].click();", nextpage)
except:
break
def scrapeLabels(page):
url = 'https://labelsbase.net/?g=Techno%2C+Tech+House%2C+Deep+House%2C+House%2C+Progressive+House%2C+Electronica%2C+Breaks%2C+Downtempo&c={}'.format(page)
driver.get(url)
extractHrefs()
for link in links:
driver.get(link)
time.sleep(1)
# -- Title
try:
title = driver.find_element_by_class_name('label-name').text
print(title)
except:
print('Title N/A')
# -- Country
page = str(page).replace('+',' ')
print(page)
# -- Image
try:
image = driver.find_element_by_tag_name('img')
src = image.get_attribute('src')
print(src)
except:
print('Image N/A')
# -- Genres
try:
genres = driver.find_elements_by_css_selector("a[title*='More']")
for genre in genres:
genre = genre.text
print(genre)
except:
print('Genre N/A')
# -- Soundcloud link & followers
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-soundcloud fa-fw"]')
soundcloud = child.find_element_by_xpath('..').get_attribute('href')
followers = soundcloud.find_element_by_xpath('./following-sibling::span').text
print('Soundcloud Followers:',followers)
print(soundcloud)
except:
print('Soundcloud N/A')
# -- Facebook
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-facebook-official fa-fw"]')
facebook = child.find_element_by_xpath('..').get_attribute('href')
print(facebook)
except:
print('Facebook N/A')
# -- Beatport
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-cart-arrow-down fa-fw"]')
beatport = child.find_element_by_xpath('..').get_attribute('href')
print(beatport)
except:
print('Beatport N/A')
# -- Label Website
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-globe fa-fw"]')
website = child.find_element_by_xpath('..').get_attribute('href')
print(website)
except:
print('Label Website N/A')
# -- Emails
try:
email = driver.find_element_by_xpath("//a[contains(@href,'@')]").text
print(email)
try:
demo = driver.find_element_by_xpath("//a[contains(@href,'demo')]").text
print(demo)
except:
pass
except:
print('Email N/A')
# -- Labelbase Link
print(link)
sql = '''INSERT INTO labels (title, country, image, genre, sc_link, sc_followers, fb_link, bp_link, website, gen_email, demo_email, labelbase) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
mycursor.executemany(sql,values)
for country in countries:
scrapeLabels(country)
db.commit()
driver.quit()
如果您不在 try/except 子句中为变量“followers”赋值,会发生什么情况?
您的线路 values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
将不知道该怎么做,因为关注者没有价值。
我猜问题是:
在 for link in links:
循环内,在
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-soundcloud fa-fw"]')
soundcloud = child.find_element_by_xpath('..').get_attribute('href')
followers = soundcloud.find_element_by_xpath('./following-sibling::span').text
print('Soundcloud Followers:',followers)
print(soundcloud)
block,出问题了,所以抛出异常,没有到达
followers = soundcloud.find_element_by_xpath('./following-sibling::span').text
行,所以followers
变量还没有初始化。
但是在那之后你试图在
values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
行,因此 Python 无法识别此变量,因为它仍未初始化。
UPD
为了避免这种情况,可能最简单的方法是在使用它们的块/范围的开头定义所有这些变量,并将它们初始化为空字符串。
不管怎样,这只是我的一个建议。如下:
def scrapeLabels(page):
url = 'https://labelsbase.net/?g=Techno%2C+Tech+House%2C+Deep+House%2C+House%2C+Progressive+House%2C+Electronica%2C+Breaks%2C+Downtempo&c={}'.format(page)
driver.get(url)
extractHrefs()
for link in links:
title = ""
page = ""
src = ""
genre = ""
soundcloud = ""
followers = ""
facebook = ""
beatport = ""
website = ""
email = ""
demo = ""
driver.get(link)
time.sleep(1)
# -- Title
try:
title = driver.find_element_by_class_name('label-name').text
print(title)
except:
print('Title N/A')
# -- Country
page = str(page).replace('+',' ')
print(page)
# -- Image
try:
image = driver.find_element_by_tag_name('img')
src = image.get_attribute('src')
print(src)
except:
print('Image N/A')
# -- Genres
try:
genres = driver.find_elements_by_css_selector("a[title*='More']")
for genre in genres:
genre = genre.text
print(genre)
except:
print('Genre N/A')
# -- Soundcloud link & followers
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-soundcloud fa-fw"]')
soundcloud = child.find_element_by_xpath('..').get_attribute('href')
followers = soundcloud.find_element_by_xpath('./following-sibling::span').text
print('Soundcloud Followers:',followers)
print(soundcloud)
except:
print('Soundcloud N/A')
# -- Facebook
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-facebook-official fa-fw"]')
facebook = child.find_element_by_xpath('..').get_attribute('href')
print(facebook)
except:
print('Facebook N/A')
# -- Beatport
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-cart-arrow-down fa-fw"]')
beatport = child.find_element_by_xpath('..').get_attribute('href')
print(beatport)
except:
print('Beatport N/A')
# -- Label Website
try:
child = driver.find_element_by_xpath('//i[@class="fa fa-globe fa-fw"]')
website = child.find_element_by_xpath('..').get_attribute('href')
print(website)
except:
print('Label Website N/A')
# -- Emails
try:
email = driver.find_element_by_xpath("//a[contains(@href,'@')]").text
print(email)
try:
demo = driver.find_element_by_xpath("//a[contains(@href,'demo')]").text
print(demo)
except:
pass
except:
print('Email N/A')
# -- Labelbase Link
print(link)
sql = '''INSERT INTO labels (title, country, image, genre, sc_link, sc_followers, fb_link, bp_link, website, gen_email, demo_email, labelbase) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
values = (title, page, src, genre, soundcloud, followers, facebook, beatport, website, email, demo, link)
mycursor.executemany(sql,values)
您在 try/except
块内分配 followers
,这可能会失败并且实际上不会创建 followers
变量,如打印 Soundcloud N/A
的输出所示。