抓取纳斯达克网站
scraping the Nasdaq website
我是 运行 以下脚本,用于在特定时间集中抓取纳斯达克网站的公司列表。该脚本应该下载 DownLoad 文件夹中的文件,使用公司名称重命名并将其传输到目标文件夹。最后,它应该删除最初下载的文件并继续循环。
一切似乎都正常 - 第一个文件已下载、重命名并移动到目标文件,但是在进行第二次下载时 returns 出现此错误:
FileNotFoundError: File b'C:\Users\Filippo
Sebastio\Downloads\HistoricalQuotes.csv' does not exist
知道为什么吗?
from selenium import webdriver
import os
import pandas as pd
import time
import glob
def pull_nasdaq_data(tickers, save_path):
driver = webdriver.Chrome(executable_path=r'C:\Users\Filippo Sebastio\Desktop\chromedriver.exe')
for ticker in tickers:
site = 'http://www.nasdaq.com/symbol/' + ticker + '/historical'
driver.get(site)
# Choose 10 year data from a drop down
data_range = driver.find_element_by_name('ddlTimeFrame')
for option in data_range.find_elements_by_tag_name('option'):
if option.text == '18 months':
option.click()
break
time.sleep(5)
driver.find_element_by_id('lnkDownLoad').click()
time.sleep(5)
data = pd.read_csv(r'C:\Users\Filippo Sebastio\Downloads\HistoricalQuotes.csv')
data['company'] = ticker
file_loc = save_path + ticker + '.csv'
data.to_csv(file_loc, index=False)
os.chdir(r'C:\Users\Filippo Sebastio\Downloads')
for f in glob.glob("Historical*.csv"):
os.remove(f)
print("Downloaded: ", ticker)
time.sleep(5)
save_path = r'C:\Users\Filippo Sebastio\Desktop\Stock'
tickers = ['mmm', 'tesla', 'pcb']
pull_nasdaq_data(tickers, save_path)
如上所述,自动收报机是个问题。当代码没有下载时,您的下载目录中只剩下一个 HistoricalQuotes.csv,当它被删除时,没有任何东西可以替换它,它会抛出找不到文件的错误。我添加了一个我认为可能有用的下载目录。
def pull_nasdaq_data(tickers, save_path, download_dir):
driver = webdriver.Chrome()
for ticker in tickers:
site = 'http://www.nasdaq.com/symbol/' + ticker + '/historical'
driver.get(site)
# Choose 10 year data from a drop down
data_range = driver.find_element_by_name('ddlTimeFrame')
for option in data_range.find_elements_by_tag_name('option'):
if option.text == '18 months':
option.click()
break
time.sleep(5)
driver.find_element_by_id('lnkDownLoad').click()
time.sleep(1)
data = pd.read_csv(download_dir + 'HistoricalQuotes.csv')
data['company'] = ticker
file_loc = save_path + ticker + '.csv'
data.to_csv(file_loc, index=False)
os.remove(download_dir + 'HistoricalQuotes.csv')
print("Downloaded: ", ticker)
time.sleep(5)
save_path = '/Users/tetracycline/'
download_dir = '/Users/tetracycline/Downloads/'
tickers = ['mmm', 'tsla']
pull_nasdaq_data(tickers, save_path, download_dir)
我是 运行 以下脚本,用于在特定时间集中抓取纳斯达克网站的公司列表。该脚本应该下载 DownLoad 文件夹中的文件,使用公司名称重命名并将其传输到目标文件夹。最后,它应该删除最初下载的文件并继续循环。
一切似乎都正常 - 第一个文件已下载、重命名并移动到目标文件,但是在进行第二次下载时 returns 出现此错误:
FileNotFoundError: File b'C:\Users\Filippo Sebastio\Downloads\HistoricalQuotes.csv' does not exist
知道为什么吗?
from selenium import webdriver
import os
import pandas as pd
import time
import glob
def pull_nasdaq_data(tickers, save_path):
driver = webdriver.Chrome(executable_path=r'C:\Users\Filippo Sebastio\Desktop\chromedriver.exe')
for ticker in tickers:
site = 'http://www.nasdaq.com/symbol/' + ticker + '/historical'
driver.get(site)
# Choose 10 year data from a drop down
data_range = driver.find_element_by_name('ddlTimeFrame')
for option in data_range.find_elements_by_tag_name('option'):
if option.text == '18 months':
option.click()
break
time.sleep(5)
driver.find_element_by_id('lnkDownLoad').click()
time.sleep(5)
data = pd.read_csv(r'C:\Users\Filippo Sebastio\Downloads\HistoricalQuotes.csv')
data['company'] = ticker
file_loc = save_path + ticker + '.csv'
data.to_csv(file_loc, index=False)
os.chdir(r'C:\Users\Filippo Sebastio\Downloads')
for f in glob.glob("Historical*.csv"):
os.remove(f)
print("Downloaded: ", ticker)
time.sleep(5)
save_path = r'C:\Users\Filippo Sebastio\Desktop\Stock'
tickers = ['mmm', 'tesla', 'pcb']
pull_nasdaq_data(tickers, save_path)
如上所述,自动收报机是个问题。当代码没有下载时,您的下载目录中只剩下一个 HistoricalQuotes.csv,当它被删除时,没有任何东西可以替换它,它会抛出找不到文件的错误。我添加了一个我认为可能有用的下载目录。
def pull_nasdaq_data(tickers, save_path, download_dir):
driver = webdriver.Chrome()
for ticker in tickers:
site = 'http://www.nasdaq.com/symbol/' + ticker + '/historical'
driver.get(site)
# Choose 10 year data from a drop down
data_range = driver.find_element_by_name('ddlTimeFrame')
for option in data_range.find_elements_by_tag_name('option'):
if option.text == '18 months':
option.click()
break
time.sleep(5)
driver.find_element_by_id('lnkDownLoad').click()
time.sleep(1)
data = pd.read_csv(download_dir + 'HistoricalQuotes.csv')
data['company'] = ticker
file_loc = save_path + ticker + '.csv'
data.to_csv(file_loc, index=False)
os.remove(download_dir + 'HistoricalQuotes.csv')
print("Downloaded: ", ticker)
time.sleep(5)
save_path = '/Users/tetracycline/'
download_dir = '/Users/tetracycline/Downloads/'
tickers = ['mmm', 'tsla']
pull_nasdaq_data(tickers, save_path, download_dir)