尝试使用 beautifulsoup 抓取 2 个标签并将它们放在同一个 csv 中
trying to scrape 2 tags using beautifulsoup and placing them in the same csv
我目前正在学习 python 并尝试通过学习其他代码来完成我自己的项目,所以在我学习的时候不要指责我。
我从 tickers.csv 中读取了一份股票清单,然后抓取一个网站来获取每只股票的部门(sector)和行业(industry),并把结果写入 stocks.csv。
问题是,我只能通过下面的代码把部门或行业(二者选其一)写入 stocks.csv:
if __name__ == '__main__':
    # Only one scraper feeds stocks.csv per run; swap in the commented line
    # to export industries instead of sectors.
    to_csv([get_sector(ticker) for ticker in get_stocks()])
    # to_csv([get_industry(ticker) for ticker in get_stocks()])
我想同时获取部门和行业。
这是完整的代码:
# dependencies
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
LSE = 'https://csimarket.com/stocks/at_glance.php?code='
def get_stocks():
    """Return the ticker symbols listed in ``watchlist/tickers.csv``.

    Returns:
        A list holding the values of the CSV's ``ticker`` column, in file
        order.
    """
    df = pd.read_csv('watchlist/tickers.csv')
    return df['ticker'].tolist()
def to_csv(stocks):
    """Write the scraped records to ``stocks.csv`` in the working directory.

    Args:
        stocks: Records to export, e.g. a list of dicts; dict keys become
            the CSV column headers.
    """
    df = pd.DataFrame(stocks)
    # index=False keeps the DataFrame's row numbers out of the file.
    df.to_csv('stocks.csv', index=False)
def get_soup(url):
    """Download *url* and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        A BeautifulSoup tree built with the built-in ``html.parser``.
    """
    response = requests.get(url)
    return bs(response.text, 'html.parser')
def get_sector(ticker):
    """Scrape the sector of *ticker* from the page at ``LSE + ticker``.

    Args:
        ticker: Stock symbol appended to the ``LSE`` base URL.

    Returns:
        A dict with keys ``'ticker'`` and ``'sector'``; ``'sector'`` is an
        empty string when the page has no "Sector" entry.
    """
    soup = get_soup(LSE + ticker)
    try:
        # The value is the text of the first <a> after the "Sector" <span>.
        link_text = soup.find('span', text='Sector').find_next('a').text
    except AttributeError:
        # find()/find_next() returned None: the label or its link is missing.
        print('No sector information availible for ', ticker)
        return {'ticker': ticker, 'sector': ''}
    sector = link_text.replace('\n', '').replace('•', '').strip()
    print(ticker, sector)
    return {'ticker': ticker, 'sector': sector}
def get_industry(ticker):
    """Scrape the industry of *ticker* from the page at ``LSE + ticker``.

    Args:
        ticker: Stock symbol appended to the ``LSE`` base URL.

    Returns:
        A dict with keys ``'ticker'`` and ``'industry'``; ``'industry'`` is
        an empty string when the page has no "Industry" entry.
    """
    soup = get_soup(LSE + ticker)
    try:
        # The value is the text of the first <a> after the "Industry" <span>.
        link_text = soup.find('span', text='Industry').find_next('a').text
    except AttributeError:
        # find()/find_next() returned None: the label or its link is missing.
        print('No industry information availible for ', ticker)
        return {'ticker': ticker, 'industry': ''}
    industry = link_text.replace('\n', '').replace('•', '').strip()
    print(ticker, industry)
    return {'ticker': ticker, 'industry': industry}
if __name__ == '__main__':
    # Only one scraper feeds stocks.csv per run; swap in the commented line
    # to export industries instead of sectors.
    to_csv([get_sector(ticker) for ticker in get_stocks()])
    # to_csv([get_industry(ticker) for ticker in get_stocks()])
这里是 tickers.csv
ticker,
A
AA
AADI
AAIC
AAL
AAN
AAOI
AAON
AAP
AAPL
AAT
AAU
AAWW
AB
ABB
ABBV
ABC
ABCB
ABCL
ABEO
ABEV
ABG
ABIO
ABM
ABMD
ABNB
ABOS
ABR
ABSI
ABST
ABT
ABTX
ABUS
ACA
ACAD
ACB
ACC
ACCD
ACCO
ACEL
ACER
ACET
ACEV
ACGL
ACH
ACHC
ACHR
ACHV
ACI
ACIU
这是获取部门(sector)时得到的 stocks.csv
ticker,sector
A,Healthcare
AA,Basic Materials
AADI,
AAIC,Services
AAL,Transportation
AAN,Services
AAOI,Technology
AAON,Capital Goods
AAP,Retail
AAPL,Technology
AAT,Financial
AAU,Basic Materials
AAWW,Transportation
AB,Financial
ABB,Consumer Discretionary
ABBV,Healthcare
ABC,Retail
ABCB,Financial
ABCL,Healthcare
ABEO,Healthcare
ABEV,Consumer Non Cyclical
ABG,Retail
ABIO,Healthcare
ABM,Services
ABMD,Healthcare
ABNB,Services
ABOS,Healthcare
ABR,Financial
ABSI,Healthcare
ABST,
ABT,Healthcare
ABTX,Financial
ABUS,Healthcare
ACA,Basic Materials
ACAD,Healthcare
ACB,
ACC,Financial
ACCD,Financial
ACCO,Basic Materials
ACEL,Services
ACER,Healthcare
ACET,Retail
ACEV,Technology
ACGL,Financial
ACH,Basic Materials
ACHC,Healthcare
ACHR,Capital Goods
ACHV,Healthcare
ACI,Energy
ACIU,
这是获取行业(industry)时得到的 stocks.csv
ticker,industry
A,Laboratory Analytical Instruments
AA,Aluminum
AADI,
AAIC,Real Estate Operations
AAL,Airline
AAN,Rental & Leasing
AAOI,Computer Networks
AAON,Industrial Machinery and Components
AAP,Automotive Aftermarket
AAPL,Computer Hardware
AAT,Real Estate Investment Trusts
AAU,Metal Mining
AAWW,Special Transportation Services
AB,Investment Services
ABB,Electric & Wiring Equipment
ABBV,Biotechnology & Pharmaceuticals
ABC,Pharmacy Services & Retail Drugstore
ABCB,Regional Banks
ABCL,Major Pharmaceutical Preparations
ABEO,Major Pharmaceutical Preparations
ABEV,Nonalcoholic Beverages
ABG,Automotive Aftermarket
ABIO,In Vitro & In Vivo Diagnostic Substances
ABM,Professional Services
ABMD,Medical Equipment & Supplies
ABNB,Real Estate Operations
ABOS,Biotechnology & Pharmaceuticals
ABR,Real Estate Investment Trusts
ABSI,Medical Laboratories
ABST,
ABT,Major Pharmaceutical Preparations
ABTX,Commercial Banks
ABUS,Major Pharmaceutical Preparations
ACA,Miscellaneous Fabricated Products
ACAD,Major Pharmaceutical Preparations
ACB,
ACC,Real Estate Investment Trusts
ACCD,Blank Checks
ACCO,Paper & Paper Products
ACEL,Casinos & Gaming
ACER,Major Pharmaceutical Preparations
ACET,Pharmacy Services & Retail Drugstore
ACEV,Semiconductors
ACGL,Property & Casualty Insurance
ACH,Aluminum
ACHC,Healthcare Facilities
ACHR,Aerospace & Defense
ACHV,In Vitro & In Vivo Diagnostic Substances
ACI,Coal Mining
ACIU,
只需将现有的两个函数合并为一个,并返回从同一个 soup 对象中解析出的结果
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
LSE = 'https://csimarket.com/stocks/at_glance.php?code='
def get_stocks():
    """Return the ticker symbols listed in ``watchlist/tickers.csv``.

    Returns:
        A list holding the values of the CSV's ``ticker`` column, in file
        order.
    """
    df = pd.read_csv('watchlist/tickers.csv')
    return df['ticker'].tolist()
def to_csv(stocks):
    """Write the scraped records to ``stocks.csv`` in the working directory.

    Args:
        stocks: Records to export, e.g. a list of dicts; dict keys become
            the CSV column headers.
    """
    df = pd.DataFrame(stocks)
    # utf-8-sig prepends a BOM to the file; index=False keeps the
    # DataFrame's row numbers out of it.
    df.to_csv('stocks.csv', encoding='utf-8-sig', index=False)
def get_soup(url):
    """Download *url* and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        A BeautifulSoup tree built with the built-in ``html.parser``.
    """
    # The request is sent with a browser-like User-Agent header.
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    return bs(response.text, 'html.parser')
def _extract_field(soup, label):
    """Return the cleaned text of the first <a> after the <span> reading *label*, or None if absent."""
    try:
        link_text = soup.find('span', text=label).find_next('a').text
    except AttributeError:
        # find()/find_next() returned None: the label or its link is missing.
        return None
    return link_text.replace('\n', '').replace('•', '').strip()


def get_data(ticker):
    """Scrape both the sector and the industry of *ticker* from one page.

    The page at ``LSE + ticker`` is fetched once and both fields are read
    from the same soup. Each field is resolved independently, so a missing
    sector no longer prevents the industry lookup, and a missing industry
    no longer discards a sector that was found.

    Args:
        ticker: Stock symbol appended to the ``LSE`` base URL.

    Returns:
        A dict with keys ``'ticker'``, ``'sector'`` and ``'industry'``. A
        field that is not present on the page is an empty string, so every
        record has the same keys.
    """
    soup = get_soup(LSE + ticker)

    sector = _extract_field(soup, 'Sector')
    if sector is None:
        print('No sector information availible for ', ticker)
        sector = ''
    else:
        print(ticker, sector)

    industry = _extract_field(soup, 'Industry')
    if industry is None:
        print('No industry information availible for ', ticker)
        industry = ''
    else:
        print(ticker, industry)

    return {'ticker': ticker, 'sector': sector, 'industry': industry}
if __name__ == '__main__':
    # Scrape every ticker on the watchlist and export the combined records.
    to_csv([get_data(ticker) for ticker in get_stocks()])
我目前正在学习 python 并尝试通过学习其他代码来完成我自己的项目,所以在我学习的时候不要指责我。
我从 tickers.csv 中读取了一份股票清单,然后抓取一个网站来获取每只股票的部门(sector)和行业(industry),并把结果写入 stocks.csv。
问题是,我只能通过下面的代码把部门或行业(二者选其一)写入 stocks.csv:
if __name__ == '__main__':
to_csv(list(map(lambda ticker: get_sector(ticker), get_stocks())))
# to_csv(list(map(lambda ticker: get_industry(ticker), get_stocks())))
我想同时获取部门和行业。这是完整的代码:
# dependencies
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
LSE = 'https://csimarket.com/stocks/at_glance.php?code='
def get_stocks():
    """Return the ticker symbols listed in ``watchlist/tickers.csv``.

    Returns:
        A list holding the values of the CSV's ``ticker`` column, in file
        order.
    """
    df = pd.read_csv('watchlist/tickers.csv')
    return df['ticker'].tolist()
def to_csv(stocks):
    """Write the scraped records to ``stocks.csv`` in the working directory.

    Args:
        stocks: Records to export, e.g. a list of dicts; dict keys become
            the CSV column headers.
    """
    df = pd.DataFrame(stocks)
    # index=False keeps the DataFrame's row numbers out of the file.
    df.to_csv('stocks.csv', index=False)
def get_soup(url):
    """Download *url* and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        A BeautifulSoup tree built with the built-in ``html.parser``.
    """
    response = requests.get(url)
    return bs(response.text, 'html.parser')
def get_sector(ticker):
    """Scrape the sector of *ticker* from the page at ``LSE + ticker``.

    Args:
        ticker: Stock symbol appended to the ``LSE`` base URL.

    Returns:
        A dict with keys ``'ticker'`` and ``'sector'``; ``'sector'`` is an
        empty string when the page has no "Sector" entry.
    """
    soup = get_soup(LSE + ticker)
    try:
        # The value is the text of the first <a> after the "Sector" <span>.
        link_text = soup.find('span', text='Sector').find_next('a').text
    except AttributeError:
        # find()/find_next() returned None: the label or its link is missing.
        print('No sector information availible for ', ticker)
        return {'ticker': ticker, 'sector': ''}
    sector = link_text.replace('\n', '').replace('•', '').strip()
    print(ticker, sector)
    return {'ticker': ticker, 'sector': sector}
def get_industry(ticker):
    """Scrape the industry of *ticker* from the page at ``LSE + ticker``.

    Args:
        ticker: Stock symbol appended to the ``LSE`` base URL.

    Returns:
        A dict with keys ``'ticker'`` and ``'industry'``; ``'industry'`` is
        an empty string when the page has no "Industry" entry.
    """
    soup = get_soup(LSE + ticker)
    try:
        # The value is the text of the first <a> after the "Industry" <span>.
        link_text = soup.find('span', text='Industry').find_next('a').text
    except AttributeError:
        # find()/find_next() returned None: the label or its link is missing.
        print('No industry information availible for ', ticker)
        return {'ticker': ticker, 'industry': ''}
    industry = link_text.replace('\n', '').replace('•', '').strip()
    print(ticker, industry)
    return {'ticker': ticker, 'industry': industry}
if __name__ == '__main__':
    # Only one scraper feeds stocks.csv per run; swap in the commented line
    # to export industries instead of sectors.
    to_csv([get_sector(ticker) for ticker in get_stocks()])
    # to_csv([get_industry(ticker) for ticker in get_stocks()])
这里是 tickers.csv
ticker,
A
AA
AADI
AAIC
AAL
AAN
AAOI
AAON
AAP
AAPL
AAT
AAU
AAWW
AB
ABB
ABBV
ABC
ABCB
ABCL
ABEO
ABEV
ABG
ABIO
ABM
ABMD
ABNB
ABOS
ABR
ABSI
ABST
ABT
ABTX
ABUS
ACA
ACAD
ACB
ACC
ACCD
ACCO
ACEL
ACER
ACET
ACEV
ACGL
ACH
ACHC
ACHR
ACHV
ACI
ACIU
这是获取部门(sector)时得到的 stocks.csv
ticker,sector
A,Healthcare
AA,Basic Materials
AADI,
AAIC,Services
AAL,Transportation
AAN,Services
AAOI,Technology
AAON,Capital Goods
AAP,Retail
AAPL,Technology
AAT,Financial
AAU,Basic Materials
AAWW,Transportation
AB,Financial
ABB,Consumer Discretionary
ABBV,Healthcare
ABC,Retail
ABCB,Financial
ABCL,Healthcare
ABEO,Healthcare
ABEV,Consumer Non Cyclical
ABG,Retail
ABIO,Healthcare
ABM,Services
ABMD,Healthcare
ABNB,Services
ABOS,Healthcare
ABR,Financial
ABSI,Healthcare
ABST,
ABT,Healthcare
ABTX,Financial
ABUS,Healthcare
ACA,Basic Materials
ACAD,Healthcare
ACB,
ACC,Financial
ACCD,Financial
ACCO,Basic Materials
ACEL,Services
ACER,Healthcare
ACET,Retail
ACEV,Technology
ACGL,Financial
ACH,Basic Materials
ACHC,Healthcare
ACHR,Capital Goods
ACHV,Healthcare
ACI,Energy
ACIU,
这是获取行业(industry)时得到的 stocks.csv
ticker,industry
A,Laboratory Analytical Instruments
AA,Aluminum
AADI,
AAIC,Real Estate Operations
AAL,Airline
AAN,Rental & Leasing
AAOI,Computer Networks
AAON,Industrial Machinery and Components
AAP,Automotive Aftermarket
AAPL,Computer Hardware
AAT,Real Estate Investment Trusts
AAU,Metal Mining
AAWW,Special Transportation Services
AB,Investment Services
ABB,Electric & Wiring Equipment
ABBV,Biotechnology & Pharmaceuticals
ABC,Pharmacy Services & Retail Drugstore
ABCB,Regional Banks
ABCL,Major Pharmaceutical Preparations
ABEO,Major Pharmaceutical Preparations
ABEV,Nonalcoholic Beverages
ABG,Automotive Aftermarket
ABIO,In Vitro & In Vivo Diagnostic Substances
ABM,Professional Services
ABMD,Medical Equipment & Supplies
ABNB,Real Estate Operations
ABOS,Biotechnology & Pharmaceuticals
ABR,Real Estate Investment Trusts
ABSI,Medical Laboratories
ABST,
ABT,Major Pharmaceutical Preparations
ABTX,Commercial Banks
ABUS,Major Pharmaceutical Preparations
ACA,Miscellaneous Fabricated Products
ACAD,Major Pharmaceutical Preparations
ACB,
ACC,Real Estate Investment Trusts
ACCD,Blank Checks
ACCO,Paper & Paper Products
ACEL,Casinos & Gaming
ACER,Major Pharmaceutical Preparations
ACET,Pharmacy Services & Retail Drugstore
ACEV,Semiconductors
ACGL,Property & Casualty Insurance
ACH,Aluminum
ACHC,Healthcare Facilities
ACHR,Aerospace & Defense
ACHV,In Vitro & In Vivo Diagnostic Substances
ACI,Coal Mining
ACIU,
只需将现有的两个函数合并为一个,并返回从同一个 soup 对象中解析出的结果
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
LSE = 'https://csimarket.com/stocks/at_glance.php?code='
def get_stocks():
    """Return the ticker symbols listed in ``watchlist/tickers.csv``.

    Returns:
        A list holding the values of the CSV's ``ticker`` column, in file
        order.
    """
    df = pd.read_csv('watchlist/tickers.csv')
    return df['ticker'].tolist()
def to_csv(stocks):
    """Write the scraped records to ``stocks.csv`` in the working directory.

    Args:
        stocks: Records to export, e.g. a list of dicts; dict keys become
            the CSV column headers.
    """
    df = pd.DataFrame(stocks)
    # utf-8-sig prepends a BOM to the file; index=False keeps the
    # DataFrame's row numbers out of it.
    df.to_csv('stocks.csv', encoding='utf-8-sig', index=False)
def get_soup(url):
    """Download *url* and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        A BeautifulSoup tree built with the built-in ``html.parser``.
    """
    # The request is sent with a browser-like User-Agent header.
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    return bs(response.text, 'html.parser')
def _extract_field(soup, label):
    """Return the cleaned text of the first <a> after the <span> reading *label*, or None if absent."""
    try:
        link_text = soup.find('span', text=label).find_next('a').text
    except AttributeError:
        # find()/find_next() returned None: the label or its link is missing.
        return None
    return link_text.replace('\n', '').replace('•', '').strip()


def get_data(ticker):
    """Scrape both the sector and the industry of *ticker* from one page.

    The page at ``LSE + ticker`` is fetched once and both fields are read
    from the same soup. Each field is resolved independently, so a missing
    sector no longer prevents the industry lookup, and a missing industry
    no longer discards a sector that was found.

    Args:
        ticker: Stock symbol appended to the ``LSE`` base URL.

    Returns:
        A dict with keys ``'ticker'``, ``'sector'`` and ``'industry'``. A
        field that is not present on the page is an empty string, so every
        record has the same keys.
    """
    soup = get_soup(LSE + ticker)

    sector = _extract_field(soup, 'Sector')
    if sector is None:
        print('No sector information availible for ', ticker)
        sector = ''
    else:
        print(ticker, sector)

    industry = _extract_field(soup, 'Industry')
    if industry is None:
        print('No industry information availible for ', ticker)
        industry = ''
    else:
        print(ticker, industry)

    return {'ticker': ticker, 'sector': sector, 'industry': industry}
if __name__ == '__main__':
    # Scrape every ticker on the watchlist and export the combined records.
    to_csv([get_data(ticker) for ticker in get_stocks()])