使用 requests 的 GET 请求抓取后数据缺失

Data missing after requests get scrape

我正在尝试从该页面抓取“已发行股票总数(百万)”:

https://www.nasdaq.com/market-activity/stocks/grtx/institutional-holdings

我写了下面这个脚本来尝试实现,但抓取到的页面没有显示数据表格,而是显示“数据当前不可用”。这是抓取到的 HTML (output.html) 的截图 [1]。

import requests
from bs4 import BeautifulSoup 
import csv
import pandas as pd


# Fetch the NASDAQ institutional-holdings page for GRTX with a set of
# browser-like request headers, parse it with BeautifulSoup, and save the
# pretty-printed markup to output.html.
#
# NOTE(review): "authority", "method", "path" and "scheme" look like HTTP/2
# pseudo-headers copied from a browser's dev tools; requests sends every entry
# of this dict as an ordinary header field -- confirm they are really wanted.
headers = {
    "authority": "www.nasdaq.com",
    "method": "GET",
    "path": "/market-activity/stocks/grtx/institutional-holdings",
    "scheme": "https",
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    # NOTE(review): "br" advertises Brotli support; verify that the installed
    # requests/urllib3 stack can actually decode Brotli responses.
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "en-CA,en;q=0.9,ro-RO;q=0.8,ro;q=0.7,en-GB;q=0.6,en-US;q=0.5",
    "cache-control": "max-age=0",
    "dnt": "1",
    # NOTE(review): the next two are conditional-request headers with fixed
    # values; a server may answer them with 304 and no body -- confirm.
    "if-modified-since": "Tue, 30 Jun 2020 19:43:05 GMT",
    "if-none-match": "1593546185",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
}


# Page whose HTML is downloaded.
url = "https://www.nasdaq.com/market-activity/stocks/grtx/institutional-holdings"

# Single GET request; no timeout is passed and the status code is not checked.
r= requests.get(url, headers=headers)
data=r.text
# No parser is named, so BeautifulSoup chooses one on its own.
soup=BeautifulSoup(data)

# prettify() is given an encoding, so the result is written in binary mode.
html = soup.prettify("utf-8")
with open("output.html", "wb") as file:
    file.write(html)

[1]https://ibb.co/74Lb935

数据是动态加载的,而 requests 不会执行 JavaScript,因此无法直接从页面的 HTML 中获取。不过,可以通过向网站的 API 发送 GET 请求来获取数据:
https://api.nasdaq.com/api/company/GRTX/institutional-holdings?limit=15&type=TOTAL&sortColumn=marketValue&sortOrder=DESC

例如,打印“已发行股票总数(百万)”的数据:

import requests


# JSON API endpoint of the site for GRTX institutional holdings.
URL = "https://api.nasdaq.com/api/company/GRTX/institutional-holdings?limit=15&type=TOTAL&sortColumn=marketValue&sortOrder=DESC"

# A browser-like User-Agent is sent along with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36",
}

# requests has no default timeout, so pass one explicitly to avoid blocking
# forever if the server never answers.
reply = requests.get(URL, headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx status instead of trying to decode an error page
# as JSON.
reply.raise_for_status()
# `response` is the decoded JSON payload (a plain dict), not the HTTP reply.
response = reply.json()

print(response["data"]["ownershipSummary"]["ShareoutstandingTotal"])

输出:

{'label': 'Total Shares Outstanding (millions)', 'value': '25'}

数据以 Python 字典 (dict) 的形式返回,您可以通过键 (key) 访问其中对应的值 (value):

>>> print(type(response))
<class 'dict'>
>>> print(response)
{'data': {'ownershipSummary': {'SharesOutstandingPCT': {'label': 'Institutional Ownership', 'value': '77.18 %'}, 'ShareoutstandingTotal': {'label': 'Total Shares Outstanding (millions)', 'value': '25'}, 'TotalHoldingsValue': {'label': 'Total Value of Holdings (millions)', 'value': '7'}}, 'activePositions': {'headers': {'positions': 'ACTIVE POSITIONS', 'holders': 'HOLDERS', 'shares': 'SHARES'}, 'rows': [{'positions': 'Increased Positions', 'holders': '27', 'shares': '1,787,961'}, {'positions': 'Decreased Positions', 'holders': '9', 'shares': '1,330,574'}, {'positions': 'Held Positions', 'holders': '16', 'shares': '16,158,250'}, {'positions': 'Total Institutional Shares', 'holders': '52', 'shares': '19,276,785'}]}, 'newSoldOutPositions': {'headers': {'positions': 'ACTIVE POSITIONS', 'holders': 'HOLDERS', 'shares': 'SHARES'}, 'rows': [{'positions': 'New Positions', 'holders': '7', 'shares': '472,469'}, {'positions': 'Sold Out Positions', 'holders': '1', 'shares': '825,118'}]}, 'holdingsTransactions': {'totalRecords': '52', 'institutionalHolders': '52 Institutional Holders', 'sharesHeld': '19,276,785 Total Shares Held', 'table': {'headers': {'ownerName': 'OWNER NAME', 'date': 'DATE', 'sharesHeld': 'SHARES HELD', 'sharesChange': 'CHANGE (SHARES)', 'sharesChangePCT': 'CHANGE (%)', 'marketValue': 'VALUE (IN 1,000S)'}, 'rows': [{'ownerName': 'NEA MANAGEMENT COMPANY, LLC', 'date': '12/31/2020', 'sharesHeld': '4,362,986', 'sharesChange': '0', 'sharesChangePCT': '0%', 'marketValue': ',558', 'url': '/market-activity/institutional-portfolio/nea-management-company-llc-848050'}, {'ownerName': 'SOFINNOVA INVESTMENTS, INC.', 'date': '12/31/2020', 'sharesHeld': '3,083,712', 'sharesChange': '0', 'sharesChangePCT': '0%', 'marketValue': ',132', 'url': '/market-activity/institutional-portfolio/sofinnova-investments-inc-955353'}, {'ownerName': 'NOVO HOLDINGS A/S', 'date': '12/31/2020', 'sharesHeld': '2,959,021', 'sharesChange': '-450,000', 'sharesChangePCT': '-13.2%', 'marketValue': 
',116', 'url': '/market-activity/institutional-portfolio/novo-holdings-as-733654'}, {'ownerName': 'BLACKSTONE GROUP INC', 'date': '12/31/2020', 'sharesHeld': '1,678,984', 'sharesChange': '0', 'sharesChangePCT': '0%', 'marketValue': ',684', 'url': '/market-activity/institutional-portfolio/blackstone-group-inc-738605'}, {'ownerName': 'ADAGE CAPITAL PARTNERS GP, L.L.C.', 'date': '12/31/2020', 'sharesHeld': '1,509,500', 'sharesChange': '0', 'sharesChangePCT': '0%', 'marketValue': ',302', 'url': '/market-activity/institutional-portfolio/adage-capital-partners-gp-llc-95432'}, {'ownerName': 'TEKLA CAPITAL MANAGEMENT LLC', 'date': '12/31/2020', 'sharesHeld': '1,041,845', 'sharesChange': '0', 'sharesChangePCT': '0%', 'marketValue': ',491', 'url': '/market-activity/institutional-portfolio/tekla-capital-management-llc-646528'}, {'ownerName': 'VR ADVISER, LLC', 'date': '12/31/2020', 'sharesHeld': '917,860', 'sharesChange': '866,207', 'sharesChangePCT': '1676.973%', 'marketValue': ',481', 'url': '/market-activity/institutional-portfolio/vr-adviser-llc-1117526'}, {'ownerName': 'ROCK SPRINGS CAPITAL MANAGEMENT LP', 'date': '12/31/2020', 'sharesHeld': '567,552', 'sharesChange': '0', 'sharesChangePCT': '0%', 'marketValue': ',626', 'url': '/market-activity/institutional-portfolio/rock-springs-capital-management-lp-927624'}, {'ownerName': 'BLACKROCK INC.', 'date': '12/31/2020', 'sharesHeld': '536,965', 'sharesChange': '114,325', 'sharesChangePCT': '27.05%', 'marketValue': ',376', 'url': '/market-activity/institutional-portfolio/blackrock-inc-711679'}, {'ownerName': 'VANGUARD GROUP INC', 'date': '12/31/2020', 'sharesHeld': '501,882', 'sharesChange': '134,366', 'sharesChangePCT': '36.561%', 'marketValue': ',090', 'url': '/market-activity/institutional-portfolio/vanguard-group-inc-61322'}, {'ownerName': 'SPHERA FUNDS MANAGEMENT LTD.', 'date': '12/31/2020', 'sharesHeld': '467,662', 'sharesChange': '-1,583', 'sharesChangePCT': '-0.337%', 'marketValue': ',811', 'url': 
'/market-activity/institutional-portfolio/sphera-funds-management-ltd-833867'}, {'ownerName': 'PERCEPTIVE ADVISORS LLC', 'date': '12/31/2020', 'sharesHeld': '325,000', 'sharesChange': '0', 'sharesChangePCT': '0%', 'marketValue': ',649', 'url': '/market-activity/institutional-portfolio/perceptive-advisors-llc-401881'}, {'ownerName': 'FRAZIER MANAGEMENT LLC', 'date': '12/31/2020', 'sharesHeld': '241,000', 'sharesChange': '241,000', 'sharesChangePCT': 'New', 'marketValue': ',964', 'url': '/market-activity/institutional-portfolio/frazier-management-llc-36088'}, {'ownerName': 'CAAS CAPITAL MANAGEMENT LP', 'date': '12/31/2020', 'sharesHeld': '178,141', 'sharesChange': '178,141', 'sharesChangePCT': 'New', 'marketValue': ',452', 'url': '/market-activity/institutional-portfolio/caas-capital-management-lp-1117454'}, {'ownerName': 'STATE STREET CORP', 'date': '12/31/2020', 'sharesHeld': '121,141', 'sharesChange': '38,369', 'sharesChangePCT': '46.355%', 'marketValue': '7', 'url': '/market-activity/institutional-portfolio/state-street-corp-6697'}]}}}, 'message': None, 'status': {'rCode': 200, 'bCodeMessage': None, 'developerMessage': None}}