AttributeError: 'NoneType' object has no attribute 'assign' | Dataframe Python using Pandas
AttributeError: 'NoneType' object has no attribute 'assign' | Dataframe Python using Pandas
我正在尝试通过网络抓取获取数据,并使用 pandas 将其整理成数据框。使用以下代码,我已经能够获取这些数据框。我想用 append 把所有数据框合并成一个大的数据框。
import requests
import re
import pandas as pd
from urllib.parse import unquote
from json import loads
from bs4 import BeautifulSoup
# Scrape the hub page and build, for every 2021 entry, a CSV export URL
# (dataframe_csv) and the matching date string (date_list).

# Download URL
url = "https://riwayat-file-covid-19-dki-jakarta-jakartagis.hub.arcgis.com/"
req = requests.get(url)

# Get encoded JSON from HTML source. The pattern is a raw string so that
# `\.` reaches the regex engine instead of being an invalid str escape.
site_match = re.search(r'window\.__SITE="(.*)"', req.text)
if site_match is None:
    # Fail with a clear message instead of "'NoneType' has no attribute 'groups'"
    raise ValueError("window.__SITE payload not found in the page source")
encoded_data = site_match.group(1)

# Decode and load as dictionary
json_data = loads(unquote(encoded_data))

# Get the HTML source code for the links
html_src = json_data["site"]["data"]["values"]["layout"]["sections"][1]["rows"][0]["cards"][0]["component"]["settings"]["markdown"]

# Parse it using BeautifulSoup
soup = BeautifulSoup(html_src, 'html.parser')

# Get links
links = soup.find_all('a')

# Keep only the 2021 entries, stored as "<link text> - <href>"
link_list = [
    link.text + " - " + link.attrs['href']
    for link in links
    if "2021" in link.text
]
# Exclude this one entry from the download list
link_list.remove("31 Januari 2021 Pukul 10.00 - https://drive.google.com/file/d/1vd1tToQbx3A420KMDA63aKviLjgGPJMd/view?usp=sharing")

id_list = []
date_list = []
for entry in link_list:
    parts = entry.split("/")
    # For ".../file/d/<ID>/view..." links the file ID is the sixth "/"-separated field
    id_list.append(parts[5])
    # Strip the trailing 21 characters (" Pukul 10.00 - https:") to keep only the date;
    # assumes every link text ends with a time in that exact format
    date_list.append(parts[0][:-21])

# Turn every file ID into a direct CSV export URL
dataframe_csv = [
    "https://docs.google.com/spreadsheets/d/" + ID + "/export?format=csv"
    for ID in id_list
]
我想通过循环合并所有数据框。在每次循环中,我想删除索引为 0 的行,并添加一个新的日期列。代码如下:
# Question's original loop, kept verbatim: this is the code that raises the
# AttributeError quoted in the traceback that follows.
date_num = 0
df_total = pd.DataFrame()
for i in dataframe_csv:
df = pd.read_csv(i)
# BUG: drop(..., inplace=True) returns None, so this assignment rebinds df to None
df = df.drop(index=df.index[0], axis=0, inplace=True)
# df is None here, hence "'NoneType' object has no attribute 'assign'"
df = df.assign(Date = date_list[date_num])
date_num += 1
# NOTE: DataFrame.append returns a new frame; its result is discarded here
df_total.append(df,ignore_index=True)
问题是,我收到这样的错误:
AttributeError Traceback (most recent call last)
<ipython-input-11-ef67f0a87a8e> in <module>
5 df = pd.read_csv(i)
6 df = df.drop(index=df.index[0], axis=0, inplace=True)
----> 7 df = df.assign(Date = date_list[date_num])
8
9 date_num += 1
AttributeError: 'NoneType' object has no attribute 'assign'
`inplace=True` 会直接修改数据框本身并返回 `None`,因此 `df = df.drop(..., inplace=True)` 会把 `df` 变成 `None`。所以要么删除 `inplace=True`:
# Fix 1: call drop() without inplace so it returns the new frame.
# Frames are collected in a list and concatenated once at the end:
# DataFrame.append never modified df_total in place (its return value was
# discarded) and it was removed in pandas 2.0.
frames = []
# dataframe_csv and date_list are built in lockstep, so zip pairs each
# CSV URL with the date taken from the same link
for csv_url, date in zip(dataframe_csv, date_list):
    df = pd.read_csv(csv_url)
    # Without inplace=True, drop() returns a copy, so reassigning is correct
    df = df.drop(index=df.index[0], axis=0)
    df = df.assign(Date=date)
    frames.append(df)
# pd.concat raises on an empty list, so fall back to an empty frame
df_total = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
或者保留 `inplace=True`,但不要把返回值重新赋值给 `df`:
# Fix 2: keep inplace=True but do not assign the (None) return value back.
# Frames are collected in a list and concatenated once at the end:
# DataFrame.append never modified df_total in place (its return value was
# discarded) and it was removed in pandas 2.0.
frames = []
# dataframe_csv and date_list are built in lockstep, so zip pairs each
# CSV URL with the date taken from the same link
for csv_url, date in zip(dataframe_csv, date_list):
    df = pd.read_csv(csv_url)
    # inplace=True mutates df and returns None, so the result is not assigned
    df.drop(index=df.index[0], axis=0, inplace=True)
    df = df.assign(Date=date)
    frames.append(df)
# pd.concat raises on an empty list, so fall back to an empty frame
df_total = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
如 `drop` 的文档所述:
inplace : bool, default False
If False, return a copy. Otherwise, do operation inplace and return None.
我正在尝试通过网络抓取获取数据,并使用 pandas 将其整理成数据框。使用以下代码,我已经能够获取这些数据框。我想用 append 把所有数据框合并成一个大的数据框。
import requests
import re
import pandas as pd
from urllib.parse import unquote
from json import loads
from bs4 import BeautifulSoup
# Scrape the hub page and build, for every 2021 entry, a CSV export URL
# (dataframe_csv) and the matching date string (date_list).

# Download URL
url = "https://riwayat-file-covid-19-dki-jakarta-jakartagis.hub.arcgis.com/"
req = requests.get(url)

# Get encoded JSON from HTML source. The pattern is a raw string so that
# `\.` reaches the regex engine instead of being an invalid str escape.
site_match = re.search(r'window\.__SITE="(.*)"', req.text)
if site_match is None:
    # Fail with a clear message instead of "'NoneType' has no attribute 'groups'"
    raise ValueError("window.__SITE payload not found in the page source")
encoded_data = site_match.group(1)

# Decode and load as dictionary
json_data = loads(unquote(encoded_data))

# Get the HTML source code for the links
html_src = json_data["site"]["data"]["values"]["layout"]["sections"][1]["rows"][0]["cards"][0]["component"]["settings"]["markdown"]

# Parse it using BeautifulSoup
soup = BeautifulSoup(html_src, 'html.parser')

# Get links
links = soup.find_all('a')

# Keep only the 2021 entries, stored as "<link text> - <href>"
link_list = [
    link.text + " - " + link.attrs['href']
    for link in links
    if "2021" in link.text
]
# Exclude this one entry from the download list
link_list.remove("31 Januari 2021 Pukul 10.00 - https://drive.google.com/file/d/1vd1tToQbx3A420KMDA63aKviLjgGPJMd/view?usp=sharing")

id_list = []
date_list = []
for entry in link_list:
    parts = entry.split("/")
    # For ".../file/d/<ID>/view..." links the file ID is the sixth "/"-separated field
    id_list.append(parts[5])
    # Strip the trailing 21 characters (" Pukul 10.00 - https:") to keep only the date;
    # assumes every link text ends with a time in that exact format
    date_list.append(parts[0][:-21])

# Turn every file ID into a direct CSV export URL
dataframe_csv = [
    "https://docs.google.com/spreadsheets/d/" + ID + "/export?format=csv"
    for ID in id_list
]
我想通过循环合并所有数据框。在每次循环中,我想删除索引为 0 的行,并添加一个新的日期列。代码如下:
# Question's original loop, kept verbatim: this is the code that raises the
# AttributeError quoted in the traceback that follows.
date_num = 0
df_total = pd.DataFrame()
for i in dataframe_csv:
df = pd.read_csv(i)
# BUG: drop(..., inplace=True) returns None, so this assignment rebinds df to None
df = df.drop(index=df.index[0], axis=0, inplace=True)
# df is None here, hence "'NoneType' object has no attribute 'assign'"
df = df.assign(Date = date_list[date_num])
date_num += 1
# NOTE: DataFrame.append returns a new frame; its result is discarded here
df_total.append(df,ignore_index=True)
问题是,我收到这样的错误:
AttributeError Traceback (most recent call last)
<ipython-input-11-ef67f0a87a8e> in <module>
5 df = pd.read_csv(i)
6 df = df.drop(index=df.index[0], axis=0, inplace=True)
----> 7 df = df.assign(Date = date_list[date_num])
8
9 date_num += 1
AttributeError: 'NoneType' object has no attribute 'assign'
`inplace=True` 会直接修改数据框本身并返回 `None`,因此 `df = df.drop(..., inplace=True)` 会把 `df` 变成 `None`。所以要么删除 `inplace=True`:
# Fix 1: call drop() without inplace so it returns the new frame.
# Frames are collected in a list and concatenated once at the end:
# DataFrame.append never modified df_total in place (its return value was
# discarded) and it was removed in pandas 2.0.
frames = []
# dataframe_csv and date_list are built in lockstep, so zip pairs each
# CSV URL with the date taken from the same link
for csv_url, date in zip(dataframe_csv, date_list):
    df = pd.read_csv(csv_url)
    # Without inplace=True, drop() returns a copy, so reassigning is correct
    df = df.drop(index=df.index[0], axis=0)
    df = df.assign(Date=date)
    frames.append(df)
# pd.concat raises on an empty list, so fall back to an empty frame
df_total = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
或者保留 `inplace=True`,但不要把返回值重新赋值给 `df`:
# Fix 2: keep inplace=True but do not assign the (None) return value back.
# Frames are collected in a list and concatenated once at the end:
# DataFrame.append never modified df_total in place (its return value was
# discarded) and it was removed in pandas 2.0.
frames = []
# dataframe_csv and date_list are built in lockstep, so zip pairs each
# CSV URL with the date taken from the same link
for csv_url, date in zip(dataframe_csv, date_list):
    df = pd.read_csv(csv_url)
    # inplace=True mutates df and returns None, so the result is not assigned
    df.drop(index=df.index[0], axis=0, inplace=True)
    df = df.assign(Date=date)
    frames.append(df)
# pd.concat raises on an empty list, so fall back to an empty frame
df_total = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
如 `drop` 的文档所述:
inplace : bool, default False
If False, return a copy. Otherwise, do operation inplace and return None.