如何将我的 main() 函数输出为 CSV 文件
How to output my main() function as a CSV file
我正在尝试将我的主要功能输出为 CSV 文件。我试过返回我的数据框、打印我的数据框等,但似乎无法正常工作。任何人都可以阐明一下吗?谢谢。
CSV 输出代码:
if __name__ == '__main__':
with open(f'{today}HPD.csv', 'w') as fp:
a = csv.writer(fp, delimiter = ',')
a.writerows(main())
更新的 CSV 输出代码: Click for CSV output image
我使用下面用户提供的答案进行了更新,但我抓取的西里尔文本返回替换块 ����
if __name__ == '__main__':
df = main()
df.to_csv(f'{today}HPD.csv', encoding='cp1251', errors='ignore', index=False)
完整代码:
import requests
from bs4 import BeautifulSoup as BS
from datetime import datetime
import pandas as pd
import re
import csv
today = datetime.today().strftime('%y%m%d ')
def main():
page = 0
name = []
date = []
address = []
district = []
city = []
price = []
area_sqm = []
rooms = []
floor = []
commission_year = []
building_floors = []
garage = []
balcony = []
windows = []
window_type = []
floor_type = []
door_type = []
leasing = []
description = []
link = []
BASE = 'https://www.unegui.mn'
URL = f'{BASE}/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/5-r/?page='
COLUMNS=['Name','Date','Address','District','City','Price','Area_sqm','Rooms','Floor','Commission_year',
'Building_floors','Garage', 'Balcony','Windows','Window_type','Floor_type','door_type','Leasing','Description','Link']
with requests.Session() as session:
while True:
(r := session.get(f'{URL}{page+1}')).raise_for_status()
m = re.search('.*page=(\d+)$', r.url)
if m and int(m.group(1)) == page:
break
page += 1
print(f'Scrapping page {page}')
soup = BS(r.text, 'lxml')
for tag in soup.findAll('div', class_='list-announcement-block'):
_name = tag.find('a', attrs={'itemprop': 'name'})
name.append(_name.get('content', 'N/A'))
if (_link := _name.get('href', None)):
link.append(f'{BASE}{_link}')
(_r := session.get(link[-1])).raise_for_status()
_spanlist = BS(_r.text, 'lxml').find_all('span', class_='value-chars')
floor_type.append(_spanlist[0])
balcony.append(_spanlist[1])
garage.append(_spanlist[2])
window_type.append(_spanlist[3])
door_type.append(_spanlist[4])
windows.append(_spanlist[5])
_alist = BS(_r.text, 'lxml').find_all('a', class_='value-chars')
commission_year.append(_alist[0])
building_floors.append(_alist[1])
area_sqm.append(_alist[2])
floor.append(_alist[3])
leasing.append(_alist[4])
district.append(_alist[5])
address.append(_alist[6])
commission_year.append(_spanlist[2])
rooms.append(tag.find('div', attrs={'announcement-block__breadcrumbs'}).get_text().split('»')[1].strip())
description.append(tag.find('div', class_='announcement-block__description').get_text().strip())
date.append(tag.find('div', class_='announcement-block__date').get_text().split(',')[0].strip())
city.append((tag.find('meta', attrs={'itemprop': 'areaServed'})).get('content'))
if (_price := tag.find('meta', attrs={'itemprop': 'price'})) is None:
_price = tag.find('div', class_='announcement-block__price _premium')
price.append(_price.get_text().strip() if _price else 'N/A')
df = pd.DataFrame(zip(name, date, address, district, city,
price, area_sqm, rooms, floor, commission_year,
building_floors, garage, balcony, windows, window_type,
floor_type, door_type, leasing, description, link), columns=COLUMNS)
print(df)
if __name__ == '__main__':
with open(f'{today}HPD.csv', 'w') as fp:
a = csv.writer(fp, delimiter = ',')
a.writerows(main())
Return df
来自您的 main
函数,而不是 print(df)
。
您还可以在数据帧上使用 to_csv 函数。
def main()
...
return df
if __name__ == '__main__':
df = main()
df.to_csv(f'{today}HPD.csv', index=False)
我正在尝试将我的主要功能输出为 CSV 文件。我试过返回我的数据框、打印我的数据框等,但似乎无法正常工作。任何人都可以阐明一下吗?谢谢。
CSV 输出代码:
if __name__ == '__main__':
with open(f'{today}HPD.csv', 'w') as fp:
a = csv.writer(fp, delimiter = ',')
a.writerows(main())
更新的 CSV 输出代码: Click for CSV output image
我使用下面用户提供的答案进行了更新,但我抓取的西里尔文本返回替换块 ����
if __name__ == '__main__':
df = main()
df.to_csv(f'{today}HPD.csv', encoding='cp1251', errors='ignore', index=False)
完整代码:
import requests
from bs4 import BeautifulSoup as BS
from datetime import datetime
import pandas as pd
import re
import csv
today = datetime.today().strftime('%y%m%d ')
def main():
page = 0
name = []
date = []
address = []
district = []
city = []
price = []
area_sqm = []
rooms = []
floor = []
commission_year = []
building_floors = []
garage = []
balcony = []
windows = []
window_type = []
floor_type = []
door_type = []
leasing = []
description = []
link = []
BASE = 'https://www.unegui.mn'
URL = f'{BASE}/l-hdlh/l-hdlh-zarna/oron-suuts-zarna/5-r/?page='
COLUMNS=['Name','Date','Address','District','City','Price','Area_sqm','Rooms','Floor','Commission_year',
'Building_floors','Garage', 'Balcony','Windows','Window_type','Floor_type','door_type','Leasing','Description','Link']
with requests.Session() as session:
while True:
(r := session.get(f'{URL}{page+1}')).raise_for_status()
m = re.search('.*page=(\d+)$', r.url)
if m and int(m.group(1)) == page:
break
page += 1
print(f'Scrapping page {page}')
soup = BS(r.text, 'lxml')
for tag in soup.findAll('div', class_='list-announcement-block'):
_name = tag.find('a', attrs={'itemprop': 'name'})
name.append(_name.get('content', 'N/A'))
if (_link := _name.get('href', None)):
link.append(f'{BASE}{_link}')
(_r := session.get(link[-1])).raise_for_status()
_spanlist = BS(_r.text, 'lxml').find_all('span', class_='value-chars')
floor_type.append(_spanlist[0])
balcony.append(_spanlist[1])
garage.append(_spanlist[2])
window_type.append(_spanlist[3])
door_type.append(_spanlist[4])
windows.append(_spanlist[5])
_alist = BS(_r.text, 'lxml').find_all('a', class_='value-chars')
commission_year.append(_alist[0])
building_floors.append(_alist[1])
area_sqm.append(_alist[2])
floor.append(_alist[3])
leasing.append(_alist[4])
district.append(_alist[5])
address.append(_alist[6])
commission_year.append(_spanlist[2])
rooms.append(tag.find('div', attrs={'announcement-block__breadcrumbs'}).get_text().split('»')[1].strip())
description.append(tag.find('div', class_='announcement-block__description').get_text().strip())
date.append(tag.find('div', class_='announcement-block__date').get_text().split(',')[0].strip())
city.append((tag.find('meta', attrs={'itemprop': 'areaServed'})).get('content'))
if (_price := tag.find('meta', attrs={'itemprop': 'price'})) is None:
_price = tag.find('div', class_='announcement-block__price _premium')
price.append(_price.get_text().strip() if _price else 'N/A')
df = pd.DataFrame(zip(name, date, address, district, city,
price, area_sqm, rooms, floor, commission_year,
building_floors, garage, balcony, windows, window_type,
floor_type, door_type, leasing, description, link), columns=COLUMNS)
print(df)
if __name__ == '__main__':
with open(f'{today}HPD.csv', 'w') as fp:
a = csv.writer(fp, delimiter = ',')
a.writerows(main())
Return df
来自您的 main
函数,而不是 print(df)
。
您还可以在数据帧上使用 to_csv 函数。
def main()
...
return df
if __name__ == '__main__':
df = main()
df.to_csv(f'{today}HPD.csv', index=False)