如何在 csv 中的每个单元格中递增数据以及一些问题[python]
How to increment data in every cell in a CSV, and a few other questions [python]
我写了一段代码,可以从网页上抓取一些细节。我的问题是,每当我运行我的代码时,它都会打印如下输出:
|['Kapil Sarawagi' 'ksdesigngroup@gmail.com' '1412702594']| |['MA ARCHICTECTS PRIVATE LIMITED' 'studio@maarchitects.in' '1414299999']| |['Prabhu Dayal Kanojiya' 'prabhudayalkanojiya@yahoo.in' '9829055412']|
不过我希望它是这样的。
|['Kapil Sarawagi' 'ksdesigngroup@gmail.com' '1412702594']|
|['MA ARCHICTECTS PRIVATE LIMITED' 'studio@maarchitects.in' '1414299999']|
|['Prabhu Dayal Kanojiya' 'prabhudayalkanojiya@yahoo.in' '9829055412']|
就像在每个单元格中...我该怎么做?
第二个问题,如何让我的代码看起来专业?我的编码风格不好吗?我怎样才能让它更短?下面是我的代码:
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
import csv
# Scrape contact details (name, email, phone) from a project page and write
# them to output.csv.
# NOTE(review): indentation was lost in this paste, so which of the lines
# after each `for` belong to the loop body cannot be determined from here.

# Download the page and parse it with the built-in "html.parser" backend.
url = "http://www.rera-rajasthan.in/Home/ViewProject?id=JgMAAA"
html = urlopen(url)
soup = BeautifulSoup(html, "html.parser")
# Rows that will eventually be handed to the CSV writer.
finaldata = []
# Every <div class="panel-body"> on the page; each loop below walks this list.
data = soup.find_all("div", {"class":"panel-body"})
#filename = "Rajasthan.csv"
#f = open(filename, "r")
# First pass: the table at index 21 among the "table table-bordered" tables.
for i in data:# to get engineer
date = i.find_all("table", {"class":"table table-bordered"})
getname = date[21].find_all("td")
# Cell order in the table: index 0 is read as email, 1 as name, 3 as phone.
name = getname[1].text
email = getname[0].text
phone = getname[3].text
# sublist is (re)created here and collects one [name, email, phone] per record.
sublist = []
fname = [name, email, phone]
sublist.append(fname)
# Second pass: same extraction, but from the table at index 20.
for i in data:# to extract architect
date = i.find_all("table", {"class":"table table-bordered"})
getname = date[20].find_all("td")
name = getname[1].text
email = getname[0].text
phone = getname[3].text
#sublist = []
fname = [name, email, phone]
sublist.append(fname)
# Third pass: same extraction, but from the table at index 19.
for i in data:# to extract contractor
date = i.find_all("table", {"class":"table table-bordered"})
getname = date[19].find_all("td")
name = getname[1].text
email = getname[0].text
phone = getname[3].text
#sublist = []
fname = [name, email, phone]
sublist.append(fname)
# NOTE(review): this appends the whole sublist (a list of [name, email, phone]
# lists) as ONE element of finaldata, so it is later written as ONE CSV row
# whose fields are the inner lists. That matches the single-row output shown
# in the question.
finaldata.append(sublist)
# Write the collected rows using ',' as delimiter, '|' as quote character and
# '\n' as line terminator.
with open("output.csv", "w")as csvfile:
writer = csv.writer(csvfile, delimiter=',',quotechar='|', lineterminator='\n')
# One writerow call per element of finaldata.
for i in range(0, len(finaldata)):
writer.writerow(finaldata[i])
实现相同目标的更短代码:
import requests
from lxml import html
# Download the project page and parse it into an lxml element tree.
response = requests.get('http://www.rera-rajasthan.in/Home/ViewProject?id=JgMAAA')
tree = html.fromstring(response.content)
# Getting all <h3> tags with 'TableHeading' class
for heading in tree.xpath('//h3[@class="TableHeading"]'):
    # Extracting <h3> heading name/text
    heading_name = heading.xpath('text()')[0]
    # Checking if <h3> heading name has one of these names
    # We only want to get data from the table next to each one of them
    if heading_name in ['CONTRACTOR', 'ARCHITECT', 'STRUCTURAL ENGINEER']:
        # As each table heading has a table below (following-sibling) them
        # We extract the data from that only table (table[1])
        # The unpacking expects exactly four <td> text nodes, in this order.
        email, name, address, phone = heading.xpath('.//following-sibling::table[1]//tr/td/text()')
        # print() as a function call: works on Python 3 and prints the same
        # list on Python 2 (the bare `print [...]` statement is Python 2 only).
        print([name, email, phone])
结果:
['Prabhu Dayal Kanojiya', 'prabhudayalkanojiya@yahoo.in', '9829055412']
['MA ARCHICTECTS PRIVATE LIMITED', 'studio@maarchitects.in', '1414299999']
['Kapil Sarawagi', 'ksdesigngroup@gmail.com', '1412702594']
我写了一段代码,可以从网页上抓取一些细节。我的问题是,每当我运行我的代码时,它都会打印如下输出:
|['Kapil Sarawagi' 'ksdesigngroup@gmail.com' '1412702594']| |['MA ARCHICTECTS PRIVATE LIMITED' 'studio@maarchitects.in' '1414299999']| |['Prabhu Dayal Kanojiya' 'prabhudayalkanojiya@yahoo.in' '9829055412']|
不过我希望它是这样的。
|['Kapil Sarawagi' 'ksdesigngroup@gmail.com' '1412702594']|
|['MA ARCHICTECTS PRIVATE LIMITED' 'studio@maarchitects.in' '1414299999']|
|['Prabhu Dayal Kanojiya' 'prabhudayalkanojiya@yahoo.in' '9829055412']|
就像在每个单元格中...我该怎么做?
第二个问题,如何让我的代码看起来专业?我的编码风格不好吗?我怎样才能让它更短?下面是我的代码:
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
import csv
# Scrape contact details (name, email, phone) from a project page and write
# them to output.csv.
# NOTE(review): indentation was lost in this paste, so which of the lines
# after each `for` belong to the loop body cannot be determined from here.

# Download the page and parse it with the built-in "html.parser" backend.
url = "http://www.rera-rajasthan.in/Home/ViewProject?id=JgMAAA"
html = urlopen(url)
soup = BeautifulSoup(html, "html.parser")
# Rows that will eventually be handed to the CSV writer.
finaldata = []
# Every <div class="panel-body"> on the page; each loop below walks this list.
data = soup.find_all("div", {"class":"panel-body"})
#filename = "Rajasthan.csv"
#f = open(filename, "r")
# First pass: the table at index 21 among the "table table-bordered" tables.
for i in data:# to get engineer
date = i.find_all("table", {"class":"table table-bordered"})
getname = date[21].find_all("td")
# Cell order in the table: index 0 is read as email, 1 as name, 3 as phone.
name = getname[1].text
email = getname[0].text
phone = getname[3].text
# sublist is (re)created here and collects one [name, email, phone] per record.
sublist = []
fname = [name, email, phone]
sublist.append(fname)
# Second pass: same extraction, but from the table at index 20.
for i in data:# to extract architect
date = i.find_all("table", {"class":"table table-bordered"})
getname = date[20].find_all("td")
name = getname[1].text
email = getname[0].text
phone = getname[3].text
#sublist = []
fname = [name, email, phone]
sublist.append(fname)
# Third pass: same extraction, but from the table at index 19.
for i in data:# to extract contractor
date = i.find_all("table", {"class":"table table-bordered"})
getname = date[19].find_all("td")
name = getname[1].text
email = getname[0].text
phone = getname[3].text
#sublist = []
fname = [name, email, phone]
sublist.append(fname)
# NOTE(review): this appends the whole sublist (a list of [name, email, phone]
# lists) as ONE element of finaldata, so it is later written as ONE CSV row
# whose fields are the inner lists. That matches the single-row output shown
# in the question.
finaldata.append(sublist)
# Write the collected rows using ',' as delimiter, '|' as quote character and
# '\n' as line terminator.
with open("output.csv", "w")as csvfile:
writer = csv.writer(csvfile, delimiter=',',quotechar='|', lineterminator='\n')
# One writerow call per element of finaldata.
for i in range(0, len(finaldata)):
writer.writerow(finaldata[i])
实现相同目标的更短代码:
import requests
from lxml import html
# Download the project page and parse it into an lxml element tree.
response = requests.get('http://www.rera-rajasthan.in/Home/ViewProject?id=JgMAAA')
tree = html.fromstring(response.content)
# Getting all <h3> tags with 'TableHeading' class
for heading in tree.xpath('//h3[@class="TableHeading"]'):
    # Extracting <h3> heading name/text
    heading_name = heading.xpath('text()')[0]
    # Checking if <h3> heading name has one of these names
    # We only want to get data from the table next to each one of them
    if heading_name in ['CONTRACTOR', 'ARCHITECT', 'STRUCTURAL ENGINEER']:
        # As each table heading has a table below (following-sibling) them
        # We extract the data from that only table (table[1])
        # The unpacking expects exactly four <td> text nodes, in this order.
        email, name, address, phone = heading.xpath('.//following-sibling::table[1]//tr/td/text()')
        # print() as a function call: works on Python 3 and prints the same
        # list on Python 2 (the bare `print [...]` statement is Python 2 only).
        print([name, email, phone])
结果:
['Prabhu Dayal Kanojiya', 'prabhudayalkanojiya@yahoo.in', '9829055412']
['MA ARCHICTECTS PRIVATE LIMITED', 'studio@maarchitects.in', '1414299999']
['Kapil Sarawagi', 'ksdesigngroup@gmail.com', '1412702594']