Read URLs from a text file
Hi everyone, I want to put several URLs into a text file and read them one by one with the code below. I want the script to open the links from the text file and pull data from, say, 10 links instead of just 1. How can I write that?
Thanks a lot for your help.
import requests
from bs4 import BeautifulSoup
import csv

final_data = []
url = "https://denver.craigslist.org/search/cto?purveyor-input=owner&postedToday=1"
r = requests.get(url)
data = r.text
soup = BeautifulSoup(data, "html.parser")

get_details = soup.find_all(class_="result-row")
for details in get_details:
    getclass = details.find_all(class_="hdrlnk")
    for link in getclass:
        link1 = link.get("href")
        sublist = []
        sublist.append(link1)
        final_data.append(sublist)
print(final_data)

filename = "link.txt"
with open("./" + filename, "w") as csvfile:
    csvfile = csv.writer(csvfile, delimiter=",")
    csvfile.writerow("")
    for i in range(0, len(final_data)):
        csvfile.writerow(final_data[i])
If you have each URL on its own line, then just open the file, read all the text, and split on \n to get a list of lines (without the \n):
with open('input.txt') as fh:
    text = fh.read()
all_links = text.split('\n')
Or shorter:
with open('input.txt') as fh:
    all_links = fh.read().split('\n')
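One caveat: if input.txt ends with a trailing newline, split('\n') leaves an empty string at the end of the list, and requests.get('') would then fail. A minimal defensive sketch (same file name assumed) that skips blank lines:

with open('input.txt') as fh:
    # splitlines() drops the line endings; the `if` skips blank lines
    all_links = [line.strip() for line in fh.read().splitlines() if line.strip()]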
After that, you have to use a for-loop to run the code for all URLs:
# - before loop -
final_data = []

# - loop -
for url in all_links:
    # ... code ...

# - after loop -
print(final_data)
# ... write in csv ...
EDIT:
import requests
from bs4 import BeautifulSoup
import csv

# - before loop -

#all_links = [
#    "https://denver.craigslist.org/search/cto?purveyor-input=owner&postedToday=1",
#]

with open('input.txt') as fh:
    all_links = fh.read().split('\n')

final_data = []

# - loop -

for url in all_links:
    print('url:', url)

    response = requests.get(url)
    #print('[DEBUG] code:', response.status_code)

    soup = BeautifulSoup(response.text, "html.parser")

    all_rows = soup.find_all(class_="result-row")
    for row in all_rows:
        # renamed from `all_links` so it doesn't shadow the list being looped over
        row_links = row.find_all(class_="hdrlnk")
        for link in row_links:
            href = link.get("href")
            final_data.append( [href] )
            print(' >', href)

    print('----------')

# - after loop -

#print(final_data)

filename = "output.csv"  # no need to add `./`

with open(filename, "w") as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=",")
    csv_writer.writerow( ["links"] )
    csv_writer.writerows( final_data )  # with `s` at the end
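For reference, input.txt would simply hold one search URL per line, for example (the second line is only an illustrative guess at another Craigslist region):

https://denver.craigslist.org/search/cto?purveyor-input=owner&postedToday=1
https://boulder.craigslist.org/search/cto?purveyor-input=owner&postedToday=1

Also, per the csv module documentation, the output file should be opened with newline='' so csv.writer doesn't emit blank rows between records on Windows:

with open(filename, "w", newline="") as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=",")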