Python - 是否可以为提取的 .DOC 文件创建标题?
Python - Is there anyway to create a headline for the extracted .DOC file?
这是我的代码:
from selenium import webdriver
from bs4 import BeautifulSoup
from docx import Document
import pandas as pd
import requests
# Scrape the event titles from the past-events page and save them as a
# bulleted list in a Word document.

# Browser-like User-Agent value sent with the request.
user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.37"
url = "https://emetonline.org/events/past-events/"
data = requests.get(url, headers={"User-Agent": user_agent})
soup = BeautifulSoup(data.text, "lxml")
document = Document()
events = soup.find_all("div", class_="col-12")
for event in events:
    event_name = event.find("h4")
    # find() returns None when the div has no <h4>; skip such divs explicitly
    # instead of hiding every possible error behind a bare except.
    if event_name is None:
        continue
    print(event_name.text)
    document.add_paragraph(event_name.text, style='List Bullet')
    print(event_name)
document.save('demo.docx')
我希望文档的标题使用 Times New Roman 字体、字号为 14 磅。有什么办法可以做到吗?
from bs4 import BeautifulSoup
from docx import Document
from docx.shared import Pt
import requests
# Scrape the event titles from the past-events page and save them in a Word
# document: a "Past Events" heading followed by a bulleted list.

# Browser-like User-Agent value sent with the request.
user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.37"
url = "https://emetonline.org/events/past-events/"
data = requests.get(url, headers={"User-Agent": user_agent})
soup = BeautifulSoup(data.text, "lxml")
document = Document()
# Create an empty heading paragraph, then add the title text as a run so the
# font can be set on that run directly.
heading = document.add_heading().add_run("Past Events")
heading.font.name = "Times New Roman"
heading.font.size = Pt(14)
events = soup.find_all("div", class_="col-12")
for event in events:
    event_name = event.find("h4")
    # find() returns None when the div has no <h4>; skip such divs explicitly
    # instead of hiding every possible error behind a bare except.
    if event_name is None:
        continue
    print(event_name.text)
    document.add_paragraph(event_name.text, style='List Bullet')
    print(event_name)
document.save('demo.docx')
我在顶部添加了额外的导入:
from docx.shared import Pt
这样我们就可以将字体大小设置为 14 磅。
然后我添加了用于创建标题的代码:
# Add an empty heading paragraph and put the title text in a run, so the
# font settings below apply to that text.
heading = document.add_heading().add_run("Past Events")
# Set the typeface and a 14-point size on the heading run.
heading.font.name = "Times New Roman"
heading.font.size = Pt(14)
这是我的代码:
from selenium import webdriver
from bs4 import BeautifulSoup
from docx import Document
import pandas as pd
import requests
# Scrape the event titles from the past-events page and save them as a
# bulleted list in a Word document.

# Browser-like User-Agent value sent with the request.
user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.37"
url = "https://emetonline.org/events/past-events/"
data = requests.get(url, headers={"User-Agent": user_agent})
soup = BeautifulSoup(data.text, "lxml")
document = Document()
events = soup.find_all("div", class_="col-12")
for event in events:
    event_name = event.find("h4")
    # find() returns None when the div has no <h4>; skip such divs explicitly
    # instead of hiding every possible error behind a bare except.
    if event_name is None:
        continue
    print(event_name.text)
    document.add_paragraph(event_name.text, style='List Bullet')
    print(event_name)
document.save('demo.docx')
我希望文档的标题使用 Times New Roman 字体、字号为 14 磅。有什么办法可以做到吗?
from bs4 import BeautifulSoup
from docx import Document
from docx.shared import Pt
import requests
# Scrape the event titles from the past-events page and save them in a Word
# document: a "Past Events" heading followed by a bulleted list.

# Browser-like User-Agent value sent with the request.
user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.37"
url = "https://emetonline.org/events/past-events/"
data = requests.get(url, headers={"User-Agent": user_agent})
soup = BeautifulSoup(data.text, "lxml")
document = Document()
# Create an empty heading paragraph, then add the title text as a run so the
# font can be set on that run directly.
heading = document.add_heading().add_run("Past Events")
heading.font.name = "Times New Roman"
heading.font.size = Pt(14)
events = soup.find_all("div", class_="col-12")
for event in events:
    event_name = event.find("h4")
    # find() returns None when the div has no <h4>; skip such divs explicitly
    # instead of hiding every possible error behind a bare except.
    if event_name is None:
        continue
    print(event_name.text)
    document.add_paragraph(event_name.text, style='List Bullet')
    print(event_name)
document.save('demo.docx')
我在顶部添加了额外的导入:
from docx.shared import Pt
这样我们就可以将字体大小设置为 14 磅。
然后我添加了用于创建标题的代码:
# Add an empty heading paragraph and put the title text in a run, so the
# font settings below apply to that text.
heading = document.add_heading().add_run("Past Events")
# Set the typeface and a 14-point size on the heading run.
heading.font.name = "Times New Roman"
heading.font.size = Pt(14)