python-docx:在表格的同一单元格中添加粗体和非粗体字符串
python-docx adding bold and non-bold strings to same cell in table
我正在使用 python-docx 创建一个文档,其中包含一个我想用文本数据填充的表格。我的文本如下所示:
01:02:10.3
a: Lorem ipsum dolor sit amet,
b: consectetur adipiscing elit.
a: Mauris a turpis erat.
01:02:20.4
a: Vivamus dignissim aliquam
b: Nam ultricies
(etc.)
我需要把它组织成如下这样的表格(用 ASCII 进行示意):
+---+--------------------+---------------------------------+
|   | A                  | B                               |
+---+--------------------+---------------------------------+
| 1 | 01:02:10.3         | a: Lorem ipsum dolor sit amet,  |
| 2 |                    | b: consectetur adipiscing elit. |
| 3 |                    | a: Mauris a turpis erat.        |
| 4 | ------------------ | ------------------------------- |
| 5 | 01:02:20.4         | a: Vivamus dignissim aliquam    |
| 6 |                    | b: Nam ultricies                |
+---+--------------------+---------------------------------+
但是,我需要将“a:”之后的所有内容都加粗,而“b:”之后的内容不加粗,尽管它们位于同一个单元格中。按照我想要的方式迭代和组织这些内容非常容易,但我真的不确定如何只将某些行设为粗体:
# Sketch from the question: time stamps go into column 0 and the spoken
# lines into column 1; the open problem is how to bold only some lines.
# `table`, `lines` and `is_timestamp` are defined elsewhere by the asker.
IS_BOLD = {
    'a': True,
    'b': False
}

row_cells = table.add_row().cells
for line in lines:
    if is_timestamp(line):  # function that uses regex to discern between columns
        if row_cells[1]:
            row_cells = table.add_row().cells
        row_cells[0].text = line
    else:
        row_cells[1].text += line
        if IS_BOLD[ line.split(":")[0] ]:
            # make only this line within the cell bold, somehow.
            pass
(这是一种伪代码,我实际上还做了更多的文本处理,但这与这里的问题关系不大)。我找到了一个类似的问题,其中有人使用了一种叫做 run 的东西,但我发现很难理解如何将它应用到我的案例中。
有什么帮助吗?
谢谢。
您需要在单元格的段落中添加 run。这样您就可以控制要加粗的特定文本。
完整示例:
from docx import Document
from docx.shared import Inches
import os
import re
def is_timestamp(line):
    """Return True when *line* starts with an ``HH:MM:SS`` time stamp.

    Only the prefix is checked, so whatever follows the seconds (for
    example the fractional part of ``01:02:10.3``) is accepted as-is.
    This is a loose heuristic; swap in a stricter parser if your data
    needs one.
    """
    return re.match(r'^\d{2}:\d{2}:\d{2}', line) is not None
def parse_raw_script(raw_script):
    """Group the lines of *raw_script* under the time stamp preceding them.

    Every line is stripped of surrounding whitespace before it is
    classified with ``is_timestamp``.

    Yields:
        One dict per time stamp, in input order, with the keys
        ``'timestamp'`` (the time-stamp line) and ``'content'`` (the lines
        that follow it, joined with newline characters).

    Lines that appear before the first time stamp are discarded.
    """
    current_timestamp = ''
    content_lines = []

    for line in raw_script.splitlines():
        line = line.strip()

        if is_timestamp(line):
            # A new time stamp closes the group collected so far.
            if current_timestamp:
                yield {
                    'timestamp': current_timestamp,
                    'content': '\n'.join(content_lines)
                }
            current_timestamp = line
            content_lines = []
            continue

        # Blank lines ahead of the first real content line are dropped;
        # later ones are kept so the joined text keeps its spacing.
        if content_lines or line:
            content_lines.append(line)

    # Flush the final group, which has no following time stamp to close it.
    if current_timestamp:
        yield {
            'timestamp': current_timestamp,
            'content': '\n'.join(content_lines)
        }
def should_bold(line):
    """Return True when *line* should be rendered in bold.

    The rule here is simply "the line starts with ``a:``"; replace it with
    whatever logic your own data requires.
    """
    return line.startswith('a:')
def load_raw_script():
    """Return the sample transcript from the question as one string.

    The lines are separated by newline characters and there is no trailing
    newline. In real use this would presumably read the text from a file.
    """
    return '\n'.join((
        '01:02:10.3',
        'a: Lorem ipsum dolor sit amet,',
        'b: consectetur adipiscing elit.',
        'a: Mauris a turpis erat.',
        '01:02:20.4',
        'a: Vivamus dignissim aliquam',
        'b: Nam ultricies',
    ))
def convert_raw_script_to_docx(raw_script, output_file_path):
    """Render *raw_script* as a three-column table and save it as a .docx.

    The table gets a header row ('', 'A', 'B') followed by one row per
    time stamp yielded by ``parse_raw_script``: the time stamp goes into
    column A and its content lines into column B. Each content line is a
    separate run in the same paragraph, so lines for which ``should_bold``
    is true can be bold while the others are not.

    Args:
        raw_script: The transcript text to convert.
        output_file_path: Where the resulting document is saved.
    """
    document = Document()
    table = document.add_table(rows=1, cols=3, style="Table Grid")

    # add header row
    header_cells = table.rows[0].cells
    header_cells[0].text = ''
    header_cells[1].text = 'A'
    header_cells[2].text = 'B'

    # create a row for each timestamp row
    for script_row in parse_raw_script(raw_script):
        cells = table.add_row().cells
        cells[1].text = script_row['timestamp']

        # using the cell's default paragraph here instead of creating one
        content_paragraph = cells[2].paragraphs[0]
        content_lines = script_row['content'].splitlines()
        last_index = len(content_lines) - 1
        for index, line in enumerate(content_lines):
            run = content_paragraph.add_run(line)
            if should_bold(line):
                run.bold = True
            # Break only between lines; a break after the last line would
            # leave an empty trailing line in the cell.
            if index != last_index:
                run.add_break()

    # resize table columns (optional)
    for row in table.rows:
        cells = row.cells
        cells[0].width = Inches(0.2)
        cells[1].width = Inches(1.9)
        cells[2].width = Inches(3.9)

    document.save(output_file_path)
def main():
    """Write the sample transcript to ``dist/so-template.docx``.

    The ``dist`` directory is created next to this script when missing.
    """
    script_dir = os.path.dirname(__file__)
    dist_dir = os.path.join(script_dir, 'dist')

    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs(dist_dir, exist_ok=True)

    output_file_path = os.path.join(dist_dir, 'so-template.docx')
    raw_script = load_raw_script()
    convert_raw_script_to_docx(raw_script, output_file_path)


if __name__ == '__main__':
    main()
结果(文件应该在 ./dist/so-template.docx):
顺便说一句——如果您更愿意沿用自己的示例,则需要做如下修改:
# Maps a speaker prefix (the text before the first ':') to whether that
# speaker's lines are rendered in bold.
# `table`, `lines` and `is_timestamp` come from the asker's own code.
IS_BOLD = {
    'a': True,
    'b': False,
}

row_cells = table.add_row().cells
for line in lines:
    if is_timestamp(line):
        # A cell object is truthy whatever it contains, so look at the text
        # to decide whether the current row is already in use.
        if row_cells[0].text or row_cells[1].text:
            row_cells = table.add_row().cells
        row_cells[0].text = line
    else:
        paragraph = row_cells[1].paragraphs[0]
        # Put the line break in front of every line but the first, so the
        # cell does not end with an empty line.
        if paragraph.runs:
            paragraph.runs[-1].add_break()
        run = paragraph.add_run(line)
        # Unknown prefixes fall back to non-bold instead of raising KeyError.
        if IS_BOLD.get(line.split(":")[0].strip(), False):
            run.bold = True
我正在使用 python-docx 创建一个文档,其中包含一个我想用文本数据填充的表格。我的文本如下所示:
01:02:10.3
a: Lorem ipsum dolor sit amet,
b: consectetur adipiscing elit.
a: Mauris a turpis erat.
01:02:20.4
a: Vivamus dignissim aliquam
b: Nam ultricies
(etc.)
我需要把它组织成如下这样的表格(用 ASCII 进行示意):
+---+--------------------+---------------------------------+
|   | A                  | B                               |
+---+--------------------+---------------------------------+
| 1 | 01:02:10.3         | a: Lorem ipsum dolor sit amet,  |
| 2 |                    | b: consectetur adipiscing elit. |
| 3 |                    | a: Mauris a turpis erat.        |
| 4 | ------------------ | ------------------------------- |
| 5 | 01:02:20.4         | a: Vivamus dignissim aliquam    |
| 6 |                    | b: Nam ultricies                |
+---+--------------------+---------------------------------+
但是,我需要将“a:”之后的所有内容都加粗,而“b:”之后的内容不加粗,尽管它们位于同一个单元格中。按照我想要的方式迭代和组织这些内容非常容易,但我真的不确定如何只将某些行设为粗体:
# Sketch from the question: time stamps go into column 0 and the spoken
# lines into column 1; the open problem is how to bold only some lines.
# `table`, `lines` and `is_timestamp` are defined elsewhere by the asker.
IS_BOLD = {
    'a': True,
    'b': False
}

row_cells = table.add_row().cells
for line in lines:
    if is_timestamp(line):  # function that uses regex to discern between columns
        if row_cells[1]:
            row_cells = table.add_row().cells
        row_cells[0].text = line
    else:
        row_cells[1].text += line
        if IS_BOLD[ line.split(":")[0] ]:
            # make only this line within the cell bold, somehow.
            pass
(这是一种伪代码,我实际上还做了更多的文本处理,但这与这里的问题关系不大)。我找到了一个类似的问题,其中有人使用了一种叫做 run 的东西,但我发现很难理解如何将它应用到我的案例中。
有什么帮助吗? 谢谢。
您需要在单元格的段落中添加 run。这样您就可以控制要加粗的特定文本。
完整示例:
from docx import Document
from docx.shared import Inches
import os
import re
def is_timestamp(line):
    """Return True when *line* starts with an ``HH:MM:SS`` time stamp.

    Only the prefix is checked, so whatever follows the seconds (for
    example the fractional part of ``01:02:10.3``) is accepted as-is.
    This is a loose heuristic; swap in a stricter parser if your data
    needs one.
    """
    return re.match(r'^\d{2}:\d{2}:\d{2}', line) is not None
def parse_raw_script(raw_script):
    """Group the lines of *raw_script* under the time stamp preceding them.

    Every line is stripped of surrounding whitespace before it is
    classified with ``is_timestamp``.

    Yields:
        One dict per time stamp, in input order, with the keys
        ``'timestamp'`` (the time-stamp line) and ``'content'`` (the lines
        that follow it, joined with newline characters).

    Lines that appear before the first time stamp are discarded.
    """
    current_timestamp = ''
    content_lines = []

    for line in raw_script.splitlines():
        line = line.strip()

        if is_timestamp(line):
            # A new time stamp closes the group collected so far.
            if current_timestamp:
                yield {
                    'timestamp': current_timestamp,
                    'content': '\n'.join(content_lines)
                }
            current_timestamp = line
            content_lines = []
            continue

        # Blank lines ahead of the first real content line are dropped;
        # later ones are kept so the joined text keeps its spacing.
        if content_lines or line:
            content_lines.append(line)

    # Flush the final group, which has no following time stamp to close it.
    if current_timestamp:
        yield {
            'timestamp': current_timestamp,
            'content': '\n'.join(content_lines)
        }
def should_bold(line):
    """Return True when *line* should be rendered in bold.

    The rule here is simply "the line starts with ``a:``"; replace it with
    whatever logic your own data requires.
    """
    return line.startswith('a:')
def load_raw_script():
    """Return the sample transcript from the question as one string.

    The lines are separated by newline characters and there is no trailing
    newline. In real use this would presumably read the text from a file.
    """
    return '\n'.join((
        '01:02:10.3',
        'a: Lorem ipsum dolor sit amet,',
        'b: consectetur adipiscing elit.',
        'a: Mauris a turpis erat.',
        '01:02:20.4',
        'a: Vivamus dignissim aliquam',
        'b: Nam ultricies',
    ))
def convert_raw_script_to_docx(raw_script, output_file_path):
    """Render *raw_script* as a three-column table and save it as a .docx.

    The table gets a header row ('', 'A', 'B') followed by one row per
    time stamp yielded by ``parse_raw_script``: the time stamp goes into
    column A and its content lines into column B. Each content line is a
    separate run in the same paragraph, so lines for which ``should_bold``
    is true can be bold while the others are not.

    Args:
        raw_script: The transcript text to convert.
        output_file_path: Where the resulting document is saved.
    """
    document = Document()
    table = document.add_table(rows=1, cols=3, style="Table Grid")

    # add header row
    header_cells = table.rows[0].cells
    header_cells[0].text = ''
    header_cells[1].text = 'A'
    header_cells[2].text = 'B'

    # create a row for each timestamp row
    for script_row in parse_raw_script(raw_script):
        cells = table.add_row().cells
        cells[1].text = script_row['timestamp']

        # using the cell's default paragraph here instead of creating one
        content_paragraph = cells[2].paragraphs[0]
        content_lines = script_row['content'].splitlines()
        last_index = len(content_lines) - 1
        for index, line in enumerate(content_lines):
            run = content_paragraph.add_run(line)
            if should_bold(line):
                run.bold = True
            # Break only between lines; a break after the last line would
            # leave an empty trailing line in the cell.
            if index != last_index:
                run.add_break()

    # resize table columns (optional)
    for row in table.rows:
        cells = row.cells
        cells[0].width = Inches(0.2)
        cells[1].width = Inches(1.9)
        cells[2].width = Inches(3.9)

    document.save(output_file_path)
def main():
    """Write the sample transcript to ``dist/so-template.docx``.

    The ``dist`` directory is created next to this script when missing.
    """
    script_dir = os.path.dirname(__file__)
    dist_dir = os.path.join(script_dir, 'dist')

    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs(dist_dir, exist_ok=True)

    output_file_path = os.path.join(dist_dir, 'so-template.docx')
    raw_script = load_raw_script()
    convert_raw_script_to_docx(raw_script, output_file_path)


if __name__ == '__main__':
    main()
结果(文件应该在 ./dist/so-template.docx):
顺便说一句——如果您更愿意沿用自己的示例,则需要做如下修改:
# Maps a speaker prefix (the text before the first ':') to whether that
# speaker's lines are rendered in bold.
# `table`, `lines` and `is_timestamp` come from the asker's own code.
IS_BOLD = {
    'a': True,
    'b': False,
}

row_cells = table.add_row().cells
for line in lines:
    if is_timestamp(line):
        # A cell object is truthy whatever it contains, so look at the text
        # to decide whether the current row is already in use.
        if row_cells[0].text or row_cells[1].text:
            row_cells = table.add_row().cells
        row_cells[0].text = line
    else:
        paragraph = row_cells[1].paragraphs[0]
        # Put the line break in front of every line but the first, so the
        # cell does not end with an empty line.
        if paragraph.runs:
            paragraph.runs[-1].add_break()
        run = paragraph.add_run(line)
        # Unknown prefixes fall back to non-bold instead of raising KeyError.
        if IS_BOLD.get(line.split(":")[0].strip(), False):
            run.bold = True