python-docx 将粗体和非粗体字符串添加到 table 中的同一单元格

python-docx adding bold and non-bold strings to same cell in table

我正在使用 python-docx 创建一个文档,其中包含一个表格(table),我想用文本数据来填充它。我的文本如下所示:

a: Lorem ipsum dolor sit amet,  
b: consectetur adipiscing elit.
a: Mauris a turpis erat. 
a: Vivamus dignissim aliquam
b: Nam ultricies

我需要把它组织成如下所示的表格(用 ASCII 示意):

|   |         A          |                B                |
| 1 | 01:02:10.3         | a: Lorem ipsum dolor sit amet,  |
| 2 |                    | b: consectetur adipiscing elit. |
| 3 |                    | a: Mauris a turpis erat.        |
| 4 | ------------------ | ------------------------------- |
| 5 | 01:02:20.4         | a: Vivamus dignissim aliquam    |
| 6 |                    | b: Nam ultricies                |

但是,我需要将“a:”开头的行全部加粗,而“b:”开头的行不加粗,并且它们都位于同一个单元格中。按照我想要的方式迭代和组织数据非常容易,但我真的不确定如何只将单元格中的某些行设为粗体:

# Maps the speaker prefix (the text before the first ':') to whether that
# speaker's line should be rendered in bold.
IS_BOLD = {
    'a': True,
    'b': False,
}

row_cells = table.add_row().cells

for line in lines:
    if is_timestamp(line):  # function that uses regex to discern between columns
        if row_cells[1]:
            row_cells = table.add_row().cells

        # Timestamps go into the first column of the current row.
        row_cells[0].text = line
    else:
        # Every other line is appended to the second column of the same row.
        row_cells[1].text += line

        if IS_BOLD[line.split(":")[0]]:
            # make only this line within the cell bold, somehow.
            pass

(这只是一段伪代码,我实际上还做了更多的文本处理,但这与本问题无关。)我找到了一个相关的问题,其中有人使用了一种叫做 run 的东西,但我发现很难理解如何将它应用到我的案例中。

有人能帮忙吗?谢谢。

您需要在单元格的段落中添加 run,这样就可以单独控制哪些文本需要加粗。


from docx import Document
from docx.shared import Inches
import os
import re

def is_timestamp(line):
    """Return True when *line* begins with two digits, ':', two digits, ':', two digits.

    Only that leading prefix is examined; anything after it (fractional
    seconds, trailing spaces, ...) is ignored, and the digit values are not
    range-checked.
    """
    # it's flaky, I saw you have your own method and probably you did a better job parsing this.
    leading_match = re.match(r'^\d{2}:\d{2}:\d{2}', line)
    # A match object is always truthy, so bool() maps match -> True, None -> False.
    return bool(leading_match)

def parse_raw_script(raw_script):
    """Split a raw script into one record per timestamp.

    Every line is stripped of surrounding whitespace. A line recognised by
    ``is_timestamp`` starts a new record; the lines that follow it, up to the
    next timestamp, are joined with ``'\\n'`` and become that record's
    content. Lines that appear before the first timestamp are discarded.

    Args:
        raw_script: The whole script as a single string.

    Yields:
        dict: ``{'timestamp': <timestamp line>, 'content': <joined lines>}``,
        in the order the timestamps appear in ``raw_script``.
    """
    current_timestamp = ''
    current_content = ''
    for line in raw_script.splitlines():
        line = line.strip()
        if is_timestamp(line):
            # A new timestamp closes the record collected so far (if any).
            if current_timestamp:
                yield {
                    'timestamp': current_timestamp,
                    'content': current_content,
                }

            current_timestamp = line
            current_content = ''
            # The timestamp line itself must not leak into the content.
            continue

        if current_content:
            current_content += '\n'

        current_content += line

    # Emit the last record, which has no following timestamp to close it.
    if current_timestamp:
        yield {
            'timestamp': current_timestamp,
            'content': current_content,
        }

def should_bold(line):
    """Return True when *line* starts with the ``a:`` prefix, False otherwise."""
    # i leave it to you to replace with your logic
    bold_prefix = 'a:'
    return line[:len(bold_prefix)] == bold_prefix

def load_raw_script():
    """Return the hard-coded sample script as a single newline-joined string.

    The sample is one timestamp line followed by five ``a:``/``b:`` lines.
    Some of the lines end in spaces; they are written out explicitly below so
    the trailing whitespace is visible. There is no trailing newline.
    """
    # I placed here the example from your question. read from file instead I presume
    sample_lines = (
        '01:02:10.3 ',
        'a: Lorem ipsum dolor sit amet,  ',
        'b: consectetur adipiscing elit.',
        'a: Mauris a turpis erat. ',
        'a: Vivamus dignissim aliquam',
        'b: Nam ultricies',
    )
    return '\n'.join(sample_lines)

def convert_raw_script_to_docx(raw_script, output_file_path):
    """Render a raw script as a three-column table and save it as a .docx file.

    The table gets a header row ('', 'A', 'B') and then one row per record
    produced by ``parse_raw_script``: the timestamp goes into column A and the
    content lines go into column B. Each content line is its own run inside
    the cell's single paragraph, so lines selected by ``should_bold`` can be
    made bold individually.

    Args:
        raw_script: The whole script as a single string.
        output_file_path: Where the generated document is written.
    """
    document = Document()
    table = document.add_table(rows=1, cols=3, style="Table Grid")

    # add header row
    header_cells = table.rows[0].cells
    header_cells[0].text = ''
    header_cells[1].text = 'A'
    header_cells[2].text = 'B'

    # create a row for each timestamp row
    for script_row in parse_raw_script(raw_script):
        cells = table.add_row().cells
        cells[1].text = script_row['timestamp']

        # using the cell's default paragraph here instead of creating one
        content_paragraph = cells[2].paragraphs[0]

        for index, line in enumerate(script_row['content'].splitlines()):
            if index:
                # Runs in one paragraph are laid out back to back, so put an
                # explicit line break between consecutive script lines.
                content_paragraph.add_run().add_break()

            run = content_paragraph.add_run(line)
            if should_bold(line):
                run.bold = True

    # resize table columns (optional)
    for row in table.rows:
        cells = row.cells
        cells[0].width = Inches(0.2)
        cells[1].width = Inches(1.9)
        cells[2].width = Inches(3.9)

    # Without saving, nothing is ever written to output_file_path.
    document.save(output_file_path)

def main():
    """Build ``dist/so-template.docx`` next to this script from the sample script."""
    # abspath keeps the result well-defined even when __file__ is a bare
    # relative file name (dirname would then be an empty string).
    script_dir = os.path.dirname(os.path.abspath(__file__))
    dist_dir = os.path.join(script_dir, 'dist')

    # Create the output directory when it is missing; a no-op when it exists.
    os.makedirs(dist_dir, exist_ok=True)

    output_file_path = os.path.join(dist_dir, 'so-template.docx')
    raw_script = load_raw_script()
    convert_raw_script_to_docx(raw_script, output_file_path)

# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()


顺便说一句:如果您更愿意沿用自己的示例,则需要做如下修改:

    'a': True,
    'b': False

row_cells = table.add_row().cells

for line in lines:
    if is_timestamp(line):
        if row_cells[1]:
            row_cells = table.add_row().cells
        row_cells[0].text = line

        run = row_cells[1].paragraphs[0].add_run(line)
        if IS_BOLD[line.split(":")[0]]:
            run.bold = True
