python-docx 将粗体和非粗体字符串添加到 table 中的同一单元格

python-docx adding bold and non-bold strings to same cell in table

我正在使用 python-docx 创建一个文档,其中包含一个我想用文本数据填充的表格(table)。我的文本如下所示:

01:02:10.3 
a: Lorem ipsum dolor sit amet,  
b: consectetur adipiscing elit.
a: Mauris a turpis erat. 
01:02:20.4 
a: Vivamus dignissim aliquam
b: Nam ultricies
(etc.)

我需要像这样用表格(table)来组织它(使用 ASCII 进行可视化):

+---+--------------------+---------------------------------+
|   |         A          |                B                |
+---+--------------------+---------------------------------+
| 1 | 01:02:10.3         | a: Lorem ipsum dolor sit amet,  |
| 2 |                    | b: consectetur adipiscing elit. |
| 3 |                    | a: Mauris a turpis erat.        |
| 4 | ------------------ | ------------------------------- |
| 5 | 01:02:20.4         | a: Vivamus dignissim aliquam    |
| 6 |                    | b: Nam ultricies                |
+---+--------------------+---------------------------------+

但是,我需要将“a:”开头的行全部加粗,而“b:”开头的行不加粗,并且它们都位于同一个单元格中。按照我想要的方式迭代和组织这些文本非常容易,但我真的不确定如何只将单元格中的某些行设为粗体:

IS_BOLD = { 
    'a': True
    'b': False
}

row_cells = table.add_row().cells

for line in lines: 
    if is_timestamp(line): # function that uses regex to discern between columns
        if row_cells[1]:
            row_cells = table.add_row().cells

        row_cells[0].text = line

    else 
        row_cells[1].text += line

        if IS_BOLD[ line.split(":")[0] ]:
            # make only this line within the cell bold, somehow.

(这是一种伪代码,我实际上还做了更多的文本处理,但这与本问题关系不大)。我找到了一个示例,其中有人使用了一种叫做 run 的东西,但我发现很难理解如何将它应用到我的案例中。

有什么帮助吗? 谢谢。

您需要在单元格的段落中添加 run。这样您就可以控制要加粗的特定文本

完整示例:

from docx import Document
from docx.shared import Inches
import os
import re


def is_timestamp(line):
    """Return True when *line* starts with an ``NN:NN:NN`` digit pattern.

    Only the beginning of the line is inspected, so any trailing text (for
    example a fractional part such as ``.3``) is accepted.  The digits are not
    range-checked.
    """
    # Deliberately loose heuristic; replace with a stricter parser if needed.
    found = re.match(r'^\d{2}:\d{2}:\d{2}', line)
    return found is not None


def parse_raw_script(raw_script):
    """Yield one ``{'timestamp': ..., 'content': ...}`` dict per timestamp block.

    Every line of *raw_script* is stripped.  A line recognised by
    ``is_timestamp`` opens a new block; the lines that follow it are joined
    with ``'\\n'`` to form that block's content.  Text that appears before the
    first timestamp is discarded, and nothing is yielded when the script
    contains no timestamp at all.
    """
    stamp = ''
    body = ''
    for raw_line in raw_script.splitlines():
        text = raw_line.strip()

        if not is_timestamp(text):
            # Content line: separate it from earlier content with a newline.
            body = body + '\n' + text if body else text
            continue

        # A new timestamp closes the block collected so far (if any).
        if stamp:
            yield {'timestamp': stamp, 'content': body}
        stamp, body = text, ''

    # Emit the block that was still open when the input ended.
    if stamp:
        yield {'timestamp': stamp, 'content': body}


def should_bold(line):
    """Return True when *line* begins with the ``a:`` prefix."""
    # Placeholder rule -- replace with your own logic.
    return line[:2] == 'a:'


def load_raw_script():
    """Return the sample script text taken from the question.

    This is a stand-in for real input; presumably the text would be read from
    a file instead.
    """
    # Trailing spaces are kept exactly as they appear in the sample.
    sample_lines = (
        '01:02:10.3 ',
        'a: Lorem ipsum dolor sit amet,  ',
        'b: consectetur adipiscing elit.',
        'a: Mauris a turpis erat. ',
        '01:02:20.4 ',
        'a: Vivamus dignissim aliquam',
        'b: Nam ultricies',
    )
    return '\n'.join(sample_lines)


def convert_raw_script_to_docx(raw_script, output_file_path):
    """Render *raw_script* as a three-column table and save it as a .docx file.

    The table starts with a header row (``''``, ``'A'``, ``'B'``) and gets one
    additional row per block produced by ``parse_raw_script``: the timestamp
    goes into column A and the block's lines go into column B, all inside the
    cell's single default paragraph.  Each line becomes its own run so that
    lines for which ``should_bold`` is true can be bold while the others stay
    regular.

    Args:
        raw_script: The full script text (timestamps followed by their lines).
        output_file_path: Destination passed to ``Document.save``.
    """
    document = Document()
    table = document.add_table(rows=1, cols=3, style="Table Grid")

    # add header row
    header_cells = table.rows[0].cells
    header_cells[0].text = ''
    header_cells[1].text = 'A'
    header_cells[2].text = 'B'

    # create a row for each timestamp block of the parsed script
    for script_row in parse_raw_script(raw_script):
        row_cells = table.add_row().cells
        row_cells[1].text = script_row['timestamp']

        # using the cell's default paragraph here instead of creating one
        content_paragraph = row_cells[2].paragraphs[0]

        content_lines = script_row['content'].splitlines()
        last_index = len(content_lines) - 1
        for index, line in enumerate(content_lines):
            run = content_paragraph.add_run(line)
            if should_bold(line):
                run.bold = True

            # Break only *between* lines: a break after the final line would
            # leave an empty trailing line at the bottom of the cell.
            if index < last_index:
                run.add_break()

    # resize table columns (optional)
    for row in table.rows:
        cells = row.cells
        cells[0].width = Inches(0.2)
        cells[1].width = Inches(1.9)
        cells[2].width = Inches(3.9)

    document.save(output_file_path)


def main():
    """Build the sample document at ``<script dir>/dist/so-template.docx``.

    Creates the ``dist`` directory next to this script when it is missing,
    loads the sample script text and converts it to a .docx file.
    """
    script_dir = os.path.dirname(__file__)
    dist_dir = os.path.join(script_dir, 'dist')

    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(dist_dir, exist_ok=True)

    output_file_path = os.path.join(dist_dir, 'so-template.docx')
    raw_script = load_raw_script()
    convert_raw_script_to_docx(raw_script, output_file_path)


# Run the example only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

结果(文件应该在./dist/so-template.docx):


顺便说一句 - 如果您更喜欢坚持使用自己的示例,则需要更改以下内容:

# Maps the prefix of a line (the text before its first ':') to whether that
# line should be bold.
IS_BOLD = {
    'a': True,
    'b': False
}

# `table`, `lines` and `is_timestamp` are expected to come from the
# surrounding code; they are not defined in this snippet.
row_cells = table.add_row().cells

for line in lines:
    if is_timestamp(line):
        # NOTE(review): this tests the cell object itself, not its text --
        # confirm that it really distinguishes an empty row from a filled one.
        if row_cells[1]:
            row_cells = table.add_row().cells
        row_cells[0].text = line

    else:
        # Add the line as its own run in the cell's first paragraph so it can
        # be formatted independently of the other lines in the same cell.
        run = row_cells[1].paragraphs[0].add_run(line)
        # Raises KeyError for a prefix that is not a key of IS_BOLD.
        if IS_BOLD[line.split(":")[0]]:
            run.bold = True

        # Break after the run so the next line starts on a new line.
        run.add_break()