在 Python 中以编程方式将 reStructuredText 转换为纯文本

Convert reStructuredText to plain text programmatically in Python

假设我在字符串中有一些 reStructuredText 源

source = """
============
Introduction
============

Hello world.

.. code-block:: bash

    $ echo Greetings.


"""

import sys

import docutils.nodes
import docutils.parsers.rst
import docutils.utils
import sphinx.writers.text
import sphinx.builders.text

def parse_rst(text: str) -> docutils.nodes.document:
    parser = docutils.parsers.rst.Parser()
    components = (docutils.parsers.rst.Parser,)
    settings = docutils.frontend.OptionParser(components=components).get_default_values()
    document = docutils.utils.new_document('<rst-doc>', settings=settings)
    parser.parse(text, document)
    return document

if __name__ == '__main__':        
    document = parse_rst(source)

我想使用 Python 将其转换为没有 reST 标记的纯文本。

我尝试使用 sphinx.builders.text.TextBuilder,但它似乎需要一个 App 对象,而不是字符串。


此代码有效。它有一些技巧,比如设置一个假的配置目录,也许有更好的方法。

import sys
import textwrap
import types

import docutils.nodes
import docutils.parsers.rst
import docutils.utils
import sphinx.writers.text
import sphinx.builders.text
import sphinx.util.osutil


def parse_rst(text: str) -> docutils.nodes.document:
    parser = docutils.parsers.rst.Parser()
    components = (docutils.parsers.rst.Parser,)
    settings = docutils.frontend.OptionParser(
        components=components
    ).get_default_values()
    document = docutils.utils.new_document("<rst-doc>", settings=settings)
    parser.parse(text, document)
    return document


if __name__ == "__main__":
    source = textwrap.dedent(
        """\
    ============
    Introduction
    ============

    Hello world.

    .. code-block:: bash

        $ echo Greetings.


    """
    )

    document = parse_rst(source)

    app = types.SimpleNamespace(
        srcdir=None,
        confdir=None,
        outdir=None,
        doctreedir="/",
        config=types.SimpleNamespace(
            text_newlines="native",
            text_sectionchars="=",
            text_add_secnumbers=False,
            text_secnumber_suffix=".",
        ),
        tags=set(),
        registry=types.SimpleNamespace(
            create_translator=lambda self, something, new_builder: sphinx.writers.text.TextTranslator(
                document, new_builder
            )
        ),
    )

    builder = sphinx.builders.text.TextBuilder(app)

    translator = sphinx.writers.text.TextTranslator(document, builder)

    document.walkabout(translator)

    print(translator.body)

输出:

    Introduction
    ============

    Hello world.

       $ echo Greetings.

Sphinx 带有 TextBuilder。从命令行:

make text