如何使用此 CFG 随机生成字符串？

Question

我有这段描述上下文无关文法（CFG）的代码，我想随机生成符合该文法的字符串；例如，像这样：

"John thinks that Mary hates every green cat"

但我当前的输出是：

[['_S', ['_NP _VP']], ['_NP', ['_Det _Adj _N', '_Det _N', '_Adj _PropN', '_PropN']], ['_VP', ['_Vi', '_Vt _NP', '_Vc _Comp _S']]]
[['_Det', ['the', 'a', 'some', 'any', 'every']], ['_Adj', ['green', 'young', 'tired', 'confused']], ['_N', ['dog', 'cat']], ['_PropN', ['John', 'Mary']], ['_Vi', ['sleeps', 'walks']], ['_Vt', ['loves', 'hates']], ['_Vc', ['says', 'thinks', 'believes']], ['_Comp', ['that']]]

请帮忙！

import random


# Phrase-structure rules, one production per line: "HEAD → alt | alt | ...".
# Every symbol on a right-hand side is another grammar category.
psg_rules_str = (
    "S → NP VP\n"
    "NP → Det Adj N | Det N | Adj PropN | PropN\n"
    "VP → Vi | Vt NP | Vc Comp S"
)

# Lexicon in the same line format; here the right-hand sides are plain words.
terminals_str = (
    "Det → the | a | some | any | every\n"
    "Adj → green | young | tired | confused\n"
    "N → dog | cat\n"
    "PropN → John | Mary\n"
    "Vi → sleeps | walks\n"
    "Vt → loves | hates\n"
    "Vc → says | thinks | believes\n"
    "Comp → that"
)

# Parse the rules into [tag, [alternative, ...]] pairs. Each category name is
# prefixed with "_" so that it can be told apart from ordinary words.
psg_rules_list = []
for rule_line in psg_rules_str.split("\n"):
    head, body = rule_line.split("→")
    alternatives = []
    for alternative in body.split("|"):
        symbols = alternative.strip().split(" ")
        alternatives.append(" ".join("_" + symbol for symbol in symbols))
    psg_rules_list.append(["_" + head.strip(), alternatives])
print(psg_rules_list)
# Prints:
# [['_S', ['_NP _VP']], ['_NP', ['_Det _Adj _N', '_Det _N', '_Adj _PropN', '_PropN']], ['_VP', ['_Vi', '_Vt _NP', '_Vc _Comp _S']]]

# Parse the lexicon the same way; only the tag on the left gets the "_" prefix,
# the words on the right are kept as they are (minus surrounding whitespace).
terminals_list = []
for lexicon_line in terminals_str.split("\n"):
    head, body = lexicon_line.split("→")
    words = [word.strip() for word in body.split("|")]
    terminals_list.append(["_" + head.strip(), words])
print(terminals_list)
# Prints:
# [['_Det', ['the', 'a', 'some', 'any', 'every']], ['_Adj', ['green', 'young', 'tired', 'confused']], ['_N', ['dog', 'cat']], ['_PropN', ['John', 'Mary']], ['_Vi', ['sleeps', 'walks']], ['_Vt', ['loves', 'hates']], ['_Vc', ['says', 'thinks', 'believes']], ['_Comp', ['that']]]

def reachTerminals(from_nts, with_rules, with_ts):
    """Unfinished draft: meant to expand ``from_nts`` into a random sentence.

    Args:
        from_nts: Space-separated start symbol(s), e.g. ``"s"``.
        with_rules: ``[tag, [alternative, ...]]`` pairs for the non-terminals.
        with_ts: ``[tag, [word, ...]]`` pairs for the terminals.

    As written, nothing is ever substituted and there is no ``return``
    statement, so the function cannot produce a sentence.
    """
    # Normalise the start symbols: drop underscores, prefix every symbol with
    # "_" and upper-case the result ("s" -> "_S").
    from_nts = str.upper("_" + from_nts.replace("_", "").strip().replace(" ", " _"))
    rule_tags = [a[0] for a in with_rules]
    # Computed but not used anywhere below.
    ts_tags = [a[0] for a in with_ts]
    # Rule tags that occur (as substrings) in the current string.
    nts_todo = [a for a in rule_tags if a in from_nts]
    while nts_todo != list():
        tag = nts_todo[0]
        wr_index = rule_tags.index(tag)
        # The candidate replacements are looked up but never applied.
        repl_choices = with_rules[wr_index][1]

        # from_nts has not changed inside the loop, so this yields the same
        # list again and the loop cannot end once it has been entered.
        nts_todo = [a for a in rule_tags if a in from_nts]


# Intended entry point: expand the start symbol "s" into a sentence.
sentence = reachTerminals(from_nts="s", with_rules=psg_rules_list, with_ts=terminals_list)

Answer 1

您的程序已经接近完成了。下面是补全 reachTerminals 函数的一种方法：

import random

# Phrase-structure rules, one production per line: "HEAD → alt | alt | ...".
psg_rules_str = "\n".join([
    "S → NP VP",
    "NP → Det Adj N | Det N | Adj PropN | PropN",
    "VP → Vi | Vt NP | Vc Comp S",
])

# Lexicon: same line format, but the right-hand sides are literal words.
terminals_str = "\n".join([
    "Det → the | a | some | any | every",
    "Adj → green | young | tired | confused",
    "N → dog | cat",
    "PropN → John | Mary",
    "Vi → sleeps | walks",
    "Vt → loves | hates",
    "Vc → says | thinks | believes",
    "Comp → that",
])


def _parse_productions(text, tag_alternatives):
    """Parse production text into a list of ``[tag, [alternative, ...]]``.

    The left-hand side of every line is stripped and prefixed with "_".
    When ``tag_alternatives`` is true, every space-separated symbol inside
    each alternative receives the same "_" prefix; otherwise the alternatives
    are only stripped of surrounding whitespace.
    """
    parsed = []
    for line in text.split("\n"):
        entry = line.split("→")
        entry[0] = "_" + entry[0].strip()
        alternatives = [alt.strip() for alt in entry[1].split("|")]
        if tag_alternatives:
            alternatives = ["_" + alt.replace(" ", " _") for alt in alternatives]
        entry[1] = alternatives
        parsed.append(entry)
    return parsed


# Right-hand sides of the rules are categories, so they are tagged as well.
psg_rules_list = _parse_productions(psg_rules_str, tag_alternatives=True)

# Right-hand sides of the lexicon are words and stay untagged.
terminals_list = _parse_productions(terminals_str, tag_alternatives=False)

def reachTerminals(from_nts, with_rules, with_ts):
    """Expand grammar symbols into a randomly generated sentence.

    Symbols are handled as whole whitespace-separated tokens and looked up by
    exact match. Searching for tags as substrings instead goes wrong as soon
    as one tag is a prefix of another (for example ``_N`` and ``_NP``).

    Args:
        from_nts: Space-separated start symbol(s), e.g. ``"s"`` or
            ``"np vp"``. Underscores are removed, every symbol is prefixed
            with "_" and the result is upper-cased, so only tags written in
            upper case (such as ``_S`` or ``_NP``) can be matched from here.
        with_rules: Sequence of ``[tag, [alternative, ...]]`` entries, where
            each alternative is a space-separated string of symbols.
        with_ts: Sequence of ``[tag, [word, ...]]`` entries giving the
            possible words for each terminal tag.

    Returns:
        The generated words joined by single spaces. Symbols that appear in
        neither table are passed through unchanged.

    Note:
        A recursive grammar can yield arbitrarily long sentences; a grammar
        whose every alternative recurses never finishes.
    """
    start = str.upper("_" + from_nts.replace("_", "").strip().replace(" ", " _"))

    # First entry wins for a duplicated tag, like list.index() would.
    rules = {}
    for entry in with_rules:
        rules.setdefault(entry[0], entry[1])
    lexicon = {}
    for entry in with_ts:
        lexicon.setdefault(entry[0], entry[1])

    # Explicit stack (top = leftmost pending symbol) instead of recursion, so
    # deeply nested sentences cannot hit the interpreter's recursion limit.
    pending = start.split()[::-1]
    words = []
    while pending:
        symbol = pending.pop()
        if symbol in rules:
            expansion = random.choice(rules[symbol])
            pending.extend(reversed(expansion.split()))
        elif symbol in lexicon:
            words.append(random.choice(lexicon[symbol]))
        else:
            words.append(symbol)

    return " ".join(words)


# Generate one random sentence from the start symbol "s" and show it.
sentence = reachTerminals(
    from_nts="s",
    with_rules=psg_rules_list,
    with_ts=terminals_list,
)
print(sentence)

这里用到的两个重要工具是 random.choice 函数，以及 str.replace 函数的第三个参数——它可以让您只替换第一次出现的子字符串。我还没有彻底测试这段代码，但它看起来能按预期工作。示例输出：

green John loves some confused dog

Mary says that the tired dog says that some green cat hates some cat

every green dog loves young John

John loves the tired cat

如何使用此 CFG 随机生成字符串？

How can I randomly generate strings with this CFG?

python

context-free-grammar