Python - 使用递归函数按字母顺序打印出一个 trie
Python - printing out a trie in alphabetically sorted order with a recursive function
我正在研究 Bird、Klein 和 Loper 的 NLTK book,但遇到了一个问题。我读这本书是为了充实自己,而不是为了 class.
我遇到的问题是 4.29:
编写一个递归函数,按字母排序的顺序漂亮地打印一个 trie,例如:
chair: 'flesh'
---t: 'cat'
--ic: 'stylish'
---en: 'dog'
我正在使用书中的这段代码来创建 trie:
def insert(trie, key, value):
if key:
first, rest = key[0], key[1:]
if first not in trie:
trie[first] = {}
insert(trie[first], rest, value)
else:
trie['value'] = value
trie = {}
insert(trie, 'chat', 'cat')
insert(trie, 'chien', 'dog')
insert(trie, 'chair', 'flesh')
insert(trie, 'chic', 'stylish')
我修改了 的一个答案,用于递归遍历 trie 并提取完整的键和值的函数:
def trawl_trie(trie):
unsorted = []
for key, value in trie.items():
if 'value' not in key:
for item in trawl_trie(trie[key]):
unsorted.append(key + item)
else:
unsorted.append(': ' + value)
return unsorted
但是我无法使用递归来制作按字母顺序排列的列表,也无法弄清楚如何使用递归来替换键的重复部分。我能做的最好的事情就是创建一个遍历上述函数结果的辅助函数:
def print_trie(trie):
# sort list alphabetically
alphabetized = list(sorted(set(trawl_trie(trie))))
print(alphabetized[0])
# compare the 2nd item ~ to the previous one in the list.
for k in range(1, len(alphabetized)):
# separate words from value
prev_w, prev_d = (re.findall(r'(\w+):', alphabetized[k - 1]), re.findall(r': (\w+)', alphabetized[k - 1]))
curr_w, curr_d = (re.findall(r'(\w+):', alphabetized[k]), re.findall(r': (\w+)', alphabetized[k]))
word = ''
# find parts that match and replace them with dashes
for i in range(min(len(prev_w[0]), len(curr_w[0]))):
if prev_w[0][i] == curr_w[0][i]:
word += prev_w[0][i]
curr_w[0] = re.sub(word, '-' * len(word), curr_w[0])
print(curr_w[0] + ": " + str(curr_d[0]))
这将是输出:
print_trie(trie)
chair: flesh
---t: cat
--ic: stylish
---en: dog
有谁知道是否可以用一个递归函数得到相同的结果?或者我被困在使用递归函数遍历 trie 和第二个辅助函数来使一切看起来不错?
干杯,
- MC
def insert(trie, key, value):
"""Insert into Trie"""
if key:
first, rest = key[0], key[1:]
if first not in trie:
trie[first] = {}
insert(trie[first], rest, value)
else:
trie['value'] = value
def display(trie, s = ""):
"""Recursive function to Display Trie entries in alphabetical order"""
first = True
for k, v in sorted(trie.items(), key = lambda x: x[0]):
# dictionary sorted based upon the keys
if isinstance(v, dict):
if first:
prefix = s + k # first to show common prefix
first = False
else:
prefix = '-'*len(s) + k # dashes for common prefix
display(v, prefix) # s+k is extending string s for display by appending current key k
else:
print(s, ":", v) # not a dictionary, so print current # not a dictionary, so print current string s and value
# Create Trie
trie = {}
insert(trie, 'chat', 'cat')
insert(trie, 'chien', 'dog')
insert(trie, 'chair', 'flesh')
insert(trie, 'chic', 'stylish')
#Display Use Recursive function (second argument will default to "" on call)
display(trie)
输出
chair : flesh
---t : cat
--ic : stylish
---en : dog
我修改了 DarrylG 的回答(再次感谢!)添加了一个通过递归传递的接受密钥列表和一对迭代的 for
循环通过这个列表,看看字符串开头有没有共同的元素可以替换。
编辑 2019 年 11 月 2 日:此修改有一个我没有注意到的错误。第一次它会 运行 正确,但在随后的 运行 中它会替换太多的字符,如破折号,即:
----r: flesh
---t: cat
---c: stylish
----n: dog
def insert(trie, key, value):
"""Insert into Trie"""
if key:
first, rest = key[0], key[1:]
if first not in trie:
trie[first] = {}
insert(trie[first], rest, value)
else:
trie['value'] = value
def display(trie, s = "", final = []):
"""Recursive function to Display Trie entries in alphabetical order"""
for k, v in sorted(trie.items(), key = lambda x: x[0]):
# dictionary sorted based upon the keys
if isinstance(v, dict):
display(v, s + k, final) # s+k is extending string s for display by appending current key k
else:
# replace common elements at beginning of strings with dashes
i = sum([any([f.startswith(string[:j]) for f in final]) for j in range(1, len(string))])
string = '-' * i + s[i:]
print(string + ":", v) # not a dictionary, so print current edited s and value
final.append(s)
# Create Trie
trie = {}
insert(trie, 'chat', 'cat')
insert(trie, 'chien', 'dog')
insert(trie, 'chair', 'flesh')
insert(trie, 'chic', 'stylish')
#Display Use Recursive function (second argument will default to "" on call)
display(trie)
我正在研究 Bird、Klein 和 Loper 的 NLTK book,但遇到了一个问题。我读这本书是为了充实自己,而不是为了 class.
我遇到的问题是 4.29:
编写一个递归函数,按字母排序的顺序漂亮地打印一个 trie,例如:
chair: 'flesh'
---t: 'cat'
--ic: 'stylish'
---en: 'dog'
我正在使用书中的这段代码来创建 trie:
def insert(trie, key, value):
if key:
first, rest = key[0], key[1:]
if first not in trie:
trie[first] = {}
insert(trie[first], rest, value)
else:
trie['value'] = value
trie = {}
insert(trie, 'chat', 'cat')
insert(trie, 'chien', 'dog')
insert(trie, 'chair', 'flesh')
insert(trie, 'chic', 'stylish')
我修改了
def trawl_trie(trie):
unsorted = []
for key, value in trie.items():
if 'value' not in key:
for item in trawl_trie(trie[key]):
unsorted.append(key + item)
else:
unsorted.append(': ' + value)
return unsorted
但是我无法使用递归来制作按字母顺序排列的列表,也无法弄清楚如何使用递归来替换键的重复部分。我能做的最好的事情就是创建一个遍历上述函数结果的辅助函数:
def print_trie(trie):
# sort list alphabetically
alphabetized = list(sorted(set(trawl_trie(trie))))
print(alphabetized[0])
# compare the 2nd item ~ to the previous one in the list.
for k in range(1, len(alphabetized)):
# separate words from value
prev_w, prev_d = (re.findall(r'(\w+):', alphabetized[k - 1]), re.findall(r': (\w+)', alphabetized[k - 1]))
curr_w, curr_d = (re.findall(r'(\w+):', alphabetized[k]), re.findall(r': (\w+)', alphabetized[k]))
word = ''
# find parts that match and replace them with dashes
for i in range(min(len(prev_w[0]), len(curr_w[0]))):
if prev_w[0][i] == curr_w[0][i]:
word += prev_w[0][i]
curr_w[0] = re.sub(word, '-' * len(word), curr_w[0])
print(curr_w[0] + ": " + str(curr_d[0]))
这将是输出:
print_trie(trie)
chair: flesh
---t: cat
--ic: stylish
---en: dog
有谁知道是否可以用一个递归函数得到相同的结果?或者我被困在使用递归函数遍历 trie 和第二个辅助函数来使一切看起来不错?
干杯,
- MC
def insert(trie, key, value):
"""Insert into Trie"""
if key:
first, rest = key[0], key[1:]
if first not in trie:
trie[first] = {}
insert(trie[first], rest, value)
else:
trie['value'] = value
def display(trie, s = ""):
"""Recursive function to Display Trie entries in alphabetical order"""
first = True
for k, v in sorted(trie.items(), key = lambda x: x[0]):
# dictionary sorted based upon the keys
if isinstance(v, dict):
if first:
prefix = s + k # first to show common prefix
first = False
else:
prefix = '-'*len(s) + k # dashes for common prefix
display(v, prefix) # s+k is extending string s for display by appending current key k
else:
print(s, ":", v) # not a dictionary, so print current # not a dictionary, so print current string s and value
# Create Trie
trie = {}
insert(trie, 'chat', 'cat')
insert(trie, 'chien', 'dog')
insert(trie, 'chair', 'flesh')
insert(trie, 'chic', 'stylish')
#Display Use Recursive function (second argument will default to "" on call)
display(trie)
输出
chair : flesh
---t : cat
--ic : stylish
---en : dog
我修改了 DarrylG 的回答(再次感谢!)添加了一个通过递归传递的接受密钥列表和一对迭代的 for
循环通过这个列表,看看字符串开头有没有共同的元素可以替换。
编辑 2019 年 11 月 2 日:此修改有一个我没有注意到的错误。第一次它会 运行 正确,但在随后的 运行 中它会替换太多的字符,如破折号,即:
----r: flesh
---t: cat
---c: stylish
----n: dog
def insert(trie, key, value):
"""Insert into Trie"""
if key:
first, rest = key[0], key[1:]
if first not in trie:
trie[first] = {}
insert(trie[first], rest, value)
else:
trie['value'] = value
def display(trie, s = "", final = []):
"""Recursive function to Display Trie entries in alphabetical order"""
for k, v in sorted(trie.items(), key = lambda x: x[0]):
# dictionary sorted based upon the keys
if isinstance(v, dict):
display(v, s + k, final) # s+k is extending string s for display by appending current key k
else:
# replace common elements at beginning of strings with dashes
i = sum([any([f.startswith(string[:j]) for f in final]) for j in range(1, len(string))])
string = '-' * i + s[i:]
print(string + ":", v) # not a dictionary, so print current edited s and value
final.append(s)
# Create Trie
trie = {}
insert(trie, 'chat', 'cat')
insert(trie, 'chien', 'dog')
insert(trie, 'chair', 'flesh')
insert(trie, 'chic', 'stylish')
#Display Use Recursive function (second argument will default to "" on call)
display(trie)