如何从 python 脚本中的单个 bibtex 输入字段获取可读的 unicode 字符串
How to get readable unicode string from single bibtex entry field in python script
假设您有一个包含 bibtex-formatted 个条目的 .bib 文件。我想从条目中提取“标题”字段,然后将其格式化为可读的 unicode 字符串。
例如,如果条目是:
@article{mypaper,
author = {myself},
title = {A very nice {title} with annoying {symbols} like {\^{a}}}
}
我要提取的是字符串:
A very nice title with annoying symbols like â
我目前正在尝试使用 pybtex
包,但我不知道该怎么做。 command-line 实用程序 pybtex-format
在转换完整的 .bib 文件方面做得很好,但我需要在脚本中和单个标题条目中执行此操作。
想通了:
def load_bib(filename):
from pybtex.database.input.bibtex import Parser
parser = Parser()
DB = parser.parse_file(filename)
return DB
def get_title(entry):
from pybtex.plugin import find_plugin
style = find_plugin('pybtex.style.formatting', 'plain')()
backend = find_plugin('pybtex.backends', 'plaintext')()
sentence = style.format_title(entry, 'title')
data = {'entry': entry,
'style': style,
'bib_data': None}
T = sentence.f(sentence.children, data)
title = T.render(backend)
return title
DB = load_bib("bibliography.bib")
print ( get_title(DB.entries["entry_label"]) )
其中 entry_label
必须与您在 Latex 中使用的标签匹配以引用参考书目条目。
基于 Daniele 的回答,我编写了这个函数,无需使用文件即可渲染字段。
from io import StringIO
from pybtex.database.input.bibtex import Parser
from pybtex.plugin import find_plugin
def render_fields(author="", title=""):
"""The arguments are in bibtex format. For example, they may contain
things like \'{i}. The output is a dictionary with these fields
rendered in plain text.
If you run tests by defining a string in Python, use r'''string''' to
avoid issues with escape characters.
"""
parser = Parser()
istr = r'''
@article{foo,
Author = {''' + author + r'''},
Title = {''' + title + '''},
}
'''
bib_data = parser.parse_stream(StringIO(istr))
style = find_plugin('pybtex.style.formatting', 'plain')()
backend = find_plugin('pybtex.backends', 'plaintext')()
entry = bib_data.entries["foo"]
data = {'entry': entry, 'style': style, 'bib_data': None}
sentence = style.format_author_or_editor(entry)
T = sentence.f(sentence.children, data)
rendered_author = T.render(backend)[0:-1] # exclude period
sentence = style.format_title(entry, 'title')
T = sentence.f(sentence.children, data)
rendered_title = T.render(backend)[0:-1] # exclude period
return {'title': rendered_title, 'author': rendered_author}
假设您有一个包含 bibtex-formatted 个条目的 .bib 文件。我想从条目中提取“标题”字段,然后将其格式化为可读的 unicode 字符串。
例如,如果条目是:
@article{mypaper,
author = {myself},
title = {A very nice {title} with annoying {symbols} like {\^{a}}}
}
我要提取的是字符串:
A very nice title with annoying symbols like â
我目前正在尝试使用 pybtex
包,但我不知道该怎么做。 command-line 实用程序 pybtex-format
在转换完整的 .bib 文件方面做得很好,但我需要在脚本中和单个标题条目中执行此操作。
想通了:
def load_bib(filename):
from pybtex.database.input.bibtex import Parser
parser = Parser()
DB = parser.parse_file(filename)
return DB
def get_title(entry):
from pybtex.plugin import find_plugin
style = find_plugin('pybtex.style.formatting', 'plain')()
backend = find_plugin('pybtex.backends', 'plaintext')()
sentence = style.format_title(entry, 'title')
data = {'entry': entry,
'style': style,
'bib_data': None}
T = sentence.f(sentence.children, data)
title = T.render(backend)
return title
DB = load_bib("bibliography.bib")
print ( get_title(DB.entries["entry_label"]) )
其中 entry_label
必须与您在 Latex 中使用的标签匹配以引用参考书目条目。
基于 Daniele 的回答,我编写了这个函数,无需使用文件即可渲染字段。
from io import StringIO
from pybtex.database.input.bibtex import Parser
from pybtex.plugin import find_plugin
def render_fields(author="", title=""):
"""The arguments are in bibtex format. For example, they may contain
things like \'{i}. The output is a dictionary with these fields
rendered in plain text.
If you run tests by defining a string in Python, use r'''string''' to
avoid issues with escape characters.
"""
parser = Parser()
istr = r'''
@article{foo,
Author = {''' + author + r'''},
Title = {''' + title + '''},
}
'''
bib_data = parser.parse_stream(StringIO(istr))
style = find_plugin('pybtex.style.formatting', 'plain')()
backend = find_plugin('pybtex.backends', 'plaintext')()
entry = bib_data.entries["foo"]
data = {'entry': entry, 'style': style, 'bib_data': None}
sentence = style.format_author_or_editor(entry)
T = sentence.f(sentence.children, data)
rendered_author = T.render(backend)[0:-1] # exclude period
sentence = style.format_title(entry, 'title')
T = sentence.f(sentence.children, data)
rendered_title = T.render(backend)[0:-1] # exclude period
return {'title': rendered_title, 'author': rendered_author}