从 arXiv ID 到 BibTeX 条目
from arXiv ID to BibTeX entry
我如何编写一个健壮的 Perl 脚本来为 arXiv ID 生成 BibTeX 条目?
我的猜测是我应该使用 arXiv API,并用 XML::Atom 解析其响应。它应该能提供构建 BibTeX 条目所需的信息。
以下是我的初步尝试:
use strict;
use warnings;
use LWP::UserAgent;
use Text::BibTeX::Entry;
use XML::Atom;
# Starter script: request the arXiv API record for one paper and create an
# (as yet empty) BibTeX entry object to fill in from the response.
my $arxivid = "hep-ph/9609357";
# id_list is the arXiv API parameter for requesting specific article IDs;
# search_query is meant for searches rather than exact ID lookups.
my $url = "http://export.arxiv.org/api/query?id_list=" . $arxivid . "&start=0&max_results=1";
my $browser = LWP::UserAgent->new();
my $response = $browser->get($url);
# Stop on a network or HTTP error instead of carrying on with an error body.
die "Request for $url failed: " . $response->status_line . "\n"
    unless $response->is_success;
# Empty entry; its fields still have to be set from the parsed response.
my $entry = Text::BibTeX::Entry->new();
也欢迎不使用 arXiv API 或 XML::Atom 的回答。
下面是一个使用 XML::Twig 解析下载的 XML 文件的起点:
use feature qw(say);
use strict;
use warnings;
use LWP::UserAgent;
use Text::BibTeX;
use Text::BibTeX::Entry;
use XML::Twig;
use DateTime::Format::Strptime;
{
    # Look up one arXiv paper through the arXiv API, extract title, authors,
    # publication date and (if present) the DOI link from the returned XML,
    # and print the result to STDOUT as a BibTeX @article entry.
    my $arxivid = "hep-ph/9609357";
    # id_list is the arXiv API parameter for requesting specific article IDs;
    # search_query is meant for searches rather than exact ID lookups.
    my $url = "http://export.arxiv.org/api/query?id_list=" . $arxivid . "&start=0&max_results=1";
    my $browser = LWP::UserAgent->new();
    my $response = $browser->get($url);
    # Stop on a network or HTTP error instead of feeding an error body to the
    # XML parser.
    die "Request for $url failed: " . $response->status_line . "\n"
        unless $response->is_success;
    my $xml = $response->content;
    my $twig = XML::Twig->new->parse( $xml );

    # get_xpath with an offset yields nothing when there is no match, so each
    # lookup is checked before a method is called on the result.
    my $title_node = $twig->get_xpath( '//entry/title', 0 )
        or die "No entry found in the arXiv response for '$arxivid'\n";
    my $title = $title_node->text;
    # Collapse embedded line breaks and runs of spaces into single spaces so
    # the title is emitted on one line.
    $title =~ s/\A\s+|\s+\z//g;
    $title =~ s/\s+/ /g;

    my @authors = map { $_->text } $twig->findnodes( '//entry/author/name' );

    my $published_node = $twig->get_xpath( '//entry/published', 0 )
        or die "No publication date in the arXiv response for '$arxivid'\n";
    my ( $year, $month ) = parse_published( $published_node->text );

    # The DOI link is treated as optional: when the entry has none, the doi
    # field is simply left out of the BibTeX entry.
    my $doi_link = $twig->get_xpath( '//entry/link[@title="doi"]', 0 );
    my $doi = $doi_link ? $doi_link->att('href') : undef;

    my $entry = Text::BibTeX::Entry->new();
    $entry->set_metatype(BTE_REGULAR);
    $entry->set_type('article');
    $entry->set_key('article1');
    $entry->set( 'title', $title );
    $entry->set( 'author', join ' and ', @authors );
    $entry->set( 'year', $year );
    $entry->set( 'month', $month );
    $entry->set( 'doi', $doi ) if defined $doi;
    $entry->print(\*STDOUT);
}
sub parse_published {
    # Parse a timestamp string with the strptime pattern '%FT%T%Z' and return
    # the list ( year, month name ) of the resulting DateTime object.
    # The parser is created with on_error => 'croak', so input that does not
    # match the pattern raises an exception rather than returning undef.
    my $timestamp = shift;
    my @parser_args = (
        pattern   => '%FT%T%Z',
        time_zone => 'UTC',
        on_error  => 'croak',
    );
    my $strptime = DateTime::Format::Strptime->new(@parser_args);
    my $moment   = $strptime->parse_datetime($timestamp);
    return ( $moment->year, $moment->month_name );
}
输出:
@article{article1,
title = {Mixing-induced CP Asymmetries in Inclusive $B$ Decays},
author = {Martin Beneke and Gerhard Buchalla and Isard Dunietz},
year = {1996},
month = {September},
doi = {http://dx.doi.org/10.1016/S0370-2693(96)01648-6},
}
我如何编写一个健壮的 Perl 脚本来为 arXiv ID 生成 BibTeX 条目?
我的猜测是我应该使用 arXiv API,并用 XML::Atom 解析其响应。它应该能提供构建 BibTeX 条目所需的信息。
以下是我的初步尝试:
use strict;
use warnings;
use LWP::UserAgent;
use Text::BibTeX::Entry;
use XML::Atom;
# Starter script: request the arXiv API record for one paper and create an
# (as yet empty) BibTeX entry object to fill in from the response.
my $arxivid = "hep-ph/9609357";
# id_list is the arXiv API parameter for requesting specific article IDs;
# search_query is meant for searches rather than exact ID lookups.
my $url = "http://export.arxiv.org/api/query?id_list=" . $arxivid . "&start=0&max_results=1";
my $browser = LWP::UserAgent->new();
my $response = $browser->get($url);
# Stop on a network or HTTP error instead of carrying on with an error body.
die "Request for $url failed: " . $response->status_line . "\n"
    unless $response->is_success;
# Empty entry; its fields still have to be set from the parsed response.
my $entry = Text::BibTeX::Entry->new();
也欢迎不使用 arXiv API 或 XML::Atom 的回答。
下面是一个使用 XML::Twig 解析下载的 XML 文件的起点:
use feature qw(say);
use strict;
use warnings;
use LWP::UserAgent;
use Text::BibTeX;
use Text::BibTeX::Entry;
use XML::Twig;
use DateTime::Format::Strptime;
{
    # Look up one arXiv paper through the arXiv API, extract title, authors,
    # publication date and (if present) the DOI link from the returned XML,
    # and print the result to STDOUT as a BibTeX @article entry.
    my $arxivid = "hep-ph/9609357";
    # id_list is the arXiv API parameter for requesting specific article IDs;
    # search_query is meant for searches rather than exact ID lookups.
    my $url = "http://export.arxiv.org/api/query?id_list=" . $arxivid . "&start=0&max_results=1";
    my $browser = LWP::UserAgent->new();
    my $response = $browser->get($url);
    # Stop on a network or HTTP error instead of feeding an error body to the
    # XML parser.
    die "Request for $url failed: " . $response->status_line . "\n"
        unless $response->is_success;
    my $xml = $response->content;
    my $twig = XML::Twig->new->parse( $xml );

    # get_xpath with an offset yields nothing when there is no match, so each
    # lookup is checked before a method is called on the result.
    my $title_node = $twig->get_xpath( '//entry/title', 0 )
        or die "No entry found in the arXiv response for '$arxivid'\n";
    my $title = $title_node->text;
    # Collapse embedded line breaks and runs of spaces into single spaces so
    # the title is emitted on one line.
    $title =~ s/\A\s+|\s+\z//g;
    $title =~ s/\s+/ /g;

    my @authors = map { $_->text } $twig->findnodes( '//entry/author/name' );

    my $published_node = $twig->get_xpath( '//entry/published', 0 )
        or die "No publication date in the arXiv response for '$arxivid'\n";
    my ( $year, $month ) = parse_published( $published_node->text );

    # The DOI link is treated as optional: when the entry has none, the doi
    # field is simply left out of the BibTeX entry.
    my $doi_link = $twig->get_xpath( '//entry/link[@title="doi"]', 0 );
    my $doi = $doi_link ? $doi_link->att('href') : undef;

    my $entry = Text::BibTeX::Entry->new();
    $entry->set_metatype(BTE_REGULAR);
    $entry->set_type('article');
    $entry->set_key('article1');
    $entry->set( 'title', $title );
    $entry->set( 'author', join ' and ', @authors );
    $entry->set( 'year', $year );
    $entry->set( 'month', $month );
    $entry->set( 'doi', $doi ) if defined $doi;
    $entry->print(\*STDOUT);
}
sub parse_published {
    # Parse a timestamp string with the strptime pattern '%FT%T%Z' and return
    # the list ( year, month name ) of the resulting DateTime object.
    # The parser is created with on_error => 'croak', so input that does not
    # match the pattern raises an exception rather than returning undef.
    my $timestamp = shift;
    my @parser_args = (
        pattern   => '%FT%T%Z',
        time_zone => 'UTC',
        on_error  => 'croak',
    );
    my $strptime = DateTime::Format::Strptime->new(@parser_args);
    my $moment   = $strptime->parse_datetime($timestamp);
    return ( $moment->year, $moment->month_name );
}
输出:
@article{article1,
title = {Mixing-induced CP Asymmetries in Inclusive $B$ Decays},
author = {Martin Beneke and Gerhard Buchalla and Isard Dunietz},
year = {1996},
month = {September},
doi = {http://dx.doi.org/10.1016/S0370-2693(96)01648-6},
}