未从 HTML::TreeBuilder 获得输出
Not getting output from HTML::TreeBuilder
我正在尝试从大约 3,000 个 HTML 文件中获取一大堆值并将它们保存到电子表格中。
我正在使用
HTML::TreeBuilder
处理 HTML,并使用
Spreadsheet::WriteExcel
创建电子表格。
但是我的脚本没有成功获取这些值,而是得到了如下警告:
Use of uninitialized value $val in concatenation (.) or string at spreadsheet.pl line 63.
我可能做错了什么?
这是我的 HTML 文件的一个示例(放在 pastebin.com 上)。
文件太大,无法直接贴在问题中。
我的 Perl 代码
use warnings 'all';
use strict;
use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;
use constant URL => 'http://pastebin.com/raw/qLwu80ZW';
# Per-file field values; each one is overwritten for every HTML file processed.
my $teamNumber = "";
my $teamName = "";
my $schoolName = "";
my $area = "";
my $district = "";
my $agDeptPhone = "";
my $schoolPhone = "";
my $fax = "";
my $addressOne = "";
my $addressTwo = "";
my $city = "";
my $state = "";
my $zipCode = "";
my $name = "";
my $email = "";
my $row = "";

# Current spreadsheet row; row 0 holds the headers.
my $Ypos = 0;

# Single quotes keep the backslashes. Inside double quotes "\_", "\M" and
# "\J" are not valid escapes, so Perl drops the backslash and the path
# collapses into "Z:_WEB_CLIENTSMorgan LivestockJudging Card".
my $path = 'Z:\_WEB_CLIENTS\Morgan Livestock\Judging Card';

# Spreadsheet::WriteExcel writes the binary Excel 97-2003 format, so the file
# must be named .xls. new() returns undef when the file cannot be created.
my $workbook = Spreadsheet::WriteExcel->new('perlOutput.xls')
    or die "Unable to create 'perlOutput.xls': $!";
my $worksheet = $workbook->add_worksheet();
# Shared implementation for the single-field getters below.
#
# Parameters:
#   $file  - path of the HTML file to parse
#   $field - field name as it appears after "$txt_" in the input's name
#            attribute (for example "TeamNumber")
#   $label - human-readable label used in the progress message
#
# Returns the input's value attribute, or an empty string when no matching
# input (or no value attribute) exists. Dies if the file cannot be read.
sub _get_input_value {
    my ($file, $field, $label) = @_;

    # new_from_content() expects the HTML text itself. Passing the bare
    # path($file) object would parse the file *name* as if it were HTML.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # "\$" matches a literal dollar sign; an unescaped "$txt_..." inside the
    # regex would be interpolated as a Perl variable. "\b" stops "Phone" from
    # also matching "Phone2". Scalar context returns the first match or undef.
    my $input = $tree->look_down(
        _tag => 'input',
        name => qr/\$txt_\Q$field\E\b/,
    );
    my $val = $input ? $input->attr('value') : undef;

    # Free the tree explicitly; thousands of files are parsed in one run.
    $tree->delete;

    if ( defined $val ) {
        print "Got $label $val\n";
    }
    else {
        print "Couldn't get $label\n";
        $val = '';
    }
    return $val;
}

# One getter per form field. Each takes the HTML file path and returns the
# field's value ('' when the field is absent from that file).
sub getTeamNumber  { my ($file) = @_; return _get_input_value($file, 'TeamNumber', 'Team Number'); }
sub getTeamName    { my ($file) = @_; return _get_input_value($file, 'TeamName',   'Team Name'); }
sub getSchoolName  { my ($file) = @_; return _get_input_value($file, 'SchoolName', 'School Name'); }
sub getArea        { my ($file) = @_; return _get_input_value($file, 'Area',       'Area'); }
sub getDistrict    { my ($file) = @_; return _get_input_value($file, 'District',   'District'); }
sub getDeptPhone   { my ($file) = @_; return _get_input_value($file, 'Phone',      'Dept Phone'); }
sub getSchoolPhone { my ($file) = @_; return _get_input_value($file, 'Phone2',     'School Phone'); }
sub getFax         { my ($file) = @_; return _get_input_value($file, 'Fax',        'Fax'); }
sub getAddress1    { my ($file) = @_; return _get_input_value($file, 'Address1',   'Address One'); }
sub getAddress2    { my ($file) = @_; return _get_input_value($file, 'Address2',   'Address Two'); }
sub getCity        { my ($file) = @_; return _get_input_value($file, 'City',       'City'); }
sub getState       { my ($file) = @_; return _get_input_value($file, 'State',      'State'); }
sub getZip         { my ($file) = @_; return _get_input_value($file, 'Zip',        'Zip'); }
sub getWebsite     { my ($file) = @_; return _get_input_value($file, 'Website',    'Website'); }
# Extract the Name and Email cells from the "rgMasterTable" table.
#
# Parameters:
#   $file - path of the HTML file to parse; when it is not a readable file
#           the sample page at URL is fetched instead
#
# Returns ($name, $email) taken from the last table row that has at least
# two <td> cells, or ('', '') when the page or the table is unavailable.
sub getNameAndEmail {
    my ($file) = @_;

    my ($name, $email) = ('', '');

    my $content = ( defined $file && -f $file )
                ? path($file)->slurp
                : get(URL);
    if ( !defined $content ) {
        print "Couldn't load HTML for Name and Email\n";
        return ($name, $email);
    }

    my $tree  = HTML::TreeBuilder->new_from_content($content);
    my $table = $tree->look_down(_tag => 'table', class => 'rgMasterTable');

    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            next if @td < 2;    # header rows and incomplete rows

            # Assign to the outer variables. Declaring new ones with "my"
            # here would hide the values from the print/return below.
            ($name, $email) = map { $_->as_trimmed_text } @td[0, 1];
        }
    }
    $tree->delete;

    print "Got Name and Email $name and $email\n";
    return ($name, $email);
}
# FILLER: writes one record into the spreadsheet at row $Ypos.
#
# Takes the fifteen field values in header order (Name, Email, Team Number,
# Team Name, School Name, Area, District, Ag Dept Phone, School Phone, Fax,
# Address One, Address Two, City, State, Zip Code).
#
# The row starts at column 0 so that every value lands under its header;
# the headers are written to A1..O1, which is column 0 onwards.
sub fill {
    my ($name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone,
        $fax, $addressOne, $addressTwo, $city, $state, $zipCode) = @_;

    my @cells = map { defined $_ ? $_ : '' } (
        $name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone,
        $fax, $addressOne, $addressTwo, $city, $state, $zipCode,
    );

    return $worksheet->write_row($Ypos, 0, \@cells);
}
# Collect the regular files inside the judging-card directory.
# readdir() returns bare names (including "." and ".."), so each name is
# joined with $path before it is tested or opened.
opendir(my $dh, $path) or die "Unable to open directory 'Judging Card': $!";
my @files = grep { -f } map { "$path/$_" } sort readdir($dh);
closedir($dh);

# Header row (row 0, columns A..O).
$worksheet->write_row(0, 0, [
    'Name', 'Email', 'Team Number', 'Team Name', 'School Name',
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax',
    'Address One', 'Address Two', 'City', 'State', 'Zip Code',
]);

###################################
# One spreadsheet row per HTML file.
foreach my $file (@files) {
    $Ypos = $Ypos + 1;

    ($name, $email) = getNameAndEmail($file);
    $teamNumber  = getTeamNumber($file);
    $teamName    = getTeamName($file);
    $schoolName  = getSchoolName($file);
    $area        = getArea($file);
    $district    = getDistrict($file);
    $agDeptPhone = getDeptPhone($file);
    $schoolPhone = getSchoolPhone($file);
    $fax         = getFax($file);
    $addressOne  = getAddress1($file);
    $addressTwo  = getAddress2($file);
    $city        = getCity($file);
    $state       = getState($file);
    $zipCode     = getZip($file);

    fill($name, $email, $teamNumber, $teamName, $schoolName,
         $area, $district, $agDeptPhone, $schoolPhone, $fax,
         $addressOne, $addressTwo, $city, $state, $zipCode);

    print "Progressing $file ($Ypos)\n";
}

# Close explicitly so the workbook is flushed in a defined order; the subs
# above hold on to $worksheet, which can defeat destructor-based cleanup.
$workbook->close();
# Revised getTeamNumber: returns the value of the input whose name contains
# "$txt_TeamNumber", or undef (after printing a message) when the file has
# no such input. Note that this file also defines getTeamNumber earlier;
# Perl keeps the later definition.
sub getTeamNumber {
    my ($file) = @_;

    # Parse the file's contents, not the path object itself.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # Search the whole tree once. Looping over every input and overwriting
    # the result would keep only the outcome for the last input on the page.
    # "\$" matches a literal dollar sign instead of interpolating a variable.
    my $input = $tree->look_down(
        _tag => 'input',
        name => qr/\$txt_TeamNumber\b/,
    );
    my $val = $input ? $input->attr('value') : undef;
    $tree->delete;

    if ( defined $val ) {
        print "Got Team number $val\n";
    }
    else {
        print "Couldn't get team number\n";
    }
    return $val;
}
新脚本:
use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;
# Single quotes preserve the backslashes of the Windows path; in a
# double-quoted string "\_", "\M" and "\J" lose their backslash.
my $path = 'Z:\_WEB_CLIENTS\Morgan Livestock\Judging Card';

# Spreadsheet::WriteExcel writes the binary .xls format, not .xlsx.
my $workbook = Spreadsheet::WriteExcel->new('perlOutput.xls')
    or die "Unable to create 'perlOutput.xls': $!";
my $worksheet = $workbook->add_worksheet();

# readdir() yields bare names (plus "." and ".."); build full paths and keep
# regular files only so later file tests and reads hit the right location.
opendir(my $dh, $path) or die "Unable to open directory 'Judging Card': $!";
my @files = grep { -f } map { "$path/$_" } sort readdir($dh);
closedir($dh);

# Specify spreadsheet headers in desired order and write to file
my @headers = (
    'Name', 'Email', 'Team Number', 'Team Name', 'School Name',
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax',
    'Address One', 'Address Two', 'City', 'State', 'Zip Code', 'Web Site',
);
$worksheet->write_row(0, 0, \@headers);    # first row

# Each header with its 1-based position in @headers (the column order).
# A hash slice is used so that no "state" feature has to be enabled.
my %ho;
@ho{@headers} = ( 1 .. scalar @headers );

# Each name as produced by getNames (the part after "$txt_" in the HTML
# input name) with the position of its header. The keys must match the
# getNames keys exactly, otherwise the value never reaches its column.
# NOTE(review): Phone => Ag Dept, Phone2 => School follows the original
# getDeptPhone/getSchoolPhone getters; confirm against the real pages.
my %name_order = (
    Name       => $ho{'Name'},
    Email      => $ho{'Email'},
    TeamNumber => $ho{'Team Number'},
    TeamName   => $ho{'Team Name'},
    SchoolName => $ho{'School Name'},
    Area       => $ho{'Area'},
    District   => $ho{'District'},
    Phone      => $ho{'Ag Dept Phone'},
    Phone2     => $ho{'School Phone'},
    Fax        => $ho{'Fax'},
    Address1   => $ho{'Address One'},
    Address2   => $ho{'Address Two'},
    City       => $ho{'City'},
    State      => $ho{'State'},
    Zip        => $ho{'Zip Code'},
    Website    => $ho{'Web Site'},
);
# Parse one HTML file and collect every wanted form field in a single pass.
#
# Parameters:
#   $file - path of the HTML file
#
# Returns a hash reference mapping field name (TeamName, Phone2, ...) to the
# input's value ('' when the input has no value attribute), plus Name and
# Email taken from the last row of the "rgMasterTable" table ('' when absent).
# Fields whose input is missing from the page are simply not in the hash.
sub getNames {
    my ($file) = @_;

    # new_from_content() needs the HTML text, so slurp the file first.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    my %wanted = map { $_ => 1 }
        qw(TeamName TeamNumber SchoolName Area District
           Phone Phone2 Fax Address1 Address2 City State Zip Website);

    my %name_val;
    for my $input ( $tree->look_down(_tag => 'input') ) {
        my $attr = $input->attr('name');
        next unless defined $attr;

        # Capture the whole word after "$txt_", so "Phone2" is never
        # mistaken for "Phone" (a plain substring match would do that).
        my ($field) = $attr =~ /\$txt_(\w+)/ or next;
        next unless $wanted{$field};

        # Keep a legitimate "0" value; only undef becomes ''.
        my $value = $input->attr('value');
        $name_val{$field} = defined $value ? $value : '';
    }

    # Name and Email are stored differently. Fetch those now, taking
    # whatever text the first two cells of the last data row hold.
    @name_val{qw(Name Email)} = ('', '');
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            next unless my @td = $tr->look_down(_tag => 'td');
            @name_val{qw(Name Email)} =
                map { defined $_ ? $_->as_trimmed_text : '' } @td[0, 1];
        }
    }

    $tree->delete;
    return \%name_val;
}
# Write one record as a spreadsheet row.
#
# Parameters:
#   $ws     - worksheet object providing write_row()
#   $row    - zero-based row index to write to
#   $rdata  - hash ref: field name => value
#   $rorder - hash ref: field name => column order (numeric)
#
# One cell is produced for every name in %$rorder, in column order. A field
# missing from %$rdata yields an empty cell instead of shifting the values
# that follow it, and names absent from %$rorder are ignored.
# Always returns 1; a write failure is reported with a warning.
sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    my @vals = map  { defined $rdata->{$_} ? $rdata->{$_} : '' }
               sort { $rorder->{$a} <=> $rorder->{$b} }
               keys %$rorder;

    # write_row() returns 0 on success.
    my $status = $ws->write_row($row, 0, \@vals);
    warn "write_row failed for row $row (status $status)\n" if $status;
    return 1;
}

# Row 0 holds the headers, so data starts at row 1. This declaration has to
# live at file level; placed after "return" inside fill_row it never ran.
my $row = 1;

foreach my $file (@files) {
    next if -d $file;

    # Parse the file once and write all of its fields in one go.
    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);

    foreach my $name (sort keys %name_val) {    # demo
        if ($name_val{$name}) { print "$name => $name_val{$name}\n" }
        else { print "Not found $name in $file\n" }
    }
    print "Progressing $file (row " . ($row - 1) . ")\n";
}

# Flush and close the workbook in a defined order.
$workbook->close();
简而言之,某些 'name'
可能在(某些)HTML 文件中根本找不到。所以应先测试它是否存在,存在时才写入 $val,
否则打印一条未找到的消息。
最明显的改进点:不需要为每个字段单独编写函数。您可以在一次调用中查找所有字段,并将它们存储在一个 name => value 形式的散列
中返回。
# Parse one HTML file and collect every wanted form field in a single pass.
#
# Parameters:
#   $file - path of the HTML file
#
# Returns a hash reference mapping field name (TeamName, Phone2, ...) to the
# input's value ('' when the input has no value attribute), plus Name and
# Email. Name and Email are taken from the table only when both cells hold
# text; otherwise both are ''.
sub getNames {
    my ($file) = @_;

    # new_from_content() needs the HTML text, so slurp the file first.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # Should probably be defined globally, once for the whole program
    my %wanted = map { $_ => 1 }
        qw(TeamName TeamNumber SchoolName Area District
           Phone Phone2 Fax Address1 Address2 City State Zip Website);

    my %name_val;
    for my $input ( $tree->look_down(_tag => 'input') ) {
        my $attr = $input->attr('name');
        next unless defined $attr;

        # Capture the whole word after "$txt_", so "Phone2" is never
        # mistaken for "Phone" (a plain substring match would do that).
        my ($field) = $attr =~ /\$txt_(\w+)/ or next;
        next unless $wanted{$field};

        # Keep a legitimate "0" value; only undef becomes ''.
        my $value = $input->attr('value');
        $name_val{$field} = defined $value ? $value : '';
    }

    # Name and Email are stored differently. Fetch those now.
    # Discard incomplete Name-Email records -- either both or none. The test
    # is on the cell text: the cells themselves are always element objects,
    # so a "not ref" test on them can never succeed.
    @name_val{qw(Name Email)} = ('', '');
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            next if @td < 2;
            my ($name, $email) = map { $_->as_trimmed_text } @td[0, 1];
            if ( length($name) && length($email) ) {
                @name_val{qw(Name Email)} = ($name, $email);
            }
        }
    }

    $tree->delete;
    return \%name_val;
}
对于 Name
和 Email
我们要求两者都作为文本存在,或者两者都被丢弃。 (示例源在 Name
的 div
中有 There are no people ...
,Email
没有任何内容。)
要获得那里的任何东西,而不是上面的 if-else
使用
@name_val{qw(Name Email)} =
map { (defined) ? $_->as_trimmed_text : '' } @td[0,1];
对于这个示例,这样得到的 Name
是上面引用的那段提示文字,而 Email
则是空字符串。
然后
# Abbreviated excerpt: each "..." stands for the remaining headers / names.
# Specify spreadsheet headers in desired order and write to file
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', ...);
$worksheet->write_row(0, 0, \@headers); # first row
# Build ancillary data structures to later sort results by this order
# each header with its index from @headers (specifies columns' order)
# ("state" needs `use feature 'state'`; $idx starts undefined, so the
# first header gets index 1)
my %ho = map { state $idx; $_ => ++$idx } @headers;
# each name (`TeamNumber` ...) with the index of its header
my %name_order = ( Name => $ho{Name}, Email => $ho{Email},
TeamNumber => $ho{'Team Number'}, TeamName => $ho{'Team Name'}, ...
);
# Row 0 holds the headers, so data rows start at 1
my $row = 1;
foreach my $file (@files) {
next if -d $file;
# getNames returns a hash reference; copy it into a local hash
my %name_val = %{ getNames($file) };
# $row++ passes the current row and then advances to the next one
fill_row($worksheet, $row++, \%name_val, \%name_order);
foreach my $name (sort keys %name_val) { # demo
if ($name_val{$name}) { print "$name => $name_val{$name}\n" }
else { print "Not found $name in $file\n" }
}
}
# Write one record as a spreadsheet row.
#
# Parameters:
#   $ws     - worksheet object providing write_row()
#   $row    - zero-based row index to write to
#   $rdata  - hash ref: field name => value
#   $rorder - hash ref: field name => column order (numeric)
#
# One cell is produced for every name in %$rorder, in column order. A field
# missing from %$rdata yields an empty cell instead of shifting the values
# that follow it, and names absent from %$rorder are ignored.
# Always returns 1; a write failure is reported with a warning.
sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    my @vals = map  { $rdata->{$_} // '' }
               sort { $rorder->{$a} <=> $rorder->{$b} }
               keys %$rorder;

    # write_row() returns 0 on success.
    my $status = $ws->write_row($row, 0, \@vals);
    warn "write_row failed for row $row (status $status)\n" if $status;
    return 1;
}
write_row 接受一个数组引用,并将其元素
写成一行。请注意,当传入数组引用时,write
也可以这样使用。
链接的 HTML 文件上的输出
Area => 1
District => 1
State => NM
TeamName => Ruidoso
TeamNumber => 83
其余字段则输出 Not found ...
。生成的 .xls
文件是正确的(使用完整的名称列表时)。
完整程序
use warnings;
use strict;
use feature qw(say state);
use Path::Tiny;
use HTML::TreeBuilder;
use Spreadsheet::WriteExcel;
# Field names as they appear after "$txt_" in the HTML input names.
my @src = qw(TeamName TeamNumber SchoolName Area District Phone Phone2
             Fax Address1 Address2 City State Zip Website);

# Spreadsheet column titles, in output order.
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', 'School Name',
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax', 'Address One',
    'Address Two', 'City', 'State', 'Zip Code', 'Web Site'
);
my @lens = map { length } @headers;    # header widths, for printing

# Numeric order of headers' fields (so, columns); the first header gets 1.
my %ho = map { state $idx; $_ => ++$idx } @headers;

# Translation: name from HTML source => column number (retrieved from %ho)
# NOTE(review): the question's own getters read txt_Phone for the Ag Dept
# phone and txt_Phone2 for the School phone, the reverse of the mapping
# below -- confirm against the real pages.
my %name_order = (
    Name       => $ho{Name},            Email    => $ho{Email},
    TeamNumber => $ho{'Team Number'},
    TeamName   => $ho{'Team Name'},
    SchoolName => $ho{'School Name'},   Area     => $ho{'Area'},
    District   => $ho{'District'},      Phone2   => $ho{'Ag Dept Phone'},
    Phone      => $ho{'School Phone'},  Fax      => $ho{'Fax'},
    Address1   => $ho{'Address One'},
    Address2   => $ho{'Address Two'},
    City       => $ho{'City'},  State => $ho{'State'},  'Zip' => $ho{'Zip Code'},
    Website    => $ho{'Web Site'}
);

say "Order (column) of names from HTML source to follow headers:";
printf("%-10s ==> %s\n", $_, $name_order{$_})
    for sort { $name_order{$a} <=> $name_order{$b} } keys %name_order;
say '';

# new() returns undef when the file cannot be created.
my $workbook = Spreadsheet::WriteExcel->new('data.xls')
    or die "Unable to create 'data.xls': $!";
my $worksheet = $workbook->add_worksheet();

# Print headers to .xls file (and to screen)
$worksheet->write_row(0, 0, \@headers);
say "Spreadsheet, header and rows:";
prn_row(\@headers);    # print to screen

my @files = ('fetch_names.html');

my $row = 1;    # row 0 holds the headers
foreach my $file (@files) {
    next if -d $file;

    # Parse the file, print the row to spreadsheet
    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);
}

# Close explicitly: the subs below refer to $worksheet, so relying on
# destructors at program exit may tear the objects down in the wrong order.
$workbook->close();
# Functions
# Write one record as a spreadsheet row and echo it to the screen.
#
# Parameters:
#   $ws     - worksheet object providing write_row()
#   $row    - zero-based row index to write to
#   $rdata  - hash ref: field name => value
#   $rorder - hash ref: field name => column order (numeric)
#
# One cell is produced for every name in %$rorder, in column order, so a
# field missing from %$rdata gives an empty cell rather than shifting the
# remaining values. Always returns 1; a write failure is reported with a
# warning.
sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    # Work through the references that were passed in. Copying %$rorder into
    # a *scalar* would leave the sort silently using a file-level hash.
    my @vals = map  { $rdata->{$_} // '' }
               sort { $rorder->{$a} <=> $rorder->{$b} }
               keys %$rorder;

    prn_row(\@vals);    # print to screen

    # Use the worksheet handed in as $ws; write_row() returns 0 on success.
    my $status = $ws->write_row($row, 0, \@vals);
    warn "write_row failed for row $row (status $status)\n" if $status;
    return 1;
}
# Print one row to the screen: each cell is right-aligned to the width of
# the header in the same position (taken from @lens) and followed by a
# space; the row ends with a newline.
sub prn_row {
    my ($cells) = @_;
    my $col = 0;
    for my $cell (@$cells) {
        my $width = $lens[ $col++ ];
        printf("%${width}s ", $cell);
    }
    say '';
}
# Parse one HTML file and collect every field listed in @src in one pass.
#
# Parameters:
#   $file - path of the HTML file
#
# Returns a hash reference mapping field name (TeamName, Phone2, ...) to the
# input's value ('' when the input has no value attribute), plus Name and
# Email. Name and Email are taken from the table only when both cells hold
# text; otherwise both are ''.
sub getNames {
    my ($file) = @_;

    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    my %wanted = map { $_ => 1 } @src;

    my %name_val;
    for my $input ( $tree->look_down(_tag => 'input') ) {
        my $attr = $input->attr('name') // next;

        # Capture the whole word after "$txt_", so "Phone2" is never
        # mistaken for "Phone" (a plain substring match would do that).
        my ($field) = $attr =~ /\$txt_(\w+)/ or next;
        next unless $wanted{$field};

        $name_val{$field} = $input->attr('value') // '';
    }

    # Name and Email are stored differently, fetch those now.
    # Discard incomplete Name-Email records -- either both or none. The test
    # is on the cell text: the cells themselves are always element objects,
    # so a "not ref" test on them can never succeed.
    @name_val{qw(Name Email)} = ('', '');
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            next if @td < 2;
            my ($name, $email) = map { $_->as_trimmed_text } @td[0, 1];
            if ( length($name) && length($email) ) {
                @name_val{qw(Name Email)} = ($name, $email);
            }
        }
    }

    $tree->delete;
    return \%name_val;
}
这是一个完整的程序,可直接用于所提供的示例 HTML 源代码。
补充:实际页面可能有多个 Name-Email 对
use LWP::Simple qw(get);
# Fetch a directory page and collect its form fields and Name-Email pairs.
#
# Parameters:
#   $file - accepted for compatibility with the file-based version; unused
#   $url  - optional page address; defaults to the sample directory page
#
# Returns two hash references:
#   1. field name (TeamName, Phone2, ...) => input value ('' when the input
#      has no value attribute)
#   2. Name => Email for every table row that has at least two cells; when
#      no such row exists it holds the single pair '' => '' so that callers
#      still emit one row.
# Dies when the page cannot be fetched.
sub getNames {
    my ($file, $url) = @_;
    $url //= 'https://www.judgingcard.com/Directory/Directory.aspx?ID=1643';

    # LWP::Simple::get returns undef on failure and does not set $!.
    my $page = get($url) or die "Can't get the page $url";

    my $tree = HTML::TreeBuilder->new_from_content($page);

    my %wanted = map { $_ => 1 } @src;

    my %name_val;
    for my $input ( $tree->look_down(_tag => 'input') ) {
        my $attr = $input->attr('name') // next;

        # Capture the whole word after "$txt_", so "Phone2" is never
        # mistaken for "Phone" (a plain substring match would do that).
        my ($field) = $attr =~ /\$txt_(\w+)/ or next;
        next unless $wanted{$field};

        $name_val{$field} = $input->attr('value') // '';
    }

    # Name and Email are stored differently, fetch those now.
    # There may be more than one Name-Email pair, so enter each key-value
    # pair explicitly. An incomplete row is skipped; it must not wipe the
    # pairs that were already collected.
    my %name_email;
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            next if @td < 2;
            $name_email{ $td[0]->as_trimmed_text } = $td[1]->as_trimmed_text;
        }
    }
    %name_email = ( '' => '' ) unless %name_email;

    $tree->delete;
    return ( \%name_val, \%name_email );
}
那么在主程序中需要
# For every file, write one spreadsheet row per Name-Email pair; all other
# fields are repeated unchanged on each of those rows.
foreach my $file (@files) {
    next if -d $file;

    # Parse the file, unpack name-value and name-email hashes
    my ($rname_val, $rname_email) = getNames($file);
    my %name_val = %$rname_val;

    # Sort the names so that the row order is the same on every run;
    # plain hash key order is not stable.
    foreach my $name ( sort keys %$rname_email ) {
        @name_val{qw(Name Email)} = ( $name, $rname_email->{$name} );
        fill_row($worksheet, $row++, \%name_val, \%name_order);
    }
}
存在多个 Name-Email 对时,所需的格式是:表头相同,并且每一对单独写成文件中的一行,其中除 Name-Email 之外的所有信息都相同。
打印的电子表格(URL 在评论中提供)
您或许可以精简代码。下面的示例虽然没有使用 HTML::TreeBuilder,但方法是相似的。这里使用 Mojo::DOM58:
use 5.014;
use warnings;
use Mojo::DOM58;
use Path::Tiny;
use Data::Dumper;
# Field names as they appear after "txt_" in the HTML input names.
my @fields = qw( TeamName TeamNumber SchoolName Area District Phone Phone2 Fax Address1 Address2 City State Zip Website );

my $html = path('team.html')->slurp;
my $dom  = Mojo::DOM58->new($html);

# $data->{field} holds the input's value, or "" when the input or its value
# attribute is missing.
my $data;
for my $field (@fields) {
    # [name$="..."] matches names that END with the field, so "txt_Phone"
    # cannot pick up "txt_Phone2" the way a substring match (*=) can.
    # sprintf with a single-quoted format keeps Perl from interpolating "$=".
    # Assumes the field name is the last part of the input name -- TODO confirm.
    my $input = $dom->at( sprintf 'input[name$="txt_%s"]', $field );

    # at() returns undef when nothing matches; guard before calling attr().
    $data->{$field} = ( $input ? $input->attr('value') : undef ) // "";
}
say Dumper $data;
打印:
$VAR1 = {
'TeamName' => 'Ruidoso',
'Zip' => '',
'State' => 'NM',
'City' => '',
'District' => '1',
'Phone2' => '',
'Area' => '1',
'SchoolName' => '',
'Address2' => '',
'Website' => '',
'Address1' => '',
'Phone' => '',
'Fax' => '',
'TeamNumber' => '83'
};
我正在尝试从大约 3,000 个 HTML 文件中获取一大堆值并将它们保存到电子表格中。
我正在使用
HTML::TreeBuilder
处理 HTML,并使用
Spreadsheet::WriteExcel
创建电子表格。
但是我的脚本没有成功获取这些值,而是得到了如下警告:
Use of uninitialized value $val in concatenation (.) or string at spreadsheet.pl line 63.
我可能做错了什么?
这是我的 HTML 文件的一个示例(放在 pastebin.com 上)。
文件太大,无法直接贴在问题中。
我的 Perl 代码
use warnings 'all';
use strict;
use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;
use constant URL => 'http://pastebin.com/raw/qLwu80ZW';
# Per-file field values; each one is overwritten for every HTML file processed.
my $teamNumber = "";
my $teamName = "";
my $schoolName = "";
my $area = "";
my $district = "";
my $agDeptPhone = "";
my $schoolPhone = "";
my $fax = "";
my $addressOne = "";
my $addressTwo = "";
my $city = "";
my $state = "";
my $zipCode = "";
my $name = "";
my $email = "";
my $row = "";

# Current spreadsheet row; row 0 holds the headers.
my $Ypos = 0;

# Single quotes keep the backslashes. Inside double quotes "\_", "\M" and
# "\J" are not valid escapes, so Perl drops the backslash and the path
# collapses into "Z:_WEB_CLIENTSMorgan LivestockJudging Card".
my $path = 'Z:\_WEB_CLIENTS\Morgan Livestock\Judging Card';

# Spreadsheet::WriteExcel writes the binary Excel 97-2003 format, so the file
# must be named .xls. new() returns undef when the file cannot be created.
my $workbook = Spreadsheet::WriteExcel->new('perlOutput.xls')
    or die "Unable to create 'perlOutput.xls': $!";
my $worksheet = $workbook->add_worksheet();
# Shared implementation for the single-field getters below.
#
# Parameters:
#   $file  - path of the HTML file to parse
#   $field - field name as it appears after "$txt_" in the input's name
#            attribute (for example "TeamNumber")
#   $label - human-readable label used in the progress message
#
# Returns the input's value attribute, or an empty string when no matching
# input (or no value attribute) exists. Dies if the file cannot be read.
sub _get_input_value {
    my ($file, $field, $label) = @_;

    # new_from_content() expects the HTML text itself. Passing the bare
    # path($file) object would parse the file *name* as if it were HTML.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # "\$" matches a literal dollar sign; an unescaped "$txt_..." inside the
    # regex would be interpolated as a Perl variable. "\b" stops "Phone" from
    # also matching "Phone2". Scalar context returns the first match or undef.
    my $input = $tree->look_down(
        _tag => 'input',
        name => qr/\$txt_\Q$field\E\b/,
    );
    my $val = $input ? $input->attr('value') : undef;

    # Free the tree explicitly; thousands of files are parsed in one run.
    $tree->delete;

    if ( defined $val ) {
        print "Got $label $val\n";
    }
    else {
        print "Couldn't get $label\n";
        $val = '';
    }
    return $val;
}

# One getter per form field. Each takes the HTML file path and returns the
# field's value ('' when the field is absent from that file).
sub getTeamNumber  { my ($file) = @_; return _get_input_value($file, 'TeamNumber', 'Team Number'); }
sub getTeamName    { my ($file) = @_; return _get_input_value($file, 'TeamName',   'Team Name'); }
sub getSchoolName  { my ($file) = @_; return _get_input_value($file, 'SchoolName', 'School Name'); }
sub getArea        { my ($file) = @_; return _get_input_value($file, 'Area',       'Area'); }
sub getDistrict    { my ($file) = @_; return _get_input_value($file, 'District',   'District'); }
sub getDeptPhone   { my ($file) = @_; return _get_input_value($file, 'Phone',      'Dept Phone'); }
sub getSchoolPhone { my ($file) = @_; return _get_input_value($file, 'Phone2',     'School Phone'); }
sub getFax         { my ($file) = @_; return _get_input_value($file, 'Fax',        'Fax'); }
sub getAddress1    { my ($file) = @_; return _get_input_value($file, 'Address1',   'Address One'); }
sub getAddress2    { my ($file) = @_; return _get_input_value($file, 'Address2',   'Address Two'); }
sub getCity        { my ($file) = @_; return _get_input_value($file, 'City',       'City'); }
sub getState       { my ($file) = @_; return _get_input_value($file, 'State',      'State'); }
sub getZip         { my ($file) = @_; return _get_input_value($file, 'Zip',        'Zip'); }
sub getWebsite     { my ($file) = @_; return _get_input_value($file, 'Website',    'Website'); }
# Extract the Name and Email cells from the "rgMasterTable" table.
#
# Parameters:
#   $file - path of the HTML file to parse; when it is not a readable file
#           the sample page at URL is fetched instead
#
# Returns ($name, $email) taken from the last table row that has at least
# two <td> cells, or ('', '') when the page or the table is unavailable.
sub getNameAndEmail {
    my ($file) = @_;

    my ($name, $email) = ('', '');

    my $content = ( defined $file && -f $file )
                ? path($file)->slurp
                : get(URL);
    if ( !defined $content ) {
        print "Couldn't load HTML for Name and Email\n";
        return ($name, $email);
    }

    my $tree  = HTML::TreeBuilder->new_from_content($content);
    my $table = $tree->look_down(_tag => 'table', class => 'rgMasterTable');

    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            next if @td < 2;    # header rows and incomplete rows

            # Assign to the outer variables. Declaring new ones with "my"
            # here would hide the values from the print/return below.
            ($name, $email) = map { $_->as_trimmed_text } @td[0, 1];
        }
    }
    $tree->delete;

    print "Got Name and Email $name and $email\n";
    return ($name, $email);
}
# FILLER: writes one record into the spreadsheet at row $Ypos.
#
# Takes the fifteen field values in header order (Name, Email, Team Number,
# Team Name, School Name, Area, District, Ag Dept Phone, School Phone, Fax,
# Address One, Address Two, City, State, Zip Code).
#
# The row starts at column 0 so that every value lands under its header;
# the headers are written to A1..O1, which is column 0 onwards.
sub fill {
    my ($name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone,
        $fax, $addressOne, $addressTwo, $city, $state, $zipCode) = @_;

    my @cells = map { defined $_ ? $_ : '' } (
        $name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone,
        $fax, $addressOne, $addressTwo, $city, $state, $zipCode,
    );

    return $worksheet->write_row($Ypos, 0, \@cells);
}
# Collect the regular files inside the judging-card directory.
# readdir() returns bare names (including "." and ".."), so each name is
# joined with $path before it is tested or opened.
opendir(my $dh, $path) or die "Unable to open directory 'Judging Card': $!";
my @files = grep { -f } map { "$path/$_" } sort readdir($dh);
closedir($dh);

# Header row (row 0, columns A..O).
$worksheet->write_row(0, 0, [
    'Name', 'Email', 'Team Number', 'Team Name', 'School Name',
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax',
    'Address One', 'Address Two', 'City', 'State', 'Zip Code',
]);

###################################
# One spreadsheet row per HTML file.
foreach my $file (@files) {
    $Ypos = $Ypos + 1;

    ($name, $email) = getNameAndEmail($file);
    $teamNumber  = getTeamNumber($file);
    $teamName    = getTeamName($file);
    $schoolName  = getSchoolName($file);
    $area        = getArea($file);
    $district    = getDistrict($file);
    $agDeptPhone = getDeptPhone($file);
    $schoolPhone = getSchoolPhone($file);
    $fax         = getFax($file);
    $addressOne  = getAddress1($file);
    $addressTwo  = getAddress2($file);
    $city        = getCity($file);
    $state       = getState($file);
    $zipCode     = getZip($file);

    fill($name, $email, $teamNumber, $teamName, $schoolName,
         $area, $district, $agDeptPhone, $schoolPhone, $fax,
         $addressOne, $addressTwo, $city, $state, $zipCode);

    print "Progressing $file ($Ypos)\n";
}

# Close explicitly so the workbook is flushed in a defined order; the subs
# above hold on to $worksheet, which can defeat destructor-based cleanup.
$workbook->close();
# Revised getTeamNumber: returns the value of the input whose name contains
# "$txt_TeamNumber", or undef (after printing a message) when the file has
# no such input. Note that this file also defines getTeamNumber earlier;
# Perl keeps the later definition.
sub getTeamNumber {
    my ($file) = @_;

    # Parse the file's contents, not the path object itself.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # Search the whole tree once. Looping over every input and overwriting
    # the result would keep only the outcome for the last input on the page.
    # "\$" matches a literal dollar sign instead of interpolating a variable.
    my $input = $tree->look_down(
        _tag => 'input',
        name => qr/\$txt_TeamNumber\b/,
    );
    my $val = $input ? $input->attr('value') : undef;
    $tree->delete;

    if ( defined $val ) {
        print "Got Team number $val\n";
    }
    else {
        print "Couldn't get team number\n";
    }
    return $val;
}
新脚本:
use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;
# Single quotes preserve the backslashes of the Windows path; in a
# double-quoted string "\_", "\M" and "\J" lose their backslash.
my $path = 'Z:\_WEB_CLIENTS\Morgan Livestock\Judging Card';

# Spreadsheet::WriteExcel writes the binary .xls format, not .xlsx.
my $workbook = Spreadsheet::WriteExcel->new('perlOutput.xls')
    or die "Unable to create 'perlOutput.xls': $!";
my $worksheet = $workbook->add_worksheet();

# readdir() yields bare names (plus "." and ".."); build full paths and keep
# regular files only so later file tests and reads hit the right location.
opendir(my $dh, $path) or die "Unable to open directory 'Judging Card': $!";
my @files = grep { -f } map { "$path/$_" } sort readdir($dh);
closedir($dh);

# Specify spreadsheet headers in desired order and write to file
my @headers = (
    'Name', 'Email', 'Team Number', 'Team Name', 'School Name',
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax',
    'Address One', 'Address Two', 'City', 'State', 'Zip Code', 'Web Site',
);
$worksheet->write_row(0, 0, \@headers);    # first row

# Each header with its 1-based position in @headers (the column order).
# A hash slice is used so that no "state" feature has to be enabled.
my %ho;
@ho{@headers} = ( 1 .. scalar @headers );

# Each name as produced by getNames (the part after "$txt_" in the HTML
# input name) with the position of its header. The keys must match the
# getNames keys exactly, otherwise the value never reaches its column.
# NOTE(review): Phone => Ag Dept, Phone2 => School follows the original
# getDeptPhone/getSchoolPhone getters; confirm against the real pages.
my %name_order = (
    Name       => $ho{'Name'},
    Email      => $ho{'Email'},
    TeamNumber => $ho{'Team Number'},
    TeamName   => $ho{'Team Name'},
    SchoolName => $ho{'School Name'},
    Area       => $ho{'Area'},
    District   => $ho{'District'},
    Phone      => $ho{'Ag Dept Phone'},
    Phone2     => $ho{'School Phone'},
    Fax        => $ho{'Fax'},
    Address1   => $ho{'Address One'},
    Address2   => $ho{'Address Two'},
    City       => $ho{'City'},
    State      => $ho{'State'},
    Zip        => $ho{'Zip Code'},
    Website    => $ho{'Web Site'},
);
# Parse one HTML file and collect every wanted form field in a single pass.
#
# Parameters:
#   $file - path of the HTML file
#
# Returns a hash reference mapping field name (TeamName, Phone2, ...) to the
# input's value ('' when the input has no value attribute), plus Name and
# Email taken from the last row of the "rgMasterTable" table ('' when absent).
# Fields whose input is missing from the page are simply not in the hash.
sub getNames {
    my ($file) = @_;

    # new_from_content() needs the HTML text, so slurp the file first.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    my %wanted = map { $_ => 1 }
        qw(TeamName TeamNumber SchoolName Area District
           Phone Phone2 Fax Address1 Address2 City State Zip Website);

    my %name_val;
    for my $input ( $tree->look_down(_tag => 'input') ) {
        my $attr = $input->attr('name');
        next unless defined $attr;

        # Capture the whole word after "$txt_", so "Phone2" is never
        # mistaken for "Phone" (a plain substring match would do that).
        my ($field) = $attr =~ /\$txt_(\w+)/ or next;
        next unless $wanted{$field};

        # Keep a legitimate "0" value; only undef becomes ''.
        my $value = $input->attr('value');
        $name_val{$field} = defined $value ? $value : '';
    }

    # Name and Email are stored differently. Fetch those now, taking
    # whatever text the first two cells of the last data row hold.
    @name_val{qw(Name Email)} = ('', '');
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            next unless my @td = $tr->look_down(_tag => 'td');
            @name_val{qw(Name Email)} =
                map { defined $_ ? $_->as_trimmed_text : '' } @td[0, 1];
        }
    }

    $tree->delete;
    return \%name_val;
}
# fill_row($ws, $row, $rdata, $rorder)
#
# Write one spreadsheet row.
#   $ws     - worksheet object providing write_row()
#   $row    - zero-based row index to write to
#   $rdata  - hash ref: field name => value
#   $rorder - hash ref: field name => column order (numeric)
#
# One cell is emitted for every name in %$rorder, in ascending order, with an
# empty string where $rdata has no value. Building the row from the order
# table (not from whatever keys happen to be in the data) keeps values under
# their headers when a field is missing, and ignores names without a column.
#
# Always returns 1; a failing write_row() is reported with a warning.
sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    my @vals = map  { $rdata->{$_} // '' }
               sort { $rorder->{$a} <=> $rorder->{$b} }
               keys %$rorder;

    # write_row() returns 0 on success
    my $status = $ws->write_row($row, 0, \@vals);
    warn "write_row failed for row $row (status $status)\n" if $status;

    return 1;
}
# Write one spreadsheet row per input file.
# Row 0 holds the headers, so data rows have to start at 1. $row is the
# file-level counter declared at the top of the script (initialised to "").
$row = 1;
foreach my $file (@files) {
    next if -d $file;    # skip directories

    # Parse the file once and fill the spreadsheet with all info in one go
    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);

    foreach my $name (sort keys %name_val) {    # demo
        if ($name_val{$name}) { print "$name => $name_val{$name}\n" }
        else                  { print "Not found $name in $file\n" }
    }

    # NOTE(review): $Ypos is not updated in this loop, so this prints the
    # same value on every iteration -- confirm whether it is still needed.
    print "Progressing $Ypos \n";
}
简而言之,其中一些 'name'
可能只是在(某些)HTML 文件中找不到。所以首先测试它是否存在,然后写入 $val
或打印关于它未被发现的消息。
最明显的改进点：不需要为每个字段单独写一个函数。您可以在一次调用中查找所有字段，并把它们存入一个 name => value
形式的散列后返回。
# getNames($file)
#
# Parse one HTML file and collect the values of the <input> fields whose
# `name` attribute contains `$txt_<Field>`, plus one Name/Email pair from the
# `rgMasterTable` table.
#
# Returns a hash reference: field name (TeamName, Phone2, ..., Name, Email)
# => text value. Name and Email are always present; both are '' unless a
# table row supplies non-empty text for both.
sub getNames {
    my ($file) = @_;

    # new_from_content() expects the markup itself. A bare path($file) would
    # stringify to the file name, so the file has to be slurped.
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # List phrases to find, and build hash with their derived names
    # Should probably be defined globally, once for the whole program
    my @patterns = map { '$txt_' . $_ }
        qw(TeamName TeamNumber SchoolName Area District
           Phone Phone2 Fax Address1 Address2 City State Zip Website);

    # Name for each pattern: everything after first _ (so after $txt_)
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;

    # The lookahead keeps `$txt_Phone` from also matching `$txt_Phone2`.
    my %regex_for = map { $_ => qr/\Q$_\E(?!\w)/ } @patterns;

    my %name_val;
    foreach my $node ( $tree->look_down(_tag => 'input') ) {
        my $attr = $node->attr('name');
        next unless defined $attr;
        foreach my $patt (@patterns) {
            next unless $attr =~ $regex_for{$patt};
            # `//` rather than `||` so that a literal "0" is kept
            $name_val{ $patt_name{$patt} } = $node->attr('value') // '';
        }
    }

    # Name and Email are stored differently (table cells). Fetch those now.
    # Discard incomplete Name-Email records -- either both or none. The cells
    # are element objects, so completeness is judged on their trimmed text.
    @name_val{qw(Name Email)} = ('', '');
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            next if @td < 2;
            my ($name, $email) = map { $_->as_trimmed_text } @td[0, 1];
            next unless length $name and length $email;
            @name_val{qw(Name Email)} = ($name, $email);
        }
    }

    # Release the parse tree explicitly; this matters when looping over
    # thousands of files.
    $tree->delete;

    return \%name_val;
}
对于 Name
和 Email
我们要求两者都作为文本存在,或者两者都被丢弃。 (示例源在 Name
的 div
中有 There are no people ...
,Email
没有任何内容。)
要获得那里的任何东西,而不是上面的 if-else
使用
@name_val{qw(Name Email)} =
map { (defined) ? $_->as_trimmed_text : '' } @td[0,1];
对于这个示例，这样 Name
得到上面引用的那句提示文字，而 Email
得到空字符串。
然后
# Specify spreadsheet headers in desired order and write to file
# (the `...` in this excerpt marks elided entries; it is not runnable as is)
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', ...);
$worksheet->write_row(0, 0, \@headers); # first row
# Build ancillary data structures to later sort results by this order
# each header with its index from @headers (specifies columns' order)
# `state` needs `use feature 'state'`; ++$idx makes the positions 1-based
my %ho = map { state $idx; $_ => ++$idx } @headers;
# each name (`TeamNumber` ...) with the index of its header
my %name_order = ( Name => $ho{Name}, Email => $ho{Email},
TeamNumber => $ho{'Team Number'}, TeamName => $ho{'Team Name'}, ...
);
# Data rows start at 1 because row 0 holds the headers
my $row = 1;
# One spreadsheet row per input file; directories are skipped
foreach my $file (@files) {
next if -d $file;
# getNames() returns a hash reference; copy it into a local hash
my %name_val = %{ getNames($file) };
fill_row($worksheet, $row++, \%name_val, \%name_order);
foreach my $name (sort keys %name_val) { # demo
if ($name_val{$name}) { print "$name => $name_val{$name}\n" }
else { print "Not found $name in $file\n" }
}
}
# fill_row($ws, $row, $rdata, $rorder)
#
# Write one spreadsheet row.
#   $ws     - worksheet object providing write_row()
#   $row    - zero-based row index to write to
#   $rdata  - hash ref: field name => value
#   $rorder - hash ref: field name => column order (numeric)
#
# One cell is emitted for every name in %$rorder, in ascending order, with an
# empty string where $rdata has no value. Building the row from the order
# table (not from whatever keys happen to be in the data) keeps values under
# their headers when a field is missing, and ignores names without a column.
#
# Always returns 1; a failing write_row() is reported with a warning.
sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    my @vals = map  { $rdata->{$_} // '' }
               sort { $rorder->{$a} <=> $rorder->{$b} }
               keys %$rorder;

    # write_row() returns 0 on success
    my $status = $ws->write_row($row, 0, \@vals);
    warn "write_row failed for row $row (status $status)\n" if $status;

    return 1;
}
write_row 接受一个数组引用，并把其中的元素写成一行。
请注意，当传入数组引用时，write
也可以这样使用。
链接的 HTML 文件上的输出
Area => 1 District => 1 State => NM TeamName => Ruidoso TeamNumber => 83
其余字段则输出 Not found ... 。
生成的 .xls
文件是正确的（当使用完整的名称列表时）。
完整程序：
use warnings;
use strict;
use feature qw(say state);
use Path::Tiny;
use HTML::TreeBuilder;
use Spreadsheet::WriteExcel;
# Field names as they appear (after `$txt_`) in the HTML source
my @src = qw(TeamName TeamNumber SchoolName Area District Phone Phone2
             Fax Address1 Address2 City State Zip Website);

# Spreadsheet headers, in column order
my @headers = (
    'Name', 'Email', 'Team Number', 'Team Name', 'School Name',
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax', 'Address One',
    'Address Two', 'City', 'State', 'Zip Code', 'Web Site'
);
my @lens = map { length } @headers;    # for printing

# Numeric order of headers' fields (so, columns); positions are 1-based
my %ho = map { state $idx; $_ => ++$idx } @headers;

# Translation: name from HTML source => column number (retrieved from %ho)
my %name_order = (
    Name       => $ho{Name},
    Email      => $ho{Email},
    TeamNumber => $ho{'Team Number'},
    TeamName   => $ho{'Team Name'},
    SchoolName => $ho{'School Name'},
    Area       => $ho{'Area'},
    District   => $ho{'District'},
    Phone2     => $ho{'Ag Dept Phone'},
    Phone      => $ho{'School Phone'},
    Fax        => $ho{'Fax'},
    Address1   => $ho{'Address One'},
    Address2   => $ho{'Address Two'},
    City       => $ho{'City'},
    State      => $ho{'State'},
    'Zip'      => $ho{'Zip Code'},
    Website    => $ho{'Web Site'}
);

say "Order (column) of names from HTML source to follow headers:";
printf("%-10s ==> %s\n", $_, $name_order{$_})
    for sort { $name_order{$a} <=> $name_order{$b} } keys %name_order;
say '';

# new() returns undef when the file cannot be created; stop right away
# instead of failing later with a method call on undef.
my $workbook = Spreadsheet::WriteExcel->new('data.xls')
    or die "Cannot create data.xls: $!";
my $worksheet = $workbook->add_worksheet();

# Print headers to .xls file (and to screen)
$worksheet->write_row(0, 0, \@headers);
say "Spreadsheet, header and rows:";
prn_row(\@headers);    # print to screen

my @files = ('fetch_names.html');
my $row = 1;           # row 0 holds the headers
foreach my $file (@files) {
    next if -d $file;
    # Parse the file, print the row to spreadsheet
    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);
}

# Close explicitly so the file is completely written out, rather than
# relying on object destruction at program exit.
$workbook->close();
# Functions
# fill_row($ws, $row, $rdata, $rorder)
#
# Print one row to the screen and write it to the worksheet.
#   $ws     - worksheet object providing write_row()
#   $row    - zero-based row index to write to
#   $rdata  - hash ref: field name => value
#   $rorder - hash ref: field name => column order (numeric)
#
# The order table passed in is the one actually used (the original declared
# a scalar `$name_order`, which made the body fall back on the file-level
# %name_order), and the row goes to the worksheet passed in, not the global.
#
# One cell is emitted for every name in %$rorder, in ascending order, with an
# empty string where $rdata has no value, so values stay under their headers
# even when a field is missing from the data.
#
# Always returns 1; a failing write_row() is reported with a warning.
sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    my @vals = map  { $rdata->{$_} // '' }
               sort { $rorder->{$a} <=> $rorder->{$b} }
               keys %$rorder;

    prn_row(\@vals);    # print to screen

    # write_row() returns 0 on success
    my $status = $ws->write_row($row, 0, \@vals);
    warn "write_row failed for row $row (status $status)\n" if $status;

    return 1;
}
# prn_row($cells)
#
# Print the elements of the array referenced by $cells on one line. Each
# element is right-aligned to the width stored at the same index in the
# file-level @lens and followed by a single space; the line ends with a
# newline.
sub prn_row {
    my @cells = @{ $_[0] };
    foreach my $col (0 .. $#cells) {
        my $width = $lens[$col];
        printf("%${width}s ", $cells[$col]);
    }
    say '';
}
# getNames($file)
#
# Parse one HTML file and collect the values of the <input> fields whose
# `name` attribute contains `$txt_<Field>` for every field in the file-level
# @src, plus one Name/Email pair from the `rgMasterTable` table.
#
# Returns a hash reference: field name => text value. Name and Email are
# always present; both are '' unless a table row supplies non-empty text for
# both.
sub getNames {
    my ($file) = @_;

    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );

    # List phrases to find, and build hash with their derived names
    my @patterns = map { '$txt_' . $_ } @src;

    # Name for each pattern: everything after the first _ (so after $txt_)
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;

    # The lookahead keeps `$txt_Phone` from also matching `$txt_Phone2`.
    my %regex_for = map { $_ => qr/\Q$_\E(?!\w)/ } @patterns;

    my %name_val;
    foreach my $node ( $tree->look_down(_tag => 'input') ) {
        my $attr = $node->attr('name');
        next unless defined $attr;
        foreach my $patt (@patterns) {
            next unless $attr =~ $regex_for{$patt};
            $name_val{ $patt_name{$patt} } = $node->attr('value') // '';
        }
    }

    # Name and Email are stored differently (table cells), fetch those now.
    # Discard incomplete Name-Email records -- either both or none. The cells
    # are element objects, so completeness is judged on their trimmed text.
    @name_val{qw(Name Email)} = ('', '');
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            next if @td < 2;
            my ($name, $email) = map { $_->as_trimmed_text } @td[0, 1];
            next unless length $name and length $email;
            @name_val{qw(Name Email)} = ($name, $email);
        }
    }

    # Release the parse tree explicitly; this matters when looping over
    # many files.
    $tree->delete;

    return \%name_val;
}
这是一个完整的程序,带有提供的示例 HTML 源代码。
补充:实际页面可能有多个name-email对
use LWP::Simple qw(get);
# getNames($file)
#
# Variant for pages that list several people. It fetches a fixed URL ($file
# is accepted for interface compatibility but not used) and collects:
#   - the values of the <input> fields whose `name` attribute contains
#     `$txt_<Field>` for every field in the file-level @src
#   - every Name => Email pair found in the `rgMasterTable` table
#
# Returns a list of two hash references: (\%name_val, \%name_email).
# When no pair is found %name_email holds the single entry '' => '', so the
# caller still produces one row for the team.
sub getNames {
    my ($file) = @_;

    my $url = 'https://www.judgingcard.com/Directory/Directory.aspx?ID=1643';
    # get() only signals failure by returning undef; $! carries no
    # information about it, so it is left out of the message.
    my $page = get($url) or die "Can't get the page $url";

    my $tree = HTML::TreeBuilder->new_from_content($page);

    # List phrases to find, and build hash with their derived names
    my @patterns = map { '$txt_' . $_ } @src;

    # Name for each pattern: everything after the first _ (so after $txt_)
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;

    # The lookahead keeps `$txt_Phone` from also matching `$txt_Phone2`.
    my %regex_for = map { $_ => qr/\Q$_\E(?!\w)/ } @patterns;

    my %name_val;
    foreach my $node ( $tree->look_down(_tag => 'input') ) {
        my $attr = $node->attr('name');
        next unless defined $attr;
        foreach my $patt (@patterns) {
            next unless $attr =~ $regex_for{$patt};
            $name_val{ $patt_name{$patt} } = $node->attr('value') // '';
        }
    }

    # Name and Email are stored differently (table cells), fetch those now.
    # There may be more than one Name-Email pair, so enter each key-value
    # pair explicitly. Rows with fewer than two cells are skipped; they must
    # not erase the pairs collected so far.
    my %name_email;
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    if ($table) {
        for my $tr ( $table->look_down(_tag => 'tr') ) {
            my @td = $tr->look_down(_tag => 'td');
            next if @td < 2;
            $name_email{ $td[0]->as_trimmed_text } = $td[1]->as_trimmed_text;
        }
    }
    %name_email = ('' => '') unless %name_email;

    # Release the parse tree explicitly.
    $tree->delete;

    return \%name_val, \%name_email;
}
然后在主程序中你需要
# One spreadsheet row per Name-Email pair of every file; all other fields
# are repeated on each of those rows.
foreach my $file (@files) {
    next if -d $file;

    # Parse the file, unpack name-value and name-email hashes
    my ($rname_val, $rname_email) = getNames($file);
    my %name_val = %$rname_val;    # private copy; Name/Email are set per row

    # Print a row for each Name-Email, adding them to %name_val.
    # Names are sorted so the rows come out in the same order on every run.
    foreach my $name (sort keys %$rname_email) {
        @name_val{qw(Name Email)} = ($name, $rname_email->{$name});
        fill_row($worksheet, $row++, \%name_val, \%name_order);
    }
}
具有多个 Name-Email 对的所需格式是:相同 headers,并且对于每一对,将单独的一行打印到文件,其中除 Name-Email 之外的所有信息都是相同的.
打印的电子表格(URL 在评论中提供)
您或许可以减少代码。即使在下面的示例中我没有使用 HTML::TreeBuilder，方法也是相似的。使用 Mojo::DOM58：
use 5.014;
use warnings;
use Mojo::DOM58;
use Path::Tiny;
use Data::Dumper;
# Fields to extract; each is looked up as an <input> whose `name` attribute
# contains "txt_<field>".
my @fields = qw( TeamName TeamNumber SchoolName Area District Phone Phone2
                 Fax Address1 Address2 City State Zip Website );

my $html = path('team.html')->slurp;
my $dom  = Mojo::DOM58->new($html);

# field name => value of the matching input ('' when the input or its
# `value` attribute is missing)
my $data = {};
for my $field (@fields) {
    # at() returns undef when nothing matches; calling attr() on that would
    # die, so a missing field is recorded as an empty string.
    # NOTE(review): `*=` is a substring match, so "txt_Phone" can also match
    # an input named "...txt_Phone2" -- confirm the order of the inputs.
    my $input = $dom->at(qq{input[name*="txt_$field"]});
    $data->{$field} = $input ? ( $input->attr('value') // "" ) : "";
}

say Dumper $data;
打印:
$VAR1 = {
'TeamName' => 'Ruidoso',
'Zip' => '',
'State' => 'NM',
'City' => '',
'District' => '1',
'Phone2' => '',
'Area' => '1',
'SchoolName' => '',
'Address2' => '',
'Website' => '',
'Address1' => '',
'Phone' => '',
'Fax' => '',
'TeamNumber' => '83'
};