使用相同的代码解析多个字符串
Parsing multiple strings with the same code
我正在尝试打印下方 URL 页面中的所有网球运动员。但是代码只打印出一个运动员的名字,尽管其他运动员的名字同样可以通过下面的 split 表达式取得:names1.split('">')[1].split('</a><a href="')[0]
import time
import urllib2
from urllib2 import urlopen
import datetime
def Tennis():
    """Fetch the Australian Open player-profile page and print player names.

    The page is downloaded, the markup between the section-A heading and the
    end of that section is cut out, and the text that follows a `">` in that
    markup is printed.

    Any exception raised along the way is caught and its message is printed
    instead of propagating.
    """
    try:
        australianOpen = urllib2.urlopen('http://www.ausopen.com/en_AU/players/profiles.html').read()
        # Keep only the markup between the section-A heading and the section end.
        names1 = australianOpen.split('</div><div id="section_A" class="sectionHeading"><div class="men">A</div><div class="women">A</div></div><div class="section"><div class="men">')[1].split('</a></div></div></div></div>')[0]
        # NOTE: this loop is the behaviour the question asks about and is kept
        # as posted.  Iterating over a string yields one character per pass,
        # and names1 is never shortened, so the expression below extracts the
        # same first entry on every pass.
        for Eachnames in names1:
            Eachnames = names1.split('">')[1].split('</a><a href="')[0]
            print(Eachnames)
    except Exception as e:
        print(str(e))
Tennis()
只需在循环内添加一行 names1 = names1[names1.find(Eachnames)+len(Eachnames):],每取出一个名字后就截掉已经处理过的部分:
def Tennis():
    """Print the text of each entry in section A of the player-profile page.

    The page is downloaded, the markup between the section-A heading and the
    end of that section is cut out, and the text that follows each `">` is
    printed, one entry per line.  Entries with no visible text are skipped.

    Any exception raised while fetching or slicing the page is caught and its
    message is printed instead of propagating.
    """
    try:
        australianOpen = urllib2.urlopen('http://www.ausopen.com/en_AU/players/profiles.html').read()
        # Keep only the markup between the section-A heading and the section end.
        names1 = australianOpen.split('</div><div id="section_A" class="sectionHeading"><div class="men">A</div><div class="women">A</div></div><div class="section"><div class="men">')[1].split('</a></div></div></div></div>')[0]
        # Continue only while another `">` remains.  Looping over the
        # characters of names1 instead would keep going after the last entry
        # and end with an IndexError from the [1] lookup below.
        while '">' in names1:
            Eachnames = names1.split('">')[1].split('</a><a href="')[0]
            if not Eachnames:
                # Nothing follows this `">`; step past it so the loop always
                # makes progress.
                names1 = names1[names1.find('">') + len('">'):]
                continue
            # Discard everything up to and including the entry just found so
            # the next pass picks up the following one.
            names1 = names1[names1.find(Eachnames) + len(Eachnames):]
            # Trim any markup that trails the visible text.
            if Eachnames.find('<') != -1:
                Eachnames = Eachnames[:Eachnames.find('<')]
            if Eachnames:
                print(Eachnames)
    except Exception as e:
        print(str(e))
Tennis()
问题出在 Eachnames = names1.split('">')[1].split('</a><a href="')[0] 这一行。这里先把字符串拆分成子字符串列表,再用 [1] 取出下标为 1 的元素。由于找到第一个名字之后没有修改变量 names1,每次循环都会一遍又一遍地取到同一个名字。一个简单的修改是:
import time
import urllib
import urllib.request
import datetime
def Tennis():
    """Fetch the player-profile page and print the raw pieces of section A.

    The page is downloaded and decoded as UTF-8, the markup between the
    section-A heading and the end of that section is cut out and split on
    `">`.  For every piece, the list obtained by splitting it on
    `</a><a href="` is printed.

    Any exception is caught; the word "Exception" and the exception class are
    printed instead of propagating.
    """
    try:
        # The with-block closes the HTTP response once the body has been read.
        with urllib.request.urlopen('http://www.ausopen.com/en_AU/players/profiles.html') as response:
            australianOpen = response.read().decode('utf-8')
        names1 = australianOpen.split('</div><div id="section_A" class="sectionHeading"><div class="men">A</div><div class="women">A</div></div><div class="section"><div class="men">')[1].split('</a></div></div></div></div>')[0]
        the_names = names1.split('">')
        for name in the_names:
            print(name.split('</a><a href="'))
    except Exception as exc:
        # type(exc) is the class sys.exc_info()[0] would report here, and it
        # avoids relying on `sys`, which the imports shown with this snippet
        # do not include.
        print("Exception", type(exc))
Tennis()
但是这样打印出来的结果并不正确,因为您使用的分隔条件本身就不对(除非您想要的就是半截 URL 地址之类的乱码)。我认为一个简单又好用的解决方案是使用正则表达式。
下面是一个用于捕获名字的简单正则表达式(只匹配不含特殊字符的名字):
# Collect every "letters, letters" run (ASCII letters only) that directly
# follows a `">` in names1; findall returns just the captured group.
the_names = re.findall("\">([A-Za-z]*, [A-Za-z]*)", names1)
使用正则表达式的稍微简化的程序是
import urllib.request
import re
def Tennis():
    """Fetch the player-profile page and print the names found in section A.

    The page is downloaded and decoded as UTF-8, and the markup between the
    section-A heading and the end of that section is cut out.  Every
    "letters, letters" run (ASCII letters only) that directly follows a `">`
    in that markup is printed on its own line.

    If fetching or slicing the page raises, "Exception" is printed and the
    function returns without printing any names.
    """
    try:
        # The with-block closes the HTTP response once the body has been read.
        with urllib.request.urlopen('http://www.ausopen.com/en_AU/players/profiles.html') as response:
            australianOpen = response.read().decode('utf-8')
        names1 = australianOpen.split('</div><div id="section_A" class="sectionHeading"><div class="men">A</div><div class="women">A</div></div><div class="section"><div class="men">')[1].split('</a></div></div></div></div>')[0]
    except Exception:
        print("Exception")
        # names1 was never assigned; stop here rather than fail on it below.
        return
    the_names = re.findall("\">([A-Za-z]*, [A-Za-z]*)", names1)
    for name in the_names:
        print(name)
Tennis()
希望这对您有所帮助
我正在尝试打印下方 URL 页面中的所有网球运动员。但是代码只打印出一个运动员的名字,尽管其他运动员的名字同样可以通过下面的 split 表达式取得:names1.split('">')[1].split('</a><a href="')[0]
import time
import urllib2
from urllib2 import urlopen
import datetime
def Tennis():
    """Fetch the Australian Open player-profile page and print player names.

    The page is downloaded, the markup between the section-A heading and the
    end of that section is cut out, and the text that follows a `">` in that
    markup is printed.

    Any exception raised along the way is caught and its message is printed
    instead of propagating.
    """
    try:
        australianOpen = urllib2.urlopen('http://www.ausopen.com/en_AU/players/profiles.html').read()
        # Keep only the markup between the section-A heading and the section end.
        names1 = australianOpen.split('</div><div id="section_A" class="sectionHeading"><div class="men">A</div><div class="women">A</div></div><div class="section"><div class="men">')[1].split('</a></div></div></div></div>')[0]
        # NOTE: this loop is the behaviour the question asks about and is kept
        # as posted.  Iterating over a string yields one character per pass,
        # and names1 is never shortened, so the expression below extracts the
        # same first entry on every pass.
        for Eachnames in names1:
            Eachnames = names1.split('">')[1].split('</a><a href="')[0]
            print(Eachnames)
    except Exception as e:
        print(str(e))
Tennis()
只需在循环内添加一行 names1 = names1[names1.find(Eachnames)+len(Eachnames):],每取出一个名字后就截掉已经处理过的部分:
def Tennis():
    """Print the text of each entry in section A of the player-profile page.

    The page is downloaded, the markup between the section-A heading and the
    end of that section is cut out, and the text that follows each `">` is
    printed, one entry per line.  Entries with no visible text are skipped.

    Any exception raised while fetching or slicing the page is caught and its
    message is printed instead of propagating.
    """
    try:
        australianOpen = urllib2.urlopen('http://www.ausopen.com/en_AU/players/profiles.html').read()
        # Keep only the markup between the section-A heading and the section end.
        names1 = australianOpen.split('</div><div id="section_A" class="sectionHeading"><div class="men">A</div><div class="women">A</div></div><div class="section"><div class="men">')[1].split('</a></div></div></div></div>')[0]
        # Continue only while another `">` remains.  Looping over the
        # characters of names1 instead would keep going after the last entry
        # and end with an IndexError from the [1] lookup below.
        while '">' in names1:
            Eachnames = names1.split('">')[1].split('</a><a href="')[0]
            if not Eachnames:
                # Nothing follows this `">`; step past it so the loop always
                # makes progress.
                names1 = names1[names1.find('">') + len('">'):]
                continue
            # Discard everything up to and including the entry just found so
            # the next pass picks up the following one.
            names1 = names1[names1.find(Eachnames) + len(Eachnames):]
            # Trim any markup that trails the visible text.
            if Eachnames.find('<') != -1:
                Eachnames = Eachnames[:Eachnames.find('<')]
            if Eachnames:
                print(Eachnames)
    except Exception as e:
        print(str(e))
Tennis()
问题出在 Eachnames = names1.split('">')[1].split('</a><a href="')[0] 这一行。这里先把字符串拆分成子字符串列表,再用 [1] 取出下标为 1 的元素。由于找到第一个名字之后没有修改变量 names1,每次循环都会一遍又一遍地取到同一个名字。一个简单的修改是:
import time
import urllib
import urllib.request
import datetime
def Tennis():
    """Fetch the player-profile page and print the raw pieces of section A.

    The page is downloaded and decoded as UTF-8, the markup between the
    section-A heading and the end of that section is cut out and split on
    `">`.  For every piece, the list obtained by splitting it on
    `</a><a href="` is printed.

    Any exception is caught; the word "Exception" and the exception class are
    printed instead of propagating.
    """
    try:
        # The with-block closes the HTTP response once the body has been read.
        with urllib.request.urlopen('http://www.ausopen.com/en_AU/players/profiles.html') as response:
            australianOpen = response.read().decode('utf-8')
        names1 = australianOpen.split('</div><div id="section_A" class="sectionHeading"><div class="men">A</div><div class="women">A</div></div><div class="section"><div class="men">')[1].split('</a></div></div></div></div>')[0]
        the_names = names1.split('">')
        for name in the_names:
            print(name.split('</a><a href="'))
    except Exception as exc:
        # type(exc) is the class sys.exc_info()[0] would report here, and it
        # avoids relying on `sys`, which the imports shown with this snippet
        # do not include.
        print("Exception", type(exc))
Tennis()
但是这样打印出来的结果并不正确,因为您使用的分隔条件本身就不对(除非您想要的就是半截 URL 地址之类的乱码)。我认为一个简单又好用的解决方案是使用正则表达式。下面是一个用于捕获名字的简单正则表达式(只匹配不含特殊字符的名字):
# Collect every "letters, letters" run (ASCII letters only) that directly
# follows a `">` in names1; findall returns just the captured group.
the_names = re.findall("\">([A-Za-z]*, [A-Za-z]*)", names1)
使用正则表达式的稍微简化的程序是
import urllib.request
import re
def Tennis():
    """Fetch the player-profile page and print the names found in section A.

    The page is downloaded and decoded as UTF-8, and the markup between the
    section-A heading and the end of that section is cut out.  Every
    "letters, letters" run (ASCII letters only) that directly follows a `">`
    in that markup is printed on its own line.

    If fetching or slicing the page raises, "Exception" is printed and the
    function returns without printing any names.
    """
    try:
        # The with-block closes the HTTP response once the body has been read.
        with urllib.request.urlopen('http://www.ausopen.com/en_AU/players/profiles.html') as response:
            australianOpen = response.read().decode('utf-8')
        names1 = australianOpen.split('</div><div id="section_A" class="sectionHeading"><div class="men">A</div><div class="women">A</div></div><div class="section"><div class="men">')[1].split('</a></div></div></div></div>')[0]
    except Exception:
        print("Exception")
        # names1 was never assigned; stop here rather than fail on it below.
        return
    the_names = re.findall("\">([A-Za-z]*, [A-Za-z]*)", names1)
    for name in the_names:
        print(name)
Tennis()
希望这对您有所帮助