How to get the first URL from a for-in loop (Python)
I am trying to get the first link from dataInfo in the loop.
This script lets me fetch image links and download the image files. I only want the first image, not all of them; that is my problem.
# Get results using JSON
results = simplejson.load(response)
data = results['responseData']
dataInfo = data['results']

# Iterate for each result and get unescaped url
for myUrl in dataInfo:
    count = count + 1
    print myUrl['unescapedUrl']
    myopener.retrieve(myUrl['unescapedUrl'], str(count) + '.jpg')
Here is the whole source code:
import os
import sys
import time
from urllib import FancyURLopener
import urllib2
import simplejson

# Define search term
searchTerm = "intel i7"

# Replace spaces ' ' in search term for '%20' in order to comply with request
searchTerm = searchTerm.replace(' ', '%20')

# Start FancyURLopener with defined version
class MyOpener(FancyURLopener):
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'

myopener = MyOpener()

# Set count to 0
count = 0

for i in range(0, 10):
    # Notice that the start changes for each iteration in order to request a new set of images for each loop
    url = ('https://ajax.googleapis.com/ajax/services/search/images?' +
           'v=1.0&q=' + searchTerm + '&start=' + str(i * 4) + '&userip=MyIP')
    print url

    request = urllib2.Request(url, None, {'Referer': 'testing'})
    response = urllib2.urlopen(request)

    # Get results using JSON
    results = simplejson.load(response)
    data = results['responseData']
    dataInfo = data['results']

    # Iterate for each result and get unescaped url
    for myUrl in dataInfo:
        count = count + 1
        print myUrl['unescapedUrl']
        myopener.retrieve(myUrl['unescapedUrl'], str(count) + '.jpg')

    # Sleep for one second to prevent IP blocking from Google
    time.sleep(1)
Once you have retrieved the image, try a break to exit after the first iteration of the for loop. For example:

for myUrl in dataInfo:
    count = count + 1
    print myUrl['unescapedUrl']
    myopener.retrieve(myUrl['unescapedUrl'], str(count) + '.jpg')
    break
You could also use for myUrl in dataInfo[:1]:
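As a minimal sketch of that slicing approach, reusing dataInfo, count, and myopener from the question's script (slicing an empty list simply skips the loop body):

# Loop over at most the first result
for myUrl in dataInfo[:1]:
    count = count + 1
    print myUrl['unescapedUrl']
    myopener.retrieve(myUrl['unescapedUrl'], str(count) + '.jpg')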
Or break after you have successfully downloaded the first image:

for myUrl in dataInfo:
    count = count + 1
    print myUrl['unescapedUrl']
    try:
        myopener.retrieve(myUrl['unescapedUrl'], str(count) + '.jpg')
        break
    except:
        pass
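One caveat: a bare except also hides unrelated bugs. A slightly narrower sketch, assuming a failed download surfaces as an IOError (which urllib's openers generally raise in Python 2), could look like:

for myUrl in dataInfo:
    count = count + 1
    print myUrl['unescapedUrl']
    try:
        myopener.retrieve(myUrl['unescapedUrl'], str(count) + '.jpg')
        break  # stop after the first successful download
    except IOError:
        pass  # this result failed, try the next one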
To clarify, dataInfo here contains a list of results and you only want the first one. Is that right?
If so, you should be able to simply reference the first (0th) index instead of looping over dataInfo.
Instead of

for myUrl in dataInfo:
    count = count + 1
    print myUrl['unescapedUrl']
    myopener.retrieve(myUrl['unescapedUrl'], str(count) + '.jpg')

you should be able to use

myopener.retrieve(dataInfo[0]['unescapedUrl'], '0.jpg')

dataInfo[0]['unescapedUrl'] should be the first URL.
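Note that direct indexing assumes the search actually returned something; a minimal guard against an empty dataInfo (an assumption, since a page of results could come back empty) might look like:

# Only index into dataInfo if the API returned at least one result
if dataInfo:
    firstUrl = dataInfo[0]['unescapedUrl']
    print firstUrl
    myopener.retrieve(firstUrl, '0.jpg')
else:
    print 'No results for this request'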