如何将搜索到的每个项目的链接存储到列表中?
How can I store the links found for each item searched into a list?
此代码对每个简码执行一次简单的 Google 搜索,然后打印找到的链接(最多 10 个)。如何把为每个简码找到的链接存储到与该简码对应的列表或字典中?
# The search helper comes from the third-party ``googlesearch`` module.
try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")
    # search() is required by every lookup in this script; continuing
    # without it would only fail later with a less helpful NameError.
    raise

# Load the short codes to look up from the 'Short Code' column.
with open('Unknown.xlsx', "rb") as f:
    df = pd.read_excel(f)  # can also index sheet by name or fetch all sheets
shortcode_list = df['Short Code'].tolist()
def stopwatch(sec, *, sleep=None):
    """Block for ``sec`` seconds while printing an MM:SS countdown.

    The remaining time is rewritten in place on a single terminal line
    (each print ends with a carriage return), once per second.

    Args:
        sec: Whole number of seconds to wait. Zero or a negative value
            returns immediately.
        sleep: Optional callable invoked as ``sleep(1)`` once per tick;
            defaults to ``time.sleep``.
    """
    if sleep is None:
        sleep = time.sleep
    remaining = sec
    while remaining > 0:
        # Keep the running total in ``remaining``. Unpacking divmod() back
        # into the counter would discard the whole minutes, so a
        # 480-second wait would end after about one minute.
        minutes, seconds = divmod(remaining, 60)
        print('{:02d}:{:02d}'.format(minutes, seconds), end='\r')
        sleep(1)
        remaining -= 1
# Candidate waits, in seconds: ``delays`` feeds the countdown before each
# query, ``pauses`` feeds the ``pause`` argument of search().
pauses = np.arange(2, 8, 1).tolist()
pause = np.random.choice(pauses)
delays = np.arange(1, 60, 1).tolist()
delay = np.random.choice(delays)

for shortcode in tqdm(shortcode_list):
    try:
        # Draw fresh random waits for every short code.
        delay = np.random.choice(delays)
        pause = np.random.choice(pauses)
        stopwatch(delay)

        query = "text * to " + '"' + str(shortcode) + '"'
        url = 'https://www.google.com?q=' + query
        # NOTE(review): ``headers`` is not defined in this snippet; it is
        # presumably set up elsewhere - confirm.
        res = requests.get(url, headers=headers)
        print("\nThe query will be " + query + " " + str(res))

        # Print every link returned for this short code.
        for k in search(query, tld="co.in", num=10, stop=10, pause=pause,
                        country='US',
                        user_agent=googlesearch.get_random_user_agent(),
                        verify_ssl=True):
            print(k)
    except HTTPError as exception:
        # Report every HTTP error, not only 429, so failures stay visible.
        print(exception)
        if exception.code == 429:
            # Only a 429 status triggers the long wait before moving on.
            print("Waiting for 8 minutes and Continue")
            stopwatch(480)
您可以使用以简码为键、以列表为值的字典。
采用这种方法后,您的代码大致如下:
import numpy as np
from tqdm import tqdm
import time
import requests
# The search helper comes from the third-party ``googlesearch`` module.
try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")
    # search() is required by every lookup in this script; continuing
    # without it would turn each lookup into a caught NameError.
    raise

# Small hard-coded sample so the example runs without external files.
shortcode_list = ["abc", "SO"]
def stopwatch(sec, *, sleep=None):
    """Block for ``sec`` seconds while printing an MM:SS countdown.

    The remaining time is rewritten in place on a single terminal line
    (each print ends with a carriage return), once per second.

    Args:
        sec: Whole number of seconds to wait. Zero or a negative value
            returns immediately.
        sleep: Optional callable invoked as ``sleep(1)`` once per tick;
            defaults to ``time.sleep``.
    """
    if sleep is None:
        sleep = time.sleep
    remaining = sec
    while remaining > 0:
        # Keep the running total in ``remaining``. Unpacking divmod() back
        # into the counter would discard the whole minutes, so a
        # 480-second wait would end after about one minute.
        minutes, seconds = divmod(remaining, 60)
        print('{:02d}:{:02d}'.format(minutes, seconds), end='\r')
        sleep(1)
        remaining -= 1
# Random wait ranges, in seconds.
pauses = np.arange(2, 8, 1).tolist()
# NOTE(review): ``pause`` is drawn here but is not passed to search() in
# this version of the script.
pause = np.random.choice(pauses)
# Countdown before each query: 1 to 4 seconds.
delays = np.arange(1, 5, 1).tolist()
delay = np.random.choice(delays)

results = {}  # short code -> list of links found for it

for shortcode in tqdm(shortcode_list):
    try:
        # Wait a fresh random delay before every query.
        delay = np.random.choice(delays)
        stopwatch(delay)

        query = "text * to " + '"' + str(shortcode) + '"'
        url = 'https://www.google.com?q=' + query
        res = requests.get(url)
        print("\nThe query will be " + query + " " + str(res))

        cur_res = []  # links collected for this short code
        for k in search(query, num_results=10):
            print(k)
            cur_res.append(k)
        # Stored only after the search finished without raising.
        results[shortcode] = cur_res
    except Exception as exception:
        # Any failure is reported, followed by a long wait before the
        # next short code is tried.
        print(exception)
        print("Waiting for 8 minutes and Continue")
        stopwatch(480)
给你下次提问的一个建议:请删除或修改无法复现的代码(例如那个未知的 Excel 文件),并确保代码片段可以直接运行和调试(包含所有导入等),以方便想要帮助你的人。
此代码对每个简码执行一次简单的 Google 搜索,然后打印找到的链接(最多 10 个)。如何把为每个简码找到的链接存储到与该简码对应的列表或字典中?
# The search helper comes from the third-party ``googlesearch`` module.
try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")
    # search() is required by every lookup in this script; continuing
    # without it would only fail later with a less helpful NameError.
    raise

# Load the short codes to look up from the 'Short Code' column.
with open('Unknown.xlsx', "rb") as f:
    df = pd.read_excel(f)  # can also index sheet by name or fetch all sheets
shortcode_list = df['Short Code'].tolist()
def stopwatch(sec, *, sleep=None):
    """Block for ``sec`` seconds while printing an MM:SS countdown.

    The remaining time is rewritten in place on a single terminal line
    (each print ends with a carriage return), once per second.

    Args:
        sec: Whole number of seconds to wait. Zero or a negative value
            returns immediately.
        sleep: Optional callable invoked as ``sleep(1)`` once per tick;
            defaults to ``time.sleep``.
    """
    if sleep is None:
        sleep = time.sleep
    remaining = sec
    while remaining > 0:
        # Keep the running total in ``remaining``. Unpacking divmod() back
        # into the counter would discard the whole minutes, so a
        # 480-second wait would end after about one minute.
        minutes, seconds = divmod(remaining, 60)
        print('{:02d}:{:02d}'.format(minutes, seconds), end='\r')
        sleep(1)
        remaining -= 1
# Candidate waits, in seconds: ``delays`` feeds the countdown before each
# query, ``pauses`` feeds the ``pause`` argument of search().
pauses = np.arange(2, 8, 1).tolist()
pause = np.random.choice(pauses)
delays = np.arange(1, 60, 1).tolist()
delay = np.random.choice(delays)

for shortcode in tqdm(shortcode_list):
    try:
        # Draw fresh random waits for every short code.
        delay = np.random.choice(delays)
        pause = np.random.choice(pauses)
        stopwatch(delay)

        query = "text * to " + '"' + str(shortcode) + '"'
        url = 'https://www.google.com?q=' + query
        # NOTE(review): ``headers`` is not defined in this snippet; it is
        # presumably set up elsewhere - confirm.
        res = requests.get(url, headers=headers)
        print("\nThe query will be " + query + " " + str(res))

        # Print every link returned for this short code.
        for k in search(query, tld="co.in", num=10, stop=10, pause=pause,
                        country='US',
                        user_agent=googlesearch.get_random_user_agent(),
                        verify_ssl=True):
            print(k)
    except HTTPError as exception:
        # Report every HTTP error, not only 429, so failures stay visible.
        print(exception)
        if exception.code == 429:
            # Only a 429 status triggers the long wait before moving on.
            print("Waiting for 8 minutes and Continue")
            stopwatch(480)
您可以使用以简码为键、以列表为值的字典。
采用这种方法后,您的代码大致如下:
import numpy as np
from tqdm import tqdm
import time
import requests
# The search helper comes from the third-party ``googlesearch`` module.
try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")
    # search() is required by every lookup in this script; continuing
    # without it would turn each lookup into a caught NameError.
    raise

# Small hard-coded sample so the example runs without external files.
shortcode_list = ["abc", "SO"]
def stopwatch(sec, *, sleep=None):
    """Block for ``sec`` seconds while printing an MM:SS countdown.

    The remaining time is rewritten in place on a single terminal line
    (each print ends with a carriage return), once per second.

    Args:
        sec: Whole number of seconds to wait. Zero or a negative value
            returns immediately.
        sleep: Optional callable invoked as ``sleep(1)`` once per tick;
            defaults to ``time.sleep``.
    """
    if sleep is None:
        sleep = time.sleep
    remaining = sec
    while remaining > 0:
        # Keep the running total in ``remaining``. Unpacking divmod() back
        # into the counter would discard the whole minutes, so a
        # 480-second wait would end after about one minute.
        minutes, seconds = divmod(remaining, 60)
        print('{:02d}:{:02d}'.format(minutes, seconds), end='\r')
        sleep(1)
        remaining -= 1
# Random wait ranges, in seconds.
pauses = np.arange(2, 8, 1).tolist()
# NOTE(review): ``pause`` is drawn here but is not passed to search() in
# this version of the script.
pause = np.random.choice(pauses)
# Countdown before each query: 1 to 4 seconds.
delays = np.arange(1, 5, 1).tolist()
delay = np.random.choice(delays)

results = {}  # short code -> list of links found for it

for shortcode in tqdm(shortcode_list):
    try:
        # Wait a fresh random delay before every query.
        delay = np.random.choice(delays)
        stopwatch(delay)

        query = "text * to " + '"' + str(shortcode) + '"'
        url = 'https://www.google.com?q=' + query
        res = requests.get(url)
        print("\nThe query will be " + query + " " + str(res))

        cur_res = []  # links collected for this short code
        for k in search(query, num_results=10):
            print(k)
            cur_res.append(k)
        # Stored only after the search finished without raising.
        results[shortcode] = cur_res
    except Exception as exception:
        # Any failure is reported, followed by a long wait before the
        # next short code is tried.
        print(exception)
        print("Waiting for 8 minutes and Continue")
        stopwatch(480)
给你下次提问的一个建议:请删除或修改无法复现的代码(例如那个未知的 Excel 文件),并确保代码片段可以直接运行和调试(包含所有导入等),以方便想要帮助你的人。