如何将搜索到的每个项目的链接存储到列表中?

How can I store the links found for each item searched into a list?

此代码对每个简码(short code)进行一次简单的 Google 搜索,然后打印找到的链接(最多 10 个)。如何把每个简码找到的链接存储到与该简码对应的列表或字典中?

try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")

# Read the workbook through a binary file handle, then turn its
# 'Short Code' column into a plain Python list once the file is closed.
with open('Unknown.xlsx', "rb") as xlsx_file:
    # read_excel can also select a sheet by name or fetch all sheets
    df = pd.read_excel(xlsx_file)
shortcode_list = df['Short Code'].tolist()

def stopwatch(sec, sleep=None):
    """Show an in-place ``MM:SS`` countdown and block for ``sec`` seconds.

    Args:
        sec: Whole number of seconds to wait. Zero or negative values
            return immediately without printing anything.
        sleep: Optional callable invoked as ``sleep(1)`` once per tick.
            Defaults to ``time.sleep``.
    """
    if sleep is None:
        sleep = time.sleep
    remaining = int(sec)
    while remaining > 0:
        # The countdown lives in its own variable: unpacking divmod() back
        # into the counter would discard the whole minutes and cut any wait
        # longer than 59 seconds short.
        minutes, seconds = divmod(remaining, 60)
        # '\r' returns to the line start so each tick overwrites the last.
        print('{:02d}:{:02d}'.format(minutes, seconds), end='\r')
        sleep(1)
        remaining -= 1

# Candidate wait times in whole seconds: a value from `pauses` is passed to
# search() as its `pause` argument, a value from `delays` is the countdown
# run before each query.
pauses = np.arange(2, 8, 1).tolist()
pause = np.random.choice(pauses)

delays = np.arange(1, 60, 1).tolist()
delay = np.random.choice(delays)

for shortcode in tqdm(shortcode_list):
    try:
        # Draw a fresh random delay and pause for every short code.
        delay = np.random.choice(delays)
        pause = np.random.choice(pauses)
        stopwatch(delay)
        query = "text * to " + '"' + str(shortcode) + '"'
        url = 'https://www.google.com?q=' + query
        # The response of this request is only echoed next to the query.
        res = requests.get(url, headers=headers)
        print("\nThe query will be " + query + " " + str(res))
        for k in search(query, tld="co.in", num=10, stop=10, pause=pause, country='US',
                        user_agent=googlesearch.get_random_user_agent(), verify_ssl=True):
            print(k)
    except HTTPError as exception:
        # Report every HTTP error so failures other than 429 are not
        # silently lost; only 429 triggers the long back-off.
        print(exception)
        if exception.code == 429:
            print("Waiting for 8 minutes and Continue")
            stopwatch(480)

您可以使用以简码为键、以列表为值的字典。

采用这种方法后,您的代码大致如下:

import numpy as np
from tqdm import tqdm
import time
import requests

try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")

# Hard-coded sample short codes to search for.
shortcode_list = ["abc", "SO"]
def stopwatch(sec, sleep=None):
    """Show an in-place ``MM:SS`` countdown and block for ``sec`` seconds.

    Args:
        sec: Whole number of seconds to wait. Zero or negative values
            return immediately without printing anything.
        sleep: Optional callable invoked as ``sleep(1)`` once per tick.
            Defaults to ``time.sleep``.
    """
    if sleep is None:
        sleep = time.sleep
    remaining = int(sec)
    while remaining > 0:
        # The countdown lives in its own variable: unpacking divmod() back
        # into the counter would discard the whole minutes and cut any wait
        # longer than 59 seconds short.
        minutes, seconds = divmod(remaining, 60)
        # '\r' returns to the line start so each tick overwrites the last.
        print('{:02d}:{:02d}'.format(minutes, seconds), end='\r')
        sleep(1)
        remaining -= 1

# Candidate wait times in whole seconds. One value of each is drawn here and
# again for every short code inside the loop.
# NOTE: `pause` is drawn but never passed to search() in this script.
pauses = np.arange(2, 8, 1).tolist()
pause = np.random.choice(pauses)

delays = np.arange(1, 60, 1).tolist()
delay = np.random.choice(delays)

results = {}  # Maps each short code to the list of links found for it
for shortcode in tqdm(shortcode_list):
    try:
        # Inside the loop the delay is drawn from a shorter 1-4 second range.
        delays = np.arange(1, 5, 1).tolist()
        delay = np.random.choice(delays)
        pause = np.random.choice(pauses)
        stopwatch(delay)
        query = "text * to " + '"' + str(shortcode) + '"'
        url = 'https://www.google.com?q=' + query
        # Bound the request: without a timeout, requests can block forever
        # and stall the whole run on a single short code.
        res = requests.get(url, timeout=10)
        print("\nThe query will be " + query + " " + str(res))
        cur_res = []  # Links collected for this short code
        for k in search(query, num_results=10):
            print(k)
            cur_res.append(k)
        results[shortcode] = cur_res  # Stored only when the search completed
    except Exception as exception:
        # Best-effort: any failure (including a request timeout) is printed,
        # then the script backs off and moves on to the next short code.
        print(exception)
        print("Waiting for 8 minutes and Continue")
        stopwatch(480)

另外,给你下次提问一个建议:请删除或替换无法复现的部分(例如这里未提供的 Excel 文件 Unknown.xlsx),并确保代码片段可以直接运行和调试(包含所有导入语句等),这样才方便想帮助你的人。