python 异步图片下载(多个 url)
python asynchronous images download (multiple urls)
我正在学习 Python 4/5 个月,这是我从头开始构建的第三个项目,但我无法独自解决这个问题。
此脚本为每个给定的 url 下载 1 张图像。
我无法找到有关如何在此脚本中实现线程池执行器或异步的解决方案。我不知道如何将带有图像编号的 link url 保存到图像部分。
我为我需要下载的所有 url 构建了一个字典,但我如何实际保存具有正确名称的图像?
还有其他建议吗?
PS：目前出现的 url 都是假的。
同步版本:
import requests
import argparse
import re
import os
import logging
from bs4 import BeautifulSoup
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--num", help="Book number", type=int, required=True)
parser.add_argument("-p", dest=r"path_name", default=r"F:\Users3", help="Save to dir", )
args = parser.parse_args()
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.ERROR)
logger = logging.getLogger(__name__)
def get_parser(url_c):
    """Fetch the gallery's first page and return it parsed as BeautifulSoup.

    Parameters
    ----------
    url_c : int
        Book number used to build the gallery URL (e.g. 241461).

    Returns
    -------
    BeautifulSoup
        Parsed HTML tree of the gallery's first page.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    url = f'https://test.net/g/{url_c}/1'
    logger.info(f'Main url: {url_c}')
    # timeout so a stalled connection raises instead of hanging forever
    responce = requests.get(url, timeout=5)
    responce.raise_for_status()
    # Bug fix: the original issued a second, redundant GET just to read the
    # body; reuse the content of the response we already have.
    return BeautifulSoup(responce.content, 'html.parser')
def get_locators(soup):  # take get_parser
    """Extract download metadata from a parsed gallery page.

    Parameters
    ----------
    soup : BeautifulSoup
        Tree returned by get_parser().

    Returns
    -------
    dict
        Keys: 'first_p' (first page number), 'last_p' (exclusive stop for
        range()), 'book_code' (internal gallery id), 'ext' ('png' or 'jpg'),
        'dir' (directory name for the book).
    """
    # Extract first/last page num
    first = int(soup.select_one('span.current').string)
    logger.info(f'First page: {first}')
    # +1 so the value can be passed directly as an exclusive range() stop
    last = int(soup.select_one('span.num-pages').string) + 1
    # Extract img_code and extension from the preview image's src attribute
    link = soup.find('img', {'class': 'fit-horizontal'}).attrs["src"]
    logger.info(f'Locator code: {link}')
    # Fix: raw string avoids invalid-escape DeprecationWarnings ('\/' etc.);
    # the pattern itself is unchanged.
    code = re.search(r'galleries.([0-9]+)\/.\.(\w{3})', link)
    book_code = code.group(1)  # internal code
    extension = code.group(2)  # png or jpg
    # extract Dir book name from the JSON blob embedded in a <script> tag
    pattern = re.compile(r'pretty":"(.*)"')
    found = soup.find('script', text=pattern)
    string = pattern.search(found.text).group(1)
    dir_name = string.split('"')[0]
    logger.info(f'Dir name: {dir_name}')
    logger.info(f'Hidden code: {book_code}')
    print(f'Extension: {extension}')
    print(f'Tot pages: {last}')
    print(f'')
    return {'first_p': first,
            'last_p': last,
            'book_code': book_code,
            'ext': extension,
            'dir': dir_name
            }
def setup_download_dir(path, dir):  # (args.path_name, locator['dir'])
    """Create (if missing) and return the download directory <path>/<dir>.

    Parameters
    ----------
    path : str
        Base directory (args.path_name).
    dir : str
        Book subdirectory name (locator['dir']).

    Returns
    -------
    str
        Full path to the (possibly just created) directory.
    """
    # Bug fix: the original passed ONE pre-joined f-string containing a raw
    # backslash to os.path.join, so no real joining happened and the result
    # was Windows-only. Join the two components portably instead.
    filepath = os.path.join(path, dir)
    if not os.path.exists(filepath):
        try:
            os.makedirs(filepath)
            print(f'Directory created at: {filepath}')
        except OSError as err:
            # Best-effort: report and fall through; callers get the path anyway.
            print(f"Can't create {filepath}: {err}")
    return filepath
def main(locator, filepath):
    """Download every page image of the book into filepath.

    Parameters
    ----------
    locator : dict
        Metadata dict returned by get_locators().
    filepath : str
        Destination directory (from setup_download_dir()).

    Raises
    ------
    requests.HTTPError
        If any image request answers with an error status.
    """
    for image_n in range(locator['first_p'], locator['last_p']):
        url = f"https://i.test.net/galleries/{locator['book_code']}/{image_n}.{locator['ext']}"
        logger.info(f'Url Img: {url}')
        responce = requests.get(url, timeout=3)
        responce.raise_for_status()  # raise on non-2xx
        # Bug fix: the original fetched the same URL a second time for the
        # body, and img_data was only bound inside the 200 branch. Reuse the
        # already-downloaded content instead.
        with open(os.path.join(filepath, f"{image_n}.{locator['ext']}"), 'wb') as handler:
            handler.write(responce.content)  # write image
        print(f'Img {image_n} - DONE')
# Script entry point: parse the gallery page, then download all of its images.
if __name__ == '__main__':
    try:
        locator = get_locators(get_parser(args.num))  # args.num ex. 241461
        main(locator, setup_download_dir(args.path_name, locator['dir']))
    except KeyboardInterrupt:
        # Allow a clean Ctrl-C abort without a traceback.
        print(f'Program aborted...' + '\n')
网址列表:
def img_links(locator):
    """Build the list of all image URLs for the book described by *locator*.

    Parameters
    ----------
    locator : dict
        Metadata dict with keys 'first_p', 'last_p', 'book_code', 'ext'.

    Returns
    -------
    list[str]
        One URL per page, in page order (empty if first_p >= last_p).
    """
    image_url = [
        f"https://i.test.net/galleries/{locator['book_code']}/{num}.{locator['ext']}"
        for num in range(locator['first_p'], locator['last_p'])
    ]
    # Fix: log the finished list once, instead of re-logging the growing
    # list on every iteration (accidental O(n^2) logging).
    logging.getLogger(__name__).info('Url List: %s', image_url)
    return image_url
我在《Fluent Python（流畅的 Python）》一书中找到了解决方案。这里是片段:
def download_many(cc_list, base_url, verbose, concur_req):
    """Download one item per code in cc_list with a bounded thread pool.

    NOTE(review): snippet quoted from "Fluent Python"; `download_one`,
    `tqdm` and `HTTPStatus` (presumably the book's own Enum, not
    http.HTTPStatus — confirm) are defined elsewhere and must be in scope.
    Returns a Counter of per-status result counts.
    """
    counter = collections.Counter()
    with futures.ThreadPoolExecutor(max_workers=concur_req) as executor:
        to_do_map = {}
        for cc in sorted(cc_list):
            # Schedule the download and remember which input each Future
            # belongs to, so errors can be reported per item below.
            future = executor.submit(download_one, cc, base_url, verbose)
            to_do_map[future] = cc
        # Yields futures in completion order, not submission order.
        done_iter = futures.as_completed(to_do_map)
        if not verbose:
            # as_completed() has no len(); pass the total for the progress bar.
            done_iter = tqdm.tqdm(done_iter, total=len(cc_list))
        for future in done_iter:
            try:
                res = future.result()
            except requests.exceptions.HTTPError as exc:
                # Two-step format: the literal is a template filled from the
                # response object attached to the exception.
                error_msg = 'HTTP {res.status_code} - {res.reason}'
                error_msg = error_msg.format(res=exc.response)
            except requests.exceptions.ConnectionError as exc:
                error_msg = 'Connection error'
            else:
                error_msg = ''
                status = res.status
            if error_msg:
                status = HTTPStatus.error
            counter[status] += 1
            if verbose and error_msg:
                cc = to_do_map[future]
                print('*** Error for {}: {}'.format(cc, error_msg))
    return counter
我正在学习 Python 4/5 个月,这是我从头开始构建的第三个项目,但我无法独自解决这个问题。
此脚本为每个给定的 url 下载 1 张图像。 我无法找到有关如何在此脚本中实现线程池执行器或异步的解决方案。我不知道如何将带有图像编号的 link url 保存到图像部分。 我为我需要下载的所有 url 构建了一个字典,但我如何实际保存具有正确名称的图像? 还有其他建议吗?
PS：目前出现的 url 都是假的。
同步版本:
"""Synchronous gallery downloader (reformatted from a collapsed one-line paste).

Fetches a gallery page, extracts the image locator data, then downloads
every page image into <path>/<dir_name>.
"""
import requests
import argparse
import re
import os
import logging
from bs4 import BeautifulSoup

# Command-line interface: required book number (-n) and optional save directory (-p).
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--num", help="Book number", type=int, required=True)
parser.add_argument("-p", dest=r"path_name", default=r"F:\Users3", help="Save to dir", )
args = parser.parse_args()

logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.ERROR)
logger = logging.getLogger(__name__)


def get_parser(url_c):
    """Fetch the gallery's first page and return it parsed as BeautifulSoup."""
    url = f'https://test.net/g/{url_c}/1'
    logger.info(f'Main url: {url_c}')
    responce = requests.get(url, timeout=5)  # timeout so a stall raises
    responce.raise_for_status()
    # Bug fix: reuse the fetched body instead of issuing a second GET.
    return BeautifulSoup(responce.content, 'html.parser')


def get_locators(soup):
    """Extract first/last page, internal book code, extension and dir name."""
    first = int(soup.select_one('span.current').string)
    logger.info(f'First page: {first}')
    last = int(soup.select_one('span.num-pages').string) + 1  # exclusive stop
    link = soup.find('img', {'class': 'fit-horizontal'}).attrs["src"]
    logger.info(f'Locator code: {link}')
    # raw string: avoids invalid-escape warnings; pattern unchanged
    code = re.search(r'galleries.([0-9]+)\/.\.(\w{3})', link)
    book_code = code.group(1)  # internal code
    extension = code.group(2)  # png or jpg
    pattern = re.compile(r'pretty":"(.*)"')
    found = soup.find('script', text=pattern)
    string = pattern.search(found.text).group(1)
    dir_name = string.split('"')[0]
    logger.info(f'Dir name: {dir_name}')
    logger.info(f'Hidden code: {book_code}')
    print(f'Extension: {extension}')
    print(f'Tot pages: {last}')
    print(f'')
    return {'first_p': first,
            'last_p': last,
            'book_code': book_code,
            'ext': extension,
            'dir': dir_name
            }


def setup_download_dir(path, dir):  # (args.path_name, locator['dir'])
    """Create (if missing) and return the download directory <path>/<dir>."""
    # Bug fix: join the components portably instead of a backslash f-string.
    filepath = os.path.join(path, dir)
    if not os.path.exists(filepath):
        try:
            os.makedirs(filepath)
            print(f'Directory created at: {filepath}')
        except OSError as err:
            print(f"Can't create {filepath}: {err}")
    return filepath


def main(locator, filepath):
    """Download every page image into filepath, named <page>.<ext>."""
    for image_n in range(locator['first_p'], locator['last_p']):
        url = f"https://i.test.net/galleries/{locator['book_code']}/{image_n}.{locator['ext']}"
        logger.info(f'Url Img: {url}')
        responce = requests.get(url, timeout=3)
        responce.raise_for_status()
        # Bug fix: write the body already downloaded; no second GET.
        with open(os.path.join(filepath, f"{image_n}.{locator['ext']}"), 'wb') as handler:
            handler.write(responce.content)
        print(f'Img {image_n} - DONE')


if __name__ == '__main__':
    try:
        locator = get_locators(get_parser(args.num))  # args.num ex. 241461
        main(locator, setup_download_dir(args.path_name, locator['dir']))
    except KeyboardInterrupt:
        print(f'Program aborted...' + '\n')
网址列表:
def img_links(locator):
    """Build the list of all image URLs for the book described by *locator*.

    Parameters
    ----------
    locator : dict
        Metadata dict with keys 'first_p', 'last_p', 'book_code', 'ext'.

    Returns
    -------
    list[str]
        One URL per page, in page order (empty if first_p >= last_p).
    """
    image_url = [
        f"https://i.test.net/galleries/{locator['book_code']}/{num}.{locator['ext']}"
        for num in range(locator['first_p'], locator['last_p'])
    ]
    # Fix: log the finished list once, not on every loop iteration.
    logging.getLogger(__name__).info('Url List: %s', image_url)
    return image_url
我在《Fluent Python（流畅的 Python）》一书中找到了解决方案。这里是片段:
def download_many(cc_list, base_url, verbose, concur_req):
    """Download one item per code in cc_list with a bounded thread pool.

    NOTE(review): snippet quoted from "Fluent Python"; `download_one`,
    `tqdm` and `HTTPStatus` (presumably the book's own Enum, not
    http.HTTPStatus — confirm) are defined elsewhere and must be in scope.
    Returns a Counter of per-status result counts.
    """
    counter = collections.Counter()
    with futures.ThreadPoolExecutor(max_workers=concur_req) as executor:
        to_do_map = {}
        for cc in sorted(cc_list):
            # Map each Future back to its input for per-item error reporting.
            future = executor.submit(download_one, cc, base_url, verbose)
            to_do_map[future] = cc
        # Yields futures in completion order, not submission order.
        done_iter = futures.as_completed(to_do_map)
        if not verbose:
            # as_completed() has no len(); pass the total for the progress bar.
            done_iter = tqdm.tqdm(done_iter, total=len(cc_list))
        for future in done_iter:
            try:
                res = future.result()
            except requests.exceptions.HTTPError as exc:
                # Two-step format: template filled from the exception's response.
                error_msg = 'HTTP {res.status_code} - {res.reason}'
                error_msg = error_msg.format(res=exc.response)
            except requests.exceptions.ConnectionError as exc:
                error_msg = 'Connection error'
            else:
                error_msg = ''
                status = res.status
            if error_msg:
                status = HTTPStatus.error
            counter[status] += 1
            if verbose and error_msg:
                cc = to_do_map[future]
                print('*** Error for {}: {}'.format(cc, error_msg))
    return counter