How to find where an S3 multipart upload is failing in Python?

I'm implementing a cron job that uploads a large daily backup file to an S3 bucket. Most of the time it works fine, but every so often I'll check the bucket and the file size is significantly smaller than the actual size.

It should be around 50 GB; last time it showed up as 34 GB. My main problem is that I'm not sure what errors to try/catch.

I'm still learning Python as I go, so please bear with me.

from progress import ProgressPercentage  # class file progress.py
from slack import *  # function file for Slack notifications
import glob
import os
import boto3
import botocore
from boto3.s3.transfer import TransferConfig

bucket = "my-s3-backup"
s3 = boto3.resource('s3')

# Grabbing the last file, and removing the full path from the string
pattern = "/path/to/backup/file/xb_*"
files = list(filter(os.path.isfile, glob.glob(pattern)))
files.sort(key=lambda x: os.path.getmtime(x))
file_to_upload = files[-1]
file_name = file_to_upload.replace('/path/to/backup/file/', '')
key_path = 'physical_db_backups/' + file_name

# Multipart upload function
def multi_part_upload():
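    # NOTE: TransferConfig sizes are in bytes, so these values are only 25 KB;
    # boto3/s3transfer will raise the chunk size to S3's 5 MiB minimum part size itself.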
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)

    try:
        s3.meta.client.upload_file(file_to_upload, bucket, key_path, Config=config,
                                   Callback=ProgressPercentage(file_to_upload))

        # Custom Slack notification to inform completion
        sendslacksuccess("Physical Backup to S3 Complete:\n" + file_name)
    except botocore.exceptions.ClientError as error:
        # Custom Slack notification to inform of failure
        # (str(error) is needed: concatenating the exception object itself raises TypeError)
        sendslackerror("Physical Backup to S3 Failed:\n" + file_name + "\nError: " + str(error))


if __name__ == '__main__':
    multi_part_upload()

If the script doesn't "fail," but it doesn't upload the complete file size, what exception am I supposed to catch here? Should I be logging the output somewhere?
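
One option I'm considering is boto3's built-in debug logging via boto3.set_stream_logger, which would at least record every request the transfer manager makes (a minimal sketch, not something I've wired into the job yet):

    import logging
    import boto3

    # Passing '' attaches the handler to the root logger, so botocore and
    # s3transfer debug output is captured too (very verbose).
    boto3.set_stream_logger('', logging.DEBUG)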

I've been looking at the Botocore Exceptions documentation, but I'm just not sure what to try/catch with it.
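
For example, since upload_file is a managed transfer, my understanding is that upload failures surface as boto3.exceptions.S3UploadFailedError (wrapping the underlying ClientError), so a wider net might look like this (a sketch reusing the names from the script above, untested against this job):

    import boto3.exceptions
    import botocore.exceptions

    try:
        s3.meta.client.upload_file(file_to_upload, bucket, key_path)
    except boto3.exceptions.S3UploadFailedError as error:
        # Raised by the transfer manager when the upload itself fails
        sendslackerror("Upload failed:\n" + file_name + "\nError: " + str(error))
    except botocore.exceptions.BotoCoreError as error:
        # Client-side botocore problems (credentials, endpoint, etc.)
        sendslackerror("Upload failed:\n" + file_name + "\nError: " + str(error))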

For reference, here is the file size discrepancy:

aws s3 ls --summarize --human-readable --recursive s3://my-s3-backup/physical_db_backups/

2022-05-07 14:31:28   50.7 GiB physical_db_backups/xb_202205070101.xb.zst
2022-05-08 12:48:07   50.8 GiB physical_db_backups/xb_202205080101.xb.zst
2022-05-09 01:30:04   34.2 GiB physical_db_backups/xb_202205090101.xb.zst <--- WRONG
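
For what it's worth, a post-upload check along these lines would at least have flagged the mismatch: head_object returns the stored object's metadata without downloading it, and ContentLength is the object's size in bytes (a sketch; upload_size_matches is just an illustrative name, reusing the script's globals):

    import os

    def upload_size_matches(local_path, bucket, key):
        # Compare the stored object's size to the local file's size
        remote_size = s3.meta.client.head_object(Bucket=bucket, Key=key)['ContentLength']
        return remote_size == os.path.getsize(local_path)

    if not upload_size_matches(file_to_upload, bucket, key_path):
        sendslackerror("Size mismatch after upload:\n" + file_name)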

Well, since I was an idiot and didn't realize the backup file hadn't finished being written yet, I've made some changes.

  1. I edited the cron job to start later.

  2. I created logic to determine whether the backup shell script is still running.

  3. I will probably incorporate additional checks to make sure the file exists, but for now this is a tested, working POC. (A sketch of one such check follows the code below.)

     from progress import ProgressPercentage  # class file progress.py
     from slack import *  # function file for Slack notifications
     from time import sleep
     import psutil
     import glob
     import os
     import boto3
     import botocore
     from boto3.s3.transfer import TransferConfig
     import logging
    
     bucket = "my-s3-backup"
     s3 = boto3.resource('s3')
     pattern = "/path/to/backup/file/xb_*"
     files = list(filter(os.path.isfile, glob.glob(pattern)))
     files.sort(key=lambda x: os.path.getmtime(x))
     file_to_upload = files[-1]
     file_name = file_to_upload.replace('/path/to/backup/file/', '')
     key_path = 'physical_db_backups/' + file_name
    
     logging.basicConfig(filename='/var/log/s3-backup.log', format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', filemode='a')
     logger = logging.getLogger()
     logger.setLevel(logging.INFO)
    
    
     def multi_part_upload():
         config = TransferConfig(multipart_threshold=1024 * 25,
                                 max_concurrency=10,
                                 multipart_chunksize=1024 * 25,
                                 use_threads=True)
    
         try:
             s3.meta.client.upload_file(file_to_upload, bucket, key_path, Config=config,
                                        Callback=ProgressPercentage(file_to_upload),
                                        ExtraArgs={'ContentType': 'application/zstd'})
             logger.info("Physical Backup to S3 Complete")
             sendslacksuccess("Physical Backup to S3 Complete:\n" + file_name)
         except botocore.exceptions.ClientError as error:
             logger.error("Physical Backup to S3 Failed: " + str(error))
             sendslackerror("Physical Backup to S3 Failed:\n" + file_name + "\nError: " + str(error))
    
    
     def checkIfProcessRunning(processName):
         for proc in psutil.process_iter():
             try:
                 cmdline = proc.cmdline()
             except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                 # The process may have exited (or be inaccessible) mid-iteration
                 continue
             if processName in cmdline:
                 return True
         return False
    
    
     if __name__ == '__main__':
         backuprunning = True
         while backuprunning:
             logger.info("Checking if backup shell script is running")
             if checkIfProcessRunning('/path/to/physical_backup.sh'):
                 logger.info("Backup shell script still running. Sleeping for 60s")
                 sleep(60)
             else:
                 backuprunning = False
                 logger.info("Beginning multipart upload")
                 multi_part_upload()
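
On point 3, the extra check I have in mind would wait until the backup file both exists and has stopped growing before kicking off the upload, since the root cause here was uploading a file that was still being written. A minimal sketch (wait_until_file_stable is just a placeholder name, and the 60-second polling interval mirrors the loop above and is arbitrary):

    import os
    from time import sleep

    def wait_until_file_stable(path, interval=60):
        # Wait for the file to appear first
        while not os.path.isfile(path):
            sleep(interval)
        # Then wait until its size stops changing between two consecutive checks
        last_size = -1
        while os.path.getsize(path) != last_size:
            last_size = os.path.getsize(path)
            sleep(interval)

Calling wait_until_file_stable(file_to_upload) right before multi_part_upload() would cover the case where the shell-script check misses a still-growing file.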