How to find where an S3 multipart upload is failing in Python?
I'm implementing a cron job that uploads a large daily backup file to an S3 bucket. Most of the time it works fine, but every so often I'll check the bucket and the file is noticeably smaller than the actual size.
It should be around 50 GB; last time it showed up as 34 GB. My main problem is that I'm not sure what error to try/catch.
I'm also still learning Python as I go, so bear with me.
from progress import ProgressPercentage  # class file progress.py
from slack import *  # function file for Slack notifications
import random
import glob
import os
import boto3
import botocore
from boto3.s3.transfer import TransferConfig

bucket = "my-s3-backup"
s3 = boto3.resource('s3')

# Grabbing the last file, and removing the full path from the string
pattern = "/path/to/backup/file/xb_*"
files = list(filter(os.path.isfile, glob.glob(pattern)))
files.sort(key=lambda x: os.path.getmtime(x))
file_to_upload = files[-1]
file_name = file_to_upload.replace('/path/to/backup/file/', '')
key_path = 'physical_db_backups/' + file_name

# Multipart upload function
def multi_part_upload():
    # TransferConfig values are in bytes, so this is a 25 KB threshold/chunk size
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)
    try:
        s3.meta.client.upload_file(file_to_upload, bucket, key_path, Config=config,
                                   Callback=ProgressPercentage(file_to_upload))
        # Custom Slack notification to inform completion
        sendslacksuccess("Physical Backup to S3 Complete:\n" + file_name)
    except botocore.exceptions.ClientError as error:
        # Custom Slack notification to inform of failure
        # (str(error): concatenating the exception object directly raises a TypeError)
        sendslackerror("Physical Backup to S3 Failed:\n" + file_name + "\nError: " + str(error))

if __name__ == '__main__':
    multi_part_upload()
If the script doesn't "fail" but it doesn't upload the full file size, what exception am I supposed to catch here? Should I be logging the output somewhere?
I've been looking at the Botocore Exceptions documentation; I'm just not sure what to do with the try/catch for this.
For reference, here's the file size discrepancy:
aws s3 ls --summarize --human-readable --recursive s3://my-s3-backup/physical_db_backups/
2022-05-07 14:31:28 50.7 GiB physical_db_backups/xb_202205070101.xb.zst
2022-05-08 12:48:07 50.8 GiB physical_db_backups/xb_202205080101.xb.zst
2022-05-09 01:30:04 34.2 GiB physical_db_backups/xb_202205090101.xb.zst <--- WRONG
UPDATE: Well, because I'm an idiot and didn't realize the backup file hadn't finished being written yet, I've made some modifications:
I edited the cron job to kick off later.
I added logic to determine whether the backup shell script is still running.
I may fold in an extra check to make sure the file actually exists (see the sketch after the code below), but for now this is a tested, working POC.
from progress import ProgressPercentage  # class file progress.py
from slack import *  # function file for Slack notifications
import random
from time import sleep
import psutil
import glob
import os
import boto3
import botocore
from boto3.s3.transfer import TransferConfig
import logging

bucket = "fsn-s3-backup"
s3 = boto3.resource('s3')

# Grabbing the last file, and removing the full path from the string
pattern = "/path/to/backup/file/xb_*"
files = list(filter(os.path.isfile, glob.glob(pattern)))
files.sort(key=lambda x: os.path.getmtime(x))
file_to_upload = files[-1]
file_name = file_to_upload.replace('/path/to/backup/file/', '')
key_path = 'physical_db_backups/' + file_name

logging.basicConfig(filename='/var/log/s3-backup.log',
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    datefmt='%m/%d/%Y %I:%M:%S %p',
                    filemode='a')
logger = logging.getLogger()
logger.setLevel(logging.INFO)

def multi_part_upload():
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)
    try:
        s3.meta.client.upload_file(file_to_upload, bucket, key_path, Config=config,
                                   Callback=ProgressPercentage(file_to_upload),
                                   ExtraArgs={'ContentType': 'application/zstd'})
        logger.info("Physical Backup to S3 Complete")
        sendslacksuccess("Physical Backup to S3 Complete:\n" + file_name)
    except botocore.exceptions.ClientError as error:
        # str(error): concatenating the exception object directly raises a TypeError
        logger.error("Physical Backup to S3 Failed: " + str(error))
        sendslackerror("Physical Backup to S3 Failed:\n" + file_name + "\nError: " + str(error))

def checkIfProcessRunning(processName):
    for proc in psutil.process_iter():
        try:
            cmdline = proc.cmdline()
        except psutil.Error:
            # Processes can exit (or deny access) mid-iteration; skip them
            continue
        if processName in cmdline:
            return True
    return False

if __name__ == '__main__':
    backuprunning = True
    while backuprunning:
        logger.info("Checking if backup shell script is running")
        if checkIfProcessRunning('/path/to/physical_backup.sh'):
            logger.info("Backup shell script still running. Sleeping for 60s")
            sleep(60)
        else:
            backuprunning = False
            logger.info("Beginning multipart upload")
            multi_part_upload()