Lambda 在完成后自动删除转录作业
Lambda automatically deletes transcribe job upon completion
我正在编辑我的 lambda,以便在作业状态为 "Complete" 时删除转录作业。我有以下代码:
import json
import time
import boto3
from urllib.request import urlopen
# Lambda entry point -- the question's original, NON-WORKING listing.
# NOTE(review): indentation was lost in this listing, so loop/branch nesting
# below is inferred from statement order and should be confirmed.
# Flow: read the first S3 record, start a Transcribe job named after the
# object key, poll the job status, then copy the transcript JSON into the
# same bucket under "transcribeFile/".
def lambda_handler(event, context):
transcribe = boto3.client("transcribe")
s3 = boto3.client("s3")
if event:
# Only the first record of the event is processed.
file_obj = event["Records"][0]
bucket_name = str(file_obj["s3"]["bucket"]["name"])
file_name = str(file_obj["s3"]["object"]["key"])
s3_uri = create_uri(bucket_name, file_name)
# Takes whatever follows the literal "2019." in the key as the media format;
# presumably keys are expected to end in "2019.<ext>" -- verify.
file_type = file_name.split("2019.")[1]
# The object key doubles as the transcription job name.
job_name = file_name
transcribe.start_transcription_job(TranscriptionJobName=job_name,
Media ={"MediaFileUri": s3_uri},
MediaFormat = file_type,
LanguageCode = "en-US",
Settings={
"VocabularyName": "Custom_Vocabulary_by_Brand_Other_Brands",
"ShowSpeakerLabels": True,
"MaxSpeakerLabels": 4
})
# NOTE(review): the only exit from this first loop is a FAILED status.
# Presumably a job that finishes successfully never leaves it, so the
# COMPLETED check in the second loop is never reached -- this looks like the
# reason the job is not deleted. Confirm against the original indentation.
while True:
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["FAILED"]:
break
print("It's in progress")
# NOTE(review): no break is visible in this second loop either, so even if it
# were reached it would keep polling (and re-issuing the delete) forever.
while True:
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["COMPLETED"]:
transcribe.delete_transcription_job(TranscriptionJobName=job_name
)
time.sleep(5)
# Fetch the transcript JSON from the job's TranscriptFileUri and store it in
# the source bucket as transcribeFile/<job_name>.json.
load_url = urlopen(status["TranscriptionJob"]["Transcript"]["TranscriptFileUri"])
load_json = json.dumps(json.load(load_url))
s3.put_object(Bucket = bucket_name, Key = "transcribeFile/{}.json".format(job_name), Body=load_json)
# TODO implement
return {
'statusCode': 200,
'body': json.dumps('Hello from Lambda!')
}
def create_uri(bucket_name, file_name):
    """Return the ``s3://`` URI for ``file_name`` inside ``bucket_name``.

    Args:
        bucket_name: Name of the S3 bucket.
        file_name: Object key within the bucket.

    Returns:
        The string ``"s3://<bucket_name>/<file_name>"``.
    """
    return "s3://" + bucket_name + "/" + file_name
处理该作业的代码部分是:
# Excerpt of the question's polling logic (indentation lost in this listing).
# NOTE(review): the first loop exits only on FAILED, so a job that completes
# successfully presumably never reaches the second loop where the delete
# lives -- confirm against the original indentation.
while True:
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["FAILED"]:
break
print("It's in progress")
# Second poll: issues the delete once the status reads COMPLETED.
while True:
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["COMPLETED"]:
transcribe.delete_transcription_job(TranscriptionJobName=job_name
)
如果作业正在进行中,它会打印 "It's in progress";而当状态变为 "COMPLETED" 时,它应该删除该作业。
知道为什么我的当前代码无法运行吗?它完成转录作业但不会删除它。
如果可以避免,则不应轮询信息,尤其是在 Lambda 中。
响应转录作业状态变化的正确方法是使用 CloudWatch Events。例如,您可以配置一个规则,以便在转录作业成功完成时将事件路由到 AWS Lambda 函数。
当您的 Lambda 函数因转录作业的状态更改而被调用时,它会收到如下所示的 event 数据:
{
"version": "0",
"id": "1a234567-1a6d-3ab4-1234-abf8b19be1234",
"detail-type": "Transcribe Job State Change",
"source": "aws.transcribe",
"account": "123456789012",
"time": "2019-11-19T10:00:05Z",
"region": "us-east-1",
"resources": [],
"detail": {
"TranscriptionJobName": "my-transcribe-test",
"TranscriptionJobStatus": "COMPLETED"
}
}
使用 TranscriptionJobName 将状态更改关联回原始作业。
抱歉各位,我又看了一眼,发现自己犯了一个非常非常愚蠢的错误:我把 transcribe.delete_transcription_job(TranscriptionJobName=job_name) 放在了完全错误的位置。
请在下面找到正确且有效的代码:
import json
import time
import boto3
from urllib.request import urlopen
def lambda_handler(event, context):
    """Transcribe the audio object named in an S3 event and store the transcript.

    Starts an Amazon Transcribe job for the first record in ``event``, polls
    until the job is ``COMPLETED`` or ``FAILED``, copies the transcript JSON
    to ``transcribeFile/<job name>.json`` in the source bucket (completed jobs
    only), and finally deletes the transcription job.

    Args:
        event: S3 notification payload; only ``event["Records"][0]`` is read.
            A falsy event skips all processing.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded body.

    Note:
        Polling keeps the invocation alive for the whole transcription, so
        the function timeout must exceed the job duration. Reacting to the
        "Transcribe Job State Change" event is the more robust design.
    """
    transcribe = boto3.client("transcribe")
    s3 = boto3.client("s3")
    if event:
        record = event["Records"][0]
        bucket_name = str(record["s3"]["bucket"]["name"])
        file_name = str(record["s3"]["object"]["key"])
        s3_uri = create_uri(bucket_name, file_name)
        # Use the real file extension rather than splitting on the literal
        # "2019.", which raised IndexError for keys without that substring.
        file_type = file_name.rsplit(".", 1)[-1].lower()
        job_name = file_name
        transcribe.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={"MediaFileUri": s3_uri},
            MediaFormat=file_type,
            LanguageCode="en-US",
            Settings={
                "VocabularyName": "Custom_Vocabulary_by_Brand_Other_Brands",
                "ShowSpeakerLabels": True,
                "MaxSpeakerLabels": 4,
            },
        )
        # Poll until the job reaches a terminal state.
        while True:
            status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
            job = status["TranscriptionJob"]
            if job["TranscriptionJobStatus"] in ("COMPLETED", "FAILED"):
                break
            print("It's in progress")
            time.sleep(5)
        if job["TranscriptionJobStatus"] == "COMPLETED":
            # Download the transcript BEFORE deleting the job: deleting a job
            # may also remove its service-managed results.
            with urlopen(job["Transcript"]["TranscriptFileUri"]) as response:
                load_json = json.dumps(json.load(response))
            s3.put_object(
                Bucket=bucket_name,
                Key="transcribeFile/{}.json".format(job_name),
                Body=load_json,
            )
        else:
            # A failed job carries no "Transcript" entry; report and move on.
            print("Transcription job {} failed: {}".format(
                job_name, job.get("FailureReason")))
        # Clean up the job in both terminal states.
        transcribe.delete_transcription_job(TranscriptionJobName=job_name)
    # TODO implement
    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
def create_uri(bucket_name, file_name):
    """Return the ``s3://`` URI for ``file_name`` inside ``bucket_name``.

    Args:
        bucket_name: Name of the S3 bucket.
        file_name: Object key within the bucket.

    Returns:
        The string ``"s3://<bucket_name>/<file_name>"``.
    """
    return "s3://" + bucket_name + "/" + file_name
我正在编辑我的 lambda,以便在作业状态为 "Complete" 时删除转录作业。我有以下代码:
import json
import time
import boto3
from urllib.request import urlopen
# Lambda entry point -- the question's original, NON-WORKING listing.
# NOTE(review): indentation was lost in this listing, so loop/branch nesting
# below is inferred from statement order and should be confirmed.
# Flow: read the first S3 record, start a Transcribe job named after the
# object key, poll the job status, then copy the transcript JSON into the
# same bucket under "transcribeFile/".
def lambda_handler(event, context):
transcribe = boto3.client("transcribe")
s3 = boto3.client("s3")
if event:
# Only the first record of the event is processed.
file_obj = event["Records"][0]
bucket_name = str(file_obj["s3"]["bucket"]["name"])
file_name = str(file_obj["s3"]["object"]["key"])
s3_uri = create_uri(bucket_name, file_name)
# Takes whatever follows the literal "2019." in the key as the media format;
# presumably keys are expected to end in "2019.<ext>" -- verify.
file_type = file_name.split("2019.")[1]
# The object key doubles as the transcription job name.
job_name = file_name
transcribe.start_transcription_job(TranscriptionJobName=job_name,
Media ={"MediaFileUri": s3_uri},
MediaFormat = file_type,
LanguageCode = "en-US",
Settings={
"VocabularyName": "Custom_Vocabulary_by_Brand_Other_Brands",
"ShowSpeakerLabels": True,
"MaxSpeakerLabels": 4
})
# NOTE(review): the only exit from this first loop is a FAILED status.
# Presumably a job that finishes successfully never leaves it, so the
# COMPLETED check in the second loop is never reached -- this looks like the
# reason the job is not deleted. Confirm against the original indentation.
while True:
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["FAILED"]:
break
print("It's in progress")
# NOTE(review): no break is visible in this second loop either, so even if it
# were reached it would keep polling (and re-issuing the delete) forever.
while True:
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["COMPLETED"]:
transcribe.delete_transcription_job(TranscriptionJobName=job_name
)
time.sleep(5)
# Fetch the transcript JSON from the job's TranscriptFileUri and store it in
# the source bucket as transcribeFile/<job_name>.json.
load_url = urlopen(status["TranscriptionJob"]["Transcript"]["TranscriptFileUri"])
load_json = json.dumps(json.load(load_url))
s3.put_object(Bucket = bucket_name, Key = "transcribeFile/{}.json".format(job_name), Body=load_json)
# TODO implement
return {
'statusCode': 200,
'body': json.dumps('Hello from Lambda!')
}
def create_uri(bucket_name, file_name):
    """Return the ``s3://`` URI for ``file_name`` inside ``bucket_name``.

    Args:
        bucket_name: Name of the S3 bucket.
        file_name: Object key within the bucket.

    Returns:
        The string ``"s3://<bucket_name>/<file_name>"``.
    """
    return "s3://" + bucket_name + "/" + file_name
处理该作业的代码部分是:
# Excerpt of the question's polling logic (indentation lost in this listing).
# NOTE(review): the first loop exits only on FAILED, so a job that completes
# successfully presumably never reaches the second loop where the delete
# lives -- confirm against the original indentation.
while True:
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["FAILED"]:
break
print("It's in progress")
# Second poll: issues the delete once the status reads COMPLETED.
while True:
status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["COMPLETED"]:
transcribe.delete_transcription_job(TranscriptionJobName=job_name
)
如果作业正在进行中,它会打印 "It's in progress";而当状态变为 "COMPLETED" 时,它应该删除该作业。
知道为什么我的当前代码无法运行吗?它完成转录作业但不会删除它。
如果可以避免,则不应轮询信息,尤其是在 Lambda 中。
响应转录作业状态变化的正确方法是使用 CloudWatch Events。例如,您可以配置一个规则,以便在转录作业成功完成时将事件路由到 AWS Lambda 函数。
当您的 Lambda 函数因转录作业的状态更改而被调用时,它会收到如下所示的 event 数据:
{
"version": "0",
"id": "1a234567-1a6d-3ab4-1234-abf8b19be1234",
"detail-type": "Transcribe Job State Change",
"source": "aws.transcribe",
"account": "123456789012",
"time": "2019-11-19T10:00:05Z",
"region": "us-east-1",
"resources": [],
"detail": {
"TranscriptionJobName": "my-transcribe-test",
"TranscriptionJobStatus": "COMPLETED"
}
}
使用 TranscriptionJobName 将状态更改关联回原始作业。
抱歉各位,我又看了一眼,发现自己犯了一个非常非常愚蠢的错误:我把 transcribe.delete_transcription_job(TranscriptionJobName=job_name) 放在了完全错误的位置。
请在下面找到正确且有效的代码:
import json
import time
import boto3
from urllib.request import urlopen
def lambda_handler(event, context):
    """Transcribe the audio object named in an S3 event and store the transcript.

    Starts an Amazon Transcribe job for the first record in ``event``, polls
    until the job is ``COMPLETED`` or ``FAILED``, copies the transcript JSON
    to ``transcribeFile/<job name>.json`` in the source bucket (completed jobs
    only), and finally deletes the transcription job.

    Args:
        event: S3 notification payload; only ``event["Records"][0]`` is read.
            A falsy event skips all processing.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded body.

    Note:
        Polling keeps the invocation alive for the whole transcription, so
        the function timeout must exceed the job duration. Reacting to the
        "Transcribe Job State Change" event is the more robust design.
    """
    transcribe = boto3.client("transcribe")
    s3 = boto3.client("s3")
    if event:
        record = event["Records"][0]
        bucket_name = str(record["s3"]["bucket"]["name"])
        file_name = str(record["s3"]["object"]["key"])
        s3_uri = create_uri(bucket_name, file_name)
        # Use the real file extension rather than splitting on the literal
        # "2019.", which raised IndexError for keys without that substring.
        file_type = file_name.rsplit(".", 1)[-1].lower()
        job_name = file_name
        transcribe.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={"MediaFileUri": s3_uri},
            MediaFormat=file_type,
            LanguageCode="en-US",
            Settings={
                "VocabularyName": "Custom_Vocabulary_by_Brand_Other_Brands",
                "ShowSpeakerLabels": True,
                "MaxSpeakerLabels": 4,
            },
        )
        # Poll until the job reaches a terminal state.
        while True:
            status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
            job = status["TranscriptionJob"]
            if job["TranscriptionJobStatus"] in ("COMPLETED", "FAILED"):
                break
            print("It's in progress")
            time.sleep(5)
        if job["TranscriptionJobStatus"] == "COMPLETED":
            # Download the transcript BEFORE deleting the job: deleting a job
            # may also remove its service-managed results.
            with urlopen(job["Transcript"]["TranscriptFileUri"]) as response:
                load_json = json.dumps(json.load(response))
            s3.put_object(
                Bucket=bucket_name,
                Key="transcribeFile/{}.json".format(job_name),
                Body=load_json,
            )
        else:
            # A failed job carries no "Transcript" entry; report and move on.
            print("Transcription job {} failed: {}".format(
                job_name, job.get("FailureReason")))
        # Clean up the job in both terminal states.
        transcribe.delete_transcription_job(TranscriptionJobName=job_name)
    # TODO implement
    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
def create_uri(bucket_name, file_name):
    """Return the ``s3://`` URI for ``file_name`` inside ``bucket_name``.

    Args:
        bucket_name: Name of the S3 bucket.
        file_name: Object key within the bucket.

    Returns:
        The string ``"s3://<bucket_name>/<file_name>"``.
    """
    return "s3://" + bucket_name + "/" + file_name