Google 云视频智能标注视频 JSON vs 示例代码
Google Cloud Video Intelligence Annotate Video JSON vs example code
Google Cloud Video Intelligence 提供了以下代码来解析带有对象跟踪的标注结果:
# Submit an OBJECT_TRACKING annotation job for a GCS-hosted video, then print
# each tracked object's description, segment, confidence, and the bounding
# box of its first tracked frame.
features = [videointelligence.Feature.OBJECT_TRACKING]
video_context = videointelligence.VideoContext(segments=None)
annotate_request = videointelligence.AnnotateVideoRequest(
    input_uri=gs_video_path,
    features=features,
    video_context=video_context,
    output_uri=output_uri,
)

operation = video_client.annotate_video(annotate_request)
result = operation.result(timeout=3600)  # block up to 1 hour for the LRO

object_annotations = result.annotation_results[0].object_annotations
for object_annotation in object_annotations:
    segment = object_annotation.segment
    print('Entity description: {}'.format(object_annotation.entity.description))
    print('Segment: {}s to {}s'.format(
        segment.start_time_offset.total_seconds(),
        segment.end_time_offset.total_seconds()))
    print('Confidence: {}'.format(object_annotation.confidence))

    # Here we print only the bounding box of the first frame_annotation
    # in the segment.
    frame_annotation = object_annotation.frames[0]
    box = frame_annotation.normalized_bounding_box
    timestamp = frame_annotation.time_offset.total_seconds()
    timestamp_end = segment.end_time_offset.total_seconds()
    print('Time offset of the first frame_annotation: {}s'.format(timestamp))
    print('Bounding box position:')
    print('\tleft : {}'.format(box.left))
    print('\ttop : {}'.format(box.top))
    print('\tright : {}'.format(box.right))
    print('\tbottom: {}'.format(box.bottom))
    print('\n')
但是,我想解析通过 output_uri 生成的 json 文件。 json 文件的格式如下:
{
"annotation_results": [ {
"input_uri": "/production.supereye.co.uk/video/54V5x8q0CRU/videofile.mp4",
"segment": {
"start_time_offset": {
},
"end_time_offset": {
"seconds": 22,
"nanos": 966666000
}
},
"object_annotations": [ {
"entity": {
"entity_id": "/m/01yrx",
"description": "cat",
"language_code": "en-US"
},
"confidence": 0.91939145,
"frames": [ {
"normalized_bounding_box": {
"left": 0.17845993,
"top": 0.44048917,
"right": 0.5315634,
"bottom": 0.7752136
},
"time_offset": {
}
}, {
如何使用示例代码来解析随 output_uri 一起提供的 JSON?这需要什么样的转换?
使用来自 output_uri
的文件,您可以使用此代码解析 json。我在本地将文件保存为 response.json 并将使用它进行解析。
这与您上面的代码相似,它在第一个 frame_annotation
处解析数据。但是此代码缺少时间偏移量的转换,因为用于转换的函数来自时间对象。
我注释掉了 start_time_offset
和 end_time_offset
,因为它们各有 2 个键:seconds
和 nanos
。这取决于你想使用哪一个,只需取消注释相应的行并相应地调整。
# Parse the JSON that the Video Intelligence API wrote to output_uri.
# Time offsets are protobuf Durations serialized as {"seconds": ..., "nanos": ...};
# a field whose value is zero is omitted entirely, so both keys need a default.
import json


def _duration_seconds(duration):
    """Convert a serialized protobuf Duration dict to float seconds."""
    return duration.get("seconds", 0) + duration.get("nanos", 0) / 1e9


def _print_box(box):
    """Print a normalized_bounding_box dict with correctly labelled edges."""
    # The original printed '\tleft' for every edge; label each edge properly.
    print('Bounding box position:')
    print('\tleft : {}'.format(box["left"]))
    print('\ttop : {}'.format(box["top"]))
    print('\tright : {}'.format(box["right"]))
    print('\tbottom: {}'.format(box["bottom"]))


# 'with' guarantees the file is closed even if parsing raises.
with open('response.json', 'r') as f:
    data = json.load(f)

for results in data["annotation_results"]:
    for obj_ann in results["object_annotations"]:
        start_time_offset = _duration_seconds(obj_ann["segment"]["start_time_offset"])
        end_time_offset = _duration_seconds(obj_ann["segment"]["end_time_offset"])
        # Only the first tracked frame of each segment is reported here.
        frame_annotation = obj_ann["frames"][0]
        entity = obj_ann["entity"]["description"]
        confidence = obj_ann["confidence"]
        box = frame_annotation["normalized_bounding_box"]
        time_offset = _duration_seconds(frame_annotation["time_offset"])
        print('Entity description: {}'.format(entity))
        print('Segment: {}s to {}s'.format(start_time_offset, end_time_offset))
        print('Confidence: {}'.format(confidence))
        print('Time offset of the first frame_annotation: {}'.format(time_offset))
        _print_box(box)
为了测试,我使用了 gs://cloud-samples-data/video/cat.mp4 并使用了它的响应:
使用 dotmap 包并为时间戳实现一个简单的 total_seconds 函数,事情与原始示例代码非常接近:
import json
import os
from dotmap import DotMap
def total_seconds(time_offset):
    """Convert a parsed protobuf Duration (``seconds``/``nanos`` attributes)
    to float seconds.

    The API omits any field whose value is zero, so a DotMap-wrapped JSON
    returns an empty DotMap (not a number) for a missing key.  Instead of
    testing ``type(...) is DotMap`` — which ties this helper to one library —
    treat any non-numeric attribute value as 0.
    """
    raw_seconds = time_offset.seconds
    raw_nanos = time_offset.nanos
    seconds = raw_seconds if isinstance(raw_seconds, (int, float)) else 0
    nanos = raw_nanos if isinstance(raw_nanos, (int, float)) else 0
    return seconds + nanos / 1e9
# Read the annotation JSON saved locally from output_uri; DotMap provides the
# same attribute-style access as the protobuf response objects, so the loop
# below mirrors the original sample code.
with open("./visual.json") as f:  # 'with' closes the handle the original leaked
    result = DotMap(json.load(f))
print(result)

object_annotations = result.annotation_results[0].object_annotations
for object_annotation in object_annotations:
    print('Entity description: {}'.format(object_annotation.entity.description))
    # Only the first tracked frame of each segment is reported here.
    frame_annotation = object_annotation.frames[0]
    box = frame_annotation.normalized_bounding_box
    timestamp = total_seconds(frame_annotation.time_offset)
    timestamp_end = total_seconds(object_annotation.segment.end_time_offset)
    print("Timestamps : {0} - {1}".format(timestamp, timestamp_end))
    print('Bounding box position:')
    print('\tleft : {}'.format(box.left))
    print('\ttop : {}'.format(box.top))
    print('\tright : {}'.format(box.right))
    print('\tbottom: {}'.format(box.bottom))
    print('\n')
Google Cloud Video Intelligence 提供了以下代码来解析带有对象跟踪的标注结果:
# Submit an OBJECT_TRACKING annotation job for a GCS-hosted video, then print
# each tracked object's description, segment, confidence, and the bounding
# box of its first tracked frame.
features = [videointelligence.Feature.OBJECT_TRACKING]
video_context = videointelligence.VideoContext(segments=None)
annotate_request = videointelligence.AnnotateVideoRequest(
    input_uri=gs_video_path,
    features=features,
    video_context=video_context,
    output_uri=output_uri,
)

operation = video_client.annotate_video(annotate_request)
result = operation.result(timeout=3600)  # block up to 1 hour for the LRO

object_annotations = result.annotation_results[0].object_annotations
for object_annotation in object_annotations:
    segment = object_annotation.segment
    print('Entity description: {}'.format(object_annotation.entity.description))
    print('Segment: {}s to {}s'.format(
        segment.start_time_offset.total_seconds(),
        segment.end_time_offset.total_seconds()))
    print('Confidence: {}'.format(object_annotation.confidence))

    # Here we print only the bounding box of the first frame_annotation
    # in the segment.
    frame_annotation = object_annotation.frames[0]
    box = frame_annotation.normalized_bounding_box
    timestamp = frame_annotation.time_offset.total_seconds()
    timestamp_end = segment.end_time_offset.total_seconds()
    print('Time offset of the first frame_annotation: {}s'.format(timestamp))
    print('Bounding box position:')
    print('\tleft : {}'.format(box.left))
    print('\ttop : {}'.format(box.top))
    print('\tright : {}'.format(box.right))
    print('\tbottom: {}'.format(box.bottom))
    print('\n')
但是,我想解析通过 output_uri 生成的 json 文件。 json 文件的格式如下:
{
"annotation_results": [ {
"input_uri": "/production.supereye.co.uk/video/54V5x8q0CRU/videofile.mp4",
"segment": {
"start_time_offset": {
},
"end_time_offset": {
"seconds": 22,
"nanos": 966666000
}
},
"object_annotations": [ {
"entity": {
"entity_id": "/m/01yrx",
"description": "cat",
"language_code": "en-US"
},
"confidence": 0.91939145,
"frames": [ {
"normalized_bounding_box": {
"left": 0.17845993,
"top": 0.44048917,
"right": 0.5315634,
"bottom": 0.7752136
},
"time_offset": {
}
}, {
如何使用示例代码来解析随 output_uri 一起提供的 JSON?这需要什么样的转换?
使用来自 output_uri
的文件,您可以使用此代码解析 json。我在本地将文件保存为 response.json 并将使用它进行解析。
这与您上面的代码相似,它在第一个 frame_annotation
处解析数据。但是此代码缺少时间偏移量的转换,因为用于转换的函数来自时间对象。
我注释掉了 start_time_offset
和 end_time_offset
,因为它们各有 2 个键:seconds
和 nanos
。这取决于你想使用哪一个,只需取消注释相应的行并相应地调整。
# Parse the JSON that the Video Intelligence API wrote to output_uri.
# Time offsets are protobuf Durations serialized as {"seconds": ..., "nanos": ...};
# a field whose value is zero is omitted entirely, so both keys need a default.
import json


def _duration_seconds(duration):
    """Convert a serialized protobuf Duration dict to float seconds."""
    return duration.get("seconds", 0) + duration.get("nanos", 0) / 1e9


def _print_box(box):
    """Print a normalized_bounding_box dict with correctly labelled edges."""
    # The original printed '\tleft' for every edge; label each edge properly.
    print('Bounding box position:')
    print('\tleft : {}'.format(box["left"]))
    print('\ttop : {}'.format(box["top"]))
    print('\tright : {}'.format(box["right"]))
    print('\tbottom: {}'.format(box["bottom"]))


# 'with' guarantees the file is closed even if parsing raises.
with open('response.json', 'r') as f:
    data = json.load(f)

for results in data["annotation_results"]:
    for obj_ann in results["object_annotations"]:
        start_time_offset = _duration_seconds(obj_ann["segment"]["start_time_offset"])
        end_time_offset = _duration_seconds(obj_ann["segment"]["end_time_offset"])
        # Only the first tracked frame of each segment is reported here.
        frame_annotation = obj_ann["frames"][0]
        entity = obj_ann["entity"]["description"]
        confidence = obj_ann["confidence"]
        box = frame_annotation["normalized_bounding_box"]
        time_offset = _duration_seconds(frame_annotation["time_offset"])
        print('Entity description: {}'.format(entity))
        print('Segment: {}s to {}s'.format(start_time_offset, end_time_offset))
        print('Confidence: {}'.format(confidence))
        print('Time offset of the first frame_annotation: {}'.format(time_offset))
        _print_box(box)
为了测试,我使用了 gs://cloud-samples-data/video/cat.mp4 并使用了它的响应:
使用 dotmap 包并为时间戳实现一个简单的 total_seconds 函数,事情与原始示例代码非常接近:
import json
import os
from dotmap import DotMap
def total_seconds(time_offset):
    """Convert a parsed protobuf Duration (``seconds``/``nanos`` attributes)
    to float seconds.

    The API omits any field whose value is zero, so a DotMap-wrapped JSON
    returns an empty DotMap (not a number) for a missing key.  Instead of
    testing ``type(...) is DotMap`` — which ties this helper to one library —
    treat any non-numeric attribute value as 0.
    """
    raw_seconds = time_offset.seconds
    raw_nanos = time_offset.nanos
    seconds = raw_seconds if isinstance(raw_seconds, (int, float)) else 0
    nanos = raw_nanos if isinstance(raw_nanos, (int, float)) else 0
    return seconds + nanos / 1e9
# Read the annotation JSON saved locally from output_uri; DotMap provides the
# same attribute-style access as the protobuf response objects, so the loop
# below mirrors the original sample code.
with open("./visual.json") as f:  # 'with' closes the handle the original leaked
    result = DotMap(json.load(f))
print(result)

object_annotations = result.annotation_results[0].object_annotations
for object_annotation in object_annotations:
    print('Entity description: {}'.format(object_annotation.entity.description))
    # Only the first tracked frame of each segment is reported here.
    frame_annotation = object_annotation.frames[0]
    box = frame_annotation.normalized_bounding_box
    timestamp = total_seconds(frame_annotation.time_offset)
    timestamp_end = total_seconds(object_annotation.segment.end_time_offset)
    print("Timestamps : {0} - {1}".format(timestamp, timestamp_end))
    print('Bounding box position:')
    print('\tleft : {}'.format(box.left))
    print('\ttop : {}'.format(box.top))
    print('\tright : {}'.format(box.right))
    print('\tbottom: {}'.format(box.bottom))
    print('\n')