Python GStreamer:为 appsink 缓冲区获取 Meta Api

Python GStreamer: getting Meta Api for appsink buffer

我正在使用 GStreamer 从 USB 网络摄像头 (Logitech C920) 捕获 H264 视频,我想在可能解码或流式传输到网络之前分析 h264 帧。

根据互联网上的各种资料,我编写了一个 Python 2.7 脚本,可以把帧取到 Python 中,其管线大致相当于下面这条示意性的 gst-launch 命令:

gst-launch-1.0 v4l2src ! video/x-h264 ! h264parse ! appsink

但是,我卡在了如何解读接收到的缓冲区上。我已经花了相当多的时间试图弄清楚 Python 版 GStreamer 如何把 Meta API 附加到缓冲区,但到目前为止一无所获。如果我理解正确,只要以某种方式把 Meta API 附加到缓冲区,我就能得到一个结构体,通过它访问帧的各个组成部分以及帧编码的相关信息。我该怎样做到这一点?(前提是不必自己为这些帧编写解码器)

下面是我当前的脚本,带有一些示例输出:

from __future__ import absolute_import, division, print_function
import sys, os, pdb
from datetime import datetime
import gi

# Select the 1.0 version of the Gst bindings before importing them below.
gi.require_version("Gst","1.0")
from gi.repository import Gst
# Initialize GStreamer once at import time; None means no argument list is passed in.
Gst.init(None)

def appsink_new_buffer(sink, data):
    """Handle appsink's "new-sample" signal by printing buffer and caps details.

    Pulls the pending sample from ``sink``, then prints the buffer size,
    memory-block count and timestamps, followed by the fields of the first
    caps structure.

    Args:
        sink: The appsink element that emitted the signal.
        data: User data supplied to ``connect``; not used here.

    Returns:
        ``Gst.FlowReturn.OK`` once a sample has been printed, or
        ``Gst.FlowReturn.ERROR`` when no sample could be pulled.
    """
    sample = sink.emit("pull-sample")
    if sample is None:
        # No sample is available (e.g. the sink is stopped or at EOS), so
        # there is nothing to inspect; report the failure upstream instead
        # of crashing on sample.get_buffer().
        return Gst.FlowReturn.ERROR

    buf = sample.get_buffer()
    caps = sample.get_caps()

    print("\nGot new buffer: {}  Sample Info: {}\n".format(datetime.now(),sample.get_info()))
    print("Buffer size: {} ".format(buf.get_size()))
    # pts/dts/duration are in nanoseconds; convert to seconds / milliseconds.
    print("Buffer n_memory: {} Presentation TS (PTS): {:.3f} s Decoding DTS: {:.3f} s Duration: {:.1f} ms".format(
        buf.n_memory(), buf.pts/1e9, buf.dts/1e9, buf.duration/1e6))

    # Only the first caps structure is inspected. Its index is printed
    # explicitly: the original printed the list-comprehension variable,
    # which is a NameError on Python 3 and a stale field index on Python 2.
    structure_index = 0
    st = caps.get_structure(structure_index)
    field_names = [st.nth_field_name(idx) for idx in range(st.n_fields())]
    print("Caps {} n_fields: {} name: {}, format: {}, height: {}, width: {}".format(
        structure_index,st.n_fields(),st.get_name(), st.get_value("format"), st.get_value("height"), st.get_value("width")))
    print("     all fields: {}".format(" ".join(field_names)))
    for fname in field_names:
        if fname not in ['pixel-aspect-ratio','framerate']: # cause error because Gst.FractionType not known
            print("  {:20}: ".format(fname), st.get_value(fname))

    #
    # somehow, here one needs to get the Meta API to understand the buffer content and to do further processing
    # of the encoded h264 frames.
    # Q: does one buffer after the h264parse represent exactly one frame?
    #

    return Gst.FlowReturn.OK

def _make_element(factory_name, element_name):
    """Create a GStreamer element, raising if the factory is unavailable."""
    element = Gst.ElementFactory.make(factory_name, element_name)
    if element is None:
        # make() returns None (e.g. plugin not installed); fail with a clear
        # message instead of a later AttributeError on None.
        raise Exception("Element {} could not be created.".format(factory_name))
    return element


def appsink_webcam_h264(device="/dev/video2"):
    """Capture H264 from a V4L2 webcam and hand each sample to an appsink callback.

    Builds the pipeline ``v4l2src ! capsfilter ! h264parse ! appsink``, sets it
    to PLAYING and then processes bus messages until an ERROR or EOS message
    arrives. The pipeline is always returned to the NULL state on exit.

    Args:
        device: Path of the V4L2 capture device to read from.

    Raises:
        Exception: If an element cannot be created or two neighbouring
            elements cannot be linked.
    """
    # adapted from https://gist.github.com/willpatera/7984486

    source = _make_element("v4l2src", "source")
    source.set_property("device", device)
    caps = Gst.caps_from_string("video/x-h264, width=640,height=480,framerate=10/1")
    capsfilter = _make_element("capsfilter", None)
    capsfilter.set_property("caps", caps)
    parse = _make_element("h264parse", "h264parse")
    sink = _make_element("appsink", "sink")
    pipeline_elements = [source, capsfilter, parse, sink]

    sink.set_property("max-buffers",20) # prevent the app to consume huge part of memory
    sink.set_property('emit-signals',True) #tell sink to emit signals
    sink.set_property('sync',False) #no sync to make decoding as fast as possible
    sink.connect("new-sample", appsink_new_buffer, sink)

    # Create an empty pipeline & add/link elements
    pipeline = Gst.Pipeline.new("test-pipeline")
    for elem in pipeline_elements:
        pipeline.add(elem)
    # Link every element to its downstream neighbour.
    for upstream, downstream in zip(pipeline_elements, pipeline_elements[1:]):
        if not Gst.Element.link(upstream, downstream):
            raise Exception("Elements {} and {} could not be linked.".format(
                upstream, downstream))

    try:
        pipeline.set_state(Gst.State.PLAYING)

        # Wait until error or EOS
        bus = pipeline.get_bus()

        # Parse message
        while True:
            # The timeout is expressed in nanoseconds; wait 100 ms per poll
            # so the loop does not spin the CPU while the bus is idle.
            message = bus.timed_pop_filtered(100 * Gst.MSECOND, Gst.MessageType.ANY)
            if not message:
                continue
            if message.type == Gst.MessageType.ERROR:
                err, debug = message.parse_error()
                print("Error received from element %s: %s" % (
                    message.src.get_name(), err))
                print("Debugging information: %s" % debug)
                break
            elif message.type == Gst.MessageType.EOS:
                print("End-Of-Stream reached.")
                break
            elif message.type == Gst.MessageType.STATE_CHANGED:
                # Only report state changes of the pipeline itself.
                if isinstance(message.src, Gst.Pipeline):
                    old_state, new_state, pending_state = message.parse_state_changed()
                    print("Pipeline state changed from %s to %s." %
                           (old_state.value_nick, new_state.value_nick))
            else:
                print("Unexpected message received: ", message, message.type)
    finally:
        # Release the device even when interrupted (e.g. Ctrl-C) or on error.
        pipeline.set_state(Gst.State.NULL)



if __name__ == "__main__":
    # Start capturing only when run as a script, not when imported.
    appsink_webcam_h264()

以下是此脚本的一些示例输出:

...
Got new buffer: 2016-01-09 01:41:52.091462  Sample Info: None
Buffer size: 9409
Buffer n_memory: 1 Presentation TS (PTS): 0.390 s Decoding DTS: 0.000 s Duration: 100.0 ms
Caps 8 n_fields: 9 name: video/x-h264, format: None, height: 480, width: 640
     all fields: stream-format alignment width height pixel-aspect-ratio framerate parsed level profile
  stream-format       :  byte-stream
  alignment           :  au
  width               :  640
  height              :  480
  parsed              :  True
  level               :  4
  profile             :  constrained-baseline

Got new buffer: 2016-01-09 01:41:52.184990  Sample Info: None
Buffer size: 868
Buffer n_memory: 1 Presentation TS (PTS): 0.590 s Decoding DTS: 0.100 s Duration: 100.0 ms
Caps 8 n_fields: 9 name: video/x-h264, format: None, height: 480, width: 640
     all fields: stream-format alignment width height pixel-aspect-ratio framerate parsed level profile
  stream-format       :  byte-stream
  alignment           :  au
  width               :  640
  height              :  480
  parsed              :  True
  level               :  4
  profile             :  constrained-baseline

Got new buffer: 2016-01-09 01:41:52.285425  Sample Info: None
Buffer size: 3202
... 

我搜索了很多,但找不到任何示例说明如何在 Python 中把 Meta API 映射到包含编码视频帧的缓冲区上。我认为这不应该这么难,因为相关功能似乎已经提供了。

有什么建议吗?

你能确认一下,你要访问的并不是原始 h264 数据吗?原始数据本身就在缓冲区对象中。若要对其做进一步分析,例如判断是 I 帧还是 P 帧,或者读取 SEI 信息,就需要使用 gsth264parser.c 之类的工具来解析原始 h264 数据。

要获取元数据,必须先知道所要查找的元数据类型,例如 GstMetaXImage。据我所知,GStreamer 中并没有任何 h264 专用的元数据类型。