x264编码严重质量损失
x264 encoding severe quality loss
我使用这个存储库(x264-go)将 MJPEG 流编码为 H.264 流,但输出效果不是很好。该流是 iPhone 的一系列屏幕截图。在输出流中,甚至连“设置”应用中两个项目之间的分隔线都消失了。如何提高输出流的质量?
这是 x264-go 用来初始化编码器的代码片段
func NewEncoder(w io.Writer, opts *Options) (e *Encoder, err error) {
e = &Encoder{}
e.w = w
e.pts = 0
e.opts = opts
e.csp = x264c.CspI420
e.nals = make([]*x264c.Nal, 3)
e.img = NewYCbCr(image.Rect(0, 0, e.opts.Width, e.opts.Height))
param := x264c.Param{}
if e.opts.Preset != "" && e.opts.Profile != "" {
ret := x264c.ParamDefaultPreset(¶m, e.opts.Preset, e.opts.Tune)
if ret < 0 {
err = fmt.Errorf("x264: invalid preset/tune name")
return
}
} else {
x264c.ParamDefault(¶m)
}
param.IWidth = int32(e.opts.Width)
param.IHeight = int32(e.opts.Height)
param.ICsp = e.csp
param.BVfrInput = 0
param.BRepeatHeaders = 1
param.BAnnexb = 1
param.ILogLevel = e.opts.LogLevel
if e.opts.FrameRate > 0 {
param.IFpsNum = uint32(e.opts.FrameRate)
param.IFpsDen = 1
param.IKeyintMax = int32(e.opts.FrameRate)
param.BIntraRefresh = 1
}
if e.opts.Profile != "" {
ret := x264c.ParamApplyProfile(¶m, e.opts.Profile)
if ret < 0 {
err = fmt.Errorf("x264: invalid profile name")
return
}
}
// Allocate on create instead while encoding
var picIn x264c.Picture
ret := x264c.PictureAlloc(&picIn, e.csp, int32(e.opts.Width), int32(e.opts.Height))
if ret < 0 {
err = fmt.Errorf("x264: cannot allocate picture")
return
}
e.picIn = picIn
defer func() {
// Cleanup if intialization fail
if err != nil {
x264c.PictureClean(&picIn)
}
}()
e.e = x264c.EncoderOpen(¶m)
if e.e == nil {
err = fmt.Errorf("x264: cannot open the encoder")
return
}
ret = x264c.EncoderHeaders(e.e, e.nals, &e.nnals)
if ret < 0 {
err = fmt.Errorf("x264: cannot encode headers")
return
}
if ret > 0 {
b := C.GoBytes(e.nals[0].PPayload, C.int(ret))
n, er := e.w.Write(b)
if er != nil {
err = er
return
}
if int(ret) != n {
err = fmt.Errorf("x264: error writing headers, size=%d, n=%d", ret, n)
}
}
return
}
编码器选项定义为
// Encoder options passed to x264.NewEncoder for the screen-capture stream.
opts := &x264.Options{
// Integer divide-then-multiply rounds a non-negative dimension down to the
// nearest even value (presumably because 4:2:0 input needs even sizes — confirm).
Width: int(width)/2*2,
Height: int(height)/2*2,
// Frames per second; NewEncoder also uses this value as the keyframe interval.
FrameRate: 15,
Tune: "zerolatency",
Preset: "medium",
Profile: "baseline",
LogLevel: x264.LogNone,
}
而且我还使用了 ffmpeg(虽然 api 已经很老了)
package screencast
import (
/*
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/samplefmt.h>
// h264enc_t bundles the libavcodec state for one H.264 encoder instance.
// Only line comments are used here: this C code sits inside a Go block
// comment, which a C-style block comment terminator would end early.
typedef struct {
// Frame width and height in pixels; copied into the codec context by h264enc_new.
int w, h;
// Pixel format value for input frames; set by the caller before h264enc_new.
int pixfmt;
// preset, profile and bitrate are not referenced by the code visible here.
char *preset[2];
char *profile;
int bitrate;
// Receives the got-packet flag from avcodec_encode_video2.
int got;
// Encoder returned by avcodec_find_encoder.
AVCodec *c;
// Codec context allocated for c.
AVCodecContext *ctx;
// Input frame reused for every encode call.
AVFrame *f;
// Packet storage reused for every encode call.
AVPacket pkt;
} h264enc_t;
// h264enc_new opens an H.264 encoder for the width, height and pixel format
// already stored in m, and allocates the reusable input frame with its image
// buffer.
//
// Returns a negative AVERROR code on failure; in that case everything created
// here is released and m->ctx / m->f are left NULL. On success returns the
// non-negative result of av_image_alloc.
//
// Only line comments are used: this code sits inside a Go block comment.
static int h264enc_new(h264enc_t *m) {
	int ret;

	m->ctx = NULL;
	m->f = NULL;

	m->c = avcodec_find_encoder(AV_CODEC_ID_H264);
	if (m->c == NULL)
		return AVERROR_ENCODER_NOT_FOUND;

	m->ctx = avcodec_alloc_context3(m->c);
	if (m->ctx == NULL)
		return AVERROR(ENOMEM);

	m->ctx->width = m->w;
	m->ctx->height = m->h;
	m->ctx->pix_fmt = m->pixfmt;
	m->ctx->time_base = (AVRational){1,10};

	// Private encoder options. A rejected option is not treated as fatal here;
	// the encoder simply keeps its default for that option.
	av_opt_set(m->ctx->priv_data, "preset", "slow", 0);
	av_opt_set(m->ctx->priv_data, "tune", "zerolatency", 0);
	av_opt_set(m->ctx->priv_data, "profile", "baseline", 0);
	// The value used to be "18.0." (trailing dot), which is not a valid number,
	// so the option was rejected and the requested CRF was never applied.
	av_opt_set(m->ctx->priv_data, "crf", "18.0", 0);

	m->f = av_frame_alloc();
	if (m->f == NULL) {
		ret = AVERROR(ENOMEM);
		goto fail;
	}
	m->f->format = m->ctx->pix_fmt;
	m->f->width = m->ctx->width;
	m->f->height = m->ctx->height;

	// Opening can fail (unsupported size, pixel format, profile, ...); report
	// it instead of handing back a context that cannot encode.
	ret = avcodec_open2(m->ctx, m->c, NULL);
	if (ret < 0)
		goto fail;

	ret = av_image_alloc(m->f->data, m->f->linesize, m->ctx->width, m->ctx->height, m->ctx->pix_fmt, 32);
	if (ret < 0)
		goto fail;
	return ret;

fail:
	// Both free functions accept a pointer to a NULL pointer and reset it.
	av_frame_free(&m->f);
	avcodec_free_context(&m->ctx);
	return ret;
}
*/
"C"
"errors"
"image"
"unsafe"
//"log"
)
// H264Encoder pairs the C-side libavcodec encoder state with the frame
// geometry that Encode validates incoming images against.
type H264Encoder struct {
	// m is the C encoder state filled in by h264enc_new.
	m C.h264enc_t

	// Header is not written by the code visible here.
	Header []byte

	// Pixfmt is the chroma subsampling every input image must use.
	Pixfmt image.YCbCrSubsampleRatio

	// W is the frame width in pixels that every input image must match.
	W int
	// H is the frame height in pixels that every input image must match.
	H int

	// pts is the presentation timestamp given to the next submitted frame.
	pts int
}
// NewH264Encoder opens an H.264 encoder for frames of w by h pixels using
// 4:2:0 chroma subsampling.
//
// Frames are declared to the encoder as full-range YCbCr
// (AV_PIX_FMT_YUVJ420P). Go's image.YCbCr, e.g. as produced by the JPEG
// decoder, follows JFIF and uses the full 0-255 range; declaring it as
// limited-range AV_PIX_FMT_YUV420P makes players apply the wrong range
// expansion, which shows up as lost contrast and colour.
//
// It returns a nil encoder and a non-nil error when the size is not positive
// or the underlying encoder cannot be opened.
func NewH264Encoder(w, h int) (m *H264Encoder, err error) {
	if w <= 0 || h <= 0 {
		return nil, errors.New("invalid frame size")
	}

	m = &H264Encoder{
		W:      w,
		H:      h,
		Pixfmt: image.YCbCrSubsampleRatio420,
	}
	m.m.w = C.int(w)
	m.m.h = C.int(h)
	m.m.pixfmt = C.AV_PIX_FMT_YUVJ420P

	if r := C.h264enc_new(&m.m); int(r) < 0 {
		// Do not hand a half-initialised encoder back to the caller.
		return nil, errors.New("open encoder failed")
	}
	return m, nil
}
// Encode submits one frame to the encoder and returns the bytes of the packet
// it produced.
//
// img must use the subsampling and dimensions the encoder was created with.
// A nil img submits a NULL frame, which asks the encoder to flush; each
// call then returns one delayed packet until "no picture" is reported.
//
// Errors:
//   - "image pixfmt not match" / "image size not match" for a mismatched image
//   - "image has no pixel data" when any plane is empty
//   - "encode failed" when libavcodec reports an error
//   - "no picture" when the encoder produced no packet for this call
//   - "packet size == 0" when an empty packet was produced
func (m *H264Encoder) Encode(img *image.YCbCr) (data []byte, err error) {
	var f *C.AVFrame
	if img != nil {
		if img.SubsampleRatio != m.Pixfmt {
			return nil, errors.New("image pixfmt not match")
		}
		if img.Rect.Dx() != m.W || img.Rect.Dy() != m.H {
			return nil, errors.New("image size not match")
		}
		// Taking &plane[0] on an empty plane would panic.
		if len(img.Y) == 0 || len(img.Cb) == 0 || len(img.Cr) == 0 {
			return nil, errors.New("image has no pixel data")
		}

		f = m.m.f
		// NOTE(review): this points the C frame at Go-owned plane memory for
		// the duration of the call, replacing whatever buffer pointers the
		// frame held before. Confirm this is acceptable under the cgo pointer
		// rules for the Go version in use; copying the planes into the
		// frame's own buffer would avoid the question.
		f.data[0] = (*C.uint8_t)(unsafe.Pointer(&img.Y[0]))
		f.data[1] = (*C.uint8_t)(unsafe.Pointer(&img.Cb[0]))
		f.data[2] = (*C.uint8_t)(unsafe.Pointer(&img.Cr[0]))
		f.linesize[0] = C.int(img.YStride)
		f.linesize[1] = C.int(img.CStride)
		f.linesize[2] = C.int(img.CStride)

		// Only a real frame carries a timestamp; on the flush path f is nil
		// and must not be dereferenced.
		f.pts = C.int64_t(m.pts)
		m.pts++
	}

	C.av_init_packet(&m.m.pkt)
	// Let the encoder allocate the packet buffer itself.
	m.m.pkt.data = nil
	m.m.pkt.size = 0

	r := C.avcodec_encode_video2(m.m.ctx, &m.m.pkt, f, &m.m.got)
	defer C.av_packet_unref(&m.m.pkt)
	if int(r) < 0 {
		return nil, errors.New("encode failed")
	}
	if m.m.got == 0 {
		return nil, errors.New("no picture")
	}
	if m.m.pkt.size == 0 {
		return nil, errors.New("packet size == 0")
	}

	// Copy the packet into Go memory before the deferred unref releases it.
	data = C.GoBytes(unsafe.Pointer(m.m.pkt.data), m.m.pkt.size)
	return data, nil
}
但得到了相同的输出。但是,当我使用 ffmpeg 二进制文件时,结果非常好,所以我猜我设置了错误的参数,但我不知道是哪个。
任何建议将不胜感激。如果您有更好的方法来实现这一点,我将不胜感激。
供您参考:我必须使用 golang 来完成此操作。
原来问题并不是细节丢失,真正的问题是颜色丢失:输出流应该使用全范围的 yuvj420p(0–255)格式,而不是有限范围的 yuv420p(亮度 16–235,色度 16–240)。
我使用这个存储库(x264-go)将 MJPEG 流编码为 H.264 流,但输出效果不是很好。该流是 iPhone 的一系列屏幕截图。在输出流中,甚至连“设置”应用中两个项目之间的分隔线都消失了。如何提高输出流的质量?这是 x264-go 用来初始化编码器的代码片段
func NewEncoder(w io.Writer, opts *Options) (e *Encoder, err error) {
e = &Encoder{}
e.w = w
e.pts = 0
e.opts = opts
e.csp = x264c.CspI420
e.nals = make([]*x264c.Nal, 3)
e.img = NewYCbCr(image.Rect(0, 0, e.opts.Width, e.opts.Height))
param := x264c.Param{}
if e.opts.Preset != "" && e.opts.Profile != "" {
ret := x264c.ParamDefaultPreset(¶m, e.opts.Preset, e.opts.Tune)
if ret < 0 {
err = fmt.Errorf("x264: invalid preset/tune name")
return
}
} else {
x264c.ParamDefault(¶m)
}
param.IWidth = int32(e.opts.Width)
param.IHeight = int32(e.opts.Height)
param.ICsp = e.csp
param.BVfrInput = 0
param.BRepeatHeaders = 1
param.BAnnexb = 1
param.ILogLevel = e.opts.LogLevel
if e.opts.FrameRate > 0 {
param.IFpsNum = uint32(e.opts.FrameRate)
param.IFpsDen = 1
param.IKeyintMax = int32(e.opts.FrameRate)
param.BIntraRefresh = 1
}
if e.opts.Profile != "" {
ret := x264c.ParamApplyProfile(¶m, e.opts.Profile)
if ret < 0 {
err = fmt.Errorf("x264: invalid profile name")
return
}
}
// Allocate on create instead while encoding
var picIn x264c.Picture
ret := x264c.PictureAlloc(&picIn, e.csp, int32(e.opts.Width), int32(e.opts.Height))
if ret < 0 {
err = fmt.Errorf("x264: cannot allocate picture")
return
}
e.picIn = picIn
defer func() {
// Cleanup if intialization fail
if err != nil {
x264c.PictureClean(&picIn)
}
}()
e.e = x264c.EncoderOpen(¶m)
if e.e == nil {
err = fmt.Errorf("x264: cannot open the encoder")
return
}
ret = x264c.EncoderHeaders(e.e, e.nals, &e.nnals)
if ret < 0 {
err = fmt.Errorf("x264: cannot encode headers")
return
}
if ret > 0 {
b := C.GoBytes(e.nals[0].PPayload, C.int(ret))
n, er := e.w.Write(b)
if er != nil {
err = er
return
}
if int(ret) != n {
err = fmt.Errorf("x264: error writing headers, size=%d, n=%d", ret, n)
}
}
return
}
编码器选项定义为
// Encoder options passed to x264.NewEncoder for the screen-capture stream.
opts := &x264.Options{
// Integer divide-then-multiply rounds a non-negative dimension down to the
// nearest even value (presumably because 4:2:0 input needs even sizes — confirm).
Width: int(width)/2*2,
Height: int(height)/2*2,
// Frames per second; NewEncoder also uses this value as the keyframe interval.
FrameRate: 15,
Tune: "zerolatency",
Preset: "medium",
Profile: "baseline",
LogLevel: x264.LogNone,
}
而且我还使用了 ffmpeg(虽然 api 已经很老了)
package screencast
import (
/*
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/samplefmt.h>
// h264enc_t bundles the libavcodec state for one H.264 encoder instance.
// Only line comments are used here: this C code sits inside a Go block
// comment, which a C-style block comment terminator would end early.
typedef struct {
// Frame width and height in pixels; copied into the codec context by h264enc_new.
int w, h;
// Pixel format value for input frames; set by the caller before h264enc_new.
int pixfmt;
// preset, profile and bitrate are not referenced by the code visible here.
char *preset[2];
char *profile;
int bitrate;
// Receives the got-packet flag from avcodec_encode_video2.
int got;
// Encoder returned by avcodec_find_encoder.
AVCodec *c;
// Codec context allocated for c.
AVCodecContext *ctx;
// Input frame reused for every encode call.
AVFrame *f;
// Packet storage reused for every encode call.
AVPacket pkt;
} h264enc_t;
// h264enc_new opens an H.264 encoder for the width, height and pixel format
// already stored in m, and allocates the reusable input frame with its image
// buffer.
//
// Returns a negative AVERROR code on failure; in that case everything created
// here is released and m->ctx / m->f are left NULL. On success returns the
// non-negative result of av_image_alloc.
//
// Only line comments are used: this code sits inside a Go block comment.
static int h264enc_new(h264enc_t *m) {
	int ret;

	m->ctx = NULL;
	m->f = NULL;

	m->c = avcodec_find_encoder(AV_CODEC_ID_H264);
	if (m->c == NULL)
		return AVERROR_ENCODER_NOT_FOUND;

	m->ctx = avcodec_alloc_context3(m->c);
	if (m->ctx == NULL)
		return AVERROR(ENOMEM);

	m->ctx->width = m->w;
	m->ctx->height = m->h;
	m->ctx->pix_fmt = m->pixfmt;
	m->ctx->time_base = (AVRational){1,10};

	// Private encoder options. A rejected option is not treated as fatal here;
	// the encoder simply keeps its default for that option.
	av_opt_set(m->ctx->priv_data, "preset", "slow", 0);
	av_opt_set(m->ctx->priv_data, "tune", "zerolatency", 0);
	av_opt_set(m->ctx->priv_data, "profile", "baseline", 0);
	// The value used to be "18.0." (trailing dot), which is not a valid number,
	// so the option was rejected and the requested CRF was never applied.
	av_opt_set(m->ctx->priv_data, "crf", "18.0", 0);

	m->f = av_frame_alloc();
	if (m->f == NULL) {
		ret = AVERROR(ENOMEM);
		goto fail;
	}
	m->f->format = m->ctx->pix_fmt;
	m->f->width = m->ctx->width;
	m->f->height = m->ctx->height;

	// Opening can fail (unsupported size, pixel format, profile, ...); report
	// it instead of handing back a context that cannot encode.
	ret = avcodec_open2(m->ctx, m->c, NULL);
	if (ret < 0)
		goto fail;

	ret = av_image_alloc(m->f->data, m->f->linesize, m->ctx->width, m->ctx->height, m->ctx->pix_fmt, 32);
	if (ret < 0)
		goto fail;
	return ret;

fail:
	// Both free functions accept a pointer to a NULL pointer and reset it.
	av_frame_free(&m->f);
	avcodec_free_context(&m->ctx);
	return ret;
}
*/
"C"
"errors"
"image"
"unsafe"
//"log"
)
// H264Encoder pairs the C-side libavcodec encoder state with the frame
// geometry that Encode validates incoming images against.
type H264Encoder struct {
	// m is the C encoder state filled in by h264enc_new.
	m C.h264enc_t

	// Header is not written by the code visible here.
	Header []byte

	// Pixfmt is the chroma subsampling every input image must use.
	Pixfmt image.YCbCrSubsampleRatio

	// W is the frame width in pixels that every input image must match.
	W int
	// H is the frame height in pixels that every input image must match.
	H int

	// pts is the presentation timestamp given to the next submitted frame.
	pts int
}
// NewH264Encoder opens an H.264 encoder for frames of w by h pixels using
// 4:2:0 chroma subsampling.
//
// Frames are declared to the encoder as full-range YCbCr
// (AV_PIX_FMT_YUVJ420P). Go's image.YCbCr, e.g. as produced by the JPEG
// decoder, follows JFIF and uses the full 0-255 range; declaring it as
// limited-range AV_PIX_FMT_YUV420P makes players apply the wrong range
// expansion, which shows up as lost contrast and colour.
//
// It returns a nil encoder and a non-nil error when the size is not positive
// or the underlying encoder cannot be opened.
func NewH264Encoder(w, h int) (m *H264Encoder, err error) {
	if w <= 0 || h <= 0 {
		return nil, errors.New("invalid frame size")
	}

	m = &H264Encoder{
		W:      w,
		H:      h,
		Pixfmt: image.YCbCrSubsampleRatio420,
	}
	m.m.w = C.int(w)
	m.m.h = C.int(h)
	m.m.pixfmt = C.AV_PIX_FMT_YUVJ420P

	if r := C.h264enc_new(&m.m); int(r) < 0 {
		// Do not hand a half-initialised encoder back to the caller.
		return nil, errors.New("open encoder failed")
	}
	return m, nil
}
// Encode submits one frame to the encoder and returns the bytes of the packet
// it produced.
//
// img must use the subsampling and dimensions the encoder was created with.
// A nil img submits a NULL frame, which asks the encoder to flush; each
// call then returns one delayed packet until "no picture" is reported.
//
// Errors:
//   - "image pixfmt not match" / "image size not match" for a mismatched image
//   - "image has no pixel data" when any plane is empty
//   - "encode failed" when libavcodec reports an error
//   - "no picture" when the encoder produced no packet for this call
//   - "packet size == 0" when an empty packet was produced
func (m *H264Encoder) Encode(img *image.YCbCr) (data []byte, err error) {
	var f *C.AVFrame
	if img != nil {
		if img.SubsampleRatio != m.Pixfmt {
			return nil, errors.New("image pixfmt not match")
		}
		if img.Rect.Dx() != m.W || img.Rect.Dy() != m.H {
			return nil, errors.New("image size not match")
		}
		// Taking &plane[0] on an empty plane would panic.
		if len(img.Y) == 0 || len(img.Cb) == 0 || len(img.Cr) == 0 {
			return nil, errors.New("image has no pixel data")
		}

		f = m.m.f
		// NOTE(review): this points the C frame at Go-owned plane memory for
		// the duration of the call, replacing whatever buffer pointers the
		// frame held before. Confirm this is acceptable under the cgo pointer
		// rules for the Go version in use; copying the planes into the
		// frame's own buffer would avoid the question.
		f.data[0] = (*C.uint8_t)(unsafe.Pointer(&img.Y[0]))
		f.data[1] = (*C.uint8_t)(unsafe.Pointer(&img.Cb[0]))
		f.data[2] = (*C.uint8_t)(unsafe.Pointer(&img.Cr[0]))
		f.linesize[0] = C.int(img.YStride)
		f.linesize[1] = C.int(img.CStride)
		f.linesize[2] = C.int(img.CStride)

		// Only a real frame carries a timestamp; on the flush path f is nil
		// and must not be dereferenced.
		f.pts = C.int64_t(m.pts)
		m.pts++
	}

	C.av_init_packet(&m.m.pkt)
	// Let the encoder allocate the packet buffer itself.
	m.m.pkt.data = nil
	m.m.pkt.size = 0

	r := C.avcodec_encode_video2(m.m.ctx, &m.m.pkt, f, &m.m.got)
	defer C.av_packet_unref(&m.m.pkt)
	if int(r) < 0 {
		return nil, errors.New("encode failed")
	}
	if m.m.got == 0 {
		return nil, errors.New("no picture")
	}
	if m.m.pkt.size == 0 {
		return nil, errors.New("packet size == 0")
	}

	// Copy the packet into Go memory before the deferred unref releases it.
	data = C.GoBytes(unsafe.Pointer(m.m.pkt.data), m.m.pkt.size)
	return data, nil
}
但得到了相同的输出。但是,当我使用 ffmpeg 二进制文件时,结果非常好,所以我猜我设置了错误的参数,但我不知道是哪个。 任何建议将不胜感激。如果您有更好的方法来实现这一点,我将不胜感激。
供您参考:我必须使用 golang 来完成此操作。
原来问题并不是细节丢失,真正的问题是颜色丢失:输出流应该使用全范围的 yuvj420p(0–255)格式,而不是有限范围的 yuv420p(亮度 16–235,色度 16–240)。