camera2 captured picture - conversion from YUV_420_888 to NV21
Via the camera2 API we are receiving an Image object in the YUV_420_888 format. We then convert it to NV21 using the following function:
private static byte[] YUV_420_888toNV21(Image image) {
    byte[] nv21;
    ByteBuffer yBuffer = image.getPlanes()[0].getBuffer();
    ByteBuffer uBuffer = image.getPlanes()[1].getBuffer();
    ByteBuffer vBuffer = image.getPlanes()[2].getBuffer();

    int ySize = yBuffer.remaining();
    int uSize = uBuffer.remaining();
    int vSize = vBuffer.remaining();

    nv21 = new byte[ySize + uSize + vSize];

    // U and V are swapped
    yBuffer.get(nv21, 0, ySize);
    vBuffer.get(nv21, ySize, vSize);
    uBuffer.get(nv21, ySize + vSize, uSize);

    return nv21;
}
While this function works fine with cameraCaptureSessions.setRepeatingRequest, we get a segmentation fault in further processing (on the JNI side) when calling cameraCaptureSessions.capture. Both request the YUV_420_888 format via ImageReader.
Why is the result of the two function calls different although the requested type is the same?
Update: As mentioned in the comments, I get this behaviour because the image sizes differ (a much larger size for the capture request). But our further processing on the JNI side is the same for both requests and does not depend on the image dimensions (only on the aspect ratio, which is the same in both cases).
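A quick way to see how the two request paths differ is to log the plane layout of the incoming images; row strides and buffer sizes typically change with resolution. A small diagnostic sketch (the helper name is made up; it assumes android.media.Image and android.util.Log):

import android.media.Image;
import android.util.Log;

// Hypothetical helper: dump the plane layout of an Image before converting it.
// If rowStride != width, the size-based copy above produces a padded array,
// which stride-unaware JNI code may index out of bounds.
private static void logPlaneLayout(Image image) {
    Log.d("YUV", "size=" + image.getWidth() + "x" + image.getHeight());
    for (int i = 0; i < image.getPlanes().length; i++) {
        Image.Plane plane = image.getPlanes()[i];
        Log.d("YUV", "plane " + i
                + " rowStride=" + plane.getRowStride()
                + " pixelStride=" + plane.getPixelStride()
                + " bytes=" + plane.getBuffer().remaining());
    }
}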
Your code will only return correct NV21 if there is no padding at all and the U and V planes overlap, actually representing interleaved VU values. This happens quite often for previews, but in that case you allocate extra w*h/4 bytes for your array (which presumably is not a problem). Maybe for a captured image you need a more robust implementation, e.g.
private static byte[] YUV_420_888toNV21(Image image) {
    int width = image.getWidth();
    int height = image.getHeight();
    int ySize = width * height;
    int uvSize = width * height / 4;

    byte[] nv21 = new byte[ySize + uvSize * 2];

    ByteBuffer yBuffer = image.getPlanes()[0].getBuffer(); // Y
    ByteBuffer uBuffer = image.getPlanes()[1].getBuffer(); // U
    ByteBuffer vBuffer = image.getPlanes()[2].getBuffer(); // V

    int rowStride = image.getPlanes()[0].getRowStride();
    assert (image.getPlanes()[0].getPixelStride() == 1);

    int pos = 0;

    if (rowStride == width) { // likely
        yBuffer.get(nv21, 0, ySize);
        pos += ySize;
    } else {
        int yBufferPos = -rowStride; // not an actual position
        for (; pos < ySize; pos += width) {
            yBufferPos += rowStride;
            yBuffer.position(yBufferPos);
            yBuffer.get(nv21, pos, width);
        }
    }

    rowStride = image.getPlanes()[2].getRowStride();
    int pixelStride = image.getPlanes()[2].getPixelStride();
    assert (rowStride == image.getPlanes()[1].getRowStride());
    assert (pixelStride == image.getPlanes()[1].getPixelStride());

    if (pixelStride == 2 && rowStride == width && uBuffer.get(0) == vBuffer.get(1)) {
        // maybe the V and U planes overlap as per NV21, which means vBuffer[1] is an alias of uBuffer[0]
        byte savePixel = vBuffer.get(1);
        try {
            vBuffer.put(1, (byte) ~savePixel);
            if (uBuffer.get(0) == (byte) ~savePixel) {
                vBuffer.put(1, savePixel);
                vBuffer.position(0);
                uBuffer.position(0);
                vBuffer.get(nv21, ySize, 1);
                uBuffer.get(nv21, ySize + 1, uBuffer.remaining());
                return nv21; // shortcut
            }
        } catch (ReadOnlyBufferException ex) {
            // unfortunately, we cannot check if vBuffer and uBuffer overlap
        }
        // unfortunately, the check failed; we must save U and V pixel by pixel
        vBuffer.put(1, savePixel);
    }

    // other optimizations could check if (pixelStride == 1) or (pixelStride == 2),
    // but the performance gain would be less significant
    for (int row = 0; row < height / 2; row++) {
        for (int col = 0; col < width / 2; col++) {
            int vuPos = col * pixelStride + row * rowStride;
            nv21[pos++] = vBuffer.get(vuPos);
            nv21[pos++] = uBuffer.get(vuPos);
        }
    }

    return nv21;
}
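A quick way to sanity-check the resulting array is to compress it with android.graphics.YuvImage, which accepts NV21 directly; garbled colors or a green tint usually indicate a wrong stride or swapped U/V handling. A minimal verification sketch (the helper name is made up):

import android.graphics.ImageFormat;
import android.graphics.Rect;
import android.graphics.YuvImage;
import java.io.ByteArrayOutputStream;

// Quick check: compress the NV21 bytes to JPEG and inspect the result.
private static byte[] nv21ToJpeg(byte[] nv21, int width, int height) {
    YuvImage yuv = new YuvImage(nv21, ImageFormat.NV21, width, height, null);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    yuv.compressToJpeg(new Rect(0, 0, width, height), 90, out);
    return out.toByteArray();
}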
If you intend to pass the resulting array to C++, you can take advantage of the fact that the buffer returned will always have isDirect return true, so the underlying data can be mapped as a pointer in JNI without any copies, using GetDirectBufferAddress. This means that the same conversion can be done in C++ with minimal overhead. In C++, you may even find that the actual pixel arrangement is already NV21!
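On the Java side, that approach could look like the sketch below: the plane buffers and strides are handed to a native method, whose C++ implementation can obtain zero-copy pointers via GetDirectBufferAddress. The native method name and signature here are assumptions, not an existing API:

import android.media.Image;
import java.nio.ByteBuffer;

// Hypothetical native entry point; the C++ side would call
// env->GetDirectBufferAddress() on each plane buffer to get zero-copy pointers.
private static native void processYuvNative(
        ByteBuffer y, ByteBuffer u, ByteBuffer v,
        int width, int height,
        int yRowStride, int uvRowStride, int uvPixelStride);

private static void processImage(Image image) {
    Image.Plane[] planes = image.getPlanes();
    processYuvNative(
            planes[0].getBuffer(), planes[1].getBuffer(), planes[2].getBuffer(),
            image.getWidth(), image.getHeight(),
            planes[0].getRowStride(),
            planes[2].getRowStride(), planes[2].getPixelStride());
}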
PS. Actually, this can be done in Java with negligible overhead; see the line if (pixelStride == 2 && … above. So we can bulk-copy all chroma bytes to the resulting byte array, which is much faster than running the loops, but still slower than what can be achieved for this case in C++. For a full implementation, see Image.toByteArray().
public static byte[] YUV420toNV21(Image image) {
    Rect crop = image.getCropRect();
    int format = image.getFormat();
    int width = crop.width();
    int height = crop.height();
    Image.Plane[] planes = image.getPlanes();
    byte[] data = new byte[width * height * ImageFormat.getBitsPerPixel(format) / 8];
    byte[] rowData = new byte[planes[0].getRowStride()];
    int channelOffset = 0;
    int outputStride = 1;
    for (int i = 0; i < planes.length; i++) {
        switch (i) {
            case 0: // Y plane: copied to the start of the output, tightly packed
                channelOffset = 0;
                outputStride = 1;
                break;
            case 1: // U plane: interleaved after V in the NV21 chroma block
                channelOffset = width * height + 1;
                outputStride = 2;
                break;
            case 2: // V plane: starts the NV21 chroma block
                channelOffset = width * height;
                outputStride = 2;
                break;
        }
        ByteBuffer buffer = planes[i].getBuffer();
        int rowStride = planes[i].getRowStride();
        int pixelStride = planes[i].getPixelStride();
        int shift = (i == 0) ? 0 : 1; // chroma planes are subsampled by 2
        int w = width >> shift;
        int h = height >> shift;
        buffer.position(rowStride * (crop.top >> shift) + pixelStride * (crop.left >> shift));
        for (int row = 0; row < h; row++) {
            int length;
            if (pixelStride == 1 && outputStride == 1) {
                // packed rows can be copied in a single bulk get
                length = w;
                buffer.get(data, channelOffset, length);
                channelOffset += length;
            } else {
                // otherwise copy the row to a scratch buffer and pick out the samples
                length = (w - 1) * pixelStride + 1;
                buffer.get(rowData, 0, length);
                for (int col = 0; col < w; col++) {
                    data[channelOffset] = rowData[col * pixelStride];
                    channelOffset += outputStride;
                }
            }
            if (row < h - 1) {
                buffer.position(buffer.position() + rowStride - length);
            }
        }
    }
    return data;
}
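For context, a typical call site is the ImageReader callback; the Image must be closed afterwards, or the reader's buffer queue stalls. A minimal sketch, assuming the function above is in scope:

import android.media.Image;
import android.media.ImageReader;

ImageReader.OnImageAvailableListener listener = reader -> {
    Image image = reader.acquireLatestImage();
    if (image == null) {
        return;
    }
    try {
        byte[] nv21 = YUV420toNV21(image);
        // hand nv21 off to further processing here
    } finally {
        image.close(); // required, otherwise the ImageReader queue fills up
    }
};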
Based on @Alex's answer above, I implemented it in the JNI part to benefit from the byte access and the performance advantage. I leave it here; maybe it is as useful to someone as @Alex's answer was to me. It is almost the same algorithm, in C, based on images in the YUV_420_888 format:
uchar* yuvToNV21(jbyteArray yBuf, jbyteArray uBuf, jbyteArray vBuf, jbyte *fullArrayNV21,
                 int width, int height, int yRowStride, int yPixelStride, int uRowStride,
                 int uPixelStride, int vRowStride, int vPixelStride, JNIEnv *env) {

    /* Check that our frame has the right format, as specified in the Android docs for
     * YUV_420_888 (https://developer.android.com/reference/android/graphics/ImageFormat?authuser=2#YUV_420_888):
     * - Plane Y is not overlapped with UV, and always has pixelStride = 1
     * - Planes U and V have the same rowStride and pixelStride (overlapped or not)
     */
    if (yPixelStride != 1 || uPixelStride != vPixelStride || uRowStride != vRowStride) {
        jclass Exception = env->FindClass("java/lang/Exception");
        env->ThrowNew(Exception, "Invalid YUV_420_888 byte structure. It does not agree with https://developer.android.com/reference/android/graphics/ImageFormat?authuser=2#YUV_420_888");
    }

    int ySize = width * height;
    int uSize = env->GetArrayLength(uBuf);
    int vSize = env->GetArrayLength(vBuf);
    int newArrayPosition = 0; // position we are currently filling in the NV21 array
    if (fullArrayNV21 == nullptr) {
        fullArrayNV21 = new jbyte[ySize + uSize + vSize];
    }
    if (yRowStride == width) {
        // Best case: no padding, copy directly
        env->GetByteArrayRegion(yBuf, newArrayPosition, ySize, fullArrayNV21);
        newArrayPosition = ySize;
    } else {
        // Padding in plane Y: copy row by row
        long yPlanePosition = 0;
        for (; newArrayPosition < ySize; newArrayPosition += width) {
            env->GetByteArrayRegion(yBuf, yPlanePosition, width, fullArrayNV21 + newArrayPosition);
            yPlanePosition += yRowStride;
        }
    }

    // Check the UV channels to find out whether they are overlapped (best case).
    // If they are overlapped, the first bytes of U and V are consecutive and pixelStride = 2
    long uMemoryAdd = (long)&uBuf;
    long vMemoryAdd = (long)&vBuf;
    long diff = std::abs(uMemoryAdd - vMemoryAdd);
    if (vPixelStride == 2 && diff == 8) {
        if (width == vRowStride) {
            // Best case: valid NV21 representation (UV overlapped, no padding). Copy directly
            env->GetByteArrayRegion(uBuf, 0, uSize, fullArrayNV21 + ySize);
            env->GetByteArrayRegion(vBuf, 0, vSize, fullArrayNV21 + ySize + uSize);
        } else {
            // UV overlapped, but with padding. Copy row by row
            // (a big performance improvement compared with copying byte by byte)
            int limit = height / 2 - 1;
            for (int row = 0; row < limit; row++) {
                env->GetByteArrayRegion(uBuf, row * vRowStride, width, fullArrayNV21 + ySize + (row * width));
            }
        }
    } else {
        // Worst case: UV not overlapped. Copy byte by byte
        for (int row = 0; row < height / 2; row++) {
            for (int col = 0; col < width / 2; col++) {
                int vuPos = col * uPixelStride + row * uRowStride;
                env->GetByteArrayRegion(vBuf, vuPos, 1, fullArrayNV21 + newArrayPosition);
                newArrayPosition++;
                env->GetByteArrayRegion(uBuf, vuPos, 1, fullArrayNV21 + newArrayPosition);
                newArrayPosition++;
            }
        }
    }
    return (uchar*)fullArrayNV21;
}
I'm sure some improvements can be added, but I have already tested it on many devices, and it works with very good performance and stability.
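For reference, the Java side feeding this native helper might look roughly like the sketch below. The native declaration is hypothetical (the real JNI bridge would wrap yuvToNV21, which also takes the JNIEnv* and an optional output array); each plane is copied into a byte[] because the helper expects jbyteArray arguments:

import android.media.Image;
import java.nio.ByteBuffer;

// Hypothetical native declaration wrapping the C helper above.
private static native byte[] yuvToNV21(byte[] y, byte[] u, byte[] v,
        int width, int height, int yRowStride, int yPixelStride,
        int uRowStride, int uPixelStride, int vRowStride, int vPixelStride);

// Copy a plane buffer into a byte[], since the native side expects jbyteArray.
private static byte[] toByteArray(ByteBuffer buffer) {
    byte[] bytes = new byte[buffer.remaining()];
    buffer.get(bytes);
    return bytes;
}

private static byte[] convert(Image image) {
    Image.Plane[] p = image.getPlanes();
    return yuvToNV21(
            toByteArray(p[0].getBuffer()), toByteArray(p[1].getBuffer()), toByteArray(p[2].getBuffer()),
            image.getWidth(), image.getHeight(),
            p[0].getRowStride(), p[0].getPixelStride(),
            p[1].getRowStride(), p[1].getPixelStride(),
            p[2].getRowStride(), p[2].getPixelStride());
}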