使用 SIMD 向量的 RGB 到 YCbCr 会丢失一些数据
RGB to YCbCr using SIMD vectors lose some data
我正在用 Rust 编写 JPEG decoder/encoder,但我在 RGB ↔ YCbCr 转换方面遇到了一些问题。
我的代码:
use std::simd::f32x4;
fn clamp<T>(val: T, min: T, max: T) -> T
where T: PartialOrd {
if val < min { min }
else if max < val { max }
else { val }
}
// in oryginal code there are 2 methods, one for processors with SSE3 and for rest
// both do the same and give the same results
pub fn sum_f32x4(f32x4(a, b, c, d): f32x4) -> f32 {
a + b + c + d
}
pub fn rgb_to_ycbcr(r: u8, g: u8, b: u8) -> (u8, u8, u8) {
let rgb = f32x4(r as f32, g as f32, b as f32, 1.0);
let y = sum_f32x4(rgb * f32x4( 0.2990, 0.5870, 0.1140, 0.0));
let cb = sum_f32x4(rgb * f32x4(-0.1687, -0.3313, 0.5000, 128.0));
let cr = sum_f32x4(rgb * f32x4( 0.5000, -0.4187, -0.0813, 128.0));
(y as u8, cb as u8, cr as u8)
}
pub fn ycbcr_to_rgb(y: u8, cb: u8, cr: u8) -> (u8, u8, u8) {
let ycbcr = f32x4(y as f32, cb as f32 - 128.0f32, cr as f32 - 128.0f32, 0.0);
let r = sum_f32x4(ycbcr * f32x4(1.0, 0.00000, 1.40200, 0.0));
let g = sum_f32x4(ycbcr * f32x4(1.0, -0.34414, -0.71414, 0.0));
let b = sum_f32x4(ycbcr * f32x4(1.0, 1.77200, 0.00000, 0.0));
(clamp(r, 0., 255.) as u8, clamp(g, 0., 255.) as u8, clamp(b, 0., 255.) as u8)
}
fn main() {
assert_eq!(rgb_to_ycbcr( 0, 71, 171), ( 61, 189, 84));
// assert_eq!(rgb_to_ycbcr( 0, 71, 169), ( 61, 189, 84)); // will fail
// for some reason we always lose data on blue channel
assert_eq!(ycbcr_to_rgb( 61, 189, 84), ( 0, 71, 169));
}
出于某种原因,摊位测试(在评论中)通过了。我宁愿期望其中至少有一个会失败。我错了吗?至少它应该在某个时候停止,但是当我将 jpeg::color::utils::rgb_to_ycbcr(0, 71, 171)
更改为 jpeg::color::utils::rgb_to_ycbcr(0, 71, 169)
时,由于 YCbCr 值已更改,因此测试失败,因此我将永远失去我的蓝色通道。
@dbaupp 把钉子钉在棺材里,建议使用 round
:
#![allow(unstable)]
use std::simd::{f32x4};
use std::num::Float;
fn clamp(val: f32) -> u8 {
if val < 0.0 { 0 }
else if val > 255.0 { 255 }
else { val.round() as u8 }
}
fn sum_f32x4(v: f32x4) -> f32 {
v.0 + v.1 + v.2 + v.3
}
pub fn rgb_to_ycbcr((r, g, b): (u8, u8, u8)) -> (u8, u8, u8) {
let rgb = f32x4(r as f32, g as f32, b as f32, 1.0);
let y = sum_f32x4(rgb * f32x4( 0.299000, 0.587000, 0.114000, 0.0));
let cb = sum_f32x4(rgb * f32x4(-0.168736, -0.331264, 0.500000, 128.0));
let cr = sum_f32x4(rgb * f32x4( 0.500000, -0.418688, -0.081312, 128.0));
(clamp(y), clamp(cb), clamp(cr))
}
pub fn ycbcr_to_rgb((y, cb, cr): (u8, u8, u8)) -> (u8, u8, u8) {
let ycbcr = f32x4(y as f32, cb as f32 - 128.0f32, cr as f32 - 128.0f32, 0.0);
let r = sum_f32x4(ycbcr * f32x4(1.0, 0.00000, 1.40200, 0.0));
let g = sum_f32x4(ycbcr * f32x4(1.0, -0.34414, -0.71414, 0.0));
let b = sum_f32x4(ycbcr * f32x4(1.0, 1.77200, 0.00000, 0.0));
(clamp(r), clamp(g), clamp(b))
}
fn main() {
let mut rgb = (0, 71, 16);
println!("{:?}", rgb);
for _ in 0..100 {
let yuv = rgb_to_ycbcr(rgb);
rgb = ycbcr_to_rgb(yuv);
println!("{:?}", rgb);
}
}
请注意,我还提高了 rgb_to_ycbcr
中 Wikipedia page 值的精度。我也在这两个函数中 clamp
,以及调用 round
。现在输出是:
(0u8, 71u8, 16u8)
(1u8, 72u8, 16u8)
(1u8, 72u8, 16u8)
最后一个值在整个循环中重复。
我正在用 Rust 编写 JPEG decoder/encoder,但我在 RGB ↔ YCbCr 转换方面遇到了一些问题。
我的代码:
use std::simd::f32x4;
fn clamp<T>(val: T, min: T, max: T) -> T
where T: PartialOrd {
if val < min { min }
else if max < val { max }
else { val }
}
// in oryginal code there are 2 methods, one for processors with SSE3 and for rest
// both do the same and give the same results
pub fn sum_f32x4(f32x4(a, b, c, d): f32x4) -> f32 {
a + b + c + d
}
pub fn rgb_to_ycbcr(r: u8, g: u8, b: u8) -> (u8, u8, u8) {
let rgb = f32x4(r as f32, g as f32, b as f32, 1.0);
let y = sum_f32x4(rgb * f32x4( 0.2990, 0.5870, 0.1140, 0.0));
let cb = sum_f32x4(rgb * f32x4(-0.1687, -0.3313, 0.5000, 128.0));
let cr = sum_f32x4(rgb * f32x4( 0.5000, -0.4187, -0.0813, 128.0));
(y as u8, cb as u8, cr as u8)
}
pub fn ycbcr_to_rgb(y: u8, cb: u8, cr: u8) -> (u8, u8, u8) {
let ycbcr = f32x4(y as f32, cb as f32 - 128.0f32, cr as f32 - 128.0f32, 0.0);
let r = sum_f32x4(ycbcr * f32x4(1.0, 0.00000, 1.40200, 0.0));
let g = sum_f32x4(ycbcr * f32x4(1.0, -0.34414, -0.71414, 0.0));
let b = sum_f32x4(ycbcr * f32x4(1.0, 1.77200, 0.00000, 0.0));
(clamp(r, 0., 255.) as u8, clamp(g, 0., 255.) as u8, clamp(b, 0., 255.) as u8)
}
fn main() {
assert_eq!(rgb_to_ycbcr( 0, 71, 171), ( 61, 189, 84));
// assert_eq!(rgb_to_ycbcr( 0, 71, 169), ( 61, 189, 84)); // will fail
// for some reason we always lose data on blue channel
assert_eq!(ycbcr_to_rgb( 61, 189, 84), ( 0, 71, 169));
}
出于某种原因,摊位测试(在评论中)通过了。我宁愿期望其中至少有一个会失败。我错了吗?至少它应该在某个时候停止,但是当我将 jpeg::color::utils::rgb_to_ycbcr(0, 71, 171)
更改为 jpeg::color::utils::rgb_to_ycbcr(0, 71, 169)
时,由于 YCbCr 值已更改,因此测试失败,因此我将永远失去我的蓝色通道。
@dbaupp 把钉子钉在棺材里,建议使用 round
:
#![allow(unstable)]
use std::simd::{f32x4};
use std::num::Float;
fn clamp(val: f32) -> u8 {
if val < 0.0 { 0 }
else if val > 255.0 { 255 }
else { val.round() as u8 }
}
fn sum_f32x4(v: f32x4) -> f32 {
v.0 + v.1 + v.2 + v.3
}
pub fn rgb_to_ycbcr((r, g, b): (u8, u8, u8)) -> (u8, u8, u8) {
let rgb = f32x4(r as f32, g as f32, b as f32, 1.0);
let y = sum_f32x4(rgb * f32x4( 0.299000, 0.587000, 0.114000, 0.0));
let cb = sum_f32x4(rgb * f32x4(-0.168736, -0.331264, 0.500000, 128.0));
let cr = sum_f32x4(rgb * f32x4( 0.500000, -0.418688, -0.081312, 128.0));
(clamp(y), clamp(cb), clamp(cr))
}
pub fn ycbcr_to_rgb((y, cb, cr): (u8, u8, u8)) -> (u8, u8, u8) {
let ycbcr = f32x4(y as f32, cb as f32 - 128.0f32, cr as f32 - 128.0f32, 0.0);
let r = sum_f32x4(ycbcr * f32x4(1.0, 0.00000, 1.40200, 0.0));
let g = sum_f32x4(ycbcr * f32x4(1.0, -0.34414, -0.71414, 0.0));
let b = sum_f32x4(ycbcr * f32x4(1.0, 1.77200, 0.00000, 0.0));
(clamp(r), clamp(g), clamp(b))
}
fn main() {
let mut rgb = (0, 71, 16);
println!("{:?}", rgb);
for _ in 0..100 {
let yuv = rgb_to_ycbcr(rgb);
rgb = ycbcr_to_rgb(yuv);
println!("{:?}", rgb);
}
}
请注意,我还提高了 rgb_to_ycbcr
中 Wikipedia page 值的精度。我也在这两个函数中 clamp
,以及调用 round
。现在输出是:
(0u8, 71u8, 16u8)
(1u8, 72u8, 16u8)
(1u8, 72u8, 16u8)
最后一个值在整个循环中重复。