I have a RAW picture in 32-bits floats unnormalized I convert back and forth Lab <-> XYZ <-> prophotoRGB in C with SSE2 instructions:
// transform the pixel to sRGB:
// Lab -> XYZ
__m128 Lab = _mm_load_ps(in);
__m128 XYZ = dt_Lab_to_XYZ_sse2(Lab);
// XYZ -> sRGB
__m128 rgb = dt_XYZ_to_sRGB_sse2(XYZ);
// transform the result back to Lab
// sRGB -> XYZ
XYZ = dt_sRGB_to_XYZ_sse2(rgb);
// XYZ -> Lab
__m128 outv = dt_XYZ_to_Lab_sse2(XYZ);
_mm_store_ps(out, outv);
The conversions are like that :
static inline __m128 dt_XYZ_to_prophotoRGB_sse2(__m128 XYZ)
{
// XYZ -> prophotoRGB matrix, D50
const __m128 xyz_to_rgb_0 = _mm_setr_ps( 1.3459433f, -0.5445989f, 0.0000000f, 0.0f);
const __m128 xyz_to_rgb_1 = _mm_setr_ps(-0.2556075f, 1.5081673f, 0.0000000f, 0.0f);
const __m128 xyz_to_rgb_2 = _mm_setr_ps(-0.0511118f, -0.0205351f, 1.2118128f, 0.0f);
__m128 rgb
= _mm_add_ps(_mm_mul_ps(xyz_to_rgb_0, _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(0, 0, 0, 0))),
_mm_add_ps(_mm_mul_ps(xyz_to_rgb_1, _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(1, 1, 1, 1))),
_mm_mul_ps(xyz_to_rgb_2, _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(2, 2, 2, 2)))));
return rgb;
}
static inline __m128 dt_prophotoRGB_to_XYZ_sse2(__m128 rgb)
{
// prophotoRGB -> XYZ matrix, D50
const __m128 rgb_to_xyz_0 = _mm_setr_ps(0.7976749f, 0.2880402f, 0.0000000f, 0.0f);
const __m128 rgb_to_xyz_1 = _mm_setr_ps(0.1351917f, 0.7118741f, 0.0000000f, 0.0f);
const __m128 rgb_to_xyz_2 = _mm_setr_ps(0.0313534f, 0.0000857f, 0.8252100f, 0.0f);
__m128 XYZ
= _mm_add_ps(_mm_mul_ps(rgb_to_xyz_0, _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(0, 0, 0, 0))),
_mm_add_ps(_mm_mul_ps(rgb_to_xyz_1, _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(1, 1, 1, 1))),
_mm_mul_ps(rgb_to_xyz_2, _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(2, 2, 2, 2)))));
return XYZ;
}
The output gives slight magenta cast (just converting back and forth, nothing happens in-between).
The same code in OpenCL instructions doesn’t shift the colors, neither does a conversion to gamma-corrected sRGB (through the same Lab <-> XYZ function, so only the XYZ <-> prophotoRGB is faulty).
Here is the OpenCL variant:
float4 prophotorgb_to_XYZ(float4 rgb)
{
const float rgb_to_xyz[3][3] = { // prophoto rgb
{0.7976749f, 0.1351917f, 0.0313534f},
{0.2880402f, 0.7118741f, 0.0000857f},
{0.0000000f, 0.0000000f, 0.8252100f},
};
float4 XYZ = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
XYZ.x += rgb_to_xyz[0][0] * rgb.x;
XYZ.x += rgb_to_xyz[0][1] * rgb.y;
XYZ.x += rgb_to_xyz[0][2] * rgb.z;
XYZ.y += rgb_to_xyz[1][0] * rgb.x;
XYZ.y += rgb_to_xyz[1][1] * rgb.y;
XYZ.y += rgb_to_xyz[1][2] * rgb.z;
XYZ.z += rgb_to_xyz[2][0] * rgb.x;
XYZ.z += rgb_to_xyz[2][1] * rgb.y;
XYZ.z += rgb_to_xyz[2][2] * rgb.z;
return XYZ;
}
float4 XYZ_to_prophotorgb(float4 XYZ)
{
const float xyz_to_rgb[3][3] = { // prophoto rgb d50
{ 1.3459433f, -0.2556075f, -0.0511118f},
{-0.5445989f, 1.5081673f, 0.0205351f},
{ 0.0000000f, 0.0000000f, 1.2118128f},
};
float4 rgb = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
rgb.x += xyz_to_rgb[0][0] * XYZ.x;
rgb.x += xyz_to_rgb[0][1] * XYZ.y;
rgb.x += xyz_to_rgb[0][2] * XYZ.z;
rgb.y += xyz_to_rgb[1][0] * XYZ.x;
rgb.y += xyz_to_rgb[1][1] * XYZ.y;
rgb.y += xyz_to_rgb[1][2] * XYZ.z;
rgb.z += xyz_to_rgb[2][0] * XYZ.x;
rgb.z += xyz_to_rgb[2][1] * XYZ.y;
rgb.z += xyz_to_rgb[2][2] * XYZ.z;
return rgb;
}
What am I missing here ?