/* This file is part of darktable, copyright (c) 2018-2020 darktable developers. darktable is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. darktable is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with darktable. If not, see . */ #include "common.h" #include "colorspace.h" #include "color_conversion.h" #include "noise_generator.h" #define INVERSE_SQRT_3 0.5773502691896258f // In case the OpenCL driver doesn't have a dot method static inline float vdot(const float4 vec1, const float4 vec2) { return vec1.x * vec2.x + vec1.y * vec2.y + vec1.z * vec2.z; } typedef enum dt_iop_filmicrgb_methods_type_t { DT_FILMIC_METHOD_NONE = 0, DT_FILMIC_METHOD_MAX_RGB = 1, DT_FILMIC_METHOD_LUMINANCE = 2, DT_FILMIC_METHOD_POWER_NORM = 3, DT_FILMIC_METHOD_EUCLIDEAN_NORM_V2 = 5, DT_FILMIC_METHOD_EUCLIDEAN_NORM_V1 = 4, } dt_iop_filmicrgb_methods_type_t; typedef enum dt_iop_filmicrgb_colorscience_type_t { DT_FILMIC_COLORSCIENCE_V1 = 0, DT_FILMIC_COLORSCIENCE_V2 = 1, DT_FILMIC_COLORSCIENCE_V3 = 2, DT_FILMIC_COLORSCIENCE_V4 = 3, } dt_iop_filmicrgb_colorscience_type_t; typedef enum dt_iop_filmicrgb_reconstruction_type_t { DT_FILMIC_RECONSTRUCT_RGB = 0, DT_FILMIC_RECONSTRUCT_RATIOS = 1, } dt_iop_filmicrgb_reconstruction_type_t; typedef enum dt_iop_filmicrgb_curve_type_t { DT_FILMIC_CURVE_POLY_4 = 0, // $DESCRIPTION: "hard" DT_FILMIC_CURVE_POLY_3 = 1, // $DESCRIPTION: "soft" DT_FILMIC_CURVE_RATIONAL = 2, // $DESCRIPTION: "safe" } dt_iop_filmicrgb_curve_type_t; kernel void filmic (read_only image2d_t in, write_only image2d_t out, int width, int height, const float dynamic_range, const float shadows_range, const float grey, read_only image2d_t table, read_only image2d_t diff, const float contrast, const float power, const int preserve_color, const float saturation) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); if(x >= width || y >= height) return; float4 i = read_imagef(in, sampleri, (int2)(x, y)); const float4 xyz = Lab_to_XYZ(i); float4 o = XYZ_to_prophotorgb(xyz); const float noise = pow(2.0f, -16.0f); const float4 noise4 = noise; const float4 dynamic4 = dynamic_range; const float4 shadows4 = shadows_range; float derivative, luma; // Global desaturation if (saturation != 1.0f) { const float4 lum = xyz.y; o = lum + (float4)saturation * (o - lum); } if (preserve_color) { // Save the ratios float maxRGB = max(max(o.x, o.y), o.z); const float4 ratios = o / (float4)maxRGB; // Log profile maxRGB = maxRGB / grey; maxRGB = (maxRGB < noise) ? noise : maxRGB; maxRGB = (native_log2(maxRGB) - shadows_range) / dynamic_range; maxRGB = clamp(maxRGB, 0.0f, 1.0f); const float index = maxRGB; // Curve S LUT maxRGB = lookup(table, (const float)maxRGB); // Re-apply the ratios o = (float4)maxRGB * ratios; // Derivative derivative = lookup(diff, (const float)index); luma = maxRGB; } else { // Log profile o = o / grey; o = (o < noise) ? noise : o; o = (native_log2(o) - shadows4) / dynamic4; o = clamp(o, (float4)0.0f, (float4)1.0f); const float index = prophotorgb_to_XYZ(o).y; // Curve S LUT o.x = lookup(table, (const float)o.x); o.y = lookup(table, (const float)o.y); o.z = lookup(table, (const float)o.z); // Get the derivative derivative = lookup(diff, (const float)index); luma = prophotorgb_to_XYZ(o).y; } // Desaturate selectively o = (float4)luma + (float4)derivative * (o - (float4)luma); o = clamp(o, (float4)0.0f, (float4)1.0f); // Apply the transfer function of the display const float4 power4 = power; o = native_powr(o, power4); i.xyz = prophotorgb_to_Lab(o).xyz; write_imagef(out, (int2)(x, y), i); } /* Norms */ static inline float pixel_rgb_norm_power(const float4 pixel) { // weird norm sort of perceptual. This is black magic really, but it looks good. const float4 RGB = fabs(pixel); const float4 RGB_square = RGB * RGB; const float4 RGB_cubic = RGB_square * RGB; return (RGB_cubic.x + RGB_cubic.y + RGB_cubic.z) / fmax(RGB_square.x + RGB_square.y + RGB_square.z, 1e-12f); } static inline float pixel_rgb_norm_euclidean(const float4 pixel) { const float4 RGB = pixel; const float4 RGB_square = RGB * RGB; return native_sqrt(RGB_square.x + RGB_square.y + RGB_square.z); } static inline float get_pixel_norm(const float4 pixel, const dt_iop_filmicrgb_methods_type_t variant, constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, read_only image2d_t lut, const int use_work_profile) { switch(variant) { case DT_FILMIC_METHOD_MAX_RGB: return fmax(fmax(pixel.x, pixel.y), pixel.z); case DT_FILMIC_METHOD_LUMINANCE: return (use_work_profile) ? get_rgb_matrix_luminance(pixel, profile_info, profile_info->matrix_in, lut) : dt_camera_rgb_luminance(pixel); case DT_FILMIC_METHOD_POWER_NORM: return pixel_rgb_norm_power(pixel); case DT_FILMIC_METHOD_EUCLIDEAN_NORM_V1: return pixel_rgb_norm_euclidean(pixel); case DT_FILMIC_METHOD_EUCLIDEAN_NORM_V2: return pixel_rgb_norm_euclidean(pixel) * INVERSE_SQRT_3; case DT_FILMIC_METHOD_NONE: default: return (use_work_profile) ? get_rgb_matrix_luminance(pixel, profile_info, profile_info->matrix_in, lut) : dt_camera_rgb_luminance(pixel); } } /* Saturation */ static inline float filmic_desaturate_v1(const float x, const float sigma_toe, const float sigma_shoulder, const float saturation) { const float radius_toe = x; const float radius_shoulder = 1.0f - x; const float key_toe = native_exp(-0.5f * radius_toe * radius_toe / sigma_toe); const float key_shoulder = native_exp(-0.5f * radius_shoulder * radius_shoulder / sigma_shoulder); return 1.0f - clamp((key_toe + key_shoulder) / saturation, 0.0f, 1.0f); } static inline float filmic_desaturate_v2(const float x, const float sigma_toe, const float sigma_shoulder, const float saturation) { const float radius_toe = x; const float radius_shoulder = 1.0f - x; const float sat2 = 0.5f / native_sqrt(saturation); const float key_toe = native_exp(-radius_toe * radius_toe / sigma_toe * sat2); const float key_shoulder = native_exp(-radius_shoulder * radius_shoulder / sigma_shoulder * sat2); return (saturation - (key_toe + key_shoulder) * (saturation)); } static inline float4 linear_saturation(const float4 x, const float luminance, const float saturation) { return (float4)luminance + (float4)saturation * (x - (float4)luminance); } static inline float filmic_spline(const float x, const float4 M1, const float4 M2, const float4 M3, const float4 M4, const float4 M5, const float latitude_min, const float latitude_max, const dt_iop_filmicrgb_curve_type_t type[2]) { // if type polynomial : // y = M5 * x⁴ + M4 * x³ + M3 * x² + M2 * x¹ + M1 * x⁰ // but we rewrite it using Horner factorisation, to spare ops and enable FMA in available // else if type rational : // y = M1 * (M2 * (x - x_0)² + (x - x_0)) / (M2 * (x - x_0)² + (x - x_0) + M3) float result; if(x < latitude_min) { // toe if(type[0] == DT_FILMIC_CURVE_POLY_4) { // polynomial toe, 4th order result = M1.x + x * (M2.x + x * (M3.x + x * (M4.x + x * M5.x))); } else if(type[0] == DT_FILMIC_CURVE_POLY_3) { // polynomial toe, 3rd order result = M1.x + x * (M2.x + x * (M3.x + x * M4.x)); } else { // rational toe const float xi = latitude_min - x; const float rat = xi * (xi * M2.x + 1.f); result = M4.x - M1.x * rat / (rat + M3.x); } } else if(x > latitude_max) { // shoulder if(type[1] == DT_FILMIC_CURVE_POLY_4) { // polynomial shoulder, 4th order result = M1.y + x * (M2.y + x * (M3.y + x * (M4.y + x * M5.y))); } else if(type[1] == DT_FILMIC_CURVE_POLY_3) { // polynomial shoulder, 3rd order result = M1.y + x * (M2.y + x * (M3.y + x * M4.y)); } else { // rational toe const float xi = x - latitude_max; const float rat = xi * (xi * M2.y + 1.f); result = M4.y + M1.y * rat / (rat + M3.y); } } else { // latitude result = M1.z + x * M2.z; } return result; } #define NORM_MIN 1.52587890625e-05f // norm can't be < to 2^(-16) static inline float log_tonemapping_v1(const float x, const float grey, const float black, const float dynamic_range) { const float temp = (native_log2(x / grey) - black) / dynamic_range; return clamp(temp, NORM_MIN, 1.f); } static inline float log_tonemapping_v2(const float x, const float grey, const float black, const float dynamic_range) { return clamp((native_log2(x / grey) - black) / dynamic_range, 0.f, 1.f); } static inline float4 pipe_RGB_to_Ych(float4 in, constant const float *const matrix) { // go from pipeline RGB to CIE 2006 LMS D65 const float4 LMS = matrix_product_float4(in, matrix); // go from CIE LMS 2006 to Kirk/Filmlight Yrg const float4 Yrg = LMS_to_Yrg(LMS); // rewrite in polar coordinates return Yrg_to_Ych(Yrg); } static inline float4 Ych_to_pipe_RGB(float4 in, constant const float *const matrix) { // rewrite in cartesian coordinates const float4 Yrg = Ych_to_Yrg(in); // go from Kirk/Filmlight Yrg to CIE LMS 2006 const float4 LMS = Yrg_to_LMS(Yrg); // go from CIE LMS 2006 to pipeline RGB return matrix_product_float4(LMS, matrix); } static inline float4 filmic_desaturate_v4(const float4 Ych_original, float4 Ych_final, const float saturation) { // Note : Ych is normalized trough the LMS conversion, // meaning c is actually a saturation (saturation ~= chroma / brightness). // So copy-pasting c and h from a different Y is equivalent to // tonemapping with a norm, which is equivalent to doing exposure compensation : // it's saturation-invariant, aka chroma will get increased // if Y is increased, and the other way around. const float chroma_original = Ych_original.y * Ych_original.x; // c2 float chroma_final = Ych_final.y * Ych_final.x; // c1 // fit a linear model `chroma = f(y)`: // `chroma = c1 + (yc - y1) * (c2 - c1) / (y2 - y1)` // where `(yc - y1)` is user-defined as `saturation * (y2 - y1)` // so `chroma = c1 + saturation * (c2 - c1)` // when saturation = 0, we stay at the saturation-invariant final chroma // when saturation > 0, we go back towards the initial chroma before tone-mapping // when saturation < 0, we amplify the initial -> final chroma change const float delta_chroma = saturation * (chroma_original - chroma_final); const int filmic_brightens = (Ych_final.x > Ych_original.x); const int filmic_resat = (chroma_original < chroma_final); const int filmic_desat = (chroma_original > chroma_final); const int user_resat = (saturation > 0.f); const int user_desat = (saturation < 0.f); chroma_final = (filmic_brightens && filmic_resat) ? (chroma_original + chroma_final) / 2.f // force original lower sat if brightening : ((user_resat && filmic_desat) || user_desat) ? chroma_final + delta_chroma // allow resaturation only if filmic desaturated, allow desat anytime : chroma_final; Ych_final.y = fmax(chroma_final / Ych_final.x, 0.f); return Ych_final; } // Pipeline and ICC luminance is CIE Y 1931 // Kirk Ych/Yrg uses CIE Y 2006 // 1 CIE Y 1931 = 1.05785528 CIE Y 2006, so we need to adjust that. // This also accounts for the CAT16 D50->D65 adaptation that has to be done // to go from RGB to CIE LMS 2006. // Warning: only applies to achromatic pixels. #define CIE_Y_1931_to_CIE_Y_2006(x) (1.05785528f * (x)) static inline float clip_chroma_white_raw(constant const float *const coeffs, const float target_white, const float Y, const float cos_h, const float sin_h) { const float denominator_Y_coeff = coeffs[0] * (0.979381443298969f * cos_h + 0.391752577319588f * sin_h) + coeffs[1] * (0.0206185567010309f * cos_h + 0.608247422680412f * sin_h) - coeffs[2] * (cos_h + sin_h); const float denominator_target_term = target_white * (0.68285981628866f * cos_h + 0.482137060515464f * sin_h); // this channel won't limit the chroma if(denominator_Y_coeff == 0.f) return FLT_MAX; // The equation for max chroma has an asymptote at this point (zero of denominator). // Any Y below that value won't give us sensible results for the upper bound // and we should consider the lower bound instead. const float Y_asymptote = denominator_target_term / denominator_Y_coeff; if(Y <= Y_asymptote) return FLT_MAX; // Get chroma that brings one component of target RGB to the given target_rgb value. // coeffs are the transformation coeffs to get one components (R, G or B) from input LMS. // i.e. it is a row of the LMS -> RGB transformation matrix. // See tools/derive_filmic_v6_gamut_mapping.py for derivation of these equations. const float denominator = Y * denominator_Y_coeff - denominator_target_term; const float numerator = -0.427506877216495f * (Y * (coeffs[0] + 0.856492345150334f * coeffs[1] + 0.554995960637719f * coeffs[2]) - 0.988237752433297f * target_white); return numerator / denominator; } static inline float clip_chroma_white(constant const float *const coeffs, const float target_white, const float Y, const float cos_h, const float sin_h) { // Due to slight numerical inaccuracies in color matrices, // the chroma clipping curves for each RGB channel may be // slightly at the max luminance. Thus we linearly interpolate // each clipping line to zero chroma near max luminance. const float eps = 1e-3f; const float max_Y = CIE_Y_1931_to_CIE_Y_2006(target_white); const float delta_Y = fmax(max_Y - Y, 0.f); float max_chroma; if(delta_Y < eps) { max_chroma = delta_Y / (eps * max_Y) * clip_chroma_white_raw(coeffs, target_white, (1.f - eps) * max_Y, cos_h, sin_h); } else { max_chroma = clip_chroma_white_raw(coeffs, target_white, Y, cos_h, sin_h); } return max_chroma >= 0.f ? max_chroma : FLT_MAX; } static inline float clip_chroma_black(constant const float *const coeffs, const float cos_h, const float sin_h) { // N.B. this is the same as clip_chroma_white_raw() but with target value = 0. // This allows eliminating some computation. // Get chroma that brings one component of target RGB to zero. // coeffs are the transformation coeffs to get one components (R, G or B) from input LMS. // i.e. it is a row of the LMS -> RGB transformation matrix. // See tools/derive_filmic_v6_gamut_mapping.py for derivation of these equations. const float denominator = coeffs[0] * (0.979381443298969f * cos_h + 0.391752577319588f * sin_h) + coeffs[1] * (0.0206185567010309f * cos_h + 0.608247422680412f * sin_h) - coeffs[2] * (cos_h + sin_h); // this channel won't limit the chroma if(denominator == 0.f) return FLT_MAX; const float numerator = -0.427506877216495f * (coeffs[0] + 0.856492345150334f * coeffs[1] + 0.554995960637719f * coeffs[2]); const float max_chroma = numerator / denominator; return max_chroma >= 0.f ? max_chroma : FLT_MAX; } static inline float clip_chroma(constant const float *const matrix_out, const float target_white, const float Y, const float cos_h, const float sin_h, const float chroma) { // Note: ideally we should figure out in advance which channel is going to clip first // (either go negative or over maximum allowed value) and calculate chroma clipping // curves only for those channels. That would avoid some ambiguities // (what do negative chroma values mean etc.) and reduce computation. However this // "brute-force" approach seems to work fine for now. const float chroma_R_white = clip_chroma_white(&matrix_out[0], target_white, Y, cos_h, sin_h); const float chroma_G_white = clip_chroma_white(&matrix_out[4], target_white, Y, cos_h, sin_h); const float chroma_B_white = clip_chroma_white(&matrix_out[8], target_white, Y, cos_h, sin_h); const float max_chroma_white = fmin(fmin(chroma_R_white, chroma_G_white), chroma_B_white); const float chroma_R_black = clip_chroma_black(&matrix_out[0], cos_h, sin_h); const float chroma_G_black = clip_chroma_black(&matrix_out[4], cos_h, sin_h); const float chroma_B_black = clip_chroma_black(&matrix_out[8], cos_h, sin_h); const float max_chroma_black = fmin(fmin(chroma_R_black, chroma_G_black), chroma_B_black); return fmin(fmin(chroma, max_chroma_black), max_chroma_white); } static inline float4 gamut_check_RGB(constant const float *const matrix_in, constant const float *const matrix_out, const float display_black, const float display_white, const float4 Ych_in) { // Heuristic: if there are negatives, calculate the amount (luminance) of white light that // would need to be mixed in to bring the pixel back in gamut. float4 RGB_brightened = Ych_to_pipe_RGB(Ych_in, matrix_out); const float min_pix = fmin(fmin(RGB_brightened.x, RGB_brightened.y), RGB_brightened.z); const float black_offset = fmax(-min_pix, 0.f); RGB_brightened += black_offset; const float4 Ych_brightened = pipe_RGB_to_Ych(RGB_brightened, matrix_in); // Increase the input luminance a little by the value we calculated above. // Note, however, that this doesn't actually desaturate the color like mixing // white would do. We will next find the chroma change needed to bring the pixel // into gamut. const float Y = clamp((Ych_in.x + Ych_brightened.x) / 2.f, CIE_Y_1931_to_CIE_Y_2006(display_black), CIE_Y_1931_to_CIE_Y_2006(display_white)); const float cos_h = Ych_in.z; const float sin_h = Ych_in.w; const float new_chroma = clip_chroma(matrix_out, display_white, Y, cos_h, sin_h, Ych_in.y); // Go to RGB, using existing luminance and hue and the new chroma const float4 Ych = (float4)(Y, new_chroma, cos_h, sin_h); const float4 RGB_out = Ych_to_pipe_RGB(Ych, matrix_out); // Clamp in target RGB as a final catch-all return clamp(RGB_out, 0.f, display_white); } static inline float4 gamut_mapping(float4 Ych_final, float4 Ych_original, constant const float *const input_matrix, constant const float *const output_matrix, constant const float *const export_input_matrix, constant const float *const export_output_matrix, const float display_black, const float display_white, const float saturation, const int use_output_profile) { // Force final hue to original Ych_final.z = Ych_original.z; Ych_final.w = Ych_original.w; // Clip luminance Ych_final.x = clamp(Ych_final.x, CIE_Y_1931_to_CIE_Y_2006(display_black), CIE_Y_1931_to_CIE_Y_2006(display_white)); // Massage chroma Ych_final = filmic_desaturate_v4(Ych_original, Ych_final, saturation); Ych_final = gamut_check_Yrg(Ych_final); //Test- disable RGB gamut mapping. Don't do anything but clamp RGB to display white. const float4 RGB_out = Ych_to_pipe_RGB(Ych_final, output_matrix); return clamp(RGB_out, 0.f, display_white); float4 output; if(!use_output_profile) { // Now, it is still possible that one channel > display white or < display black because of saturation. // We have already clipped Y, so we know that any problem now is caused by c output = gamut_check_RGB(input_matrix, output_matrix, display_black, display_white, Ych_final); } else { // Now, it is still possible that one channel > display white or < display black because of saturation. // We have already clipped Y, so we know that any problem now is caused by c const float4 export_RGB = gamut_check_RGB(export_input_matrix, export_output_matrix, display_black, display_white, Ych_final); // Go from export RGB to CIE LMS 2006 D65 const float4 LMS = matrix_product_float4(export_RGB, export_input_matrix); // Go from CIE LMS 2006 D65 to pipeline RGB D50 output = matrix_product_float4(LMS, output_matrix); } return output; } static inline float4 filmic_chroma_v4(const float4 i, const float dynamic_range, const float black_exposure, const float grey_value, constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, read_only image2d_t lut, const int use_work_profile, const float sigma_toe, const float sigma_shoulder, const float saturation, const float4 M1, const float4 M2, const float4 M3, const float4 M4, const float4 M5, const float latitude_min, const float latitude_max, const float output_power, const dt_iop_filmicrgb_methods_type_t variant, const dt_iop_filmicrgb_colorscience_type_t colorscience_version, const dt_iop_filmicrgb_curve_type_t type[2], constant const float *const matrix_in, constant const float *const matrix_out, const float display_black, const float display_white, const int use_output_profile, constant const float *const export_matrix_in, constant const float *const export_matrix_out, const float norm_min, const float norm_max) { // Norm must be clamped early to the valid input range, otherwise it will be clamped // later in log_tonemapping_v2 and the ratios will be then incorrect. // This would result in colorful patches darker than their surrounding in places // where the raw data is clipped. float norm = clamp(get_pixel_norm(i, variant, profile_info, lut, use_work_profile), norm_min, norm_max); // Save the ratios float4 ratios = i / (float4)norm; // Log tonemapping norm = log_tonemapping_v2(norm, grey_value, black_exposure, dynamic_range); // Filmic S curve on the max RGB // Apply the transfer function of the display norm = native_powr(clamp(filmic_spline(norm, M1, M2, M3, M4, M5, latitude_min, latitude_max, type), display_black, display_white), output_power); // Restore RGB float4 o = norm * ratios; // Save Ych in Kirk/Filmlight Yrg float4 Ych_original = pipe_RGB_to_Ych(i, matrix_in); // Get final Ych in Kirk/Filmlight Yrg float4 Ych_final = pipe_RGB_to_Ych(o, matrix_in); return gamut_mapping(Ych_final, Ych_original, matrix_in, matrix_out, export_matrix_in, export_matrix_out, display_black, display_white, saturation, use_output_profile); } static inline float4 filmic_split_v4(const float4 i, const float dynamic_range, const float black_exposure, const float grey_value, constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, read_only image2d_t lut, const int use_work_profile, const float sigma_toe, const float sigma_shoulder, const float saturation, const float4 M1, const float4 M2, const float4 M3, const float4 M4, const float4 M5, const float latitude_min, const float latitude_max, const float output_power, const dt_iop_filmicrgb_colorscience_type_t colorscience_version, const dt_iop_filmicrgb_curve_type_t type[2], constant const float *const matrix_in, constant const float *const matrix_out, const float display_black, const float display_white, const int use_output_profile, constant const float *const export_matrix_in, constant const float *const export_matrix_out) { float4 o; // Log tonemapping o.x = log_tonemapping_v2(i.x, grey_value, black_exposure, dynamic_range); o.y = log_tonemapping_v2(i.y, grey_value, black_exposure, dynamic_range); o.z = log_tonemapping_v2(i.z, grey_value, black_exposure, dynamic_range); o.w = 0.f; // Filmic S curve on individual channels o.x = filmic_spline(o.x, M1, M2, M3, M4, M5, latitude_min, latitude_max, type); o.y = filmic_spline(o.y, M1, M2, M3, M4, M5, latitude_min, latitude_max, type); o.z = filmic_spline(o.z, M1, M2, M3, M4, M5, latitude_min, latitude_max, type); // Clamp to [0, display_white]: we don't want to clamp individual channels to display_black // as that would limit the max available saturation. Luminance is clipped to display_black later. // Apply output power function afterwards. o = native_powr(clamp(o, (float4)0.f, (float4)display_white), output_power); // Save Ych in Kirk/Filmlight Yrg float4 Ych_original = pipe_RGB_to_Ych(i, matrix_in); // Get final Ych in Kirk/Filmlight Yrg float4 Ych_final = pipe_RGB_to_Ych(o, matrix_in); Ych_final.y = fmin(Ych_original.y, Ych_final.y); return gamut_mapping(Ych_final, Ych_original, matrix_in, matrix_out, export_matrix_in, export_matrix_out, display_black, display_white, saturation, use_output_profile); } static inline float4 filmic_split_v1(const float4 i, const float dynamic_range, const float black_exposure, const float grey_value, constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, read_only image2d_t lut, const int use_work_profile, const float sigma_toe, const float sigma_shoulder, const float saturation, const float4 M1, const float4 M2, const float4 M3, const float4 M4, const float4 M5, const float latitude_min, const float latitude_max, const float output_power, const dt_iop_filmicrgb_curve_type_t type[2]) { float4 o; // Log tonemapping o.x = log_tonemapping_v1(fmax(i.x, NORM_MIN), grey_value, black_exposure, dynamic_range); o.y = log_tonemapping_v1(fmax(i.y, NORM_MIN), grey_value, black_exposure, dynamic_range); o.z = log_tonemapping_v1(fmax(i.z, NORM_MIN), grey_value, black_exposure, dynamic_range); // Selective desaturation of extreme luminances const float luminance = (use_work_profile) ? get_rgb_matrix_luminance(o, profile_info, profile_info->matrix_in, lut) : dt_camera_rgb_luminance(o); const float desaturation = filmic_desaturate_v1(luminance, sigma_toe, sigma_shoulder, saturation); o = linear_saturation(o, luminance, desaturation); // Filmic spline o.x = filmic_spline(o.x, M1, M2, M3, M4, M5, latitude_min, latitude_max, type); o.y = filmic_spline(o.y, M1, M2, M3, M4, M5, latitude_min, latitude_max, type); o.z = filmic_spline(o.z, M1, M2, M3, M4, M5, latitude_min, latitude_max, type); // Output power o = native_powr(clamp(o, (float4)0.0f, (float4)1.0f), output_power); return o; } static inline float4 filmic_split_v2_v3(const float4 i, const float dynamic_range, const float black_exposure, const float grey_value, constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, read_only image2d_t lut, const int use_work_profile, const float sigma_toe, const float sigma_shoulder, const float saturation, const float4 M1, const float4 M2, const float4 M3, const float4 M4, const float4 M5, const float latitude_min, const float latitude_max, const float output_power, const dt_iop_filmicrgb_curve_type_t type[2]) { float4 o; // Log tonemapping o.x = log_tonemapping_v2(fmax(i.x, NORM_MIN), grey_value, black_exposure, dynamic_range); o.y = log_tonemapping_v2(fmax(i.y, NORM_MIN), grey_value, black_exposure, dynamic_range); o.z = log_tonemapping_v2(fmax(i.z, NORM_MIN), grey_value, black_exposure, dynamic_range); // Selective desaturation of extreme luminances const float luminance = (use_work_profile) ? get_rgb_matrix_luminance(o, profile_info, profile_info->matrix_in, lut) : dt_camera_rgb_luminance(o); const float desaturation = filmic_desaturate_v2(luminance, sigma_toe, sigma_shoulder, saturation); o = linear_saturation(o, luminance, desaturation); // Filmic spline o.x = filmic_spline(o.x, M1, M2, M3, M4, M5, latitude_min, latitude_max, type); o.y = filmic_spline(o.y, M1, M2, M3, M4, M5, latitude_min, latitude_max, type); o.z = filmic_spline(o.z, M1, M2, M3, M4, M5, latitude_min, latitude_max, type); // Output power o = native_powr(clamp(o, (float4)0.0f, (float4)1.0f), output_power); return o; } kernel void filmicrgb_split (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const float dynamic_range, const float black_exposure, const float grey_value, constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, read_only image2d_t lut, const int use_work_profile, const float sigma_toe, const float sigma_shoulder, const float saturation, const float4 M1, const float4 M2, const float4 M3, const float4 M4, const float4 M5, const float latitude_min, const float latitude_max, const float output_power, const dt_iop_filmicrgb_colorscience_type_t color_science, const dt_iop_filmicrgb_curve_type_t type_1, const dt_iop_filmicrgb_curve_type_t type_2, constant const float *const matrix_in, constant const float *const matrix_out, const float display_black, const float display_white, const int use_output_profile, constant const float *const export_matrix_in, constant const float *const export_matrix_out) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); if(x >= width || y >= height) return; const float4 i = read_imagef(in, sampleri, (int2)(x, y)); float4 o; const dt_iop_filmicrgb_curve_type_t type[2] = { type_1, type_2 }; switch(color_science) { case DT_FILMIC_COLORSCIENCE_V1: { o = filmic_split_v1(i, dynamic_range, black_exposure, grey_value, profile_info, lut, use_work_profile, sigma_toe, sigma_shoulder, saturation, M1, M2, M3, M4, M5, latitude_min, latitude_max, output_power, type); break; } case DT_FILMIC_COLORSCIENCE_V2: case DT_FILMIC_COLORSCIENCE_V3: { o = filmic_split_v2_v3(i, dynamic_range, black_exposure, grey_value, profile_info, lut, use_work_profile, sigma_toe, sigma_shoulder, saturation, M1, M2, M3, M4, M5, latitude_min, latitude_max, output_power, type); break; } case DT_FILMIC_COLORSCIENCE_V4: { o = filmic_split_v4(i, dynamic_range, black_exposure, grey_value, profile_info, lut, use_work_profile, sigma_toe, sigma_shoulder, saturation, M1, M2, M3, M4, M5, latitude_min, latitude_max, output_power, color_science, type, matrix_in, matrix_out, display_black, display_white, use_output_profile, export_matrix_in, export_matrix_out); break; } } // Copy alpha layer and save o.w = i.w; write_imagef(out, (int2)(x, y), o); } static inline float4 filmic_chroma_v1(const float4 i, const float dynamic_range, const float black_exposure, const float grey_value, constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, read_only image2d_t lut, const int use_work_profile, const float sigma_toe, const float sigma_shoulder, const float saturation, const float4 M1, const float4 M2, const float4 M3, const float4 M4, const float4 M5, const float latitude_min, const float latitude_max, const float output_power, const dt_iop_filmicrgb_methods_type_t variant, const dt_iop_filmicrgb_curve_type_t type[2]) { float norm = fmax(get_pixel_norm(i, variant, profile_info, lut, use_work_profile), NORM_MIN); // Save the ratios float4 o = i / (float4)norm; // Sanitize the ratios const float min_ratios = fmin(fmin(o.x, o.y), o.z); if(min_ratios < 0.0f) o -= (float4)min_ratios; // Log tonemapping norm = log_tonemapping_v1(norm, grey_value, black_exposure, dynamic_range); // Selective desaturation of extreme luminances o *= (float4)norm; const float luminance = (use_work_profile) ? get_rgb_matrix_luminance(o, profile_info, profile_info->matrix_in, lut) : dt_camera_rgb_luminance(o); const float desaturation = filmic_desaturate_v1(norm, sigma_toe, sigma_shoulder, saturation); o = linear_saturation(o, luminance, desaturation); o /= (float4)norm; // Filmic S curve on the max RGB // Apply the transfer function of the display norm = native_powr(clamp(filmic_spline(norm, M1, M2, M3, M4, M5, latitude_min, latitude_max, type), 0.0f, 1.0f), output_power); return o * norm; } static inline float4 filmic_chroma_v2_v3(const float4 i, const float dynamic_range, const float black_exposure, const float grey_value, constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, read_only image2d_t lut, const int use_work_profile, const float sigma_toe, const float sigma_shoulder, const float saturation, const float4 M1, const float4 M2, const float4 M3, const float4 M4, const float4 M5, const float latitude_min, const float latitude_max, const float output_power, const dt_iop_filmicrgb_methods_type_t variant, const dt_iop_filmicrgb_colorscience_type_t colorscience_version, const dt_iop_filmicrgb_curve_type_t type[2]) { float norm = fmax(get_pixel_norm(i, variant, profile_info, lut, use_work_profile), NORM_MIN); // Save the ratios float4 ratios = i / (float4)norm; // Sanitize the ratios const float min_ratios = fmin(fmin(ratios.x, ratios.y), ratios.z); if(min_ratios < 0.0f) ratios -= (float4)min_ratios; // Log tonemapping norm = log_tonemapping_v2(norm, grey_value, black_exposure, dynamic_range); // Get the desaturation value based on the log value const float4 desaturation = (float4)filmic_desaturate_v2(norm, sigma_toe, sigma_shoulder, saturation); // Filmic S curve on the max RGB // Apply the transfer function of the display norm = native_powr(clamp(filmic_spline(norm, M1, M2, M3, M4, M5, latitude_min, latitude_max, type), 0.0f, 1.0f), output_power); // Re-apply ratios with saturation change ratios = fmax(ratios + ((float4)1.0f - ratios) * ((float4)1.0f - desaturation), (float4)0.f); if(colorscience_version == DT_FILMIC_COLORSCIENCE_V3) norm /= fmax(get_pixel_norm(ratios, variant, profile_info, lut, use_work_profile), NORM_MIN); float4 o = (float4)norm * ratios; // Gamut mapping const float max_pix = fmax(fmax(o.x, o.y), o.z); const int penalize = (max_pix > 1.0f); // Penalize the ratios by the amount of clipping if(penalize) { ratios = fmax(ratios + ((float4)1.0f - (float4)max_pix), (float4)0.0f); o = clamp((float4)norm * ratios, (float4)0.0f, (float4)1.0f); } return o; } kernel void filmicrgb_chroma (read_only image2d_t in, write_only image2d_t out, const int width, const int height, const float dynamic_range, const float black_exposure, const float grey_value, constant const dt_colorspaces_iccprofile_info_cl_t *const profile_info, read_only image2d_t lut, const int use_work_profile, const float sigma_toe, const float sigma_shoulder, const float saturation, const float4 M1, const float4 M2, const float4 M3, const float4 M4, const float4 M5, const float latitude_min, const float latitude_max, const float output_power, const dt_iop_filmicrgb_methods_type_t variant, const dt_iop_filmicrgb_colorscience_type_t color_science, const dt_iop_filmicrgb_curve_type_t type_1, const dt_iop_filmicrgb_curve_type_t type_2, constant const float *const matrix_in, constant const float *const matrix_out, const float display_black, const float display_white, const int use_output_profile, constant const float *const export_matrix_in, constant const float *const export_matrix_out, const float norm_min, const float norm_max) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); if(x >= width || y >= height) return; const float4 i = read_imagef(in, sampleri, (int2)(x, y)); float4 o; const dt_iop_filmicrgb_curve_type_t type[2] = { type_1, type_2 }; switch(color_science) { case DT_FILMIC_COLORSCIENCE_V1: { o = filmic_chroma_v1(i, dynamic_range, black_exposure, grey_value, profile_info, lut, use_work_profile, sigma_toe, sigma_shoulder, saturation, M1, M2, M3, M4, M5, latitude_min, latitude_max, output_power, variant, type); break; } case DT_FILMIC_COLORSCIENCE_V2: case DT_FILMIC_COLORSCIENCE_V3: { o = filmic_chroma_v2_v3(i, dynamic_range, black_exposure, grey_value, profile_info, lut, use_work_profile, sigma_toe, sigma_shoulder, saturation, M1, M2, M3, M4, M5, latitude_min, latitude_max, output_power, variant, color_science, type); break; } case DT_FILMIC_COLORSCIENCE_V4: { o = filmic_chroma_v4(i, dynamic_range, black_exposure, grey_value, profile_info, lut, use_work_profile, sigma_toe, sigma_shoulder, saturation, M1, M2, M3, M4, M5, latitude_min, latitude_max, output_power, variant, color_science, type, matrix_in, matrix_out, display_black, display_white, use_output_profile, export_matrix_in, export_matrix_out, norm_min, norm_max); break; } } o.w = i.w; write_imagef(out, (int2)(x, y), o); } kernel void filmic_mask_clipped_pixels(read_only image2d_t in, write_only image2d_t out, int width, int height, const float normalize, const float feathering, write_only image2d_t is_clipped) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); if(x >= width || y >= height) return; float4 i = read_imagef(in, sampleri, (int2)(x, y)); const float4 i2 = i * i; const float pix_max = fmax(native_sqrt(i2.x + i2.y + i2.z), 0.f); const float argument = -pix_max * normalize + feathering; const float weight = clamp(1.0f / ( 1.0f + native_exp2(argument)), 0.f, 1.f); if(4.f > argument) write_imageui(is_clipped, (int2)(0, 0), 1); write_imagef(out, (int2)(x, y), weight); } kernel void filmic_show_mask(read_only image2d_t in, write_only image2d_t out, const int width, const int height) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); if(x >= width || y >= height) return; const float i = (read_imagef(in, sampleri, (int2)(x, y))).x; write_imagef(out, (int2)(x, y), (float4){i, i, i, 1.0f}); } kernel void filmic_inpaint_noise(read_only image2d_t in, read_only image2d_t mask, write_only image2d_t out, const int width, const int height, const float noise_level, const float threshold, const dt_noise_distribution_t noise_distribution) { const unsigned int x = get_global_id(0); const unsigned int y = get_global_id(1); if(x >= width || y >= height) return; // Init random number generator unsigned int state[4] = { splitmix32(x + 1), splitmix32((x + 1) * (y + 3)), splitmix32(1337), splitmix32(666) }; xoshiro128plus(state); xoshiro128plus(state); xoshiro128plus(state); xoshiro128plus(state); // create noise const float4 i = read_imagef(in, sampleri, (int2)(x, y)); const float4 sigma = i * noise_level / threshold; const float4 noise = dt_noise_generator_simd(noise_distribution, i, sigma, state); const float weight = (read_imagef(mask, sampleri, (int2)(x, y))).x; const float4 o = fmax(i * (1.0f - weight) + weight * noise, 0.f); write_imagef(out, (int2)(x, y), o); } kernel void init_reconstruct(read_only image2d_t in, read_only image2d_t mask, write_only image2d_t out, const int width, const int height) { // init the reconstructed buffer with non-clipped and partially clipped pixels // Note : it's a simple multiplied alpha blending where mask = alpha weight const int x = get_global_id(0); const int y = get_global_id(1); if(x >= width || y >= height) return; const float4 i = read_imagef(in, sampleri, (int2)(x, y)); const float4 weight = 1.f - (read_imagef(mask, sampleri, (int2)(x, y))).x; float4 o = fmax(i * weight, 0.f); // copy masks and alpha o.w = i.w; write_imagef(out, (int2)(x, y), o); } static inline float fmaxabsf(const float a, const float b) { // Find the max in absolute value and return it with its sign return (fabs(a) > fabs(b) && !isnan(a)) ? a : (isnan(b)) ? 0.f : b; } static inline float fminabsf(const float a, const float b) { // Find the min in absolute value and return it with its sign return (fabs(a) < fabs(b) && !isnan(a)) ? a : (isnan(b)) ? 0.f : b; } kernel void wavelets_reconstruct(read_only image2d_t HF, read_only image2d_t LF, read_only image2d_t texture, read_only image2d_t mask, read_only image2d_t reconstructed_read, write_only image2d_t reconstructed_write, const int width, const int height, const float gamma, const float gamma_comp, const float beta, const float beta_comp, const float delta, const int s, const int scales, const dt_iop_filmicrgb_reconstruction_type_t variant) { /* * we pack the ratios and RGB methods in the same kernels since they differ by 2 lines * and avoiding kernels proliferation is a good thing since each kernel creates overhead * when initialized */ const int x = get_global_id(0); const int y = get_global_id(1); if(x >= width || y >= height) return; const float alpha = read_imagef(mask, sampleri, (int2)(x, y)).x; const float4 HF_c = read_imagef(HF, sampleri, (int2)(x, y)); const float4 LF_c = read_imagef(LF, sampleri, (int2)(x, y)); const float4 TT_c = read_imagef(texture, sampleri, (int2)(x, y)); float4 details; float4 residual; switch(variant) { case(DT_FILMIC_RECONSTRUCT_RGB): { const float grey_texture = fmaxabsf(fmaxabsf(TT_c.x, TT_c.y), TT_c.z); const float grey_details = (HF_c.x + HF_c.y + HF_c.z) / 3.f; const float grey_HF = beta_comp * (gamma_comp * grey_details + gamma * grey_texture); const float grey_residual = beta_comp * (LF_c.x + LF_c.y + LF_c.z) / 3.f; details = (gamma_comp * HF_c + gamma * TT_c) * beta + grey_HF; residual = (s == scales - 1) ? grey_residual + LF_c * beta : (float4)0.f; break; } case(DT_FILMIC_RECONSTRUCT_RATIOS): { const float grey_texture = fmaxabsf(fmaxabsf(TT_c.x, TT_c.y), TT_c.z); const float grey_details = (HF_c.x + HF_c.y + HF_c.z) / 3.f; const float grey_HF = (gamma_comp * grey_details + gamma * grey_texture); details = 0.5f * ((gamma_comp * HF_c + gamma * TT_c) + grey_HF); residual = (s == scales - 1) ? LF_c : (float4)0.f; break; } } const float4 i = read_imagef(reconstructed_read, sampleri, (int2)(x, y)); const float4 o = i + alpha * (delta * details + residual); write_imagef(reconstructed_write, (int2)(x, y), o); } kernel void compute_ratios(read_only image2d_t in, write_only image2d_t norms, write_only image2d_t ratios, const dt_iop_filmicrgb_methods_type_t variant, const int width, const int height) { const int x = get_global_id(0); const int y = get_global_id(1); if(x >= width || y >= height) return; const float4 i = read_imagef(in, sampleri, (int2)(x, y)); const float norm = fmax(pixel_rgb_norm_euclidean(i), NORM_MIN); const float4 ratio = i / norm; write_imagef(norms, (int2)(x, y), norm); write_imagef(ratios, (int2)(x, y), ratio); } kernel void restore_ratios(read_only image2d_t ratios, read_only image2d_t norms, write_only image2d_t out, const int width, const int height) { const int x = get_global_id(0); const int y = get_global_id(1); if(x >= width || y >= height) return; const float4 ratio = read_imagef(ratios, sampleri, (int2)(x, y)); const float norm = read_imagef(norms, sampleri, (int2)(x, y)).x; const float4 o = clamp(ratio, 0.f, 1.f) * norm; write_imagef(out, (int2)(x, y), o); }