Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions data/kernels/basic.cl
Original file line number Diff line number Diff line change
Expand Up @@ -3243,12 +3243,12 @@ colorzones_v3 (read_only image2d_t in,

if(x >= width || y >= height) return;

float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
float4 pixel = readpixel(in, x, y);

const float a = pixel.y;
const float b = pixel.z;
const float h = fmod(atan2(b, a) + 2.0f*M_PI_F, 2.0f*M_PI_F)/(2.0f*M_PI_F);
const float C = sqrt(b*b + a*a);
const float h = fmod(atan2(b, a) + DT_2PI_F, DT_2PI_F) / DT_2PI_F;
const float C = dt_fast_hypot(b, a);

float select = 0.0f;
float blend = 0.0f;
Expand All @@ -3264,7 +3264,7 @@ colorzones_v3 (read_only image2d_t in,
default:
case DT_IOP_COLORZONES_h:
select = h;
blend = pow(1.0f - C/128.0f, 2.0f);
blend = dtcl_pow(1.0f - C/128.0f, 2.0f);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: replace `pow(x, 2.0f)` with `x * x` (or `sqf(x)` in the CPU path).

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in the last PR.

break;
}

Expand All @@ -3273,11 +3273,11 @@ colorzones_v3 (read_only image2d_t in,
blend *= blend; // saturation isn't as prone to artifacts:
// const float Cm = 2.0f* (blend*0.5f + (1.0f-blend)*lookup(d->lut[1], select));
const float Cm = 2.0f * lookup(table_a, select);
const float L = pixel.x * pow(2.0f, 4.0f*Lm);
const float L = pixel.x * dtcl_pow(2.0f, 4.0f*Lm);

pixel.x = L;
pixel.y = cos(2.0f*M_PI_F*(h + hm)) * Cm * C;
pixel.z = sin(2.0f*M_PI_F*(h + hm)) * Cm * C;
pixel.y = dtcl_cos(DT_2PI_F*(h + hm)) * Cm * C;
pixel.z = dtcl_sin(DT_2PI_F*(h + hm)) * Cm * C;

write_imagef (out, (int2)(x, y), pixel);
}
Expand All @@ -3297,10 +3297,10 @@ colorzones (read_only image2d_t in,

if(x >= width || y >= height) return;

float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
float4 pixel = readpixel(in, x, y);

float4 LCh;
const float normalize_C = 1.f / (128.0f * sqrt(2.f));
const float normalize_C = 1.f / (128.0f * M_SQRT2_F);

LCh = Lab_2_LCH(pixel);

Expand Down
16 changes: 8 additions & 8 deletions data/kernels/blendop.cl
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,10 @@ blendif_factor_Lab(const float4 input, const float4 output,
float4 LCH_input = Lab_2_LCH(input);
float4 LCH_output = Lab_2_LCH(output);

scaled[DEVELOP_BLENDIF_C_in] = LCH_input.y / (128.0f*sqrt(2.0f)); // C scaled to 0..1
scaled[DEVELOP_BLENDIF_C_in] = LCH_input.y / (128.0f*M_SQRT2_F); // C scaled to 0..1
scaled[DEVELOP_BLENDIF_h_in] = LCH_input.z; // h scaled to 0..1

scaled[DEVELOP_BLENDIF_C_out] = LCH_output.y / (128.0f*sqrt(2.0f)); // C scaled to 0..1
scaled[DEVELOP_BLENDIF_C_out] = LCH_output.y / (128.0f*M_SQRT2_F); // C scaled to 0..1
scaled[DEVELOP_BLENDIF_h_out] = LCH_output.z; // h scaled to 0..1
}

Expand Down Expand Up @@ -1266,10 +1266,10 @@ blendop_rgb_hsl(__read_only image2d_t in_a, __read_only image2d_t in_b, __read_o
ta = RGB_2_HSV(a);
tb = RGB_2_HSV(b);
// blend color vectors of input and output
d = ta.y*cos(2.0f*M_PI_F*ta.x) * (1.0f - opacity) + tb.y*cos(2.0f*M_PI_F*tb.x) * opacity;
s = ta.y*sin(2.0f*M_PI_F*ta.x) * (1.0f - opacity) + tb.y*sin(2.0f*M_PI_F*tb.x) * opacity;
to.x = fmod(atan2(s, d)/(2.0f*M_PI_F)+1.0f, 1.0f);
to.y = sqrt(s*s + d*d);
d = ta.y*cos(DT_2PI_F*ta.x) * (1.0f - opacity) + tb.y*cos(DT_2PI_F*tb.x) * opacity;
s = ta.y*sin(DT_2PI_F*ta.x) * (1.0f - opacity) + tb.y*sin(DT_2PI_F*tb.x) * opacity;
to.x = fmod(atan2(s, d)/DT_2PI_F+1.0f, 1.0f);
to.y = dt_fast_hypot(s, d);
to.z = ta.z;
o = HSV_2_RGB(to);
break;
Expand Down Expand Up @@ -1560,12 +1560,12 @@ blendop_display_channel(__read_only image2d_t in_a, __read_only image2d_t in_b,
break;
case DT_DEV_PIXELPIPE_DISPLAY_LCH_C:
LCH = Lab_2_LCH(a);
c = clipf(LCH.y / (128.0f * sqrt(2.0f) / exp2(boost_factors[DEVELOP_BLENDIF_C_in])));
c = clipf(LCH.y / (128.0f * M_SQRT2_F / exp2(boost_factors[DEVELOP_BLENDIF_C_in])));
is_lab = 1;
break;
case (DT_DEV_PIXELPIPE_DISPLAY_LCH_C | DT_DEV_PIXELPIPE_DISPLAY_OUTPUT):
LCH = Lab_2_LCH(b);
c = clipf(LCH.y / (128.0f * sqrt(2.0f)) / exp2(boost_factors[DEVELOP_BLENDIF_C_out]));
c = clipf(LCH.y / (128.0f * M_SQRT2_F) / exp2(boost_factors[DEVELOP_BLENDIF_C_out]));
is_lab = 1;
break;
case DT_DEV_PIXELPIPE_DISPLAY_LCH_h:
Expand Down
19 changes: 4 additions & 15 deletions data/kernels/colorequal.cl
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,12 @@ typedef enum dt_iop_colorequal_channel_t

// Look up a weight for `sat` by linear interpolation between two adjacent
// entries of `weights`.
//   sat     : value to look up; clamped to [-1, 1 - 1/SATSIZE] before use.
//   weights : table read at indices i and i + 1.
// Returns weights[i] blended towards weights[i+1] by the fractional table position.
// NOTE(review): SATSIZE is defined outside this excerpt. The two consecutive
// `isat` declarations look like the before/after lines of the diff hunk; the
// second only adds an explicit (float) cast on SATSIZE.
static inline float _get_satweight(const float sat, global float *weights)
{
// Shift the clamped value by +1 and scale by SATSIZE, so the table position
// lies in [0, 2*SATSIZE - 1].
const float isat = SATSIZE * (1.0f + clamp(sat, -1.0f, 1.0f - (1.0f / SATSIZE)));
const float isat = (float)SATSIZE * (1.0f + clamp(sat, -1.0f, 1.0f - (1.0f / SATSIZE)));
// Integer part selects the lower table entry, the remainder is the blend factor.
const float base = floor(isat);
const int i = (int)base;
// i + 1 can reach 2*SATSIZE; presumably `weights` holds at least
// 2*SATSIZE + 1 entries — verify against the allocation.
return weights[i] + (isat - base) * (weights[i+1] - weights[i]);
}

// Gradient magnitude of `in` around flat index `k`, using the eight neighbours
// at k±1, k±w and k±w±1 (so `w` acts as the row stride).
//   gx : left-minus-right differences, diagonals weighted 47/255, centre row 162/255.
//   gy : above-minus-below differences, with the same weights.
// Returns dt_fast_hypot(gx, gy).
// No bounds checks are done here: the caller must keep `k` at least one row
// and one column away from the buffer border.
// NOTE(review): within this diff the helper appears to be the file-local
// version being replaced by a shared `scharr_gradient` — confirm.
static inline float _scharr_gradient(global float *in,
const size_t k,
const int w)
{
const float gx = 47.0f / 255.0f * (in[k-w-1] - in[k-w+1] + in[k+w-1] - in[k+w+1])
+ 162.0f / 255.0f * (in[k-1] - in[k+1]);
const float gy = 47.0f / 255.0f * (in[k-w-1] - in[k+w-1] + in[k-w+1] - in[k+w+1])
+ 162.0f / 255.0f * (in[k-w] - in[k+w]);
return dt_fast_hypot(gx, gy);
}

static inline float gamut_map_HSB(const float4 HSB, global float *gamut_LUT, const float L_white)
{
const float4 JCH = dt_UCS_HSB_to_JCH(HSB);
Expand Down Expand Up @@ -258,9 +247,9 @@ __kernel void apply_guided(global float2 *uv,
const float2 CV = { a[k].x * uv[k].x + a[k].y * uv[k].y + b[k].x,
a[k].z * uv[k].x + a[k].w * uv[k].y + b[k].y };

corrections[k].y = mix(1.0f, CV.x, _get_satweight(saturation[k] - sat_shift, weights));
corrections[k].y = 1.0f + (CV.x - 1.0f) * _get_satweight(saturation[k] - sat_shift, weights);
const float gradient_weight = 1.0f - clipf(scharr[k]);
b_corrections[k] = mix(0.0f, CV.y, gradient_weight * _get_satweight(saturation[k] - bright_shift, weights));
b_corrections[k] = CV.y * gradient_weight * _get_satweight(saturation[k] - bright_shift, weights);
}

__kernel void sample_input(__read_only image2d_t dev_in,
Expand Down Expand Up @@ -443,7 +432,7 @@ __kernel void process_data(global float2 *uv,
{
const int kk = mad24(clamp(row, 1, height - 2), width, clamp(col, 1, width - 2));

const float kscharr = fmax(0.0f, _scharr_gradient(saturation, kk, width) - 0.02f);
const float kscharr = fmax(0.0f, scharr_gradient(saturation, kk, width) - 0.02f);
Lscharr[k] = gradient_amp * kscharr * kscharr;
}

Expand Down
4 changes: 2 additions & 2 deletions data/kernels/colorharmonizer.cl
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ kernel void colorharmonizer_map(read_only image2d_t in,
float4 xyY = dt_D65_XYZ_to_xyY(XYZ_D65);
float4 JCH = xyY_to_dt_UCS_JCH(xyY, L_white);

const float hue = (JCH.z + M_PI_F) / (2.0f * M_PI_F);
const float hue = (JCH.z + M_PI_F) / DT_2PI_F;

const int idx = y * width + x;
jch_out[idx] = (float4)(JCH.x, JCH.y, hue, pix_in.w);
Expand Down Expand Up @@ -146,7 +146,7 @@ kernel void colorharmonizer_apply(write_only image2d_t out,
float4 JCH;
JCH.x = J;
JCH.y = fmax(chroma * (1.0f + corr.y * chroma_weight), 0.0f);
JCH.z = wrap_hue(hue + corr.x * effect_strength * chroma_weight) * 2.0f * M_PI_F - M_PI_F;
JCH.z = wrap_hue(hue + corr.x * effect_strength * chroma_weight) * DT_2PI_F - M_PI_F;

float4 xyY = dt_UCS_JCH_to_xyY(JCH, L_white);
float4 XYZ_D65 = dt_xyY_to_XYZ(xyY);
Expand Down
2 changes: 1 addition & 1 deletion data/kernels/colorreconstruction.cl
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ colorreconstruction_splat(
case COLORRECONSTRUCT_PRECEDENCE_HUE:
m = atan2(pixel.z, pixel.y) - params.x;
// readjust m into [-pi, +pi] interval
m = m > M_PI_F ? m - 2*M_PI_F : (m < -M_PI_F ? m + 2*M_PI_F : m);
m = m > M_PI_F ? m - DT_2PI_F : (m < -M_PI_F ? m + DT_2PI_F : m);
weight = exp(-m*m/params.y);
break;

Expand Down
16 changes: 8 additions & 8 deletions data/kernels/colorspace.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ static inline float4 Lab_2_LCH(float4 Lab)
{
float H = atan2(Lab.z, Lab.y);

H = (H > 0.0f) ? H / (2.0f*M_PI_F) : 1.0f - fabs(H) / (2.0f*M_PI_F);
H = (H > 0.0f) ? H / DT_2PI_F : 1.0f - fabs(H) / DT_2PI_F;

const float L = Lab.x;
const float C = hypot(Lab.y, Lab.z);
const float C = dt_fast_hypot(Lab.y, Lab.z);

return (float4)(L, C, H, Lab.w);
}
Expand All @@ -68,8 +68,8 @@ static inline float4 Lab_2_LCH(float4 Lab)
// Convert an LCH pixel to Lab.
//   LCH.x : lightness, passed through unchanged.
//   LCH.y : chroma, used as the radius.
//   LCH.z : hue as a fraction of a full turn (it is multiplied by 2*pi before
//           being passed to cos/sin).
//   LCH.w : passed through unchanged.
// Returns (L, a, b, w) with a = cos(hue) * C and b = sin(hue) * C.
// NOTE(review): the paired `a` and `b` declarations look like the before/after
// lines of the diff hunk; they differ only in spelling 2*pi as DT_2PI_F.
static inline float4 LCH_2_Lab(float4 LCH)
{
const float L = LCH.x;
const float a = cos(2.0f*M_PI_F*LCH.z) * LCH.y;
const float b = sin(2.0f*M_PI_F*LCH.z) * LCH.y;
const float a = cos(DT_2PI_F*LCH.z) * LCH.y;
const float b = sin(DT_2PI_F*LCH.z) * LCH.y;

return (float4)(L, a, b, LCH.w);
}
Expand Down Expand Up @@ -434,10 +434,10 @@ static inline float4 JzAzBz_2_XYZ(const float4 JzAzBz)

static inline float4 JzAzBz_to_JzCzhz(float4 JzAzBz)
{
const float h = atan2(JzAzBz.z, JzAzBz.y) / (2.0f * M_PI_F);
const float h = atan2(JzAzBz.z, JzAzBz.y) / DT_2PI_F;
float4 JzCzhz;
JzCzhz.x = JzAzBz.x;
JzCzhz.y = hypot(JzAzBz.y, JzAzBz.z);
JzCzhz.y = dt_fast_hypot(JzAzBz.y, JzAzBz.z);
JzCzhz.z = (h >= 0.0f) ? h : 1.0f + h;
JzCzhz.w = JzAzBz.w;
return JzCzhz;
Expand Down Expand Up @@ -561,7 +561,7 @@ static inline float4 Yrg_to_Ych(const float4 Yrg)
// -> grading RGB conversion.
const float r = Yrg.y - 0.21902143f;
const float g = Yrg.z - 0.54371398f;
const float c = hypot(g, r);
const float c = dt_fast_hypot(g, r);
const float cos_h = c != 0.f ? r / c : 1.f;
const float sin_h = c != 0.f ? g / c : 0.f;
return (float4)(Y, c, cos_h, sin_h);
Expand Down Expand Up @@ -955,7 +955,7 @@ static inline float lookup_gamut(global const float *gamut_lut, const float x)

// Linearly interpolate the value of the gamut LUT at the hue angle in radians.
// convert in LUT coordinate
const float x_test = (float)LUT_ELEM * (x + M_PI_F) / (2.f * M_PI_F);
const float x_test = (float)LUT_ELEM * (x + M_PI_F) / DT_2PI_F;

// find the 2 closest integer coordinates (next/previous)
const float x_prev = floor(x_test);
Expand Down
43 changes: 36 additions & 7 deletions data/kernels/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,26 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE


#ifndef M_PI_F
#define M_PI_F 3.14159265358979323846f // should be defined by the OpenCL compiler acc. to standard
#define M_PI_F 3.14159265358979323846f
#endif

// Single-precision fallbacks for math constants, each defined only when the
// name is not already available.
// NOTE(review): OpenCL C spells its built-in constants M_LN2_F, M_PI_2_F and
// M_PI_4_F. The `...f` names below are different identifiers (they look like
// the glibc float-constant spellings), so these guards will not pick up the
// OpenCL built-ins — confirm that the naming is intentional.

// ln(2)
#ifndef M_LN2f
#define M_LN2f 0.69314718055994530942f
#endif

// pi / 2
#ifndef M_PI_2f
#define M_PI_2f 1.57079632679489661923f
#endif

// pi / 4
#ifndef M_PI_4f
#define M_PI_4f 0.78539816339744830962f
#endif

// sqrt(2)
#ifndef M_SQRT2_F
#define M_SQRT2_F 1.41421356237309504880f
#endif

// 2 * pi; defined unconditionally (no #ifndef guard).
#define DT_2PI_F 6.28318530717958647693f

#define LUT_ELEM 512 // gamut LUT number of elements:

Expand All @@ -55,6 +71,11 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
#define dtcl_sin(A) native_sin(A)
#define dtcl_cos(A) native_cos(A)

// Length of the 2D vector (x, y), computed as native_sqrt(x*x + y*y).
// This definition sits alongside the native_sin/native_cos mappings of
// dtcl_sin/dtcl_cos, i.e. in the branch that selects native math functions.
// NOTE(review): the squares are formed directly, so unlike hypot() nothing
// guards against overflow/underflow of x*x + y*y; native_sqrt accuracy is
// implementation-defined per the OpenCL specification.
static inline float dt_fast_hypot(const float x, const float y)
{
return native_sqrt(x * x + y * y);
}

// Allow the compiler to convert a * b + c to fused multiply-add to use hardware acceleration
// on compatible platforms
#pragma OPENCL FP_CONTRACT ON
Expand All @@ -70,6 +91,11 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
#define dtcl_sin(A) sin(A)
#define dtcl_cos(A) cos(A)

// Length of the 2D vector (x, y); forwards directly to the hypot() built-in.
// This definition sits alongside the plain sin/cos mappings of
// dtcl_sin/dtcl_cos, i.e. in the branch that does not use native math functions.
static inline float dt_fast_hypot(const float x, const float y)
{
return hypot(x, y);
}

#pragma OPENCL FP_CONTRACT OFF
#endif

Expand All @@ -82,6 +108,15 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
m = t2; \
}

// Gradient magnitude of `in` around flat index `k`, using the eight neighbours
// at k±1, k±w and k±w±1 (so `w` acts as the row stride).
//   in : buffer to sample.
//   k  : flat index of the centre element.
//   w  : offset between vertically adjacent elements.
// Returns dt_fast_hypot(gx, gy).
// No bounds checks are done here: the caller must keep `k` at least one row
// and one column away from the buffer border.
static inline float scharr_gradient(global float *in, const int k, const int w)
{
// Left-minus-right differences: diagonal neighbours weighted 47/255,
// the centre row weighted 162/255.
const float gx = 47.0f / 255.0f * (in[k-w-1] - in[k-w+1] + in[k+w-1] - in[k+w+1])
+ 162.0f / 255.0f * (in[k-1] - in[k+1]);
// Above-minus-below differences with the same weights.
const float gy = 47.0f / 255.0f * (in[k-w-1] - in[k+w-1] + in[k-w+1] - in[k+w+1])
+ 162.0f / 255.0f * (in[k-w] - in[k+w]);
return dt_fast_hypot(gx, gy);
}

static inline int
FC(const int row, const int col, const unsigned int filters)
{
Expand Down Expand Up @@ -143,12 +178,6 @@ atomic_add_f(
#endif
}

// Length of the 2D vector (x, y), computed as dtcl_sqrt(x*x + y*y).
// NOTE(review): dtcl_sqrt is defined outside this excerpt. Within this diff,
// this definition appears to be the one replaced by the two variants placed
// next to the dtcl_sin/dtcl_cos macros — confirm.
static inline float
dt_fast_hypot(const float x, const float y)
{
return dtcl_sqrt(x * x + y * y);
}

/* we use this exp approximation to maintain full identity with cpu path */
static inline float
dt_fast_expf(const float x)
Expand Down
20 changes: 8 additions & 12 deletions data/kernels/demosaic_rcd.cl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ __kernel void rcd_populate (__read_only image2d_t in, global float *cfa, global
const int col = get_global_id(0);
const int row = get_global_id(1);
if(col >= w || row >= height) return;
const float val = scale * fmax(0.0f, readsingle(in, col, row));
const float val = scale * fmax(0.0f, Areadsingle(in, col, row));
const int color = FC(row, col, filters);

global float *rgbcol = rgb0;
Expand All @@ -51,7 +51,7 @@ __kernel void rcd_write_output (__write_only image2d_t out, global float *rgb0,
if(!(col >= border && col < w - border && row >= border && row < height - border)) return;
const int idx = mad24(row, w, col);

write_imagef(out, (int2)(col, row), (float4)(fmax(scale * rgb0[idx], 0.0f), fmax(scale * rgb1[idx], 0.0f), fmax(scale * rgb2[idx], 0.0f), 0.0f));
write_imagef(out, (int2)(col, row), fmax(0.0f, (float4)(scale * rgb0[idx], scale * rgb1[idx], scale * rgb2[idx], 0.0f)));
}

#define eps 1e-5f // Tolerance to avoid dividing by zero
Expand Down Expand Up @@ -278,8 +278,8 @@ __kernel void write_blended_dual(__read_only image2d_t high,
const int row = get_global_id(1);
if((col >= w) || (row >= height)) return;

const float4 high_val = readpixel(high, col, row);
const float4 low_val = readpixel(low, col, row);
const float4 high_val = Areadpixel(high, col, row);
const float4 low_val = Areadpixel(low, col, row);
const float4 blender = (float4)clipf(mask[mad24(row, w, col)]);
float4 data = mix(low_val, high_val, blender);
data.w = showmask ? blender.x : 0.0f;
Expand All @@ -297,8 +297,8 @@ __kernel void calc_Y0_mask(global float *mask,
if((col >= w) || (row >= height)) return;
const int idx = mad24(row, w, col);

const float4 pt = wb * fmax(0.0f, readpixel(in, col, row));
mask[idx] = dtcl_sqrt(0.33333333f * (pt.x + pt.y + pt.z));
const float4 pt = wb * fmax(0.0f, Areadpixel(in, col, row));
mask[idx] = dtcl_sqrt((pt.x + pt.y + pt.z) / 3.0f);
}

__kernel void calc_scharr_mask(global float *in, global float *out, const int w, const int height)
Expand All @@ -311,11 +311,7 @@ __kernel void calc_scharr_mask(global float *in, global float *out, const int w,
const int incol = clamp(col, 1, w - 2);
const int inrow = clamp(row, 1, height -2);
const int idx = mad24(inrow, w, incol);
const float gx = 47.0f / 255.0f * (in[idx-w-1] - in[idx-w+1] + in[idx+w-1] - in[idx+w+1])
+ 162.0f / 255.0f * (in[idx-1] - in[idx+1]);
const float gy = 47.0f / 255.0f * (in[idx-w-1] - in[idx+w-1] + in[idx-w+1] - in[idx+w+1])
+ 162.0f / 255.0f * (in[idx-w] - in[idx+w]);
const float gradient_magnitude = dt_fast_hypot(gx, gy);
const float gradient_magnitude = scharr_gradient(in, idx, w);
out[oidx] = clipf(gradient_magnitude / 16.0f);
}

Expand Down Expand Up @@ -350,7 +346,7 @@ kernel void demosaic_box3(read_only image2d_t in,
if(x >= 0 && y >= 0 && x < width && y < height)
{
const int color = fcol(y, x, filters, xtrans);
sum[color] += fmax(0.0f, read_imagef(in, sampleri, (int2)(x, y)).x);
sum[color] += fmax(0.0f, Areadsingle(in, x, y));
cnt[color] += 1.0f;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/chart/thinplate.c
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ int thinplate_match(const tonecurve_t *curve, // tonecurve to apply after this (
// Map a Lab colour to a scalar key made of a hue sector and the lightness.
//   L, a, b : Lab components of the colour.
// Returns 256 * sector + L, where `sector` is the hue angle scaled to
// quarter turns and truncated to int.
// NOTE(review): the two `sector` declarations look like the before/after
// lines of the diff hunk; they differ only in spelling 2*pi as DT_2PI_F.
float thinplate_color_pos(const float L, const float a, const float b)
{
// atan2f() yields [-pi, pi]; adding pi shifts the hue angle into [0, 2*pi].
const float h = atan2f(b, a) + M_PI_F;
// The float quotient is truncated on conversion to int; h == 2*pi exactly gives sector 4.
const int sector = 4.0f * h / (2.0f * M_PI_F);
const int sector = 4.0f * h / DT_2PI_F;
return 256.0 * sector + L; // C;
}

Expand Down
Loading
Loading