From 8f2e9ef74524eb6257ca9414b1e8b0e00f32b674 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Thu, 22 Feb 2018 14:51:52 -0800 Subject: [PATCH 1/4] Refactor IBA::resample() More cleanly separate the deep and non-deep cases. --- src/libOpenImageIO/imagebufalgo_xform.cpp | 110 ++++++++++++++-------- 1 file changed, 72 insertions(+), 38 deletions(-) diff --git a/src/libOpenImageIO/imagebufalgo_xform.cpp b/src/libOpenImageIO/imagebufalgo_xform.cpp index 91ffc0637c..eeab73e360 100644 --- a/src/libOpenImageIO/imagebufalgo_xform.cpp +++ b/src/libOpenImageIO/imagebufalgo_xform.cpp @@ -501,12 +501,11 @@ static bool resample_ (ImageBuf &dst, const ImageBuf &src, bool interpolate, ROI roi, int nthreads) { + ASSERT (!src.deep() && !dst.deep()); ImageBufAlgo::parallel_image (roi, nthreads, [&](ROI roi){ const ImageSpec &srcspec (src.spec()); const ImageSpec &dstspec (dst.spec()); int nchannels = src.nchannels(); - bool deep = src.deep(); - ASSERT (deep == dst.deep()); // Local copies of the source image window, converted to float float srcfx = srcspec.full_x; @@ -535,21 +534,7 @@ resample_ (ImageBuf &dst, const ImageBuf &src, bool interpolate, float s = (x-dstfx+0.5f)*dstpixelwidth; float src_xf = srcfx + s * srcfw; int src_x = ifloor (src_xf); - if (deep) { - srcpel.pos (src_x, src_y, 0); - int nsamps = srcpel.deep_samples(); - ASSERT (nsamps == out.deep_samples()); - if (! nsamps) - continue; - for (int c = 0; c < nchannels; ++c) { - if (dstspec.channelformat(c) == TypeDesc::UINT32) - for (int samp = 0; samp < nsamps; ++samp) - out.set_deep_value (c, samp, srcpel.deep_value_uint(c, samp)); - else - for (int samp = 0; samp < nsamps; ++samp) - out.set_deep_value (c, samp, srcpel.deep_value(c, samp)); - } - } else if (interpolate) { + if (interpolate) { // Non-deep image, bilinearly interpolate src.interppixel (src_xf, src_yf, pel); for (int c = roi.chbegin; c < roi.chend; ++c) @@ -568,6 +553,75 @@ resample_ (ImageBuf &dst, const ImageBuf &src, bool interpolate, +static bool +resample_deep (ImageBuf &dst, const ImageBuf &src, bool interpolate, + ROI roi, int nthreads) +{ + ASSERT (src.deep() && dst.deep()); + + // If it's deep, figure out the sample allocations first, because + // it's not thread-safe to do that simultaneously with copying the + // values. + const ImageSpec &srcspec (src.spec()); + const ImageSpec &dstspec (dst.spec()); + float srcfx = srcspec.full_x; + float srcfy = srcspec.full_y; + float srcfw = srcspec.full_width; + float srcfh = srcspec.full_height; + float dstfx = dstspec.full_x; + float dstfy = dstspec.full_y; + float dstfw = dstspec.full_width; + float dstfh = dstspec.full_height; + float dstpixelwidth = 1.0f / dstfw; + float dstpixelheight = 1.0f / dstfh; + ImageBuf::ConstIterator srcpel (src, roi); + ImageBuf::Iterator dstpel (dst, roi); + for ( ; !dstpel.done(); ++dstpel, ++srcpel) { + float s = (dstpel.x()-dstspec.full_x+0.5f)*dstpixelwidth; + float t = (dstpel.y()-dstspec.full_y+0.5f)*dstpixelheight; + int src_y = ifloor (srcfy + t * srcfh); + int src_x = ifloor (srcfx + s * srcfw); + srcpel.pos (src_x, src_y, 0); + dstpel.set_deep_samples (srcpel.deep_samples ()); + } + + ImageBufAlgo::parallel_image (roi, nthreads, [=,&dst,&src](ROI roi){ + int nchannels = src.nchannels(); + const ImageSpec &dstspec (dst.spec()); + ImageBuf::Iterator out (dst, roi); + ImageBuf::ConstIterator srcpel (src); + for (int y = roi.ybegin; y < roi.yend; ++y) { + // s,t are NDC space + float t = (y-dstfy+0.5f)*dstpixelheight; + // src_xf, src_xf are image space float coordinates + float src_yf = srcfy + t * srcfh; + // src_x, src_y are image space integer coordinates of the floor + int src_y = ifloor (src_yf); + for (int x = roi.xbegin; x < roi.xend; ++x, ++out) { + float s = (x-dstfx+0.5f)*dstpixelwidth; + float src_xf = srcfx + s * srcfw; + int src_x = ifloor (src_xf); + srcpel.pos (src_x, src_y, 0); + int nsamps = srcpel.deep_samples(); + ASSERT (nsamps == out.deep_samples()); + if (! nsamps) + continue; + for (int c = 0; c < nchannels; ++c) { + if (dstspec.channelformat(c) == TypeDesc::UINT32) + for (int samp = 0; samp < nsamps; ++samp) + out.set_deep_value (c, samp, srcpel.deep_value_uint(c, samp)); + else + for (int samp = 0; samp < nsamps; ++samp) + out.set_deep_value (c, samp, srcpel.deep_value(c, samp)); + } + } + } + }); + return true; +} + + + bool ImageBufAlgo::resample (ImageBuf &dst, const ImageBuf &src, bool interpolate, ROI roi, int nthreads) @@ -579,27 +633,7 @@ ImageBufAlgo::resample (ImageBuf &dst, const ImageBuf &src, return false; if (dst.deep()) { - // If it's deep, figure out the sample allocations first, because - // it's not thread-safe to do that simultaneously with copying the - // values. - const ImageSpec &srcspec (src.spec()); - const ImageSpec &dstspec (dst.spec()); - float srcfx = srcspec.full_x; - float srcfy = srcspec.full_y; - float srcfw = srcspec.full_width; - float srcfh = srcspec.full_height; - float dstpixelwidth = 1.0f / dstspec.full_width; - float dstpixelheight = 1.0f / dstspec.full_height; - ImageBuf::ConstIterator srcpel (src, roi); - ImageBuf::Iterator dstpel (dst, roi); - for ( ; !dstpel.done(); ++dstpel, ++srcpel) { - float s = (dstpel.x()-dstspec.full_x+0.5f)*dstpixelwidth; - float t = (dstpel.y()-dstspec.full_y+0.5f)*dstpixelheight; - int src_y = ifloor (srcfy + t * srcfh); - int src_x = ifloor (srcfx + s * srcfw); - srcpel.pos (src_x, src_y, 0); - dstpel.set_deep_samples (srcpel.deep_samples ()); - } + return resample_deep (dst, src, interpolate, roi, nthreads); } bool ok; From e8a93d84216726269365c83a4c70752a1d4278c7 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Fri, 2 Mar 2018 21:02:44 -0800 Subject: [PATCH 2/4] Update test --- src/libOpenImageIO/imagebufalgo_test.cpp | 79 ++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/src/libOpenImageIO/imagebufalgo_test.cpp b/src/libOpenImageIO/imagebufalgo_test.cpp index 67cbbaf982..5294bdcdea 100644 --- a/src/libOpenImageIO/imagebufalgo_test.cpp +++ b/src/libOpenImageIO/imagebufalgo_test.cpp @@ -55,6 +55,51 @@ static bool verbose = false; static bool wedge = false; static int threadcounts[] = { 1, 2, 4, 8, 12, 16, 20, 24, 28, 32, 64, 128, 1024, 1<<30 }; +namespace { + +// Some pre-made specs and buffers -- make these once to unclutter the +// test functions themselves. +ImageSpec spec_2x2_f (2, 2, 1, TypeFloat); +ImageSpec spec_2x2_rgb_f (2, 2, 3, TypeFloat); +ImageSpec spec_1k_rgb_f (1024, 1024, 3, TypeFloat); +ImageSpec spec_1k_rgb_u8 (1024, 1024, 3, TypeUInt8); +ImageSpec spec_1k_rgb_u16 (1024, 1024, 3, TypeUInt16); +ImageSpec spec_1k_rgba_f (1024, 1024, 4, TypeFloat); +ImageSpec spec_1k_rgba_h (1024, 1024, 4, TypeHalf); +ImageSpec spec_1k_rgba_u8 (1024, 1024, 4, TypeUInt8); +ImageSpec spec_1k_rgba_u16 (1024, 1024, 4, TypeUInt16); +ImageSpec spec_hd_rgb_f (1920, 1080, 3, TypeFloat); +ImageSpec spec_hd_rgb_h (1920, 1080, 3, TypeHalf); +ImageSpec spec_hd_rgb_u8 (1920, 1080, 3, TypeUInt8); +ImageSpec spec_hd_rgb_u16 (1920, 1080, 3, TypeUInt16); +ImageSpec spec_hd_rgba_f (1920, 1080, 4, TypeFloat); +ImageSpec spec_hd_rgba_h (1920, 1080, 4, TypeHalf); +ImageSpec spec_hd_rgba_u8 (1920, 1080, 4, TypeUInt8); +ImageSpec spec_hd_rgba_u16 (1920, 1080, 4, TypeUInt16); + +ImageBuf buf_2x2_f (spec_2x2_f); +ImageBuf buf_2x2_rgb (spec_2x2_rgb_f); +ImageBuf buf_1k_rgb_f (spec_1k_rgb_f); +ImageBuf buf_1k_rgb_u8 (spec_1k_rgb_u8); +ImageBuf buf_1k_rgb_u16 (spec_1k_rgb_u16); +ImageBuf buf_1k_rgba_f (spec_1k_rgba_f); +ImageBuf buf_1k_rgba_h (spec_1k_rgba_h); +ImageBuf buf_1k_rgba_u8 (spec_1k_rgba_u8); +ImageBuf buf_1k_rgba_u16(spec_1k_rgba_u16); +ImageBuf buf_hd_rgb_f (spec_hd_rgb_f); +ImageBuf buf_hd_rgb_h (spec_hd_rgb_h); +ImageBuf buf_hd_rgb_u8 (spec_hd_rgb_u8); +ImageBuf buf_hd_rgb_u16 (spec_hd_rgb_u16); +ImageBuf buf_hd_rgba_f (spec_hd_rgba_f); +ImageBuf buf_hd_rgba_h (spec_hd_rgba_h); +ImageBuf buf_hd_rgba_u8 (spec_hd_rgba_u8); +ImageBuf buf_hd_rgba_u16(spec_hd_rgba_u16); + +// Some colors +float red_rgba[] = { 1.0, 0.0, 0.0, 1.0 }; +} + + static void getargs (int argc, char *argv[]) @@ -559,6 +604,39 @@ void test_over () +// Test ImageBuf::resample +void test_resample () +{ + std::cout << "test resample\n"; + + // Timing + Benchmarker bench; + ImageBufAlgo::fill (buf_hd_rgba_f, red_rgba); + ImageBufAlgo::fill (buf_hd_rgba_u8, red_rgba); + ImageBuf smallf (ImageSpec (1024, 512, 4, TypeFloat)); + ImageBuf smallu8 (ImageSpec (1024, 512, 4, TypeUInt8)); + bench (" IBA::resize 2k->1k rgba f->f interp ", [&](){ + ImageBufAlgo::resample (smallf, buf_hd_rgba_f, true); + }); + bench (" IBA::resize 2k->1k rgba f->u8 interp ", [&](){ + ImageBufAlgo::resample (smallu8, buf_hd_rgba_f, true); + }); + bench (" IBA::resize 2k->1k rgba u8->u8 interp ", [&](){ + ImageBufAlgo::resample (smallu8, buf_hd_rgba_u8, true); + }); + bench (" IBA::resize 2k->1k rgba f->f no interp ", [&](){ + ImageBufAlgo::resample (smallf, buf_hd_rgba_f, false); + }); + bench (" IBA::resize 2k->1k rgba f->u8 no interp ", [&](){ + ImageBufAlgo::resample (smallu8, buf_hd_rgba_f, false); + }); + bench (" IBA::resize 2k->1k rgba u8->u8 no interp ", [&](){ + ImageBufAlgo::resample (smallu8, buf_hd_rgba_u8, false); + }); +} + + + // Tests ImageBufAlgo::compare void test_compare () { @@ -941,6 +1019,7 @@ main (int argc, char **argv) test_mul (); test_mad (); test_over (); + test_resample (); test_compare (); test_isConstantColor (); test_isConstantChannel (); From b8d525a12752b8e0f9830c543468d567970e1031 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Fri, 2 Mar 2018 21:02:55 -0800 Subject: [PATCH 3/4] try to speed up more --- src/libOpenImageIO/imagebufalgo_xform.cpp | 48 ++++++++++++++--------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/src/libOpenImageIO/imagebufalgo_xform.cpp b/src/libOpenImageIO/imagebufalgo_xform.cpp index eeab73e360..a712b5d1a3 100644 --- a/src/libOpenImageIO/imagebufalgo_xform.cpp +++ b/src/libOpenImageIO/imagebufalgo_xform.cpp @@ -502,10 +502,9 @@ resample_ (ImageBuf &dst, const ImageBuf &src, bool interpolate, ROI roi, int nthreads) { ASSERT (!src.deep() && !dst.deep()); - ImageBufAlgo::parallel_image (roi, nthreads, [&](ROI roi){ + ImageBufAlgo::parallel_image (roi, nthreads, [&,interpolate](ROI roi){ const ImageSpec &srcspec (src.spec()); const ImageSpec &dstspec (dst.spec()); - int nchannels = src.nchannels(); // Local copies of the source image window, converted to float float srcfx = srcspec.full_x; @@ -519,27 +518,40 @@ resample_ (ImageBuf &dst, const ImageBuf &src, bool interpolate, float dstfh = dstspec.full_height; float dstpixelwidth = 1.0f / dstfw; float dstpixelheight = 1.0f / dstfh; - float *pel = ALLOCA (float, nchannels); - ImageBuf::Iterator out (dst, roi); - ImageBuf::ConstIterator srcpel (src); - for (int y = roi.ybegin; y < roi.yend; ++y) { - // s,t are NDC space - float t = (y-dstfy+0.5f)*dstpixelheight; - // src_xf, src_xf are image space float coordinates - float src_yf = srcfy + t * srcfh; - // src_x, src_y are image space integer coordinates of the floor - int src_y = ifloor (src_yf); - for (int x = roi.xbegin; x < roi.xend; ++x, ++out) { - float s = (x-dstfx+0.5f)*dstpixelwidth; - float src_xf = srcfx + s * srcfw; - int src_x = ifloor (src_xf); - if (interpolate) { + if (interpolate) { + int nchannels = src.nchannels(); + float *pel = ALLOCA (float, nchannels); + ImageBuf::Iterator out (dst, roi); + for (int y = roi.ybegin; y < roi.yend; ++y) { + // s,t are NDC space + float t = (y-dstfy+0.5f)*dstpixelheight; + // src_xf, src_xf are image space float coordinates + float src_yf = srcfy + t * srcfh; + for (int x = roi.xbegin; x < roi.xend; ++x, ++out) { + float s = (x-dstfx+0.5f)*dstpixelwidth; + float src_xf = srcfx + s * srcfw; // Non-deep image, bilinearly interpolate src.interppixel (src_xf, src_yf, pel); for (int c = roi.chbegin; c < roi.chend; ++c) out[c] = pel[c]; - } else { + } + } + + } else { // vvv NO interpolate case + ImageBuf::Iterator out (dst, roi); + ImageBuf::ConstIterator srcpel (src); + for (int y = roi.ybegin; y < roi.yend; ++y) { + // s,t are NDC space + float t = (y-dstfy+0.5f)*dstpixelheight; + // src_xf, src_xf are image space float coordinates + float src_yf = srcfy + t * srcfh; + // src_x, src_y are image space integer coordinates of the floor + int src_y = ifloor (src_yf); + for (int x = roi.xbegin; x < roi.xend; ++x, ++out) { + float s = (x-dstfx+0.5f)*dstpixelwidth; + float src_xf = srcfx + s * srcfw; + int src_x = ifloor (src_xf); // Non-deep image, just copy closest pixel srcpel.pos (src_x, src_y, 0); for (int c = roi.chbegin; c < roi.chend; ++c) From e4fc21faffc8fff59abc4de62c3ffe298e4d95aa Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Fri, 2 Mar 2018 22:01:59 -0800 Subject: [PATCH 4/4] WIP expand interppixel --- src/libOpenImageIO/imagebufalgo_xform.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/libOpenImageIO/imagebufalgo_xform.cpp b/src/libOpenImageIO/imagebufalgo_xform.cpp index a712b5d1a3..96e5d0a86a 100644 --- a/src/libOpenImageIO/imagebufalgo_xform.cpp +++ b/src/libOpenImageIO/imagebufalgo_xform.cpp @@ -522,17 +522,34 @@ resample_ (ImageBuf &dst, const ImageBuf &src, bool interpolate, if (interpolate) { int nchannels = src.nchannels(); float *pel = ALLOCA (float, nchannels); + float *localpixel = ALLOCA (float, nchannels*4); + float *p[4] = { localpixel, localpixel+nchannels, localpixel+2*nchannels, localpixel+3*nchannels }; ImageBuf::Iterator out (dst, roi); + ImageBuf::ConstIterator it (src); for (int y = roi.ybegin; y < roi.yend; ++y) { // s,t are NDC space float t = (y-dstfy+0.5f)*dstpixelheight; // src_xf, src_xf are image space float coordinates float src_yf = srcfy + t * srcfh; + float yy = src_yf - 0.5f; + int ytexel; + float yfrac = floorfrac (yy, &ytexel); for (int x = roi.xbegin; x < roi.xend; ++x, ++out) { float s = (x-dstfx+0.5f)*dstpixelwidth; float src_xf = srcfx + s * srcfw; // Non-deep image, bilinearly interpolate - src.interppixel (src_xf, src_yf, pel); + + // src.interppixel (src_xf, src_yf, pel); + + float xx = src_xf - 0.5f; + int xtexel; + float xfrac = floorfrac (xx, &xtexel); + it.rerange (xtexel, xtexel+2, ytexel, ytexel+2, 0, 1); + for (int i = 0; i < 4; ++i, ++it) + for (int c = 0; c < nchannels; ++c) + p[i][c] = it[c]; + bilerp (p[0], p[1], p[2], p[3], xfrac, yfrac, nchannels, pel); + for (int c = roi.chbegin; c < roi.chend; ++c) out[c] = pel[c]; }