diff --git a/Makefile b/Makefile index d8dc0c3c0c..237a0cd8ea 100644 --- a/Makefile +++ b/Makefile @@ -239,6 +239,10 @@ ifneq (${BUILD_OIIOUTIL_ONLY},) MY_CMAKE_FLAGS += -DBUILD_OIIOUTIL_ONLY:BOOL=${BUILD_OIIOUTIL_ONLY} endif +ifneq (${USE_CUDA},) +MY_CMAKE_FLAGS += -DUSE_CUDA:BOOL=${USE_CUDA} +endif + ifdef DEBUG MY_CMAKE_FLAGS += -DCMAKE_BUILD_TYPE:STRING=Debug endif @@ -510,6 +514,7 @@ help: @echo " USE_OPENCV=0 Skip anything that needs OpenCV" @echo " USE_PTEX=0 Skip anything that needs PTex" @echo " USE_FREETYPE=0 Skip anything that needs Freetype" + @echo " USE_CUDA=1 Build NVIDIA CUDA support (if found)" @echo " OIIO build-time options:" @echo " INSTALL_PREFIX=path Set installation prefix (default: ./${INSTALL_PREFIX_BRIEF})" @echo " NAMESPACE=name Override namespace base name (default: OpenImageIO)" diff --git a/site/spi/Makefile-bits-arnold b/site/spi/Makefile-bits-arnold index 8b7fa11f41..86bad3f4ee 100644 --- a/site/spi/Makefile-bits-arnold +++ b/site/spi/Makefile-bits-arnold @@ -120,6 +120,11 @@ ifeq (${SP_OS}, rhel7) -DOPENEXR_CUSTOM_LIB_DIR=/usr/lib64/OpenEXR2 endif + # CUDA customizations + MY_CMAKE_FLAGS += \ + -DCUDA_TOOLKIT_ROOT_DIR=/net/soft_scratch/apps/arnold/tools/nvidia/cuda9.1 \ + -DCUDA_HOST_COMPILER=/shots/spi/home/lib/arnold/rhel7/llvm_4.0_final/bin/clang++ + MY_CMAKE_FLAGS += \ -DOCIO_PATH=${OCIO_PATH} \ -DFIELD3D_HOME=${FIELD3D_HOME} \ diff --git a/src/cmake/compiler.cmake b/src/cmake/compiler.cmake index 33c28aac59..618807693e 100644 --- a/src/cmake/compiler.cmake +++ b/src/cmake/compiler.cmake @@ -22,6 +22,8 @@ set (CLANG_TIDY_CHECKS "-*" CACHE STRING "clang-tidy checks to perform") set (CLANG_TIDY_ARGS "" CACHE STRING "clang-tidy args") option (CLANG_TIDY_FIX "Have clang-tidy fix source" OFF) set (GLIBCXX_USE_CXX11_ABI "" CACHE STRING "For gcc, use the new C++11 library ABI (0|1)") +option (USE_CUDA "Use CUDA if found" OFF) +set (CUDA_TARGET_ARCH "sm_30" CACHE STRING "CUDA GPU architecture (e.g. sm_35)") # Figure out which compiler we're using if (CMAKE_COMPILER_IS_GNUCC) diff --git a/src/cmake/externalpackages.cmake b/src/cmake/externalpackages.cmake index 05e9bcb0a9..711549b83a 100644 --- a/src/cmake/externalpackages.cmake +++ b/src/cmake/externalpackages.cmake @@ -4,6 +4,7 @@ # When not in VERBOSE mode, try to make things as quiet as possible if (NOT VERBOSE) set (Boost_FIND_QUIETLY true) + set (CUDA_FIND_QUIETLY true) set (DCMTK_FIND_QUIETLY true) set (FFmpeg_FIND_QUIETLY true) set (Field3D_FIND_QUIETLY true) @@ -624,4 +625,27 @@ endmacro() ########################################################################### +if (USE_CUDA) + if (NOT CUDA_TOOLKIT_ROOT_DIR AND NOT $ENV{CUDA_TOOLKIT_ROOT_DIR} STREQUAL "") + set (CUDA_TOOLKIT_ROOT_DIR $ENV{CUDA_TOOLKIT_ROOT_DIR}) + endif () + if (NOT CUDA_FIND_QUIETLY) + message (STATUS "CUDA_TOOLKIT_ROOT_DIR = ${CUDA_TOOLKIT_ROOT_DIR}") + endif () + set (CUDA_PROPAGATE_HOST_FLAGS ON) + set (CUDA_VERBOSE_BUILD ${VERBOSE}) + find_package (CUDA 7.0 REQUIRED) + list (APPEND CUDA_NVCC_FLAGS ${CSTD_FLAGS} -expt-relaxed-constexpr) + set (CUDA_INCLUDE_DIR ${CUDA_TOOLKIT_ROOT_DIR}/include) + message (STATUS "CUDA version = ${CUDA_VERSION}") + if (NOT CUDA_FIND_QUIETLY) + message (STATUS "CUDA includes = ${CUDA_INCLUDE_DIR}") + message (STATUS "CUDA libraries = ${CUDA_LIBRARIES}") + message (STATUS "CUDA host compiler = ${CUDA_HOST_COMPILER}") + message (STATUS "CUDA nvcc flags = ${CUDA_NVCC_FLAGS}") + endif () +endif () + +# end Cuda +########################################################################### diff --git a/src/include/OpenImageIO/imagebuf.h b/src/include/OpenImageIO/imagebuf.h index c35b6c7092..a758e35418 100644 --- a/src/include/OpenImageIO/imagebuf.h +++ b/src/include/OpenImageIO/imagebuf.h @@ -183,6 +183,9 @@ class OIIO_API ImageBuf { /// Which type of storage is being used for the pixels? IBStorage storage () const; + /// Is the pixel memory of this ImageBuf visible to Cuda? + bool cuda_storage () const; + /// Is this ImageBuf object initialized? bool initialized () const; diff --git a/src/include/OpenImageIO/platform.h b/src/include/OpenImageIO/platform.h index e4eac911e9..20c03c6873 100644 --- a/src/include/OpenImageIO/platform.h +++ b/src/include/OpenImageIO/platform.h @@ -314,8 +314,10 @@ // OIIO_HOSTDEVICE is used to supply the function decorators needed when // compiling for CUDA devices. #ifdef __CUDACC__ +# define OIIO_HOST __host__ # define OIIO_HOSTDEVICE __host__ __device__ #else +# define OIIO_HOST # define OIIO_HOSTDEVICE #endif diff --git a/src/libOpenImageIO/CMakeLists.txt b/src/libOpenImageIO/CMakeLists.txt index 695b9bdcd2..7ba863f33d 100644 --- a/src/libOpenImageIO/CMakeLists.txt +++ b/src/libOpenImageIO/CMakeLists.txt @@ -4,6 +4,12 @@ endif () file (TO_NATIVE_PATH "${PLUGIN_SEARCH_PATH}" PLUGIN_SEARCH_PATH_NATIVE) configure_file (imageio_pvt.h.in "${CMAKE_BINARY_DIR}/include/imageio_pvt.h" @ONLY) +if (BUILDSTATIC) + set (OIIO_LIB_TYPE "STATIC") +else () + set (OIIO_LIB_TYPE "SHARED") +endif () + file (GLOB libOpenImageIO_hdrs ../include/OpenImageIO/*.h) if (NOT USE_EXTERNAL_PUGIXML) @@ -47,6 +53,7 @@ set (libOpenImageIO_srcs imageoutput.cpp iptc.cpp xmp.cpp color_ocio.cpp maketexture.cpp + imageio_cuda.cpp ../libutil/argparse.cpp ../libutil/benchmark.cpp ../libutil/errorhandler.cpp @@ -73,6 +80,13 @@ set (libOpenImageIO_srcs ${libOpenImageIO_hdrs} ) +#set (cuda_using_srcs imagebufalgo_addsub.cpp) + +if (USE_CUDA) + file (GLOB gpu_source_files "*.cu") + message (STATUS "Extra cuda files: ${gpu_source_files}") + list (APPEND libOpenImageIO_srcs ${gpu_source_files}) +endif () # If the 'EMBEDPLUGINS' option is set, we want to compile the source for # all the plugins into libOpenImageIO. @@ -100,10 +114,12 @@ endif () source_group ("libutil" REGULAR_EXPRESSION ".+/libutil/.+") source_group ("libtexture" REGULAR_EXPRESSION ".+/libtexture/.+") -if (BUILDSTATIC) - add_library (OpenImageIO STATIC ${libOpenImageIO_srcs}) +if (USE_CUDA) + add_definitions ("-DOIIO_USE_CUDA=1") + cuda_add_library (OpenImageIO ${OIIO_LIB_TYPE} ${libOpenImageIO_srcs} + OPTIONS -arch ${CUDA_TARGET_ARCH}) else () - add_library (OpenImageIO SHARED ${libOpenImageIO_srcs}) + add_library (OpenImageIO ${OIIO_LIB_TYPE} ${libOpenImageIO_srcs}) endif () # if (SANITIZE AND ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") @@ -206,7 +222,9 @@ if (EXTRA_DSO_LINK_ARGS) set_target_properties (OpenImageIO PROPERTIES LINK_FLAGS ${EXTRA_DSO_LINK_ARGS}) endif() -oiio_install_targets (OpenImageIO) +oiio_install_targets (OpenImageIO +#${OIIO_CUDA_LIB} +) # Testing diff --git a/src/libOpenImageIO/imagebuf.cpp b/src/libOpenImageIO/imagebuf.cpp index 3102d1c4f6..9f6725cc93 100644 --- a/src/libOpenImageIO/imagebuf.cpp +++ b/src/libOpenImageIO/imagebuf.cpp @@ -144,6 +144,7 @@ class ImageBufImpl { void append_error (const std::string& message) const; ImageBuf::IBStorage storage () const { return m_storage; } + bool cuda_storage () const { return m_cuda_storage; } TypeDesc pixeltype () const { validate_spec (); @@ -194,6 +195,7 @@ class ImageBufImpl { m_current_subimage, m_current_miplevel); } + // Make sure the pixels are ready to read with an iterator. bool validate_pixels () const { if (m_pixels_valid) return true; @@ -259,6 +261,7 @@ class ImageBufImpl { mutable spin_mutex m_valid_mutex; mutable bool m_spec_valid; ///< Is the spec valid mutable bool m_pixels_valid; ///< Image is valid + mutable bool m_cuda_storage = false; ///< Is the pixel memory visible to Cuda? bool m_badfile; ///< File not found float m_pixelaspect; ///< Pixel aspect ratio of the image size_t m_pixel_bytes; @@ -373,6 +376,7 @@ ImageBufImpl::ImageBufImpl (const ImageBufImpl &src) // Source had the image fully in memory (no cache) if (m_storage == ImageBuf::APPBUFFER) { // Source just wrapped the client app's pixels, we do the same + m_allocated_size = src.m_localpixels ? src.spec().image_bytes() : 0; m_localpixels = src.m_localpixels; } else { // We own our pixels -- copy from source @@ -498,7 +502,28 @@ ImageBufImpl::new_pixels (size_t size, const void *data) if (m_allocated_size) free_pixels(); m_allocated_size = size; - m_pixels.reset (size ? new char [size] : nullptr); + m_cuda_storage = false; + if (size) { +#ifdef OIIO_USE_CUDA + if (OIIO::get_int_attribute("cuda") && m_spec.format == TypeFloat) { + char *cudaptr = (char *)OIIO::pvt::cuda_malloc (m_allocated_size); + if (cudaptr) { + OIIO::debug ("IB Cuda allocated %p\n", (void*)cudaptr); + m_pixels.reset (cudaptr); + m_cuda_storage = true; + } + else { + OIIO::debug ("Requested cudaMallocManaged of %s FAILED\n", + m_allocated_size); + } + } +#endif + if (! m_pixels) { + m_pixels.reset (new char [size]); + } + } else { + m_pixels.reset (); + } IB_local_mem_current += m_allocated_size; if (data && size) memcpy (m_pixels.get(), data, size); @@ -515,12 +540,21 @@ void ImageBufImpl::free_pixels () { IB_local_mem_current -= m_allocated_size; +#if OIIO_USE_CUDA + if (m_cuda_storage) { + OIIO::debug ("IB Cuda free %p\n", (void*)m_pixels.get()); + OIIO::pvt::cuda_free (m_pixels.release()); + m_cuda_storage = false; + } +#endif m_pixels.reset (); if (m_allocated_size && pvt::oiio_print_debug > 1) OIIO::debug ("IB freed %d MB, global IB memory now %d MB\n", m_allocated_size>>20, IB_local_mem_current>>20); m_allocated_size = 0; m_storage = ImageBuf::UNINITIALIZED; + m_localpixels = nullptr; + m_pixels_valid = false; } @@ -577,6 +611,14 @@ ImageBuf::storage () const +bool +ImageBuf::cuda_storage () const +{ + return impl()->cuda_storage (); +} + + + void ImageBufImpl::clear () { @@ -588,10 +630,8 @@ ImageBufImpl::clear () m_current_miplevel = -1; m_spec = ImageSpec (); m_nativespec = ImageSpec (); - m_pixels.reset (); - m_localpixels = NULL; + release_pixels (); m_spec_valid = false; - m_pixels_valid = false; m_badfile = false; m_pixelaspect = 1; m_pixel_bytes = 0; @@ -700,8 +740,6 @@ ImageBufImpl::realloc () m_channel_bytes = m_spec.format.size(); m_blackpixel.resize (round_to_multiple (m_pixel_bytes, OIIO_SIMD_MAX_SIZE_BYTES), 0); // NB make it big enough for SSE - if (m_allocated_size) - m_pixels_valid = true; if (m_spec.deep) { m_deepdata.init (m_spec); m_storage = ImageBuf::LOCALBUFFER; @@ -2440,5 +2478,4 @@ ImageBuf::retile (int x, int y, int z, ImageCache::Tile* &tile, } - OIIO_NAMESPACE_END diff --git a/src/libOpenImageIO/imagebufalgo_addsub.cpp b/src/libOpenImageIO/imagebufalgo_addsub.cpp index d3f1e703b0..2eccaaeea0 100644 --- a/src/libOpenImageIO/imagebufalgo_addsub.cpp +++ b/src/libOpenImageIO/imagebufalgo_addsub.cpp @@ -44,6 +44,7 @@ #include #include #include "imageio_pvt.h" +#include "imagebufalgo_cuda.h" @@ -121,6 +122,15 @@ ImageBufAlgo::add (ImageBuf &dst, Image_or_Const A_, Image_or_Const B_, return false; ROI origroi = roi; roi.chend = std::min (roi.chend, std::min (A.nchannels(), B.nchannels())); + +#ifdef OIIO_USE_CUDA + if (dst.cuda_storage() && A.cuda_storage() && B.cuda_storage() && + dst.roi() == roi && A.roi() == roi && B.roi() == roi) { + return pvt::add_impl_cuda (dst, A, B, dst.roi()); + } +// make >/dev/null && OPENIMAGEIO_LOG_TIMES=2 oiiotool -cuda -frames 1-1 -pattern fill:topleft=0,0,0:topright=0.5,0,0:bottomleft=0,0.5,0:bottomright=0.5,0.5,0.5 1920x1080 3 -pattern fill:topleft=0.5,0,0:topright=0,0.5,0:bottomleft=0.5,0.5,0.5:bottomright=0,.5,.5 1920x1080 3 -add -o out.exr +#endif + bool ok; OIIO_DISPATCH_COMMON_TYPES3 (ok, "add", add_impl, dst.spec().format, A.spec().format, B.spec().format, @@ -209,6 +219,14 @@ ImageBufAlgo::sub (ImageBuf &dst, Image_or_Const A_, Image_or_Const B_, return false; ROI origroi = roi; roi.chend = std::min (roi.chend, std::min (A.nchannels(), B.nchannels())); + +#ifdef OIIO_USE_CUDA + if (dst.cuda_storage() && A.cuda_storage() && B.cuda_storage() && + dst.roi() == roi && A.roi() == roi && B.roi() == roi) { + return pvt::sub_impl_cuda (dst, A, B, dst.roi()); + } +#endif + bool ok; OIIO_DISPATCH_COMMON_TYPES3 (ok, "sub", sub_impl, dst.spec().format, A.spec().format, B.spec().format, diff --git a/src/libOpenImageIO/imagebufalgo_cuda.cu b/src/libOpenImageIO/imagebufalgo_cuda.cu new file mode 100644 index 0000000000..a474230d3c --- /dev/null +++ b/src/libOpenImageIO/imagebufalgo_cuda.cu @@ -0,0 +1,110 @@ +/* + Copyright 2018 Larry Gritz and the other authors and contributors. + All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the software's owners nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + (This is the Modified BSD License) +*/ + +#include +#include +#include "imagebufalgo_cuda.h" + + +OIIO_NAMESPACE_BEGIN +namespace pvt { + + +__global__ +void add_cuda (float *R, const float *A, const float *B, ROI roi) +{ + int index = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + int n = int(roi.npixels()); + int nc = roi.nchannels(); + for (int p = index; p < n; p += stride) { + int i = p*nc; + for (int c = roi.chbegin; c < roi.chend; ++c) + R[i+c] = A[i+c] + B[i+c]; + } +} + + + +bool +add_impl_cuda (ImageBuf &R, const ImageBuf &A, const ImageBuf &B, + ROI roi) +{ + Timer timer; + int blockSize = 1024; + int numBlocks = (int(roi.npixels()) + blockSize - 1) / blockSize; + add_cuda<<>>((float *)R.localpixels(), + (const float *)A.localpixels(), + (const float *)B.localpixels(), roi); + cudaDeviceSynchronize(); + OIIO::debug ("Running cuda ImageBufAlgo::add, %d blocks of %d: %gms\n", + numBlocks, blockSize, timer()*1000.0f); + return true; +} + + + + +__global__ +void sub_cuda (float *R, const float *A, const float *B, ROI roi) +{ + int index = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + int n = int(roi.npixels()); + int nc = roi.nchannels(); + for (int p = index; p < n; p += stride) { + int i = p*nc; + for (int c = roi.chbegin; c < roi.chend; ++c) + R[i+c] = A[i+c] - B[i+c]; + } +} + + + +bool +sub_impl_cuda (ImageBuf &R, const ImageBuf &A, const ImageBuf &B, + ROI roi) +{ + Timer timer; + int blockSize = 1024; + int numBlocks = (int(roi.npixels()) + blockSize - 1) / blockSize; + sub_cuda<<>>((float *)R.localpixels(), + (const float *)A.localpixels(), + (const float *)B.localpixels(), roi); + cudaDeviceSynchronize(); + OIIO::debug ("Running cuda ImageBufAlgo::sub, %d blocks of %d: %gms\n", + numBlocks, blockSize, timer()*1000.0f); + return true; +} + + +} // end namespace pvt +OIIO_NAMESPACE_END + diff --git a/src/libOpenImageIO/imagebufalgo_cuda.h b/src/libOpenImageIO/imagebufalgo_cuda.h new file mode 100644 index 0000000000..e071b76bef --- /dev/null +++ b/src/libOpenImageIO/imagebufalgo_cuda.h @@ -0,0 +1,45 @@ +/* + Copyright 2018 Larry Gritz and the other authors and contributors. + All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the software's owners nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + (This is the Modified BSD License) +*/ + +#include + + + +OIIO_NAMESPACE_BEGIN +namespace pvt { + + +bool add_impl_cuda (ImageBuf &R, const ImageBuf &A, const ImageBuf &B, ROI roi); +bool sub_impl_cuda (ImageBuf &R, const ImageBuf &A, const ImageBuf &B, ROI roi); + + +} // end namespace pvt +OIIO_NAMESPACE_END + diff --git a/src/libOpenImageIO/imageio.cpp b/src/libOpenImageIO/imageio.cpp index cbd08fbe7c..f2fdf1d393 100644 --- a/src/libOpenImageIO/imageio.cpp +++ b/src/libOpenImageIO/imageio.cpp @@ -80,6 +80,7 @@ int oiio_print_debug (oiio_debug_env ? atoi(oiio_debug_env) : 0); int oiio_print_debug (oiio_debug_env ? atoi(oiio_debug_env) : 1); #endif int oiio_log_times = Strutil::from_string(Sysutil::getenv("OPENIMAGEIO_LOG_TIMES")); +atomic_int use_cuda (Strutil::from_string(Sysutil::getenv("OPENIMAGEIO_CUDA"))); } using namespace pvt; @@ -282,6 +283,11 @@ attribute (string_view name, TypeDesc type, const void *val) default_thread_pool()->resize (ot-1); return true; } + if (name == "cuda" && type == TypeDesc::TypeInt) { + use_cuda = (*(const int *)val); + return true; + } + spin_lock lock (attrib_mutex); if (name == "read_chunk" && type == TypeInt) { oiio_read_chunk = *(const int *)val; @@ -323,6 +329,11 @@ getattribute (string_view name, TypeDesc type, void *val) *(int *)val = oiio_threads; return true; } + if (name == "cuda" && type == TypeDesc::TypeInt) { + *(int *)val = openimageio_cuda(); + return true; + } + spin_lock lock (attrib_mutex); if (name == "read_chunk" && type == TypeInt) { *(int *)val = oiio_read_chunk; diff --git a/src/libOpenImageIO/imageio_cuda.cpp b/src/libOpenImageIO/imageio_cuda.cpp new file mode 100644 index 0000000000..413097da29 --- /dev/null +++ b/src/libOpenImageIO/imageio_cuda.cpp @@ -0,0 +1,174 @@ +/* + Copyright 2018 Larry Gritz and the other authors and contributors. + All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the software's owners nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + (This is the Modified BSD License) +*/ + +#include +#include +#include + +#ifdef OIIO_USE_CUDA +// #include +#include +#endif + +#include +#include +#include +#include "imageio_pvt.h" + + +OIIO_NAMESPACE_BEGIN + +// Global private data +namespace pvt { + +spin_mutex cuda_mutex; +bool cuda_supported = false; +std::string cuda_device_name; +int cuda_driver_version = 0; +int cuda_runtime_version = 0; +int cuda_compatibility = 0; +size_t cuda_total_memory = 0; + + + +#ifdef OIIO_USE_CUDA + +// This will output the proper CUDA error strings in the event that a +// CUDA host call returns an error +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline bool __checkCudaErrors(cudaError_t err, const char *file, const int line) +{ + if (cudaSuccess != err) { + Strutil::fprintf (stderr, "Cuda error %d (%s) at %s:%d\n", + (int)err, cudaGetErrorString(err), file, line); + } + return true; + return (err == cudaSuccess); +} + + + +static void +initialize_cuda () +{ + // Environment OPENIMAGEIO_CUDA=0 trumps everything else, turns off + // Cuda functionality. + const char *env = getenv ("OPENIMAGEIO_CUDA"); + if (env && strtol(env,NULL,10) == 0) + return; + + // if (! checkCudaErrors (cuInit (0))) + // return; + + // Get number of devices supporting CUDA + int deviceCount = 0; + if (! checkCudaErrors (cudaGetDeviceCount(&deviceCount))) { + return; + } + + OIIO::debug ("Number of Cuda devices: %d\n", deviceCount); +#if 0 + for (int dev = 0; dev < deviceCount; ++dev) { + CUdevice device; + cudaGetDevice (&device, dev); + cudaSetDevice(dev); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, dev); + cuda_device_name = deviceProp.name; + cuDriverGetVersion (&cuda_driver_version); + cudaRuntimeGetVersion (&cuda_runtime_version); + cuda_compatibility = 100 * deviceProp.major + deviceProp.minor; + cuda_total_memory = deviceProp.totalGlobalMem; + OIIO::debug ("Cuda device \"%s\": driver %s, runtime %s, Cuda compat %s\n", + cuda_device_name, cuda_driver_version, + cuda_runtime_version, cuda_compatibility); + OIIO::debug (" total mem %g MB\n", cuda_total_memory/(1024.0*1024.0)); + break; // only inventory the first Cuda device. FIXME? + } +#endif + cuda_supported = true; +} + +#endif /* defined(OIIO_USE_CUDA) */ + + + +bool +openimageio_cuda () +{ + if (! use_cuda) + return false; +#ifdef OIIO_USE_CUDA + static std::once_flag cuda_initialized; + std::call_once (cuda_initialized, initialize_cuda); +#endif + return cuda_supported; +} + + +struct cuda_force_initializer { + cuda_force_initializer() { (void) openimageio_cuda(); } +}; +cuda_force_initializer init; + + + +void* cuda_malloc (size_t size) +{ +#ifdef OIIO_USE_CUDA + if (use_cuda) { + char *cudaptr = nullptr; + checkCudaErrors (cudaMallocManaged (&cudaptr, size)); + cudaDeviceSynchronize(); + return cudaptr; + } +#endif + return malloc (size); +} + + + +void cuda_free (void *mem) +{ +#ifdef OIIO_USE_CUDA + if (use_cuda) { + cudaDeviceSynchronize(); + checkCudaErrors (cudaFree (mem)); + return; + } +#endif + return free (mem); +} + + +} // end namespace pvt + +OIIO_NAMESPACE_END diff --git a/src/libOpenImageIO/imageio_pvt.h.in b/src/libOpenImageIO/imageio_pvt.h.in index 4d2e7f1cee..9b3a565f3d 100644 --- a/src/libOpenImageIO/imageio_pvt.h.in +++ b/src/libOpenImageIO/imageio_pvt.h.in @@ -60,6 +60,14 @@ extern std::string library_list; extern int oiio_print_debug; extern int oiio_log_times; +extern atomic_int use_cuda; +extern bool cuda_supported; +extern std::string cuda_device_name; +extern int cuda_driver_version; +extern int cuda_runtime_version; +extern int cuda_compatibility; +extern size_t cuda_total_memory; + // For internal use - use error() below for a nicer interface. void seterror (string_view message); @@ -75,6 +83,13 @@ inline void error (string_view fmt, const Args&... args) { // imageio_mutex is held. For internal use only. void catalog_all_plugins (std::string searchpath); +// Is Cuda available to OpenImageIO? +bool openimageio_cuda (); + +/// Allocate unified Cuda/CPU memory +void* cuda_malloc (size_t size); +void cuda_free (void *mem); + /// Given the format, set the default quantization range. void get_default_quantize (TypeDesc format, long long &quant_min, long long &quant_max); @@ -136,6 +151,7 @@ public: } void stop () { m_timer.stop(); } void rename (string_view name) { m_name = name; } + Timer& timer () { return m_timer; } private: Timer m_timer; std::string m_name; diff --git a/src/oiiotool/oiiotool.cpp b/src/oiiotool/oiiotool.cpp index 3f80d55b8d..813acb7c4f 100644 --- a/src/oiiotool/oiiotool.cpp +++ b/src/oiiotool/oiiotool.cpp @@ -482,6 +482,27 @@ unset_autopremult (int argc, const char *argv[]) } +static int +enable_cuda (int argc, const char *argv[]) +{ + ASSERT (argc == 1); + OIIO::attribute ("cuda", 1); + int r = OIIO::get_int_attribute ("cuda"); // force initialization + if (ot.debug) + std::cout << "Enable_cuda: " << r << "\n"; + return 0; +} + + +static int +disable_cuda (int argc, const char *argv[]) +{ + ASSERT (argc == 1); + OIIO::attribute ("cuda", 0); + return 0; +} + + static int action_label (int argc, const char *argv[]) @@ -5152,6 +5173,8 @@ getargs (int argc, char *argv[]) "-a", &ot.allsubimages, "Do operations on all subimages/miplevels", "--debug", &ot.debug, "Debug mode", "--runstats", &ot.runstats, "Print runtime statistics", + "--cuda %@", &enable_cuda, NULL, "Use Cuda if available", + "--nocuda %@", &disable_cuda, NULL, "Don't use Cuda, even if available", "--info %@", set_printinfo, NULL, "Print resolution and basic info on all inputs, detailed metadata if -v is also used (options: format=xml:verbose=1)", "--echo %@ %s", do_echo, NULL, "Echo message to console (options: newline=0)", "--metamatch %s", &ot.printinfo_metamatch,