Skip to content

Commit 4a17029

Browse files
authored
fixes 34 fix conda package (#40)
* fixed issues with cmake and numba in conda package and successfully built on linux
* fixing windows conda recipe
* recompiled ptx file & small notebook updates
* updated notebook to clip collar
1 parent 885f66a commit 4a17029

6 files changed

Lines changed: 868 additions & 154 deletions

File tree

conda-recipe/bld.bat

Lines changed: 87 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ nvcc -ptx ^
6464
-I"%OptiX_INSTALL_DIR%\include" ^
6565
-I"%SRC_DIR%\cuda" ^
6666
--use_fast_math ^
67+
--allow-unsupported-compiler ^
6768
-o "%SRC_DIR%\rtxpy\kernel.ptx" ^
6869
"%SRC_DIR%\cuda\kernel.cu"
6970
if errorlevel 1 (
@@ -89,14 +90,96 @@ if errorlevel 1 (
8990
exit /b 1
9091
)
9192

92-
cd /d "%OTK_PYOPTIX_DIR%\optix"
93-
echo Building and installing otk-pyoptix...
94-
"%PYTHON%" -m pip install . --no-deps --no-build-isolation -vv
93+
:: Verify cmake is available (installed via conda)
94+
where cmake >nul 2>&1
9595
if errorlevel 1 (
96-
echo ERROR: Failed to install otk-pyoptix
96+
echo ERROR: cmake not found. Ensure cmake is in build requirements.
9797
exit /b 1
9898
)
99+
echo Found cmake at:
100+
where cmake
101+
102+
:: Verify C++ compiler is available (conda-build should set up VS environment)
103+
where cl >nul 2>&1
104+
if errorlevel 1 (
105+
echo.
106+
echo ERROR: C++ compiler ^(cl.exe^) not found.
107+
echo.
108+
echo Please ensure Visual Studio Build Tools are installed and activated.
109+
echo You can install them from: https://visualstudio.microsoft.com/visual-cpp-build-tools/
110+
echo.
111+
echo If already installed, run this build from a "Developer Command Prompt"
112+
echo or run vcvars64.bat before building.
113+
echo.
114+
exit /b 1
115+
)
116+
echo Found C++ compiler at:
117+
where cl
118+
119+
:: Pre-clone pybind11 without submodules to avoid FetchContent submodule update failures
120+
echo Pre-cloning pybind11 to avoid submodule issues...
121+
set "PYBIND11_DIR=%SRC_DIR%\pybind11-src"
122+
git clone --depth 1 --branch v2.13.6 https://github.com/pybind/pybind11.git "%PYBIND11_DIR%"
123+
if errorlevel 1 (
124+
echo ERROR: Failed to clone pybind11
125+
exit /b 1
126+
)
127+
128+
:: Tell CMake to use our pre-cloned pybind11 instead of fetching
129+
set "FETCHCONTENT_SOURCE_DIR_PYBIND11=%PYBIND11_DIR%"
130+
echo Using pre-cloned pybind11 at %PYBIND11_DIR%
131+
132+
pushd "%OTK_PYOPTIX_DIR%\optix"
133+
134+
:: Patch CMakeLists.txt to use our pre-cloned pybind11 and skip submodule updates
135+
echo Patching CMakeLists.txt to use local pybind11...
99136

137+
:: Convert backslashes to forward slashes for CMake
138+
set "PYBIND11_DIR_CMAKE=%PYBIND11_DIR:\=/%"
139+
140+
:: Prepend the FETCHCONTENT_SOURCE_DIR_PYBIND11 setting to CMakeLists.txt
141+
(
142+
echo set^(FETCHCONTENT_SOURCE_DIR_PYBIND11 "!PYBIND11_DIR_CMAKE!" CACHE PATH "pybind11 source" FORCE^)
143+
type CMakeLists.txt
144+
) > "%SRC_DIR%\CMakeLists_new.txt"
145+
move /y "%SRC_DIR%\CMakeLists_new.txt" CMakeLists.txt >nul
146+
147+
echo Patched CMakeLists.txt - first 2 lines:
148+
powershell -Command "Get-Content CMakeLists.txt -Head 2"
149+
150+
:: Set OptiX path for cmake/pip build process (exactly like run_gpu_test.bat)
151+
set "OPTIX_PATH=%OptiX_INSTALL_DIR%"
152+
set "CMAKE_PREFIX_PATH=%OptiX_INSTALL_DIR%;%CMAKE_PREFIX_PATH%"
153+
154+
:: Clear conda-build injected CMAKE variables that break the build
155+
set CMAKE_GENERATOR=
156+
set CMAKE_GENERATOR_PLATFORM=
157+
set CMAKE_GENERATOR_TOOLSET=
158+
159+
:: Pre-install build dependencies so we can use --no-build-isolation
160+
echo Installing build dependencies...
161+
"%PYTHON%" -m pip install setuptools wheel
162+
163+
echo Building with OptiX_INSTALL_DIR=%OptiX_INSTALL_DIR%
164+
echo FETCHCONTENT_SOURCE_DIR_PYBIND11=!FETCHCONTENT_SOURCE_DIR_PYBIND11!
165+
166+
:: Pass pybind11 source dir to CMake via CMAKE_ARGS (used by scikit-build and setuptools)
167+
set "CMAKE_ARGS=-DFETCHCONTENT_SOURCE_DIR_PYBIND11=!PYBIND11_DIR!"
168+
169+
:: Use --no-build-isolation so environment variables are visible to CMake
170+
"%PYTHON%" -m pip install . -v --no-build-isolation
171+
if errorlevel 1 (
172+
echo.
173+
echo ERROR: Failed to install otk-pyoptix
174+
echo.
175+
echo If the error mentions OptiX not found, try setting manually:
176+
echo set OptiX_INSTALL_DIR=%OptiX_INSTALL_DIR%
177+
echo set OPTIX_PATH=%OptiX_INSTALL_DIR%
178+
echo.
179+
popd
180+
exit /b 1
181+
)
182+
popd
100183
echo otk-pyoptix installed successfully
101184
echo.
102185

conda-recipe/conda_build_config.yaml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
# Channels to use for build dependencies
22
channel_sources:
3-
- conda-forge,nvidia
4-
5-
# CUDA versions to build against
6-
cuda_compiler_version:
7-
- "12.6"
3+
- conda-forge
84

95
# Python versions to support
106
python:
@@ -13,7 +9,16 @@ python:
139
- "3.12"
1410
- "3.13"
1511

12+
# NumPy version (needed to avoid conda-build warning)
13+
numpy:
14+
- "1.26"
15+
1616
# NumPy version handling:
1717
# - Python 3.10-3.12: numpy 1.26 (last 1.x series)
1818
# - Python 3.13+: numpy 2.1 (required for Python 3.13 support)
1919
# Note: numpy pins are set conditionally in meta.yaml based on Python version
20+
#
21+
# Windows build notes:
22+
# - Windows builds use system CUDA Toolkit (not conda packages)
23+
# - Ensure CUDA Toolkit 12.x is installed and nvcc is in PATH
24+
# - Linux builds use conda-provided CUDA packages

conda-recipe/meta.yaml

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,30 +18,34 @@ build:
1818

1919
requirements:
2020
build:
21-
- {{ compiler('c') }}
22-
- {{ compiler('cxx') }}
21+
- {{ compiler('c') }} # [linux]
22+
- {{ compiler('cxx') }} # [linux]
2323
- conda-forge::cmake
24-
- conda-forge::git
25-
- cuda-nvcc
26-
- cuda-cudart-dev
27-
- cuda-nvrtc-dev
24+
- conda-forge::git # [linux]
25+
- cuda-nvcc >=12 # [linux]
26+
- cuda-cudart-dev >=12 # [linux]
27+
- cuda-nvrtc-dev >=12 # [linux]
28+
# Windows uses system CUDA Toolkit and Visual Studio Build Tools
2829

2930
host:
3031
- python
3132
- pip
3233
- setuptools >=68
3334
- wheel
34-
- cuda-version >=12
35-
- cuda-cudart-dev
36-
- cuda-nvrtc-dev
35+
- conda-forge::cmake
36+
- conda-forge::git # [win]
37+
# Linux CUDA packages
38+
- cuda-version >=12 # [linux]
39+
- cuda-cudart-dev >=12 # [linux]
40+
- cuda-nvrtc-dev >=12 # [linux]
3741

3842
run:
3943
- python >=3.10
4044
- numpy >=1.21,<3 # [py<313]
4145
- numpy >=2.0,<3 # [py>=313]
4246
- numba >=0.56
4347
- cupy >=12.0
44-
- cuda-version >=12
48+
- cuda-version >=12 # [linux]
4549
- __cuda # [linux]
4650

4751
test:

examples/ham_radio_viewshed_analysis.ipynb

Lines changed: 673 additions & 93 deletions
Large diffs are not rendered by default.

rtxpy/kernel.ptx

Lines changed: 83 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -11,67 +11,89 @@
1111
.address_size 64
1212

1313
// .globl __raygen__main
14-
.const .align 8 .b8 params[24];
14+
.const .align 8 .b8 params[40];
1515

1616
.visible .entry __raygen__main()
1717
{
18+
.reg .pred %p<3>;
1819
.reg .f32 %f<10>;
19-
.reg .b32 %r<83>;
20-
.reg .b64 %rd<10>;
20+
.reg .b32 %r<117>;
21+
.reg .b64 %rd<19>;
2122

2223

2324
// begin inline asm
24-
call (%r1), _optix_get_launch_index_x, ();
25+
call (%r33), _optix_get_launch_index_x, ();
2526
// end inline asm
2627
// begin inline asm
27-
call (%r2), _optix_get_launch_index_y, ();
28+
call (%r34), _optix_get_launch_index_y, ();
2829
// end inline asm
2930
// begin inline asm
30-
call (%r3), _optix_get_launch_index_z, ();
31+
call (%r35), _optix_get_launch_index_z, ();
3132
// end inline asm
3233
// begin inline asm
33-
call (%r4), _optix_get_launch_dimension_x, ();
34+
call (%r36), _optix_get_launch_dimension_x, ();
3435
// end inline asm
3536
// begin inline asm
36-
call (%r5), _optix_get_launch_dimension_y, ();
37+
call (%r37), _optix_get_launch_dimension_y, ();
3738
// end inline asm
38-
mad.lo.s32 %r77, %r5, %r3, %r2;
39-
mad.lo.s32 %r78, %r77, %r4, %r1;
40-
ld.const.u64 %rd2, [params+8];
41-
cvta.to.global.u64 %rd3, %rd2;
42-
mul.wide.u32 %rd4, %r78, 32;
43-
add.s64 %rd5, %rd3, %rd4;
44-
ld.global.f32 %f1, [%rd5];
45-
ld.global.f32 %f2, [%rd5+4];
46-
ld.global.f32 %f3, [%rd5+8];
47-
ld.global.f32 %f7, [%rd5+12];
48-
ld.global.f32 %f4, [%rd5+16];
49-
ld.global.f32 %f5, [%rd5+20];
50-
ld.global.f32 %f6, [%rd5+24];
51-
ld.global.f32 %f8, [%rd5+28];
52-
ld.const.u64 %rd1, [params];
39+
mad.lo.s32 %r109, %r37, %r35, %r34;
40+
mad.lo.s32 %r110, %r109, %r36, %r33;
41+
cvt.u64.u32 %rd1, %r110;
42+
ld.const.u64 %rd5, [params+8];
43+
cvta.to.global.u64 %rd6, %rd5;
44+
mul.wide.u32 %rd7, %r110, 32;
45+
add.s64 %rd8, %rd6, %rd7;
46+
ld.global.f32 %f1, [%rd8];
47+
ld.global.f32 %f2, [%rd8+4];
48+
ld.global.f32 %f3, [%rd8+8];
49+
ld.global.f32 %f7, [%rd8+12];
50+
ld.global.f32 %f4, [%rd8+16];
51+
ld.global.f32 %f5, [%rd8+20];
52+
ld.global.f32 %f6, [%rd8+24];
53+
ld.global.f32 %f8, [%rd8+28];
54+
ld.const.u64 %rd4, [params];
5355
mov.f32 %f9, 0f00000000;
54-
mov.u32 %r42, 1;
55-
mov.u32 %r44, 4;
56-
mov.u32 %r76, 0;
57-
// begin inline asm
58-
call(%r6,%r7,%r8,%r9,%r10,%r11,%r12,%r13,%r14,%r15,%r16,%r17,%r18,%r19,%r20,%r21,%r22,%r23,%r24,%r25,%r26,%r27,%r28,%r29,%r30,%r31,%r32,%r33,%r34,%r35,%r36,%r37),_optix_trace_typed_32,(%r76,%rd1,%f1,%f2,%f3,%f4,%f5,%f6,%f7,%f8,%f9,%r42,%r76,%r76,%r42,%r76,%r44,%r79,%r80,%r81,%r82,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76);
59-
// end inline asm
60-
ld.const.u64 %rd6, [params+16];
61-
cvta.to.global.u64 %rd7, %rd6;
62-
mul.wide.u32 %rd8, %r78, 16;
63-
add.s64 %rd9, %rd7, %rd8;
64-
st.global.u32 [%rd9], %r6;
65-
st.global.u32 [%rd9+4], %r7;
66-
st.global.u32 [%rd9+8], %r8;
67-
st.global.u32 [%rd9+12], %r9;
56+
mov.u32 %r74, 1;
57+
mov.u32 %r76, 6;
58+
mov.u32 %r108, 0;
59+
// begin inline asm
60+
call(%r38,%r39,%r40,%r41,%r42,%r43,%r44,%r45,%r46,%r47,%r48,%r49,%r50,%r51,%r52,%r53,%r54,%r55,%r56,%r57,%r58,%r59,%r60,%r61,%r62,%r63,%r64,%r65,%r66,%r67,%r68,%r69),_optix_trace_typed_32,(%r108,%rd4,%f1,%f2,%f3,%f4,%f5,%f6,%f7,%f8,%f9,%r74,%r108,%r108,%r74,%r108,%r76,%r111,%r112,%r113,%r114,%r115,%r116,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108);
61+
// end inline asm
62+
ld.const.u64 %rd9, [params+16];
63+
cvta.to.global.u64 %rd10, %rd9;
64+
mul.wide.u32 %rd11, %r110, 16;
65+
add.s64 %rd12, %rd10, %rd11;
66+
st.global.u32 [%rd12], %r38;
67+
st.global.u32 [%rd12+4], %r39;
68+
st.global.u32 [%rd12+8], %r40;
69+
st.global.u32 [%rd12+12], %r41;
70+
ld.const.u64 %rd2, [params+24];
71+
setp.eq.s64 %p1, %rd2, 0;
72+
@%p1 bra $L__BB0_2;
73+
74+
cvta.to.global.u64 %rd13, %rd2;
75+
shl.b64 %rd14, %rd1, 2;
76+
add.s64 %rd15, %rd13, %rd14;
77+
st.global.u32 [%rd15], %r42;
78+
79+
$L__BB0_2:
80+
ld.const.u64 %rd3, [params+32];
81+
setp.eq.s64 %p2, %rd3, 0;
82+
@%p2 bra $L__BB0_4;
83+
84+
cvta.to.global.u64 %rd16, %rd3;
85+
shl.b64 %rd17, %rd1, 2;
86+
add.s64 %rd18, %rd16, %rd17;
87+
st.global.u32 [%rd18], %r43;
88+
89+
$L__BB0_4:
6890
ret;
6991

7092
}
7193
// .globl __miss__miss
7294
.visible .entry __miss__miss()
7395
{
74-
.reg .b32 %r<9>;
96+
.reg .b32 %r<13>;
7597

7698

7799
mov.u32 %r8, 0;
@@ -92,21 +114,30 @@
92114
// begin inline asm
93115
call _optix_set_payload, (%r7, %r8);
94116
// end inline asm
117+
mov.u32 %r9, 4;
118+
mov.u32 %r12, -1;
119+
// begin inline asm
120+
call _optix_set_payload, (%r9, %r12);
121+
// end inline asm
122+
mov.u32 %r11, 5;
123+
// begin inline asm
124+
call _optix_set_payload, (%r11, %r12);
125+
// end inline asm
95126
ret;
96127

97128
}
98129
// .globl __closesthit__chit
99130
.visible .entry __closesthit__chit()
100131
{
101132
.reg .f32 %f<37>;
102-
.reg .b32 %r<14>;
133+
.reg .b32 %r<19>;
103134
.reg .b64 %rd<3>;
104135

105136

106137
// begin inline asm
107138
call (%f1), _optix_get_ray_tmax, ();
108139
// end inline asm
109-
cvt.rzi.ftz.u32.f32 %r13, %f1;
140+
cvt.rzi.ftz.u32.f32 %r18, %f1;
110141
// begin inline asm
111142
call (%rd1), _optix_get_gas_traversable_handle, ();
112143
// end inline asm
@@ -145,7 +176,7 @@
145176
mul.ftz.f32 %f33, %f24, %f31;
146177
neg.ftz.f32 %f34, %f33;
147178
mul.ftz.f32 %f35, %f31, %f27;
148-
cvt.rn.f32.u32 %f36, %r13;
179+
cvt.rn.f32.u32 %f36, %r18;
149180
mov.b32 %r6, %f36;
150181
mov.u32 %r5, 0;
151182
// begin inline asm
@@ -166,6 +197,17 @@
166197
// begin inline asm
167198
call _optix_set_payload, (%r11, %r12);
168199
// end inline asm
200+
mov.u32 %r13, 4;
201+
// begin inline asm
202+
call _optix_set_payload, (%r13, %r1);
203+
// end inline asm
204+
// begin inline asm
205+
call (%r15), _optix_read_instance_id, ();
206+
// end inline asm
207+
mov.u32 %r16, 5;
208+
// begin inline asm
209+
call _optix_set_payload, (%r16, %r15);
210+
// end inline asm
169211
ret;
170212

171213
}

run_gpu_test.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ if not exist "cuda\kernel.cu" (
182182
echo ERROR: cuda\kernel.cu not found. Are you in the rtxpy directory?
183183
exit /b 1
184184
)
185-
nvcc -ptx -arch=sm_%GPU_ARCH% -I"%OptiX_INSTALL_DIR%\include" -Icuda --use_fast_math -o rtxpy\kernel.ptx cuda\kernel.cu
185+
nvcc -ptx -arch=sm_%GPU_ARCH% -I"%OptiX_INSTALL_DIR%\include" -Icuda --use_fast_math -allow-unsupported-compiler -o rtxpy\kernel.ptx cuda\kernel.cu
186186
if errorlevel 1 (
187187
echo ERROR: PTX compilation failed
188188
exit /b 1

0 commit comments

Comments
 (0)