Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cuda_bindings/examples/0_Introduction/clock_nvrtc_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates using the device clock for kernel timing via
# NVRTC-compiled CUDA code.
#
# ################################################################################

import platform

import numpy as np
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates cubemap texture sampling and transformation.
#
# ################################################################################

import ctypes
import sys
import time
Expand Down
7 changes: 7 additions & 0 deletions cuda_bindings/examples/0_Introduction/simpleP2P_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates peer-to-peer memory access and data transfer
# between multiple GPUs.
#
# ################################################################################

import ctypes
import platform
import sys
Expand Down
7 changes: 7 additions & 0 deletions cuda_bindings/examples/0_Introduction/simpleZeroCopy_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates vector addition using zero-copy (mapped) host
# memory, allowing the GPU to access CPU memory directly.
#
# ################################################################################

import ctypes
import math
import platform
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates system-wide atomic operations on managed memory.
#
# ################################################################################

import ctypes
import os
import sys
Expand Down
7 changes: 7 additions & 0 deletions cuda_bindings/examples/0_Introduction/vectorAddDrv_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates vector addition using the CUDA Driver API with
# unified virtual addressing.
#
# ################################################################################

import ctypes
import math
import sys
Expand Down
7 changes: 7 additions & 0 deletions cuda_bindings/examples/0_Introduction/vectorAddMMAP_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates vector addition using multi-device memory
# mapping (cuMemCreate, cuMemMap) with virtual address management.
#
# ################################################################################

import ctypes
import math
import platform
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates stream-ordered memory allocation (cudaMallocAsync
# / cudaFreeAsync) and memory pool release thresholds.
#
# ################################################################################

import ctypes
import math
import platform
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates asynchronous copy from global to shared memory
# (memcpy_async) in matrix multiplication kernels.
#
# ################################################################################

import ctypes
import math
import platform
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates CUDA Graphs for capture and replay of GPU
# workloads, including manual graph construction and stream capture.
#
# ################################################################################

import ctypes
import random as rnd

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates a conjugate gradient solver using cooperative
# groups and multi-block grid synchronization.
#
# ################################################################################

import ctypes
import math
import platform
Expand Down Expand Up @@ -350,3 +357,7 @@ def main():
if math.sqrt(dot_result_local) >= tol:
print("conjugateGradientMultiBlockCG FAILED", file=sys.stderr)
sys.exit(1)


if __name__ == "__main__":
main()
7 changes: 7 additions & 0 deletions cuda_bindings/examples/extra/isoFDModelling_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates isotropic finite-difference wave propagation
# modelling across multiple GPUs with peer-to-peer halo exchange.
#
# ################################################################################

import time

import numpy as np
Expand Down
7 changes: 7 additions & 0 deletions cuda_bindings/examples/extra/jit_program_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Copyright 2021-2025 NVIDIA Corporation. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# ################################################################################
#
# This example demonstrates JIT compilation of CUDA kernels using NVRTC
# and the Driver API (saxpy kernel).
#
# ################################################################################

import ctypes

import numpy as np
Expand Down
6 changes: 3 additions & 3 deletions cuda_core/examples/cuda_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

# ################################################################################
#
# This demo illustrates how to use CUDA graphs to capture and execute
# multiple kernel launches with minimal overhead. The graph performs a
# sequence of vector operations: add, multiply, and subtract.
# This example demonstrates using CUDA graphs to capture and execute
# multiple kernel launches with minimal overhead. The graph performs a
# sequence of vector operations: add, multiply, and subtract.
#
# ################################################################################

Expand Down
22 changes: 11 additions & 11 deletions cuda_core/examples/gl_interop_plasma.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

# ################################################################################
#
# Real-time Plasma Effect -- CUDA/OpenGL Interop with cuda.core.GraphicsResource
# This example demonstrates cuda.core.GraphicsResource for CUDA/OpenGL
# interop: a CUDA kernel writes pixels directly into an OpenGL PBO with
# zero copies through the CPU. Requires pyglet.
#
# ################################################################################
#

# What this example teaches
# =========================
# How to use cuda.core.GraphicsResource to let a CUDA kernel write pixels
Expand All @@ -18,12 +20,12 @@
# Normally, getting CUDA results onto the screen would require:
# CUDA -> CPU memory -> OpenGL (two slow copies across the PCIe bus)
#
# GraphicsResource eliminates the CPU round-trip. The pixel data stays
# GraphicsResource eliminates the CPU round-trip. The pixel data stays
# on the GPU the entire time:
#
# 1. OpenGL allocates a PBO (Pixel Buffer Object) -- a raw GPU buffer.
# 2. GraphicsResource.from_gl_buffer() registers that PBO with CUDA.
# Now both CUDA and OpenGL have access to the *same* GPU memory.
# Now both CUDA and OpenGL have access to the same GPU memory.
#
# +----------------------+ +---------------------+
# | OpenGL PBO | | GraphicsResource |
Expand All @@ -39,23 +41,21 @@
# 4. glTexSubImage2D -- OpenGL copies PBO into a texture (GPU-to-GPU)
# 5. draw -- OpenGL renders the texture to the window
#
# Why is there a copy in step 4? OpenGL can only render from a
# "texture" object, not from a raw buffer. The glTexSubImage2D step
# Why is there a copy in step 4? OpenGL can only render from a
# texture object, not from a raw buffer. The glTexSubImage2D step
# copies the PBO bytes into a texture, but this happens entirely on
# the GPU and it is very fast. The big win from GraphicsResource is
# that we never copy pixels from the CPU to the GPU and then and back.
# that we never copy pixels from the CPU to the GPU and then back.
#
# What you should see
# ===================
# A window showing smoothly animated, colorful swirling patterns (a "plasma"
# effect popular in the demoscene). The window title shows the current FPS.
# A window showing smoothly animated, colorful swirling patterns (a plasma
# effect popular in the demoscene). The window title shows the current FPS.
# Close the window or press Escape to exit.
#
# Requirements
# ============
# pip install pyglet
#
# ################################################################################

import ctypes
import sys
Expand Down
19 changes: 5 additions & 14 deletions cuda_core/examples/jit_lto_fractal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,11 @@

# ################################################################################
#
# This demo illustrates:
#
# 1. How to use the JIT LTO feature provided by the Linker class to link multiple objects together
# 2. That linking allows for libraries to modify workflows dynamically at runtime
#
# This demo mimics a relationship between a library and a user. The user's sole responsibility is to
# provide device code that generates some art. Whereas the library is responsible for all steps involved in
# setting up the device, launch configurations and arguments, as well as linking the provided device code.
#
# Two algorithms are implemented:
# 1. A Mandelbrot set
# 2. A Julia set
#
# The user can choose which algorithm to use at runtime and generate the resulting image.
# This example demonstrates using the JIT LTO feature of the Linker class to
# link multiple objects together, allowing libraries to modify workflows at
# runtime. It mimics a library-user relationship: the user provides device
# code that generates art (a Mandelbrot or Julia set), while the library
# handles device setup, launch configuration, and linking.
#
# ################################################################################

Expand Down
Loading
Loading