Skip to content

Instantly share code, notes, and snippets.

@asilichenko
Created October 26, 2024 14:54
Show Gist options
  • Save asilichenko/7428a554a1cb23777498c4c622835ec2 to your computer and use it in GitHub Desktop.
Save asilichenko/7428a554a1cb23777498c4c622835ec2 to your computer and use it in GitHub Desktop.
How to use a CUDA constant array in Python
import random
from numba import cuda
import numpy as np
import cupy as cp
# Host-side lookup table: 128 float32 samples drawn uniformly from [0, 40).
# The kernel below mirrors it on the device via cuda.const.array_like.
H_DATA = np.random.uniform(low=0, high=40, size=128).astype(np.float32)
@cuda.jit
def kernel(input_data, output_data):
    """Multiply every input element by an entry of the constant table.

    Each thread handles one index ``i`` and stores
    ``input_data[i] * H_DATA[i % len(H_DATA)]`` into ``output_data[i]``,
    so the table repeats cyclically across the input.

    Args:
        input_data: 1-D device array read at the thread's global index;
            assumed to be at least as long as ``output_data`` (not checked).
        output_data: 1-D device array written at the thread's global index.
    """
    # Constant data on the device; copied from the host during compilation.
    table = cuda.const.array_like(H_DATA)

    idx = cuda.grid(1)
    # Threads whose index falls past the end of the output have nothing to do.
    if idx < output_data.shape[0]:
        scale = table[idx % table.shape[0]]
        output_data[idx] = input_data[idx] * scale
def main():
    """Run ``kernel`` over one million random bytes and print the result.

    Builds a uint8 input array of random values in [0, 40) on the device,
    launches the kernel with one thread per element, waits for it to finish,
    then copies the float32 output back to the host and prints it.
    """
    data_size: int = 1_000_000

    d_input_data = cp.array(np.random.randint(0, 40, size=data_size, dtype=np.uint8))
    # Allocate the zero-filled output directly on the device rather than
    # building it on the host and copying it across.
    d_output_data = cp.zeros(data_size, dtype=np.float32)

    block_size: int = 256
    # Ceiling division: enough blocks so that every element gets a thread.
    grid_size: int = (data_size + (block_size - 1)) // block_size

    kernel[grid_size, block_size](d_input_data, d_output_data)
    # Wait for the kernel to complete before reading the results back.
    cuda.synchronize()

    h_output_data = d_output_data.get()
    print(h_output_data)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment