# L-18 MCS 572 Mon 7 Oct 2024 : gpuadd3.jl
# Copied from the tutorial at
# https://cuda.juliagpu.org/stable/tutorials/introduction/

using CUDA
using Test

"""
    gpu_add3!(y, x)

CUDA kernel that adds `x` to `y` element-wise, in place (`y[i] += x[i]`).

Each thread starts at its global one-based index and advances by the total
number of launched threads (a grid-stride loop), so any launch configuration
covers all elements. The loop is bounded by the shorter of the two arrays,
which keeps the `@inbounds` accesses valid even if `x` and `y` differ in
length. Returns `nothing`, as required of a kernel.
"""
function gpu_add3!(y, x)
    # Global one-based index of this thread across the whole grid.
    index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    # Total number of threads launched in the grid.
    stride = gridDim().x * blockDim().x
    # Never index past the end of either array: bounds checks are disabled below.
    n = min(length(y), length(x))
    for i in index:stride:n
        @inbounds y[i] += x[i]
    end
    return nothing
end

N = 2^20
x_d = CUDA.fill(1.0f0, N)  # filled with Float32 1.0 on GPU
y_d = CUDA.fill(2.0f0, N)  # filled with Float32 2.0

# run with 256 threads per block
threadsperblock = 256
# Integer ceiling division: enough blocks so that every element gets a thread.
numblocks = cld(N, threadsperblock)

@cuda threads=threadsperblock blocks=numblocks gpu_add3!(y_d, x_d)

# Array(y_d) brings the result into a host Array for the comparison.
# Outside a @testset, @test returns a result object on success and throws on failure.
result = (@test all(Array(y_d) .== 3.0f0))
println(result)