// blob: 2c0a3e80b16e7ddbe0f63f5ed89d78ddc48ac96c [file] [log] [blame]
#include <gpuintrin.h>
#include <stdint.h>
// Scratch array of 32-bit words declared with the `__gpu_local` qualifier from
// <gpuintrin.h>. It is declared `extern` with no bound, so its size is not
// fixed in this file.
// NOTE(review): presumably the size is supplied by the host at kernel launch
// (dynamic local/shared memory) — confirm against the launcher.
extern __gpu_local uint32_t shared_mem[];
// Per-block sum over `shared_mem`.
//
// Every thread stores the constant 2 into `shared_mem` at its own dimension-0
// thread index. After the barrier, the thread with index 0 sums the first
// `__gpu_num_threads(0)` entries and writes the total to
// `out[__gpu_block_id(0)]`, so each block reports 2 * __gpu_num_threads(0).
//
// Preconditions implied by the indexing below:
//   - `shared_mem` holds at least `__gpu_num_threads(0)` elements.
//   - `out` holds at least one element per dimension-0 block index.
//
// Only dimension 0 of the thread/block indices is used.
extern "C" __gpu_kernel void localmem_reduction(uint32_t *out) {
  shared_mem[__gpu_thread_id(0)] = 2;

  // Every thread's store above must complete before thread 0 reads the
  // slots written by the other threads.
  __gpu_sync_threads();

  if (__gpu_thread_id(0) == 0) {
    // Accumulate in a local variable and store once, rather than doing a
    // read-modify-write on `out` for every element.
    const uint32_t num_threads = __gpu_num_threads(0);
    uint32_t sum = 0;
    for (uint32_t i = 0; i < num_threads; ++i)
      sum += shared_mem[i];
    out[__gpu_block_id(0)] = sum;
  }
}