mlir/test/Integration/GPU/CUDA/alloc-host-shared.mlir - llvm-project - Git at Google

 // RUN: mlir-opt %s \
 // RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
 // RUN: | mlir-runner \
 // RUN:   --shared-libs=%mlir_cuda_runtime \
 // RUN:   --shared-libs=%mlir_runner_utils \
 // RUN:   --shared-libs=%mlir_c_runner_utils \
 // RUN:   --entry-point-result=void \
 // RUN: | FileCheck %s

 // CHECK: 2000
 // Round-trips a value through a one-element buffer created with
 // `gpu.alloc host_shared`: the host stores 1000, a kernel launched with a
 // single block of a single thread doubles the value in place, and the host
 // reads it back and prints it (expected output: 2000).
 module attributes {gpu.container_module} {
   func.func @main() {
     // %c1 sizes every grid and block dimension of the launch below.
     %c1 = arith.constant 1 : index
     // %c0 is the only in-bounds index of a memref<1xi32>.
     %c0 = arith.constant 0 : index
     %c1000_i32 = arith.constant 1000 : i32
     // The same buffer is written by the host, updated inside gpu.launch,
     // and read again by the host.
     %memref = gpu.alloc host_shared () : memref<1xi32>
     // Host-side initialization of element 0.
     memref.store %c1000_i32, %memref[%c0] : memref<1xi32>
     gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
       // Kernel body: element 0 <- element 0 + element 0.
       %1 = memref.load %memref[%c0] : memref<1xi32>
       %2 = arith.addi %1, %1 : i32
       memref.store %2, %memref[%c0] : memref<1xi32>
       gpu.terminator
     }
     // Host-side read-back of the value written by the kernel.
     %0 = memref.load %memref[%c0] : memref<1xi32>
     vector.print %0 : i32
     return
   }
 }
	// RUN: mlir-opt %s \
	// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
	// RUN: | mlir-runner \
	// RUN:   --shared-libs=%mlir_cuda_runtime \
	// RUN:   --shared-libs=%mlir_runner_utils \
	// RUN:   --shared-libs=%mlir_c_runner_utils \
	// RUN:   --entry-point-result=void \
	// RUN: | FileCheck %s

	// CHECK: 2000
	// Round-trips a value through a one-element buffer created with
	// `gpu.alloc host_shared`: the host stores 1000, a kernel launched with a
	// single block of a single thread doubles the value in place, and the host
	// reads it back and prints it (expected output: 2000).
	module attributes {gpu.container_module} {
	  func.func @main() {
	    // %c1 sizes every grid and block dimension of the launch below.
	    %c1 = arith.constant 1 : index
	    // %c0 is the only in-bounds index of a memref<1xi32>.
	    %c0 = arith.constant 0 : index
	    %c1000_i32 = arith.constant 1000 : i32
	    // The same buffer is written by the host, updated inside gpu.launch,
	    // and read again by the host.
	    %memref = gpu.alloc host_shared () : memref<1xi32>
	    // Host-side initialization of element 0.
	    memref.store %c1000_i32, %memref[%c0] : memref<1xi32>
	    gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
	      // Kernel body: element 0 <- element 0 + element 0.
	      %1 = memref.load %memref[%c0] : memref<1xi32>
	      %2 = arith.addi %1, %1 : i32
	      memref.store %2, %memref[%c0] : memref<1xi32>
	      gpu.terminator
	    }
	    // Host-side read-back of the value written by the kernel.
	    %0 = memref.load %memref[%c0] : memref<1xi32>
	    vector.print %0 : i32
	    return
	  }
	}