| // RUN: mlir-opt %s --launch-func-to-cuda | FileCheck %s |
| |
// Test input for the pass named on the first line of this file: a container
// module (marked gpu.container_module) that holds one kernel module and one
// host function launching that kernel. The FileCheck directives interleaved
// below describe the output expected from the pass, in the order written.
| module attributes {gpu.container_module} { |
| |
// Two internal constant globals are expected in the output: one holding the
// null-terminated kernel name and one holding the same string that the
// nvvm.cubin attribute of @kernel_module carries.
| // CHECK: llvm.mlir.global internal constant @[[kernel_name:.*]]("kernel\00") |
| // CHECK: llvm.mlir.global internal constant @[[global:.*]]("CUBIN") |
| |
// Kernel module: tagged gpu.kernel_module and annotated with a placeholder
// nvvm.cubin string. It contains a single gpu.kernel function taking a float
// and a float pointer; the body does nothing but return.
| module @kernel_module attributes {gpu.kernel_module, nvvm.cubin = "CUBIN"} { |
| gpu.func @kernel(%arg0: !llvm.float, %arg1: !llvm<"float*">) attributes {gpu.kernel} { |
| gpu.return |
| } |
| } |
| |
// Host function containing the gpu.launch_func operation under test.
| llvm.func @foo() { |
// %0 and %1 come from unregistered placeholder ops and serve as the two kernel
// arguments; %cst (the constant 8) is reused for all six leading launch
// operands (presumably the grid and block sizes -- confirm against the
// gpu.launch_func operand layout).
| %0 = "op"() : () -> (!llvm.float) |
| %1 = "op"() : () -> (!llvm<"float*">) |
| %cst = llvm.mlir.constant(8 : index) : !llvm.i64 |
| |
// Expected output for the launch below, in order: take the address of the
// second global captured above and index it down to an i8 pointer, allocate a
// slot and call mcuModuleLoad with that slot and the pointer, allocate a
// second slot and call mcuModuleGetFunction with it, then call
// mcuGetStreamHelper, mcuLaunchKernel and mcuStreamSynchronize.
| // CHECK: %[[addressof:.*]] = llvm.mlir.addressof @[[global]] |
| // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) |
| // CHECK: %[[cubin_ptr:.*]] = llvm.getelementptr %[[addressof]][%[[c0]], %[[c0]]] |
| // CHECK-SAME: -> !llvm<"i8*"> |
| // CHECK: %[[module_ptr:.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> |
| // CHECK: llvm.call @mcuModuleLoad(%[[module_ptr]], %[[cubin_ptr]]) : (!llvm<"i8**">, !llvm<"i8*">) -> !llvm.i32 |
| // CHECK: %[[func_ptr:.*]] = llvm.alloca {{.*}} x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> |
| // CHECK: llvm.call @mcuModuleGetFunction(%[[func_ptr]], {{.*}}, {{.*}}) : (!llvm<"i8**">, !llvm<"i8*">, !llvm<"i8*">) -> !llvm.i32 |
| // CHECK: llvm.call @mcuGetStreamHelper |
| // CHECK: llvm.call @mcuLaunchKernel |
| // CHECK: llvm.call @mcuStreamSynchronize |
// Launch of "kernel" from @kernel_module, written in generic operation form:
// six i64 operands followed by the float and float-pointer kernel arguments.
| "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel", kernel_module = @kernel_module } |
| : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.float, !llvm<"float*">) -> () |
| |
| llvm.return |
| } |
| |
| } |