# RUN: %PYTHON %s | FileCheck %s
from mlir.ir import *
import mlir.ir as ir
from mlir.dialects import gpu, func, arith, math
from mlir.extras import types as T
import mlir.dialects.gpu.passes
from mlir.passmanager import *
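

# Helper decorator: prints a "TEST: <name>" header, runs the test inside a
# fresh Context with an unknown Location, and returns the function unchanged.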
def run(f):
    print("\nTEST:", f.__name__)
    with Context(), Location.unknown():
        f()
    return f
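

# Verify that the GPU transformation passes are registered: a pipeline using
# gpu-kernel-outlining must parse without raising.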
# CHECK-LABEL: testGPUPass
# CHECK: SUCCESS
@run
def testGPUPass():
    PassManager.parse("any(gpu-kernel-outlining)")
    print("SUCCESS")
# CHECK-LABEL: testMMAElementWiseAttr
@run
def testMMAElementWiseAttr():
    module = Module.create()
    with InsertionPoint(module.body):
        gpu.BlockDimOp(gpu.Dimension.y)
    # CHECK: %block_dim_y = gpu.block_dim y
    print(module)
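

# Construct #gpu.object attributes from Python and check that the payload,
# the optional properties dictionary, and the optional kernel metadata table
# all round-trip through the accessors.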
# CHECK-LABEL: testObjectAttr
@run
def testObjectAttr():
    target = Attribute.parse("#nvvm.target")
    format = gpu.CompilationTarget.Fatbin
    object = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"
    properties = DictAttr.get({"O": IntegerAttr.get(IntegerType.get_signless(32), 2)})
    o = gpu.ObjectAttr.get(target, format, object, properties)
    # CHECK: #gpu.object<#nvvm.target, properties = {O = 2 : i32}, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.object == object

    o = gpu.ObjectAttr.get(target, format, object)
    # CHECK: #gpu.object<#nvvm.target, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)

    object = (
        b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.0\n.target sm_50"
    )
    o = gpu.ObjectAttr.get(target, format, object)
    # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50">
    print(o)
    assert o.object == object

    object = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"
    kernelTable = Attribute.parse(
        '#gpu.kernel_table<[#gpu.kernel_metadata<"kernel", () -> ()>]>'
    )
    o = gpu.ObjectAttr.get(target, format, object, kernels=kernelTable)
    # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.kernels == kernelTable
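

# Exercise gpu.func construction two ways: explicitly, from a TypeAttr plus
# raw attributes, and through the builder keywords (sym_name, kernel,
# body_builder, known_block_size, known_grid_size, arg_attrs).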
# CHECK-LABEL: testGPUFuncOp
@run
def testGPUFuncOp():
    assert gpu.GPUFuncOp.__doc__ is not None

    module = Module.create()
    with InsertionPoint(module.body):
        gpu_module_name = StringAttr.get("gpu_module")
        gpumodule = gpu.GPUModuleOp(gpu_module_name)
        block = gpumodule.bodyRegion.blocks.append()

        def builder(func: gpu.GPUFuncOp) -> None:
            gpu.GlobalIdOp(gpu.Dimension.x)
            gpu.ReturnOp([])

        with InsertionPoint(block):
            name = StringAttr.get("kernel0")
            func_type = ir.FunctionType.get(inputs=[], results=[])
            type_attr = TypeAttr.get(func_type)
            func = gpu.GPUFuncOp(type_attr, name)
            func.attributes["sym_name"] = name
            func.attributes["gpu.kernel"] = UnitAttr.get()
            try:
                func.entry_block
                assert False, "Expected RuntimeError"
            except RuntimeError as e:
                assert (
                    str(e)
                    == "Entry block does not exist for kernel0. Do you need to call the add_entry_block() method on this GPUFuncOp?"
                )

            block = func.add_entry_block()
            with InsertionPoint(block):
                builder(func)

            # Calling add_entry_block() a second time must also raise.
            try:
                func.add_entry_block()
                assert False, "Expected RuntimeError"
            except RuntimeError as e:
                assert str(e) == "Entry block already exists for kernel0"
            func = gpu.GPUFuncOp(
                func_type,
                sym_name="kernel1",
                kernel=True,
                body_builder=builder,
                known_block_size=[1, 2, 3],
                known_grid_size=DenseI32ArrayAttr.get([4, 5, 6]),
            )

            assert func.name.value == "kernel1"
            assert func.function_type.value == func_type
            assert func.arg_attrs is None
            assert func.res_attrs is None
            assert func.arguments == []
            assert func.entry_block == func.body.blocks[0]
            assert func.is_kernel
            assert func.known_block_size == DenseI32ArrayAttr.get(
                [1, 2, 3]
            ), func.known_block_size
            assert func.known_grid_size == DenseI32ArrayAttr.get(
                [4, 5, 6]
            ), func.known_grid_size
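
            # Without kernel=True the op is a plain (non-kernel) gpu.func;
            # per-argument attributes are supplied via arg_attrs.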
            func = gpu.GPUFuncOp(
                ir.FunctionType.get(inputs=[T.index()], results=[]),
                sym_name="non_kernel_func",
                body_builder=builder,
                arg_attrs=[{"gpu.some_attribute": ir.StringAttr.get("foo")}],
            )
            assert not func.is_kernel
            assert func.known_block_size is None
            assert func.known_grid_size is None

    print(module)

    # CHECK: gpu.module @gpu_module
    # CHECK: gpu.func @kernel0() kernel {
    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: gpu.func @kernel1() kernel attributes
    # CHECK-SAME: known_block_size = array<i32: 1, 2, 3>
    # CHECK-SAME: known_grid_size = array<i32: 4, 5, 6>
    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: gpu.func @non_kernel_func(
    # CHECK-SAME: %[[ARG0:.*]]: index {gpu.some_attribute = "foo"}) {
    # CHECK: %[[GLOBAL_ID_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
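

# Build a host func.func that launches a kernel with gpu.launch_func and
# chains async gpu.wait tokens around the launch.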
# CHECK-LABEL: testGPULaunchFuncOp
@run
def testGPULaunchFuncOp():
    module = Module.create()
    module.operation.attributes["gpu.container_module"] = UnitAttr.get()

    with InsertionPoint(module.body):
        gpu_module = gpu.GPUModuleOp("gpu_module")
        block = gpu_module.bodyRegion.blocks.append()

    with InsertionPoint(block):
        gpu_func = gpu.GPUFuncOp(
            FunctionType.get([], []),
            "kernel",
            body_builder=lambda func: gpu.return_([]),
            kernel=True,
        )

    with InsertionPoint(module.body):
        host = func.FuncOp(type=FunctionType.get([], []), name="host")

    with InsertionPoint(host.add_entry_block()):
        c1 = arith.constant(T.index(), 1)
        grid_sizes = (1, 1, 1)
        block_sizes = (1, 1, 1)

        token = gpu.wait()
        token = gpu.launch_func(
            async_dependencies=[token],
            kernel=[gpu_module.sym_name.value, gpu_func.name.value],
            grid_size=grid_sizes,
            block_size=block_sizes,
            kernel_operands=[],
        )
        gpu.wait(async_dependencies=[token])
        func.ReturnOp([])

    print(module)

    # CHECK-LABEL: gpu.module @gpu_module {
    # CHECK: gpu.func @kernel() kernel {
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: }

    # CHECK-LABEL: func.func @host() {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: %[[WAIT_0:.*]] = gpu.wait async
    # CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
    # CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]])
    # CHECK: %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]]
    # CHECK: return
    # CHECK: }
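

# Build a gpu.launch region via the launch helper; the body lambda receives
# the block and thread ids as its arguments.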
# CHECK-LABEL: testGPULaunchOp
@run
def testGPULaunchOp():
    module = Module.create()
    with InsertionPoint(module.body):
        host = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf")

    entry_block = host.add_entry_block()
    with InsertionPoint(entry_block):
        c1 = arith.constant(T.index(), 1)
        grid_sizes = (c1, c1, c1)
        block_sizes = (c1, c1, c1)

        launch = gpu.launch(grid_sizes, block_sizes)
        op = launch(lambda *args: gpu.printf("%f", args[0]))

    with InsertionPoint(entry_block):
        func.ReturnOp([])

    print(module)

    # CHECK-LABEL: func.func @gpu_printf(
    # CHECK-SAME: %[[ARG0:.*]]: f32) {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) {
    # CHECK: gpu.printf "%[[VAL_12:.*]]", %[[VAL_0]] : index
    # CHECK: gpu.terminator
    # CHECK: }
    # CHECK: return
    # CHECK: }