# RUN: %PYTHON %s | FileCheck %s
from mlir.ir import *
import mlir.ir as ir
from mlir.dialects import gpu, func, arith, math
from mlir.extras import types as T
import mlir.dialects.gpu.passes
from mlir.passmanager import *
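

# Helper decorator: prints a "TEST: <name>" header, runs the test inside a
# fresh Context with an unknown Location, and returns the function unchanged.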
def run(f):
    print("\nTEST:", f.__name__)
    with Context(), Location.unknown():
        f()
    return f
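

# Verify that the GPU transformation passes are registered: a pipeline using
# gpu-kernel-outlining must parse without raising.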
# CHECK-LABEL: testGPUPass
# CHECK: SUCCESS
@run
def testGPUPass():
    PassManager.parse("any(gpu-kernel-outlining)")
    print("SUCCESS")
# CHECK-LABEL: testMMAElementWiseAttr
@run
def testMMAElementWiseAttr():
    module = Module.create()
    with InsertionPoint(module.body):
        gpu.BlockDimOp(gpu.Dimension.y)
    # CHECK: %block_dim_y = gpu.block_dim y
    print(module)
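

# Construct #gpu.object attributes from Python and check that the payload,
# the optional properties dictionary, and the optional kernel metadata table
# all round-trip through the accessors.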
# CHECK-LABEL: testObjectAttr
@run
def testObjectAttr():
    target = Attribute.parse("#nvvm.target")
    format = gpu.CompilationTarget.Fatbin
    object = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"
    properties = DictAttr.get({"O": IntegerAttr.get(IntegerType.get_signless(32), 2)})
    o = gpu.ObjectAttr.get(target, format, object, properties)
    # CHECK: #gpu.object<#nvvm.target, properties = {O = 2 : i32}, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.object == object

    o = gpu.ObjectAttr.get(target, format, object)
    # CHECK: #gpu.object<#nvvm.target, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)

    object = (
        b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.0\n.target sm_50"
    )
    o = gpu.ObjectAttr.get(target, format, object)
    # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50">
    print(o)
    assert o.object == object

    object = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"
    kernelTable = Attribute.parse(
        '#gpu.kernel_table<[#gpu.kernel_metadata<"kernel", () -> ()>]>'
    )
    o = gpu.ObjectAttr.get(target, format, object, kernels=kernelTable)
    # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.kernels == kernelTable
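

# Exercise gpu.func construction two ways: explicitly, from a TypeAttr plus
# raw attributes, and through the builder keywords (sym_name, kernel,
# body_builder, known_block_size, known_grid_size, arg_attrs).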
# CHECK-LABEL: testGPUFuncOp
@run
def testGPUFuncOp():
    assert gpu.GPUFuncOp.__doc__ is not None

    module = Module.create()
    with InsertionPoint(module.body):
        gpu_module_name = StringAttr.get("gpu_module")
        gpumodule = gpu.GPUModuleOp(gpu_module_name)
        block = gpumodule.bodyRegion.blocks.append()

        def builder(func: gpu.GPUFuncOp) -> None:
            gpu.GlobalIdOp(gpu.Dimension.x)
            gpu.ReturnOp([])

        with InsertionPoint(block):
            name = StringAttr.get("kernel0")
            func_type = ir.FunctionType.get(inputs=[], results=[])
            type_attr = TypeAttr.get(func_type)
            func = gpu.GPUFuncOp(type_attr, name)
            func.attributes["sym_name"] = name
            func.attributes["gpu.kernel"] = UnitAttr.get()
            try:
                func.entry_block
                assert False, "Expected RuntimeError"
            except RuntimeError as e:
                assert (
                    str(e)
                    == "Entry block does not exist for kernel0. Do you need to call the add_entry_block() method on this GPUFuncOp?"
                )

            block = func.add_entry_block()
            with InsertionPoint(block):
                builder(func)

            # Calling add_entry_block() a second time must also raise.
            try:
                func.add_entry_block()
                assert False, "Expected RuntimeError"
            except RuntimeError as e:
                assert str(e) == "Entry block already exists for kernel0"
            func = gpu.GPUFuncOp(
                func_type,
                sym_name="kernel1",
                kernel=True,
                body_builder=builder,
                known_block_size=[1, 2, 3],
                known_grid_size=DenseI32ArrayAttr.get([4, 5, 6]),
            )

            assert func.name.value == "kernel1"
            assert func.function_type.value == func_type
            assert func.arg_attrs is None
            assert func.res_attrs is None
            assert func.arguments == []
            assert func.entry_block == func.body.blocks[0]
            assert func.is_kernel
            assert func.known_block_size == DenseI32ArrayAttr.get(
                [1, 2, 3]
            ), func.known_block_size
            assert func.known_grid_size == DenseI32ArrayAttr.get(
                [4, 5, 6]
            ), func.known_grid_size
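
            # Without kernel=True the op is a plain (non-kernel) gpu.func;
            # per-argument attributes are supplied via arg_attrs.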
            func = gpu.GPUFuncOp(
                ir.FunctionType.get(inputs=[T.index()], results=[]),
                sym_name="non_kernel_func",
                body_builder=builder,
                arg_attrs=[{"gpu.some_attribute": ir.StringAttr.get("foo")}],
            )
            assert not func.is_kernel
            assert func.known_block_size is None
            assert func.known_grid_size is None

    print(module)

    # CHECK: gpu.module @gpu_module
    # CHECK: gpu.func @kernel0() kernel {
    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: gpu.func @kernel1() kernel attributes
    # CHECK-SAME: known_block_size = array<i32: 1, 2, 3>
    # CHECK-SAME: known_grid_size = array<i32: 4, 5, 6>
    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: gpu.func @non_kernel_func(
    # CHECK-SAME: %[[ARG0:.*]]: index {gpu.some_attribute = "foo"}) {
    # CHECK: %[[GLOBAL_ID_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
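

# Build a host func.func that launches a kernel with gpu.launch_func and
# chains async gpu.wait tokens around the launch.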
# CHECK-LABEL: testGPULaunchFuncOp
@run
def testGPULaunchFuncOp():
    module = Module.create()
    module.operation.attributes["gpu.container_module"] = UnitAttr.get()

    with InsertionPoint(module.body):
        gpu_module = gpu.GPUModuleOp("gpu_module")
        block = gpu_module.bodyRegion.blocks.append()

    with InsertionPoint(block):
        gpu_func = gpu.GPUFuncOp(
            FunctionType.get([], []),
            "kernel",
            body_builder=lambda func: gpu.return_([]),
            kernel=True,
        )

    with InsertionPoint(module.body):
        host = func.FuncOp(type=FunctionType.get([], []), name="host")

    with InsertionPoint(host.add_entry_block()):
        c1 = arith.constant(T.index(), 1)
        grid_sizes = (1, 1, 1)
        block_sizes = (1, 1, 1)

        token = gpu.wait()
        token = gpu.launch_func(
            async_dependencies=[token],
            kernel=[gpu_module.sym_name.value, gpu_func.name.value],
            grid_size=grid_sizes,
            block_size=block_sizes,
            kernel_operands=[],
        )
        gpu.wait(async_dependencies=[token])
        func.ReturnOp([])

    print(module)

    # CHECK-LABEL: gpu.module @gpu_module {
    # CHECK: gpu.func @kernel() kernel {
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: }

    # CHECK-LABEL: func.func @host() {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: %[[WAIT_0:.*]] = gpu.wait async
    # CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
    # CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]])
    # CHECK: %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]]
    # CHECK: return
    # CHECK: }
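

# Build a gpu.launch region via the launch helper; the body lambda receives
# the block and thread ids as its arguments.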
# CHECK-LABEL: testGPULaunchOp
@run
def testGPULaunchOp():
    module = Module.create()
    with InsertionPoint(module.body):
        host = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf")

    entry_block = host.add_entry_block()
    with InsertionPoint(entry_block):
        c1 = arith.constant(T.index(), 1)
        grid_sizes = (c1, c1, c1)
        block_sizes = (c1, c1, c1)

        launch = gpu.launch(grid_sizes, block_sizes)
        op = launch(lambda *args: gpu.printf("%f", args[0]))

    with InsertionPoint(entry_block):
        func.ReturnOp([])

    print(module)

    # CHECK-LABEL: func.func @gpu_printf(
    # CHECK-SAME: %[[ARG0:.*]]: f32) {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) {
    # CHECK: gpu.printf "%[[VAL_12:.*]]", %[[VAL_0]] : index
    # CHECK: gpu.terminator
    # CHECK: }
    # CHECK: return
    # CHECK: }