mlir/test/Dialect/GPU/promotion.mlir - llvm-project - Git at Google

 // RUN: mlir-opt -allow-unregistered-dialect -test-gpu-memory-promotion -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s | FileCheck %s

 gpu.module @foo {

   // Verify that the attribution was indeed introduced
   // CHECK-LABEL: @memref3d
   // CHECK-SAME: (%[[arg:.*]]: memref<5x4xf32>
   // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<5x4xf32, 3>)
   gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel {
     // Verify that loop bounds are emitted, the order does not matter.
     // CHECK-DAG: %[[c1:.*]] = arith.constant 1
     // CHECK-DAG: %[[c4:.*]] = arith.constant 4
     // CHECK-DAG: %[[c5:.*]] = arith.constant 5
     // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"}
     // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"}
     // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"}
     // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"}
     // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"}
     // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"}

     // Verify that loops for the copy are emitted. We only check the number of
     // loops here since their bounds are produced by mapLoopToProcessorIds,
     // tested separately.
     // CHECK: scf.for %[[i0:.*]] =
     // CHECK:   scf.for %[[i1:.*]] =
     // CHECK:     scf.for %[[i2:.*]] =

     // Verify that the copy is emitted and uses only the last two loops.
     // CHECK:       %[[v:.*]] = memref.load %[[arg]][%[[i1]], %[[i2]]]
     // CHECK:       store %[[v]], %[[promoted]][%[[i1]], %[[i2]]]

     // Verify that the use has been rewritten.
     // CHECK: "use"(%[[promoted]]) : (memref<5x4xf32, 3>)
     "use"(%arg0) : (memref<5x4xf32>) -> ()


     // Verify that loops for the copy are emitted. We only check the number of
     // loops here since their bounds are produced by mapLoopToProcessorIds,
     // tested separately.
     // CHECK: scf.for %[[i0:.*]] =
     // CHECK:   scf.for %[[i1:.*]] =
     // CHECK:     scf.for %[[i2:.*]] =

     // Verify that the copy is emitted and uses only the last two loops.
     // CHECK:       %[[v:.*]] = memref.load %[[promoted]][%[[i1]], %[[i2]]]
     // CHECK:       store %[[v]], %[[arg]][%[[i1]], %[[i2]]]
     gpu.return
   }
 }

 // -----

 gpu.module @foo {

   // Verify that the attribution was indeed introduced
   // CHECK-LABEL: @memref5d
   // CHECK-SAME: (%[[arg:.*]]: memref<8x7x6x5x4xf32>
   // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<8x7x6x5x4xf32, 3>)
   gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel {
     // Verify that loop bounds are emitted, the order does not matter.
     // CHECK-DAG: %[[c0:.*]] = arith.constant 0
     // CHECK-DAG: %[[c1:.*]] = arith.constant 1
     // CHECK-DAG: %[[c4:.*]] = arith.constant 4
     // CHECK-DAG: %[[c5:.*]] = arith.constant 5
     // CHECK-DAG: %[[c6:.*]] = arith.constant 6
     // CHECK-DAG: %[[c7:.*]] = arith.constant 7
     // CHECK-DAG: %[[c8:.*]] = arith.constant 8
     // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"}
     // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"}
     // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"}
     // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"}
     // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"}
     // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"}

     // Verify that loops for the copy are emitted.
     // CHECK: scf.for %[[i0:.*]] =
     // CHECK:   scf.for %[[i1:.*]] =
     // CHECK:     scf.for %[[i2:.*]] =
     // CHECK:       scf.for %[[i3:.*]] =
     // CHECK:         scf.for %[[i4:.*]] =

     // Verify that the copy is emitted.
     // CHECK:           %[[v:.*]] = memref.load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
     // CHECK:           store %[[v]], %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]

     // Verify that the use has been rewritten.
     // CHECK: "use"(%[[promoted]]) : (memref<8x7x6x5x4xf32, 3>)
     "use"(%arg0) : (memref<8x7x6x5x4xf32>) -> ()

     // Verify that loop loops for the copy are emitted.
     // CHECK: scf.for %[[i0:.*]] =
     // CHECK:   scf.for %[[i1:.*]] =
     // CHECK:     scf.for %[[i2:.*]] =
     // CHECK:       scf.for %[[i3:.*]] =
     // CHECK:         scf.for %[[i4:.*]] =

     // Verify that the copy is emitted.
     // CHECK:           %[[v:.*]] = memref.load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
     // CHECK:           store %[[v]], %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
     gpu.return
   }
 }

 // -----

 gpu.module @foo {

   // Check that attribution insertion works fine.
   // CHECK-LABEL: @insert
   // CHECK-SAME: (%{{.*}}: memref<4xf32>
   // CHECK-SAME: workgroup(%{{.*}}: memref<1x1xf64, 3>
   // CHECK-SAME: %[[wg2:.*]] : memref<4xf32, 3>)
   // CHECK-SAME: private(%{{.*}}: memref<1x1xi64, 5>)
   gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup})
       workgroup(%arg1: memref<1x1xf64, 3>)
       private(%arg2: memref<1x1xi64, 5>)
       kernel {
     // CHECK: "use"(%[[wg2]])
     "use"(%arg0) : (memref<4xf32>) -> ()
     gpu.return
   }
 }
	// RUN: mlir-opt -allow-unregistered-dialect -test-gpu-memory-promotion -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s \| FileCheck %s

	gpu.module @foo {

	// Verify that the attribution was indeed introduced
	// CHECK-LABEL: @memref3d
	// CHECK-SAME: (%[[arg:.*]]: memref<5x4xf32>
	// CHECK-SAME: workgroup(%[[promoted:.*]] : memref<5x4xf32, 3>)
	gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel {
	// Verify that loop bounds are emitted, the order does not matter.
	// CHECK-DAG: %[[c1:.*]] = arith.constant 1
	// CHECK-DAG: %[[c4:.*]] = arith.constant 4
	// CHECK-DAG: %[[c5:.*]] = arith.constant 5
	// CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"}
	// CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"}
	// CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"}
	// CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"}
	// CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"}
	// CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"}

	// Verify that loops for the copy are emitted. We only check the number of
	// loops here since their bounds are produced by mapLoopToProcessorIds,
	// tested separately.
	// CHECK: scf.for %[[i0:.*]] =
	// CHECK: scf.for %[[i1:.*]] =
	// CHECK: scf.for %[[i2:.*]] =

	// Verify that the copy is emitted and uses only the last two loops.
	// CHECK: %[[v:.*]] = memref.load %[[arg]][%[[i1]], %[[i2]]]
	// CHECK: store %[[v]], %[[promoted]][%[[i1]], %[[i2]]]

	// Verify that the use has been rewritten.
	// CHECK: "use"(%[[promoted]]) : (memref<5x4xf32, 3>)
	"use"(%arg0) : (memref<5x4xf32>) -> ()


	// Verify that loops for the copy are emitted. We only check the number of
	// loops here since their bounds are produced by mapLoopToProcessorIds,
	// tested separately.
	// CHECK: scf.for %[[i0:.*]] =
	// CHECK: scf.for %[[i1:.*]] =
	// CHECK: scf.for %[[i2:.*]] =

	// Verify that the copy is emitted and uses only the last two loops.
	// CHECK: %[[v:.*]] = memref.load %[[promoted]][%[[i1]], %[[i2]]]
	// CHECK: store %[[v]], %[[arg]][%[[i1]], %[[i2]]]
	gpu.return
	}
	}

	// -----

	gpu.module @foo {

	// Verify that the attribution was indeed introduced
	// CHECK-LABEL: @memref5d
	// CHECK-SAME: (%[[arg:.*]]: memref<8x7x6x5x4xf32>
	// CHECK-SAME: workgroup(%[[promoted:.*]] : memref<8x7x6x5x4xf32, 3>)
	gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel {
	// Verify that loop bounds are emitted, the order does not matter.
	// CHECK-DAG: %[[c0:.*]] = arith.constant 0
	// CHECK-DAG: %[[c1:.*]] = arith.constant 1
	// CHECK-DAG: %[[c4:.*]] = arith.constant 4
	// CHECK-DAG: %[[c5:.*]] = arith.constant 5
	// CHECK-DAG: %[[c6:.*]] = arith.constant 6
	// CHECK-DAG: %[[c7:.*]] = arith.constant 7
	// CHECK-DAG: %[[c8:.*]] = arith.constant 8
	// CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"}
	// CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"}
	// CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"}
	// CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"}
	// CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"}
	// CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"}

	// Verify that loops for the copy are emitted.
	// CHECK: scf.for %[[i0:.*]] =
	// CHECK: scf.for %[[i1:.*]] =
	// CHECK: scf.for %[[i2:.*]] =
	// CHECK: scf.for %[[i3:.*]] =
	// CHECK: scf.for %[[i4:.*]] =

	// Verify that the copy is emitted.
	// CHECK: %[[v:.*]] = memref.load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
	// CHECK: store %[[v]], %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]

	// Verify that the use has been rewritten.
	// CHECK: "use"(%[[promoted]]) : (memref<8x7x6x5x4xf32, 3>)
	"use"(%arg0) : (memref<8x7x6x5x4xf32>) -> ()

	// Verify that loop loops for the copy are emitted.
	// CHECK: scf.for %[[i0:.*]] =
	// CHECK: scf.for %[[i1:.*]] =
	// CHECK: scf.for %[[i2:.*]] =
	// CHECK: scf.for %[[i3:.*]] =
	// CHECK: scf.for %[[i4:.*]] =

	// Verify that the copy is emitted.
	// CHECK: %[[v:.*]] = memref.load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
	// CHECK: store %[[v]], %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
	gpu.return
	}
	}

	// -----

	gpu.module @foo {

	// Check that attribution insertion works fine.
	// CHECK-LABEL: @insert
	// CHECK-SAME: (%{{.*}}: memref<4xf32>
	// CHECK-SAME: workgroup(%{{.*}}: memref<1x1xf64, 3>
	// CHECK-SAME: %[[wg2:.*]] : memref<4xf32, 3>)
	// CHECK-SAME: private(%{{.*}}: memref<1x1xi64, 5>)
	gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup})
	workgroup(%arg1: memref<1x1xf64, 3>)
	private(%arg2: memref<1x1xi64, 5>)
	kernel {
	// CHECK: "use"(%[[wg2]])
	"use"(%arg0) : (memref<4xf32>) -> ()
	gpu.return
	}
	}