// mlir/test/Dialect/Transform/test-loop-transforms.mlir - llvm-project - Git at Google

 // RUN: mlir-opt %s --transform-interpreter --split-input-file \
 // RUN:     --verify-diagnostics | FileCheck %s

 // UNSUPPORTED: target=aarch64-pc-windows-msvc

 // Payload for the basic hoisting case: a matching extract_slice/insert_slice
 // pair on the loop-carried tensor. The patterns below require the extraction
 // to appear before the loop, the slice to be carried as a second iter_arg, and
 // the insertion to appear after the loop.
 // CHECK-LABEL: func @test_loop_invariant_subset_hoisting(
 //  CHECK-SAME:     %[[arg:.*]]: tensor<?xf32>
 func.func @test_loop_invariant_subset_hoisting(%arg: tensor<?xf32>) -> tensor<?xf32> {
   // Loop bounds and step are opaque values produced by "test.foo".
   %lb = "test.foo"() : () -> (index)
   %ub = "test.foo"() : () -> (index)
   %step = "test.foo"() : () -> (index)
   // CHECK: %[[extract:.*]] = tensor.extract_slice %[[arg]]
   // CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[t:.*]] = %[[arg]], %[[hoisted:.*]] = %[[extract]])
   // expected-remark @below{{new loop op}}
   %0 = scf.for %iv = %lb to %ub step %step iter_args(%t = %arg) -> (tensor<?xf32>) {
     %1 = tensor.extract_slice %t[0][5][1] : tensor<?xf32> to tensor<5xf32>
     // CHECK: %[[foo:.*]] = "test.foo"(%[[hoisted]])
     %2 = "test.foo"(%1) : (tensor<5xf32>) -> (tensor<5xf32>)
     // The slice is written back at the same offset/size/stride ([0][5][1]) it
     // was read from, forming the extract/insert pair that gets hoisted.
     %3 = tensor.insert_slice %2 into %t[0][5][1] : tensor<5xf32> into tensor<?xf32>
     // CHECK: scf.yield %[[t]], %[[foo]]
     scf.yield %3 : tensor<?xf32>
   }
   // CHECK: %[[insert:.*]] = tensor.insert_slice %[[for]]#1 into %[[for]]#0
   // CHECK: return %[[insert]]
   return %0 : tensor<?xf32>
 }

 // Transform script for the test above: matches the loop and both slice ops
 // up front, runs subset hoisting on the loop handle, then reports how many
 // payload ops each of the three handles is associated with afterwards.
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
     // Handles are taken before the transformation so that their state after
     // hoisting can be inspected below.
     %0 = transform.structured.match ops{["scf.for"]} in %arg0 : (!transform.any_op) -> !transform.any_op
     %1 = transform.structured.match ops{["tensor.extract_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op
     %2 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op

     transform.loop.hoist_loop_invariant_subsets %0 : !transform.any_op
     // Make sure that the handles are still valid (and were updated in case of
     // the loop).

     %p = transform.num_associations %0 : (!transform.any_op) -> !transform.param<i64>
     // expected-remark @below{{1}}
     transform.debug.emit_param_as_remark %p : !transform.param<i64>
     // Emits "new loop op" at whatever op the loop handle now points to; the
     // payload function carries the matching diagnostic check on its scf.for.
     transform.debug.emit_remark_at %0, "new loop op" : !transform.any_op
     %p2 = transform.num_associations %1 : (!transform.any_op) -> !transform.param<i64>
     // expected-remark @below{{1}}
     transform.debug.emit_param_as_remark %p2 : !transform.param<i64>
     %p3 = transform.num_associations %2 : (!transform.any_op) -> !transform.param<i64>
     // expected-remark @below{{1}}
     transform.debug.emit_param_as_remark %p3 : !transform.param<i64>

     transform.yield
   }
 }

 // -----

 // Regression test for `collectHoistableOps` not being passed to `insert()` in
 // `populateSubsetOpsAtIterArg`. Without the fix, when recursing into nested
 // loops, inner loop subset ops were incorrectly added to the outer loop's
 // hoistable extraction/insertion pairs. If the outer loop were processed before
 // the inner loop (pre-order), this would cause ops inside the inner loop to be
 // moved across region boundaries, producing invalid IR.
 //
 // This test uses `transform.split_handle` to process ONLY the outer loop,
 // simulating pre-order processing. With the fix, inner loop ops are only
 // collected in `allSubsetOps` (for disjointness checking) but NOT added to
 // the hoistable extractions/insertions of the outer loop. Thus only the outer
 // [0][5][1] pair is hoisted, and the inner [5][5][1] pair remains untouched.

 // Payload: two nested scf.for loops, each with its own extract/insert pair on
 // its loop-carried tensor (outer at [0][5][1], inner at [5][5][1]).
 // CHECK-LABEL: func @hoist_outer_loop_only(
 //  CHECK-SAME:     %[[arg:.*]]: tensor<?xf32>
 func.func @hoist_outer_loop_only(%arg: tensor<?xf32>) -> tensor<?xf32> {
   %lb = "test.foo"() : () -> (index)
   %ub = "test.foo"() : () -> (index)
   %step = "test.foo"() : () -> (index)

   // Only the outer [0][5][1] pair is hoisted. The inner [5][5][1] pair stays
   // inside the inner loop.
   // CHECK: %[[slice:.*]] = tensor.extract_slice %[[arg]][0] [5] [1]
   // CHECK: %[[outer:.*]]:2 = scf.for {{.*}} iter_args(%[[t:.*]] = %[[arg]], %[[hs:.*]] = %[[slice]])
   // CHECK:   %[[inner:.*]] = scf.for {{.*}} iter_args(%[[t2:.*]] = %[[t]])
   // CHECK:     tensor.extract_slice %[[t2]][5] [5] [1]
   // CHECK:     tensor.insert_slice
   %0 = scf.for %iv = %lb to %ub step %step iter_args(%t = %arg) -> (tensor<?xf32>) {
     // Outer pair: operates on the outer iter_arg %t at [0][5][1].
     %1 = tensor.extract_slice %t[0][5][1] : tensor<?xf32> to tensor<5xf32>
     %2 = "test.foo"(%1) : (tensor<5xf32>) -> (tensor<5xf32>)
     %3 = tensor.insert_slice %2 into %t[0][5][1] : tensor<5xf32> into tensor<?xf32>
     // Inner loop is seeded with the outer insertion result and has its own
     // pair at [5][5][1] on the inner iter_arg %t2.
     %4 = scf.for %iv2 = %lb to %ub step %step iter_args(%t2 = %3) -> (tensor<?xf32>) {
       %5 = tensor.extract_slice %t2[5][5][1] : tensor<?xf32> to tensor<5xf32>
       %6 = "test.foo"(%5) : (tensor<5xf32>) -> (tensor<5xf32>)
       %7 = tensor.insert_slice %6 into %t2[5][5][1] : tensor<5xf32> into tensor<?xf32>
       scf.yield %7 : tensor<?xf32>
     }
     scf.yield %4 : tensor<?xf32>
   }
   return %0 : tensor<?xf32>
 }

 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
     // Gather every scf.for in the payload. The match lists the loops
     // innermost-first, so the split below yields the inner loop first and
     // the outer loop second.
     %all_loops = transform.structured.match ops{["scf.for"]} in %root : (!transform.any_op) -> !transform.any_op
     // Give each loop its own handle, then hoist on the outer loop alone.
     %inner_loop, %outer_loop = transform.split_handle %all_loops : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
     transform.loop.hoist_loop_invariant_subsets %outer_loop : !transform.any_op
     transform.yield
   }
 }

 // -----

 // Checks that transform ops from LoopExtensionOps and SCFTransformOps can be
 // used together.

 // CHECK-LABEL: func @test_mixed_loop_extension_scf_transform(
 func.func @test_mixed_loop_extension_scf_transform(%arg: tensor<?xf32>) -> tensor<?xf32> {
   // Opaque loop bounds and step.
   %lower = "test.foo"() : () -> (index)
   %upper = "test.foo"() : () -> (index)
   %stride = "test.foo"() : () -> (index)
   // The output must contain two scf.for ops after the transform script runs.
   // CHECK: scf.for
   // CHECK: scf.for
   %result = scf.for %i = %lower to %upper step %stride iter_args(%acc = %arg) -> (tensor<?xf32>) {
     // No subset ops here; the body only threads the tensor through "test.foo".
     %next = "test.foo"(%acc) : (tensor<?xf32>) -> (tensor<?xf32>)
     scf.yield %next : tensor<?xf32>
   }
   return %result : tensor<?xf32>
 }

 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
     %loop = transform.structured.match ops{["scf.for"]} in %root : (!transform.any_op) -> !transform.any_op
     // Apply subset hoisting first, then unroll by 4 through the same handle,
     // exercising both loop transform ops in one script.
     transform.loop.hoist_loop_invariant_subsets %loop : !transform.any_op
     transform.loop.unroll %loop { factor = 4 } : !transform.any_op
     transform.yield
   }
 }
	// RUN: mlir-opt %s --transform-interpreter --split-input-file \
	// RUN:     --verify-diagnostics | FileCheck %s

	// UNSUPPORTED: target=aarch64-pc-windows-msvc

	// Payload for the basic hoisting case: a matching extract_slice/insert_slice
	// pair on the loop-carried tensor. The patterns below require the extraction
	// to appear before the loop, the slice to be carried as a second iter_arg, and
	// the insertion to appear after the loop.
	// CHECK-LABEL: func @test_loop_invariant_subset_hoisting(
	// CHECK-SAME: %[[arg:.*]]: tensor<?xf32>
	func.func @test_loop_invariant_subset_hoisting(%arg: tensor<?xf32>) -> tensor<?xf32> {
	  // Loop bounds and step are opaque values produced by "test.foo".
	  %lb = "test.foo"() : () -> (index)
	  %ub = "test.foo"() : () -> (index)
	  %step = "test.foo"() : () -> (index)
	  // CHECK: %[[extract:.*]] = tensor.extract_slice %[[arg]]
	  // CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[t:.*]] = %[[arg]], %[[hoisted:.*]] = %[[extract]])
	  // expected-remark @below{{new loop op}}
	  %0 = scf.for %iv = %lb to %ub step %step iter_args(%t = %arg) -> (tensor<?xf32>) {
	    %1 = tensor.extract_slice %t[0][5][1] : tensor<?xf32> to tensor<5xf32>
	    // CHECK: %[[foo:.*]] = "test.foo"(%[[hoisted]])
	    %2 = "test.foo"(%1) : (tensor<5xf32>) -> (tensor<5xf32>)
	    // The slice is written back at the same offset/size/stride ([0][5][1]) it
	    // was read from, forming the extract/insert pair that gets hoisted.
	    %3 = tensor.insert_slice %2 into %t[0][5][1] : tensor<5xf32> into tensor<?xf32>
	    // CHECK: scf.yield %[[t]], %[[foo]]
	    scf.yield %3 : tensor<?xf32>
	  }
	  // CHECK: %[[insert:.*]] = tensor.insert_slice %[[for]]#1 into %[[for]]#0
	  // CHECK: return %[[insert]]
	  return %0 : tensor<?xf32>
	}

	// Transform script for the test above: matches the loop and both slice ops
	// up front, runs subset hoisting on the loop handle, then reports how many
	// payload ops each of the three handles is associated with afterwards.
	module attributes {transform.with_named_sequence} {
	transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
	// Handles are taken before the transformation so that their state after
	// hoisting can be inspected below.
	%0 = transform.structured.match ops{["scf.for"]} in %arg0 : (!transform.any_op) -> !transform.any_op
	%1 = transform.structured.match ops{["tensor.extract_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op
	%2 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op

	transform.loop.hoist_loop_invariant_subsets %0 : !transform.any_op
	// Make sure that the handles are still valid (and were updated in case of
	// the loop).

	%p = transform.num_associations %0 : (!transform.any_op) -> !transform.param<i64>
	// expected-remark @below{{1}}
	transform.debug.emit_param_as_remark %p : !transform.param<i64>
	// Emits "new loop op" at whatever op the loop handle now points to; the
	// payload function carries the matching diagnostic check on its scf.for.
	transform.debug.emit_remark_at %0, "new loop op" : !transform.any_op
	%p2 = transform.num_associations %1 : (!transform.any_op) -> !transform.param<i64>
	// expected-remark @below{{1}}
	transform.debug.emit_param_as_remark %p2 : !transform.param<i64>
	%p3 = transform.num_associations %2 : (!transform.any_op) -> !transform.param<i64>
	// expected-remark @below{{1}}
	transform.debug.emit_param_as_remark %p3 : !transform.param<i64>

	transform.yield
	}
	}

	// -----

	// Regression test for `collectHoistableOps` not being passed to `insert()` in
	// `populateSubsetOpsAtIterArg`. Without the fix, when recursing into nested
	// loops, inner loop subset ops were incorrectly added to the outer loop's
	// hoistable extraction/insertion pairs. If the outer loop were processed before
	// the inner loop (pre-order), this would cause ops inside the inner loop to be
	// moved across region boundaries, producing invalid IR.
	//
	// This test uses `transform.split_handle` to process ONLY the outer loop,
	// simulating pre-order processing. With the fix, inner loop ops are only
	// collected in `allSubsetOps` (for disjointness checking) but NOT added to
	// the hoistable extractions/insertions of the outer loop. Thus only the outer
	// [0][5][1] pair is hoisted, and the inner [5][5][1] pair remains untouched.

	// Payload: two nested scf.for loops, each with its own extract/insert pair on
	// its loop-carried tensor (outer at [0][5][1], inner at [5][5][1]).
	// CHECK-LABEL: func @hoist_outer_loop_only(
	// CHECK-SAME: %[[arg:.*]]: tensor<?xf32>
	func.func @hoist_outer_loop_only(%arg: tensor<?xf32>) -> tensor<?xf32> {
	  %lb = "test.foo"() : () -> (index)
	  %ub = "test.foo"() : () -> (index)
	  %step = "test.foo"() : () -> (index)

	  // Only the outer [0][5][1] pair is hoisted. The inner [5][5][1] pair stays
	  // inside the inner loop.
	  // CHECK: %[[slice:.*]] = tensor.extract_slice %[[arg]][0] [5] [1]
	  // CHECK: %[[outer:.*]]:2 = scf.for {{.*}} iter_args(%[[t:.*]] = %[[arg]], %[[hs:.*]] = %[[slice]])
	  // CHECK:   %[[inner:.*]] = scf.for {{.*}} iter_args(%[[t2:.*]] = %[[t]])
	  // CHECK:     tensor.extract_slice %[[t2]][5] [5] [1]
	  // CHECK:     tensor.insert_slice
	  %0 = scf.for %iv = %lb to %ub step %step iter_args(%t = %arg) -> (tensor<?xf32>) {
	    // Outer pair: operates on the outer iter_arg %t at [0][5][1].
	    %1 = tensor.extract_slice %t[0][5][1] : tensor<?xf32> to tensor<5xf32>
	    %2 = "test.foo"(%1) : (tensor<5xf32>) -> (tensor<5xf32>)
	    %3 = tensor.insert_slice %2 into %t[0][5][1] : tensor<5xf32> into tensor<?xf32>
	    // Inner loop is seeded with the outer insertion result and has its own
	    // pair at [5][5][1] on the inner iter_arg %t2.
	    %4 = scf.for %iv2 = %lb to %ub step %step iter_args(%t2 = %3) -> (tensor<?xf32>) {
	      %5 = tensor.extract_slice %t2[5][5][1] : tensor<?xf32> to tensor<5xf32>
	      %6 = "test.foo"(%5) : (tensor<5xf32>) -> (tensor<5xf32>)
	      %7 = tensor.insert_slice %6 into %t2[5][5][1] : tensor<5xf32> into tensor<?xf32>
	      scf.yield %7 : tensor<?xf32>
	    }
	    scf.yield %4 : tensor<?xf32>
	  }
	  return %0 : tensor<?xf32>
	}

	module attributes {transform.with_named_sequence} {
	  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
	    // Gather every scf.for in the payload. The match lists the loops
	    // innermost-first, so the split below yields the inner loop first and
	    // the outer loop second.
	    %all_loops = transform.structured.match ops{["scf.for"]} in %root : (!transform.any_op) -> !transform.any_op
	    // Give each loop its own handle, then hoist on the outer loop alone.
	    %inner_loop, %outer_loop = transform.split_handle %all_loops : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
	    transform.loop.hoist_loop_invariant_subsets %outer_loop : !transform.any_op
	    transform.yield
	  }
	}

	// -----

	// Checks that transform ops from LoopExtensionOps and SCFTransformOps can be
	// used together.

	// CHECK-LABEL: func @test_mixed_loop_extension_scf_transform(
	func.func @test_mixed_loop_extension_scf_transform(%arg: tensor<?xf32>) -> tensor<?xf32> {
	  // Opaque loop bounds and step.
	  %lower = "test.foo"() : () -> (index)
	  %upper = "test.foo"() : () -> (index)
	  %stride = "test.foo"() : () -> (index)
	  // The output must contain two scf.for ops after the transform script runs.
	  // CHECK: scf.for
	  // CHECK: scf.for
	  %result = scf.for %i = %lower to %upper step %stride iter_args(%acc = %arg) -> (tensor<?xf32>) {
	    // No subset ops here; the body only threads the tensor through "test.foo".
	    %next = "test.foo"(%acc) : (tensor<?xf32>) -> (tensor<?xf32>)
	    scf.yield %next : tensor<?xf32>
	  }
	  return %result : tensor<?xf32>
	}

	module attributes {transform.with_named_sequence} {
	  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
	    %loop = transform.structured.match ops{["scf.for"]} in %root : (!transform.any_op) -> !transform.any_op
	    // Apply subset hoisting first, then unroll by 4 through the same handle,
	    // exercising both loop transform ops in one script.
	    transform.loop.hoist_loop_invariant_subsets %loop : !transform.any_op
	    transform.loop.unroll %loop { factor = 4 } : !transform.any_op
	    transform.yield
	  }
	}