test/Transforms/cse.mlir - llvm-project/mlir - Git at Google

 // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(cse))' -split-input-file | FileCheck %s

 // CHECK-LABEL: @simple_constant
 func.func @simple_constant() -> (i32, i32) {
   // CHECK-NEXT: %[[VAR_c1_i32:.*]] = arith.constant 1 : i32
   %0 = arith.constant 1 : i32

   // CHECK-NEXT: return %[[VAR_c1_i32]], %[[VAR_c1_i32]] : i32, i32
   %1 = arith.constant 1 : i32
   return %0, %1 : i32, i32
 }

 // -----

 // CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0 mod 2)>
 #map0 = affine_map<(d0) -> (d0 mod 2)>

 // CHECK-LABEL: @basic
 func.func @basic() -> (index, index) {
   // CHECK: %[[VAR_c0:[0-9a-zA-Z_]+]] = arith.constant 0 : index
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 0 : index

   // CHECK-NEXT: %[[VAR_0:[0-9a-zA-Z_]+]] = affine.apply #[[$MAP]](%[[VAR_c0]])
   %0 = affine.apply #map0(%c0)
   %1 = affine.apply #map0(%c1)

   // CHECK-NEXT: return %[[VAR_0]], %[[VAR_0]] : index, index
   return %0, %1 : index, index
 }

 // -----

 // CHECK-LABEL: @many
 func.func @many(f32, f32) -> (f32) {
 ^bb0(%a : f32, %b : f32):
   // CHECK-NEXT: %[[VAR_0:[0-9a-zA-Z_]+]] = arith.addf %{{.*}}, %{{.*}} : f32
   %c = arith.addf %a, %b : f32
   %d = arith.addf %a, %b : f32
   %e = arith.addf %a, %b : f32
   %f = arith.addf %a, %b : f32

   // CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = arith.addf %[[VAR_0]], %[[VAR_0]] : f32
   %g = arith.addf %c, %d : f32
   %h = arith.addf %e, %f : f32
   %i = arith.addf %c, %e : f32

   // CHECK-NEXT: %[[VAR_2:[0-9a-zA-Z_]+]] = arith.addf %[[VAR_1]], %[[VAR_1]] : f32
   %j = arith.addf %g, %h : f32
   %k = arith.addf %h, %i : f32

   // CHECK-NEXT: %[[VAR_3:[0-9a-zA-Z_]+]] = arith.addf %[[VAR_2]], %[[VAR_2]] : f32
   %l = arith.addf %j, %k : f32

   // CHECK-NEXT: return %[[VAR_3]] : f32
   return %l : f32
 }

 // -----

 /// Check that operations are not eliminated if they have different operands.
 // CHECK-LABEL: @different_ops
 func.func @different_ops() -> (i32, i32) {
   // CHECK: %[[VAR_c0_i32:[0-9a-zA-Z_]+]] = arith.constant 0 : i32
   // CHECK: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
   %0 = arith.constant 0 : i32
   %1 = arith.constant 1 : i32

   // CHECK-NEXT: return %[[VAR_c0_i32]], %[[VAR_c1_i32]] : i32, i32
   return %0, %1 : i32, i32
 }

 // -----

 /// Check that operations are not eliminated if they have different result
 /// types.
 // CHECK-LABEL: @different_results
 func.func @different_results(%arg0: tensor<*xf32>) -> (tensor<?x?xf32>, tensor<4x?xf32>) {
   // CHECK: %[[VAR_0:[0-9a-zA-Z_]+]] = tensor.cast %{{.*}} : tensor<*xf32> to tensor<?x?xf32>
   // CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = tensor.cast %{{.*}} : tensor<*xf32> to tensor<4x?xf32>
   %0 = tensor.cast %arg0 : tensor<*xf32> to tensor<?x?xf32>
   %1 = tensor.cast %arg0 : tensor<*xf32> to tensor<4x?xf32>

   // CHECK-NEXT: return %[[VAR_0]], %[[VAR_1]] : tensor<?x?xf32>, tensor<4x?xf32>
   return %0, %1 : tensor<?x?xf32>, tensor<4x?xf32>
 }

 // -----

 /// Check that operations are not eliminated if they have different attributes.
 // CHECK-LABEL: @different_attributes
 func.func @different_attributes(index, index) -> (i1, i1, i1) {
 ^bb0(%a : index, %b : index):
   // CHECK: %[[VAR_0:[0-9a-zA-Z_]+]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index
   %0 = arith.cmpi slt, %a, %b : index

   // CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = arith.cmpi ne, %{{.*}}, %{{.*}} : index
   /// Predicate 1 means inequality comparison.
   %1 = arith.cmpi ne, %a, %b : index
   %2 = "arith.cmpi"(%a, %b) {predicate = 1} : (index, index) -> i1

   // CHECK-NEXT: return %[[VAR_0]], %[[VAR_1]], %[[VAR_1]] : i1, i1, i1
   return %0, %1, %2 : i1, i1, i1
 }

 // -----

 /// Check that operations with side effects are not eliminated.
 // CHECK-LABEL: @side_effect
 func.func @side_effect() -> (memref<2x1xf32>, memref<2x1xf32>) {
   // CHECK: %[[VAR_0:[0-9a-zA-Z_]+]] = memref.alloc() : memref<2x1xf32>
   %0 = memref.alloc() : memref<2x1xf32>

   // CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = memref.alloc() : memref<2x1xf32>
   %1 = memref.alloc() : memref<2x1xf32>

   // CHECK-NEXT: return %[[VAR_0]], %[[VAR_1]] : memref<2x1xf32>, memref<2x1xf32>
   return %0, %1 : memref<2x1xf32>, memref<2x1xf32>
 }

 // -----

 /// Check that operation definitions are properly propagated down the dominance
 /// tree.
 // CHECK-LABEL: @down_propagate_for
 func.func @down_propagate_for() {
   // CHECK: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
   %0 = arith.constant 1 : i32

   // CHECK-NEXT: affine.for {{.*}} = 0 to 4 {
   affine.for %i = 0 to 4 {
     // CHECK-NEXT: "foo"(%[[VAR_c1_i32]], %[[VAR_c1_i32]]) : (i32, i32) -> ()
     %1 = arith.constant 1 : i32
     "foo"(%0, %1) : (i32, i32) -> ()
   }
   return
 }

 // -----

 // CHECK-LABEL: @down_propagate
 func.func @down_propagate() -> i32 {
   // CHECK-NEXT: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
   %0 = arith.constant 1 : i32

   // CHECK-NEXT: %[[VAR_true:[0-9a-zA-Z_]+]] = arith.constant true
   %cond = arith.constant true

   // CHECK-NEXT: cf.cond_br %[[VAR_true]], ^bb1, ^bb2(%[[VAR_c1_i32]] : i32)
   cf.cond_br %cond, ^bb1, ^bb2(%0 : i32)

 ^bb1: // CHECK: ^bb1:
   // CHECK-NEXT: cf.br ^bb2(%[[VAR_c1_i32]] : i32)
   %1 = arith.constant 1 : i32
   cf.br ^bb2(%1 : i32)

 ^bb2(%arg : i32):
   return %arg : i32
 }

 // -----

 /// Check that operation definitions are NOT propagated up the dominance tree.
 // CHECK-LABEL: @up_propagate_for
 func.func @up_propagate_for() -> i32 {
   // CHECK: affine.for {{.*}} = 0 to 4 {
   affine.for %i = 0 to 4 {
     // CHECK-NEXT: %[[VAR_c1_i32_0:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
     // CHECK-NEXT: "foo"(%[[VAR_c1_i32_0]]) : (i32) -> ()
     %0 = arith.constant 1 : i32
     "foo"(%0) : (i32) -> ()
   }

   // CHECK: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
   // CHECK-NEXT: return %[[VAR_c1_i32]] : i32
   %1 = arith.constant 1 : i32
   return %1 : i32
 }

 // -----

 // CHECK-LABEL: func @up_propagate
 func.func @up_propagate() -> i32 {
   // CHECK-NEXT:  %[[VAR_c0_i32:[0-9a-zA-Z_]+]] = arith.constant 0 : i32
   %0 = arith.constant 0 : i32

   // CHECK-NEXT: %[[VAR_true:[0-9a-zA-Z_]+]] = arith.constant true
   %cond = arith.constant true

   // CHECK-NEXT: cf.cond_br %[[VAR_true]], ^bb1, ^bb2(%[[VAR_c0_i32]] : i32)
   cf.cond_br %cond, ^bb1, ^bb2(%0 : i32)

 ^bb1: // CHECK: ^bb1:
   // CHECK-NEXT: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
   %1 = arith.constant 1 : i32

   // CHECK-NEXT: cf.br ^bb2(%[[VAR_c1_i32]] : i32)
   cf.br ^bb2(%1 : i32)

 ^bb2(%arg : i32): // CHECK: ^bb2
   // CHECK-NEXT: %[[VAR_c1_i32_0:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
   %2 = arith.constant 1 : i32

   // CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = arith.addi %{{.*}}, %[[VAR_c1_i32_0]] : i32
   %add = arith.addi %arg, %2 : i32

   // CHECK-NEXT: return %[[VAR_1]] : i32
   return %add : i32
 }

 // -----

 /// The same test as above except that we are testing on a cfg embedded within
 /// an operation region.
 // CHECK-LABEL: func @up_propagate_region
 func.func @up_propagate_region() -> i32 {
   // CHECK-NEXT: {{.*}} "foo.region"
   %0 = "foo.region"() ({
     // CHECK-NEXT:  %[[VAR_c0_i32:[0-9a-zA-Z_]+]] = arith.constant 0 : i32
     // CHECK-NEXT: %[[VAR_true:[0-9a-zA-Z_]+]] = arith.constant true
     // CHECK-NEXT: cf.cond_br

     %1 = arith.constant 0 : i32
     %true = arith.constant true
     cf.cond_br %true, ^bb1, ^bb2(%1 : i32)

   ^bb1: // CHECK: ^bb1:
     // CHECK-NEXT: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
     // CHECK-NEXT: cf.br

     %c1_i32 = arith.constant 1 : i32
     cf.br ^bb2(%c1_i32 : i32)

   ^bb2(%arg : i32): // CHECK: ^bb2(%[[VAR_1:.*]]: i32):
     // CHECK-NEXT: %[[VAR_c1_i32_0:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
     // CHECK-NEXT: %[[VAR_2:[0-9a-zA-Z_]+]] = arith.addi %[[VAR_1]], %[[VAR_c1_i32_0]] : i32
     // CHECK-NEXT: "foo.yield"(%[[VAR_2]]) : (i32) -> ()

     %c1_i32_0 = arith.constant 1 : i32
     %2 = arith.addi %arg, %c1_i32_0 : i32
     "foo.yield" (%2) : (i32) -> ()
   }) : () -> (i32)
   return %0 : i32
 }

 // -----

 /// This test checks that nested regions that are isolated from above are
 /// properly handled.
 // CHECK-LABEL: @nested_isolated
 func.func @nested_isolated() -> i32 {
   // CHECK-NEXT: arith.constant 1
   %0 = arith.constant 1 : i32

   // CHECK-NEXT: builtin.module
   // CHECK-NEXT: @nested_func
   builtin.module {
     func.func @nested_func() {
       // CHECK-NEXT: arith.constant 1
       %foo = arith.constant 1 : i32
       "foo.yield"(%foo) : (i32) -> ()
     }
   }

   // CHECK: "foo.region"
   "foo.region"() ({
     // CHECK-NEXT: arith.constant 1
     %foo = arith.constant 1 : i32
     "foo.yield"(%foo) : (i32) -> ()
   }) : () -> ()

   return %0 : i32
 }

 // -----

 /// This test is checking that CSE gracefully handles values in graph regions
 /// where the use occurs before the def, and one of the defs could be CSE'd with
 /// the other.
 // CHECK-LABEL: @use_before_def
 func.func @use_before_def() {
   // CHECK-NEXT: test.graph_region
   test.graph_region {
     // CHECK-NEXT: arith.addi
     %0 = arith.addi %1, %2 : i32

     // CHECK-NEXT: arith.constant 1
     // CHECK-NEXT: arith.constant 1
     %1 = arith.constant 1 : i32
     %2 = arith.constant 1 : i32

     // CHECK-NEXT: "foo.yield"(%{{.*}}) : (i32) -> ()
     "foo.yield"(%0) : (i32) -> ()
   }
   return
 }

 // -----

 /// This test is checking that CSE is removing duplicated read op that follow
 /// other.
 // CHECK-LABEL: @remove_direct_duplicated_read_op
 func.func @remove_direct_duplicated_read_op() -> i32 {
   // CHECK-NEXT: %[[READ_VALUE:.*]] = "test.op_with_memread"() : () -> i32
   %0 = "test.op_with_memread"() : () -> (i32)
   %1 = "test.op_with_memread"() : () -> (i32)
   // CHECK-NEXT: %{{.*}} = arith.addi %[[READ_VALUE]], %[[READ_VALUE]] : i32
   %2 = arith.addi %0, %1 : i32
   return %2 : i32
 }

 // -----

 /// This test is checking that CSE is removing duplicated read op that follow
 /// other.
 // CHECK-LABEL: @remove_multiple_duplicated_read_op
 func.func @remove_multiple_duplicated_read_op() -> i64 {
   // CHECK: %[[READ_VALUE:.*]] = "test.op_with_memread"() : () -> i64
   %0 = "test.op_with_memread"() : () -> (i64)
   %1 = "test.op_with_memread"() : () -> (i64)
   // CHECK-NEXT: %{{.*}} = arith.addi %{{.*}}, %[[READ_VALUE]] : i64
   %2 = arith.addi %0, %1 : i64
   %3 = "test.op_with_memread"() : () -> (i64)
   // CHECK-NEXT: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : i64
   %4 = arith.addi %2, %3 : i64
   %5 = "test.op_with_memread"() : () -> (i64)
   // CHECK-NEXT: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : i64
   %6 = arith.addi %4, %5 : i64
   // CHECK-NEXT: return %{{.*}} : i64
   return %6 : i64
 }

 // -----

 /// This test is checking that CSE is not removing duplicated read op that
 /// have write op in between.
 // CHECK-LABEL: @dont_remove_duplicated_read_op_with_sideeffecting
 func.func @dont_remove_duplicated_read_op_with_sideeffecting() -> i32 {
   // CHECK-NEXT: %[[READ_VALUE0:.*]] = "test.op_with_memread"() : () -> i32
   %0 = "test.op_with_memread"() : () -> (i32)
   "test.op_with_memwrite"() : () -> ()
   // CHECK: %[[READ_VALUE1:.*]] = "test.op_with_memread"() : () -> i32
   %1 = "test.op_with_memread"() : () -> (i32)
   // CHECK-NEXT: %{{.*}} = arith.addi %[[READ_VALUE0]], %[[READ_VALUE1]] : i32
   %2 = arith.addi %0, %1 : i32
   return %2 : i32
 }

 // -----

 // Check that an operation with a single region can CSE.
 func.func @cse_single_block_ops(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>)
   -> (tensor<?x?xf32>, tensor<?x?xf32>) {
   %0 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32):
     test.region_yield %arg0 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   %1 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32):
     test.region_yield %arg0 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
 }
 // CHECK-LABEL: func @cse_single_block_ops
 //       CHECK:   %[[OP:.+]] = test.cse_of_single_block_op
 //   CHECK-NOT:   test.cse_of_single_block_op
 //       CHECK:   return %[[OP]], %[[OP]]

 // -----

 // Operations with different number of bbArgs dont CSE.
 func.func @no_cse_varied_bbargs(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>)
   -> (tensor<?x?xf32>, tensor<?x?xf32>) {
   %0 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32, %arg1 : f32):
     test.region_yield %arg0 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   %1 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32):
     test.region_yield %arg0 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
 }
 // CHECK-LABEL: func @no_cse_varied_bbargs
 //       CHECK:   %[[OP0:.+]] = test.cse_of_single_block_op
 //       CHECK:   %[[OP1:.+]] = test.cse_of_single_block_op
 //       CHECK:   return %[[OP0]], %[[OP1]]

 // -----

 // Operations with different regions dont CSE
 func.func @no_cse_region_difference_simple(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>)
   -> (tensor<?x?xf32>, tensor<?x?xf32>) {
   %0 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32, %arg1 : f32):
     test.region_yield %arg0 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   %1 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32, %arg1 : f32):
     test.region_yield %arg1 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
 }
 // CHECK-LABEL: func @no_cse_region_difference_simple
 //       CHECK:   %[[OP0:.+]] = test.cse_of_single_block_op
 //       CHECK:   %[[OP1:.+]] = test.cse_of_single_block_op
 //       CHECK:   return %[[OP0]], %[[OP1]]

 // -----

 // Operation with identical region with multiple statements CSE.
 func.func @cse_single_block_ops_identical_bodies(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>, %c : f32, %d : i1)
   -> (tensor<?x?xf32>, tensor<?x?xf32>) {
   %0 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32, %arg1 : f32):
     %1 = arith.divf %arg0, %arg1 : f32
     %2 = arith.remf %arg0, %c : f32
     %3 = arith.select %d, %1, %2 : f32
     test.region_yield %3 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   %1 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32, %arg1 : f32):
     %1 = arith.divf %arg0, %arg1 : f32
     %2 = arith.remf %arg0, %c : f32
     %3 = arith.select %d, %1, %2 : f32
     test.region_yield %3 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
 }
 // CHECK-LABEL: func @cse_single_block_ops_identical_bodies
 //       CHECK:   %[[OP:.+]] = test.cse_of_single_block_op
 //   CHECK-NOT:   test.cse_of_single_block_op
 //       CHECK:   return %[[OP]], %[[OP]]

 // -----

 // Operation with non-identical regions dont CSE.
 func.func @no_cse_single_block_ops_different_bodies(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>, %c : f32, %d : i1)
   -> (tensor<?x?xf32>, tensor<?x?xf32>) {
   %0 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32, %arg1 : f32):
     %1 = arith.divf %arg0, %arg1 : f32
     %2 = arith.remf %arg0, %c : f32
     %3 = arith.select %d, %1, %2 : f32
     test.region_yield %3 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   %1 = test.cse_of_single_block_op inputs(%a, %b) {
     ^bb0(%arg0 : f32, %arg1 : f32):
     %1 = arith.divf %arg0, %arg1 : f32
     %2 = arith.remf %arg0, %c : f32
     %3 = arith.select %d, %2, %1 : f32
     test.region_yield %3 : f32
   } : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
   return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
 }
 // CHECK-LABEL: func @no_cse_single_block_ops_different_bodies
 //       CHECK:   %[[OP0:.+]] = test.cse_of_single_block_op
 //       CHECK:   %[[OP1:.+]] = test.cse_of_single_block_op
 //       CHECK:   return %[[OP0]], %[[OP1]]

 // -----

 func.func @failing_issue_59135(%arg0: tensor<2x2xi1>, %arg1: f32, %arg2 : tensor<2xi1>) -> (tensor<2xi1>, tensor<2xi1>) {
   %false_2 = arith.constant false
   %true_5 = arith.constant true
   %9 = test.cse_of_single_block_op inputs(%arg2) {
   ^bb0(%out: i1):
     %true_144 = arith.constant true
     test.region_yield %true_144 : i1
   } : tensor<2xi1> -> tensor<2xi1>
   %15 = test.cse_of_single_block_op inputs(%arg2) {
   ^bb0(%out: i1):
     %true_144 = arith.constant true
     test.region_yield %true_144 : i1
   } : tensor<2xi1> -> tensor<2xi1>
   %93 = arith.maxsi %false_2, %true_5 : i1
   return %9, %15 : tensor<2xi1>, tensor<2xi1>
 }
 // CHECK-LABEL: func @failing_issue_59135
 //       CHECK:   %[[TRUE:.+]] = arith.constant true
 //       CHECK:   %[[OP:.+]] = test.cse_of_single_block_op
 //       CHECK:     test.region_yield %[[TRUE]]
 //       CHECK:   return %[[OP]], %[[OP]]

 // -----

 func.func @cse_multiple_regions(%c: i1, %t: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) {
   %r1 = scf.if %c -> (tensor<5xf32>) {
     %0 = tensor.empty() : tensor<5xf32>
     scf.yield %0 : tensor<5xf32>
   } else {
     scf.yield %t : tensor<5xf32>
   }
   %r2 = scf.if %c -> (tensor<5xf32>) {
     %0 = tensor.empty() : tensor<5xf32>
     scf.yield %0 : tensor<5xf32>
   } else {
     scf.yield %t : tensor<5xf32>
   }
   return %r1, %r2 : tensor<5xf32>, tensor<5xf32>
 }
 // CHECK-LABEL: func @cse_multiple_regions
 //       CHECK:   %[[if:.*]] = scf.if {{.*}} {
 //       CHECK:     tensor.empty
 //       CHECK:     scf.yield
 //       CHECK:   } else {
 //       CHECK:     scf.yield
 //       CHECK:   }
 //   CHECK-NOT:   scf.if
 //       CHECK:   return %[[if]], %[[if]]

 // -----

 // CHECK-LABEL: @cse_recursive_effects_success
 func.func @cse_recursive_effects_success() -> (i32, i32, i32) {
   // CHECK-NEXT: %[[READ_VALUE:.*]] = "test.op_with_memread"() : () -> i32
   %0 = "test.op_with_memread"() : () -> (i32)

   // do something with recursive effects, containing no side effects
   %true = arith.constant true
   // CHECK-NEXT: %[[TRUE:.+]] = arith.constant true
   // CHECK-NEXT: %[[IF:.+]] = scf.if %[[TRUE]] -> (i32) {
   %1 = scf.if %true -> (i32) {
     %c42 = arith.constant 42 : i32
     scf.yield %c42 : i32
     // CHECK-NEXT: %[[C42:.+]] = arith.constant 42 : i32
     // CHECK-NEXT: scf.yield %[[C42]]
     // CHECK-NEXT: } else {
   } else {
     %c24 = arith.constant 24 : i32
     scf.yield %c24 : i32
     // CHECK-NEXT: %[[C24:.+]] = arith.constant 24 : i32
     // CHECK-NEXT: scf.yield %[[C24]]
     // CHECK-NEXT: }
   }

   // %2 can be removed
   // CHECK-NEXT: return %[[READ_VALUE]], %[[READ_VALUE]], %[[IF]] : i32, i32, i32
   %2 = "test.op_with_memread"() : () -> (i32)
   return %0, %2, %1 : i32, i32, i32
 }

 // -----

 // CHECK-LABEL: @cse_recursive_effects_failure
 func.func @cse_recursive_effects_failure() -> (i32, i32, i32) {
   // CHECK-NEXT: %[[READ_VALUE:.*]] = "test.op_with_memread"() : () -> i32
   %0 = "test.op_with_memread"() : () -> (i32)

   // do something with recursive effects, containing a write effect
   %true = arith.constant true
   // CHECK-NEXT: %[[TRUE:.+]] = arith.constant true
   // CHECK-NEXT: %[[IF:.+]] = scf.if %[[TRUE]] -> (i32) {
   %1 = scf.if %true -> (i32) {
     "test.op_with_memwrite"() : () -> ()
     // CHECK-NEXT: "test.op_with_memwrite"() : () -> ()
     %c42 = arith.constant 42 : i32
     scf.yield %c42 : i32
     // CHECK-NEXT: %[[C42:.+]] = arith.constant 42 : i32
     // CHECK-NEXT: scf.yield %[[C42]]
     // CHECK-NEXT: } else {
   } else {
     %c24 = arith.constant 24 : i32
     scf.yield %c24 : i32
     // CHECK-NEXT: %[[C24:.+]] = arith.constant 24 : i32
     // CHECK-NEXT: scf.yield %[[C24]]
     // CHECK-NEXT: }
   }

   // %2 can not be be removed because of the write
   // CHECK-NEXT: %[[READ_VALUE2:.*]] = "test.op_with_memread"() : () -> i32
   // CHECK-NEXT: return %[[READ_VALUE]], %[[READ_VALUE2]], %[[IF]] : i32, i32, i32
   %2 = "test.op_with_memread"() : () -> (i32)
   return %0, %2, %1 : i32, i32, i32
 }
	// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(cse))' -split-input-file \| FileCheck %s

	// CHECK-LABEL: @simple_constant
	func.func @simple_constant() -> (i32, i32) {
	// CHECK-NEXT: %[[VAR_c1_i32:.*]] = arith.constant 1 : i32
	%0 = arith.constant 1 : i32

	// CHECK-NEXT: return %[[VAR_c1_i32]], %[[VAR_c1_i32]] : i32, i32
	%1 = arith.constant 1 : i32
	return %0, %1 : i32, i32
	}

	// -----

	// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0 mod 2)>
	#map0 = affine_map<(d0) -> (d0 mod 2)>

	// CHECK-LABEL: @basic
	func.func @basic() -> (index, index) {
	// CHECK: %[[VAR_c0:[0-9a-zA-Z_]+]] = arith.constant 0 : index
	%c0 = arith.constant 0 : index
	%c1 = arith.constant 0 : index

	// CHECK-NEXT: %[[VAR_0:[0-9a-zA-Z_]+]] = affine.apply #[[$MAP]](%[[VAR_c0]])
	%0 = affine.apply #map0(%c0)
	%1 = affine.apply #map0(%c1)

	// CHECK-NEXT: return %[[VAR_0]], %[[VAR_0]] : index, index
	return %0, %1 : index, index
	}

	// -----

	// CHECK-LABEL: @many
	func.func @many(f32, f32) -> (f32) {
	^bb0(%a : f32, %b : f32):
	// CHECK-NEXT: %[[VAR_0:[0-9a-zA-Z_]+]] = arith.addf %{{.}}, %{{.}} : f32
	%c = arith.addf %a, %b : f32
	%d = arith.addf %a, %b : f32
	%e = arith.addf %a, %b : f32
	%f = arith.addf %a, %b : f32

	// CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = arith.addf %[[VAR_0]], %[[VAR_0]] : f32
	%g = arith.addf %c, %d : f32
	%h = arith.addf %e, %f : f32
	%i = arith.addf %c, %e : f32

	// CHECK-NEXT: %[[VAR_2:[0-9a-zA-Z_]+]] = arith.addf %[[VAR_1]], %[[VAR_1]] : f32
	%j = arith.addf %g, %h : f32
	%k = arith.addf %h, %i : f32

	// CHECK-NEXT: %[[VAR_3:[0-9a-zA-Z_]+]] = arith.addf %[[VAR_2]], %[[VAR_2]] : f32
	%l = arith.addf %j, %k : f32

	// CHECK-NEXT: return %[[VAR_3]] : f32
	return %l : f32
	}

	// -----

	/// Check that operations are not eliminated if they have different operands.
	// CHECK-LABEL: @different_ops
	func.func @different_ops() -> (i32, i32) {
	// CHECK: %[[VAR_c0_i32:[0-9a-zA-Z_]+]] = arith.constant 0 : i32
	// CHECK: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
	%0 = arith.constant 0 : i32
	%1 = arith.constant 1 : i32

	// CHECK-NEXT: return %[[VAR_c0_i32]], %[[VAR_c1_i32]] : i32, i32
	return %0, %1 : i32, i32
	}

	// -----

	/// Check that operations are not eliminated if they have different result
	/// types.
	// CHECK-LABEL: @different_results
	func.func @different_results(%arg0: tensor<*xf32>) -> (tensor<?x?xf32>, tensor<4x?xf32>) {
	// CHECK: %[[VAR_0:[0-9a-zA-Z_]+]] = tensor.cast %{{.}} : tensor<xf32> to tensor<?x?xf32>
	// CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = tensor.cast %{{.}} : tensor<xf32> to tensor<4x?xf32>
	%0 = tensor.cast %arg0 : tensor<*xf32> to tensor<?x?xf32>
	%1 = tensor.cast %arg0 : tensor<*xf32> to tensor<4x?xf32>

	// CHECK-NEXT: return %[[VAR_0]], %[[VAR_1]] : tensor<?x?xf32>, tensor<4x?xf32>
	return %0, %1 : tensor<?x?xf32>, tensor<4x?xf32>
	}

	// -----

	/// Check that operations are not eliminated if they have different attributes.
	// CHECK-LABEL: @different_attributes
	func.func @different_attributes(index, index) -> (i1, i1, i1) {
	^bb0(%a : index, %b : index):
	// CHECK: %[[VAR_0:[0-9a-zA-Z_]+]] = arith.cmpi slt, %{{.}}, %{{.}} : index
	%0 = arith.cmpi slt, %a, %b : index

	// CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = arith.cmpi ne, %{{.}}, %{{.}} : index
	/// Predicate 1 means inequality comparison.
	%1 = arith.cmpi ne, %a, %b : index
	%2 = "arith.cmpi"(%a, %b) {predicate = 1} : (index, index) -> i1

	// CHECK-NEXT: return %[[VAR_0]], %[[VAR_1]], %[[VAR_1]] : i1, i1, i1
	return %0, %1, %2 : i1, i1, i1
	}

	// -----

	/// Check that operations with side effects are not eliminated.
	// CHECK-LABEL: @side_effect
	func.func @side_effect() -> (memref<2x1xf32>, memref<2x1xf32>) {
	// CHECK: %[[VAR_0:[0-9a-zA-Z_]+]] = memref.alloc() : memref<2x1xf32>
	%0 = memref.alloc() : memref<2x1xf32>

	// CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = memref.alloc() : memref<2x1xf32>
	%1 = memref.alloc() : memref<2x1xf32>

	// CHECK-NEXT: return %[[VAR_0]], %[[VAR_1]] : memref<2x1xf32>, memref<2x1xf32>
	return %0, %1 : memref<2x1xf32>, memref<2x1xf32>
	}

	// -----

	/// Check that operation definitions are properly propagated down the dominance
	/// tree.
	// CHECK-LABEL: @down_propagate_for
	func.func @down_propagate_for() {
	// CHECK: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
	%0 = arith.constant 1 : i32

	// CHECK-NEXT: affine.for {{.*}} = 0 to 4 {
	affine.for %i = 0 to 4 {
	// CHECK-NEXT: "foo"(%[[VAR_c1_i32]], %[[VAR_c1_i32]]) : (i32, i32) -> ()
	%1 = arith.constant 1 : i32
	"foo"(%0, %1) : (i32, i32) -> ()
	}
	return
	}

	// -----

	// CHECK-LABEL: @down_propagate
	func.func @down_propagate() -> i32 {
	// CHECK-NEXT: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
	%0 = arith.constant 1 : i32

	// CHECK-NEXT: %[[VAR_true:[0-9a-zA-Z_]+]] = arith.constant true
	%cond = arith.constant true

	// CHECK-NEXT: cf.cond_br %[[VAR_true]], ^bb1, ^bb2(%[[VAR_c1_i32]] : i32)
	cf.cond_br %cond, ^bb1, ^bb2(%0 : i32)

	^bb1: // CHECK: ^bb1:
	// CHECK-NEXT: cf.br ^bb2(%[[VAR_c1_i32]] : i32)
	%1 = arith.constant 1 : i32
	cf.br ^bb2(%1 : i32)

	^bb2(%arg : i32):
	return %arg : i32
	}

	// -----

	/// Check that operation definitions are NOT propagated up the dominance tree.
	// CHECK-LABEL: @up_propagate_for
	func.func @up_propagate_for() -> i32 {
	// CHECK: affine.for {{.*}} = 0 to 4 {
	affine.for %i = 0 to 4 {
	// CHECK-NEXT: %[[VAR_c1_i32_0:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
	// CHECK-NEXT: "foo"(%[[VAR_c1_i32_0]]) : (i32) -> ()
	%0 = arith.constant 1 : i32
	"foo"(%0) : (i32) -> ()
	}

	// CHECK: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
	// CHECK-NEXT: return %[[VAR_c1_i32]] : i32
	%1 = arith.constant 1 : i32
	return %1 : i32
	}

	// -----

	// CHECK-LABEL: func @up_propagate
	func.func @up_propagate() -> i32 {
	// CHECK-NEXT: %[[VAR_c0_i32:[0-9a-zA-Z_]+]] = arith.constant 0 : i32
	%0 = arith.constant 0 : i32

	// CHECK-NEXT: %[[VAR_true:[0-9a-zA-Z_]+]] = arith.constant true
	%cond = arith.constant true

	// CHECK-NEXT: cf.cond_br %[[VAR_true]], ^bb1, ^bb2(%[[VAR_c0_i32]] : i32)
	cf.cond_br %cond, ^bb1, ^bb2(%0 : i32)

	^bb1: // CHECK: ^bb1:
	// CHECK-NEXT: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
	%1 = arith.constant 1 : i32

	// CHECK-NEXT: cf.br ^bb2(%[[VAR_c1_i32]] : i32)
	cf.br ^bb2(%1 : i32)

	^bb2(%arg : i32): // CHECK: ^bb2
	// CHECK-NEXT: %[[VAR_c1_i32_0:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
	%2 = arith.constant 1 : i32

	// CHECK-NEXT: %[[VAR_1:[0-9a-zA-Z_]+]] = arith.addi %{{.*}}, %[[VAR_c1_i32_0]] : i32
	%add = arith.addi %arg, %2 : i32

	// CHECK-NEXT: return %[[VAR_1]] : i32
	return %add : i32
	}

	// -----

	/// The same test as above except that we are testing on a cfg embedded within
	/// an operation region.
	// CHECK-LABEL: func @up_propagate_region
	func.func @up_propagate_region() -> i32 {
	// CHECK-NEXT: {{.*}} "foo.region"
	%0 = "foo.region"() ({
	// CHECK-NEXT: %[[VAR_c0_i32:[0-9a-zA-Z_]+]] = arith.constant 0 : i32
	// CHECK-NEXT: %[[VAR_true:[0-9a-zA-Z_]+]] = arith.constant true
	// CHECK-NEXT: cf.cond_br

	%1 = arith.constant 0 : i32
	%true = arith.constant true
	cf.cond_br %true, ^bb1, ^bb2(%1 : i32)

	^bb1: // CHECK: ^bb1:
	// CHECK-NEXT: %[[VAR_c1_i32:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
	// CHECK-NEXT: cf.br

	%c1_i32 = arith.constant 1 : i32
	cf.br ^bb2(%c1_i32 : i32)

	^bb2(%arg : i32): // CHECK: ^bb2(%[[VAR_1:.*]]: i32):
	// CHECK-NEXT: %[[VAR_c1_i32_0:[0-9a-zA-Z_]+]] = arith.constant 1 : i32
	// CHECK-NEXT: %[[VAR_2:[0-9a-zA-Z_]+]] = arith.addi %[[VAR_1]], %[[VAR_c1_i32_0]] : i32
	// CHECK-NEXT: "foo.yield"(%[[VAR_2]]) : (i32) -> ()

	%c1_i32_0 = arith.constant 1 : i32
	%2 = arith.addi %arg, %c1_i32_0 : i32
	"foo.yield" (%2) : (i32) -> ()
	}) : () -> (i32)
	return %0 : i32
	}

	// -----

	/// This test checks that nested regions that are isolated from above are
	/// properly handled.
	// CHECK-LABEL: @nested_isolated
	func.func @nested_isolated() -> i32 {
	// CHECK-NEXT: arith.constant 1
	%0 = arith.constant 1 : i32

	// CHECK-NEXT: builtin.module
	// CHECK-NEXT: @nested_func
	builtin.module {
	func.func @nested_func() {
	// CHECK-NEXT: arith.constant 1
	%foo = arith.constant 1 : i32
	"foo.yield"(%foo) : (i32) -> ()
	}
	}

	// CHECK: "foo.region"
	"foo.region"() ({
	// CHECK-NEXT: arith.constant 1
	%foo = arith.constant 1 : i32
	"foo.yield"(%foo) : (i32) -> ()
	}) : () -> ()

	return %0 : i32
	}

	// -----

	/// This test is checking that CSE gracefully handles values in graph regions
	/// where the use occurs before the def, and one of the defs could be CSE'd with
	/// the other.
	// CHECK-LABEL: @use_before_def
	func.func @use_before_def() {
	// CHECK-NEXT: test.graph_region
	test.graph_region {
	// CHECK-NEXT: arith.addi
	%0 = arith.addi %1, %2 : i32

	// CHECK-NEXT: arith.constant 1
	// CHECK-NEXT: arith.constant 1
	%1 = arith.constant 1 : i32
	%2 = arith.constant 1 : i32

	// CHECK-NEXT: "foo.yield"(%{{.*}}) : (i32) -> ()
	"foo.yield"(%0) : (i32) -> ()
	}
	return
	}

	// -----

	/// This test is checking that CSE is removing duplicated read op that follow
	/// other.
	// CHECK-LABEL: @remove_direct_duplicated_read_op
	func.func @remove_direct_duplicated_read_op() -> i32 {
	// CHECK-NEXT: %[[READ_VALUE:.*]] = "test.op_with_memread"() : () -> i32
	%0 = "test.op_with_memread"() : () -> (i32)
	%1 = "test.op_with_memread"() : () -> (i32)
	// CHECK-NEXT: %{{.*}} = arith.addi %[[READ_VALUE]], %[[READ_VALUE]] : i32
	%2 = arith.addi %0, %1 : i32
	return %2 : i32
	}

	// -----

	/// This test is checking that CSE is removing duplicated read op that follow
	/// other.
	// CHECK-LABEL: @remove_multiple_duplicated_read_op
	func.func @remove_multiple_duplicated_read_op() -> i64 {
	// CHECK: %[[READ_VALUE:.*]] = "test.op_with_memread"() : () -> i64
	%0 = "test.op_with_memread"() : () -> (i64)
	%1 = "test.op_with_memread"() : () -> (i64)
	// CHECK-NEXT: %{{.}} = arith.addi %{{.}}, %[[READ_VALUE]] : i64
	%2 = arith.addi %0, %1 : i64
	%3 = "test.op_with_memread"() : () -> (i64)
	// CHECK-NEXT: %{{.}} = arith.addi %{{.}}, %{{.*}} : i64
	%4 = arith.addi %2, %3 : i64
	%5 = "test.op_with_memread"() : () -> (i64)
	// CHECK-NEXT: %{{.}} = arith.addi %{{.}}, %{{.*}} : i64
	%6 = arith.addi %4, %5 : i64
	// CHECK-NEXT: return %{{.*}} : i64
	return %6 : i64
	}

	// -----

	/// This test is checking that CSE is not removing duplicated read op that
	/// have write op in between.
	// CHECK-LABEL: @dont_remove_duplicated_read_op_with_sideeffecting
	func.func @dont_remove_duplicated_read_op_with_sideeffecting() -> i32 {
	// CHECK-NEXT: %[[READ_VALUE0:.*]] = "test.op_with_memread"() : () -> i32
	%0 = "test.op_with_memread"() : () -> (i32)
	"test.op_with_memwrite"() : () -> ()
	// CHECK: %[[READ_VALUE1:.*]] = "test.op_with_memread"() : () -> i32
	%1 = "test.op_with_memread"() : () -> (i32)
	// CHECK-NEXT: %{{.*}} = arith.addi %[[READ_VALUE0]], %[[READ_VALUE1]] : i32
	%2 = arith.addi %0, %1 : i32
	return %2 : i32
	}

	// -----

	// Check that an operation with a single region can CSE.
	func.func @cse_single_block_ops(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>)
	-> (tensor<?x?xf32>, tensor<?x?xf32>) {
	%0 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32):
	test.region_yield %arg0 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	%1 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32):
	test.region_yield %arg0 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
	}
	// CHECK-LABEL: func @cse_single_block_ops
	// CHECK: %[[OP:.+]] = test.cse_of_single_block_op
	// CHECK-NOT: test.cse_of_single_block_op
	// CHECK: return %[[OP]], %[[OP]]

	// -----

	// Operations with different number of bbArgs dont CSE.
	func.func @no_cse_varied_bbargs(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>)
	-> (tensor<?x?xf32>, tensor<?x?xf32>) {
	%0 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32, %arg1 : f32):
	test.region_yield %arg0 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	%1 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32):
	test.region_yield %arg0 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
	}
	// CHECK-LABEL: func @no_cse_varied_bbargs
	// CHECK: %[[OP0:.+]] = test.cse_of_single_block_op
	// CHECK: %[[OP1:.+]] = test.cse_of_single_block_op
	// CHECK: return %[[OP0]], %[[OP1]]

	// -----

	// Operations with different regions dont CSE
	func.func @no_cse_region_difference_simple(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>)
	-> (tensor<?x?xf32>, tensor<?x?xf32>) {
	%0 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32, %arg1 : f32):
	test.region_yield %arg0 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	%1 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32, %arg1 : f32):
	test.region_yield %arg1 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
	}
	// CHECK-LABEL: func @no_cse_region_difference_simple
	// CHECK: %[[OP0:.+]] = test.cse_of_single_block_op
	// CHECK: %[[OP1:.+]] = test.cse_of_single_block_op
	// CHECK: return %[[OP0]], %[[OP1]]

	// -----

	// Operation with identical region with multiple statements CSE.
	func.func @cse_single_block_ops_identical_bodies(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>, %c : f32, %d : i1)
	-> (tensor<?x?xf32>, tensor<?x?xf32>) {
	%0 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32, %arg1 : f32):
	%1 = arith.divf %arg0, %arg1 : f32
	%2 = arith.remf %arg0, %c : f32
	%3 = arith.select %d, %1, %2 : f32
	test.region_yield %3 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	%1 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32, %arg1 : f32):
	%1 = arith.divf %arg0, %arg1 : f32
	%2 = arith.remf %arg0, %c : f32
	%3 = arith.select %d, %1, %2 : f32
	test.region_yield %3 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
	}
	// CHECK-LABEL: func @cse_single_block_ops_identical_bodies
	// CHECK: %[[OP:.+]] = test.cse_of_single_block_op
	// CHECK-NOT: test.cse_of_single_block_op
	// CHECK: return %[[OP]], %[[OP]]

	// -----

	// Operation with non-identical regions dont CSE.
	func.func @no_cse_single_block_ops_different_bodies(%a : tensor<?x?xf32>, %b : tensor<?x?xf32>, %c : f32, %d : i1)
	-> (tensor<?x?xf32>, tensor<?x?xf32>) {
	%0 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32, %arg1 : f32):
	%1 = arith.divf %arg0, %arg1 : f32
	%2 = arith.remf %arg0, %c : f32
	%3 = arith.select %d, %1, %2 : f32
	test.region_yield %3 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	%1 = test.cse_of_single_block_op inputs(%a, %b) {
	^bb0(%arg0 : f32, %arg1 : f32):
	%1 = arith.divf %arg0, %arg1 : f32
	%2 = arith.remf %arg0, %c : f32
	%3 = arith.select %d, %2, %1 : f32
	test.region_yield %3 : f32
	} : tensor<?x?xf32>, tensor<?x?xf32> -> tensor<?x?xf32>
	return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
	}
	// CHECK-LABEL: func @no_cse_single_block_ops_different_bodies
	// CHECK: %[[OP0:.+]] = test.cse_of_single_block_op
	// CHECK: %[[OP1:.+]] = test.cse_of_single_block_op
	// CHECK: return %[[OP0]], %[[OP1]]

	// -----

	func.func @failing_issue_59135(%arg0: tensor<2x2xi1>, %arg1: f32, %arg2 : tensor<2xi1>) -> (tensor<2xi1>, tensor<2xi1>) {
	%false_2 = arith.constant false
	%true_5 = arith.constant true
	%9 = test.cse_of_single_block_op inputs(%arg2) {
	^bb0(%out: i1):
	%true_144 = arith.constant true
	test.region_yield %true_144 : i1
	} : tensor<2xi1> -> tensor<2xi1>
	%15 = test.cse_of_single_block_op inputs(%arg2) {
	^bb0(%out: i1):
	%true_144 = arith.constant true
	test.region_yield %true_144 : i1
	} : tensor<2xi1> -> tensor<2xi1>
	%93 = arith.maxsi %false_2, %true_5 : i1
	return %9, %15 : tensor<2xi1>, tensor<2xi1>
	}
	// CHECK-LABEL: func @failing_issue_59135
	// CHECK: %[[TRUE:.+]] = arith.constant true
	// CHECK: %[[OP:.+]] = test.cse_of_single_block_op
	// CHECK: test.region_yield %[[TRUE]]
	// CHECK: return %[[OP]], %[[OP]]

	// -----

	func.func @cse_multiple_regions(%c: i1, %t: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) {
	%r1 = scf.if %c -> (tensor<5xf32>) {
	%0 = tensor.empty() : tensor<5xf32>
	scf.yield %0 : tensor<5xf32>
	} else {
	scf.yield %t : tensor<5xf32>
	}
	%r2 = scf.if %c -> (tensor<5xf32>) {
	%0 = tensor.empty() : tensor<5xf32>
	scf.yield %0 : tensor<5xf32>
	} else {
	scf.yield %t : tensor<5xf32>
	}
	return %r1, %r2 : tensor<5xf32>, tensor<5xf32>
	}
	// CHECK-LABEL: func @cse_multiple_regions
	// CHECK: %[[if:.]] = scf.if {{.}} {
	// CHECK: tensor.empty
	// CHECK: scf.yield
	// CHECK: } else {
	// CHECK: scf.yield
	// CHECK: }
	// CHECK-NOT: scf.if
	// CHECK: return %[[if]], %[[if]]

	// -----

	// CHECK-LABEL: @cse_recursive_effects_success
	func.func @cse_recursive_effects_success() -> (i32, i32, i32) {
	// CHECK-NEXT: %[[READ_VALUE:.*]] = "test.op_with_memread"() : () -> i32
	%0 = "test.op_with_memread"() : () -> (i32)

	// do something with recursive effects, containing no side effects
	%true = arith.constant true
	// CHECK-NEXT: %[[TRUE:.+]] = arith.constant true
	// CHECK-NEXT: %[[IF:.+]] = scf.if %[[TRUE]] -> (i32) {
	%1 = scf.if %true -> (i32) {
	%c42 = arith.constant 42 : i32
	scf.yield %c42 : i32
	// CHECK-NEXT: %[[C42:.+]] = arith.constant 42 : i32
	// CHECK-NEXT: scf.yield %[[C42]]
	// CHECK-NEXT: } else {
	} else {
	%c24 = arith.constant 24 : i32
	scf.yield %c24 : i32
	// CHECK-NEXT: %[[C24:.+]] = arith.constant 24 : i32
	// CHECK-NEXT: scf.yield %[[C24]]
	// CHECK-NEXT: }
	}

	// %2 can be removed
	// CHECK-NEXT: return %[[READ_VALUE]], %[[READ_VALUE]], %[[IF]] : i32, i32, i32
	%2 = "test.op_with_memread"() : () -> (i32)
	return %0, %2, %1 : i32, i32, i32
	}

	// -----

	// CHECK-LABEL: @cse_recursive_effects_failure
	func.func @cse_recursive_effects_failure() -> (i32, i32, i32) {
	// CHECK-NEXT: %[[READ_VALUE:.*]] = "test.op_with_memread"() : () -> i32
	%0 = "test.op_with_memread"() : () -> (i32)

	// do something with recursive effects, containing a write effect
	%true = arith.constant true
	// CHECK-NEXT: %[[TRUE:.+]] = arith.constant true
	// CHECK-NEXT: %[[IF:.+]] = scf.if %[[TRUE]] -> (i32) {
	%1 = scf.if %true -> (i32) {
	"test.op_with_memwrite"() : () -> ()
	// CHECK-NEXT: "test.op_with_memwrite"() : () -> ()
	%c42 = arith.constant 42 : i32
	scf.yield %c42 : i32
	// CHECK-NEXT: %[[C42:.+]] = arith.constant 42 : i32
	// CHECK-NEXT: scf.yield %[[C42]]
	// CHECK-NEXT: } else {
	} else {
	%c24 = arith.constant 24 : i32
	scf.yield %c24 : i32
	// CHECK-NEXT: %[[C24:.+]] = arith.constant 24 : i32
	// CHECK-NEXT: scf.yield %[[C24]]
	// CHECK-NEXT: }
	}

	// %2 can not be be removed because of the write
	// CHECK-NEXT: %[[READ_VALUE2:.*]] = "test.op_with_memread"() : () -> i32
	// CHECK-NEXT: return %[[READ_VALUE]], %[[READ_VALUE2]], %[[IF]] : i32, i32, i32
	%2 = "test.op_with_memread"() : () -> (i32)
	return %0, %2, %1 : i32, i32, i32
	}