// blob: a2cf72a00e019e3a28b0c1726e7d711c23c99f3f [file] [edit]
// RUN: mlir-opt -int-range-optimizations %s | FileCheck %s
// Note: I wish I had a simpler example than this, but getting rid of a
// bunch of the arithmetic made the issue go away.
// CHECK-LABEL: @blocks_prematurely_declared_dead_bug
// CHECK-NOT: arith.constant true
// CHECK-COUNT-4: cf.cond_br
// CHECK: return
// Reproducer run through -int-range-optimizations. Going by the function
// name, it guards against the range analysis treating reachable blocks as
// dead; the FileCheck directives above require that no comparison is folded
// to a constant `true` and that all four conditional branches remain.
//
// Control-flow shape:
//   ^bb1        outer loop header, %12 = 0, 64, 128, ... while %12 < %9
//   ^bb2        outer loop body prologue: builds a 1-element mask
//   ^bb3        inner loop header, %23 = 0, 1, ... while %23 < 1
//   ^bb4..^bb6  masked load into the loop-carried vector
//   ^bb7        outer loop latch (%12 += 64)
//   ^bb8..^bb10 epilogue: the thread with id 0 stores 0.0 to %mem[0]
func.func @blocks_prematurely_declared_dead_bug(%mem: memref<?xf16>) {
%cst = arith.constant dense<false> : vector<1xi1>
%c1 = arith.constant 1 : index
%cst_0 = arith.constant dense<0.000000e+00> : vector<1xf16>
%cst_1 = arith.constant 0.000000e+00 : f16
%c16 = arith.constant 16 : index
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
// Thread id along x, annotated with an upper bound of 64.
%thread_id_x = gpu.thread_id x upper_bound 64
// Opaque value whose signed and unsigned ranges are both [16, 112].
%6 = test.with_bounds { smin = 16 : index, smax = 112 : index, umin = 16 : index, umax = 112 : index } : index
// %9 is %6 rounded down to a multiple of 16 (unsigned divide, then multiply).
%8 = arith.divui %6, %c16 : index
%9 = arith.muli %8, %c16 : index
cf.br ^bb1(%c0 : index)
// Outer loop header: %12 is the induction variable, bounded above by %9.
^bb1(%12: index): // 2 preds: ^bb0, ^bb7
%13 = arith.cmpi slt, %12, %9 : index
cf.cond_br %13, ^bb2, ^bb8
^bb2: // pred: ^bb1
// %14 = distance left to the bound, %15 = that distance clamped to at most 64,
// %16 = the clamped distance minus the thread id.
%14 = arith.subi %9, %12 : index
%15 = arith.minsi %14, %c64 : index
%16 = arith.subi %15, %thread_id_x : index
%17 = vector.constant_mask [1] : vector<1xi1>
// The mask element is set only when %16 is strictly positive.
%18 = arith.cmpi sgt, %16, %c0 : index
%19 = arith.select %18, %17, %cst : vector<1xi1>
// Round-trip the mask bit through a scalar: extract element 0 and re-insert it
// into the all-false vector.
%20 = vector.extract %19[0] : i1 from vector<1xi1>
%21 = vector.insert %20, %cst [0] : i1 into vector<1xi1>
// Base index for the load in ^bb5: outer induction variable plus thread id.
%22 = arith.addi %12, %thread_id_x : index
cf.br ^bb3(%c0, %cst_0 : index, vector<1xf16>)
// Inner loop header: %23 counts from 0 while below 1; %24 carries the vector
// being filled.
^bb3(%23: index, %24: vector<1xf16>): // 2 preds: ^bb2, ^bb6
%25 = arith.cmpi slt, %23, %c1 : index
cf.cond_br %25, ^bb4, ^bb7
^bb4: // pred: ^bb3
// Read the mask bit at the dynamic index %23; when it is false, skip the load
// and forward the carried vector unchanged.
%26 = vector.extract %21[%23] : i1 from vector<1xi1>
cf.cond_br %26, ^bb5, ^bb6(%24 : vector<1xf16>)
^bb5: // pred: ^bb4
// Masked-in path: load %mem[%22 + %23] and insert it at position %23.
%27 = arith.addi %22, %23 : index
%28 = memref.load %mem[%27] : memref<?xf16>
%29 = vector.insert %28, %24[%23] : f16 into vector<1xf16>
cf.br ^bb6(%29 : vector<1xf16>)
// Inner loop latch: advance %23 by 1.
^bb6(%30: vector<1xf16>): // 2 preds: ^bb4, ^bb5
%31 = arith.addi %23, %c1 : index
cf.br ^bb3(%31, %30 : index, vector<1xf16>)
// Outer loop latch: advance %12 by 64.
^bb7: // pred: ^bb3
%37 = arith.addi %12, %c64 : index
cf.br ^bb1(%37 : index)
// Outer loop exit: only the thread whose id equals 0 performs the store.
^bb8: // pred: ^bb1
%70 = arith.cmpi eq, %thread_id_x, %c0 : index
cf.cond_br %70, ^bb9, ^bb10
^bb9: // pred: ^bb8
memref.store %cst_1, %mem[%c0] : memref<?xf16>
cf.br ^bb10
^bb10: // 2 preds: ^bb8, ^bb9
return
}