[StructurizeCFG] Clean up some boolean not instructions
In some cases StructurizeCFG inserts i1 xor instructions to invert
predicates. Add a quick loop that cleans these up afterwards whenever we
can get away with modifying an existing compare instruction instead.
(StructurizeCFG is generally run late in the pipeline, so instcombine
does not clean them up for us.)
Differential Revision: https://reviews.llvm.org/D118623
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index 6a2b7ca..15384e6 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -16,22 +16,22 @@
; SI-NEXT: s_load_dword s0, s[0:1], 0xf
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s8, 0
-; SI-NEXT: s_cbranch_scc0 .LBB0_2
+; SI-NEXT: s_cbranch_scc0 .LBB0_4
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_add_i32 s2, s11, s0
-; SI-NEXT: s_cbranch_execz .LBB0_3
-; SI-NEXT: s_branch .LBB0_4
-; SI-NEXT: .LBB0_2:
-; SI-NEXT: ; implicit-def: $sgpr2
-; SI-NEXT: .LBB0_3: ; %if
+; SI-NEXT: s_cbranch_execnz .LBB0_3
+; SI-NEXT: .LBB0_2: ; %if
; SI-NEXT: s_sub_i32 s2, s9, s10
-; SI-NEXT: .LBB0_4: ; %endif
+; SI-NEXT: .LBB0_3: ; %endif
; SI-NEXT: s_add_i32 s0, s2, s8
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
+; SI-NEXT: .LBB0_4:
+; SI-NEXT: ; implicit-def: $sgpr2
+; SI-NEXT: s_branch .LBB0_2
entry:
%0 = icmp eq i32 %a, 0
@@ -59,28 +59,28 @@
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s6, 0
-; SI-NEXT: s_cbranch_scc0 .LBB1_2
+; SI-NEXT: s_cbranch_scc0 .LBB1_4
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_load_dword s2, s[0:1], 0x2e
; SI-NEXT: s_load_dword s3, s[0:1], 0x37
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_add_i32 s7, s2, s3
-; SI-NEXT: s_cbranch_execz .LBB1_3
-; SI-NEXT: s_branch .LBB1_4
-; SI-NEXT: .LBB1_2:
-; SI-NEXT: ; implicit-def: $sgpr7
-; SI-NEXT: .LBB1_3: ; %if
+; SI-NEXT: s_cbranch_execnz .LBB1_3
+; SI-NEXT: .LBB1_2: ; %if
; SI-NEXT: s_load_dword s2, s[0:1], 0x1c
; SI-NEXT: s_load_dword s0, s[0:1], 0x25
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_add_i32 s7, s2, s0
-; SI-NEXT: .LBB1_4: ; %endif
+; SI-NEXT: .LBB1_3: ; %endif
; SI-NEXT: s_add_i32 s0, s7, s6
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
+; SI-NEXT: .LBB1_4:
+; SI-NEXT: ; implicit-def: $sgpr7
+; SI-NEXT: s_branch .LBB1_2
entry:
%cmp0 = icmp eq i32 %a, 0