; blob: 1b4417890004ed0a041e3b7de0bcd51f22fda3ff [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -global-isel -new-reg-bank-select < %s | FileCheck %s --check-prefix=HAWAII
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX12
; i16 compares with both operands marked inreg. The expected output for both
; targets uses scalar s_cmp_* / s_cselect_b32 instructions: the operands are
; masked with 0xffff for the eq/ne/unsigned compares and sign-extended with
; s_sext_i32_i16 for the signed compares, then compared as 32-bit values.
; The assertions also show each s_cselect_b32 result being re-tested against 0
; (s_cmp_lg_u32 sN, 0) before the final additions.
define void @icmp_i16_uniform(i16 inreg %a, i16 inreg %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_i16_uniform:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: s_and_b32 s4, s16, 0xffff
; HAWAII-NEXT: s_and_b32 s5, s17, 0xffff
; HAWAII-NEXT: s_cmp_eq_u32 s4, s5
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s4, s5
; HAWAII-NEXT: s_sext_i32_i16 s6, s16
; HAWAII-NEXT: s_sext_i32_i16 s9, s17
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_lt_i32 s6, s9
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_cmp_gt_i32 s6, s9
; HAWAII-NEXT: s_cselect_b32 s11, 1, 0
; HAWAII-NEXT: s_cmp_le_i32 s6, s9
; HAWAII-NEXT: s_cselect_b32 s12, 1, 0
; HAWAII-NEXT: s_cmp_ge_i32 s6, s9
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_lt_u32 s4, s5
; HAWAII-NEXT: s_cselect_b32 s13, 1, 0
; HAWAII-NEXT: s_cmp_gt_u32 s4, s5
; HAWAII-NEXT: s_cselect_b32 s14, 1, 0
; HAWAII-NEXT: s_cmp_le_u32 s4, s5
; HAWAII-NEXT: s_cselect_b32 s15, 1, 0
; HAWAII-NEXT: s_cmp_ge_u32 s4, s5
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s7, 0
; HAWAII-NEXT: s_cselect_b32 s5, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s8, 0
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s10, 0
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s11, 0
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s12, 0
; HAWAII-NEXT: s_cselect_b32 s11, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s9, 0
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s13, 0
; HAWAII-NEXT: s_cselect_b32 s12, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s14, 0
; HAWAII-NEXT: s_cselect_b32 s13, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s15, 0
; HAWAII-NEXT: s_cselect_b32 s14, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s4, 0
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_add_i32 s5, s5, s7
; HAWAII-NEXT: s_add_i32 s5, s5, s8
; HAWAII-NEXT: s_add_i32 s5, s5, s10
; HAWAII-NEXT: s_add_i32 s5, s5, s11
; HAWAII-NEXT: s_add_i32 s5, s5, s9
; HAWAII-NEXT: s_add_i32 s5, s5, s12
; HAWAII-NEXT: s_add_i32 s5, s5, s13
; HAWAII-NEXT: s_add_i32 s5, s5, s14
; HAWAII-NEXT: s_add_i32 s4, s5, s4
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: v_mov_b32_e32 v2, s4
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_i16_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_and_b32 s2, 0xffff, s0
; GFX12-NEXT: s_and_b32 s3, 0xffff, s1
; GFX12-NEXT: s_sext_i32_i16 s0, s0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_cmp_eq_u32 s2, s3
; GFX12-NEXT: s_sext_i32_i16 s1, s1
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s2, s3
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_cmp_lt_i32 s0, s1
; GFX12-NEXT: s_cselect_b32 s6, 1, 0
; GFX12-NEXT: s_cmp_gt_i32 s0, s1
; GFX12-NEXT: s_cselect_b32 s7, 1, 0
; GFX12-NEXT: s_cmp_le_i32 s0, s1
; GFX12-NEXT: s_cselect_b32 s8, 1, 0
; GFX12-NEXT: s_cmp_ge_i32 s0, s1
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_cmp_lt_u32 s2, s3
; GFX12-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-NEXT: s_cmp_gt_u32 s2, s3
; GFX12-NEXT: s_cselect_b32 s9, 1, 0
; GFX12-NEXT: s_cmp_le_u32 s2, s3
; GFX12-NEXT: s_cselect_b32 s10, 1, 0
; GFX12-NEXT: s_cmp_ge_u32 s2, s3
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s4, 0
; GFX12-NEXT: s_cselect_b32 s3, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s5, 0
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_cmp_lg_u32 s6, 0
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s7, 0
; GFX12-NEXT: s_cselect_b32 s6, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s8, 0
; GFX12-NEXT: s_cselect_b32 s7, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, 0
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s1, 0
; GFX12-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s9, 0
; GFX12-NEXT: s_cselect_b32 s8, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s10, 0
; GFX12-NEXT: s_cselect_b32 s9, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_add_co_i32 s3, s3, s4
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s3, s3, s5
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s3, s3, s6
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s3, s3, s7
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s3, s0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s0, s1
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s0, s8
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s0, s9
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s0, s2
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; One icmp per integer predicate, all on the same (%a, %b) pair.
%eq_result = icmp eq i16 %a, %b
%ne_result = icmp ne i16 %a, %b
%slt_result = icmp slt i16 %a, %b
%sgt_result = icmp sgt i16 %a, %b
%sle_result = icmp sle i16 %a, %b
%sge_result = icmp sge i16 %a, %b
%ult_result = icmp ult i16 %a, %b
%ugt_result = icmp ugt i16 %a, %b
%ule_result = icmp ule i16 %a, %b
%uge_result = icmp uge i16 %a, %b
; Widen every i1 compare result to i32.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%slt_zext = zext i1 %slt_result to i32
%sgt_zext = zext i1 %sgt_result to i32
%sle_zext = zext i1 %sle_result to i32
%sge_zext = zext i1 %sge_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
; Sum the ten widened results and store the total through %p, so the stored
; value depends on every compare.
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %slt_zext
%sum3 = add i32 %sum2, %sgt_zext
%sum4 = add i32 %sum3, %sle_zext
%sum5 = add i32 %sum4, %sge_zext
%sum6 = add i32 %sum5, %ult_zext
%sum7 = add i32 %sum6, %ugt_zext
%sum8 = add i32 %sum7, %ule_zext
%result = add i32 %sum8, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; i16 compares with plain (non-inreg) operands. The expected output uses vector
; v_cmp_* / v_cndmask_b32 instructions. On hawaii the operands are first widened
; to 32 bits (v_and_b32 with 0xffff for eq/ne/unsigned, v_bfe_i32 for signed)
; and compared with 32-bit v_cmp; on gfx1200 the 16-bit v_cmp_*_u16 / v_cmp_*_i16
; forms are used directly on the incoming registers.
define void @icmp_i16_divergent(i16 %a, i16 %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_i16_divergent:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: v_and_b32_e32 v4, 0xffff, v0
; HAWAII-NEXT: v_and_b32_e32 v5, 0xffff, v1
; HAWAII-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5
; HAWAII-NEXT: v_bfe_i32 v0, v0, 0, 16
; HAWAII-NEXT: v_bfe_i32 v1, v1, 0, 16
; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ne_u32_e32 vcc, v4, v5
; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_i32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_i32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_u32_e32 vcc, v4, v5
; HAWAII-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_u32_e32 vcc, v4, v5
; HAWAII-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_u32_e32 vcc, v4, v5
; HAWAII-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_u32_e32 vcc, v4, v5
; HAWAII-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; HAWAII-NEXT: v_add_i32_e32 v5, vcc, v6, v7
; HAWAII-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; HAWAII-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; HAWAII-NEXT: v_add_i32_e32 v5, vcc, v5, v10
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v5, v0
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v0, v11
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v0, v12
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_i16_divergent:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_eq_u16_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ne_u16_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_i16_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_nc_u32_e32 v4, v4, v5
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_i16_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_i16_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v4, v4, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_i16_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_u16_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v4, v4, v5, v8
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_u16_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_u16_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_u16_e32 vcc_lo, v0, v1
; GFX12-NEXT: v_add3_u32 v1, v4, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add3_u32 v0, v1, v5, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; One icmp per integer predicate, all on the same (%a, %b) pair.
%eq_result = icmp eq i16 %a, %b
%ne_result = icmp ne i16 %a, %b
%slt_result = icmp slt i16 %a, %b
%sgt_result = icmp sgt i16 %a, %b
%sle_result = icmp sle i16 %a, %b
%sge_result = icmp sge i16 %a, %b
%ult_result = icmp ult i16 %a, %b
%ugt_result = icmp ugt i16 %a, %b
%ule_result = icmp ule i16 %a, %b
%uge_result = icmp uge i16 %a, %b
; Widen every i1 compare result to i32.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%slt_zext = zext i1 %slt_result to i32
%sgt_zext = zext i1 %sgt_result to i32
%sle_zext = zext i1 %sle_result to i32
%sge_zext = zext i1 %sge_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
; Sum the ten widened results and store the total through %p, so the stored
; value depends on every compare.
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %slt_zext
%sum3 = add i32 %sum2, %sgt_zext
%sum4 = add i32 %sum3, %sle_zext
%sum5 = add i32 %sum4, %sge_zext
%sum6 = add i32 %sum5, %ult_zext
%sum7 = add i32 %sum6, %ugt_zext
%sum8 = add i32 %sum7, %ule_zext
%result = add i32 %sum8, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; i32 compares with both operands marked inreg. The expected output compares the
; incoming scalar registers directly with s_cmp_* (no extension step) and turns
; each result into 0/1 with s_cselect_b32. As in the i16 uniform case, the
; assertions show every s_cselect_b32 result being re-tested against 0 before
; the scalar additions.
define void @icmp_i32_uniform(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_i32_uniform:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: s_cmp_eq_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s5, 1, 0
; HAWAII-NEXT: s_cmp_lt_i32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_gt_i32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_le_i32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_ge_i32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_cmp_lt_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s11, 1, 0
; HAWAII-NEXT: s_cmp_gt_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s12, 1, 0
; HAWAII-NEXT: s_cmp_le_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s13, 1, 0
; HAWAII-NEXT: s_cmp_ge_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s14, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s4, 0
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s5, 0
; HAWAII-NEXT: s_cselect_b32 s5, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s7, 0
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s8, 0
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s9, 0
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s10, 0
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s11, 0
; HAWAII-NEXT: s_cselect_b32 s11, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s12, 0
; HAWAII-NEXT: s_cselect_b32 s12, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s13, 0
; HAWAII-NEXT: s_cselect_b32 s13, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s14, 0
; HAWAII-NEXT: s_cselect_b32 s14, 1, 0
; HAWAII-NEXT: s_add_i32 s4, s4, s5
; HAWAII-NEXT: s_add_i32 s4, s4, s7
; HAWAII-NEXT: s_add_i32 s4, s4, s8
; HAWAII-NEXT: s_add_i32 s4, s4, s9
; HAWAII-NEXT: s_add_i32 s4, s4, s10
; HAWAII-NEXT: s_add_i32 s4, s4, s11
; HAWAII-NEXT: s_add_i32 s4, s4, s12
; HAWAII-NEXT: s_add_i32 s4, s4, s13
; HAWAII-NEXT: s_add_i32 s4, s4, s14
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: v_mov_b32_e32 v2, s4
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_i32_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_cmp_eq_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s3, 1, 0
; GFX12-NEXT: s_cmp_lt_i32 s0, s1
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_gt_i32 s0, s1
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_le_i32 s0, s1
; GFX12-NEXT: s_cselect_b32 s6, 1, 0
; GFX12-NEXT: s_cmp_ge_i32 s0, s1
; GFX12-NEXT: s_cselect_b32 s7, 1, 0
; GFX12-NEXT: s_cmp_lt_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s8, 1, 0
; GFX12-NEXT: s_cmp_gt_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s9, 1, 0
; GFX12-NEXT: s_cmp_le_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s10, 1, 0
; GFX12-NEXT: s_cmp_ge_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s3, 0
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s4, 0
; GFX12-NEXT: s_cselect_b32 s3, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s5, 0
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s6, 0
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s7, 0
; GFX12-NEXT: s_cselect_b32 s6, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s8, 0
; GFX12-NEXT: s_cselect_b32 s7, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s9, 0
; GFX12-NEXT: s_cselect_b32 s8, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s10, 0
; GFX12-NEXT: s_cselect_b32 s9, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, 0
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s2
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s3
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s4
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s5
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s6
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s7
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s8
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s9
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s1, s0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; One icmp per integer predicate, all on the same (%a, %b) pair.
%eq_result = icmp eq i32 %a, %b
%ne_result = icmp ne i32 %a, %b
%slt_result = icmp slt i32 %a, %b
%sgt_result = icmp sgt i32 %a, %b
%sle_result = icmp sle i32 %a, %b
%sge_result = icmp sge i32 %a, %b
%ult_result = icmp ult i32 %a, %b
%ugt_result = icmp ugt i32 %a, %b
%ule_result = icmp ule i32 %a, %b
%uge_result = icmp uge i32 %a, %b
; Widen every i1 compare result to i32.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%slt_zext = zext i1 %slt_result to i32
%sgt_zext = zext i1 %sgt_result to i32
%sle_zext = zext i1 %sle_result to i32
%sge_zext = zext i1 %sge_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
; Sum the ten widened results and store the total through %p, so the stored
; value depends on every compare.
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %slt_zext
%sum3 = add i32 %sum2, %sgt_zext
%sum4 = add i32 %sum3, %sle_zext
%sum5 = add i32 %sum4, %sge_zext
%sum6 = add i32 %sum5, %ult_zext
%sum7 = add i32 %sum6, %ugt_zext
%sum8 = add i32 %sum7, %ule_zext
%result = add i32 %sum8, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; i32 compares with plain (non-inreg) operands. The expected output compares the
; incoming vector registers v0/v1 directly with 32-bit v_cmp_* into vcc (vcc_lo
; on gfx1200) and turns each result into 0/1 with v_cndmask_b32. The final sum
; uses a chain of v_add_i32 on hawaii and v_add_nc_u32 / v_add3_u32 on gfx1200.
define void @icmp_i32_divergent(i32 %a, i32 %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_i32_divergent:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_i32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_i32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v4, v5
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v7
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v9
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v10
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v11
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v12
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_i32_divergent:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_nc_u32_e32 v4, v4, v5
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_i32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v4, v4, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_i32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v4, v4, v5, v8
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: v_add3_u32 v1, v4, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add3_u32 v0, v1, v5, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; One icmp per integer predicate, all on the same (%a, %b) pair.
%eq_result = icmp eq i32 %a, %b
%ne_result = icmp ne i32 %a, %b
%slt_result = icmp slt i32 %a, %b
%sgt_result = icmp sgt i32 %a, %b
%sle_result = icmp sle i32 %a, %b
%sge_result = icmp sge i32 %a, %b
%ult_result = icmp ult i32 %a, %b
%ugt_result = icmp ugt i32 %a, %b
%ule_result = icmp ule i32 %a, %b
%uge_result = icmp uge i32 %a, %b
; Widen every i1 compare result to i32.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%slt_zext = zext i1 %slt_result to i32
%sgt_zext = zext i1 %sgt_result to i32
%sle_zext = zext i1 %sle_result to i32
%sge_zext = zext i1 %sge_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
; Sum the ten widened results and store the total through %p, so the stored
; value depends on every compare.
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %slt_zext
%sum3 = add i32 %sum2, %sgt_zext
%sum4 = add i32 %sum3, %sle_zext
%sum5 = add i32 %sum4, %sge_zext
%sum6 = add i32 %sum5, %ult_zext
%sum7 = add i32 %sum6, %ugt_zext
%sum8 = add i32 %sum7, %ule_zext
%result = add i32 %sum8, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; i64 compares with plain (non-inreg) operands. The expected output compares the
; 64-bit register pairs v[0:1] and v[2:3] with v_cmp_*_u64 / v_cmp_*_i64 and
; turns each result into 0/1 with v_cndmask_b32; the destination pointer arrives
; in v[4:5]. There is no inreg (uniform) i64 counterpart among the functions
; preceding this one in the file.
define void @icmp_i64_divergent(i64 %a, i64 %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_i64_divergent:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_i64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_i64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_i64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_i64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v6, v7
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v9
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v10
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v11
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v12
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v13
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v14
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; HAWAII-NEXT: buffer_store_dword v0, v[4:5], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_i64_divergent:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_nc_u32_e32 v6, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v6, v6, v8, v9
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_i64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add3_u32 v6, v6, v7, v10
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: v_add3_u32 v1, v6, v8, v9
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add3_u32 v0, v1, v7, v0
; GFX12-NEXT: global_store_b32 v[4:5], v0, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; One icmp per integer predicate, all on the same (%a, %b) pair.
%eq_result = icmp eq i64 %a, %b
%ne_result = icmp ne i64 %a, %b
%slt_result = icmp slt i64 %a, %b
%sgt_result = icmp sgt i64 %a, %b
%sle_result = icmp sle i64 %a, %b
%sge_result = icmp sge i64 %a, %b
%ult_result = icmp ult i64 %a, %b
%ugt_result = icmp ugt i64 %a, %b
%ule_result = icmp ule i64 %a, %b
%uge_result = icmp uge i64 %a, %b
; Widen every i1 compare result to i32 (the sum stays 32-bit even though the
; compared operands are 64-bit).
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%slt_zext = zext i1 %slt_result to i32
%sgt_zext = zext i1 %sgt_result to i32
%sle_zext = zext i1 %sle_result to i32
%sge_zext = zext i1 %sge_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
; Sum the ten widened results and store the total through %p, so the stored
; value depends on every compare.
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %slt_zext
%sum3 = add i32 %sum2, %sgt_zext
%sum4 = add i32 %sum3, %sle_zext
%sum5 = add i32 %sum4, %sge_zext
%sum6 = add i32 %sum5, %ult_zext
%sum7 = add i32 %sum6, %ugt_zext
%sum8 = add i32 %sum7, %ule_zext
%result = add i32 %sum8, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; 32-bit pointer tests
; Compares of addrspace(3) pointers with both operands marked inreg. Only six
; predicates are exercised here (eq, ne, ult, ugt, ule, uge); no signed
; predicates are used on the pointers. The expected output treats the pointers
; as 32-bit values and compares the incoming scalar registers directly with
; s_cmp_*_u32, materializing each result with s_cselect_b32.
define void @icmp_p3_uniform(ptr addrspace(3) inreg %a, ptr addrspace(3) inreg %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_p3_uniform:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: s_cmp_eq_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s5, 1, 0
; HAWAII-NEXT: s_cmp_lt_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_gt_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_le_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_ge_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s4, 0
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s5, 0
; HAWAII-NEXT: s_cselect_b32 s5, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s7, 0
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s8, 0
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s9, 0
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s10, 0
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_add_i32 s4, s4, s5
; HAWAII-NEXT: s_add_i32 s4, s4, s7
; HAWAII-NEXT: s_add_i32 s4, s4, s8
; HAWAII-NEXT: s_add_i32 s4, s4, s9
; HAWAII-NEXT: s_add_i32 s4, s4, s10
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: v_mov_b32_e32 v2, s4
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_p3_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_cmp_eq_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s3, 1, 0
; GFX12-NEXT: s_cmp_lt_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_gt_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_le_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s6, 1, 0
; GFX12-NEXT: s_cmp_ge_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s3, 0
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s4, 0
; GFX12-NEXT: s_cselect_b32 s3, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s5, 0
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s6, 0
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, 0
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s2
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s3
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s4
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s5
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s1, s0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; Equality and unsigned-order compares on the same pointer pair.
%eq_result = icmp eq ptr addrspace(3) %a, %b
%ne_result = icmp ne ptr addrspace(3) %a, %b
%ult_result = icmp ult ptr addrspace(3) %a, %b
%ugt_result = icmp ugt ptr addrspace(3) %a, %b
%ule_result = icmp ule ptr addrspace(3) %a, %b
%uge_result = icmp uge ptr addrspace(3) %a, %b
; Widen every i1 compare result to i32.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
; Sum the six widened results and store the total through %p, so the stored
; value depends on every compare.
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %ult_zext
%sum3 = add i32 %sum2, %ugt_zext
%sum4 = add i32 %sum3, %ule_zext
%result = add i32 %sum4, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; Compares two addrspace(3) pointers passed as ordinary (non-inreg) arguments
; using eq, ne, ult, ugt, ule and uge, then stores the sum of the six
; zero-extended results through %p.
; In the expected output for both targets the operands are in v0 and v1, every
; predicate is a 32-bit v_cmp_*_u32 into vcc (vcc_lo on gfx1200), and each
; result is materialised as 0/1 with v_cndmask_b32.
define void @icmp_p3_divergent(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_p3_divergent:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v4, v5
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v7
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_p3_divergent:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_nc_u32_e32 v4, v4, v5
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: v_add3_u32 v1, v4, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add3_u32 v0, v1, v5, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; Apply each exercised predicate to the same (%a, %b) operand pair.
%eq_result = icmp eq ptr addrspace(3) %a, %b
%ne_result = icmp ne ptr addrspace(3) %a, %b
%ult_result = icmp ult ptr addrspace(3) %a, %b
%ugt_result = icmp ugt ptr addrspace(3) %a, %b
%ule_result = icmp ule ptr addrspace(3) %a, %b
%uge_result = icmp uge ptr addrspace(3) %a, %b
; Widen every i1 to i32 and chain the adds so that all six compare results
; contribute to the single stored value.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %ult_zext
%sum3 = add i32 %sum2, %ugt_zext
%sum4 = add i32 %sum3, %ule_zext
%result = add i32 %sum4, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; Compares two addrspace(5) pointers passed as inreg arguments using eq, ne,
; ult, ugt, ule and uge, then stores the sum of the six zero-extended results
; through %p.
; In the expected output the operands stay in SGPRs (s16/s17 for hawaii,
; s0/s1 for gfx1200) and every predicate is a scalar s_cmp_*_u32 followed by
; s_cselect_b32; the final sum is copied into v2 only for the store.
define void @icmp_p5_uniform(ptr addrspace(5) inreg %a, ptr addrspace(5) inreg %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_p5_uniform:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: s_cmp_eq_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s5, 1, 0
; HAWAII-NEXT: s_cmp_lt_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_gt_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_le_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_ge_u32 s16, s17
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s4, 0
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s5, 0
; HAWAII-NEXT: s_cselect_b32 s5, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s7, 0
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s8, 0
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s9, 0
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s10, 0
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_add_i32 s4, s4, s5
; HAWAII-NEXT: s_add_i32 s4, s4, s7
; HAWAII-NEXT: s_add_i32 s4, s4, s8
; HAWAII-NEXT: s_add_i32 s4, s4, s9
; HAWAII-NEXT: s_add_i32 s4, s4, s10
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: v_mov_b32_e32 v2, s4
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_p5_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_cmp_eq_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s3, 1, 0
; GFX12-NEXT: s_cmp_lt_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_gt_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_le_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s6, 1, 0
; GFX12-NEXT: s_cmp_ge_u32 s0, s1
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s3, 0
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s4, 0
; GFX12-NEXT: s_cselect_b32 s3, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s5, 0
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s6, 0
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, 0
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s2
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s3
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s4
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s1, s5
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s1, s0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; Apply each exercised predicate to the same (%a, %b) operand pair.
%eq_result = icmp eq ptr addrspace(5) %a, %b
%ne_result = icmp ne ptr addrspace(5) %a, %b
%ult_result = icmp ult ptr addrspace(5) %a, %b
%ugt_result = icmp ugt ptr addrspace(5) %a, %b
%ule_result = icmp ule ptr addrspace(5) %a, %b
%uge_result = icmp uge ptr addrspace(5) %a, %b
; Widen every i1 to i32 and chain the adds so that all six compare results
; contribute to the single stored value.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %ult_zext
%sum3 = add i32 %sum2, %ugt_zext
%sum4 = add i32 %sum3, %ule_zext
%result = add i32 %sum4, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; Compares two addrspace(5) pointers passed as ordinary (non-inreg) arguments
; using eq, ne, ult, ugt, ule and uge, then stores the sum of the six
; zero-extended results through %p.
; In the expected output for both targets the operands are in v0 and v1, every
; predicate is a 32-bit v_cmp_*_u32 into vcc (vcc_lo on gfx1200), and each
; result is materialised as 0/1 with v_cndmask_b32.
define void @icmp_p5_divergent(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_p5_divergent:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v4, v5
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v7
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_p5_divergent:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_nc_u32_e32 v4, v4, v5
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v1
; GFX12-NEXT: v_add3_u32 v1, v4, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add3_u32 v0, v1, v5, v0
; GFX12-NEXT: global_store_b32 v[2:3], v0, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; Apply each exercised predicate to the same (%a, %b) operand pair.
%eq_result = icmp eq ptr addrspace(5) %a, %b
%ne_result = icmp ne ptr addrspace(5) %a, %b
%ult_result = icmp ult ptr addrspace(5) %a, %b
%ugt_result = icmp ugt ptr addrspace(5) %a, %b
%ule_result = icmp ule ptr addrspace(5) %a, %b
%uge_result = icmp uge ptr addrspace(5) %a, %b
; Widen every i1 to i32 and chain the adds so that all six compare results
; contribute to the single stored value.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %ult_zext
%sum3 = add i32 %sum2, %ugt_zext
%sum4 = add i32 %sum3, %ule_zext
%result = add i32 %sum4, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; 64-bit pointer tests
; Compares two default-address-space pointers passed as inreg arguments using
; eq, ne, ult, ugt, ule and uge, then stores the sum of the six zero-extended
; results through %p. The operands are 64-bit SGPR pairs (s[16:17]/s[18:19]
; for hawaii, s[0:1]/s[2:3] for gfx1200).
; Expected hawaii output: %b is copied into v[2:3] and all six predicates use
; v_cmp_*_u64 into vcc, each followed by s_or_b64 + s_cselect_b32.
; Expected gfx1200 output: eq and ne use s_cmp_eq_u64 / s_cmp_lg_u64, while the
; four ordered predicates use v_cmp_*_u64_e64 with an SGPR destination.
define void @icmp_p0_uniform(ptr inreg %a, ptr inreg %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_p0_uniform:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: v_mov_b32_e32 v2, s18
; HAWAII-NEXT: v_mov_b32_e32 v3, s19
; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s11, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s7, 0
; HAWAII-NEXT: s_cselect_b32 s5, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s8, 0
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s9, 0
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s10, 0
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s11, 0
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s4, 0
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_add_i32 s5, s5, s7
; HAWAII-NEXT: s_add_i32 s5, s5, s8
; HAWAII-NEXT: s_add_i32 s5, s5, s9
; HAWAII-NEXT: s_add_i32 s5, s5, s10
; HAWAII-NEXT: s_add_i32 s4, s5, s4
; HAWAII-NEXT: v_mov_b32_e32 v2, s4
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_p0_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_lt_u64_e64 s5, s[0:1], s[2:3]
; GFX12-NEXT: s_cmp_eq_u64 s[0:1], s[2:3]
; GFX12-NEXT: v_cmp_gt_u64_e64 s7, s[0:1], s[2:3]
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_lg_u64 s[0:1], s[2:3]
; GFX12-NEXT: v_cmp_le_u64_e64 s8, s[0:1], s[2:3]
; GFX12-NEXT: s_cselect_b32 s6, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s5, 0
; GFX12-NEXT: v_cmp_ge_u64_e64 s0, s[0:1], s[2:3]
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s7, 0
; GFX12-NEXT: s_cselect_b32 s7, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s8, 0
; GFX12-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, 0
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_cmp_lg_u32 s4, 0
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s6, 0
; GFX12-NEXT: s_cselect_b32 s3, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s5, 0
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s7, 0
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s1, 0
; GFX12-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, 0
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s2, s2, s3
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s2, s2, s4
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s2, s2, s5
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s2, s1
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s1, s0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; Apply each exercised predicate to the same (%a, %b) operand pair.
%eq_result = icmp eq ptr %a, %b
%ne_result = icmp ne ptr %a, %b
%ult_result = icmp ult ptr %a, %b
%ugt_result = icmp ugt ptr %a, %b
%ule_result = icmp ule ptr %a, %b
%uge_result = icmp uge ptr %a, %b
; Widen every i1 to i32 and chain the adds so that all six compare results
; contribute to the single stored value.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %ult_zext
%sum3 = add i32 %sum2, %ugt_zext
%sum4 = add i32 %sum3, %ule_zext
%result = add i32 %sum4, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; Compares two default-address-space pointers passed as ordinary (non-inreg)
; arguments using eq, ne, ult, ugt, ule and uge, then stores the sum of the six
; zero-extended results through %p.
; In the expected output for both targets the operands are the 64-bit VGPR
; pairs v[0:1] and v[2:3] (with %p in v[4:5]), every predicate is a
; v_cmp_*_u64 into vcc (vcc_lo on gfx1200), and each result is materialised as
; 0/1 with v_cndmask_b32.
define void @icmp_p0_divergent(ptr %a, ptr %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_p0_divergent:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v6, v7
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v9
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v10
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; HAWAII-NEXT: buffer_store_dword v0, v[4:5], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_p0_divergent:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_nc_u32_e32 v6, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: v_add3_u32 v1, v6, v8, v9
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add3_u32 v0, v1, v7, v0
; GFX12-NEXT: global_store_b32 v[4:5], v0, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; Apply each exercised predicate to the same (%a, %b) operand pair.
%eq_result = icmp eq ptr %a, %b
%ne_result = icmp ne ptr %a, %b
%ult_result = icmp ult ptr %a, %b
%ugt_result = icmp ugt ptr %a, %b
%ule_result = icmp ule ptr %a, %b
%uge_result = icmp uge ptr %a, %b
; Widen every i1 to i32 and chain the adds so that all six compare results
; contribute to the single stored value.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %ult_zext
%sum3 = add i32 %sum2, %ugt_zext
%sum4 = add i32 %sum3, %ule_zext
%result = add i32 %sum4, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; Compares two addrspace(1) pointers passed as inreg arguments using eq, ne,
; ult, ugt, ule and uge, then stores the sum of the six zero-extended results
; through %p. The operands are 64-bit SGPR pairs (s[16:17]/s[18:19] for
; hawaii, s[0:1]/s[2:3] for gfx1200).
; Expected hawaii output: %b is copied into v[2:3] and all six predicates use
; v_cmp_*_u64 into vcc, each followed by s_or_b64 + s_cselect_b32.
; Expected gfx1200 output: eq and ne use s_cmp_eq_u64 / s_cmp_lg_u64, while the
; four ordered predicates use v_cmp_*_u64_e64 with an SGPR destination.
define void @icmp_p1_uniform(ptr addrspace(1) inreg %a, ptr addrspace(1) inreg %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_p1_uniform:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: v_mov_b32_e32 v2, s18
; HAWAII-NEXT: v_mov_b32_e32 v3, s19
; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, s[16:17], v[2:3]
; HAWAII-NEXT: s_cselect_b32 s11, 1, 0
; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s7, 0
; HAWAII-NEXT: s_cselect_b32 s5, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s8, 0
; HAWAII-NEXT: s_cselect_b32 s7, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s9, 0
; HAWAII-NEXT: s_cselect_b32 s8, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s10, 0
; HAWAII-NEXT: s_cselect_b32 s9, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s11, 0
; HAWAII-NEXT: s_cselect_b32 s10, 1, 0
; HAWAII-NEXT: s_cmp_lg_u32 s4, 0
; HAWAII-NEXT: s_cselect_b32 s4, 1, 0
; HAWAII-NEXT: s_add_i32 s5, s5, s7
; HAWAII-NEXT: s_add_i32 s5, s5, s8
; HAWAII-NEXT: s_add_i32 s5, s5, s9
; HAWAII-NEXT: s_add_i32 s5, s5, s10
; HAWAII-NEXT: s_add_i32 s4, s5, s4
; HAWAII-NEXT: v_mov_b32_e32 v2, s4
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_p1_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_lt_u64_e64 s5, s[0:1], s[2:3]
; GFX12-NEXT: s_cmp_eq_u64 s[0:1], s[2:3]
; GFX12-NEXT: v_cmp_gt_u64_e64 s7, s[0:1], s[2:3]
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_lg_u64 s[0:1], s[2:3]
; GFX12-NEXT: v_cmp_le_u64_e64 s8, s[0:1], s[2:3]
; GFX12-NEXT: s_cselect_b32 s6, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s5, 0
; GFX12-NEXT: v_cmp_ge_u64_e64 s0, s[0:1], s[2:3]
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s7, 0
; GFX12-NEXT: s_cselect_b32 s7, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s8, 0
; GFX12-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, 0
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_cmp_lg_u32 s4, 0
; GFX12-NEXT: s_cselect_b32 s2, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s6, 0
; GFX12-NEXT: s_cselect_b32 s3, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s5, 0
; GFX12-NEXT: s_cselect_b32 s4, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s7, 0
; GFX12-NEXT: s_cselect_b32 s5, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s1, 0
; GFX12-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-NEXT: s_cmp_lg_u32 s0, 0
; GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s2, s2, s3
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s2, s2, s4
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s2, s2, s5
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s1, s2, s1
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: s_add_co_i32 s0, s1, s0
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; Apply each exercised predicate to the same (%a, %b) operand pair.
%eq_result = icmp eq ptr addrspace(1) %a, %b
%ne_result = icmp ne ptr addrspace(1) %a, %b
%ult_result = icmp ult ptr addrspace(1) %a, %b
%ugt_result = icmp ugt ptr addrspace(1) %a, %b
%ule_result = icmp ule ptr addrspace(1) %a, %b
%uge_result = icmp uge ptr addrspace(1) %a, %b
; Widen every i1 to i32 and chain the adds so that all six compare results
; contribute to the single stored value.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %ult_zext
%sum3 = add i32 %sum2, %ugt_zext
%sum4 = add i32 %sum3, %ule_zext
%result = add i32 %sum4, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}
; Compares two addrspace(1) pointers passed as ordinary (non-inreg) arguments
; using eq, ne, ult, ugt, ule and uge, then stores the sum of the six
; zero-extended results through %p.
; In the expected output for both targets the operands are the 64-bit VGPR
; pairs v[0:1] and v[2:3] (with %p in v[4:5]), every predicate is a
; v_cmp_*_u64 into vcc (vcc_lo on gfx1200), and each result is materialised as
; 0/1 with v_cndmask_b32.
define void @icmp_p1_divergent(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %p) {
; HAWAII-LABEL: icmp_p1_divergent:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: s_mov_b32 s6, 0
; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: s_mov_b32 s7, 0xf000
; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: s_mov_b64 s[4:5], 0
; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3]
; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v6, v7
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v9
; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v10
; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; HAWAII-NEXT: buffer_store_dword v0, v[4:5], s[4:7], 0 addr64
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: icmp_p1_divergent:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_add_nc_u32_e32 v6, v6, v7
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo
; GFX12-NEXT: v_cmp_ge_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX12-NEXT: v_add3_u32 v1, v6, v8, v9
; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add3_u32 v0, v1, v7, v0
; GFX12-NEXT: global_store_b32 v[4:5], v0, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
; Apply each exercised predicate to the same (%a, %b) operand pair.
%eq_result = icmp eq ptr addrspace(1) %a, %b
%ne_result = icmp ne ptr addrspace(1) %a, %b
%ult_result = icmp ult ptr addrspace(1) %a, %b
%ugt_result = icmp ugt ptr addrspace(1) %a, %b
%ule_result = icmp ule ptr addrspace(1) %a, %b
%uge_result = icmp uge ptr addrspace(1) %a, %b
; Widen every i1 to i32 and chain the adds so that all six compare results
; contribute to the single stored value.
%eq_zext = zext i1 %eq_result to i32
%ne_zext = zext i1 %ne_result to i32
%ult_zext = zext i1 %ult_result to i32
%ugt_zext = zext i1 %ugt_result to i32
%ule_zext = zext i1 %ule_result to i32
%uge_zext = zext i1 %uge_result to i32
%sum1 = add i32 %eq_zext, %ne_zext
%sum2 = add i32 %sum1, %ult_zext
%sum3 = add i32 %sum2, %ugt_zext
%sum4 = add i32 %sum3, %ule_zext
%result = add i32 %sum4, %uge_zext
store i32 %result, ptr addrspace(1) %p
ret void
}