| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -global-isel -new-reg-bank-select < %s | FileCheck %s --check-prefix=HAWAII |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX12 |
| |
| ; i16 icmp with both operands passed `inreg`. |
| ; Evaluates eq, ne, slt, sgt, sle, sge, ult, ugt, ule and uge on %a and %b, |
| ; zero-extends each i1 result to i32, sums the ten values and stores the sum |
| ; to %p. |
| ; Both check prefixes expect scalar compares (s_cmp_*) on operands widened to |
| ; 32 bits: masked with 0xffff for eq/ne and the unsigned predicates, and |
| ; sign-extended (s_sext_i32_i16) for the signed predicates. |
| define void @icmp_i16_uniform(i16 inreg %a, i16 inreg %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_i16_uniform: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: s_and_b32 s4, s16, 0xffff |
| ; HAWAII-NEXT: s_and_b32 s5, s17, 0xffff |
| ; HAWAII-NEXT: s_cmp_eq_u32 s4, s5 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s4, s5 |
| ; HAWAII-NEXT: s_sext_i32_i16 s6, s16 |
| ; HAWAII-NEXT: s_sext_i32_i16 s9, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lt_i32 s6, s9 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_cmp_gt_i32 s6, s9 |
| ; HAWAII-NEXT: s_cselect_b32 s11, 1, 0 |
| ; HAWAII-NEXT: s_cmp_le_i32 s6, s9 |
| ; HAWAII-NEXT: s_cselect_b32 s12, 1, 0 |
| ; HAWAII-NEXT: s_cmp_ge_i32 s6, s9 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lt_u32 s4, s5 |
| ; HAWAII-NEXT: s_cselect_b32 s13, 1, 0 |
| ; HAWAII-NEXT: s_cmp_gt_u32 s4, s5 |
| ; HAWAII-NEXT: s_cselect_b32 s14, 1, 0 |
| ; HAWAII-NEXT: s_cmp_le_u32 s4, s5 |
| ; HAWAII-NEXT: s_cselect_b32 s15, 1, 0 |
| ; HAWAII-NEXT: s_cmp_ge_u32 s4, s5 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s7, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s5, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s8, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s10, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s11, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s12, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s11, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s9, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s13, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s12, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s14, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s13, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s15, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s14, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s4, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s7 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s8 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s10 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s11 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s9 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s12 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s13 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s14 |
| ; HAWAII-NEXT: s_add_i32 s4, s5, s4 |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: v_mov_b32_e32 v2, s4 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: icmp_i16_uniform: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_and_b32 s2, 0xffff, s0 |
| ; GFX12-NEXT: s_and_b32 s3, 0xffff, s1 |
| ; GFX12-NEXT: s_sext_i32_i16 s0, s0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_cmp_eq_u32 s2, s3 |
| ; GFX12-NEXT: s_sext_i32_i16 s1, s1 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s2, s3 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_cmp_lt_i32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 |
| ; GFX12-NEXT: s_cmp_gt_i32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s7, 1, 0 |
| ; GFX12-NEXT: s_cmp_le_i32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s8, 1, 0 |
| ; GFX12-NEXT: s_cmp_ge_i32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_cmp_lt_u32 s2, s3 |
| ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-NEXT: s_cmp_gt_u32 s2, s3 |
| ; GFX12-NEXT: s_cselect_b32 s9, 1, 0 |
| ; GFX12-NEXT: s_cmp_le_u32 s2, s3 |
| ; GFX12-NEXT: s_cselect_b32 s10, 1, 0 |
| ; GFX12-NEXT: s_cmp_ge_u32 s2, s3 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_cmp_lg_u32 s6, 0 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s7, 0 |
| ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s8, 0 |
| ; GFX12-NEXT: s_cselect_b32 s7, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s1, 0 |
| ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s9, 0 |
| ; GFX12-NEXT: s_cselect_b32 s8, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s10, 0 |
| ; GFX12-NEXT: s_cselect_b32 s9, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s2, 0 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_add_co_i32 s3, s3, s4 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s3, s3, s5 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s3, s3, s6 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s3, s3, s7 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s3, s0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s0, s1 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s0, s8 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s0, s9 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s0, s2 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX12-NEXT: global_store_b32 v[0:1], v2, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; One compare per integer predicate, all on the same operand pair. |
| %eq_result = icmp eq i16 %a, %b |
| %ne_result = icmp ne i16 %a, %b |
| %slt_result = icmp slt i16 %a, %b |
| %sgt_result = icmp sgt i16 %a, %b |
| %sle_result = icmp sle i16 %a, %b |
| %sge_result = icmp sge i16 %a, %b |
| %ult_result = icmp ult i16 %a, %b |
| %ugt_result = icmp ugt i16 %a, %b |
| %ule_result = icmp ule i16 %a, %b |
| %uge_result = icmp uge i16 %a, %b |
| ; Widen each i1 result to i32 so the results can be added together. |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %slt_zext = zext i1 %slt_result to i32 |
| %sgt_zext = zext i1 %sgt_result to i32 |
| %sle_zext = zext i1 %sle_result to i32 |
| %sge_zext = zext i1 %sge_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| ; Chain-add all ten values so every compare result feeds the stored sum. |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %slt_zext |
| %sum3 = add i32 %sum2, %sgt_zext |
| %sum4 = add i32 %sum3, %sle_zext |
| %sum5 = add i32 %sum4, %sge_zext |
| %sum6 = add i32 %sum5, %ult_zext |
| %sum7 = add i32 %sum6, %ugt_zext |
| %sum8 = add i32 %sum7, %ule_zext |
| %result = add i32 %sum8, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| ; i16 icmp with ordinary (non-`inreg`) operands. |
| ; Evaluates eq, ne, slt, sgt, sle, sge, ult, ugt, ule and uge on %a and %b, |
| ; zero-extends each i1 result to i32, sums the ten values and stores the sum |
| ; to %p. |
| ; The expected output uses vector compares (v_cmp_*). HAWAII first widens the |
| ; operands to 32 bits (v_and_b32 with 0xffff for eq/ne and unsigned, v_bfe_i32 |
| ; for signed) and compares as 32-bit values; GFX12 compares v0 and v1 directly |
| ; with 16-bit compare instructions (v_cmp_*_u16 / v_cmp_*_i16). |
| define void @icmp_i16_divergent(i16 %a, i16 %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_i16_divergent: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: v_and_b32_e32 v4, 0xffff, v0 |
| ; HAWAII-NEXT: v_and_b32_e32 v5, 0xffff, v1 |
| ; HAWAII-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 |
| ; HAWAII-NEXT: v_bfe_i32 v0, v0, 0, 16 |
| ; HAWAII-NEXT: v_bfe_i32 v1, v1, 0, 16 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ne_u32_e32 vcc, v4, v5 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_i32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_i32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_u32_e32 vcc, v4, v5 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_u32_e32 vcc, v4, v5 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_u32_e32 vcc, v4, v5 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_u32_e32 vcc, v4, v5 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; HAWAII-NEXT: v_add_i32_e32 v5, vcc, v6, v7 |
| ; HAWAII-NEXT: v_add_i32_e32 v5, vcc, v5, v8 |
| ; HAWAII-NEXT: v_add_i32_e32 v5, vcc, v5, v9 |
| ; HAWAII-NEXT: v_add_i32_e32 v5, vcc, v5, v10 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v5, v0 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v0, v1 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v0, v11 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v0, v12 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v0, v4 |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: icmp_i16_divergent: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_cmp_eq_u16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ne_u16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_i16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add_nc_u32_e32 v4, v4, v5 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_i16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_i16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add3_u32 v4, v4, v6, v7 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_i16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_u16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add3_u32 v4, v4, v5, v8 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_u16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_u16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_u16_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: v_add3_u32 v1, v4, v6, v7 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_add3_u32 v0, v1, v5, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; One compare per integer predicate, all on the same operand pair. |
| %eq_result = icmp eq i16 %a, %b |
| %ne_result = icmp ne i16 %a, %b |
| %slt_result = icmp slt i16 %a, %b |
| %sgt_result = icmp sgt i16 %a, %b |
| %sle_result = icmp sle i16 %a, %b |
| %sge_result = icmp sge i16 %a, %b |
| %ult_result = icmp ult i16 %a, %b |
| %ugt_result = icmp ugt i16 %a, %b |
| %ule_result = icmp ule i16 %a, %b |
| %uge_result = icmp uge i16 %a, %b |
| ; Widen each i1 result to i32 so the results can be added together. |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %slt_zext = zext i1 %slt_result to i32 |
| %sgt_zext = zext i1 %sgt_result to i32 |
| %sle_zext = zext i1 %sle_result to i32 |
| %sge_zext = zext i1 %sge_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| ; Chain-add all ten values so every compare result feeds the stored sum. |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %slt_zext |
| %sum3 = add i32 %sum2, %sgt_zext |
| %sum4 = add i32 %sum3, %sle_zext |
| %sum5 = add i32 %sum4, %sge_zext |
| %sum6 = add i32 %sum5, %ult_zext |
| %sum7 = add i32 %sum6, %ugt_zext |
| %sum8 = add i32 %sum7, %ule_zext |
| %result = add i32 %sum8, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| ; i32 icmp with both operands passed `inreg`. |
| ; Evaluates eq, ne, slt, sgt, sle, sge, ult, ugt, ule and uge on %a and %b, |
| ; zero-extends each i1 result to i32, sums the ten values and stores the sum |
| ; to %p. |
| ; Both check prefixes expect scalar compares (s_cmp_*_u32 / s_cmp_*_i32) |
| ; applied directly to the incoming SGPR arguments, with no widening step. |
| define void @icmp_i32_uniform(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_i32_uniform: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: s_cmp_eq_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s5, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lt_i32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_gt_i32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_le_i32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_ge_i32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lt_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s11, 1, 0 |
| ; HAWAII-NEXT: s_cmp_gt_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s12, 1, 0 |
| ; HAWAII-NEXT: s_cmp_le_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s13, 1, 0 |
| ; HAWAII-NEXT: s_cmp_ge_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s14, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s4, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s5, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s5, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s7, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s8, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s9, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s10, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s11, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s11, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s12, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s12, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s13, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s13, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s14, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s14, 1, 0 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s5 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s7 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s8 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s9 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s10 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s11 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s12 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s13 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s14 |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: v_mov_b32_e32 v2, s4 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: icmp_i32_uniform: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_cmp_eq_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 |
| ; GFX12-NEXT: s_cmp_lt_i32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_gt_i32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_le_i32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 |
| ; GFX12-NEXT: s_cmp_ge_i32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s7, 1, 0 |
| ; GFX12-NEXT: s_cmp_lt_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s8, 1, 0 |
| ; GFX12-NEXT: s_cmp_gt_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s9, 1, 0 |
| ; GFX12-NEXT: s_cmp_le_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s10, 1, 0 |
| ; GFX12-NEXT: s_cmp_ge_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_cmp_lg_u32 s2, 0 |
| ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s3, 0 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s6, 0 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s7, 0 |
| ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s8, 0 |
| ; GFX12-NEXT: s_cselect_b32 s7, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s9, 0 |
| ; GFX12-NEXT: s_cselect_b32 s8, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s10, 0 |
| ; GFX12-NEXT: s_cselect_b32 s9, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s2 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s3 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s4 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s5 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s6 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s7 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s8 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s9 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s1, s0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX12-NEXT: global_store_b32 v[0:1], v2, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; One compare per integer predicate, all on the same operand pair. |
| %eq_result = icmp eq i32 %a, %b |
| %ne_result = icmp ne i32 %a, %b |
| %slt_result = icmp slt i32 %a, %b |
| %sgt_result = icmp sgt i32 %a, %b |
| %sle_result = icmp sle i32 %a, %b |
| %sge_result = icmp sge i32 %a, %b |
| %ult_result = icmp ult i32 %a, %b |
| %ugt_result = icmp ugt i32 %a, %b |
| %ule_result = icmp ule i32 %a, %b |
| %uge_result = icmp uge i32 %a, %b |
| ; Widen each i1 result to i32 so the results can be added together. |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %slt_zext = zext i1 %slt_result to i32 |
| %sgt_zext = zext i1 %sgt_result to i32 |
| %sle_zext = zext i1 %sle_result to i32 |
| %sge_zext = zext i1 %sge_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| ; Chain-add all ten values so every compare result feeds the stored sum. |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %slt_zext |
| %sum3 = add i32 %sum2, %sgt_zext |
| %sum4 = add i32 %sum3, %sle_zext |
| %sum5 = add i32 %sum4, %sge_zext |
| %sum6 = add i32 %sum5, %ult_zext |
| %sum7 = add i32 %sum6, %ugt_zext |
| %sum8 = add i32 %sum7, %ule_zext |
| %result = add i32 %sum8, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| ; i32 icmp with ordinary (non-`inreg`) operands. |
| ; Evaluates eq, ne, slt, sgt, sle, sge, ult, ugt, ule and uge on %a and %b, |
| ; zero-extends each i1 result to i32, sums the ten values and stores the sum |
| ; to %p. |
| ; Both check prefixes expect vector compares (v_cmp_*_u32 / v_cmp_*_i32) on |
| ; v0 and v1, each followed by a v_cndmask_b32 that turns the condition into |
| ; 0 or 1. |
| define void @icmp_i32_divergent(i32 %a, i32 %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_i32_divergent: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_i32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_i32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v4, v5 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v6 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v7 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v9 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v10 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v11 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v12 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0 |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: icmp_i32_divergent: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add_nc_u32_e32 v4, v4, v5 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_i32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add3_u32 v4, v4, v6, v7 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_i32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add3_u32 v4, v4, v5, v8 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: v_add3_u32 v1, v4, v6, v7 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_add3_u32 v0, v1, v5, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; One compare per integer predicate, all on the same operand pair. |
| %eq_result = icmp eq i32 %a, %b |
| %ne_result = icmp ne i32 %a, %b |
| %slt_result = icmp slt i32 %a, %b |
| %sgt_result = icmp sgt i32 %a, %b |
| %sle_result = icmp sle i32 %a, %b |
| %sge_result = icmp sge i32 %a, %b |
| %ult_result = icmp ult i32 %a, %b |
| %ugt_result = icmp ugt i32 %a, %b |
| %ule_result = icmp ule i32 %a, %b |
| %uge_result = icmp uge i32 %a, %b |
| ; Widen each i1 result to i32 so the results can be added together. |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %slt_zext = zext i1 %slt_result to i32 |
| %sgt_zext = zext i1 %sgt_result to i32 |
| %sle_zext = zext i1 %sle_result to i32 |
| %sge_zext = zext i1 %sge_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| ; Chain-add all ten values so every compare result feeds the stored sum. |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %slt_zext |
| %sum3 = add i32 %sum2, %sgt_zext |
| %sum4 = add i32 %sum3, %sle_zext |
| %sum5 = add i32 %sum4, %sge_zext |
| %sum6 = add i32 %sum5, %ult_zext |
| %sum7 = add i32 %sum6, %ugt_zext |
| %sum8 = add i32 %sum7, %ule_zext |
| %result = add i32 %sum8, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| ; i64 icmp with ordinary (non-`inreg`) operands. |
| ; Evaluates eq, ne, slt, sgt, sle, sge, ult, ugt, ule and uge on %a and %b, |
| ; zero-extends each i1 result to i32, sums the ten values and stores the i32 |
| ; sum to %p. |
| ; Both check prefixes expect 64-bit vector compares (v_cmp_*_u64 / |
| ; v_cmp_*_i64) on the register pairs v[0:1] and v[2:3]. |
| define void @icmp_i64_divergent(i64 %a, i64 %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_i64_divergent: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_i64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_i64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_i64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_i64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v6, v7 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v9 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v10 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v11 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v12 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v13 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v14 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0 |
| ; HAWAII-NEXT: buffer_store_dword v0, v[4:5], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: icmp_i64_divergent: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add_nc_u32_e32 v6, v6, v7 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add3_u32 v6, v6, v8, v9 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_i64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add3_u32 v6, v6, v7, v10 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: v_add3_u32 v1, v6, v8, v9 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_add3_u32 v0, v1, v7, v0 |
| ; GFX12-NEXT: global_store_b32 v[4:5], v0, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; One compare per integer predicate, all on the same operand pair. |
| %eq_result = icmp eq i64 %a, %b |
| %ne_result = icmp ne i64 %a, %b |
| %slt_result = icmp slt i64 %a, %b |
| %sgt_result = icmp sgt i64 %a, %b |
| %sle_result = icmp sle i64 %a, %b |
| %sge_result = icmp sge i64 %a, %b |
| %ult_result = icmp ult i64 %a, %b |
| %ugt_result = icmp ugt i64 %a, %b |
| %ule_result = icmp ule i64 %a, %b |
| %uge_result = icmp uge i64 %a, %b |
| ; Widen each i1 result to i32 so the results can be added together. |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %slt_zext = zext i1 %slt_result to i32 |
| %sgt_zext = zext i1 %sgt_result to i32 |
| %sle_zext = zext i1 %sle_result to i32 |
| %sge_zext = zext i1 %sge_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| ; Chain-add all ten values so every compare result feeds the stored sum. |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %slt_zext |
| %sum3 = add i32 %sum2, %sgt_zext |
| %sum4 = add i32 %sum3, %sle_zext |
| %sum5 = add i32 %sum4, %sge_zext |
| %sum6 = add i32 %sum5, %ult_zext |
| %sum7 = add i32 %sum6, %ugt_zext |
| %sum8 = add i32 %sum7, %ule_zext |
| %result = add i32 %sum8, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| ; 32-bit pointer tests: the addrspace(3) and addrspace(5) functions below each apply icmp eq/ne/ult/ugt/ule/uge to two pointers, zext the i1 results to i32, sum them and store the total to %p. |
| define void @icmp_p3_uniform(ptr addrspace(3) inreg %a, ptr addrspace(3) inreg %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_p3_uniform: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: s_cmp_eq_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s5, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lt_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_gt_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_le_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_ge_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s4, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s5, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s5, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s7, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s8, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s9, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s10, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s5 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s7 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s8 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s9 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s10 |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: v_mov_b32_e32 v2, s4 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; Uniform case (both pointers are inreg) - both targets are expected to stay scalar with s_cmp_*_u32 + s_cselect_b32, on s16/s17 for hawaii (above) and on s0/s1 for gfx1200 (below); only the final sum is copied to a VGPR for the store. |
| ; GFX12-LABEL: icmp_p3_uniform: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_cmp_eq_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 |
| ; GFX12-NEXT: s_cmp_lt_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_gt_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_le_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 |
| ; GFX12-NEXT: s_cmp_ge_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_cmp_lg_u32 s2, 0 |
| ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s3, 0 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s6, 0 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s2 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s3 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s4 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s5 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s1, s0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX12-NEXT: global_store_b32 v[0:1], v2, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %eq_result = icmp eq ptr addrspace(3) %a, %b |
| %ne_result = icmp ne ptr addrspace(3) %a, %b |
| %ult_result = icmp ult ptr addrspace(3) %a, %b |
| %ugt_result = icmp ugt ptr addrspace(3) %a, %b |
| %ule_result = icmp ule ptr addrspace(3) %a, %b |
| %uge_result = icmp uge ptr addrspace(3) %a, %b |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %ult_zext |
| %sum3 = add i32 %sum2, %ugt_zext |
| %sum4 = add i32 %sum3, %ule_zext |
| %result = add i32 %sum4, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| define void @icmp_p3_divergent(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_p3_divergent: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v4, v5 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v6 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v7 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0 |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; Divergent case (pointers are not inreg) - both targets are expected to compare v0 with v1 using v_cmp_*_u32 into vcc/vcc_lo followed by v_cndmask_b32; hawaii (above) sums with a v_add_i32 chain, gfx1200 (below) with v_add_nc_u32 and v_add3_u32. |
| ; GFX12-LABEL: icmp_p3_divergent: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add_nc_u32_e32 v4, v4, v5 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: v_add3_u32 v1, v4, v6, v7 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_add3_u32 v0, v1, v5, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %eq_result = icmp eq ptr addrspace(3) %a, %b |
| %ne_result = icmp ne ptr addrspace(3) %a, %b |
| %ult_result = icmp ult ptr addrspace(3) %a, %b |
| %ugt_result = icmp ugt ptr addrspace(3) %a, %b |
| %ule_result = icmp ule ptr addrspace(3) %a, %b |
| %uge_result = icmp uge ptr addrspace(3) %a, %b |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %ult_zext |
| %sum3 = add i32 %sum2, %ugt_zext |
| %sum4 = add i32 %sum3, %ule_zext |
| %result = add i32 %sum4, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| define void @icmp_p5_uniform(ptr addrspace(5) inreg %a, ptr addrspace(5) inreg %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_p5_uniform: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: s_cmp_eq_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s5, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lt_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_gt_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_le_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_ge_u32 s16, s17 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s4, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s5, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s5, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s7, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s8, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s9, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s10, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s5 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s7 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s8 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s9 |
| ; HAWAII-NEXT: s_add_i32 s4, s4, s10 |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: v_mov_b32_e32 v2, s4 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; Uniform addrspace(5) case (both pointers are inreg) - the expected code has the same shape as for addrspace(3), scalar s_cmp_*_u32 + s_cselect_b32 on s16/s17 for hawaii (above) and on s0/s1 for gfx1200 (below). |
| ; GFX12-LABEL: icmp_p5_uniform: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_cmp_eq_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 |
| ; GFX12-NEXT: s_cmp_lt_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_gt_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_le_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 |
| ; GFX12-NEXT: s_cmp_ge_u32 s0, s1 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_cmp_lg_u32 s2, 0 |
| ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s3, 0 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s6, 0 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s2 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s3 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s4 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s1, s5 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s1, s0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX12-NEXT: global_store_b32 v[0:1], v2, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %eq_result = icmp eq ptr addrspace(5) %a, %b |
| %ne_result = icmp ne ptr addrspace(5) %a, %b |
| %ult_result = icmp ult ptr addrspace(5) %a, %b |
| %ugt_result = icmp ugt ptr addrspace(5) %a, %b |
| %ule_result = icmp ule ptr addrspace(5) %a, %b |
| %uge_result = icmp uge ptr addrspace(5) %a, %b |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %ult_zext |
| %sum3 = add i32 %sum2, %ugt_zext |
| %sum4 = add i32 %sum3, %ule_zext |
| %result = add i32 %sum4, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| define void @icmp_p5_divergent(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_p5_divergent: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v4, v5 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v6 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v7 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0 |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; Divergent addrspace(5) case (pointers are not inreg) - both targets are expected to compare v0 with v1 using v_cmp_*_u32 into vcc/vcc_lo followed by v_cndmask_b32; hawaii (above) sums with a v_add_i32 chain, gfx1200 (below) with v_add_nc_u32 and v_add3_u32. |
| ; GFX12-LABEL: icmp_p5_divergent: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add_nc_u32_e32 v4, v4, v5 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v1 |
| ; GFX12-NEXT: v_add3_u32 v1, v4, v6, v7 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_add3_u32 v0, v1, v5, v0 |
| ; GFX12-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %eq_result = icmp eq ptr addrspace(5) %a, %b |
| %ne_result = icmp ne ptr addrspace(5) %a, %b |
| %ult_result = icmp ult ptr addrspace(5) %a, %b |
| %ugt_result = icmp ugt ptr addrspace(5) %a, %b |
| %ule_result = icmp ule ptr addrspace(5) %a, %b |
| %uge_result = icmp uge ptr addrspace(5) %a, %b |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %ult_zext |
| %sum3 = add i32 %sum2, %ugt_zext |
| %sum4 = add i32 %sum3, %ule_zext |
| %result = add i32 %sum4, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| ; 64-bit pointer tests: the ptr and ptr addrspace(1) functions below each apply icmp eq/ne/ult/ugt/ule/uge to two pointers, zext the i1 results to i32, sum them and store the total to %p. |
| |
| define void @icmp_p0_uniform(ptr inreg %a, ptr inreg %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_p0_uniform: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: v_mov_b32_e32 v2, s18 |
| ; HAWAII-NEXT: v_mov_b32_e32 v3, s19 |
| ; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s11, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s7, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s5, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s8, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s9, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s10, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s11, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s4, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s7 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s8 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s9 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s10 |
| ; HAWAII-NEXT: s_add_i32 s4, s5, s4 |
| ; HAWAII-NEXT: v_mov_b32_e32 v2, s4 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; Uniform case (both pointers are inreg) - hawaii (above) is expected to copy s18/s19 into v[2:3] and run all six predicates as v_cmp_*_u64 into vcc, then s_or_b64 + s_cselect_b32; gfx1200 (below) uses s_cmp_eq_u64/s_cmp_lg_u64 for eq/ne and v_cmp_*_u64_e64 into an SGPR for lt/gt/le/ge. |
| ; GFX12-LABEL: icmp_p0_uniform: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_cmp_lt_u64_e64 s5, s[0:1], s[2:3] |
| ; GFX12-NEXT: s_cmp_eq_u64 s[0:1], s[2:3] |
| ; GFX12-NEXT: v_cmp_gt_u64_e64 s7, s[0:1], s[2:3] |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u64 s[0:1], s[2:3] |
| ; GFX12-NEXT: v_cmp_le_u64_e64 s8, s[0:1], s[2:3] |
| ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 |
| ; GFX12-NEXT: v_cmp_ge_u64_e64 s0, s[0:1], s[2:3] |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s7, 0 |
| ; GFX12-NEXT: s_cselect_b32 s7, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s8, 0 |
| ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s6, 0 |
| ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s7, 0 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s1, 0 |
| ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s2, s2, s3 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s2, s2, s4 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s2, s2, s5 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s2, s1 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s1, s0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX12-NEXT: global_store_b32 v[0:1], v2, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %eq_result = icmp eq ptr %a, %b |
| %ne_result = icmp ne ptr %a, %b |
| %ult_result = icmp ult ptr %a, %b |
| %ugt_result = icmp ugt ptr %a, %b |
| %ule_result = icmp ule ptr %a, %b |
| %uge_result = icmp uge ptr %a, %b |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %ult_zext |
| %sum3 = add i32 %sum2, %ugt_zext |
| %sum4 = add i32 %sum3, %ule_zext |
| %result = add i32 %sum4, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| define void @icmp_p0_divergent(ptr %a, ptr %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_p0_divergent: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v6, v7 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v9 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v10 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0 |
| ; HAWAII-NEXT: buffer_store_dword v0, v[4:5], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; Divergent case (pointers are not inreg) - both targets are expected to compare v[0:1] with v[2:3] using v_cmp_*_u64 into vcc/vcc_lo followed by v_cndmask_b32, and to store the sum through v[4:5]; gfx1200 (below) sums with v_add_nc_u32 and v_add3_u32. |
| ; GFX12-LABEL: icmp_p0_divergent: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add_nc_u32_e32 v6, v6, v7 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: v_add3_u32 v1, v6, v8, v9 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_add3_u32 v0, v1, v7, v0 |
| ; GFX12-NEXT: global_store_b32 v[4:5], v0, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %eq_result = icmp eq ptr %a, %b |
| %ne_result = icmp ne ptr %a, %b |
| %ult_result = icmp ult ptr %a, %b |
| %ugt_result = icmp ugt ptr %a, %b |
| %ule_result = icmp ule ptr %a, %b |
| %uge_result = icmp uge ptr %a, %b |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %ult_zext |
| %sum3 = add i32 %sum2, %ugt_zext |
| %sum4 = add i32 %sum3, %ule_zext |
| %result = add i32 %sum4, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| define void @icmp_p1_uniform(ptr addrspace(1) inreg %a, ptr addrspace(1) inreg %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_p1_uniform: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: v_mov_b32_e32 v2, s18 |
| ; HAWAII-NEXT: v_mov_b32_e32 v3, s19 |
| ; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, s[16:17], v[2:3] |
| ; HAWAII-NEXT: s_cselect_b32 s11, 1, 0 |
| ; HAWAII-NEXT: s_or_b64 s[4:5], vcc, vcc |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s7, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s5, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s8, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s7, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s9, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s8, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s10, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s9, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s11, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s10, 1, 0 |
| ; HAWAII-NEXT: s_cmp_lg_u32 s4, 0 |
| ; HAWAII-NEXT: s_cselect_b32 s4, 1, 0 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s7 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s8 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s9 |
| ; HAWAII-NEXT: s_add_i32 s5, s5, s10 |
| ; HAWAII-NEXT: s_add_i32 s4, s5, s4 |
| ; HAWAII-NEXT: v_mov_b32_e32 v2, s4 |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; Uniform addrspace(1) case (both pointers are inreg) - hawaii (above) is expected to copy s18/s19 into v[2:3] and run all six predicates as v_cmp_*_u64 into vcc, then s_or_b64 + s_cselect_b32; gfx1200 (below) uses s_cmp_eq_u64/s_cmp_lg_u64 for eq/ne and v_cmp_*_u64_e64 into an SGPR for lt/gt/le/ge. |
| ; GFX12-LABEL: icmp_p1_uniform: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_cmp_lt_u64_e64 s5, s[0:1], s[2:3] |
| ; GFX12-NEXT: s_cmp_eq_u64 s[0:1], s[2:3] |
| ; GFX12-NEXT: v_cmp_gt_u64_e64 s7, s[0:1], s[2:3] |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u64 s[0:1], s[2:3] |
| ; GFX12-NEXT: v_cmp_le_u64_e64 s8, s[0:1], s[2:3] |
| ; GFX12-NEXT: s_cselect_b32 s6, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 |
| ; GFX12-NEXT: v_cmp_ge_u64_e64 s0, s[0:1], s[2:3] |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s7, 0 |
| ; GFX12-NEXT: s_cselect_b32 s7, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s8, 0 |
| ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX12-NEXT: s_cselect_b32 s2, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s6, 0 |
| ; GFX12-NEXT: s_cselect_b32 s3, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s5, 0 |
| ; GFX12-NEXT: s_cselect_b32 s4, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s7, 0 |
| ; GFX12-NEXT: s_cselect_b32 s5, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s1, 0 |
| ; GFX12-NEXT: s_cselect_b32 s1, 1, 0 |
| ; GFX12-NEXT: s_cmp_lg_u32 s0, 0 |
| ; GFX12-NEXT: s_cselect_b32 s0, 1, 0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s2, s2, s3 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s2, s2, s4 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s2, s2, s5 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s1, s2, s1 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: s_add_co_i32 s0, s1, s0 |
| ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) |
| ; GFX12-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX12-NEXT: global_store_b32 v[0:1], v2, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %eq_result = icmp eq ptr addrspace(1) %a, %b |
| %ne_result = icmp ne ptr addrspace(1) %a, %b |
| %ult_result = icmp ult ptr addrspace(1) %a, %b |
| %ugt_result = icmp ugt ptr addrspace(1) %a, %b |
| %ule_result = icmp ule ptr addrspace(1) %a, %b |
| %uge_result = icmp uge ptr addrspace(1) %a, %b |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %ult_zext |
| %sum3 = add i32 %sum2, %ugt_zext |
| %sum4 = add i32 %sum3, %ule_zext |
| %result = add i32 %sum4, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |
| |
| define void @icmp_p1_divergent(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %p) { |
| ; HAWAII-LABEL: icmp_p1_divergent: |
| ; HAWAII: ; %bb.0: |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: s_mov_b32 s6, 0 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ne_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: s_mov_b32 s7, 0xf000 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: s_mov_b64 s[4:5], 0 |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_le_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc |
| ; HAWAII-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3] |
| ; HAWAII-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v6, v7 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v8 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v9 |
| ; HAWAII-NEXT: v_add_i32_e32 v1, vcc, v1, v10 |
| ; HAWAII-NEXT: v_add_i32_e32 v0, vcc, v1, v0 |
| ; HAWAII-NEXT: buffer_store_dword v0, v[4:5], s[4:7], 0 addr64 |
| ; HAWAII-NEXT: s_waitcnt vmcnt(0) |
| ; HAWAII-NEXT: s_setpc_b64 s[30:31] |
| ; Divergent addrspace(1) case (pointers are not inreg) - both targets are expected to compare v[0:1] with v[2:3] using v_cmp_*_u64 into vcc/vcc_lo followed by v_cndmask_b32, and to store the sum through v[4:5]; gfx1200 (below) sums with v_add_nc_u32 and v_add3_u32. |
| ; GFX12-LABEL: icmp_p1_divergent: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX12-NEXT: v_add_nc_u32_e32 v6, v6, v7 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc_lo |
| ; GFX12-NEXT: v_cmp_ge_u64_e32 vcc_lo, v[0:1], v[2:3] |
| ; GFX12-NEXT: v_add3_u32 v1, v6, v8, v9 |
| ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) |
| ; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_add3_u32 v0, v1, v7, v0 |
| ; GFX12-NEXT: global_store_b32 v[4:5], v0, off |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %eq_result = icmp eq ptr addrspace(1) %a, %b |
| %ne_result = icmp ne ptr addrspace(1) %a, %b |
| %ult_result = icmp ult ptr addrspace(1) %a, %b |
| %ugt_result = icmp ugt ptr addrspace(1) %a, %b |
| %ule_result = icmp ule ptr addrspace(1) %a, %b |
| %uge_result = icmp uge ptr addrspace(1) %a, %b |
| %eq_zext = zext i1 %eq_result to i32 |
| %ne_zext = zext i1 %ne_result to i32 |
| %ult_zext = zext i1 %ult_result to i32 |
| %ugt_zext = zext i1 %ugt_result to i32 |
| %ule_zext = zext i1 %ule_result to i32 |
| %uge_zext = zext i1 %uge_result to i32 |
| %sum1 = add i32 %eq_zext, %ne_zext |
| %sum2 = add i32 %sum1, %ult_zext |
| %sum3 = add i32 %sum2, %ugt_zext |
| %sum4 = add i32 %sum3, %ule_zext |
| %result = add i32 %sum4, %uge_zext |
| store i32 %result, ptr addrspace(1) %p |
| ret void |
| } |