| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s |
| ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11 %s |
| |
| ; llvm.amdgcn.mul.u24 with an i32 result and operands read from v0/v1. |
| ; Both gfx900 and gfx1100 are expected to emit a single v_mul_u32_u24_e32. |
| define i32 @mul_u24_s32_divergent(i32 %a, i32 %b) { |
| ; GFX9-LABEL: mul_u24_s32_divergent: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: mul_u24_s32_divergent: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call i32 @llvm.amdgcn.mul.u24.i32(i32 %a, i32 %b) |
| ret i32 %result |
| } |
| |
| ; amdgpu_ps variant of the i32 mul.u24 test with inreg operands (read from |
| ; s0/s1 in the checks). The multiply is still a VALU v_mul_u32_u24 and the |
| ; result is copied back into s0 with v_readfirstlane_b32. |
| ; gfx900 first moves s1 into v0, while gfx1100 uses the _e64 form with both |
| ; SGPRs as sources. |
| ; NOTE(review): presumably a consequence of the targets' differing |
| ; constant-bus limits - confirm. |
| define amdgpu_ps i32 @mul_u24_s32_uniform(i32 inreg %a, i32 inreg %b) { |
| ; GFX9-LABEL: mul_u24_s32_uniform: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX9-NEXT: v_mul_u32_u24_e32 v0, s0, v0 |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: mul_u24_s32_uniform: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mul_u32_u24_e64 v0, s0, s1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX11-NEXT: ; return to shader part epilog |
| %result = call i32 @llvm.amdgcn.mul.u24.i32(i32 %a, i32 %b) |
| ret i32 %result |
| } |
| |
| ; llvm.amdgcn.mul.u24 with an i64 result and operands read from v0/v1. |
| ; The checks expect a v_mul_u32_u24 / v_mul_hi_u32_u24 pair whose results end |
| ; up in v0 (low half) and v1 (high half), plus one v_mov_b32 because one of the |
| ; halves is first computed into v2. The two targets differ in which half that is. |
| define i64 @mul_u24_s64_divergent(i32 %a, i32 %b) { |
| ; GFX9-LABEL: mul_u24_s64_divergent: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mul_hi_u32_u24_e32 v2, v0, v1 |
| ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, v2 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: mul_u24_s64_divergent: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mul_u32_u24_e32 v2, v0, v1 |
| ; GFX11-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call i64 @llvm.amdgcn.mul.u24.i64(i32 %a, i32 %b) |
| ret i64 %result |
| } |
| |
| ; amdgpu_ps variant of the i64 mul.u24 test with inreg operands (read from |
| ; s0/s1 in the checks). Both halves are computed with VALU instructions |
| ; (v_mul_u32_u24 and v_mul_hi_u32_u24) and then copied back with two |
| ; v_readfirstlane_b32: low half into s0, high half into s1. |
| ; gfx900 moves s1 into v0 first; gfx1100 uses the _e64 forms on s0, s1 directly. |
| define amdgpu_ps i64 @mul_u24_s64_uniform(i32 inreg %a, i32 inreg %b) { |
| ; GFX9-LABEL: mul_u24_s64_uniform: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX9-NEXT: v_mul_hi_u32_u24_e32 v1, s0, v0 |
| ; GFX9-NEXT: v_mul_u32_u24_e32 v0, s0, v0 |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX9-NEXT: v_readfirstlane_b32 s1, v1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: mul_u24_s64_uniform: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mul_u32_u24_e64 v0, s0, s1 |
| ; GFX11-NEXT: v_mul_hi_u32_u24_e64 v1, s0, s1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 |
| ; GFX11-NEXT: ; return to shader part epilog |
| %result = call i64 @llvm.amdgcn.mul.u24.i64(i32 %a, i32 %b) |
| ret i64 %result |
| } |
| |
| ; llvm.amdgcn.mul.i24 with an i32 result and operands read from v0/v1. |
| ; Both gfx900 and gfx1100 are expected to emit a single v_mul_i32_i24_e32. |
| define i32 @mul_i24_s32_divergent(i32 %a, i32 %b) { |
| ; GFX9-LABEL: mul_i24_s32_divergent: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: mul_i24_s32_divergent: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mul_i32_i24_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call i32 @llvm.amdgcn.mul.i24.i32(i32 %a, i32 %b) |
| ret i32 %result |
| } |
| |
| ; amdgpu_ps variant of the i32 mul.i24 test with inreg operands (read from |
| ; s0/s1 in the checks). The multiply is a VALU v_mul_i32_i24 and the result is |
| ; copied back into s0 with v_readfirstlane_b32. |
| ; gfx900 first moves s1 into v0; gfx1100 uses the _e64 form on s0, s1 directly. |
| define amdgpu_ps i32 @mul_i24_s32_uniform(i32 inreg %a, i32 inreg %b) { |
| ; GFX9-LABEL: mul_i24_s32_uniform: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX9-NEXT: v_mul_i32_i24_e32 v0, s0, v0 |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: mul_i24_s32_uniform: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mul_i32_i24_e64 v0, s0, s1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX11-NEXT: ; return to shader part epilog |
| %result = call i32 @llvm.amdgcn.mul.i24.i32(i32 %a, i32 %b) |
| ret i32 %result |
| } |
| |
| ; llvm.amdgcn.mul.i24 with an i64 result and operands read from v0/v1. |
| ; The checks expect a v_mul_i32_i24 / v_mul_hi_i32_i24 pair whose results end |
| ; up in v0 (low half) and v1 (high half), plus one v_mov_b32 because one of the |
| ; halves is first computed into v2. The two targets differ in which half that is. |
| define i64 @mul_i24_s64_divergent(i32 %a, i32 %b) { |
| ; GFX9-LABEL: mul_i24_s64_divergent: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mul_hi_i32_i24_e32 v2, v0, v1 |
| ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, v2 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: mul_i24_s64_divergent: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mul_i32_i24_e32 v2, v0, v1 |
| ; GFX11-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call i64 @llvm.amdgcn.mul.i24.i64(i32 %a, i32 %b) |
| ret i64 %result |
| } |
| |
| ; amdgpu_ps variant of the i64 mul.i24 test with inreg operands (read from |
| ; s0/s1 in the checks). Both halves are computed with VALU instructions |
| ; (v_mul_i32_i24 and v_mul_hi_i32_i24) and then copied back with two |
| ; v_readfirstlane_b32: low half into s0, high half into s1. |
| ; gfx900 moves s1 into v0 first; gfx1100 uses the _e64 forms on s0, s1 directly. |
| define amdgpu_ps i64 @mul_i24_s64_uniform(i32 inreg %a, i32 inreg %b) { |
| ; GFX9-LABEL: mul_i24_s64_uniform: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, s0, v0 |
| ; GFX9-NEXT: v_mul_i32_i24_e32 v0, s0, v0 |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX9-NEXT: v_readfirstlane_b32 s1, v1 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: mul_i24_s64_uniform: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mul_i32_i24_e64 v0, s0, s1 |
| ; GFX11-NEXT: v_mul_hi_i32_i24_e64 v1, s0, s1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 |
| ; GFX11-NEXT: ; return to shader part epilog |
| %result = call i64 @llvm.amdgcn.mul.i24.i64(i32 %a, i32 %b) |
| ret i64 %result |
| } |
| |
| ; llvm.amdgcn.mulhi.u24 with operands read from v0/v1. |
| ; Both gfx900 and gfx1100 are expected to emit a single v_mul_hi_u32_u24_e32. |
| define i32 @mulhi_u24_s32_divergent(i32 %a, i32 %b) { |
| ; GFX9-LABEL: mulhi_u24_s32_divergent: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mul_hi_u32_u24_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: mulhi_u24_s32_divergent: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mul_hi_u32_u24_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call i32 @llvm.amdgcn.mulhi.u24(i32 %a, i32 %b) |
| ret i32 %result |
| } |
| |
| ; amdgpu_ps variant of the mulhi.u24 test with inreg operands (read from s0/s1 |
| ; in the checks). The multiply is a VALU v_mul_hi_u32_u24 and the result is |
| ; copied back into s0 with v_readfirstlane_b32. |
| ; gfx900 first moves s1 into v0; gfx1100 uses the _e64 form on s0, s1 directly. |
| define amdgpu_ps i32 @mulhi_u24_s32_uniform(i32 inreg %a, i32 inreg %b) { |
| ; GFX9-LABEL: mulhi_u24_s32_uniform: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX9-NEXT: v_mul_hi_u32_u24_e32 v0, s0, v0 |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: mulhi_u24_s32_uniform: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mul_hi_u32_u24_e64 v0, s0, s1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX11-NEXT: ; return to shader part epilog |
| %result = call i32 @llvm.amdgcn.mulhi.u24(i32 %a, i32 %b) |
| ret i32 %result |
| } |
| |
| ; llvm.amdgcn.mulhi.i24 with operands read from v0/v1. |
| ; Both gfx900 and gfx1100 are expected to emit a single v_mul_hi_i32_i24_e32. |
| define i32 @mulhi_i24_s32_divergent(i32 %a, i32 %b) { |
| ; GFX9-LABEL: mulhi_i24_s32_divergent: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mul_hi_i32_i24_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: mulhi_i24_s32_divergent: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mul_hi_i32_i24_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call i32 @llvm.amdgcn.mulhi.i24(i32 %a, i32 %b) |
| ret i32 %result |
| } |
| |
| ; amdgpu_ps variant of the mulhi.i24 test with inreg operands (read from s0/s1 |
| ; in the checks). The multiply is a VALU v_mul_hi_i32_i24 and the result is |
| ; copied back into s0 with v_readfirstlane_b32. |
| ; gfx900 first moves s1 into v0; gfx1100 uses the _e64 form on s0, s1 directly. |
| define amdgpu_ps i32 @mulhi_i24_s32_uniform(i32 inreg %a, i32 inreg %b) { |
| ; GFX9-LABEL: mulhi_i24_s32_uniform: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX9-NEXT: v_mul_hi_i32_i24_e32 v0, s0, v0 |
| ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX9-NEXT: ; return to shader part epilog |
| ; |
| ; GFX11-LABEL: mulhi_i24_s32_uniform: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: v_mul_hi_i32_i24_e64 v0, s0, s1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX11-NEXT: ; return to shader part epilog |
| %result = call i32 @llvm.amdgcn.mulhi.i24(i32 %a, i32 %b) |
| ret i32 %result |
| } |
| |
| ; Declarations of the AMDGPU 24-bit multiply intrinsics called by the tests |
| ; above. mul.u24 / mul.i24 are declared with both i32 and i64 return types; |
| ; mulhi.u24 / mulhi.i24 are declared with an i32 return type only. |
| declare i32 @llvm.amdgcn.mul.u24.i32(i32, i32) |
| declare i64 @llvm.amdgcn.mul.u24.i64(i32, i32) |
| declare i32 @llvm.amdgcn.mul.i24.i32(i32, i32) |
| declare i64 @llvm.amdgcn.mul.i24.i64(i32, i32) |
| declare i32 @llvm.amdgcn.mulhi.u24(i32, i32) |
| declare i32 @llvm.amdgcn.mulhi.i24(i32, i32) |