; Codegen test for the llvm.amdgcn.mul.u24 / mul.i24 / mulhi.u24 / mulhi.i24
; intrinsics under GlobalISel with -new-reg-bank-select, on gfx900 and gfx1100.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11 %s
; Unsigned 24-bit multiply with an i32 result, default calling convention.
; Calls llvm.amdgcn.mul.u24.i32 on the two i32 arguments and returns the
; product. On both targets the expected output is a single v_mul_u32_u24
; on v0 and v1, written back to v0, followed by the return.
define i32 @mul_u24_s32_divergent(i32 %a, i32 %b) {
; GFX9-LABEL: mul_u24_s32_divergent:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_u24_s32_divergent:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.amdgcn.mul.u24.i32(i32 %a, i32 %b)
ret i32 %result
}
; Unsigned 24-bit multiply with an i32 result in an amdgpu_ps function whose
; arguments are marked inreg. The expected output reads the operands from
; s0/s1, performs the multiply with a VALU instruction, and copies the result
; into s0 with v_readfirstlane_b32.
; On gfx900 s1 is first copied to v0; on gfx1100 the e64 encoding takes both
; SGPRs directly.
; NOTE(review): the extra copy on gfx900 is presumably due to a limit on SGPR
; operands per VALU instruction on that target - confirm against the ISA docs.
define amdgpu_ps i32 @mul_u24_s32_uniform(i32 inreg %a, i32 inreg %b) {
; GFX9-LABEL: mul_u24_s32_uniform:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mul_u32_u24_e32 v0, s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: mul_u24_s32_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mul_u32_u24_e64 v0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
%result = call i32 @llvm.amdgcn.mul.u24.i32(i32 %a, i32 %b)
ret i32 %result
}
; Unsigned 24-bit multiply with an i64 result, default calling convention.
; Calls llvm.amdgcn.mul.u24.i64 on the two i32 arguments. The expected output
; uses a v_mul_u32_u24 / v_mul_hi_u32_u24 pair; on both targets v0 ends up
; holding the v_mul_u32_u24 result and v1 the v_mul_hi_u32_u24 result.
; The two targets differ only in which of the two products goes through the
; temporary v2 before the final v_mov_b32.
define i64 @mul_u24_s64_divergent(i32 %a, i32 %b) {
; GFX9-LABEL: mul_u24_s64_divergent:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32_u24_e32 v2, v0, v1
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_u24_s64_divergent:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_u32_u24_e32 v2, v0, v1
; GFX11-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i64 @llvm.amdgcn.mul.u24.i64(i32 %a, i32 %b)
ret i64 %result
}
; Unsigned 24-bit multiply with an i64 result in an amdgpu_ps function whose
; arguments are marked inreg. The expected output computes both products with
; VALU instructions (v_mul_u32_u24 into v0, v_mul_hi_u32_u24 into v1) and then
; copies them into s0 and s1 respectively with v_readfirstlane_b32.
; On gfx900 s1 is first copied to v0 and reused by both multiplies; on gfx1100
; the e64 encodings take s0 and s1 directly.
define amdgpu_ps i64 @mul_u24_s64_uniform(i32 inreg %a, i32 inreg %b) {
; GFX9-LABEL: mul_u24_s64_uniform:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mul_hi_u32_u24_e32 v1, s0, v0
; GFX9-NEXT: v_mul_u32_u24_e32 v0, s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: mul_u24_s64_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mul_u32_u24_e64 v0, s0, s1
; GFX11-NEXT: v_mul_hi_u32_u24_e64 v1, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-NEXT: ; return to shader part epilog
%result = call i64 @llvm.amdgcn.mul.u24.i64(i32 %a, i32 %b)
ret i64 %result
}
; Signed 24-bit multiply with an i32 result, default calling convention.
; Calls llvm.amdgcn.mul.i24.i32 on the two i32 arguments and returns the
; product. On both targets the expected output is a single v_mul_i32_i24
; on v0 and v1, written back to v0, followed by the return.
define i32 @mul_i24_s32_divergent(i32 %a, i32 %b) {
; GFX9-LABEL: mul_i24_s32_divergent:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_i24_s32_divergent:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_i32_i24_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.amdgcn.mul.i24.i32(i32 %a, i32 %b)
ret i32 %result
}
; Signed 24-bit multiply with an i32 result in an amdgpu_ps function whose
; arguments are marked inreg. The expected output reads the operands from
; s0/s1, performs v_mul_i32_i24 as a VALU instruction, and copies the result
; into s0 with v_readfirstlane_b32.
; On gfx900 s1 is first copied to v0; on gfx1100 the e64 encoding takes both
; SGPRs directly.
define amdgpu_ps i32 @mul_i24_s32_uniform(i32 inreg %a, i32 inreg %b) {
; GFX9-LABEL: mul_i24_s32_uniform:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mul_i32_i24_e32 v0, s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: mul_i24_s32_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mul_i32_i24_e64 v0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
%result = call i32 @llvm.amdgcn.mul.i24.i32(i32 %a, i32 %b)
ret i32 %result
}
; Signed 24-bit multiply with an i64 result, default calling convention.
; Calls llvm.amdgcn.mul.i24.i64 on the two i32 arguments. The expected output
; uses a v_mul_i32_i24 / v_mul_hi_i32_i24 pair; on both targets v0 ends up
; holding the v_mul_i32_i24 result and v1 the v_mul_hi_i32_i24 result.
; The two targets differ only in which of the two products goes through the
; temporary v2 before the final v_mov_b32.
define i64 @mul_i24_s64_divergent(i32 %a, i32 %b) {
; GFX9-LABEL: mul_i24_s64_divergent:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_i32_i24_e32 v2, v0, v1
; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_i24_s64_divergent:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_i32_i24_e32 v2, v0, v1
; GFX11-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_mov_b32_e32 v0, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i64 @llvm.amdgcn.mul.i24.i64(i32 %a, i32 %b)
ret i64 %result
}
; Signed 24-bit multiply with an i64 result in an amdgpu_ps function whose
; arguments are marked inreg. The expected output computes both products with
; VALU instructions (v_mul_i32_i24 into v0, v_mul_hi_i32_i24 into v1) and then
; copies them into s0 and s1 respectively with v_readfirstlane_b32.
; On gfx900 s1 is first copied to v0 and reused by both multiplies; on gfx1100
; the e64 encodings take s0 and s1 directly.
define amdgpu_ps i64 @mul_i24_s64_uniform(i32 inreg %a, i32 inreg %b) {
; GFX9-LABEL: mul_i24_s64_uniform:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, s0, v0
; GFX9-NEXT: v_mul_i32_i24_e32 v0, s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: mul_i24_s64_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mul_i32_i24_e64 v0, s0, s1
; GFX11-NEXT: v_mul_hi_i32_i24_e64 v1, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-NEXT: ; return to shader part epilog
%result = call i64 @llvm.amdgcn.mul.i24.i64(i32 %a, i32 %b)
ret i64 %result
}
; Unsigned 24-bit multiply-high, default calling convention.
; Calls llvm.amdgcn.mulhi.u24 on the two i32 arguments and returns the i32
; result. On both targets the expected output is a single v_mul_hi_u32_u24
; on v0 and v1, written back to v0, followed by the return.
define i32 @mulhi_u24_s32_divergent(i32 %a, i32 %b) {
; GFX9-LABEL: mulhi_u24_s32_divergent:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32_u24_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mulhi_u24_s32_divergent:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_hi_u32_u24_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.amdgcn.mulhi.u24(i32 %a, i32 %b)
ret i32 %result
}
; Unsigned 24-bit multiply-high in an amdgpu_ps function whose arguments are
; marked inreg. The expected output reads the operands from s0/s1, performs
; v_mul_hi_u32_u24 as a VALU instruction, and copies the result into s0 with
; v_readfirstlane_b32.
; On gfx900 s1 is first copied to v0; on gfx1100 the e64 encoding takes both
; SGPRs directly.
define amdgpu_ps i32 @mulhi_u24_s32_uniform(i32 inreg %a, i32 inreg %b) {
; GFX9-LABEL: mulhi_u24_s32_uniform:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mul_hi_u32_u24_e32 v0, s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: mulhi_u24_s32_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mul_hi_u32_u24_e64 v0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
%result = call i32 @llvm.amdgcn.mulhi.u24(i32 %a, i32 %b)
ret i32 %result
}
; Signed 24-bit multiply-high, default calling convention.
; Calls llvm.amdgcn.mulhi.i24 on the two i32 arguments and returns the i32
; result. On both targets the expected output is a single v_mul_hi_i32_i24
; on v0 and v1, written back to v0, followed by the return.
define i32 @mulhi_i24_s32_divergent(i32 %a, i32 %b) {
; GFX9-LABEL: mulhi_i24_s32_divergent:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_i32_i24_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mulhi_i24_s32_divergent:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_hi_i32_i24_e32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.amdgcn.mulhi.i24(i32 %a, i32 %b)
ret i32 %result
}
; Signed 24-bit multiply-high in an amdgpu_ps function whose arguments are
; marked inreg. The expected output reads the operands from s0/s1, performs
; v_mul_hi_i32_i24 as a VALU instruction, and copies the result into s0 with
; v_readfirstlane_b32.
; On gfx900 s1 is first copied to v0; on gfx1100 the e64 encoding takes both
; SGPRs directly.
define amdgpu_ps i32 @mulhi_i24_s32_uniform(i32 inreg %a, i32 inreg %b) {
; GFX9-LABEL: mulhi_i24_s32_uniform:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mul_hi_i32_i24_e32 v0, s0, v0
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: mulhi_i24_s32_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mul_hi_i32_i24_e64 v0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
%result = call i32 @llvm.amdgcn.mulhi.i24(i32 %a, i32 %b)
ret i32 %result
}
; Declarations of the intrinsics exercised above. mul.u24 and mul.i24 are
; declared in both an i32-returning and an i64-returning form (type suffix in
; the name); mulhi.u24 and mulhi.i24 are declared with an i32 result only.
; All of them take two i32 operands.
declare i32 @llvm.amdgcn.mul.u24.i32(i32, i32)
declare i64 @llvm.amdgcn.mul.u24.i64(i32, i32)
declare i32 @llvm.amdgcn.mul.i24.i32(i32, i32)
declare i64 @llvm.amdgcn.mul.i24.i64(i32, i32)
declare i32 @llvm.amdgcn.mulhi.u24(i32, i32)
declare i32 @llvm.amdgcn.mulhi.i24(i32, i32)