blob: 52259c4c2e6e124796abe817b763b4623609d64f [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; Reduce a 64-bit add by a constant to a 32-bit add of the high halves if we
; know the low 32 bits of the constant are all zero.
; add i64:x, K if computeTrailingZeros(K) >= 32
; => build_pair (add x.hi, K.hi), x.lo
; Scalar case, K = 1 << 50: the low 32 bits of K are zero and its high half is
; 0x40000, so the expected code is a single 32-bit s_add_i32 into s1.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_0(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, 0x40000
; GFX9-NEXT: ; return to shader part epilog
%add = add i64 %reg, 1125899906842624 ; (1 << 50)
ret i64 %add
}
; Scalar case, K = 1 << 32: the smallest constant whose low 32 bits are all
; zero. Its high half is 1, so the expected code adds 1 to s1 only.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_1(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, 1
; GFX9-NEXT: ; return to shader part epilog
%add = add i64 %reg, 4294967296 ; (1 << 32)
ret i64 %add
}
; Scalar case, K = 1 << 33: high half of K is 2, so the expected code adds 2
; to s1 only.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_2(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, 2
; GFX9-NEXT: ; return to shader part epilog
%add = add i64 %reg, 8589934592 ; (1 << 33)
ret i64 %add
}
; Scalar case, K = 1 << 63 (written as the most negative i64): only the top
; bit of K is set, so its high half is 0x80000000 and the expected code is a
; single 32-bit add into s1.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_3(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, 0x80000000
; GFX9-NEXT: ; return to shader part epilog
%add = add i64 %reg, -9223372036854775808 ; (1 << 63)
ret i64 %add
}
; Scalar case, K = 0xffffffff00000000: every high-half bit of K is set and
; every low-half bit is clear. The high half 0xffffffff appears as the
; immediate -1 in the expected 32-bit add into s1.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_4(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, -1
; GFX9-NEXT: ; return to shader part epilog
%add = add i64 %reg, -4294967296 ; 0xffffffff00000000
ret i64 %add
}
; Vector-ALU case, K = 1 << 50: the low 32 bits of K are zero and its high
; half is 0x40000, so the expected code is a single v_add_u32 into v1 with no
; carry chain.
define i64 @v_add_i64_const_low_bits_known0_0(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, 0x40000, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%add = add i64 %reg, 1125899906842624 ; (1 << 50)
ret i64 %add
}
; Vector-ALU case, K = 1 << 32: the smallest constant whose low 32 bits are
; all zero. Its high half is 1, so the expected code adds 1 to v1 only.
define i64 @v_add_i64_const_low_bits_known0_1(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%add = add i64 %reg, 4294967296 ; (1 << 32)
ret i64 %add
}
; Vector-ALU case, K = 1 << 33: high half of K is 2, so the expected code adds
; 2 to v1 only.
define i64 @v_add_i64_const_low_bits_known0_2(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, 2, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%add = add i64 %reg, 8589934592 ; (1 << 33)
ret i64 %add
}
; Vector-ALU case, K = 1 << 63 (written as the most negative i64): only the
; top bit of K is set, so its high half is 0x80000000 and the expected code is
; a single 32-bit add into v1.
define i64 @v_add_i64_const_low_bits_known0_3(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, 0x80000000, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%add = add i64 %reg, -9223372036854775808 ; (1 << 63)
ret i64 %add
}
; Vector-ALU case, K = 0xffffffff00000000: every high-half bit of K is set and
; every low-half bit is clear. The high half 0xffffffff appears as the
; immediate -1 in the expected 32-bit add into v1.
define i64 @v_add_i64_const_low_bits_known0_4(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%add = add i64 %reg, -4294967296 ; 0xffffffff00000000
ret i64 %add
}
; Negative test, scalar case: K = 0xffffffff has its high 32 bits zero but its
; low 32 bits all set, so the reduction must not apply. The expected code
; keeps the full 64-bit add: s_add_u32 on the low half followed by s_addc_u32
; to propagate the carry into the high half.
define amdgpu_ps i64 @s_add_i64_const_high_bits_known0_0(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_high_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_u32 s0, s0, -1
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: ; return to shader part epilog
%add = add i64 %reg, 4294967295 ; 0xffffffff, i.e. (1 << 32) - 1
ret i64 %add
}
; Negative test, vector-ALU case: K = 0xffffffff has its high 32 bits zero but
; its low 32 bits all set, so the reduction must not apply. The expected code
; keeps the full 64-bit add: v_add_co_u32 on the low half followed by
; v_addc_co_u32 to propagate the carry (through vcc) into the high half.
define i64 @v_add_i64_const_high_bits_known0_0(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_high_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%add = add i64 %reg, 4294967295 ; 0xffffffff, i.e. (1 << 32) - 1
ret i64 %add
}
; Vector-ALU <2 x i64> case with a splat constant: both elements add 1 << 32,
; so the expected code is one 32-bit add of 1 per element (into v1 and v3)
; and no carry chain.
define <2 x i64> @v_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
; GFX9-LABEL: v_add_v2i64_splat_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
; GFX9-NEXT: v_add_u32_e32 v3, 1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
ret <2 x i64> %add
}
; Vector-ALU <2 x i64> case with a non-splat constant: the elements add
; 1 << 32 and 1 << 33, both with zero low halves, so the expected code is one
; 32-bit add per element (1 into v1, 2 into v3) and no carry chain.
define <2 x i64> @v_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
; GFX9-LABEL: v_add_v2i64_nonsplat_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
; GFX9-NEXT: v_add_u32_e32 v3, 2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
ret <2 x i64> %add
}
; Scalar <2 x i64> case with a splat constant: both elements add 1 << 32, so
; the expected code is one s_add_i32 of 1 per element (into s1 and s3) and no
; carry chain.
define amdgpu_ps <2 x i64> @s_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
; GFX9-LABEL: s_add_v2i64_splat_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, 1
; GFX9-NEXT: s_add_i32 s3, s3, 1
; GFX9-NEXT: ; return to shader part epilog
%add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
ret <2 x i64> %add
}
; Scalar <2 x i64> case with a non-splat constant: the elements add 1 << 32
; and 1 << 33, both with zero low halves, so the expected code is one
; s_add_i32 per element (1 into s1, 2 into s3) and no carry chain.
define amdgpu_ps <2 x i64> @s_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
; GFX9-LABEL: s_add_v2i64_nonsplat_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, 1
; GFX9-NEXT: s_add_i32 s3, s3, 2
; GFX9-NEXT: ; return to shader part epilog
%add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
ret <2 x i64> %add
}
; Vector-ALU case with a variable offset. The offset is
; (zext %offset.hi32) << 32, so its low 32 bits are zero but it is not a
; constant. The checks below show that a full 64-bit add is still emitted: a
; low-half add of 0 followed by a carry add of the high halves.
; We could reduce this to use a 32-bit add if we use computeKnownBits
; NOTE(review): despite "high_bits_known0" in the name, it is the low 32 bits
; of the offset that are zero here.
define i64 @v_add_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
; GFX9-LABEL: v_add_i64_variable_high_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
%in.high.bits = shl i64 %zext.offset.hi32, 32
%add = add i64 %reg, %in.high.bits
ret i64 %add
}
; Scalar case with a variable offset. The offset is
; (zext %offset.hi32) << 32, so its low 32 bits are zero but it is not a
; constant. The checks below show that a full 64-bit add is still emitted:
; s_add_u32 of 0 on the low half followed by s_addc_u32 of the high halves.
; We could reduce this to use a 32-bit add if we use computeKnownBits
; NOTE(review): despite "high_bits_known0" in the name, it is the low 32 bits
; of the offset that are zero here.
define amdgpu_ps i64 @s_add_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
; GFX9-LABEL: s_add_i64_variable_high_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_u32 s0, s0, 0
; GFX9-NEXT: s_addc_u32 s1, s1, s2
; GFX9-NEXT: ; return to shader part epilog
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
%in.high.bits = shl i64 %zext.offset.hi32, 32
%add = add i64 %reg, %in.high.bits
ret i64 %add
}