; blob: 6a9c4c8d41c202bffaa6036887e47ba642eea707 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
; RUN: llc -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
; Intrinsics exercised by this test: set.rounding consumes an i32 mode value
; and returns nothing; get.rounding takes no operands and yields an i32.
declare void @llvm.set.rounding(i32)
declare i32 @llvm.get.rounding()
; Dynamic rounding mode supplied as an inreg i32 argument under the amdgpu_gfx
; calling convention. Every target is expected to stay on the scalar unit:
; compute umin(x, x - 4), multiply it by 4 to form a bit offset, shift the
; 64-bit constant 0xb73e62d9_1c84a50f right by that offset, and write the low
; dword of the result to the 4-bit field at offset 0 of HW_REG_MODE with
; s_setreg_b32.
define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
; GFX678-LABEL: s_set_rounding:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_add_i32 s34, s4, -4
; GFX678-NEXT: s_min_u32 s34, s4, s34
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_add_i32 s34, s4, -4
; GFX9-NEXT: s_min_u32 s34, s4, s34
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_add_i32 s34, s4, -4
; GFX10-NEXT: s_min_u32 s34, s4, s34
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_add_i32 s0, s4, -4
; GFX11-NEXT: s_min_u32 s0, s4, s0
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Same table lookup as the amdgpu_gfx variant, but in an amdgpu_kernel: the
; mode is first fetched with a scalar load through s[0:1] (offset 0x9 on
; tahiti/hawaii, 0x24 on the other targets), the code waits on lgkmcnt before
; using it, and the function ends with s_endpgm.
; The empty side-effecting inline asm follows the intrinsic call in the IR, yet
; the expected output places its ASMSTART/ASMEND markers ahead of the
; s_setreg_b32 write.
; NOTE(review): the reason the inline asm is present is not stated in this
; file - confirm before relying on that ordering.
define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
; GFX6-LABEL: s_set_rounding_kernel:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX6-NEXT: ;;#ASMSTART
; GFX6-NEXT: ;;#ASMEND
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s3, s2, -4
; GFX6-NEXT: s_min_u32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s2, s2, 2
; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: s_set_rounding_kernel:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX7-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s3, s2, -4
; GFX7-NEXT: s_min_u32 s2, s2, s3
; GFX7-NEXT: s_lshl_b32 s2, s2, 2
; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: s_set_rounding_kernel:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dword s2, s[0:1], 0x24
; GFX8-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_add_i32 s3, s2, -4
; GFX8-NEXT: s_min_u32 s2, s2, s3
; GFX8-NEXT: s_lshl_b32 s2, s2, 2
; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: s_set_rounding_kernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24
; GFX9-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_add_i32 s3, s2, -4
; GFX9-NEXT: s_min_u32 s2, s2, s3
; GFX9-NEXT: s_lshl_b32 s2, s2, 2
; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: s_set_rounding_kernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_i32 s1, s0, -4
; GFX10-NEXT: s_min_u32 s2, s0, s1
; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s2, s2, 2
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: s_set_rounding_kernel:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x24
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_endpgm
call void @llvm.set.rounding(i32 %rounding)
call void asm sideeffect "",""()
ret void
}
; Default calling convention with a plain (non-inreg) i32 argument; the checks
; show the mode arriving in v0. The table lookup is expected to run on the
; vector ALU (v_add, v_min_u32, v_lshlrev and a 64-bit vector right shift of
; the same 0xb73e62d9_1c84a50f constant), after which the result is copied to
; an SGPR with v_readfirstlane_b32 so it can feed s_setreg_b32.
define void @v_set_rounding(i32 %rounding) {
; GFX6-LABEL: v_set_rounding:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_add_i32_e32 v1, vcc, -4, v0
; GFX6-NEXT: v_min_u32_e32 v0, v0, v1
; GFX6-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v0
; GFX6-NEXT: v_readfirstlane_b32 s4, v0
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_set_rounding:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_add_i32_e32 v1, vcc, -4, v0
; GFX7-NEXT: v_min_u32_e32 v0, v0, v1
; GFX7-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX7-NEXT: v_lshr_b64 v[0:1], s[4:5], v0
; GFX7-NEXT: v_readfirstlane_b32 s4, v0
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_set_rounding:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v1, vcc, -4, v0
; GFX8-NEXT: v_min_u32_e32 v0, v0, v1
; GFX8-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_set_rounding:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, -4, v0
; GFX9-NEXT: v_min_u32_e32 v0, v0, v1
; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_set_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v1, -4, v0
; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX10-NEXT: v_min_u32_e32 v0, v0, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5]
; GFX10-NEXT: v_readfirstlane_b32 s4, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_set_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v1, -4, v0
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: v_min_u32_e32 v0, v0, v1
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Round trip: the result of llvm.get.rounding is passed straight to
; llvm.set.rounding. The expected code reads the 4-bit field at offset 0 of
; HW_REG_MODE with s_getreg_b32, converts it by shifting the 64-bit constant
; 0x0c96f385_eb24da71 right by 4 * field, masks to 4 bits and adds 4 unless the
; result is below 4. It then performs the usual umin(x, x - 4) lookup in
; 0xb73e62d9_1c84a50f and writes the field back; both conversions appear back
; to back in the checks.
define void @set_rounding_get_rounding() {
; GFX678-LABEL: set_rounding_get_rounding:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
; GFX678-NEXT: s_lshl_b32 s6, s4, 2
; GFX678-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX678-NEXT: s_mov_b32 s5, 0xc96f385
; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX678-NEXT: s_and_b32 s4, s4, 15
; GFX678-NEXT: s_add_i32 s5, s4, 4
; GFX678-NEXT: s_cmp_lt_u32 s4, 4
; GFX678-NEXT: s_cselect_b32 s4, s4, s5
; GFX678-NEXT: s_add_i32 s5, s4, -4
; GFX678-NEXT: s_min_u32 s4, s4, s5
; GFX678-NEXT: s_lshl_b32 s6, s4, 2
; GFX678-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: set_rounding_get_rounding:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
; GFX9-NEXT: s_lshl_b32 s6, s4, 2
; GFX9-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX9-NEXT: s_mov_b32 s5, 0xc96f385
; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX9-NEXT: s_and_b32 s4, s4, 15
; GFX9-NEXT: s_add_i32 s5, s4, 4
; GFX9-NEXT: s_cmp_lt_u32 s4, 4
; GFX9-NEXT: s_cselect_b32 s4, s4, s5
; GFX9-NEXT: s_add_i32 s5, s4, -4
; GFX9-NEXT: s_min_u32 s4, s4, s5
; GFX9-NEXT: s_lshl_b32 s6, s4, 2
; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: set_rounding_get_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4)
; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX10-NEXT: s_mov_b32 s5, 0xc96f385
; GFX10-NEXT: s_lshl_b32 s6, s6, 2
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX10-NEXT: s_and_b32 s4, s4, 15
; GFX10-NEXT: s_add_i32 s5, s4, 4
; GFX10-NEXT: s_cmp_lt_u32 s4, 4
; GFX10-NEXT: s_cselect_b32 s4, s4, s5
; GFX10-NEXT: s_add_i32 s5, s4, -4
; GFX10-NEXT: s_min_u32 s4, s4, s5
; GFX10-NEXT: s_lshl_b32 s6, s4, 2
; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: set_rounding_get_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_and_b32 s0, s0, 15
; GFX11-NEXT: s_add_i32 s1, s0, 4
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s0, s0, s1
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%rounding = call i32 @llvm.get.rounding()
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Constant mode 0: the lookup is expected to fold away, leaving an immediate
; write of 15 to the 4-bit field at offset 0 of HW_REG_MODE. The pre-gfx10
; targets use s_setreg_imm32_b32; gfx1030/gfx1100 use s_round_mode 0xf.
define void @s_set_rounding_0() {
; GFX678-LABEL: s_set_rounding_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_0:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xf
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 0)
ret void
}
; Constant mode 1: expected to fold to an immediate write of 0 to the 4-bit
; field at offset 0 of HW_REG_MODE (s_setreg_imm32_b32 on the pre-gfx10
; targets, s_round_mode 0x0 on gfx1030/gfx1100).
define void @s_set_rounding_1() {
; GFX678-LABEL: s_set_rounding_1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_1:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x0
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 1)
ret void
}
; Constant mode 2: expected to fold to an immediate write of 5 to the 4-bit
; field at offset 0 of HW_REG_MODE (s_setreg_imm32_b32 on the pre-gfx10
; targets, s_round_mode 0x5 on gfx1030/gfx1100).
define void @s_set_rounding_2() {
; GFX678-LABEL: s_set_rounding_2:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_2:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x5
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 2)
ret void
}
; Constant mode 3: expected to fold to an immediate write of 10 to the 4-bit
; field at offset 0 of HW_REG_MODE (s_setreg_imm32_b32 on the pre-gfx10
; targets, s_round_mode 0xa on gfx1030/gfx1100).
define void @s_set_rounding_3() {
; GFX678-LABEL: s_set_rounding_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_3:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xa
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 3)
ret void
}
; Unsupported mode: the constant 4 is expected to fold to field value 15, the
; same immediate this file expects for mode 0.
define void @s_set_rounding_4() {
; GFX678-LABEL: s_set_rounding_4:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_4:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xf
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 4)
ret void
}
; Undefined mode value: the constant 5 is expected to fold to field value 0,
; the same immediate this file expects for mode 1.
define void @s_set_rounding_5() {
; GFX678-LABEL: s_set_rounding_5:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_5:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x0
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 5)
ret void
}
; Undefined mode value: the constant 6 is expected to fold to field value 5,
; the same immediate this file expects for mode 2.
define void @s_set_rounding_6() {
; GFX678-LABEL: s_set_rounding_6:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_6:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x5
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 6)
ret void
}
; "Dynamic"
; The constant 7 is expected to fold to field value 10, the same immediate this
; file expects for mode 3.
define void @s_set_rounding_7() {
; GFX678-LABEL: s_set_rounding_7:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_7:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xa
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 7)
ret void
}
; Invalid mode: the constant -1 is expected to fold to an immediate write of 11
; (s_round_mode 0xb on gfx1030/gfx1100).
define void @s_set_rounding_neg1() {
; GFX678-LABEL: s_set_rounding_neg1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_neg1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_neg1:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 -1)
ret void
}
; --------------------------------------------------------------------
; Test extended values
; --------------------------------------------------------------------
; NearestTiesToEvenF32_TowardPositiveF64 = 8
; Expected to fold to an immediate write of 4 (s_round_mode 0x4 on
; gfx1030/gfx1100).
define void @s_set_rounding_8() {
; GFX678-LABEL: s_set_rounding_8:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_8:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x4
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 8)
ret void
}
; NearestTiesToEvenF32_TowardNegativeF64 = 9
; Expected to fold to an immediate write of 8 (s_round_mode 0x8 on
; gfx1030/gfx1100).
define void @s_set_rounding_9() {
; GFX678-LABEL: s_set_rounding_9:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_9:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x8
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 9)
ret void
}
; NearestTiesToEvenF32_TowardZeroF64 = 10
; Expected to fold to an immediate write of 12 (s_round_mode 0xc on
; gfx1030/gfx1100).
define void @s_set_rounding_10() {
; GFX678-LABEL: s_set_rounding_10:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_10:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_10:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xc
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 10)
ret void
}
; TowardPositiveF32_NearestTiesToEvenF64 = 11
; Expected to fold to an immediate write of 1 (s_round_mode 0x1 on
; gfx1030/gfx1100).
define void @s_set_rounding_11() {
; GFX678-LABEL: s_set_rounding_11:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_11:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_11:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x1
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 11)
ret void
}
; TowardPositiveF32_TowardNegativeF64 = 12
; Expected to fold to an immediate write of 9 (s_round_mode 0x9 on
; gfx1030/gfx1100).
define void @s_set_rounding_12() {
; GFX678-LABEL: s_set_rounding_12:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_12:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_12:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x9
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 12)
ret void
}
; TowardPositiveF32_TowardZeroF64 = 13
; Expected to fold to an immediate write of 13 (s_round_mode 0xd on
; gfx1030/gfx1100).
define void @s_set_rounding_13() {
; GFX678-LABEL: s_set_rounding_13:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_13:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_13:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xd
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 13)
ret void
}
; TowardNegativeF32_NearestTiesToEvenF64 = 14
; Expected to fold to an immediate write of 2 (s_round_mode 0x2 on
; gfx1030/gfx1100).
define void @s_set_rounding_14() {
; GFX678-LABEL: s_set_rounding_14:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_14:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_14:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x2
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 14)
ret void
}
; TowardNegativeF32_TowardPositiveF64 = 15
; Expected to fold to an immediate write of 6 (s_round_mode 0x6 on
; gfx1030/gfx1100).
define void @s_set_rounding_15() {
; GFX678-LABEL: s_set_rounding_15:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_15:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x6
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 15)
ret void
}
; TowardNegativeF32_TowardZeroF64 = 16
; Expected to fold to an immediate write of 14 (s_round_mode 0xe on
; gfx1030/gfx1100).
define void @s_set_rounding_16() {
; GFX678-LABEL: s_set_rounding_16:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_16:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xe
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 16)
ret void
}
; TowardZeroF32_NearestTiesToEvenF64 = 17
; Expected to fold to an immediate write of 3 (s_round_mode 0x3 on
; gfx1030/gfx1100).
define void @s_set_rounding_17() {
; GFX678-LABEL: s_set_rounding_17:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_17:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_17:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x3
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 17)
ret void
}
; TowardZeroF32_TowardPositiveF64 = 18
; Expected to fold to an immediate write of 7 (s_round_mode 0x7 on
; gfx1030/gfx1100).
define void @s_set_rounding_18() {
; GFX678-LABEL: s_set_rounding_18:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_18:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_18:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0x7
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 18)
ret void
}
; TowardZeroF32_TowardNegativeF64 = 19
; Expected to fold to an immediate write of 11 (s_round_mode 0xb on
; gfx1030/gfx1100).
define void @s_set_rounding_19() {
; GFX678-LABEL: s_set_rounding_19:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_19:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_19:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 19)
ret void
}
; Invalid, out of bounds: 20 is one past the largest extended value tested in
; this file (19). The expected immediate is 11, the same as for 19.
define void @s_set_rounding_20() {
; GFX678-LABEL: s_set_rounding_20:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_20:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_20:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 20)
ret void
}
; Far out-of-range constant: 65535 (0xffff) is expected to fold to an immediate
; write of 11 (s_round_mode 0xb on gfx1030/gfx1100).
define void @s_set_rounding_0xffff() {
; GFX678-LABEL: s_set_rounding_0xffff:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_0xffff:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: s_set_rounding_0xffff:
; GFX1011: ; %bb.0:
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: s_round_mode 0xb
; GFX1011-NEXT: s_setpc_b64 s[30:31]
call void @llvm.set.rounding(i32 65535)
ret void
}
; --------------------------------------------------------------------
; Test optimization knowing the value can only be in the standard
; range
; --------------------------------------------------------------------
; The mode is a zero-extended i2, so it can only be 0..3. A narrower lookup is
; expected: the 16-bit constant 0xa50f is shifted right by 4 * x with a 32-bit
; shift, with no umin step and no 64-bit table. fiji and the later targets
; additionally mask the incoming register with 0xffff first.
define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i2_zeroext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_lshl_b32 s34, s4, 2
; GFX6-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i2_zeroext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_lshl_b32 s34, s4, 2
; GFX7-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i2_zeroext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s34, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s34, s34, 2
; GFX8-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i2_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s34, 0xffff, s4
; GFX9-NEXT: s_lshl_b32 s34, s34, 2
; GFX9-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i2_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
; GFX10-NEXT: s_lshl_b32 s34, s34, 2
; GFX10-NEXT: s_lshr_b32 s34, 0xa50f, s34
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i2_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshr_b32 s0, 0xa50f, s0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%zext.rounding = zext i2 %rounding to i32
call void @llvm.set.rounding(i32 %zext.rounding)
ret void
}
; The mode is a sign-extended i2, which ranges over -2..1 and so is not
; confined to 0..3. The full lookup is therefore expected: umin(x, x - 4)
; followed by a 64-bit shift of 0xb73e62d9_1c84a50f. fiji and the later targets
; first sign-extend the incoming register from 16 bits with s_sext_i32_i16.
define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i2_signext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i2_signext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i2_signext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_sext_i32_i16 s34, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i2_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_sext_i32_i16 s34, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i2_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_sext_i32_i16 s34, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s34, s34, s35
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i2_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s0, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s0, s0, s1
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%sext.rounding = sext i2 %rounding to i32
call void @llvm.set.rounding(i32 %sext.rounding)
ret void
}
; Sign-extends an i3 'signext inreg' argument to i32 and passes it to
; llvm.set.rounding. The expected code has the same shape as for the i2 signext
; case in this file: GFX6 and GFX7 use s4 as-is, GFX8 and newer emit
; s_sext_i32_i16 first, and every target then runs the min/shift/64-bit
; constant shift sequence before s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i3_signext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i3_signext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i3_signext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_sext_i32_i16 s34, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i3_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_sext_i32_i16 s34, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i3_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_sext_i32_i16 s34, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s34, s34, s35
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i3_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s0, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s0, s0, s1
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%sext.rounding = sext i3 %rounding to i32
call void @llvm.set.rounding(i32 %sext.rounding)
ret void
}
; Zero-extends an i3 'zeroext inreg' argument to i32 and passes it to
; llvm.set.rounding. The GFX6 and GFX7 checks consume s4 directly, while the
; GFX8 and newer checks first mask it with s_and_b32 0xffff. All targets then
; expect the same sequence: s_min_u32 of x and x-4, a shift left by 2, and a
; 64-bit right shift of a constant pair whose low half is written with
; s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i3_zeroext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i3_zeroext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i3_zeroext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s34, 0xffff, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i3_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s34, 0xffff, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i3_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s34, s34, s35
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i3_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s0, s0, s1
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%zext.rounding = zext i3 %rounding to i32
call void @llvm.set.rounding(i32 %zext.rounding)
ret void
}
; Sets the rounding mode to 0 when %cond is zero and to 1 otherwise. The checks
; expect the compare result to be materialised as 0/1 in a VGPR (v_cndmask),
; shifted left by 2, and used as the right-shift amount applied to the 32-bit
; constant 0xa50f; the result is read back with v_readfirstlane_b32 and written
; with s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) {
; GFX6-LABEL: s_set_rounding_select_0_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_cmp_lg_u32 s4, 0
; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX6-NEXT: v_readfirstlane_b32 s34, v0
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_select_0_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_cmp_lg_u32 s4, 0
; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX7-NEXT: v_readfirstlane_b32 s34, v0
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_select_0_1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_cmp_lg_u32 s4, 0
; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_mov_b32 s34, 0xa50f
; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX8-NEXT: v_readfirstlane_b32 s34, v0
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_0_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_lg_u32 s4, 0
; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: s_mov_b32 s34, 0xa50f
; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX9-NEXT: v_readfirstlane_b32 s34, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_0_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_lg_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, -1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_0_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_lg_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 0, i32 1
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Sets the rounding mode to 1 when %cond is zero and to 3 otherwise, with %cond
; passed 'inreg'. The checks expect the select to be folded into a single
; s_cselect_b32 between the immediates 0xa50 and 10, whose result feeds
; s_setreg_b32 directly with no shift instructions.
define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_1_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_1_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_1_3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_1_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 0xa50, 10
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 1, i32 3
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Same 1-or-3 select as the 'inreg' variant, but %cond is an ordinary argument
; that the checks read from v0. The expected code compares v0 against zero,
; picks between 0xa50 and 10 with v_cndmask_b32, and moves the result to an SGPR
; with v_readfirstlane_b32 before s_setreg_b32.
define void @v_set_rounding_select_1_3(i32 %cond) {
; GFX678-LABEL: v_set_rounding_select_1_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: v_mov_b32_e32 v1, 0xa50
; GFX678-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX678-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc
; GFX678-NEXT: v_readfirstlane_b32 s4, v0
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_set_rounding_select_1_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, 0xa50
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_set_rounding_select_1_3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
; GFX10-NEXT: v_readfirstlane_b32 s4, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_set_rounding_select_1_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 1, i32 3
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Sets the rounding mode to 2 when %cond is zero and to 0 otherwise. The checks
; expect the compare result to be materialised as 0/1 in a VGPR (v_cndmask),
; shifted left by 3, and used as the right-shift amount applied to the constant
; 0xa50f; the result is read back with v_readfirstlane_b32 and written with
; s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) {
; GFX6-LABEL: s_set_rounding_select_2_0:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_cmp_eq_u32 s4, 0
; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX6-NEXT: v_readfirstlane_b32 s34, v0
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_select_2_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_cmp_eq_u32 s4, 0
; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX7-NEXT: v_readfirstlane_b32 s34, v0
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_select_2_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_cmp_eq_u32 s4, 0
; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX8-NEXT: s_mov_b32 s34, 0xa50f
; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX8-NEXT: v_readfirstlane_b32 s34, v0
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_2_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX9-NEXT: s_mov_b32 s34, 0xa50f
; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34
; GFX9-NEXT: v_readfirstlane_b32 s34, v0
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_2_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, -1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_2_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 2, i32 0
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Sets the rounding mode to 2 when %cond is zero and to 1 otherwise. The checks
; expect the select to be folded to a choice between two precomputed constants:
; 0xa5 (loaded with s_movk_i32) and the immediate 0xa50, picked by s_cselect_b32
; and written with s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_2_1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa5
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_2_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa5
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_2_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa5
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_2_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa5
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 2, i32 1
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Sets the rounding mode to 1 when %cond is zero and to 2 otherwise (the select
; operands of the 2-or-1 test swapped). The checks expect the mirrored constant
; choice: 0xa50 loaded with s_movk_i32 and the immediate 0xa5, picked by
; s_cselect_b32 and written with s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_1_2:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa50
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_1_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa50
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_1_2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa50
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_1_2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa50
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 1, i32 2
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Sets the rounding mode to 3 when %cond is zero and to 0 otherwise. The checks
; expect a single s_cselect_b32 between the immediates 10 and 0xa50f, with the
; result written by s_setreg_b32 and no shift instructions.
define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_3_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_3_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_3_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_3_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 10, 0xa50f
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 3, i32 0
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Sets the rounding mode to 4 when %cond is zero and to 0 otherwise. Unlike the
; selects whose operands are all in 0..3, the checks here expect no constant
; folding: the 0/1 compare result is read back from a VGPR, shifted left by 2 to
; form 0 or 4, and then run through the generic min_u32(x, x-4) / shift /
; 64-bit constant shift sequence before s_setreg_b32.
define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_4_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX678-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX678-NEXT: v_readfirstlane_b32 s34, v0
; GFX678-NEXT: s_lshl_b32 s34, s34, 2
; GFX678-NEXT: s_add_i32 s35, s34, -4
; GFX678-NEXT: s_min_u32 s34, s34, s35
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_4_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX9-NEXT: v_readfirstlane_b32 s34, v0
; GFX9-NEXT: s_lshl_b32 s34, s34, 2
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_4_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, -1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
; GFX10-NEXT: s_lshl_b32 s34, s34, 2
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s34, s34, s35
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_4_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s0, s0, s1
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 4, i32 0
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Sets the rounding mode to 3 when %cond is zero and to 5 otherwise. The checks
; expect the raw values 3 and 5 to be selected with s_cselect_b32 and then run
; through the generic min_u32(x, x-4) / shift / 64-bit constant shift sequence
; before s_setreg_b32, rather than being folded to precomputed constants.
define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_3_5:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 3, 5
; GFX678-NEXT: s_add_i32 s35, s34, -4
; GFX678-NEXT: s_min_u32 s34, s34, s35
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_3_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 3, 5
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_3_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 3, 5
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s34, s34, s35
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_3_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 3, 5
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s0, s0, s1
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %cond, 0
%rounding = select i1 %cmp, i32 3, i32 5
call void @llvm.set.rounding(i32 %rounding)
ret void
}
; Kernel that calls llvm.set.rounding(1), immediately calls llvm.get.rounding,
; and stores the result with a volatile store to a null addrspace(1) pointer.
; The checks expect the constant write to become s_setreg_imm32_b32 with value 0
; (s_round_mode 0x0 on GFX10 and GFX11), followed by an actual s_getreg_b32
; read-back and the table-based conversion of the register value, i.e. the
; get is not folded to the constant that was just set.
define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
; GFX6-LABEL: get_rounding_after_set_rounding_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_nop 0
; GFX6-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX6-NEXT: s_lshl_b32 s2, s0, 2
; GFX6-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX6-NEXT: s_mov_b32 s1, 0xc96f385
; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX6-NEXT: s_and_b32 s0, s0, 15
; GFX6-NEXT: s_add_i32 s1, s0, 4
; GFX6-NEXT: s_cmp_lt_u32 s0, 4
; GFX6-NEXT: s_cselect_b32 s4, s0, s1
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s1, s0
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: get_rounding_after_set_rounding_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_nop 0
; GFX7-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX7-NEXT: s_lshl_b32 s2, s0, 2
; GFX7-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX7-NEXT: s_mov_b32 s1, 0xc96f385
; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX7-NEXT: s_and_b32 s0, s0, 15
; GFX7-NEXT: s_add_i32 s1, s0, 4
; GFX7-NEXT: s_cmp_lt_u32 s0, 4
; GFX7-NEXT: s_cselect_b32 s4, s0, s1
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s1, s0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: get_rounding_after_set_rounding_1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX8-NEXT: s_lshl_b32 s2, s0, 2
; GFX8-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX8-NEXT: s_mov_b32 s1, 0xc96f385
; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX8-NEXT: s_and_b32 s0, s0, 15
; GFX8-NEXT: s_add_i32 s1, s0, 4
; GFX8-NEXT: s_cmp_lt_u32 s0, 4
; GFX8-NEXT: s_cselect_b32 s0, s0, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: get_rounding_after_set_rounding_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX9-NEXT: s_lshl_b32 s2, s0, 2
; GFX9-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX9-NEXT: s_mov_b32 s1, 0xc96f385
; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX9-NEXT: s_and_b32 s0, s0, 15
; GFX9-NEXT: s_add_i32 s1, s0, 4
; GFX9-NEXT: s_cmp_lt_u32 s0, 4
; GFX9-NEXT: s_cselect_b32 s0, s0, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: get_rounding_after_set_rounding_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_lshl_b32 s2, s0, 2
; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX10-NEXT: s_mov_b32 s1, 0xc96f385
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX10-NEXT: s_and_b32 s0, s0, 15
; GFX10-NEXT: s_add_i32 s1, s0, 4
; GFX10-NEXT: s_cmp_lt_u32 s0, 4
; GFX10-NEXT: s_cselect_b32 s0, s0, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: get_rounding_after_set_rounding_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_round_mode 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_and_b32 s0, s0, 15
; GFX11-NEXT: s_add_i32 s1, s0, 4
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
tail call void @llvm.set.rounding(i32 1)
%set.mode = tail call i32 @llvm.get.rounding()
store volatile i32 %set.mode, ptr addrspace(1) null
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}