| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s |
| |
| declare void @llvm.set.rounding(i32) |
| declare i32 @llvm.get.rounding() |
| |
| ; Dynamic rounding mode supplied in an SGPR (inreg argument, amdgpu_gfx calling convention). |
| ; The expected code turns the argument into a table index with min_u32(x, x - 4), multiplies |
| ; the index by 4, shifts the 64-bit constant 0xb73e62d9_1c84a50f right by that amount, and |
| ; writes the low bits of the result to the 4-bit field at offset 0 of HW_REG_MODE. |
| define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) { |
| ; GFX678-LABEL: s_set_rounding: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_add_i32 s34, s4, -4 |
| ; GFX678-NEXT: s_min_u32 s34, s4, s34 |
| ; GFX678-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_add_i32 s34, s4, -4 |
| ; GFX9-NEXT: s_min_u32 s34, s4, s34 |
| ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_add_i32 s34, s4, -4 |
| ; GFX10-NEXT: s_min_u32 s36, s4, s34 |
| ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 |
| ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_add_i32 s0, s4, -4 |
| ; GFX11-NEXT: s_min_u32 s2, s4, s0 |
| ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Same dynamic table lookup as the SGPR case, but inside an amdgpu_kernel: the mode value is |
| ; first fetched with a scalar load through s[4:5] (s_load_dword, or s_load_b32 on gfx11). |
| ; The empty side-effecting inline asm that follows the call in the IR shows up in the |
| ; expected output as an ASMSTART/ASMEND pair placed ahead of the s_setreg_b32. |
| define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) { |
| ; GFX6-LABEL: s_set_rounding_kernel: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_load_dword s2, s[4:5], 0x9 |
| ; GFX6-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX6-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX6-NEXT: ;;#ASMSTART |
| ; GFX6-NEXT: ;;#ASMEND |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: s_add_i32 s3, s2, -4 |
| ; GFX6-NEXT: s_min_u32 s2, s2, s3 |
| ; GFX6-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX6-NEXT: s_endpgm |
| ; |
| ; GFX7-LABEL: s_set_rounding_kernel: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_load_dword s2, s[4:5], 0x9 |
| ; GFX7-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX7-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX7-NEXT: ;;#ASMSTART |
| ; GFX7-NEXT: ;;#ASMEND |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_add_i32 s3, s2, -4 |
| ; GFX7-NEXT: s_min_u32 s2, s2, s3 |
| ; GFX7-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX8-LABEL: s_set_rounding_kernel: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_load_dword s2, s[4:5], 0x24 |
| ; GFX8-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX8-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_add_i32 s3, s2, -4 |
| ; GFX8-NEXT: s_min_u32 s2, s2, s3 |
| ; GFX8-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX8-NEXT: s_endpgm |
| ; |
| ; GFX9-LABEL: s_set_rounding_kernel: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x24 |
| ; GFX9-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX9-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX9-NEXT: ;;#ASMSTART |
| ; GFX9-NEXT: ;;#ASMEND |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_add_i32 s3, s2, -4 |
| ; GFX9-NEXT: s_min_u32 s2, s2, s3 |
| ; GFX9-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: s_set_rounding_kernel: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24 |
| ; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX10-NEXT: ;;#ASMSTART |
| ; GFX10-NEXT: ;;#ASMEND |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: s_add_i32 s3, s2, -4 |
| ; GFX10-NEXT: s_min_u32 s2, s2, s3 |
| ; GFX10-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_set_rounding_kernel: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24 |
| ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_add_i32 s3, s2, -4 |
| ; GFX11-NEXT: s_min_u32 s2, s2, s3 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_endpgm |
| call void @llvm.set.rounding(i32 %rounding) |
| call void asm sideeffect "",""() |
| ret void |
| } |
| |
| ; Dynamic rounding mode arriving in a VGPR (default calling convention, no inreg). The table |
| ; lookup is expected to be performed with VALU instructions on v0/v1, and the result is |
| ; copied to an SGPR with v_readfirstlane_b32 before the s_setreg_b32 write. |
| define void @v_set_rounding(i32 %rounding) { |
| ; GFX6-LABEL: v_set_rounding: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: v_add_i32_e32 v1, vcc, -4, v0 |
| ; GFX6-NEXT: v_min_u32_e32 v0, v0, v1 |
| ; GFX6-NEXT: s_mov_b32 s4, 0x1c84a50f |
| ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX6-NEXT: s_mov_b32 s5, 0xb73e62d9 |
| ; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 |
| ; GFX6-NEXT: v_readfirstlane_b32 s4, v0 |
| ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: v_set_rounding: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: v_add_i32_e32 v1, vcc, -4, v0 |
| ; GFX7-NEXT: v_min_u32_e32 v0, v0, v1 |
| ; GFX7-NEXT: s_mov_b32 s4, 0x1c84a50f |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX7-NEXT: s_mov_b32 s5, 0xb73e62d9 |
| ; GFX7-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 |
| ; GFX7-NEXT: v_readfirstlane_b32 s4, v0 |
| ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v_set_rounding: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -4, v0 |
| ; GFX8-NEXT: v_min_u32_e32 v0, v0, v1 |
| ; GFX8-NEXT: s_mov_b32 s4, 0x1c84a50f |
| ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX8-NEXT: s_mov_b32 s5, 0xb73e62d9 |
| ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] |
| ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 |
| ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_set_rounding: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_add_u32_e32 v1, -4, v0 |
| ; GFX9-NEXT: v_min_u32_e32 v0, v0, v1 |
| ; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9 |
| ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] |
| ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_set_rounding: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_add_nc_u32_e32 v1, -4, v0 |
| ; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f |
| ; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9 |
| ; GFX10-NEXT: v_min_u32_e32 v0, v0, v1 |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] |
| ; GFX10-NEXT: v_readfirstlane_b32 s4, v0 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_set_rounding: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_add_nc_u32_e32 v1, -4, v0 |
| ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX11-NEXT: v_min_u32_e32 v0, v0, v1 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1] |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Round trip: the value returned by llvm.get.rounding is passed straight to |
| ; llvm.set.rounding. The expected output still contains both table lookups (s_getreg_b32 |
| ; followed by a shift of 0x0c96f385_eb24da71 for the read, then a shift of |
| ; 0xb73e62d9_1c84a50f for the write); the get/set pair is not folded away. |
| define void @set_rounding_get_rounding() { |
| ; GFX678-LABEL: set_rounding_get_rounding: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX678-NEXT: s_lshl_b32 s6, s4, 2 |
| ; GFX678-NEXT: s_mov_b32 s4, 0xeb24da71 |
| ; GFX678-NEXT: s_mov_b32 s5, 0xc96f385 |
| ; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 |
| ; GFX678-NEXT: s_and_b32 s4, s4, 15 |
| ; GFX678-NEXT: s_add_i32 s5, s4, 4 |
| ; GFX678-NEXT: s_cmp_lt_u32 s4, 4 |
| ; GFX678-NEXT: s_cselect_b32 s4, s4, s5 |
| ; GFX678-NEXT: s_add_i32 s5, s4, -4 |
| ; GFX678-NEXT: s_min_u32 s4, s4, s5 |
| ; GFX678-NEXT: s_lshl_b32 s6, s4, 2 |
| ; GFX678-NEXT: s_mov_b32 s4, 0x1c84a50f |
| ; GFX678-NEXT: s_mov_b32 s5, 0xb73e62d9 |
| ; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: set_rounding_get_rounding: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX9-NEXT: s_lshl_b32 s6, s4, 2 |
| ; GFX9-NEXT: s_mov_b32 s4, 0xeb24da71 |
| ; GFX9-NEXT: s_mov_b32 s5, 0xc96f385 |
| ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 |
| ; GFX9-NEXT: s_and_b32 s4, s4, 15 |
| ; GFX9-NEXT: s_add_i32 s5, s4, 4 |
| ; GFX9-NEXT: s_cmp_lt_u32 s4, 4 |
| ; GFX9-NEXT: s_cselect_b32 s4, s4, s5 |
| ; GFX9-NEXT: s_add_i32 s5, s4, -4 |
| ; GFX9-NEXT: s_min_u32 s4, s4, s5 |
| ; GFX9-NEXT: s_lshl_b32 s6, s4, 2 |
| ; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f |
| ; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9 |
| ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: set_rounding_get_rounding: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71 |
| ; GFX10-NEXT: s_mov_b32 s5, 0xc96f385 |
| ; GFX10-NEXT: s_lshl_b32 s6, s6, 2 |
| ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 |
| ; GFX10-NEXT: s_and_b32 s4, s4, 15 |
| ; GFX10-NEXT: s_add_i32 s5, s4, 4 |
| ; GFX10-NEXT: s_cmp_lt_u32 s4, 4 |
| ; GFX10-NEXT: s_cselect_b32 s4, s4, s5 |
| ; GFX10-NEXT: s_add_i32 s5, s4, -4 |
| ; GFX10-NEXT: s_min_u32 s6, s4, s5 |
| ; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f |
| ; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9 |
| ; GFX10-NEXT: s_lshl_b32 s6, s6, 2 |
| ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: set_rounding_get_rounding: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71 |
| ; GFX11-NEXT: s_mov_b32 s1, 0xc96f385 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_and_b32 s0, s0, 15 |
| ; GFX11-NEXT: s_add_i32 s1, s0, 4 |
| ; GFX11-NEXT: s_cmp_lt_u32 s0, 4 |
| ; GFX11-NEXT: s_cselect_b32 s0, s0, s1 |
| ; GFX11-NEXT: s_add_i32 s1, s0, -4 |
| ; GFX11-NEXT: s_min_u32 s2, s0, s1 |
| ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %rounding = call i32 @llvm.get.rounding() |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Constant mode 0. The call is expected to fold to a single immediate write of 15 (0xf) to |
| ; the 4-bit mode field, using s_setreg_imm32_b32 before gfx10 and s_round_mode on |
| ; gfx10/gfx11. |
| define void @s_set_rounding_0() { |
| ; GFX678-LABEL: s_set_rounding_0: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_0: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xf |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 0) |
| ret void |
| } |
| |
| ; Constant mode 1. Expected to fold to a single immediate write of 0 to the 4-bit mode |
| ; field (s_setreg_imm32_b32 before gfx10, s_round_mode on gfx10/gfx11). |
| define void @s_set_rounding_1() { |
| ; GFX678-LABEL: s_set_rounding_1: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_1: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x0 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 1) |
| ret void |
| } |
| |
| ; Constant mode 2. Expected to fold to a single immediate write of 5 (0b0101) to the 4-bit |
| ; mode field (s_setreg_imm32_b32 before gfx10, s_round_mode on gfx10/gfx11). |
| define void @s_set_rounding_2() { |
| ; GFX678-LABEL: s_set_rounding_2: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_2: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_2: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x5 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 2) |
| ret void |
| } |
| |
| ; Constant mode 3. Expected to fold to a single immediate write of 10 (0b1010) to the 4-bit |
| ; mode field (s_setreg_imm32_b32 before gfx10, s_round_mode on gfx10/gfx11). |
| define void @s_set_rounding_3() { |
| ; GFX678-LABEL: s_set_rounding_3: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_3: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_3: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xa |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 3) |
| ret void |
| } |
| |
| ; Unsupported mode. |
| ; Constant mode 4 is still expected to produce an immediate mode write; the expected value |
| ; is 15 (0xf), the same immediate as for constant mode 0. |
| define void @s_set_rounding_4() { |
| ; GFX678-LABEL: s_set_rounding_4: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_4: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_4: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xf |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 4) |
| ret void |
| } |
| |
| ; undefined |
| ; Constant mode 5: the expected immediate is 0, the same as for constant mode 1. |
| define void @s_set_rounding_5() { |
| ; GFX678-LABEL: s_set_rounding_5: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_5: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_5: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x0 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 5) |
| ret void |
| } |
| |
| ; undefined |
| ; Constant mode 6: the expected immediate is 5, the same as for constant mode 2. |
| define void @s_set_rounding_6() { |
| ; GFX678-LABEL: s_set_rounding_6: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_6: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_6: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x5 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 6) |
| ret void |
| } |
| |
| ; "Dynamic" |
| ; Constant mode 7: the expected immediate is 10 (0xa), the same as for constant mode 3. |
| define void @s_set_rounding_7() { |
| ; GFX678-LABEL: s_set_rounding_7: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_7: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_7: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xa |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 7) |
| ret void |
| } |
| |
| ; Invalid |
| ; Constant mode -1: the expected immediate is 11 (0xb), the same value this file expects |
| ; for the constants 19, 20 and 65535. |
| define void @s_set_rounding_neg1() { |
| ; GFX678-LABEL: s_set_rounding_neg1: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_neg1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_neg1: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xb |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 -1) |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; Test extended values |
| ; -------------------------------------------------------------------- |
| |
| ; NearestTiesToEvenF32_TowardPositiveF64 = 8 |
| ; Expected immediate written to the 4-bit mode field: 4 (0b0100). |
| define void @s_set_rounding_8() { |
| ; GFX678-LABEL: s_set_rounding_8: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_8: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_8: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x4 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 8) |
| ret void |
| } |
| |
| ; NearestTiesToEvenF32_TowardNegativeF64 = 9 |
| ; Expected immediate written to the 4-bit mode field: 8 (0b1000). |
| define void @s_set_rounding_9() { |
| ; GFX678-LABEL: s_set_rounding_9: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_9: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_9: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x8 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 9) |
| ret void |
| } |
| |
| ; NearestTiesToEvenF32_TowardZeroF64 = 10 |
| ; Expected immediate written to the 4-bit mode field: 12 (0b1100). |
| define void @s_set_rounding_10() { |
| ; GFX678-LABEL: s_set_rounding_10: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_10: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_10: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xc |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 10) |
| ret void |
| } |
| |
| ; TowardPositiveF32_NearestTiesToEvenF64 = 11 |
| ; Expected immediate written to the 4-bit mode field: 1 (0b0001). |
| define void @s_set_rounding_11() { |
| ; GFX678-LABEL: s_set_rounding_11: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_11: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_11: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x1 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 11) |
| ret void |
| } |
| |
| ; TowardPositiveF32_TowardNegativeF64 = 12 |
| ; Expected immediate written to the 4-bit mode field: 9 (0b1001). |
| define void @s_set_rounding_12() { |
| ; GFX678-LABEL: s_set_rounding_12: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_12: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_12: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x9 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 12) |
| ret void |
| } |
| |
| ; TowardPositiveF32_TowardZeroF64 = 13 |
| ; Expected immediate written to the 4-bit mode field: 13 (0b1101). |
| define void @s_set_rounding_13() { |
| ; GFX678-LABEL: s_set_rounding_13: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_13: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_13: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xd |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 13) |
| ret void |
| } |
| |
| ; TowardNegativeF32_NearestTiesToEvenF64 = 14 |
| ; Expected immediate written to the 4-bit mode field: 2 (0b0010). |
| define void @s_set_rounding_14() { |
| ; GFX678-LABEL: s_set_rounding_14: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_14: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_14: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x2 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 14) |
| ret void |
| } |
| |
| ; TowardNegativeF32_TowardPositiveF64 = 15 |
| ; Expected immediate written to the 4-bit mode field: 6 (0b0110). |
| define void @s_set_rounding_15() { |
| ; GFX678-LABEL: s_set_rounding_15: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_15: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_15: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x6 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 15) |
| ret void |
| } |
| |
| |
| ; TowardNegativeF32_TowardZeroF64 = 16 |
| ; Expected immediate written to the 4-bit mode field: 14 (0b1110). |
| define void @s_set_rounding_16() { |
| ; GFX678-LABEL: s_set_rounding_16: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_16: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xe |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 16) |
| ret void |
| } |
| |
| ; TowardZeroF32_NearestTiesToEvenF64 = 17 |
| ; Expected immediate written to the 4-bit mode field: 3 (0b0011). |
| define void @s_set_rounding_17() { |
| ; GFX678-LABEL: s_set_rounding_17: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_17: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_17: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x3 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 17) |
| ret void |
| } |
| |
| ; TowardZeroF32_TowardPositiveF64 = 18 |
| ; Expected immediate written to the 4-bit mode field: 7 (0b0111). |
| define void @s_set_rounding_18() { |
| ; GFX678-LABEL: s_set_rounding_18: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_18: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_18: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0x7 |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 18) |
| ret void |
| } |
| |
| ; TowardZeroF32_TowardNegativeF64 = 19 |
| ; Expected immediate written to the 4-bit mode field: 11 (0b1011). This is the largest |
| ; named extended value tested in this file. |
| define void @s_set_rounding_19() { |
| ; GFX678-LABEL: s_set_rounding_19: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_19: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_19: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xb |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 19) |
| ret void |
| } |
| |
| ; Invalid, out of bounds |
| ; Constant mode 20: the expected immediate is 11 (0xb), the same value this file expects |
| ; for 19, the last named extended mode. |
| define void @s_set_rounding_20() { |
| ; GFX678-LABEL: s_set_rounding_20: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_20: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_20: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xb |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 20) |
| ret void |
| } |
| |
| ; Out-of-range constant 65535 (0xffff): the expected immediate is 11 (0xb), the same value |
| ; this file expects for the constants 19 and 20. |
| define void @s_set_rounding_0xffff() { |
| ; GFX678-LABEL: s_set_rounding_0xffff: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_0xffff: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1011-LABEL: s_set_rounding_0xffff: |
| ; GFX1011: ; %bb.0: |
| ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX1011-NEXT: s_round_mode 0xb |
| ; GFX1011-NEXT: s_setpc_b64 s[30:31] |
| call void @llvm.set.rounding(i32 65535) |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; Test optimization knowing the value can only be in the standard |
| ; range |
| ; -------------------------------------------------------------------- |
| |
| ; The argument is an i2 zero-extended to i32, so only the values 0-3 can reach |
| ; llvm.set.rounding. The expected code is the short form: shift the 16-bit |
| ; constant 0xa50f right by 4 * value with a 32-bit shift and write the result |
| ; to MODE bits [3:0]. The gfx8 and newer run lines first mask the incoming |
| ; register with 0xffff. |
| define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) { |
| ; GFX6-LABEL: s_set_rounding_i2_zeroext: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: s_lshl_b32 s34, s4, 2 |
| ; GFX6-NEXT: s_lshr_b32 s34, 0xa50f, s34 |
| ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: s_set_rounding_i2_zeroext: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_lshl_b32 s34, s4, 2 |
| ; GFX7-NEXT: s_lshr_b32 s34, 0xa50f, s34 |
| ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: s_set_rounding_i2_zeroext: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_and_b32 s34, 0xffff, s4 |
| ; GFX8-NEXT: s_lshl_b32 s34, s34, 2 |
| ; GFX8-NEXT: s_lshr_b32 s34, 0xa50f, s34 |
| ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_i2_zeroext: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_and_b32 s34, 0xffff, s4 |
| ; GFX9-NEXT: s_lshl_b32 s34, s34, 2 |
| ; GFX9-NEXT: s_lshr_b32 s34, 0xa50f, s34 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_i2_zeroext: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_and_b32 s34, 0xffff, s4 |
| ; GFX10-NEXT: s_lshl_b32 s34, s34, 2 |
| ; GFX10-NEXT: s_lshr_b32 s34, 0xa50f, s34 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_i2_zeroext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_and_b32 s0, 0xffff, s4 |
| ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 |
| ; GFX11-NEXT: s_lshr_b32 s0, 0xa50f, s0 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %zext.rounding = zext i2 %rounding to i32 |
| call void @llvm.set.rounding(i32 %zext.rounding) |
| ret void |
| } |
| |
| ; The argument is an i2 sign-extended to i32. No run line is expected to use |
| ; the short 0xa50f form here: the checks keep the general sequence (add -4, |
| ; unsigned min, shift left by 2, then a 64-bit right shift of the constant |
| ; pair 0x1c84a50f / 0xb73e62d9). The gfx8 and newer run lines first |
| ; sign-extend the low 16 bits of the incoming register. |
| define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) { |
| ; GFX6-LABEL: s_set_rounding_i2_signext: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: s_add_i32 s34, s4, -4 |
| ; GFX6-NEXT: s_min_u32 s34, s4, s34 |
| ; GFX6-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: s_set_rounding_i2_signext: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_add_i32 s34, s4, -4 |
| ; GFX7-NEXT: s_min_u32 s34, s4, s34 |
| ; GFX7-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: s_set_rounding_i2_signext: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_sext_i32_i16 s34, s4 |
| ; GFX8-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX8-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX8-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_i2_signext: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_sext_i32_i16 s34, s4 |
| ; GFX9-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX9-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_i2_signext: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_sext_i32_i16 s34, s4 |
| ; GFX10-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX10-NEXT: s_min_u32 s36, s34, s35 |
| ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 |
| ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_i2_signext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_sext_i32_i16 s0, s4 |
| ; GFX11-NEXT: s_add_i32 s1, s0, -4 |
| ; GFX11-NEXT: s_min_u32 s2, s0, s1 |
| ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %sext.rounding = sext i2 %rounding to i32 |
| call void @llvm.set.rounding(i32 %sext.rounding) |
| ret void |
| } |
| |
| ; The argument is an i3 sign-extended to i32. As with the i2 signext case, the |
| ; checks keep the general sequence (add -4, unsigned min, shift left by 2, |
| ; then a 64-bit right shift of the constant pair 0x1c84a50f / 0xb73e62d9). |
| ; The gfx8 and newer run lines first sign-extend the low 16 bits of the |
| ; incoming register. |
| define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) { |
| ; GFX6-LABEL: s_set_rounding_i3_signext: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: s_add_i32 s34, s4, -4 |
| ; GFX6-NEXT: s_min_u32 s34, s4, s34 |
| ; GFX6-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: s_set_rounding_i3_signext: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_add_i32 s34, s4, -4 |
| ; GFX7-NEXT: s_min_u32 s34, s4, s34 |
| ; GFX7-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: s_set_rounding_i3_signext: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_sext_i32_i16 s34, s4 |
| ; GFX8-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX8-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX8-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_i3_signext: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_sext_i32_i16 s34, s4 |
| ; GFX9-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX9-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_i3_signext: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_sext_i32_i16 s34, s4 |
| ; GFX10-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX10-NEXT: s_min_u32 s36, s34, s35 |
| ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 |
| ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_i3_signext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_sext_i32_i16 s0, s4 |
| ; GFX11-NEXT: s_add_i32 s1, s0, -4 |
| ; GFX11-NEXT: s_min_u32 s2, s0, s1 |
| ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %sext.rounding = sext i3 %rounding to i32 |
| call void @llvm.set.rounding(i32 %sext.rounding) |
| ret void |
| } |
| |
| ; The argument is an i3 zero-extended to i32, so values 0-7 can reach |
| ; llvm.set.rounding. Unlike the i2 zeroext case, the checks keep the general |
| ; sequence (add -4, unsigned min, shift left by 2, then a 64-bit right shift |
| ; of the constant pair 0x1c84a50f / 0xb73e62d9). The gfx8 and newer run lines |
| ; first mask the incoming register with 0xffff. |
| define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) { |
| ; GFX6-LABEL: s_set_rounding_i3_zeroext: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: s_add_i32 s34, s4, -4 |
| ; GFX6-NEXT: s_min_u32 s34, s4, s34 |
| ; GFX6-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: s_set_rounding_i3_zeroext: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_add_i32 s34, s4, -4 |
| ; GFX7-NEXT: s_min_u32 s34, s4, s34 |
| ; GFX7-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: s_set_rounding_i3_zeroext: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_and_b32 s34, 0xffff, s4 |
| ; GFX8-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX8-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX8-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_i3_zeroext: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_and_b32 s34, 0xffff, s4 |
| ; GFX9-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX9-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_i3_zeroext: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_and_b32 s34, 0xffff, s4 |
| ; GFX10-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX10-NEXT: s_min_u32 s36, s34, s35 |
| ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 |
| ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_i3_zeroext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_and_b32 s0, 0xffff, s4 |
| ; GFX11-NEXT: s_add_i32 s1, s0, -4 |
| ; GFX11-NEXT: s_min_u32 s2, s0, s1 |
| ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %zext.rounding = zext i3 %rounding to i32 |
| call void @llvm.set.rounding(i32 %zext.rounding) |
| ret void |
| } |
| |
| ; Rounding value is 0 when %cond == 0 and 1 otherwise. The expected code uses |
| ; the short form (a 32-bit right shift of 0xa50f by 4 * value), but computes |
| ; it in vector registers: v_cndmask produces the 0/1 value, the shifts run on |
| ; the VALU, and v_readfirstlane moves the result to an SGPR for s_setreg_b32. |
| ; NOTE(review): the other select tests with both arms in 0-3 expect an |
| ; s_cselect_b32 between two precomputed immediates; this one does not. |
| define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) { |
| ; GFX6-LABEL: s_set_rounding_select_0_1: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0 |
| ; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] |
| ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 |
| ; GFX6-NEXT: v_readfirstlane_b32 s34, v0 |
| ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: s_set_rounding_select_0_1: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0 |
| ; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] |
| ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 |
| ; GFX7-NEXT: v_readfirstlane_b32 s34, v0 |
| ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: s_set_rounding_select_0_1: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0 |
| ; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] |
| ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX8-NEXT: s_mov_b32 s34, 0xa50f |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34 |
| ; GFX8-NEXT: v_readfirstlane_b32 s34, v0 |
| ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_select_0_1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0 |
| ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] |
| ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX9-NEXT: s_mov_b32 s34, 0xa50f |
| ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34 |
| ; GFX9-NEXT: v_readfirstlane_b32 s34, v0 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_select_0_1: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX10-NEXT: s_cselect_b32 s34, -1, 0 |
| ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34 |
| ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f |
| ; GFX10-NEXT: v_readfirstlane_b32 s34, v0 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_select_0_1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_cmp_lg_u32 s4, 0 |
| ; GFX11-NEXT: s_cselect_b32 s0, -1, 0 |
| ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp eq i32 %cond, 0 |
| %rounding = select i1 %cmp, i32 0, i32 1 |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Rounding value is 1 when %cond == 0 and 3 otherwise. Both arms are in the |
| ; 0-3 range, and the checks expect the lookup to be folded per arm: |
| ; s_cselect_b32 picks between 0xa50 (0xa50f >> 4) and 10 (0xa50f >> 12), and |
| ; the result is written to MODE bits [3:0]. |
| define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) { |
| ; GFX678-LABEL: s_set_rounding_select_1_3: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX678-NEXT: s_cselect_b32 s34, 0xa50, 10 |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_select_1_3: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX9-NEXT: s_cselect_b32 s34, 0xa50, 10 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_select_1_3: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX10-NEXT: s_cselect_b32 s34, 0xa50, 10 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_select_1_3: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX11-NEXT: s_cselect_b32 s0, 0xa50, 10 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp eq i32 %cond, 0 |
| %rounding = select i1 %cmp, i32 1, i32 3 |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Same select as s_set_rounding_select_1_3 (1 when %cond == 0, else 3), but |
| ; with the default calling convention and a non-inreg argument, so the checks |
| ; read %cond from v0. The select is expected as a v_cndmask between 0xa50 and |
| ; 10, followed by v_readfirstlane to move the result into an SGPR for |
| ; s_setreg_b32. |
| define void @v_set_rounding_select_1_3(i32 %cond) { |
| ; GFX678-LABEL: v_set_rounding_select_1_3: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: v_mov_b32_e32 v1, 0xa50 |
| ; GFX678-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX678-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc |
| ; GFX678-NEXT: v_readfirstlane_b32 s4, v0 |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v_set_rounding_select_1_3: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v1, 0xa50 |
| ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX9-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc |
| ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: v_set_rounding_select_1_3: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
| ; GFX10-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo |
| ; GFX10-NEXT: v_readfirstlane_b32 s4, v0 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_set_rounding_select_1_3: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
| ; GFX11-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo |
| ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp eq i32 %cond, 0 |
| %rounding = select i1 %cmp, i32 1, i32 3 |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Rounding value is 2 when %cond == 0 and 0 otherwise. The checks expect the |
| ; lookup folded per arm: s_cselect_b32 picks between 0xa5 (0xa50f >> 8, |
| ; materialized with s_movk_i32) and 0xa50f (shift by 0), and the result is |
| ; written to MODE bits [3:0]. |
| define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) { |
| ; GFX678-LABEL: s_set_rounding_select_2_0: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX678-NEXT: s_movk_i32 s34, 0xa5 |
| ; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50f |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_select_2_0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX9-NEXT: s_movk_i32 s34, 0xa5 |
| ; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50f |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_select_2_0: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX10-NEXT: s_movk_i32 s34, 0xa5 |
| ; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50f |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_select_2_0: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX11-NEXT: s_movk_i32 s0, 0xa5 |
| ; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50f |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp eq i32 %cond, 0 |
| %rounding = select i1 %cmp, i32 2, i32 0 |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Rounding value is 2 when %cond == 0 and 1 otherwise. The checks expect the |
| ; lookup folded per arm: s_cselect_b32 picks between 0xa5 (0xa50f >> 8, |
| ; materialized with s_movk_i32) and 0xa50 (0xa50f >> 4), and the result is |
| ; written to MODE bits [3:0]. |
| define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) { |
| ; GFX678-LABEL: s_set_rounding_select_2_1: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX678-NEXT: s_movk_i32 s34, 0xa5 |
| ; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50 |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_select_2_1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX9-NEXT: s_movk_i32 s34, 0xa5 |
| ; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_select_2_1: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX10-NEXT: s_movk_i32 s34, 0xa5 |
| ; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_select_2_1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX11-NEXT: s_movk_i32 s0, 0xa5 |
| ; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp eq i32 %cond, 0 |
| %rounding = select i1 %cmp, i32 2, i32 1 |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Rounding value is 1 when %cond == 0 and 2 otherwise, i.e. the arms of |
| ; s_set_rounding_select_2_1 swapped. The checks expect the lookup folded per |
| ; arm: s_cselect_b32 picks between 0xa50 (0xa50f >> 4, materialized with |
| ; s_movk_i32) and 0xa5 (0xa50f >> 8). |
| define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) { |
| ; GFX678-LABEL: s_set_rounding_select_1_2: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX678-NEXT: s_movk_i32 s34, 0xa50 |
| ; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5 |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_select_1_2: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX9-NEXT: s_movk_i32 s34, 0xa50 |
| ; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_select_1_2: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX10-NEXT: s_movk_i32 s34, 0xa50 |
| ; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_select_1_2: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX11-NEXT: s_movk_i32 s0, 0xa50 |
| ; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp eq i32 %cond, 0 |
| %rounding = select i1 %cmp, i32 1, i32 2 |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Rounding value is 3 when %cond == 0 and 0 otherwise. The checks expect the |
| ; lookup folded per arm: s_cselect_b32 picks between 10 (0xa50f >> 12) and |
| ; 0xa50f (shift by 0), and the result is written to MODE bits [3:0]. |
| define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) { |
| ; GFX678-LABEL: s_set_rounding_select_3_0: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX678-NEXT: s_cselect_b32 s34, 10, 0xa50f |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_select_3_0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX9-NEXT: s_cselect_b32 s34, 10, 0xa50f |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_select_3_0: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX10-NEXT: s_cselect_b32 s34, 10, 0xa50f |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_select_3_0: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX11-NEXT: s_cselect_b32 s0, 10, 0xa50f |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp eq i32 %cond, 0 |
| %rounding = select i1 %cmp, i32 3, i32 0 |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Rounding value is 4 when %cond == 0 and 0 otherwise. The 4 arm lies outside |
| ; 0-3, and the checks expect no per-arm folding: s_cselect_b32 picks 4 or 0, |
| ; then the general sequence follows (add -4, unsigned min, shift left by 2, |
| ; 64-bit right shift of the constant pair 0x1c84a50f / 0xb73e62d9). |
| define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) { |
| ; GFX678-LABEL: s_set_rounding_select_4_0: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX678-NEXT: s_cselect_b32 s34, 4, 0 |
| ; GFX678-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX678-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX678-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_select_4_0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX9-NEXT: s_cselect_b32 s34, 4, 0 |
| ; GFX9-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX9-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_select_4_0: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX10-NEXT: s_cselect_b32 s34, 4, 0 |
| ; GFX10-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX10-NEXT: s_min_u32 s36, s34, s35 |
| ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 |
| ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_select_4_0: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX11-NEXT: s_cselect_b32 s0, 4, 0 |
| ; GFX11-NEXT: s_add_i32 s1, s0, -4 |
| ; GFX11-NEXT: s_min_u32 s2, s0, s1 |
| ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp eq i32 %cond, 0 |
| %rounding = select i1 %cmp, i32 4, i32 0 |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| ; Rounding value is 3 when %cond == 0 and 5 otherwise. The 5 arm lies outside |
| ; 0-3, and the checks expect no per-arm folding: s_cselect_b32 picks 3 or 5, |
| ; then the general sequence follows (add -4, unsigned min, shift left by 2, |
| ; 64-bit right shift of the constant pair 0x1c84a50f / 0xb73e62d9). |
| define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) { |
| ; GFX678-LABEL: s_set_rounding_select_3_5: |
| ; GFX678: ; %bb.0: |
| ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX678-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX678-NEXT: s_cselect_b32 s34, 3, 5 |
| ; GFX678-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX678-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX678-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX678-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: s_set_rounding_select_3_5: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX9-NEXT: s_cselect_b32 s34, 3, 5 |
| ; GFX9-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX9-NEXT: s_min_u32 s34, s34, s35 |
| ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 |
| ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: s_set_rounding_select_3_5: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX10-NEXT: s_cselect_b32 s34, 3, 5 |
| ; GFX10-NEXT: s_add_i32 s35, s34, -4 |
| ; GFX10-NEXT: s_min_u32 s36, s34, s35 |
| ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f |
| ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 |
| ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 |
| ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 |
| ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: s_set_rounding_select_3_5: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 |
| ; GFX11-NEXT: s_cselect_b32 s0, 3, 5 |
| ; GFX11-NEXT: s_add_i32 s1, s0, -4 |
| ; GFX11-NEXT: s_min_u32 s2, s0, s1 |
| ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f |
| ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %cmp = icmp eq i32 %cond, 0 |
| %rounding = select i1 %cmp, i32 3, i32 5 |
| call void @llvm.set.rounding(i32 %rounding) |
| ret void |
| } |
| |
| define amdgpu_kernel void @get_rounding_after_set_rounding_1() { |
| ; GFX6-LABEL: get_rounding_after_set_rounding_1: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 |
| ; GFX6-NEXT: s_mov_b32 s3, 0xf000 |
| ; GFX6-NEXT: s_nop 0 |
| ; GFX6-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX6-NEXT: s_lshl_b32 s2, s0, 2 |
| ; GFX6-NEXT: s_mov_b32 s0, 0xeb24da71 |
| ; GFX6-NEXT: s_mov_b32 s1, 0xc96f385 |
| ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX6-NEXT: s_and_b32 s0, s0, 15 |
| ; GFX6-NEXT: s_add_i32 s1, s0, 4 |
| ; GFX6-NEXT: s_cmp_lt_u32 s0, 4 |
| ; GFX6-NEXT: s_cselect_b32 s4, s0, s1 |
| ; GFX6-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX6-NEXT: s_mov_b32 s2, -1 |
| ; GFX6-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: s_endpgm |
| ; |
| ; GFX7-LABEL: get_rounding_after_set_rounding_1: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 |
| ; GFX7-NEXT: s_mov_b32 s3, 0xf000 |
| ; GFX7-NEXT: s_nop 0 |
| ; GFX7-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX7-NEXT: s_lshl_b32 s2, s0, 2 |
| ; GFX7-NEXT: s_mov_b32 s0, 0xeb24da71 |
| ; GFX7-NEXT: s_mov_b32 s1, 0xc96f385 |
| ; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX7-NEXT: s_and_b32 s0, s0, 15 |
| ; GFX7-NEXT: s_add_i32 s1, s0, 4 |
| ; GFX7-NEXT: s_cmp_lt_u32 s0, 4 |
| ; GFX7-NEXT: s_cselect_b32 s4, s0, s1 |
| ; GFX7-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX7-NEXT: s_mov_b32 s2, -1 |
| ; GFX7-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_endpgm |
| ; |
| ; GFX8-LABEL: get_rounding_after_set_rounding_1: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 |
| ; GFX8-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX8-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX8-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX8-NEXT: s_lshl_b32 s2, s0, 2 |
| ; GFX8-NEXT: s_mov_b32 s0, 0xeb24da71 |
| ; GFX8-NEXT: s_mov_b32 s1, 0xc96f385 |
| ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX8-NEXT: s_and_b32 s0, s0, 15 |
| ; GFX8-NEXT: s_add_i32 s1, s0, 4 |
| ; GFX8-NEXT: s_cmp_lt_u32 s0, 4 |
| ; GFX8-NEXT: s_cselect_b32 s0, s0, s1 |
| ; GFX8-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX8-NEXT: flat_store_dword v[0:1], v2 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_endpgm |
| ; |
| ; GFX9-LABEL: get_rounding_after_set_rounding_1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX9-NEXT: s_lshl_b32 s2, s0, 2 |
| ; GFX9-NEXT: s_mov_b32 s0, 0xeb24da71 |
| ; GFX9-NEXT: s_mov_b32 s1, 0xc96f385 |
| ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX9-NEXT: s_and_b32 s0, s0, 15 |
| ; GFX9-NEXT: s_add_i32 s1, s0, 4 |
| ; GFX9-NEXT: s_cmp_lt_u32 s0, 4 |
| ; GFX9-NEXT: s_cselect_b32 s0, s0, s1 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: get_rounding_after_set_rounding_1: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_round_mode 0x0 |
| ; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71 |
| ; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX10-NEXT: s_mov_b32 s1, 0xc96f385 |
| ; GFX10-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX10-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX10-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX10-NEXT: s_and_b32 s0, s0, 15 |
| ; GFX10-NEXT: s_add_i32 s1, s0, 4 |
| ; GFX10-NEXT: s_cmp_lt_u32 s0, 4 |
| ; GFX10-NEXT: s_cselect_b32 s0, s0, s1 |
| ; GFX10-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX10-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: get_rounding_after_set_rounding_1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_round_mode 0x0 |
| ; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71 |
| ; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) |
| ; GFX11-NEXT: s_mov_b32 s1, 0xc96f385 |
| ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 |
| ; GFX11-NEXT: s_and_b32 s0, s0, 15 |
| ; GFX11-NEXT: s_add_i32 s1, s0, 4 |
| ; GFX11-NEXT: s_cmp_lt_u32 s0, 4 |
| ; GFX11-NEXT: s_cselect_b32 s0, s0, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_endpgm |
| tail call void @llvm.set.rounding(i32 1) |
| %set.mode = tail call i32 @llvm.get.rounding() |
| store volatile i32 %set.mode, ptr addrspace(1) null |
| ret void |
| } |
| |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; GCN: {{.*}} |