; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel -new-reg-bank-select -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX678 %s
; RUN: llc -global-isel -new-reg-bank-select -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -new-reg-bank-select -mcpu=gfx1010 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX10 %s
; RUN: llc -global-isel -new-reg-bank-select -mcpu=gfx1100 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX11 %s

; This file covers AMDGPU GlobalISel code generation for an i64 value that is
; clamped with llvm.smax.i64 / llvm.smin.i64 and then truncated to i16.
; The per-function assertions are generated from llc output; regenerate them
; with the script named on the first line instead of editing them by hand.

declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)

; smax with -32768, then smin with 32767, then trunc to i16. Both constants
; are exactly the i16 limits; the expected code is v_cvt_pk_i16_i32 followed
; by v_med3_i32 using 0xffff8000 and 0x7fff.
define i16 @v_clamp_i64_i16(i64 %in) #0 {
; GFX678-LABEL: v_clamp_i64_i16:
; GFX678:       ; %bb.0: ; %entry
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    v_cvt_pk_i16_i32_e32 v0, v0, v1
; GFX678-NEXT:    v_mov_b32_e32 v1, 0xffff8000
; GFX678-NEXT:    v_mov_b32_e32 v2, 0x7fff
; GFX678-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_clamp_i64_i16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff8000
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fff
; GFX9-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_clamp_i64_i16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX10-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_clamp_i64_i16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX11-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -32768)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 32767)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; Same i16-limit clamp as v_clamp_i64_i16, but with smin (32767) applied
; before smax (-32768). The expected instruction sequence is the same
; v_cvt_pk_i16_i32 + v_med3_i32 pair.
define i16 @v_clamp_i64_i16_reverse(i64 %in) #0 {
; GFX678-LABEL: v_clamp_i64_i16_reverse:
; GFX678:       ; %bb.0: ; %entry
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    v_cvt_pk_i16_i32_e32 v0, v0, v1
; GFX678-NEXT:    v_mov_b32_e32 v1, 0xffff8000
; GFX678-NEXT:    v_mov_b32_e32 v2, 0x7fff
; GFX678-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_clamp_i64_i16_reverse:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff8000
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fff
; GFX9-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_clamp_i64_i16_reverse:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX10-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_clamp_i64_i16_reverse:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX11-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %min = call i64 @llvm.smin.i64(i64 %in, i64 32767)
  %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
  %result = trunc i64 %max to i16
  ret i16 %result
}

; Negative case: the smin constant is 32769 (0x8001), which is above the i16
; maximum of 32767, while the smax constant is still -32768. Note that, despite
; the function name, the out-of-range bound here is the smin one. The expected
; code keeps the 64-bit compare/select sequence (v_cmp_*_i64 + v_cndmask_b32)
; and contains no v_cvt_pk_i16_i32 / v_med3_i32.
define i16 @v_clamp_i64_i16_invalid_lower(i64 %in) #0 {
; GFX678-LABEL: v_clamp_i64_i16_invalid_lower:
; GFX678:       ; %bb.0: ; %entry
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    v_mov_b32_e32 v2, 0x8001
; GFX678-NEXT:    v_mov_b32_e32 v3, 0
; GFX678-NEXT:    v_cmp_lt_i64_e32 vcc, v[0:1], v[2:3]
; GFX678-NEXT:    v_mov_b32_e32 v4, 0x8001
; GFX678-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX678-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX678-NEXT:    v_mov_b32_e32 v2, 0xffff8000
; GFX678-NEXT:    v_mov_b32_e32 v3, -1
; GFX678-NEXT:    v_cmp_gt_i64_e32 vcc, v[0:1], v[2:3]
; GFX678-NEXT:    v_mov_b32_e32 v4, 0xffff8000
; GFX678-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_clamp_i64_i16_invalid_lower:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x8001
; GFX9-NEXT:    v_mov_b32_e32 v3, 0
; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT:    v_mov_b32_e32 v4, 0x8001
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff8000
; GFX9-NEXT:    v_mov_b32_e32 v3, -1
; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff8000
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_clamp_i64_i16_invalid_lower:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0x8001, v[0:1]
; GFX10-NEXT:    v_mov_b32_e32 v2, 0xffff8000
; GFX10-NEXT:    v_mov_b32_e32 v3, -1
; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x8001, v0, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc_lo
; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0xffff8000, v0, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_clamp_i64_i16_invalid_lower:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0x8001, v[0:1]
; GFX11-NEXT:    v_dual_mov_b32 v2, 0xffff8000 :: v_dual_mov_b32 v3, -1
; GFX11-NEXT:    v_dual_cndmask_b32 v0, 0x8001, v0 :: v_dual_cndmask_b32 v1, 0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xffff8000, v0, vcc_lo
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %min = call i64 @llvm.smin.i64(i64 %in, i64 32769)
  %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
  %result = trunc i64 %max to i16
  ret i16 %result
}

; Negative case: smax with -32769 and smin with 32768, i.e. each constant is
; one step outside the i16 range. The expected code keeps the 64-bit
; compare/select sequence (v_cmp_*_i64 + v_cndmask_b32) and contains no
; v_cvt_pk_i16_i32 / v_med3_i32.
define i16 @v_clamp_i64_i16_invalid_lower_and_higher(i64 %in) #0 {
; GFX678-LABEL: v_clamp_i64_i16_invalid_lower_and_higher:
; GFX678:       ; %bb.0: ; %entry
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    v_mov_b32_e32 v2, 0xffff7fff
; GFX678-NEXT:    v_mov_b32_e32 v3, -1
; GFX678-NEXT:    v_cmp_gt_i64_e32 vcc, v[0:1], v[2:3]
; GFX678-NEXT:    v_mov_b32_e32 v4, 0xffff7fff
; GFX678-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX678-NEXT:    v_cndmask_b32_e32 v1, -1, v1, vcc
; GFX678-NEXT:    v_mov_b32_e32 v2, 0x8000
; GFX678-NEXT:    v_mov_b32_e32 v3, 0
; GFX678-NEXT:    v_cmp_lt_i64_e32 vcc, v[0:1], v[2:3]
; GFX678-NEXT:    v_mov_b32_e32 v4, 0x8000
; GFX678-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_clamp_i64_i16_invalid_lower_and_higher:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff7fff
; GFX9-NEXT:    v_mov_b32_e32 v3, -1
; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff7fff
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v1, -1, v1, vcc
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x8000
; GFX9-NEXT:    v_mov_b32_e32 v3, 0
; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT:    v_mov_b32_e32 v4, 0x8000
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_clamp_i64_i16_invalid_lower_and_higher:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v2, 0xffff7fff
; GFX10-NEXT:    v_mov_b32_e32 v3, -1
; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0xffff7fff, v0, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, -1, v1, vcc_lo
; GFX10-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0x8000, v[0:1]
; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_clamp_i64_i16_invalid_lower_and_higher:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v2, 0xffff7fff :: v_dual_mov_b32 v3, -1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
; GFX11-NEXT:    v_dual_cndmask_b32 v0, 0xffff7fff, v0 :: v_dual_cndmask_b32 v1, -1, v1
; GFX11-NEXT:    v_cmp_gt_i64_e32 vcc_lo, 0x8000, v[0:1]
; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -32769)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 32768)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; Clamp to a range narrower than i16: smin with 256, then smax with -255.
; The expected code is v_cvt_pk_i16_i32 followed by v_med3_i32 using
; 0xffffff01 and 0x100.
define i16 @v_clamp_i64_i16_lower_than_short(i64 %in) #0 {
; GFX678-LABEL: v_clamp_i64_i16_lower_than_short:
; GFX678:       ; %bb.0: ; %entry
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    v_cvt_pk_i16_i32_e32 v0, v0, v1
; GFX678-NEXT:    v_mov_b32_e32 v1, 0xffffff01
; GFX678-NEXT:    v_mov_b32_e32 v2, 0x100
; GFX678-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_clamp_i64_i16_lower_than_short:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffffff01
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x100
; GFX9-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_clamp_i64_i16_lower_than_short:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x100
; GFX10-NEXT:    v_med3_i32 v0, 0xffffff01, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_clamp_i64_i16_lower_than_short:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX11-NEXT:    v_mov_b32_e32 v1, 0x100
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_med3_i32 v0, 0xffffff01, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %min = call i64 @llvm.smin.i64(i64 %in, i64 256)
  %max = call i64 @llvm.smax.i64(i64 %min, i64 -255)
  %result = trunc i64 %max to i16
  ret i16 %result
}

; Same narrower-than-i16 clamp as v_clamp_i64_i16_lower_than_short, but with
; smax (-255) applied before smin (256). The expected instruction sequence is
; the same v_cvt_pk_i16_i32 + v_med3_i32 pair.
define i16 @v_clamp_i64_i16_lower_than_short_reverse(i64 %in) #0 {
; GFX678-LABEL: v_clamp_i64_i16_lower_than_short_reverse:
; GFX678:       ; %bb.0: ; %entry
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    v_cvt_pk_i16_i32_e32 v0, v0, v1
; GFX678-NEXT:    v_mov_b32_e32 v1, 0xffffff01
; GFX678-NEXT:    v_mov_b32_e32 v2, 0x100
; GFX678-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_clamp_i64_i16_lower_than_short_reverse:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffffff01
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x100
; GFX9-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_clamp_i64_i16_lower_than_short_reverse:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x100
; GFX10-NEXT:    v_med3_i32 v0, 0xffffff01, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_clamp_i64_i16_lower_than_short_reverse:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cvt_pk_i16_i32 v0, v0, v1
; GFX11-NEXT:    v_mov_b32_e32 v1, 0x100
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_med3_i32 v0, 0xffffff01, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -255)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 256)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; Degenerate clamp where both the smax and the smin constant are 0. The
; expected code just materializes 0 in v0, and a single check prefix is
; shared by all four run configurations.
define i16 @v_clamp_i64_i16_zero(i64 %in) #0 {
; GCN-LABEL: v_clamp_i64_i16_zero:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 0)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 0)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; i16-limit clamp of an `inreg` argument, followed by an add of the result to
; itself. In the expected code the input arrives in scalar registers
; (s16/s17, or s0/s1 on the gfx1100 run), the clamp is still done with
; v_cvt_pk_i16_i32 + v_med3_i32, the value is moved back to a scalar register
; with v_readfirstlane_b32, and the add is an s_add_i32.
define i16 @clamp_i64_i16_uniform(i64 inreg %in) #0 {
; GFX678-LABEL: clamp_i64_i16_uniform:
; GFX678:       ; %bb.0: ; %entry
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    v_mov_b32_e32 v0, s17
; GFX678-NEXT:    v_cvt_pk_i16_i32_e32 v0, s16, v0
; GFX678-NEXT:    v_mov_b32_e32 v1, 0xffff8000
; GFX678-NEXT:    v_mov_b32_e32 v2, 0x7fff
; GFX678-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX678-NEXT:    v_readfirstlane_b32 s4, v0
; GFX678-NEXT:    s_add_i32 s4, s4, s4
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: clamp_i64_i16_uniform:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s17
; GFX9-NEXT:    v_cvt_pk_i16_i32 v0, s16, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff8000
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x7fff
; GFX9-NEXT:    v_med3_i32 v0, v1, v0, v2
; GFX9-NEXT:    v_readfirstlane_b32 s4, v0
; GFX9-NEXT:    s_add_i32 s4, s4, s4
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: clamp_i64_i16_uniform:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_pk_i16_i32 v0, s16, s17
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX10-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
; GFX10-NEXT:    v_readfirstlane_b32 s4, v0
; GFX10-NEXT:    s_add_i32 s4, s4, s4
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: clamp_i64_i16_uniform:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cvt_pk_i16_i32 v0, s0, s1
; GFX11-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_med3_i32 v0, 0xffff8000, v0, v1
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    s_add_i32 s0, s0, s0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -32768)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 32767)
  %result = trunc i64 %min to i16
  %ret = add i16 %result, %result
  ret i16 %ret
}