| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-SDAG,SI-SDAG %s |
| ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,GCN-GISEL,SI-GISEL %s |
| ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-SDAG,VI-SDAG %s |
| ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,GCN-GISEL,VI-GISEL %s |
| ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-SDAG,GFX900-SDAG %s |
| ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX900,GCN-GISEL,GFX900-GISEL %s |
| |
| ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s |
| ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s |
| |
| ; Kernel with a scalar float argument. On every GCN target the argument is |
| ; loaded into an SGPR and llvm.exp2.f32 is checked as a single v_exp_f32. |
| ; R600 checks one EXP_IEEE; Cayman (CM) checks four EXP_IEEE slots, three of |
| ; which are MASKED. |
| define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) { |
| ; SI-LABEL: s_exp2_f32: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[0:1], 0xb |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_exp_f32_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_exp2_f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_exp_f32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: flat_store_dword v[0:1], v2 |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX900-LABEL: s_exp2_f32: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_load_dword s4, s[0:1], 0x2c |
| ; GFX900-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX900-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX900-NEXT: v_exp_f32_e32 v0, s4 |
| ; GFX900-NEXT: global_store_dword v1, v0, s[2:3] |
| ; GFX900-NEXT: s_endpgm |
| ; |
| ; R600-LABEL: s_exp2_f32: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] |
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; R600-NEXT: ALU clause starting at 4: |
| ; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x, |
| ; R600-NEXT: EXP_IEEE * T1.X, KC0[2].Z, |
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_exp2_f32: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: EXP_IEEE T1.X, KC0[2].Z, |
| ; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[2].Z, |
| ; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[2].Z, |
| ; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[2].Z, |
| %result = call float @llvm.exp2.f32(float %in) |
| store float %result, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; FIXME: We should be able to merge these packets together on Cayman so we |
| ; have a maximum of 4 instructions. |
| ; Kernel with a <2 x float> argument. GCN targets are checked for one |
| ; v_exp_f32 per element followed by a single 64-bit store; the SDAG checks |
| ; list the high element first, the GlobalISel checks the low element first. |
| ; R600 checks two EXP_IEEE; CM checks two groups of four EXP_IEEE slots. |
| define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) { |
| ; SI-SDAG-LABEL: s_exp2_v2f32: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; SI-SDAG-NEXT: s_mov_b32 s6, -1 |
| ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, s3 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, s2 |
| ; SI-SDAG-NEXT: s_mov_b32 s4, s0 |
| ; SI-SDAG-NEXT: s_mov_b32 s5, s1 |
| ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; SI-SDAG-NEXT: s_endpgm |
| ; |
| ; SI-GISEL-LABEL: s_exp2_v2f32: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, s2 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, s3 |
| ; SI-GISEL-NEXT: s_mov_b32 s2, -1 |
| ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-GISEL-NEXT: s_endpgm |
| ; |
| ; VI-SDAG-LABEL: s_exp2_v2f32: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
| ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f32_e32 v1, s3 |
| ; VI-SDAG-NEXT: v_exp_f32_e32 v0, s2 |
| ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1 |
| ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0 |
| ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-SDAG-NEXT: s_endpgm |
| ; |
| ; VI-GISEL-LABEL: s_exp2_v2f32: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
| ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_exp_f32_e32 v0, s2 |
| ; VI-GISEL-NEXT: v_exp_f32_e32 v1, s3 |
| ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 |
| ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 |
| ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX900-SDAG-LABEL: s_exp2_v2f32: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
| ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, s3 |
| ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, s2 |
| ; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] |
| ; GFX900-SDAG-NEXT: s_endpgm |
| ; |
| ; GFX900-GISEL-LABEL: s_exp2_v2f32: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
| ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, s2 |
| ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, s3 |
| ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] |
| ; GFX900-GISEL-NEXT: s_endpgm |
| ; |
| ; R600-LABEL: s_exp2_v2f32: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] |
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; R600-NEXT: ALU clause starting at 4: |
| ; R600-NEXT: EXP_IEEE * T0.Y, KC0[3].X, |
| ; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x, |
| ; R600-NEXT: EXP_IEEE * T0.X, KC0[2].W, |
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_exp2_v2f32: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: EXP_IEEE T1.X (MASKED), KC0[3].X, |
| ; CM-NEXT: EXP_IEEE T1.Y, KC0[3].X, |
| ; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[3].X, |
| ; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[3].X, |
| ; CM-NEXT: EXP_IEEE T1.X, KC0[2].W, |
| ; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[2].W, |
| ; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[2].W, |
| ; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[2].W, |
| %result = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in) |
| store <2 x float> %result, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Kernel with a <3 x float> argument. GCN targets are checked for three |
| ; v_exp_f32 instructions. The SI checks store the result as a dwordx2 plus a |
| ; dword at offset 8, while VI and GFX900 use a single dwordx3 store. R600 and |
| ; CM each check two MEM_RAT_CACHELESS stores. |
| define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) { |
| ; SI-SDAG-LABEL: s_exp2_v3f32: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd |
| ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-SDAG-NEXT: s_mov_b32 s2, -1 |
| ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v2, s6 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, s5 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, s4 |
| ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 |
| ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-SDAG-NEXT: s_endpgm |
| ; |
| ; SI-GISEL-LABEL: s_exp2_v3f32: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd |
| ; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-GISEL-NEXT: s_mov_b32 s2, -1 |
| ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, s4 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, s5 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v2, s6 |
| ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 |
| ; SI-GISEL-NEXT: s_endpgm |
| ; |
| ; VI-SDAG-LABEL: s_exp2_v3f32: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 |
| ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 |
| ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f32_e32 v2, s6 |
| ; VI-SDAG-NEXT: v_exp_f32_e32 v1, s5 |
| ; VI-SDAG-NEXT: v_exp_f32_e32 v0, s4 |
| ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s1 |
| ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s0 |
| ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2] |
| ; VI-SDAG-NEXT: s_endpgm |
| ; |
| ; VI-GISEL-LABEL: s_exp2_v3f32: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 |
| ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 |
| ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_exp_f32_e32 v0, s4 |
| ; VI-GISEL-NEXT: v_exp_f32_e32 v1, s5 |
| ; VI-GISEL-NEXT: v_exp_f32_e32 v2, s6 |
| ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s1 |
| ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s0 |
| ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2] |
| ; VI-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX900-SDAG-LABEL: s_exp2_v3f32: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 |
| ; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, s6 |
| ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, s5 |
| ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, s4 |
| ; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] |
| ; GFX900-SDAG-NEXT: s_endpgm |
| ; |
| ; GFX900-GISEL-LABEL: s_exp2_v3f32: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 |
| ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, s4 |
| ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, s5 |
| ; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, s6 |
| ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] |
| ; GFX900-GISEL-NEXT: s_endpgm |
| ; |
| ; R600-LABEL: s_exp2_v3f32: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] |
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.X, T2.X, 0 |
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: ALU clause starting at 4: |
| ; R600-NEXT: EXP_IEEE * T0.Y, KC0[3].Z, |
| ; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x, |
| ; R600-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, |
| ; R600-NEXT: EXP_IEEE * T0.X, KC0[3].Y, |
| ; R600-NEXT: 2(2.802597e-45), 8(1.121039e-44) |
| ; R600-NEXT: LSHR T2.X, PV.W, literal.x, |
| ; R600-NEXT: EXP_IEEE * T3.X, KC0[3].W, |
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_exp2_v3f32: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 17, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T1.X |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, |
| ; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) |
| ; CM-NEXT: LSHR * T0.X, PV.W, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: EXP_IEEE T2.X, KC0[3].W, |
| ; CM-NEXT: EXP_IEEE T2.Y (MASKED), KC0[3].W, |
| ; CM-NEXT: EXP_IEEE T2.Z (MASKED), KC0[3].W, |
| ; CM-NEXT: EXP_IEEE * T2.W (MASKED), KC0[3].W, |
| ; CM-NEXT: EXP_IEEE T3.X (MASKED), KC0[3].Z, |
| ; CM-NEXT: EXP_IEEE T3.Y, KC0[3].Z, |
| ; CM-NEXT: EXP_IEEE T3.Z (MASKED), KC0[3].Z, |
| ; CM-NEXT: EXP_IEEE * T3.W (MASKED), KC0[3].Z, |
| ; CM-NEXT: EXP_IEEE T3.X, KC0[3].Y, |
| ; CM-NEXT: EXP_IEEE T3.Y (MASKED), KC0[3].Y, |
| ; CM-NEXT: EXP_IEEE T3.Z (MASKED), KC0[3].Y, |
| ; CM-NEXT: EXP_IEEE * T3.W (MASKED), KC0[3].Y, |
| %result = call <3 x float> @llvm.exp2.v3f32(<3 x float> %in) |
| store <3 x float> %result, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; FIXME: We should be able to merge these packets together on Cayman so we |
| ; have a maximum of 4 instructions. |
| ; Kernel with a <4 x float> argument. GCN targets are checked for four |
| ; v_exp_f32 instructions and a single dwordx4 store. R600 checks four |
| ; EXP_IEEE; CM checks four groups of four EXP_IEEE slots, each group writing |
| ; one unmasked channel of T1. |
| define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) { |
| ; SI-SDAG-LABEL: s_exp2_v4f32: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd |
| ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-SDAG-NEXT: s_mov_b32 s2, -1 |
| ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v3, s7 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v2, s6 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, s5 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, s4 |
| ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 |
| ; SI-SDAG-NEXT: s_endpgm |
| ; |
| ; SI-GISEL-LABEL: s_exp2_v4f32: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd |
| ; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 |
| ; SI-GISEL-NEXT: s_mov_b32 s2, -1 |
| ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, s4 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, s5 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v2, s6 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v3, s7 |
| ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 |
| ; SI-GISEL-NEXT: s_endpgm |
| ; |
| ; VI-SDAG-LABEL: s_exp2_v4f32: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 |
| ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 |
| ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f32_e32 v3, s7 |
| ; VI-SDAG-NEXT: v_exp_f32_e32 v2, s6 |
| ; VI-SDAG-NEXT: v_exp_f32_e32 v1, s5 |
| ; VI-SDAG-NEXT: v_exp_f32_e32 v0, s4 |
| ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s1 |
| ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s0 |
| ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] |
| ; VI-SDAG-NEXT: s_endpgm |
| ; |
| ; VI-GISEL-LABEL: s_exp2_v4f32: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 |
| ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 |
| ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_exp_f32_e32 v0, s4 |
| ; VI-GISEL-NEXT: v_exp_f32_e32 v1, s5 |
| ; VI-GISEL-NEXT: v_exp_f32_e32 v2, s6 |
| ; VI-GISEL-NEXT: v_exp_f32_e32 v3, s7 |
| ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s1 |
| ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s0 |
| ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] |
| ; VI-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX900-SDAG-LABEL: s_exp2_v4f32: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 |
| ; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| ; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 |
| ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, s7 |
| ; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, s6 |
| ; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, s5 |
| ; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, s4 |
| ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] |
| ; GFX900-SDAG-NEXT: s_endpgm |
| ; |
| ; GFX900-GISEL-LABEL: s_exp2_v4f32: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 |
| ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
| ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 |
| ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, s4 |
| ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, s5 |
| ; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, s6 |
| ; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, s7 |
| ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] |
| ; GFX900-GISEL-NEXT: s_endpgm |
| ; |
| ; R600-LABEL: s_exp2_v4f32: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] |
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; R600-NEXT: ALU clause starting at 4: |
| ; R600-NEXT: EXP_IEEE * T0.W, KC0[4].X, |
| ; R600-NEXT: EXP_IEEE * T0.Z, KC0[3].W, |
| ; R600-NEXT: EXP_IEEE * T0.Y, KC0[3].Z, |
| ; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x, |
| ; R600-NEXT: EXP_IEEE * T0.X, KC0[3].Y, |
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; |
| ; CM-LABEL: s_exp2_v4f32: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: ALU 17, @4, KC0[CB0:0-32], KC1[] |
| ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| ; CM-NEXT: ALU clause starting at 4: |
| ; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, |
| ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) |
| ; CM-NEXT: EXP_IEEE T1.X (MASKED), KC0[4].X, |
| ; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[4].X, |
| ; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[4].X, |
| ; CM-NEXT: EXP_IEEE * T1.W, KC0[4].X, |
| ; CM-NEXT: EXP_IEEE T1.X (MASKED), KC0[3].W, |
| ; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[3].W, |
| ; CM-NEXT: EXP_IEEE T1.Z, KC0[3].W, |
| ; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[3].W, |
| ; CM-NEXT: EXP_IEEE T1.X (MASKED), KC0[3].Z, |
| ; CM-NEXT: EXP_IEEE T1.Y, KC0[3].Z, |
| ; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[3].Z, |
| ; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[3].Z, |
| ; CM-NEXT: EXP_IEEE T1.X, KC0[3].Y, |
| ; CM-NEXT: EXP_IEEE T1.Y (MASKED), KC0[3].Y, |
| ; CM-NEXT: EXP_IEEE T1.Z (MASKED), KC0[3].Y, |
| ; CM-NEXT: EXP_IEEE * T1.W (MASKED), KC0[3].Y, |
| %result = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in) |
| store <4 x float> %result, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Non-kernel function, float argument and return, no fast-math flags. All GCN |
| ; runs share one check: a single v_exp_f32_e32 on v0. The R600 and CM checks |
| ; for this function contain only CF_END and PAD. |
| define float @v_exp2_f32(float %in) { |
| ; GCN-LABEL: v_exp2_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; exp2 of fabs(%in). The GCN check expects the fabs to appear as the |v0| |
| ; operand of a single v_exp_f32_e64, with no separate instruction for it. |
| define float @v_exp2_fabs_f32(float %in) { |
| ; GCN-LABEL: v_exp2_fabs_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e64 v0, |v0| |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_fabs_f32: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_fabs_f32: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fabs = call float @llvm.fabs.f32(float %in) |
| %result = call float @llvm.exp2.f32(float %fabs) |
| ret float %result |
| } |
| |
| ; exp2 of fneg(fabs(%in)). The GCN check expects both operations to appear |
| ; as the -|v0| operand of a single v_exp_f32_e64. |
| define float @v_exp2_fneg_fabs_f32(float %in) { |
| ; GCN-LABEL: v_exp2_fneg_fabs_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e64 v0, -|v0| |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_fneg_fabs_f32: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_fneg_fabs_f32: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fabs = call float @llvm.fabs.f32(float %in) |
| %fneg.fabs = fneg float %fabs |
| %result = call float @llvm.exp2.f32(float %fneg.fabs) |
| ret float %result |
| } |
| |
| ; exp2 of fneg(%in). The GCN check expects the negation to appear as the -v0 |
| ; operand of a single v_exp_f32_e64. |
| define float @v_exp2_fneg_f32(float %in) { |
| ; GCN-LABEL: v_exp2_fneg_f32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e64 v0, -v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_fneg_f32: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_fneg_f32: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fneg = fneg float %in |
| %result = call float @llvm.exp2.f32(float %fneg) |
| ret float %result |
| } |
| |
| ; exp2 call carrying the `fast` flag. The GCN check expects a single |
| ; v_exp_f32_e32 on v0. |
| define float @v_exp2_f32_fast(float %in) { |
| ; GCN-LABEL: v_exp2_f32_fast: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_fast: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_fast: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call fast float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; Unflagged exp2 call inside a function carrying the "unsafe-fp-math"="true" |
| ; attribute. The GCN check expects a single v_exp_f32_e32 on v0. |
| define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { |
| ; GCN-LABEL: v_exp2_f32_unsafe_math_attr: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_unsafe_math_attr: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_unsafe_math_attr: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; Unflagged exp2 call inside a function carrying the |
| ; "approx-func-fp-math"="true" attribute. The GCN check expects a single |
| ; v_exp_f32_e32 on v0. |
| define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { |
| ; GCN-LABEL: v_exp2_f32_approx_fn_attr: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_approx_fn_attr: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_approx_fn_attr: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; exp2 call carrying the `ninf` flag. The GCN check expects a single |
| ; v_exp_f32_e32 on v0. |
| define float @v_exp2_f32_ninf(float %in) { |
| ; GCN-LABEL: v_exp2_f32_ninf: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_ninf: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_ninf: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call ninf float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; exp2 call carrying the `afn` flag. The GCN check expects a single |
| ; v_exp_f32_e32 on v0. |
| define float @v_exp2_f32_afn(float %in) { |
| ; GCN-LABEL: v_exp2_f32_afn: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_afn: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_afn: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call afn float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `afn` exp2 call in a function using attribute group #0. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #0 is defined outside the lines visible here; from the _daz |
| ; suffix it presumably selects a denormal-flushing mode - confirm against the |
| ; attribute definitions. |
| define float @v_exp2_f32_afn_daz(float %in) #0 { |
| ; GCN-LABEL: v_exp2_f32_afn_daz: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_afn_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_afn_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call afn float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `afn` exp2 call in a function using attribute group #1. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #1 is defined outside the lines visible here; from the |
| ; _dynamic suffix it presumably selects a dynamic denormal mode - confirm |
| ; against the attribute definitions. |
| define float @v_exp2_f32_afn_dynamic(float %in) #1 { |
| ; GCN-LABEL: v_exp2_f32_afn_dynamic: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_afn_dynamic: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_afn_dynamic: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call afn float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `afn` exp2 of fabs(%in). The GCN check expects the fabs to appear as the |
| ; |v0| operand of a single v_exp_f32_e64. |
| define float @v_fabs_exp2_f32_afn(float %in) { |
| ; GCN-LABEL: v_fabs_exp2_f32_afn: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e64 v0, |v0| |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_fabs_exp2_f32_afn: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_fabs_exp2_f32_afn: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fabs = call float @llvm.fabs.f32(float %in) |
| %result = call afn float @llvm.exp2.f32(float %fabs) |
| ret float %result |
| } |
| |
| ; Unflagged exp2 call in a function using attribute group #0. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #0 is defined outside the lines visible here; from the _daz |
| ; suffix it presumably selects a denormal-flushing mode - confirm against the |
| ; attribute definitions. |
| define float @v_exp2_f32_daz(float %in) #0 { |
| ; GCN-LABEL: v_exp2_f32_daz: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; exp2 call carrying the `nnan` flag. The GCN check expects a single |
| ; v_exp_f32_e32 on v0. |
| define float @v_exp2_f32_nnan(float %in) { |
| ; GCN-LABEL: v_exp2_f32_nnan: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_nnan: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_nnan: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call nnan float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `nnan` exp2 call in a function using attribute group #0. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #0 is defined outside the lines visible here; from the _daz |
| ; suffix it presumably selects a denormal-flushing mode - confirm against the |
| ; attribute definitions. |
| define float @v_exp2_f32_nnan_daz(float %in) #0 { |
| ; GCN-LABEL: v_exp2_f32_nnan_daz: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_nnan_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_nnan_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call nnan float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `nnan` exp2 call in a function using attribute group #1. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #1 is defined outside the lines visible here; from the |
| ; _dynamic suffix it presumably selects a dynamic denormal mode - confirm |
| ; against the attribute definitions. |
| define float @v_exp2_f32_nnan_dynamic(float %in) #1 { |
| ; GCN-LABEL: v_exp2_f32_nnan_dynamic: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_nnan_dynamic: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_nnan_dynamic: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call nnan float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `ninf` exp2 call in a function using attribute group #0. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #0 is defined outside the lines visible here; from the _daz |
| ; suffix it presumably selects a denormal-flushing mode - confirm against the |
| ; attribute definitions. |
| define float @v_exp2_f32_ninf_daz(float %in) #0 { |
| ; GCN-LABEL: v_exp2_f32_ninf_daz: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_ninf_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_ninf_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call ninf float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `ninf` exp2 call in a function using attribute group #1. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #1 is defined outside the lines visible here; from the |
| ; _dynamic suffix it presumably selects a dynamic denormal mode - confirm |
| ; against the attribute definitions. |
| define float @v_exp2_f32_ninf_dynamic(float %in) #1 { |
| ; GCN-LABEL: v_exp2_f32_ninf_dynamic: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_ninf_dynamic: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_ninf_dynamic: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call ninf float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; exp2 call carrying both `nnan` and `ninf`. The GCN check expects a single |
| ; v_exp_f32_e32 on v0. |
| define float @v_exp2_f32_nnan_ninf(float %in) { |
| ; GCN-LABEL: v_exp2_f32_nnan_ninf: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_nnan_ninf: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_nnan_ninf: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call nnan ninf float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `nnan ninf` exp2 call in a function using attribute group #0. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #0 is defined outside the lines visible here; from the _daz |
| ; suffix it presumably selects a denormal-flushing mode - confirm against the |
| ; attribute definitions. |
| define float @v_exp2_f32_nnan_ninf_daz(float %in) #0 { |
| ; GCN-LABEL: v_exp2_f32_nnan_ninf_daz: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_nnan_ninf_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_nnan_ninf_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call nnan ninf float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `nnan ninf` exp2 call in a function using attribute group #1. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #1 is defined outside the lines visible here; from the |
| ; _dynamic suffix it presumably selects a dynamic denormal mode - confirm |
| ; against the attribute definitions. |
| define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 { |
| ; GCN-LABEL: v_exp2_f32_nnan_ninf_dynamic: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_nnan_ninf_dynamic: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_nnan_ninf_dynamic: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call nnan ninf float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; `fast` exp2 call in a function using attribute group #0. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #0 is defined outside the lines visible here; from the _daz |
| ; suffix it presumably selects a denormal-flushing mode - confirm against the |
| ; attribute definitions. |
| define float @v_exp2_f32_fast_daz(float %in) #0 { |
| ; GCN-LABEL: v_exp2_f32_fast_daz: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_fast_daz: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_fast_daz: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call fast float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; Unflagged exp2 call in a function using attribute group #1. The GCN check |
| ; expects a single v_exp_f32_e32 on v0. |
| ; NOTE(review): #1 is defined outside the lines visible here; from the |
| ; _dynamic_mode suffix it presumably selects a dynamic denormal mode - |
| ; confirm against the attribute definitions. |
| define float @v_exp2_f32_dynamic_mode(float %in) #1 { |
| ; GCN-LABEL: v_exp2_f32_dynamic_mode: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_dynamic_mode: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_dynamic_mode: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call float @llvm.exp2.f32(float %in) |
| ret float %result |
| } |
| |
| ; exp2 of an undef operand. The GCN check still expects a v_exp_f32_e32, |
| ; with s4 as its source operand. |
| define float @v_exp2_f32_undef() { |
| ; GCN-LABEL: v_exp2_f32_undef: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_exp_f32_e32 v0, s4 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_undef: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_undef: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call float @llvm.exp2.f32(float undef) |
| ret float %result |
| } |
| |
| ; exp2 of the constant 0.0. The GCN check expects no v_exp_f32 at all: the |
| ; result is materialized directly as the constant 1.0 with a v_mov_b32. |
| define float @v_exp2_f32_0() { |
| ; GCN-LABEL: v_exp2_f32_0: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_mov_b32_e32 v0, 1.0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_0: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_0: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call float @llvm.exp2.f32(float 0.0) |
| ret float %result |
| } |
| |
| ; exp2 of a float produced by extending a half. The half arrives as an i16 and |
| ; is bitcast, so the input is a plain 16-bit value in v0. The GCN checks expect |
| ; one v_cvt_f32_f16 followed by one v_exp_f32. |
| define float @v_exp2_f32_from_fpext_f16(i16 %src.i) { |
| ; GCN-LABEL: v_exp2_f32_from_fpext_f16: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GCN-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_from_fpext_f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_from_fpext_f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %src = bitcast i16 %src.i to half |
| %fpext = fpext half %src to float |
| %result = call float @llvm.exp2.f32(float %fpext) |
| ret float %result |
| } |
| |
| ; exp2 of a float extended from the result of a half fadd. Both operands arrive |
| ; as i16 and are bitcast to half. |
| ; Expected code per the checks below: |
| ; - SI with SelectionDAG converts both inputs to f32 and adds in f32. |
| ; - SI with GlobalISel does the same and then rounds the sum through f16 |
| ; (v_cvt_f16_f32 then v_cvt_f32_f16) before v_exp_f32. |
| ; - VI and GFX900 add with v_add_f16 and convert the sum once. |
| define float @v_exp2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { |
| ; SI-SDAG-LABEL: v_exp2_f32_from_fpext_math_f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_f32_from_fpext_math_f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_exp2_f32_from_fpext_math_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_add_f16_e32 v0, v0, v1 |
| ; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; VI-NEXT: v_exp_f32_e32 v0, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: v_exp2_f32_from_fpext_math_f16: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_add_f16_e32 v0, v0, v1 |
| ; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GFX900-NEXT: v_exp_f32_e32 v0, v0 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_from_fpext_math_f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_from_fpext_math_f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %src0 = bitcast i16 %src0.i to half |
| %src1 = bitcast i16 %src1.i to half |
| %fadd = fadd half %src0, %src1 |
| %fpext = fpext half %fadd to float |
| %result = call float @llvm.exp2.f32(float %fpext) |
| ret float %result |
| } |
| |
| ; exp2 of a float extended from a bfloat argument. |
| ; With SelectionDAG the checks expect v_exp_f32 applied to v0 directly, with no |
| ; conversion instruction. |
| ; NOTE(review): the GlobalISel checks convert the input with v_cvt_f32_f16, |
| ; i.e. the same instruction used for half inputs elsewhere in this file. That |
| ; looks like bfloat being treated as half -- confirm whether this is a known |
| ; GlobalISel limitation before relying on these checks. |
| define float @v_exp2_f32_from_fpext_bf16(bfloat %src) { |
| ; GCN-SDAG-LABEL: v_exp2_f32_from_fpext_bf16: |
| ; GCN-SDAG: ; %bb.0: |
| ; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GCN-GISEL-LABEL: v_exp2_f32_from_fpext_bf16: |
| ; GCN-GISEL: ; %bb.0: |
| ; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f32_from_fpext_bf16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f32_from_fpext_bf16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fpext = fpext bfloat %src to float |
| %result = call float @llvm.exp2.f32(float %fpext) |
| ret float %result |
| } |
| |
| ; FIXME: Fold out fp16_to_fp (FP_TO_FP16) on no-f16 targets |
| ; exp2 of a half argument. |
| ; Expected code per the checks below: |
| ; - SI has no f16 exp in these checks and uses v_exp_f32. With SelectionDAG the |
| ; input is first rounded to f16 and extended again; with GlobalISel the input |
| ; is extended, and the result is converted back to f16. |
| ; - VI and GFX900 use a single v_exp_f16. |
| define half @v_exp2_f16(half %in) { |
| ; SI-SDAG-LABEL: v_exp2_f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_exp2_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_exp_f16_e32 v0, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: v_exp2_f16: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_exp_f16_e32 v0, v0 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call half @llvm.exp2.f16(half %in) |
| ret half %result |
| } |
| |
| ; exp2 of fabs(half). The checks expect the fabs to be folded into a source |
| ; modifier (written as vertical bars around the operand) instead of a separate |
| ; instruction: |
| ; - SI applies it on the v_cvt_f32_f16 that feeds v_exp_f32. |
| ; - VI and GFX900 apply it directly on v_exp_f16. |
| define half @v_exp2_fabs_f16(half %in) { |
| ; SI-SDAG-LABEL: v_exp2_fabs_f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_fabs_f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_exp2_fabs_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_exp_f16_e64 v0, |v0| |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: v_exp2_fabs_f16: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_exp_f16_e64 v0, |v0| |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_fabs_f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_fabs_f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fabs = call half @llvm.fabs.f16(half %in) |
| %result = call half @llvm.exp2.f16(half %fabs) |
| ret half %result |
| } |
| |
| ; exp2 of fneg(fabs(half)). The checks expect both operations to be folded into |
| ; a combined negate-of-absolute-value source modifier: |
| ; - SI applies it on the v_cvt_f32_f16 that feeds v_exp_f32. |
| ; - VI and GFX900 apply it directly on v_exp_f16. |
| define half @v_exp2_fneg_fabs_f16(half %in) { |
| ; SI-SDAG-LABEL: v_exp2_fneg_fabs_f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0| |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_fneg_fabs_f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_exp2_fneg_fabs_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_exp_f16_e64 v0, -|v0| |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: v_exp2_fneg_fabs_f16: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_exp_f16_e64 v0, -|v0| |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_fneg_fabs_f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_fneg_fabs_f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fabs = call half @llvm.fabs.f16(half %in) |
| %fneg.fabs = fneg half %fabs |
| %result = call half @llvm.exp2.f16(half %fneg.fabs) |
| ret half %result |
| } |
| |
| ; exp2 of fneg(half). The checks expect the negation to be folded into a source |
| ; modifier: |
| ; - SI with SelectionDAG applies it on the initial v_cvt_f16_f32. |
| ; - SI with GlobalISel applies it on the v_cvt_f32_f16. |
| ; - VI and GFX900 apply it directly on v_exp_f16. |
| define half @v_exp2_fneg_f16(half %in) { |
| ; SI-SDAG-LABEL: v_exp2_fneg_f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_fneg_f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_exp2_fneg_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_exp_f16_e64 v0, -v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: v_exp2_fneg_f16: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_exp_f16_e64 v0, -v0 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_fneg_f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_fneg_f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fneg = fneg half %in |
| %result = call half @llvm.exp2.f16(half %fneg) |
| ret half %result |
| } |
| |
| ; exp2 of a half argument with the fast flag on the call. The expected |
| ; instruction sequences below are the same as for the call without fast-math |
| ; flags in v_exp2_f16: v_exp_f32 with conversions on SI, a single v_exp_f16 on |
| ; VI and GFX900. |
| define half @v_exp2_f16_fast(half %in) { |
| ; SI-SDAG-LABEL: v_exp2_f16_fast: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_f16_fast: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_exp2_f16_fast: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_exp_f16_e32 v0, v0 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: v_exp2_f16_fast: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: v_exp_f16_e32 v0, v0 |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_f16_fast: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_f16_fast: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call fast half @llvm.exp2.f16(half %in) |
| ret half %result |
| } |
| |
| ; exp2 of a <2 x half> argument. |
| ; Expected code per the checks below: |
| ; - SI handles the two elements in v0 and v1 separately with v_exp_f32. |
| ; - VI and GFX900 take both elements from v0: one v_exp_f16 for the low half and |
| ; one SDWA v_exp_f16 reading WORD_1 for the high half. |
| ; - The halves are recombined with v_or_b32 on VI, v_pack_b32_f16 on GFX900 |
| ; with SelectionDAG, and v_lshl_or_b32 on GFX900 with GlobalISel. |
| define <2 x half> @v_exp2_v2f16(<2 x half> %in) { |
| ; SI-SDAG-LABEL: v_exp2_v2f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_v2f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-SDAG-LABEL: v_exp2_v2f16: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 |
| ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-GISEL-LABEL: v_exp2_v2f16: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_exp2_v2f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0 |
| ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_exp2_v2f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_v2f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_v2f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call <2 x half> @llvm.exp2.v2f16(<2 x half> %in) |
| ret <2 x half> %result |
| } |
| |
| ; exp2 of fabs(<2 x half>). |
| ; Expected code per the checks below: |
| ; - With SelectionDAG the fabs becomes an absolute-value source modifier on the |
| ; per-element instructions (v_cvt_f32_f16 on SI, v_exp_f16 on VI and GFX900). |
| ; - With GlobalISel the fabs is a separate v_and_b32 with 0x7fff7fff, which |
| ; clears the sign bit of both 16-bit halves; on SI the two inputs are first |
| ; packed into one register for that mask and split again afterwards. |
| define <2 x half> @v_exp2_fabs_v2f16(<2 x half> %in) { |
| ; SI-SDAG-LABEL: v_exp2_fabs_v2f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_fabs_v2f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 |
| ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-SDAG-LABEL: v_exp2_fabs_v2f16: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, |v0| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-SDAG-NEXT: v_exp_f16_e64 v0, |v0| |
| ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-GISEL-LABEL: v_exp2_fabs_v2f16: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_exp2_fabs_v2f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-SDAG-NEXT: v_exp_f16_e64 v0, |v0| |
| ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_exp2_fabs_v2f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_fabs_v2f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_fabs_v2f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) |
| %result = call <2 x half> @llvm.exp2.v2f16(<2 x half> %fabs) |
| ret <2 x half> %result |
| } |
| |
| ; exp2 of fneg(fabs(<2 x half>)). |
| ; Expected code per the checks below: |
| ; - On SI (both selectors) and with GlobalISel on VI and GFX900, the combined |
| ; fneg/fabs is a v_or_b32 with 0x80008000, which sets the sign bit of both |
| ; 16-bit halves; SI packs the two inputs into one register for this first. |
| ; - With SelectionDAG on VI and GFX900 it is folded into a negated |
| ; absolute-value source modifier on each v_exp_f16. |
| define <2 x half> @v_exp2_fneg_fabs_v2f16(<2 x half> %in) { |
| ; SI-SDAG-LABEL: v_exp2_fneg_fabs_v2f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0 |
| ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_fneg_fabs_v2f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; SI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 |
| ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-SDAG-LABEL: v_exp2_fneg_fabs_v2f16: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, -|v0| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-SDAG-NEXT: v_exp_f16_e64 v0, -|v0| |
| ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-GISEL-LABEL: v_exp2_fneg_fabs_v2f16: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_exp2_fneg_fabs_v2f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-SDAG-NEXT: v_exp_f16_e64 v0, -|v0| |
| ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_exp2_fneg_fabs_v2f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_fneg_fabs_v2f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_fneg_fabs_v2f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) |
| %fneg.fabs = fneg <2 x half> %fabs |
| %result = call <2 x half> @llvm.exp2.v2f16(<2 x half> %fneg.fabs) |
| ret <2 x half> %result |
| } |
| |
| ; exp2 of fneg(<2 x half>). |
| ; Expected code per the checks below: |
| ; - On SI (both selectors) and with GlobalISel on VI and GFX900, the fneg is a |
| ; v_xor_b32 with 0x80008000, which flips the sign bit of both 16-bit halves; |
| ; SI packs the two inputs into one register for this first. |
| ; - With SelectionDAG on VI and GFX900 it is folded into a negate source |
| ; modifier on each v_exp_f16. |
| define <2 x half> @v_exp2_fneg_v2f16(<2 x half> %in) { |
| ; SI-SDAG-LABEL: v_exp2_fneg_v2f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 |
| ; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_fneg_v2f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; SI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 |
| ; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-SDAG-LABEL: v_exp2_fneg_v2f16: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-SDAG-NEXT: v_exp_f16_e64 v0, -v0 |
| ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-GISEL-LABEL: v_exp2_fneg_v2f16: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_exp2_fneg_v2f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-SDAG-NEXT: v_exp_f16_e64 v0, -v0 |
| ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_exp2_fneg_v2f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_fneg_v2f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_fneg_v2f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %fneg = fneg <2 x half> %in |
| %result = call <2 x half> @llvm.exp2.v2f16(<2 x half> %fneg) |
| ret <2 x half> %result |
| } |
| |
| ; exp2 of a <2 x half> argument with the fast flag on the call. The expected |
| ; instruction sequences below are the same as for the call without fast-math |
| ; flags in v_exp2_v2f16. |
| define <2 x half> @v_exp2_v2f16_fast(<2 x half> %in) { |
| ; SI-SDAG-LABEL: v_exp2_v2f16_fast: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_v2f16_fast: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-SDAG-LABEL: v_exp2_v2f16_fast: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 |
| ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-GISEL-LABEL: v_exp2_v2f16_fast: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_exp2_v2f16_fast: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0 |
| ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_exp2_v2f16_fast: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_v2f16_fast: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_v2f16_fast: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %in) |
| ret <2 x half> %result |
| } |
| |
| ; exp2 of a <3 x half> argument. |
| ; NOTE(review): the function is named v_exp_v3f16 although it calls |
| ; llvm.exp2.v3f16; the sibling tests use a v_exp2_ prefix. Renaming would also |
| ; require regenerating the label checks below. |
| ; Expected code per the checks below: |
| ; - SI handles the three elements in v0, v1 and v2 separately with v_exp_f32. |
| ; - VI and GFX900 take the first two elements from v0 (one plain v_exp_f16 and |
| ; one SDWA v_exp_f16 reading WORD_1, then recombined into v0) and the third |
| ; from v1 with its own v_exp_f16. |
| define <3 x half> @v_exp_v3f16(<3 x half> %in) { |
| ; SI-SDAG-LABEL: v_exp_v3f16: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp_v3f16: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-SDAG-LABEL: v_exp_v3f16: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f16_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 |
| ; VI-SDAG-NEXT: v_exp_f16_e32 v1, v1 |
| ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 |
| ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-GISEL-LABEL: v_exp_v3f16: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_exp_f16_e32 v2, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1 |
| ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 |
| ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_exp_v3f16: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0 |
| ; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1 |
| ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_exp_v3f16: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1 |
| ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp_v3f16: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp_v3f16: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call <3 x half> @llvm.exp2.v3f16(<3 x half> %in) |
| ret <3 x half> %result |
| } |
| |
| ; exp2 of a <3 x half> argument with the afn flag on the call. The expected |
| ; instruction sequences below are the same as for the call without fast-math |
| ; flags in v_exp_v3f16: per-element v_exp_f32 on SI; on VI and GFX900 two |
| ; v_exp_f16 for the packed pair in v0 plus one for the third element in v1. |
| define <3 x half> @v_exp2_v3f16_afn(<3 x half> %in) { |
| ; SI-SDAG-LABEL: v_exp2_v3f16_afn: |
| ; SI-SDAG: ; %bb.0: |
| ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 |
| ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; SI-GISEL-LABEL: v_exp2_v3f16_afn: |
| ; SI-GISEL: ; %bb.0: |
| ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-SDAG-LABEL: v_exp2_v3f16_afn: |
| ; VI-SDAG: ; %bb.0: |
| ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-SDAG-NEXT: v_exp_f16_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 |
| ; VI-SDAG-NEXT: v_exp_f16_e32 v1, v1 |
| ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 |
| ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-GISEL-LABEL: v_exp2_v3f16_afn: |
| ; VI-GISEL: ; %bb.0: |
| ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-GISEL-NEXT: v_exp_f16_e32 v2, v0 |
| ; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1 |
| ; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 |
| ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-SDAG-LABEL: v_exp2_v3f16_afn: |
| ; GFX900-SDAG: ; %bb.0: |
| ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-SDAG-NEXT: v_exp_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0 |
| ; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1 |
| ; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 |
| ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-GISEL-LABEL: v_exp2_v3f16_afn: |
| ; GFX900-GISEL: ; %bb.0: |
| ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v0 |
| ; GFX900-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 |
| ; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1 |
| ; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2 |
| ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; R600-LABEL: v_exp2_v3f16_afn: |
| ; R600: ; %bb.0: |
| ; R600-NEXT: CF_END |
| ; R600-NEXT: PAD |
| ; |
| ; CM-LABEL: v_exp2_v3f16_afn: |
| ; CM: ; %bb.0: |
| ; CM-NEXT: CF_END |
| ; CM-NEXT: PAD |
| %result = call afn <3 x half> @llvm.exp2.v3f16(<3 x half> %in) |
| ret <3 x half> %result |
| } |
| |
| ; Intrinsic declarations for the tests in this file; every one carries |
| ; attribute group #2 (defined below). |
| declare float @llvm.fabs.f32(float) #2 |
| declare float @llvm.exp2.f32(float) #2 |
| declare <2 x float> @llvm.exp2.v2f32(<2 x float>) #2 |
| declare <3 x float> @llvm.exp2.v3f32(<3 x float>) #2 |
| declare <4 x float> @llvm.exp2.v4f32(<4 x float>) #2 |
| declare half @llvm.fabs.f16(half) #2 |
| declare half @llvm.exp2.f16(half) #2 |
| declare <2 x half> @llvm.exp2.v2f16(<2 x half>) #2 |
| declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 |
| declare <3 x half> @llvm.exp2.v3f16(<3 x half>) #2 |
| |
| ; Attribute groups. #0 and #1 override "denormal-fp-math-f32"; per the LLVM |
| ; LangRef the value is written as "<output mode>,<input mode>". |
| ; NOTE(review): no function in this part of the file references #0 or #1 -- |
| ; presumably they are used by f32 tests earlier in the file; confirm before removing. |
| attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" } |
| attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" } |
| ; #2 is the attribute set attached to each intrinsic declaration above. |
| attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |