| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SIVI,SI %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=SIVI,VI %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s |
| |
| declare double @llvm.copysign.f64(double, double) #0 |
| declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0 |
| declare <3 x double> @llvm.copysign.v3f64(<3 x double>, <3 x double>) #0 |
| declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) #0 |
| |
| define amdgpu_kernel void @s_test_copysign_f64(ptr addrspace(1) %out, [8 x i32], double %mag, [8 x i32], double %sign) { |
| ; SI-LABEL: s_test_copysign_f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13 |
| ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_brev_b32 s4, -2 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, s7 |
| ; SI-NEXT: v_mov_b32_e32 v1, s5 |
| ; SI-NEXT: v_bfi_b32 v1, s4, v0, v1 |
| ; SI-NEXT: v_mov_b32_e32 v0, s6 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x4c |
| ; VI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x74 |
| ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_brev_b32 s2, -2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s1 |
| ; VI-NEXT: v_mov_b32_e32 v1, s3 |
| ; VI-NEXT: v_mov_b32_e32 v2, s4 |
| ; VI-NEXT: v_bfi_b32 v1, s2, v0, v1 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v3, s5 |
| ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x74 |
| ; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x4c |
| ; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s3, v0 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s2 |
| ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double %mag, double %sign) |
| store double %result, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_0(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SI-LABEL: s_test_copysign_f64_0: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_and_b32 s4, s7, 0x7fffffff |
| ; SI-NEXT: v_mov_b32_e32 v0, s6 |
| ; SI-NEXT: v_mov_b32_e32 v1, s4 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_0: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x4c |
| ; VI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_bitset0_b32 s1, 31 |
| ; VI-NEXT: v_mov_b32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s3 |
| ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_0: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c |
| ; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_bitset0_b32 s1, 31 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double %mag, double 0.0) |
| store double %result, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_1(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SI-LABEL: s_test_copysign_f64_1: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_and_b32 s4, s7, 0x7fffffff |
| ; SI-NEXT: v_mov_b32_e32 v0, s6 |
| ; SI-NEXT: v_mov_b32_e32 v1, s4 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_1: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x4c |
| ; VI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_bitset0_b32 s1, 31 |
| ; VI-NEXT: v_mov_b32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s3 |
| ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c |
| ; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_bitset0_b32 s1, 31 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double %mag, double 1.0) |
| store double %result, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_10(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SI-LABEL: s_test_copysign_f64_10: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_and_b32 s4, s7, 0x7fffffff |
| ; SI-NEXT: v_mov_b32_e32 v0, s6 |
| ; SI-NEXT: v_mov_b32_e32 v1, s4 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_10: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x4c |
| ; VI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_bitset0_b32 s1, 31 |
| ; VI-NEXT: v_mov_b32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s3 |
| ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_10: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c |
| ; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_bitset0_b32 s1, 31 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double %mag, double 10.0) |
| store double %result, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_neg1(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SI-LABEL: s_test_copysign_f64_neg1: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_or_b32 s4, s7, 0x80000000 |
| ; SI-NEXT: v_mov_b32_e32 v0, s6 |
| ; SI-NEXT: v_mov_b32_e32 v1, s4 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_neg1: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x4c |
| ; VI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_bitset1_b32 s1, 31 |
| ; VI-NEXT: v_mov_b32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s3 |
| ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_neg1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c |
| ; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_bitset1_b32 s1, 31 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double %mag, double -1.0) |
| store double %result, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_neg10(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SI-LABEL: s_test_copysign_f64_neg10: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_or_b32 s4, s7, 0x80000000 |
| ; SI-NEXT: v_mov_b32_e32 v0, s6 |
| ; SI-NEXT: v_mov_b32_e32 v1, s4 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_neg10: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x4c |
| ; VI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_bitset1_b32 s1, 31 |
| ; VI-NEXT: v_mov_b32_e32 v2, s2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s3 |
| ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_neg10: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c |
| ; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_bitset1_b32 s1, 31 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double %mag, double -10.0) |
| store double %result, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_f32(ptr addrspace(1) %out, [8 x i32], double %mag, [8 x i32], float %sign) { |
| ; SI-LABEL: s_test_copysign_f64_f32: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13 |
| ; SI-NEXT: s_load_dword s4, s[4:5], 0x1d |
| ; SI-NEXT: s_brev_b32 s5, -2 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_mov_b32_e32 v0, s7 |
| ; SI-NEXT: v_mov_b32_e32 v1, s4 |
| ; SI-NEXT: v_bfi_b32 v1, s5, v0, v1 |
| ; SI-NEXT: v_mov_b32_e32 v0, s6 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x4c |
| ; VI-NEXT: s_load_dword s6, s[4:5], 0x74 |
| ; VI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 |
| ; VI-NEXT: s_brev_b32 s4, -2 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mov_b32_e32 v0, s1 |
| ; VI-NEXT: v_mov_b32_e32 v1, s6 |
| ; VI-NEXT: v_mov_b32_e32 v2, s2 |
| ; VI-NEXT: v_bfi_b32 v1, s4, v0, v1 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v3, s3 |
| ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x74 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c |
| ; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s6 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s1, v0 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3] |
| ; GFX11-NEXT: s_endpgm |
| %sign.ext = fpext float %sign to double |
| %result = call double @llvm.copysign.f64(double %mag, double %sign.ext) |
| store double %result, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_f16(ptr addrspace(1) %out, [8 x i32], double %mag, [8 x i32], half %sign) { |
| ; SI-LABEL: s_test_copysign_f64_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dword s2, s[4:5], 0x1d |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x13 |
| ; SI-NEXT: s_brev_b32 s6, -2 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: v_mov_b32_e32 v1, s5 |
| ; SI-NEXT: v_bfi_b32 v1, s6, v1, v0 |
| ; SI-NEXT: v_mov_b32_e32 v0, s4 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dword s6, s[4:5], 0x74 |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x4c |
| ; VI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 |
| ; VI-NEXT: s_brev_b32 s4, -2 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_lshlrev_b32_e64 v0, 16, s6 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v2, s2 |
| ; VI-NEXT: v_bfi_b32 v1, s4, v1, v0 |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: v_mov_b32_e32 v3, s3 |
| ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x74 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c |
| ; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_lshlrev_b32_e64 v0, 16, s6 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s1, v0 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3] |
| ; GFX11-NEXT: s_endpgm |
| %sign.ext = fpext half %sign to double |
| %result = call double @llvm.copysign.f64(double %mag, double %sign.ext) |
| store double %result, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_0_mag(ptr addrspace(1) %out, double %sign) { |
| ; SI-LABEL: s_test_copysign_f64_0_mag: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| ; SI-NEXT: s_mov_b32 s6, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, s0 |
| ; SI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; SI-NEXT: s_mov_b32 s5, s1 |
| ; SI-NEXT: v_mov_b32_e32 v1, s0 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_0_mag: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; VI-NEXT: v_mov_b32_e32 v2, 0 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s0 |
| ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_0_mag: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_and_b32 s2, s3, 0x80000000 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 |
| ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double 0.0, double %sign) |
| store double %result, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_1_mag(ptr addrspace(1) %out, double %sign) { |
| ; SI-LABEL: s_test_copysign_f64_1_mag: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| ; SI-NEXT: s_mov_b32 s6, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, s0 |
| ; SI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; SI-NEXT: s_or_b32 s0, s0, 0x3ff00000 |
| ; SI-NEXT: s_mov_b32 s5, s1 |
| ; SI-NEXT: v_mov_b32_e32 v1, s0 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_1_mag: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; VI-NEXT: v_mov_b32_e32 v2, 0 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; VI-NEXT: s_or_b32 s0, s0, 0x3ff00000 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s0 |
| ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_1_mag: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_and_b32 s2, s3, 0x80000000 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_or_b32 s2, s2, 0x3ff00000 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 |
| ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double 1.0, double %sign) |
| store double %result, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_10_mag(ptr addrspace(1) %out, double %sign) { |
| ; SI-LABEL: s_test_copysign_f64_10_mag: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| ; SI-NEXT: s_mov_b32 s6, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, s0 |
| ; SI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; SI-NEXT: s_or_b32 s0, s0, 0x40240000 |
| ; SI-NEXT: s_mov_b32 s5, s1 |
| ; SI-NEXT: v_mov_b32_e32 v1, s0 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_10_mag: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; VI-NEXT: v_mov_b32_e32 v2, 0 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; VI-NEXT: s_or_b32 s0, s0, 0x40240000 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s0 |
| ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_10_mag: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_and_b32 s2, s3, 0x80000000 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_or_b32 s2, s2, 0x40240000 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 |
| ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double 10.0, double %sign) |
| store double %result, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_neg1_mag(ptr addrspace(1) %out, double %sign) { |
| ; SI-LABEL: s_test_copysign_f64_neg1_mag: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| ; SI-NEXT: s_mov_b32 s6, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, s0 |
| ; SI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; SI-NEXT: s_or_b32 s0, s0, 0x3ff00000 |
| ; SI-NEXT: s_mov_b32 s5, s1 |
| ; SI-NEXT: v_mov_b32_e32 v1, s0 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_neg1_mag: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; VI-NEXT: v_mov_b32_e32 v2, 0 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; VI-NEXT: s_or_b32 s0, s0, 0x3ff00000 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s0 |
| ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_neg1_mag: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_and_b32 s2, s3, 0x80000000 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_or_b32 s2, s2, 0x3ff00000 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 |
| ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double -1.0, double %sign) |
| store double %result, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_f64_neg10_mag(ptr addrspace(1) %out, double %sign) { |
| ; SI-LABEL: s_test_copysign_f64_neg10_mag: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s7, 0xf000 |
| ; SI-NEXT: s_mov_b32 s6, -1 |
| ; SI-NEXT: v_mov_b32_e32 v0, 0 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: s_mov_b32 s4, s0 |
| ; SI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; SI-NEXT: s_or_b32 s0, s0, 0x40240000 |
| ; SI-NEXT: s_mov_b32 s5, s1 |
| ; SI-NEXT: v_mov_b32_e32 v1, s0 |
| ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_f64_neg10_mag: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 |
| ; VI-NEXT: v_mov_b32_e32 v2, 0 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mov_b32_e32 v0, s0 |
| ; VI-NEXT: s_and_b32 s0, s3, 0x80000000 |
| ; VI-NEXT: s_or_b32 s0, s0, 0x40240000 |
| ; VI-NEXT: v_mov_b32_e32 v1, s1 |
| ; VI-NEXT: v_mov_b32_e32 v3, s0 |
| ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_f64_neg10_mag: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_and_b32 s2, s3, 0x80000000 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_or_b32 s2, s2, 0x40240000 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 |
| ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1] |
| ; GFX11-NEXT: s_endpgm |
| %result = call double @llvm.copysign.f64(double -10.0, double %sign) |
| store double %result, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_v2f64(ptr addrspace(1) %out, <2 x double> %mag, <2 x double> %sign) { |
| ; SI-LABEL: s_test_copysign_v2f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0xd |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_brev_b32 s6, -2 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_mov_b32_e32 v0, s11 |
| ; SI-NEXT: v_mov_b32_e32 v1, s15 |
| ; SI-NEXT: v_bfi_b32 v3, s6, v0, v1 |
| ; SI-NEXT: v_mov_b32_e32 v0, s9 |
| ; SI-NEXT: v_mov_b32_e32 v1, s13 |
| ; SI-NEXT: v_bfi_b32 v1, s6, v0, v1 |
| ; SI-NEXT: v_mov_b32_e32 v0, s8 |
| ; SI-NEXT: v_mov_b32_e32 v2, s10 |
| ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_v2f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_brev_b32 s2, -2 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mov_b32_e32 v0, s11 |
| ; VI-NEXT: v_mov_b32_e32 v1, s15 |
| ; VI-NEXT: v_mov_b32_e32 v2, s9 |
| ; VI-NEXT: v_bfi_b32 v3, s2, v0, v1 |
| ; VI-NEXT: v_mov_b32_e32 v0, s13 |
| ; VI-NEXT: v_mov_b32_e32 v5, s1 |
| ; VI-NEXT: v_bfi_b32 v1, s2, v2, v0 |
| ; VI-NEXT: v_mov_b32_e32 v0, s8 |
| ; VI-NEXT: v_mov_b32_e32 v2, s10 |
| ; VI-NEXT: v_mov_b32_e32 v4, s0 |
| ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_v2f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: s_load_b256 s[8:15], s[4:5], 0x34 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s15 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, s13 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s8 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) |
| ; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, s11, v1 |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s9, v2 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, s10 |
| ; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1] |
| ; GFX11-NEXT: s_endpgm |
| %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign) |
| store <2 x double> %result, ptr addrspace(1) %out, align 16 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_v3f64(ptr addrspace(1) %out, <3 x double> %mag, <3 x double> %sign) { |
| ; SI-LABEL: s_test_copysign_v3f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x11 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_brev_b32 s6, -2 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_mov_b32_e32 v0, s11 |
| ; SI-NEXT: v_mov_b32_e32 v1, s19 |
| ; SI-NEXT: v_bfi_b32 v3, s6, v0, v1 |
| ; SI-NEXT: v_mov_b32_e32 v0, s9 |
| ; SI-NEXT: v_mov_b32_e32 v1, s17 |
| ; SI-NEXT: v_bfi_b32 v1, s6, v0, v1 |
| ; SI-NEXT: v_mov_b32_e32 v0, s13 |
| ; SI-NEXT: v_mov_b32_e32 v2, s21 |
| ; SI-NEXT: v_bfi_b32 v5, s6, v0, v2 |
| ; SI-NEXT: v_mov_b32_e32 v4, s12 |
| ; SI-NEXT: v_mov_b32_e32 v0, s8 |
| ; SI-NEXT: v_mov_b32_e32 v2, s10 |
| ; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:16 |
| ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_v3f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x44 |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_brev_b32 s2, -2 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mov_b32_e32 v0, s11 |
| ; VI-NEXT: v_mov_b32_e32 v1, s19 |
| ; VI-NEXT: v_mov_b32_e32 v2, s9 |
| ; VI-NEXT: v_bfi_b32 v3, s2, v0, v1 |
| ; VI-NEXT: v_mov_b32_e32 v0, s17 |
| ; VI-NEXT: v_bfi_b32 v1, s2, v2, v0 |
| ; VI-NEXT: v_mov_b32_e32 v0, s13 |
| ; VI-NEXT: v_mov_b32_e32 v2, s21 |
| ; VI-NEXT: v_bfi_b32 v5, s2, v0, v2 |
| ; VI-NEXT: s_add_u32 s2, s0, 16 |
| ; VI-NEXT: s_addc_u32 s3, s1, 0 |
| ; VI-NEXT: v_mov_b32_e32 v7, s3 |
| ; VI-NEXT: v_mov_b32_e32 v4, s12 |
| ; VI-NEXT: v_mov_b32_e32 v6, s2 |
| ; VI-NEXT: flat_store_dwordx2 v[6:7], v[4:5] |
| ; VI-NEXT: v_mov_b32_e32 v5, s1 |
| ; VI-NEXT: v_mov_b32_e32 v0, s8 |
| ; VI-NEXT: v_mov_b32_e32 v2, s10 |
| ; VI-NEXT: v_mov_b32_e32 v4, s0 |
| ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_v3f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x44 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v1, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v5, s21 :: v_dual_mov_b32 v0, s8 |
| ; GFX11-NEXT: v_dual_mov_b32 v7, s17 :: v_dual_mov_b32 v4, s12 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, s10 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) |
| ; GFX11-NEXT: v_bfi_b32 v5, 0x7fffffff, s13, v5 |
| ; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, s11, v1 |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s9, v7 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b64 v6, v[4:5], s[0:1] offset:16 |
| ; GFX11-NEXT: global_store_b128 v6, v[0:3], s[0:1] |
| ; GFX11-NEXT: s_endpgm |
| %result = call <3 x double> @llvm.copysign.v3f64(<3 x double> %mag, <3 x double> %sign) |
| store <3 x double> %result, ptr addrspace(1) %out, align 32 |
| ret void |
| } |
| |
| define amdgpu_kernel void @s_test_copysign_v4f64(ptr addrspace(1) %out, <4 x double> %mag, <4 x double> %sign) { |
| ; SI-LABEL: s_test_copysign_v4f64: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x11 |
| ; SI-NEXT: s_brev_b32 s6, -2 |
| ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 |
| ; SI-NEXT: s_mov_b32 s3, 0xf000 |
| ; SI-NEXT: s_mov_b32 s2, -1 |
| ; SI-NEXT: s_waitcnt lgkmcnt(0) |
| ; SI-NEXT: v_mov_b32_e32 v0, s11 |
| ; SI-NEXT: v_mov_b32_e32 v1, s19 |
| ; SI-NEXT: v_bfi_b32 v3, s6, v0, v1 |
| ; SI-NEXT: v_mov_b32_e32 v0, s9 |
| ; SI-NEXT: v_mov_b32_e32 v1, s17 |
| ; SI-NEXT: v_bfi_b32 v1, s6, v0, v1 |
| ; SI-NEXT: v_mov_b32_e32 v0, s15 |
| ; SI-NEXT: v_mov_b32_e32 v2, s23 |
| ; SI-NEXT: v_bfi_b32 v7, s6, v0, v2 |
| ; SI-NEXT: v_mov_b32_e32 v0, s13 |
| ; SI-NEXT: v_mov_b32_e32 v2, s21 |
| ; SI-NEXT: v_bfi_b32 v5, s6, v0, v2 |
| ; SI-NEXT: v_mov_b32_e32 v4, s12 |
| ; SI-NEXT: v_mov_b32_e32 v6, s14 |
| ; SI-NEXT: v_mov_b32_e32 v0, s8 |
| ; SI-NEXT: v_mov_b32_e32 v2, s10 |
| ; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 |
| ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 |
| ; SI-NEXT: s_endpgm |
| ; |
| ; VI-LABEL: s_test_copysign_v4f64: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x44 |
| ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| ; VI-NEXT: s_brev_b32 s2, -2 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: v_mov_b32_e32 v0, s11 |
| ; VI-NEXT: v_mov_b32_e32 v1, s19 |
| ; VI-NEXT: v_mov_b32_e32 v2, s9 |
| ; VI-NEXT: v_bfi_b32 v3, s2, v0, v1 |
| ; VI-NEXT: v_mov_b32_e32 v0, s17 |
| ; VI-NEXT: v_bfi_b32 v1, s2, v2, v0 |
| ; VI-NEXT: v_mov_b32_e32 v0, s15 |
| ; VI-NEXT: v_mov_b32_e32 v2, s23 |
| ; VI-NEXT: v_bfi_b32 v7, s2, v0, v2 |
| ; VI-NEXT: v_mov_b32_e32 v0, s13 |
| ; VI-NEXT: v_mov_b32_e32 v2, s21 |
| ; VI-NEXT: v_bfi_b32 v5, s2, v0, v2 |
| ; VI-NEXT: s_add_u32 s2, s0, 16 |
| ; VI-NEXT: s_addc_u32 s3, s1, 0 |
| ; VI-NEXT: v_mov_b32_e32 v9, s3 |
| ; VI-NEXT: v_mov_b32_e32 v4, s12 |
| ; VI-NEXT: v_mov_b32_e32 v6, s14 |
| ; VI-NEXT: v_mov_b32_e32 v8, s2 |
| ; VI-NEXT: flat_store_dwordx4 v[8:9], v[4:7] |
| ; VI-NEXT: v_mov_b32_e32 v0, s8 |
| ; VI-NEXT: v_mov_b32_e32 v5, s1 |
| ; VI-NEXT: v_mov_b32_e32 v2, s10 |
| ; VI-NEXT: v_mov_b32_e32 v4, s0 |
| ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] |
| ; VI-NEXT: s_endpgm |
| ; |
| ; GFX11-LABEL: s_test_copysign_v4f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x44 |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v3, s23 :: v_dual_mov_b32 v2, s14 |
| ; GFX11-NEXT: v_dual_mov_b32 v9, s21 :: v_dual_mov_b32 v4, s8 |
| ; GFX11-NEXT: v_dual_mov_b32 v5, s17 :: v_dual_mov_b32 v0, s12 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) |
| ; GFX11-NEXT: v_bfi_b32 v7, 0x7fffffff, s11, v1 |
| ; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, s15, v3 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, s13, v9 |
| ; GFX11-NEXT: v_mov_b32_e32 v6, s10 |
| ; GFX11-NEXT: v_bfi_b32 v5, 0x7fffffff, s9, v5 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 |
| ; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1] |
| ; GFX11-NEXT: s_endpgm |
| %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign) |
| store <4 x double> %result, ptr addrspace(1) %out, align 32 |
| ret void |
| } |
| |
| define double @v_test_copysign_f64(ptr addrspace(1) %out, [8 x i32], double %mag, [8 x i32], double %sign) { |
| ; SIVI-LABEL: v_test_copysign_f64: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: s_brev_b32 s4, -2 |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v10 |
| ; SIVI-NEXT: v_bfi_b32 v1, s4, v11, v21 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, v10 |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v21 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.copysign.f64(double %mag, double %sign) |
| ret double %result |
| } |
| |
| define double @v_test_copysign_f64_0(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SIVI-LABEL: v_test_copysign_f64_0: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v10 |
| ; SIVI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v11 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_f64_0: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_and_b32 v1, 0x7fffffff, v11 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.copysign.f64(double %mag, double 0.0) |
| ret double %result |
| } |
| |
| define double @v_test_copysign_f64_1(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SIVI-LABEL: v_test_copysign_f64_1: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v10 |
| ; SIVI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v11 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_f64_1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_and_b32 v1, 0x7fffffff, v11 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.copysign.f64(double %mag, double 1.0) |
| ret double %result |
| } |
| |
| define double @v_test_copysign_f64_10(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SIVI-LABEL: v_test_copysign_f64_10: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v10 |
| ; SIVI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v11 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_f64_10: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_and_b32 v1, 0x7fffffff, v11 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.copysign.f64(double %mag, double 10.0) |
| ret double %result |
| } |
| |
| define double @v_test_copysign_f64_neg1(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SIVI-LABEL: v_test_copysign_f64_neg1: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v10 |
| ; SIVI-NEXT: v_or_b32_e32 v1, 0x80000000, v11 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_f64_neg1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, v10 |
| ; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v11 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.copysign.f64(double %mag, double -1.0) |
| ret double %result |
| } |
| |
| define double @v_test_copysign_f64_neg10(ptr addrspace(1) %out, [8 x i32], double %mag) { |
| ; SIVI-LABEL: v_test_copysign_f64_neg10: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v10 |
| ; SIVI-NEXT: v_or_b32_e32 v1, 0x80000000, v11 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_f64_neg10: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, v10 |
| ; GFX11-NEXT: v_or_b32_e32 v1, 0x80000000, v11 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call double @llvm.copysign.f64(double %mag, double -10.0) |
| ret double %result |
| } |
| |
| define double @v_test_copysign_f64_f32(ptr addrspace(1) %out, [8 x i32], double %mag, [8 x i32], float %sign) { |
| ; SIVI-LABEL: v_test_copysign_f64_f32: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: s_brev_b32 s4, -2 |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v10 |
| ; SIVI-NEXT: v_bfi_b32 v1, s4, v11, v20 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_f64_f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, v10 |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v20 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %sign.ext = fpext float %sign to double |
| %result = call double @llvm.copysign.f64(double %mag, double %sign.ext) |
| ret double %result |
| } |
| |
| define double @v_test_copysign_f64_f16(ptr addrspace(1) %out, [8 x i32], double %mag, [8 x i32], half %sign) { |
| ; SI-LABEL: v_test_copysign_f64_f16: |
| ; SI: ; %bb.0: |
| ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SI-NEXT: s_brev_b32 s4, -2 |
| ; SI-NEXT: v_mov_b32_e32 v0, v10 |
| ; SI-NEXT: v_bfi_b32 v1, s4, v11, v20 |
| ; SI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: v_test_copysign_f64_f16: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v20 |
| ; VI-NEXT: s_brev_b32 s4, -2 |
| ; VI-NEXT: v_mov_b32_e32 v0, v10 |
| ; VI-NEXT: v_bfi_b32 v1, s4, v11, v1 |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_f64_f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_lshlrev_b32 v1, 16, v20 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v11, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %sign.ext = fpext half %sign to double |
| %result = call double @llvm.copysign.f64(double %mag, double %sign.ext) |
| ret double %result |
| } |
| |
| define <2 x double> @v_test_copysign_v2f64(ptr addrspace(1) %out, <2 x double> %mag, <2 x double> %sign) { |
| ; SIVI-LABEL: v_test_copysign_v2f64: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: s_brev_b32 s4, -2 |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v2 |
| ; SIVI-NEXT: v_bfi_b32 v1, s4, v3, v7 |
| ; SIVI-NEXT: v_bfi_b32 v3, s4, v5, v9 |
| ; SIVI-NEXT: v_mov_b32_e32 v2, v4 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_v2f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v3, v7 |
| ; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v5, v9 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, v4 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign) |
| ret <2 x double> %result |
| } |
| |
| define <3 x double> @v_test_copysign_v3f64(ptr addrspace(1) %out, <3 x double> %mag, <3 x double> %sign) { |
| ; SIVI-LABEL: v_test_copysign_v3f64: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: s_brev_b32 s4, -2 |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v2 |
| ; SIVI-NEXT: v_bfi_b32 v1, s4, v3, v9 |
| ; SIVI-NEXT: v_bfi_b32 v3, s4, v5, v11 |
| ; SIVI-NEXT: v_bfi_b32 v5, s4, v7, v13 |
| ; SIVI-NEXT: v_mov_b32_e32 v2, v4 |
| ; SIVI-NEXT: v_mov_b32_e32 v4, v6 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_v3f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v3, v9 |
| ; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v5, v11 |
| ; GFX11-NEXT: v_bfi_b32 v5, 0x7fffffff, v7, v13 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, v4 |
| ; GFX11-NEXT: v_mov_b32_e32 v4, v6 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call <3 x double> @llvm.copysign.v3f64(<3 x double> %mag, <3 x double> %sign) |
| ret <3 x double> %result |
| } |
| |
| define <4 x double> @v_test_copysign_v4f64(ptr addrspace(1) %out, <4 x double> %mag, <4 x double> %sign) { |
| ; SIVI-LABEL: v_test_copysign_v4f64: |
| ; SIVI: ; %bb.0: |
| ; SIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; SIVI-NEXT: s_brev_b32 s4, -2 |
| ; SIVI-NEXT: v_mov_b32_e32 v0, v2 |
| ; SIVI-NEXT: v_bfi_b32 v1, s4, v3, v11 |
| ; SIVI-NEXT: v_bfi_b32 v3, s4, v5, v13 |
| ; SIVI-NEXT: v_bfi_b32 v5, s4, v7, v15 |
| ; SIVI-NEXT: v_bfi_b32 v7, s4, v9, v17 |
| ; SIVI-NEXT: v_mov_b32_e32 v2, v4 |
| ; SIVI-NEXT: v_mov_b32_e32 v4, v6 |
| ; SIVI-NEXT: v_mov_b32_e32 v6, v8 |
| ; SIVI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v_test_copysign_v4f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, v2 |
| ; GFX11-NEXT: v_bfi_b32 v1, 0x7fffffff, v3, v11 |
| ; GFX11-NEXT: v_bfi_b32 v3, 0x7fffffff, v5, v13 |
| ; GFX11-NEXT: v_bfi_b32 v5, 0x7fffffff, v7, v15 |
| ; GFX11-NEXT: v_bfi_b32 v7, 0x7fffffff, v9, v17 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, v4 |
| ; GFX11-NEXT: v_mov_b32_e32 v4, v6 |
| ; GFX11-NEXT: v_mov_b32_e32 v6, v8 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign) |
| ret <4 x double> %result |
| } |
| |
| attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |