| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s |
| |
| |
| ; half8 copies eight consecutive half elements from %0 to %1 with one |
| ; scalar load and one scalar store per element, every access marked |
| ; align 2. The autogenerated checks below expect the eight accesses on each |
| ; side to be emitted as a single global_load_dwordx4 and a single |
| ; global_store_dwordx4 on gfx908, gfx90a and gfx1030. |
| ; Function Attrs: mustprogress nounwind willreturn |
define amdgpu_kernel void @half8(ptr addrspace(1) nocapture readonly %0, ptr addrspace(1) nocapture writeonly %1) local_unnamed_addr #0 { |
| ; GFX908-LABEL: half8: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX908-NEXT: v_mov_b32_e32 v4, 0 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: global_load_dwordx4 v[0:3], v4, s[0:1] |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] |
| ; GFX908-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: half8: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v4, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: global_load_dwordx4 v[0:3], v4, s[0:1] |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX1030-LABEL: half8: |
| ; GFX1030: ; %bb.0: |
| ; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX1030-NEXT: v_mov_b32_e32 v4, 0 |
| ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX1030-NEXT: global_load_dwordx4 v[0:3], v4, s[0:1] |
| ; GFX1030-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1030-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] |
| ; GFX1030-NEXT: s_endpgm |
| ; Addresses of the eight source elements, indices 0 through 7 from %0. |
| %gep0 = getelementptr half, ptr addrspace(1) %0, i64 0 |
| %gep1 = getelementptr half, ptr addrspace(1) %0, i64 1 |
| %gep2 = getelementptr half, ptr addrspace(1) %0, i64 2 |
| %gep3 = getelementptr half, ptr addrspace(1) %0, i64 3 |
| %gep4 = getelementptr half, ptr addrspace(1) %0, i64 4 |
| %gep5 = getelementptr half, ptr addrspace(1) %0, i64 5 |
| %gep6 = getelementptr half, ptr addrspace(1) %0, i64 6 |
| %gep7 = getelementptr half, ptr addrspace(1) %0, i64 7 |
| ; One scalar half load per source element, each marked align 2. |
| %l0 = load half, ptr addrspace(1) %gep0, align 2 |
| %l1 = load half, ptr addrspace(1) %gep1, align 2 |
| %l2 = load half, ptr addrspace(1) %gep2, align 2 |
| %l3 = load half, ptr addrspace(1) %gep3, align 2 |
| %l4 = load half, ptr addrspace(1) %gep4, align 2 |
| %l5 = load half, ptr addrspace(1) %gep5, align 2 |
| %l6 = load half, ptr addrspace(1) %gep6, align 2 |
| %l7 = load half, ptr addrspace(1) %gep7, align 2 |
| ; Addresses of the eight destination elements, indices 0 through 7 from %1. |
| %sgep0 = getelementptr half, ptr addrspace(1) %1, i64 0 |
| %sgep1 = getelementptr half, ptr addrspace(1) %1, i64 1 |
| %sgep2 = getelementptr half, ptr addrspace(1) %1, i64 2 |
| %sgep3 = getelementptr half, ptr addrspace(1) %1, i64 3 |
| %sgep4 = getelementptr half, ptr addrspace(1) %1, i64 4 |
| %sgep5 = getelementptr half, ptr addrspace(1) %1, i64 5 |
| %sgep6 = getelementptr half, ptr addrspace(1) %1, i64 6 |
| %sgep7 = getelementptr half, ptr addrspace(1) %1, i64 7 |
| ; Each loaded value is stored to the destination slot with the same index. |
| store half %l0, ptr addrspace(1) %sgep0, align 2 |
| store half %l1, ptr addrspace(1) %sgep1, align 2 |
| store half %l2, ptr addrspace(1) %sgep2, align 2 |
| store half %l3, ptr addrspace(1) %sgep3, align 2 |
| store half %l4, ptr addrspace(1) %sgep4, align 2 |
| store half %l5, ptr addrspace(1) %sgep5, align 2 |
| store half %l6, ptr addrspace(1) %sgep6, align 2 |
| store half %l7, ptr addrspace(1) %sgep7, align 2 |
| ret void |
| } |
| |
| ; half6 copies six consecutive half elements from %0 to %1 with one scalar |
| ; load and one scalar store per element, every access marked align 1 |
| ; (below the 2-byte size of a half). The autogenerated checks below still |
| ; expect a single global_load_dwordx3 and a single global_store_dwordx3 on |
| ; gfx908, gfx90a and gfx1030. |
| ; Function Attrs: mustprogress nounwind willreturn |
define amdgpu_kernel void @half6(ptr addrspace(1) nocapture readonly %0, ptr addrspace(1) nocapture writeonly %1) local_unnamed_addr #0 { |
| ; GFX908-LABEL: half6: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX908-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: global_load_dwordx3 v[0:2], v3, s[0:1] |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] |
| ; GFX908-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: half6: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: global_load_dwordx3 v[0:2], v3, s[0:1] |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX1030-LABEL: half6: |
| ; GFX1030: ; %bb.0: |
| ; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX1030-NEXT: v_mov_b32_e32 v3, 0 |
| ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX1030-NEXT: global_load_dwordx3 v[0:2], v3, s[0:1] |
| ; GFX1030-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1030-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] |
| ; GFX1030-NEXT: s_endpgm |
| ; Addresses of the six source elements, indices 0 through 5 from %0. |
| %gep0 = getelementptr half, ptr addrspace(1) %0, i64 0 |
| %gep1 = getelementptr half, ptr addrspace(1) %0, i64 1 |
| %gep2 = getelementptr half, ptr addrspace(1) %0, i64 2 |
| %gep3 = getelementptr half, ptr addrspace(1) %0, i64 3 |
| %gep4 = getelementptr half, ptr addrspace(1) %0, i64 4 |
| %gep5 = getelementptr half, ptr addrspace(1) %0, i64 5 |
| ; One scalar half load per source element, each marked align 1. |
| %l0 = load half, ptr addrspace(1) %gep0, align 1 |
| %l1 = load half, ptr addrspace(1) %gep1, align 1 |
| %l2 = load half, ptr addrspace(1) %gep2, align 1 |
| %l3 = load half, ptr addrspace(1) %gep3, align 1 |
| %l4 = load half, ptr addrspace(1) %gep4, align 1 |
| %l5 = load half, ptr addrspace(1) %gep5, align 1 |
| ; Addresses of the six destination elements, indices 0 through 5 from %1. |
| %sgep0 = getelementptr half, ptr addrspace(1) %1, i64 0 |
| %sgep1 = getelementptr half, ptr addrspace(1) %1, i64 1 |
| %sgep2 = getelementptr half, ptr addrspace(1) %1, i64 2 |
| %sgep3 = getelementptr half, ptr addrspace(1) %1, i64 3 |
| %sgep4 = getelementptr half, ptr addrspace(1) %1, i64 4 |
| %sgep5 = getelementptr half, ptr addrspace(1) %1, i64 5 |
| ; Each loaded value is stored to the destination slot with the same index. |
| store half %l0, ptr addrspace(1) %sgep0, align 1 |
| store half %l1, ptr addrspace(1) %sgep1, align 1 |
| store half %l2, ptr addrspace(1) %sgep2, align 1 |
| store half %l3, ptr addrspace(1) %sgep3, align 1 |
| store half %l4, ptr addrspace(1) %sgep4, align 1 |
| store half %l5, ptr addrspace(1) %sgep5, align 1 |
| ret void |
| } |
| |
| ; half4 copies four consecutive half elements from %0 to %1 with one scalar |
| ; load and one scalar store per element, every access marked align 4. |
| ; Unlike the other kernels in this file, the autogenerated checks below |
| ; expect the source to be read with a scalar s_load_dwordx2, the result to |
| ; be copied into vector registers (two v_mov_b32 on gfx908 and gfx1030, one |
| ; v_pk_mov_b32 on gfx90a), and then written with one global_store_dwordx2. |
| ; NOTE(review): align 4 is stated on every element even though neighbouring |
| ; half elements are only 2 bytes apart, so the alignments cannot all hold at |
| ; once; presumably this is deliberate to exercise the over-aligned case. |
| ; TODO confirm. |
| ; Function Attrs: mustprogress nounwind willreturn |
define amdgpu_kernel void @half4(ptr addrspace(1) nocapture readonly %0, ptr addrspace(1) nocapture writeonly %1) local_unnamed_addr #0 { |
| ; GFX908-LABEL: half4: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX908-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX908-NEXT: v_mov_b32_e32 v1, s1 |
| ; GFX908-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] |
| ; GFX908-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: half4: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] |
| ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX1030-LABEL: half4: |
| ; GFX1030: ; %bb.0: |
| ; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX1030-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX1030-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 |
| ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX1030-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX1030-NEXT: v_mov_b32_e32 v1, s1 |
| ; GFX1030-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] |
| ; GFX1030-NEXT: s_endpgm |
| ; Addresses of the four source elements, indices 0 through 3 from %0. |
| %gep0 = getelementptr half, ptr addrspace(1) %0, i64 0 |
| %gep1 = getelementptr half, ptr addrspace(1) %0, i64 1 |
| %gep2 = getelementptr half, ptr addrspace(1) %0, i64 2 |
| %gep3 = getelementptr half, ptr addrspace(1) %0, i64 3 |
| ; One scalar half load per source element, each marked align 4. |
| %l0 = load half, ptr addrspace(1) %gep0, align 4 |
| %l1 = load half, ptr addrspace(1) %gep1, align 4 |
| %l2 = load half, ptr addrspace(1) %gep2, align 4 |
| %l3 = load half, ptr addrspace(1) %gep3, align 4 |
| ; Addresses of the four destination elements, indices 0 through 3 from %1. |
| %sgep0 = getelementptr half, ptr addrspace(1) %1, i64 0 |
| %sgep1 = getelementptr half, ptr addrspace(1) %1, i64 1 |
| %sgep2 = getelementptr half, ptr addrspace(1) %1, i64 2 |
| %sgep3 = getelementptr half, ptr addrspace(1) %1, i64 3 |
| ; Each loaded value is stored to the destination slot with the same index. |
| store half %l0, ptr addrspace(1) %sgep0, align 4 |
| store half %l1, ptr addrspace(1) %sgep1, align 4 |
| store half %l2, ptr addrspace(1) %sgep2, align 4 |
| store half %l3, ptr addrspace(1) %sgep3, align 4 |
| ret void |
| } |
| |
| |
| ; half2 copies two consecutive half elements from %0 to %1 with one scalar |
| ; load and one scalar store per element. No access carries an explicit |
| ; align, so each one uses the default alignment for half. The autogenerated |
| ; checks below expect a single global_load_dword and a single |
| ; global_store_dword on gfx908, gfx90a and gfx1030. |
| ; Function Attrs: mustprogress nounwind willreturn |
define amdgpu_kernel void @half2(ptr addrspace(1) nocapture readonly %0, ptr addrspace(1) nocapture writeonly %1) local_unnamed_addr #0 { |
| ; GFX908-LABEL: half2: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX908-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: global_load_dword v1, v0, s[0:1] |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dword v0, v1, s[2:3] |
| ; GFX908-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: half2: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: global_load_dword v1, v0, s[0:1] |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dword v0, v1, s[2:3] |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX1030-LABEL: half2: |
| ; GFX1030: ; %bb.0: |
| ; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| ; GFX1030-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX1030-NEXT: global_load_dword v1, v0, s[0:1] |
| ; GFX1030-NEXT: s_waitcnt vmcnt(0) |
| ; GFX1030-NEXT: global_store_dword v0, v1, s[2:3] |
| ; GFX1030-NEXT: s_endpgm |
| ; Addresses of the two source elements, indices 0 and 1 from %0. |
| %gep0 = getelementptr half, ptr addrspace(1) %0, i64 0 |
| %gep1 = getelementptr half, ptr addrspace(1) %0, i64 1 |
| ; One scalar half load per source element, with no explicit alignment. |
| %l0 = load half, ptr addrspace(1) %gep0 |
| %l1 = load half, ptr addrspace(1) %gep1 |
| ; Addresses of the two destination elements, indices 0 and 1 from %1. |
| %sgep0 = getelementptr half, ptr addrspace(1) %1, i64 0 |
| %sgep1 = getelementptr half, ptr addrspace(1) %1, i64 1 |
| ; Each loaded value is stored to the destination slot with the same index. |
| store half %l0, ptr addrspace(1) %sgep0 |
| store half %l1, ptr addrspace(1) %sgep1 |
| ret void |
| } |
| |
| |