| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s |
| |
| ; <2 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; SelectionDAG and GlobalISel share one set of checks per target: a single |
| ; 64-bit global load and a single 64-bit global store, with no separate |
| ; instruction for the freeze. |
| define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_v2i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <2 x i32> %a |
| store <2 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <3 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; SelectionDAG and GlobalISel share one set of checks per target: a single |
| ; 96-bit global load into v[4:6] and a single 96-bit global store, with no |
| ; separate instruction for the freeze. |
| define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_v3i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx3 v[4:6], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx3 v[2:3], v[4:6], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v3i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b96 v[4:6], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b96 v[2:3], v[4:6], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <3 x i32> %a |
| store <3 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <4 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; SelectionDAG and GlobalISel share one set of checks per target: a single |
| ; 128-bit global load into v[4:7] and a single 128-bit global store, with no |
| ; separate instruction for the freeze. |
| define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_v4i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v4i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <4 x i32> %a |
| store <4 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <5 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; The access is split into a 128-bit piece at offset 0 and a 32-bit piece at |
| ; offset 16. SelectionDAG issues the offset-16 piece first; GlobalISel issues |
| ; the pieces in ascending offset order. On gfx1100, GlobalISel loads the |
| ; 32-bit piece into v0 (reusing a source-pointer register) instead of v8. |
| define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v5i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dword v8, v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v8, off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v5i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v5i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b32 v8, v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v8, off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v5i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <5 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <5 x i32> %a |
| store <5 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <6 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; The access is split into a 128-bit piece at offset 0 and a 64-bit piece at |
| ; offset 16. SelectionDAG issues the offset-16 piece first; GlobalISel issues |
| ; the pieces in ascending offset order. On gfx1100, GlobalISel loads the |
| ; 64-bit piece into v[0:1] (the source-pointer registers) instead of v[8:9]. |
| define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v6i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v6i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v6i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v6i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <6 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <6 x i32> %a |
| store <6 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <7 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; The access is split into a 128-bit piece at offset 0 and a 96-bit piece at |
| ; offset 16. SelectionDAG issues the offset-16 piece first; GlobalISel issues |
| ; the pieces in ascending offset order. All four configurations use v[4:7] |
| ; and v[8:10] for the loaded data. |
| define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v7i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v7i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v7i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v7i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <7 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <7 x i32> %a |
| store <7 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <8 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; The access is split into two 128-bit pieces at offsets 0 and 16. |
| ; SelectionDAG issues the offset-16 piece first (into v[4:7]); GlobalISel |
| ; issues the pieces in ascending offset order. Each store is preceded by an |
| ; s_waitcnt whose vmcnt value counts down to 0. |
| define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v8i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v8i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v8i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v8i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <8 x i32> %a |
| store <8 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <9 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; The access is split into two 128-bit pieces at offsets 0 and 16 plus a |
| ; 32-bit piece at offset 32. SelectionDAG issues the offset-32 piece first, |
| ; then offsets 0 and 16; GlobalISel issues the pieces in ascending offset |
| ; order. On gfx1100, GlobalISel loads the 32-bit piece into v0 (reusing a |
| ; source-pointer register) instead of v12. |
| define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v9i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x2 |
| ; GFX10-SDAG-NEXT: global_load_dword v12, v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v12, off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v9i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x2 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v9i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x2 |
| ; GFX11-SDAG-NEXT: global_load_b32 v12, v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v12, off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v9i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x2 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:32 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <9 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <9 x i32> %a |
| store <9 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <10 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; SelectionDAG and GlobalISel share one set of checks per target: two |
| ; 128-bit pieces at offsets 0 and 16 plus a 64-bit piece at offset 32, issued |
| ; in ascending offset order. The gfx1100 checks load the 64-bit piece into |
| ; v[0:1] (the source-pointer registers); the gfx1010 checks use v[12:13]. |
| define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_v10i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x2 |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32 |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v10i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off offset:32 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off offset:32 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <10 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <10 x i32> %a |
| store <10 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <11 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; SelectionDAG and GlobalISel share one set of checks per target: two |
| ; 128-bit pieces at offsets 0 and 16 plus a 96-bit piece at offset 32 (in |
| ; v[12:14]), issued in ascending offset order. |
| define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_v11i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x2 |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-NEXT: global_load_dwordx3 v[12:14], v[0:1], off offset:32 |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v11i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-NEXT: global_load_b96 v[12:14], v[0:1], off offset:32 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b96 v[2:3], v[12:14], off offset:32 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <11 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <11 x i32> %a |
| store <11 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <12 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; SelectionDAG and GlobalISel share one set of checks per target: three |
| ; 128-bit pieces at offsets 0, 16 and 32, issued in ascending offset order, |
| ; each store preceded by an s_waitcnt whose vmcnt value counts down to 0. |
| define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_v12i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x2 |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v12i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <12 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <12 x i32> %a |
| store <12 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| ; <13 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; The access is split into three 128-bit pieces at offsets 0, 16 and 32 plus |
| ; a 32-bit piece at offset 48. SelectionDAG issues them in the order 48, 32, |
| ; 0, 16; GlobalISel issues them in ascending offset order. On gfx1100, |
| ; GlobalISel loads the 32-bit piece into v0 (reusing a source-pointer |
| ; register) instead of v16. |
| define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v13i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dword v16, v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v16, off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v13i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v13i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b32 v16, v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v16, off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v13i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <13 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <13 x i32> %a |
| store <13 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <14 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; The access is split into three 128-bit pieces at offsets 0, 16 and 32 plus |
| ; a 64-bit piece at offset 48. SelectionDAG issues them in the order 32, 48, |
| ; 0, 16; GlobalISel issues them in ascending offset order. On gfx1100, |
| ; GlobalISel loads the 64-bit piece into v[0:1] (the source-pointer |
| ; registers) instead of v[16:17]. |
| define void @freeze_v14i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v14i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v14i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v14i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[16:17], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[16:17], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v14i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <14 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <14 x i32> %a |
| store <14 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; <15 x i32>: load from %ptra, freeze, store to %ptrb (align 4). |
| ; The access is split into three 128-bit pieces at offsets 0, 16 and 32 plus |
| ; a 96-bit piece at offset 48. SelectionDAG issues them in the order 32, 48, |
| ; 0, 16; GlobalISel issues them in ascending offset order. All four |
| ; configurations load the 96-bit piece into v[16:18]. |
| define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v15i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v15i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v15i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v15i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <15 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <15 x i32> %a |
| store <15 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v16i32: load a <16 x i32> (64 bytes) from %ptra, freeze it, and store |
| ; the frozen value to %ptrb. |
| ; The expected output for every run line is four 128-bit loads (after an |
| ; s_clause 0x3) and four 128-bit stores, each store preceded by a vmcnt wait; |
| ; no other instruction appears for the freeze itself. |
| ; The SelectionDAG runs access offsets 32, 48, 0, 16 in that order, while the |
| ; GlobalISel runs access offsets 0, 16, 32, 48 in ascending order. |
| define void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v16i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v16i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v16i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v16i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; IR under test: both the load and the store are declared with align 4 only. |
| %a = load <16 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <16 x i32> %a |
| store <16 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v17i32: load a <17 x i32> (68 bytes) from %ptra, freeze it, and store |
| ; the frozen value to %ptrb. |
| ; The expected output for every run line is four 128-bit accesses plus one |
| ; 32-bit access for the last element at offset 64, with the five loads issued |
| ; after an s_clause 0x4 and each store preceded by a vmcnt wait. |
| ; The SelectionDAG runs handle the 32-bit tail first; the GlobalISel runs handle |
| ; it last. In the gfx1100 GlobalISel output the tail is loaded into v0, and that |
| ; load is the final instruction reading the source address in v[0:1]; the |
| ; gfx1010 GlobalISel output uses v20 for the tail instead. |
| define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v17i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x4 |
| ; GFX10-SDAG-NEXT: global_load_dword v20, v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v20, off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v17i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x4 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v17i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x4 |
| ; GFX11-SDAG-NEXT: global_load_b32 v20, v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v20, off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v17i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x4 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:64 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; IR under test: both the load and the store are declared with align 4 only. |
| %a = load <17 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <17 x i32> %a |
| store <17 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v18i32: load a <18 x i32> (72 bytes) from %ptra, freeze it, and store |
| ; the frozen value to %ptrb. |
| ; The expected output for every run line is four 128-bit accesses plus one |
| ; 64-bit access for the last two elements at offset 64, with the five loads |
| ; issued after an s_clause 0x4 and each store preceded by a vmcnt wait. |
| ; The SelectionDAG runs handle the 64-bit tail first; the GlobalISel runs handle |
| ; it last. In the gfx1100 GlobalISel output the tail is loaded into v[0:1], and |
| ; that load is the final instruction reading the source address in v[0:1]; the |
| ; gfx1010 GlobalISel output uses v[20:21] for the tail instead. |
| define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v18i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x4 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v18i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x4 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v18i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x4 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[20:21], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[20:21], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v18i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x4 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; IR under test: both the load and the store are declared with align 4 only. |
| %a = load <18 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <18 x i32> %a |
| store <18 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v19i32: load a <19 x i32> (76 bytes) from %ptra, freeze it, and store |
| ; the frozen value to %ptrb. |
| ; The expected output for every run line is four 128-bit accesses plus one |
| ; 96-bit access for the last three elements at offset 64, with the five loads |
| ; issued after an s_clause 0x4 and each store preceded by a vmcnt wait. |
| ; The SelectionDAG runs handle the 96-bit tail first; the GlobalISel runs handle |
| ; it last. All four configurations keep the tail in v[20:22]. |
| define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v19i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x4 |
| ; GFX10-SDAG-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v19i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x4 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v19i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x4 |
| ; GFX11-SDAG-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v19i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x4 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; IR under test: both the load and the store are declared with align 4 only. |
| %a = load <19 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <19 x i32> %a |
| store <19 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v20i32: load a <20 x i32> (80 bytes) from %ptra, freeze it, and store |
| ; the frozen value to %ptrb. |
| ; The expected output for every run line is five 128-bit loads (after an |
| ; s_clause 0x4) and five 128-bit stores, each store preceded by a vmcnt wait; |
| ; no other instruction appears for the freeze itself. |
| ; The SelectionDAG runs access offsets 64, 32, 48, 0, 16 in that order, while |
| ; the GlobalISel runs access offsets 0 through 64 in ascending order. |
| define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v20i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x4 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v20i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x4 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v20i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x4 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v20i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x4 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; IR under test: both the load and the store are declared with align 4 only. |
| %a = load <20 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <20 x i32> %a |
| store <20 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v21i32: load a <21 x i32> (84 bytes) from %ptra, freeze it, and store |
| ; the frozen value to %ptrb. |
| ; The expected output for every run line is five 128-bit accesses plus one |
| ; 32-bit access for the last element at offset 80, with the six loads issued |
| ; after an s_clause 0x5 and each store preceded by a vmcnt wait. |
| ; The SelectionDAG runs handle the 32-bit tail first; the GlobalISel runs handle |
| ; it last. In the gfx1100 GlobalISel output the tail is loaded into v0, and that |
| ; load is the final instruction reading the source address in v[0:1]; the |
| ; gfx1010 GlobalISel output uses v24 for the tail instead. |
| define void @freeze_v21i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v21i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x5 |
| ; GFX10-SDAG-NEXT: global_load_dword v24, v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v24, off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v21i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x5 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v21i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x5 |
| ; GFX11-SDAG-NEXT: global_load_b32 v24, v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v24, off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v21i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x5 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:80 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; IR under test: both the load and the store are declared with align 4 only. |
| %a = load <21 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <21 x i32> %a |
| store <21 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads a <22 x i32> from %ptra, freezes it and stores it to %ptrb (both |
| ; addrspace(1), align 4). All four runs expect six loads under one s_clause 0x5 |
| ; (five 16-byte loads plus one 8-byte load at offset 80), one s_waitcnt vmcnt |
| ; before each store, and no instruction emitted for the freeze itself. |
| ; The -global-isel=0 runs expect the pieces in the order 64, 80, 32, 48, 0, 16; |
| ; the -global-isel=1 runs expect ascending offsets. In the gfx1100 |
| ; -global-isel=1 expectation the last load writes v[0:1], i.e. the registers |
| ; that also hold the load address. |
| define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v22i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x5 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v22i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x5 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v22i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x5 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[24:25], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[24:25], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v22i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x5 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <22 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <22 x i32> %a |
| store <22 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads a <30 x i32> from %ptra, freezes it and stores it to %ptrb (both |
| ; addrspace(1), align 4). All four runs expect eight loads under one |
| ; s_clause 0x7 (seven 16-byte loads plus one 8-byte load at offset 112), one |
| ; s_waitcnt vmcnt before each store counting down from 7 to 0, and no |
| ; instruction emitted for the freeze itself. |
| ; The -global-isel=0 runs expect the pieces in the order 96, 112, 64, 80, 32, |
| ; 48, 0, 16; the -global-isel=1 runs expect ascending offsets. In the gfx1100 |
| ; -global-isel=1 expectation the last load writes v[0:1], i.e. the registers |
| ; that also hold the load address. |
| define void @freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v30i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x7 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v30i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x7 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v30i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x7 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[32:33], v[0:1], off offset:112 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[32:33], off offset:112 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v30i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x7 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <30 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <30 x i32> %a |
| store <30 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads a <31 x i32> from %ptra, freezes it and stores it to %ptrb (both |
| ; addrspace(1), align 4). All four runs expect eight loads under one |
| ; s_clause 0x7 (seven 16-byte loads plus one 12-byte load at offset 112), one |
| ; s_waitcnt vmcnt before each store counting down from 7 to 0, and no |
| ; instruction emitted for the freeze itself. |
| ; The -global-isel=0 runs expect the pieces in the order 96, 112, 64, 80, 32, |
| ; 48, 0, 16; the -global-isel=1 runs expect ascending offsets. Every run |
| ; expects the 12-byte tail in v[32:34]. |
| define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v31i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x7 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 |
| ; GFX10-SDAG-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v31i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x7 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX10-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v31i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x7 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 |
| ; GFX11-SDAG-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v31i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x7 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 |
| ; GFX11-GISEL-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <31 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <31 x i32> %a |
| store <31 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads a <32 x i32> from %ptra, freezes it and stores it to %ptrb (both |
| ; addrspace(1), align 4). All four runs expect eight 16-byte loads under one |
| ; s_clause 0x7 covering offsets 0 through 112, one s_waitcnt vmcnt before each |
| ; store counting down from 7 to 0, and no instruction emitted for the freeze |
| ; itself. |
| ; The -global-isel=0 runs expect the pieces in the order 96, 112, 64, 80, 32, |
| ; 48, 0, 16; the -global-isel=1 runs expect ascending offsets. |
| define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_v32i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x7 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v32i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x7 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v32i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x7 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v32i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x7 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <32 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <32 x i32> %a |
| store <32 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Scalar case: loads an i32 from %ptra, freezes it and stores it to %ptrb |
| ; (both addrspace(1), align 4). The expectations are keyed per -mcpu only, so |
| ; both -global-isel settings must produce the same output: one 4-byte global |
| ; load into v0, a wait for it, one 4-byte global store, and the return, with |
| ; no instruction emitted for the freeze itself. |
| define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load i32, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze i32 %a |
| store i32 %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Scalar case: loads an i64 from %ptra, freezes it and stores it to %ptrb |
| ; (both addrspace(1); note the accesses are only align 4). The expectations |
| ; are keyed per -mcpu only, so both -global-isel settings must produce the same |
| ; output: a single 8-byte global load into v[0:1], a wait for it, a single |
| ; 8-byte global store, and the return, with no instruction for the freeze. |
| define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_i64: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load i64, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze i64 %a |
| store i64 %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Floating-point scalar case: loads a float from %ptra, freezes it and stores |
| ; it to %ptrb (both addrspace(1), align 4). The expectations are keyed per |
| ; -mcpu only, so both -global-isel settings must produce the same output: one |
| ; 4-byte global load into v0, a wait for it, one 4-byte global store, and the |
| ; return, with no instruction emitted for the freeze itself. |
| define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_float: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_float: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load float, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze float %a |
| store float %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Wide scalar case: loads an i128 from %ptra, freezes it and stores it to |
| ; %ptrb (both addrspace(1); note the accesses are only align 4). The |
| ; expectations are keyed per -mcpu only, so both -global-isel settings must |
| ; produce the same output: a single 16-byte global load into v[4:7], a wait |
| ; for it, a single 16-byte global store, and the return, with no instruction |
| ; emitted for the freeze itself. |
| define void @freeze_i128(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-LABEL: freeze_i128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_i128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load i128, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze i128 %a |
| store i128 %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Wide scalar case that needs more than one memory access: loads an i256 from |
| ; %ptra, freezes it and stores it to %ptrb (both addrspace(1), align 4). All |
| ; four runs expect two 16-byte loads under one s_clause 0x1, one s_waitcnt |
| ; vmcnt before each of the two stores, and no instruction emitted for the |
| ; freeze itself. |
| ; The -global-isel=0 runs expect the half at offset 16 to be handled first; |
| ; the -global-isel=1 runs expect the half at offset 0 first. |
| define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX10-SDAG-LABEL: freeze_i256: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_i256: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_i256: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_i256: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load i256, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze i256 %a |
| store i256 %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |