| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s |
| |
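| ; FIXME: the gfx900 SelectionDAG run below reuses the GFX8-SDAG prefix of the gfx803 run; every other run pairs its own target prefix with -SDAG/-GISEL, so this was probably meant to be GFX9-SDAG. Changing it requires regenerating the assertions with utils/update_llc_test_checks.py. |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX8-SDAG %s |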
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s |
| |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s |
| ; FIXME-TRUE16 enable gisel |
| ; XUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s |
| |
| ; Loads a <2 x i32> from %ptra, freezes it and stores the result to %ptrb |
| ; (both pointers are addrspace(1), accesses are align 4). In every checked |
| ; configuration the expected code is one 64-bit load followed by one 64-bit |
| ; store; the checked sequences contain no separate instruction for the freeze. |
| define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v2i32: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v2i32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v2i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <2 x i32> %a |
| store <2 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads a <3 x i32> from %ptra, freezes it and stores the result to %ptrb |
| ; (both pointers are addrspace(1), accesses are align 4). The GFX6 checks |
| ; (both selectors) expect the access split into a dwordx2 plus a dword at |
| ; offset 8; GFX7 and newer are checked to use a single dwordx3 / b96 load |
| ; and store. |
| define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v4, v[2:3], s[4:7], 0 addr64 offset:8 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:8 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:8 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v3i32: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx3 v[4:6], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx3 v[2:3], v[4:6] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v3i32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx3 v[4:6], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx3 v[2:3], v[4:6], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v3i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx3 v[4:6], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx3 v[2:3], v[4:6], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v3i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b96 v[4:6], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b96 v[2:3], v[4:6], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <3 x i32> %a |
| store <3 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads a <4 x i32> from %ptra, freezes it and stores the result to %ptrb |
| ; (both pointers are addrspace(1), accesses are align 4). Every checked |
| ; configuration expects a single 128-bit load (dwordx4 / b128) followed by a |
| ; single 128-bit store; no separate instruction appears for the freeze. |
| define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v4i32: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v4i32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v4i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v4i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <4 x i32> %a |
| store <4 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads a <5 x i32> from %ptra, freezes it and stores the result to %ptrb |
| ; (both pointers are addrspace(1), accesses are align 4). Every checked |
| ; configuration splits the access into a dwordx4 / b128 part plus a single |
| ; dword / b32 part at byte offset 16. The GFX8-GISEL checks compute the +16 |
| ; addresses with v_add_u32 / v_addc_u32 instead of using an offset operand. |
| ; NOTE(review): this function has no assertions for the gfx803 and gfx900 |
| ; SelectionDAG runs (no GFX8, GFX9 or GFX8-SDAG check lines). Both of those |
| ; run lines use the GFX8-SDAG prefix, so presumably the generator dropped |
| ; them as conflicting - confirm and regenerate once the prefixes are unique. |
| define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v5i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v5i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v5i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v5i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v5i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dword v8, v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v8 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v5i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v5i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dword v8, v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v8, off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v5i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v5i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b32 v8, v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v8, off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v5i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <5 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <5 x i32> %a |
| store <5 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads a <6 x i32> from %ptra, freezes it and stores the result to %ptrb |
| ; (both pointers are addrspace(1), accesses are align 4). Every checked |
| ; configuration splits the access into a dwordx4 / b128 part plus a |
| ; dwordx2 / b64 part at byte offset 16. The GFX8-GISEL checks compute the +16 |
| ; addresses with v_add_u32 / v_addc_u32 instead of using an offset operand. |
| ; NOTE(review): this function has no assertions for the gfx803 and gfx900 |
| ; SelectionDAG runs (no GFX8, GFX9 or GFX8-SDAG check lines). Both of those |
| ; run lines use the GFX8-SDAG prefix, so presumably the generator dropped |
| ; them as conflicting - confirm and regenerate once the prefixes are unique. |
| define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v6i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v6i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v6i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v6i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v6i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v6i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v6i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v6i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v6i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v6i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <6 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <6 x i32> %a |
| store <6 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v7i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v10, v[0:1], s[4:7], 0 addr64 offset:24 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v10, v[2:3], s[4:7], 0 addr64 offset:24 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v7i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:24 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:24 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v7i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[8:10], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[8:10], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v7i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[8:10], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[8:10], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v7i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx3 v[8:10], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx3 v[0:1], v[8:10] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v7i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v7i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v7i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v7i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v7i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <7 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <7 x i32> %a |
| store <7 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Round-trips a <8 x i32> through freeze, loading it from %ptra and storing |
| ; it to %ptrb (both addrspace(1), align 4). Every check block below expects |
| ; the 32-byte vector to be moved as two 16-byte load/store pairs, one at byte |
| ; offset 0 and one at byte offset 16. |
| ; NOTE(review) this function has no checks for the SelectionDAG runs on |
| ; gfx803 and gfx900. The gfx900 SelectionDAG RUN line in the file header |
| ; reuses the gfx8 SelectionDAG prefix, which is presumably why the generator |
| ; emitted nothing for either run - confirm and regenerate. |
| define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v8i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v8i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v8i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v8i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v8i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v8i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <8 x i32> %a |
| store <8 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Round-trips a <9 x i32> through freeze, loading it from %ptra and storing |
| ; it to %ptrb (both addrspace(1), align 4). Every check block below expects |
| ; the 36-byte vector to be moved as two 16-byte load/store pairs (byte |
| ; offsets 0 and 16) plus a single dword pair at byte offset 32. |
| ; NOTE(review) this function has no checks for the SelectionDAG runs on |
| ; gfx803 and gfx900. The gfx900 SelectionDAG RUN line in the file header |
| ; reuses the gfx8 SelectionDAG prefix, which is presumably why the generator |
| ; emitted nothing for either run - confirm and regenerate. |
| define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v9i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v12, v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v12, v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v9i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v9i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v12, v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v12, v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v9i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v9i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dword v14, v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dword v[12:13], v14 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v9i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v9i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x2 |
| ; GFX10-SDAG-NEXT: global_load_dword v12, v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v12, off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v9i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x2 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v9i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x2 |
| ; GFX11-SDAG-NEXT: global_load_b32 v12, v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v12, off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v9i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x2 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:32 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <9 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <9 x i32> %a |
| store <9 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Round-trips a <10 x i32> through freeze, loading it from %ptra and storing |
| ; it to %ptrb (both addrspace(1), align 4). Every check block below expects |
| ; the 40-byte vector to be moved as two 16-byte load/store pairs (byte |
| ; offsets 0 and 16) plus one 8-byte pair at byte offset 32. |
| ; The gfx9, gfx10 and gfx11 blocks use the prefix shared by both instruction |
| ; selectors, so one block covers the SelectionDAG and GlobalISel runs there. |
| ; NOTE(review) this function has no checks for the SelectionDAG run on |
| ; gfx803. The gfx900 SelectionDAG RUN line in the file header reuses the gfx8 |
| ; SelectionDAG prefix, which is presumably why the generator emitted nothing |
| ; for that run - confirm and regenerate. |
| define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v10i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v10i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v10i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v10i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v10i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[14:15], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v10i32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32 |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v10i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x2 |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32 |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v10i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off offset:32 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off offset:32 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <10 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <10 x i32> %a |
| store <10 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v11i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v14, v[0:1], s[4:7], 0 addr64 offset:40 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v14, v[2:3], s[4:7], 0 addr64 offset:40 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[12:13], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v11i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:40 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[12:13], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:40 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v11i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[12:14], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[12:14], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v11i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[12:14], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[12:14], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v11i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx3 v[12:14], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx3 v[15:16], v[12:14] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v11i32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-NEXT: global_load_dwordx3 v[12:14], v[0:1], off offset:32 |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v11i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x2 |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-NEXT: global_load_dwordx3 v[12:14], v[0:1], off offset:32 |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v11i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-NEXT: global_load_b96 v[12:14], v[0:1], off offset:32 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b96 v[2:3], v[12:14], off offset:32 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <11 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <11 x i32> %a |
| store <11 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Round-trips a <12 x i32> (48 bytes) through freeze: load from %ptra, freeze, store to %ptrb. |
| ; Both memory accesses are declared with only 4-byte alignment. |
| ; Every check block below expects three 16-byte loads (offsets 0, 16, 32) followed by three |
| ; 16-byte stores to the same offsets; the only other expected instructions are waits, |
| ; address/descriptor setup and the return, i.e. nothing extra is emitted for the freeze. |
| ; Check blocks present here - GFX6 and GFX7 (SDAG and GISEL separately), GFX8-GISEL only, and |
| ; the shared GFX9, GFX10 and GFX11 prefixes. |
| ; NOTE(review) - there is no GFX8-SDAG block. The gfx900 SDAG invocation at the top of the file |
| ; passes the prefix list GFX9,GFX8-SDAG, so the gfx803 and gfx900 SDAG runs share one prefix; |
| ; presumably the check generator dropped it as conflicting and GFX9-SDAG was intended. Confirm |
| ; and regenerate the assertions. |
| define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v12i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v12i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v12i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v12i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v12i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v12i32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v12i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x2 |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v12i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <12 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <12 x i32> %a |
| store <12 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| ; Round-trips a <13 x i32> (52 bytes) through freeze: load from %ptra, freeze, store to %ptrb. |
| ; Both memory accesses are declared with only 4-byte alignment. |
| ; The checks expect the vector to be split into three 16-byte accesses (offsets 0, 16, 32) plus |
| ; one 4-byte access at offset 48. The SDAG blocks issue the offset-48 dword first, then offsets |
| ; 32, 0 and 16; the GISEL blocks walk offsets 0, 16, 32, 48 in ascending order. Stores follow the |
| ; same order as the loads in each block. |
| ; NOTE(review) - this function has no GFX8-SDAG or GFX9-SDAG block and no shared GFX8 or GFX9 |
| ; block, so SDAG output for gfx803 and gfx900 appears to be unchecked here. The gfx900 SDAG |
| ; invocation at the top of the file passes the prefix list GFX9,GFX8-SDAG; presumably GFX9-SDAG |
| ; was intended. Confirm and regenerate the assertions. |
| define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v13i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v16, v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v16, v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v13i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v13i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v16, v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v16, v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v13i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v13i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dword v18, v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v18 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v13i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v13i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dword v16, v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v16, off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v13i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v13i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b32 v16, v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v16, off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v13i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <13 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <13 x i32> %a |
| store <13 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Round-trips a <14 x i32> (56 bytes) through freeze: load from %ptra, freeze, store to %ptrb. |
| ; Both memory accesses are declared with only 4-byte alignment. |
| ; The checks expect the vector to be split into three 16-byte accesses (offsets 0, 16, 32) plus |
| ; one 8-byte access at offset 48. The SDAG blocks issue offsets 32, 48, 0, 16 in that order; the |
| ; GISEL blocks walk offsets 0, 16, 32, 48 in ascending order. Stores follow the same order as |
| ; the loads in each block. |
| ; NOTE(review) - this function has no GFX8-SDAG or GFX9-SDAG block and no shared GFX8 or GFX9 |
| ; block, so SDAG output for gfx803 and gfx900 appears to be unchecked here. The gfx900 SDAG |
| ; invocation at the top of the file passes the prefix list GFX9,GFX8-SDAG; presumably GFX9-SDAG |
| ; was intended. Confirm and regenerate the assertions. |
| define void @freeze_v14i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v14i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v14i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v14i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v14i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v14i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[18:19], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v14i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v14i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v14i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v14i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[16:17], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[16:17], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v14i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <14 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <14 x i32> %a |
| store <14 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v15i32 loads a <15 x i32> from %ptra, freezes it and stores the result |
| ; to %ptrb (both pointers are addrspace(1), accesses are align 4). |
| ; In every checked configuration the vector is moved as three 16-byte accesses |
| ; at byte offsets 0, 16 and 32 plus a 12-byte tail at byte offset 48. The |
| ; GFX6-SDAG, GFX6-GISEL and GFX7-SDAG checks cover the tail with a dwordx2 |
| ; (offset 48) and a dword (offset 56); the remaining checks use a single |
| ; dwordx3 / b96 access for it. |
| ; NOTE(review): this function has no GFX8-SDAG or GFX9-SDAG check block. The |
| ; gfx900 SelectionDAG run line at the top of the file uses the GFX8-SDAG prefix, |
| ; which it shares with gfx803 - presumably GFX9-SDAG was intended; confirm. |
| define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v15i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v18, v[0:1], s[4:7], 0 addr64 offset:56 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v18, v[2:3], s[4:7], 0 addr64 offset:56 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v15i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:56 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:56 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v15i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v18, v[0:1], s[4:7], 0 addr64 offset:56 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v18, v[2:3], s[4:7], 0 addr64 offset:56 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v15i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[16:18], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[16:18], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v15i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx3 v[16:18], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx3 v[2:3], v[16:18] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v15i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v15i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v15i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v15i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v15i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <15 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <15 x i32> %a |
| store <15 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v16i32 loads a <16 x i32> from %ptra, freezes it and stores the result |
| ; to %ptrb (both pointers are addrspace(1), accesses are align 4). |
| ; Every checked configuration moves the vector as four 16-byte (dwordx4 / b128) |
| ; loads followed by four 16-byte stores. The SDAG checks issue them at byte |
| ; offsets 32, 48, 0, 16; the GISEL checks issue them in ascending order. |
| ; NOTE(review): this function has no GFX8-SDAG or GFX9-SDAG check block. The |
| ; gfx900 SelectionDAG run line at the top of the file uses the GFX8-SDAG prefix, |
| ; which it shares with gfx803 - presumably GFX9-SDAG was intended; confirm. |
| define void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v16i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v16i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v16i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v16i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v16i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v16i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v16i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v16i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v16i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v16i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <16 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <16 x i32> %a |
| store <16 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v17i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v20, v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v20, v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v17i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v17i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v20, v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v20, v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v17i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v17i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dword v20, v[18:19] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] |
| ; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v20 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v17i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v17i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x4 |
| ; GFX10-SDAG-NEXT: global_load_dword v20, v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v20, off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v17i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x4 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v17i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x4 |
| ; GFX11-SDAG-NEXT: global_load_b32 v20, v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v20, off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v17i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x4 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:64 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <17 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <17 x i32> %a |
| store <17 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v18i32 loads a <18 x i32> from %ptra (addrspace(1), align 4), freezes |
| ; it, and stores the frozen value to %ptrb. In the expected output below the |
| ; 72-byte copy appears as four 16-byte accesses plus one 8-byte access at |
| ; offset 64. |
| ; The check lines are autogenerated (see the NOTE on the first line of this |
| ; file); regenerate them with the update script rather than editing by hand. |
| ; NOTE(review) - this function has no SelectionDAG check blocks for gfx803 or |
| ; gfx900. The gfx900 SelectionDAG RUN line passes GFX8-SDAG rather than |
| ; GFX9-SDAG as its second prefix, which presumably made the update script drop |
| ; the shared prefix as conflicting - TODO confirm and fix the RUN line. |
| define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v18i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v18i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v18i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v18i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v18i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[18:19] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[20:21], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19] |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v18i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v18i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x4 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v18i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x4 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v18i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x4 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[20:21], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[20:21], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v18i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x4 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <18 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <18 x i32> %a |
| store <18 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v19i32 loads a <19 x i32> from %ptra (addrspace(1), align 4), freezes |
| ; it, and stores the frozen value to %ptrb. In the expected output below the |
| ; first 64 bytes are copied as four 16-byte accesses; the 12-byte tail at |
| ; offset 64 is a single dwordx3/b96 access, except in the GFX6 blocks and the |
| ; GFX7 SelectionDAG block, where it is split into a dwordx2 at offset 64 and a |
| ; dword at offset 72. |
| ; The check lines are autogenerated (see the NOTE on the first line of this |
| ; file); regenerate them with the update script rather than editing by hand. |
| ; NOTE(review) - this function has no SelectionDAG check blocks for gfx803 or |
| ; gfx900. The gfx900 SelectionDAG RUN line passes GFX8-SDAG rather than |
| ; GFX9-SDAG as its second prefix, which presumably made the update script drop |
| ; the shared prefix as conflicting - TODO confirm and fix the RUN line. |
| define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v19i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v19i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v19i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v19i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[20:22], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[20:22], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v19i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx3 v[20:22], v[20:21] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx3 v[6:7], v[20:22] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v19i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v19i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x4 |
| ; GFX10-SDAG-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v19i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x4 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v19i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x4 |
| ; GFX11-SDAG-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v19i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x4 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <19 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <19 x i32> %a |
| store <19 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v20i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; Purpose of freeze_v20i32 - load <20 x i32> from %ptra, freeze it, store it to %ptrb (align 4); every check block in this function expects five 128-bit load/store pairs for one check prefix. |
| ; GFX6-GISEL-LABEL: freeze_v20i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v20i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v20i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; NOTE(review) - this function has no SDAG check block for GFX8 or GFX9, only the GISEL blocks that follow. The gfx900 SDAG invocation at the top of the file uses the prefix GFX8-SDAG, same as the gfx803 one; presumably their outputs differ so no common checks were generated. Confirm whether GFX9-SDAG was intended there. |
| ; GFX8-GISEL-LABEL: freeze_v20i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[20:23] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v20i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v20i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x4 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v20i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x4 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v20i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x4 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v20i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x4 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <20 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <20 x i32> %a |
| store <20 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| define void @freeze_v21i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v21i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v24, v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v24, v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; Purpose of freeze_v21i32 - load <21 x i32> from %ptra, freeze it, store it to %ptrb (align 4); every check block in this function expects five 128-bit load/store pairs plus one single-dword pair for the last element (byte offset 80). |
| ; GFX6-GISEL-LABEL: freeze_v21i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v21i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v24, v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v24, v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v21i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; NOTE(review) - this function has no SDAG check block for GFX8 or GFX9, only the GISEL blocks that follow. The gfx900 SDAG invocation at the top of the file uses the prefix GFX8-SDAG, same as the gfx803 one; presumably their outputs differ so no common checks were generated. Confirm whether GFX9-SDAG was intended there. |
| ; GFX8-GISEL-LABEL: freeze_v21i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x50 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: flat_load_dword v26, v[8:9] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[24:25], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 0x50, v2 |
| ; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] |
| ; GFX8-GISEL-NEXT: flat_store_dword v[6:7], v26 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v21i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v21i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x5 |
| ; GFX10-SDAG-NEXT: global_load_dword v24, v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v24, off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v21i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x5 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v21i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x5 |
| ; GFX11-SDAG-NEXT: global_load_b32 v24, v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v24, off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v21i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x5 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:80 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <21 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <21 x i32> %a |
| store <21 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads a <22 x i32> from %ptra, freezes it, and stores the frozen value to |
| ; %ptrb (both addrspace(1) pointers, align 4). In every configuration checked |
| ; below the 22 dwords are moved as five 128-bit accesses plus one 64-bit |
| ; access; the expected output contains only those loads and stores, address |
| ; arithmetic, waits and the return. |
| ; |
| ; NOTE(review) - this function carries no assertions for the SelectionDAG runs |
| ; on gfx803 and gfx900. Presumably the gfx900 SelectionDAG RUN line reuses the |
| ; gfx803 SelectionDAG prefix, so the update script dropped the conflicting |
| ; output for both. TODO confirm the RUN line prefixes and regenerate. |
| define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v22i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[24:25], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[24:25], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v22i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v22i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[24:25], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[24:25], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v22i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v22i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x50 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[24:25], v[8:9] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v26, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v27, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[26:27], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 0x50, v2 |
| ; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[6:7], v[24:25] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v22i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v22i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x5 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v22i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x5 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v22i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x5 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[24:25], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[24:25], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v22i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x5 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <22 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <22 x i32> %a |
| store <22 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| define void @freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v30i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v30i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v30i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v30i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v30i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v34, 0x50 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v34 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v32, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[32:33], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v32, vcc, v2, v34 |
| ; GFX8-GISEL-NEXT: v_add_u32_e64 v34, s[4:5], 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v33, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 0x60, v2 |
| ; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[34:35], v[20:23] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[32:33], v[24:27] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[28:31] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v30i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX9-GISEL-NEXT: s_nop 0 |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v30i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x7 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v30i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x7 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v30i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x7 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[32:33], v[0:1], off offset:112 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[32:33], off offset:112 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v30i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x7 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <30 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <30 x i32> %a |
| store <30 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_v31i32 loads a <31 x i32> from %ptra, freezes it, and stores the |
| ; result to %ptrb (both pointers are addrspace(1), accesses are align 4). |
| ; 31 dwords is 124 bytes, so the checks below expect seven 16-byte accesses |
| ; plus a 12-byte tail starting at byte offset 112. The GFX6 and GFX7-SDAG |
| ; checks expect that tail split into a dwordx2 at offset 112 and a dword at |
| ; offset 120 rather than a single dwordx3. |
| ; Review note - there are no GFX8-SDAG or GFX9-SDAG checks for this function. |
| ; The gfx900 SDAG RUN line uses the GFX8-SDAG prefix, so presumably the gfx803 |
| ; and gfx900 SDAG outputs conflict and the update script dropped them. TODO |
| ; confirm whether that RUN line was meant to use a GFX9-SDAG prefix. |
| define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v31i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v31i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(8) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v31i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v31i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[32:34], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[32:34], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v31i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v35, 0x50 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v35 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx3 v[32:34], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v35 |
| ; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx3 v[2:3], v[32:34] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v31i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX9-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v31i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x7 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 |
| ; GFX10-SDAG-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v31i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x7 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX10-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v31i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x7 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 |
| ; GFX11-SDAG-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v31i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x7 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 |
| ; GFX11-GISEL-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <31 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <31 x i32> %a |
| store <31 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v32i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v32i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v32i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v32i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v32i32: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38 |
| ; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v32i32: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v32i32: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x7 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v32i32: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x7 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v32i32: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x7 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v32i32: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x7 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <32 x i32>, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze <32 x i32> %a |
| store <32 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_i32: loads an i32 from %ptra, freezes it, and stores the result to |
| ; %ptrb (both addrspace(1) pointers). In every checked configuration the output |
| ; is one 32-bit load, a wait on vmcnt(0), and a store of the same register v0; |
| ; no separate instruction appears for the freeze. |
| define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_i32: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_i32: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_i32: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_i32: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_i32: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dword v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_i32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_i32: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load i32, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze i32 %a |
| store i32 %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_i64: loads an i64 from %ptra (align 4), freezes it, and stores the |
| ; result to %ptrb. Every checked configuration emits a single 64-bit load into |
| ; v[0:1], a wait on vmcnt(0), and a single 64-bit store of v[0:1]; the freeze |
| ; contributes no instruction of its own. |
| define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_i64: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_i64: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_i64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_i64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_i64: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_i64: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_i64: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load i64, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze i64 %a |
| store i64 %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_float: loads a float from %ptra, freezes it, and stores the result to |
| ; %ptrb. The checked instruction sequences are the same 32-bit load / wait / |
| ; store of v0 that the freeze_i32 assertions in this file expect, so the |
| ; floating-point type does not change the selected instructions here. |
| define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_float: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_float: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_float: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_float: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_float: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dword v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_float: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_float: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_float: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load float, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze float %a |
| store float %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_i128: loads an i128 from %ptra (align 4), freezes it, and stores the |
| ; result to %ptrb. Every checked configuration handles the value with a single |
| ; 128-bit load into v[4:7], a wait on vmcnt(0), and a single 128-bit store of |
| ; v[4:7]; the freeze contributes no instruction of its own. |
| define void @freeze_i128(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_i128: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_i128: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_i128: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_i128: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_i128: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_i128: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_i128: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_i128: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load i128, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze i128 %a |
| store i128 %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; freeze_i256: loads an i256 from %ptra (align 4), freezes it, and stores the |
| ; result to %ptrb. The checked output splits the value into two 128-bit halves |
| ; at byte offsets 0 and 16, loaded into v[4:7] and v[8:11] and stored back |
| ; unchanged. The SelectionDAG checks access offset 16 first, the GlobalISel |
| ; checks access offset 0 first, so the two selectors need separate prefixes. |
| ; GFX8-GISEL forms the +16 addresses with v_add_u32/v_addc_u32 instead of an |
| ; offset field on the memory instruction. |
| ; |
| ; NOTE(review): this function has no GFX8-SDAG or GFX9-SDAG assertions, so the |
| ; SelectionDAG output for gfx803 and gfx900 is not checked here. Presumably |
| ; this is because the gfx900 SelectionDAG RUN line at the top of the file uses |
| ; the prefix GFX8-SDAG (shared with gfx803) rather than GFX9-SDAG, leaving the |
| ; generator without a usable prefix - confirm, fix the RUN line, and regenerate. |
| define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_i256: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_i256: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_i256: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_i256: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_i256: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_i256: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_i256: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_i256: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_i256: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_i256: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load i256, ptr addrspace(1) %ptra, align 4 |
| %freeze = freeze i256 %a |
| store i256 %freeze, ptr addrspace(1) %ptrb, align 4 |
| ret void |
| } |
| |
| ; Loads an i16 from %ptra, freezes it, and stores the frozen value to %ptrb |
| ; (both pointers are in addrspace(1)). The check lines below are autogenerated. |
| ; GFX11 is split into three check blocks because the true16 SDAG block expects |
| ; global_load_d16_b16 while the fake16 SDAG and GlobalISel blocks expect |
| ; global_load_u16. |
| define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_i16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_i16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_i16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_i16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_i16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_ushort v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_short v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_i16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_ushort v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_short v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_i16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_ushort v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_i16: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_i16: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_i16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load i16, ptr addrspace(1) %ptra |
| %freeze = freeze i16 %a |
| store i16 %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Loads a <2 x i16> from %ptra, freezes it, and stores the frozen value to |
| ; %ptrb (both pointers are in addrspace(1)). The check lines below are |
| ; autogenerated; every check block expects one 32-bit load (dword / b32) |
| ; followed by one 32-bit store. |
| define void @freeze_v2i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2i16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2i16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2i16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2i16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v2i16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dword v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v2i16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v2i16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x i16>, ptr addrspace(1) %ptra |
| %freeze = freeze <2 x i16> %a |
| store <2 x i16> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Loads a <3 x i16> from %ptra, freezes it, and stores the frozen value to |
| ; %ptrb (both pointers are in addrspace(1)). The check lines below are |
| ; autogenerated. Every check block expects one 64-bit load (dwordx2 / b64). |
| ; The SDAG blocks then expect a 16-bit store at byte offset 4 plus a 32-bit |
| ; store at offset 0, while the GlobalISel blocks expect three 16-bit stores |
| ; covering byte offsets 0, 2 and 4. |
| ; NOTE(review): there is no SDAG check block for gfx803 or gfx900 in this |
| ; function. The gfx900 SDAG run line in the file header lists the prefixes |
| ; GFX9,GFX8-SDAG, which looks like a typo for a GFX9-SDAG prefix; presumably |
| ; the two targets then disagree under the shared prefix and the update script |
| ; drops it. Confirm, fix the run line, and regenerate the checks. |
| define void @freeze_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3i16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4 |
| ; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3i16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3i16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4 |
| ; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3i16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3i16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0 |
| ; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0 |
| ; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8 |
| ; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v3i16: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2 |
| ; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v3i16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4 |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v3i16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2 |
| ; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v3i16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4 |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v3i16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x2 |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2 |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x i16>, ptr addrspace(1) %ptra |
| %freeze = freeze <3 x i16> %a |
| store <3 x i16> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Loads a <4 x i16> from %ptra, freezes it, and stores the frozen value to |
| ; %ptrb (both pointers are in addrspace(1)). The check lines below are |
| ; autogenerated; every check block expects one 64-bit load (dwordx2 / b64) |
| ; followed by one 64-bit store. |
| define void @freeze_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4i16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4i16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4i16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4i16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v4i16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v4i16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v4i16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v4i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x i16>, ptr addrspace(1) %ptra |
| %freeze = freeze <4 x i16> %a |
| store <4 x i16> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Loads an <8 x i16> from %ptra, freezes it, and stores the frozen value to |
| ; %ptrb (both pointers are in addrspace(1)). The check lines below are |
| ; autogenerated; every check block expects one 128-bit load (dwordx4 / b128) |
| ; followed by one 128-bit store. |
| define void @freeze_v8i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8i16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8i16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8i16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8i16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v8i16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v8i16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v8i16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v8i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x i16>, ptr addrspace(1) %ptra |
| %freeze = freeze <8 x i16> %a |
| store <8 x i16> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Loads a <16 x i16> from %ptra, freezes it, and stores the frozen value to |
| ; %ptrb (both pointers are in addrspace(1)). The check lines below are |
| ; autogenerated. Every check block expects two 128-bit loads and two 128-bit |
| ; stores covering byte offsets 0 and 16; the SDAG blocks access offset 16 |
| ; first, the GlobalISel blocks access offset 0 first. |
| ; NOTE(review): there is no SDAG check block for gfx803 or gfx900 in this |
| ; function. The gfx900 SDAG run line in the file header lists the prefixes |
| ; GFX9,GFX8-SDAG, which looks like a typo for a GFX9-SDAG prefix; presumably |
| ; the two targets then disagree under the shared prefix and the update script |
| ; drops it. Confirm, fix the run line, and regenerate the checks. |
| define void @freeze_v16i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v16i16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v16i16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v16i16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v16i16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v16i16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v16i16: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v16i16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v16i16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v16i16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v16i16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <16 x i16>, ptr addrspace(1) %ptra |
| %freeze = freeze <16 x i16> %a |
| store <16 x i16> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Scalar half case: load a half from %ptra, freeze it, and store the frozen |
| ; value to %ptrb. The check lines below are autogenerated llc output, one |
| ; group per check prefix; do not edit them by hand. |
| define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_f16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_f16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_f16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_ushort v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_short v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_f16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_ushort v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_short v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_f16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_ushort v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_f16: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_f16: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load half, ptr addrspace(1) %ptra |
| %freeze = freeze half %a |
| store half %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; <2 x half> case: load the vector from %ptra, freeze it, and store the frozen |
| ; value to %ptrb. Every prefix below checks a single 32-bit load followed by a |
| ; single 32-bit store. Check lines are autogenerated; do not edit by hand. |
| define void @freeze_v2f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2f16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2f16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v2f16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dword v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v2f16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v2f16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x half>, ptr addrspace(1) %ptra |
| %freeze = freeze <2 x half> %a |
| store <2 x half> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; <3 x half> case: load the vector from %ptra, freeze it, and store the frozen |
| ; value to %ptrb. In the checks below the SDAG prefixes store a 16-bit piece at |
| ; offset 4 plus a 32-bit piece, while the GISEL prefixes store three 16-bit |
| ; pieces. Check lines are autogenerated; do not edit by hand. |
| ; NOTE(review): this function has no GFX8-SDAG checks and no common GFX8/GFX9 |
| ; checks, so the gfx803 and gfx900 SelectionDAG outputs look unverified here. |
| ; Presumably this is because the gfx900 SDAG run line reuses the GFX8-SDAG |
| ; prefix; confirm whether a GFX9-SDAG prefix was intended and regenerate. |
| define void @freeze_v3f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3f16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 |
| ; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| ; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3f16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 |
| ; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| ; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0 |
| ; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0 |
| ; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8 |
| ; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v3f16: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2 |
| ; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v3f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4 |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v3f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2 |
| ; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v3f16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4 |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v3f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x2 |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2 |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x half>, ptr addrspace(1) %ptra |
| %freeze = freeze <3 x half> %a |
| store <3 x half> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; <4 x half> case: load the vector from %ptra, freeze it, and store the frozen |
| ; value to %ptrb. Every prefix below checks a single 64-bit load followed by a |
| ; single 64-bit store. Check lines are autogenerated; do not edit by hand. |
| define void @freeze_v4f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4f16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4f16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v4f16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v4f16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v4f16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v4f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x half>, ptr addrspace(1) %ptra |
| %freeze = freeze <4 x half> %a |
| store <4 x half> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; <8 x half> case: load the vector from %ptra, freeze it, and store the frozen |
| ; value to %ptrb. Every prefix below checks a single 128-bit load followed by |
| ; a single 128-bit store. Check lines are autogenerated; do not edit by hand. |
| define void @freeze_v8f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8f16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8f16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v8f16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v8f16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v8f16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v8f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x half>, ptr addrspace(1) %ptra |
| %freeze = freeze <8 x half> %a |
| store <8 x half> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; <16 x half> case: load the vector from %ptra, freeze it, and store the frozen |
| ; value to %ptrb. Every prefix below checks two 128-bit loads and two 128-bit |
| ; stores; the SDAG prefixes issue the offset:16 half first, the GISEL prefixes |
| ; issue the offset-0 half first. Check lines are autogenerated; do not edit by |
| ; hand. |
| ; NOTE(review): this function has no GFX8-SDAG checks and no common GFX8/GFX9 |
| ; checks, so the gfx803 and gfx900 SelectionDAG outputs look unverified here. |
| ; Presumably this is because the gfx900 SDAG run line reuses the GFX8-SDAG |
| ; prefix; confirm whether a GFX9-SDAG prefix was intended and regenerate. |
| define void @freeze_v16f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v16f16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v16f16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v16f16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v16f16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v16f16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v16f16: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v16f16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v16f16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v16f16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v16f16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <16 x half>, ptr addrspace(1) %ptra |
| %freeze = freeze <16 x half> %a |
| store <16 x half> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Scalar bfloat case: load a bfloat from %ptra, freeze it, and store the frozen |
| ; value to %ptrb. Check lines are autogenerated; do not edit by hand. |
| ; NOTE(review): the GFX6-SDAG and GFX7-SDAG checks show a shift-left by 16, a |
| ; v_mul_f32 by 1.0 and a shift-right by 16 between the load and the store; |
| ; every other prefix stores the loaded 16 bits directly. Confirm the extra |
| ; multiply is intended for a plain freeze before relying on these checks. |
| define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_bf16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| ; GFX6-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_bf16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_bf16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_bf16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_bf16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_ushort v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_short v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_bf16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_ushort v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_short v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_bf16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_ushort v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_bf16: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_bf16: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_bf16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load bfloat, ptr addrspace(1) %ptra |
| %freeze = freeze bfloat %a |
| store bfloat %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Freeze of a <2 x bfloat> that is loaded from one global (addrspace 1) pointer |
| ; and stored to a second one. For every checked target the expected code is a |
| ; single 32-bit load followed by a single 32-bit store; only s_waitcnt appears |
| ; between them, i.e. no instruction is expected for the freeze itself. |
| define void @freeze_v2bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2bf16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2bf16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2bf16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2bf16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v2bf16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dword v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v2bf16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v2bf16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x bfloat>, ptr addrspace(1) %ptra |
| %freeze = freeze <2 x bfloat> %a |
| store <2 x bfloat> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Freeze of a <3 x bfloat> that is loaded from one global (addrspace 1) pointer |
| ; and stored to a second one. All checked targets load 64 bits; the expected |
| ; code after the load differs per selector: |
| ;  - SelectionDAG on GFX6/GFX7 places each element in the high 16 bits of a |
| ;    dword (mask or shift), multiplies it by 1.0 with v_mul_f32, repacks the |
| ;    results, and stores a short at offset 4 plus a dword at offset 0. |
| ;  - SelectionDAG on GFX10/GFX11 stores the loaded registers directly |
| ;    (short at offset 4 plus a 32-bit store at offset 0). |
| ;  - GlobalISel stores three separate 16-bit values at offsets 0, 2 and 4. |
| ; NOTE(review): this function has no checks for SelectionDAG on gfx803 or |
| ; gfx900. The gfx900 SelectionDAG RUN line at the top of the file passes the |
| ; prefix list GFX9,GFX8-SDAG, which presumably conflicts with the gfx803 |
| ; SelectionDAG run and makes the update script drop the shared prefix. Consider |
| ; switching that line to a GFX9-SDAG prefix and regenerating the assertions. |
| define void @freeze_v3bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3bf16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| ; GFX6-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 |
| ; GFX6-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX6-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v4 |
| ; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| ; GFX6-SDAG-NEXT: v_alignbit_b32 v0, v4, v0, 16 |
| ; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3bf16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3bf16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v4 |
| ; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| ; GFX7-SDAG-NEXT: v_alignbit_b32 v0, v4, v0, 16 |
| ; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3bf16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3bf16: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0 |
| ; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0 |
| ; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8 |
| ; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v3bf16: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2 |
| ; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v3bf16: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4 |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v3bf16: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2 |
| ; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v3bf16: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4 |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v3bf16: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x2 |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2 |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x bfloat>, ptr addrspace(1) %ptra |
| %freeze = freeze <3 x bfloat> %a |
| store <3 x bfloat> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Freeze of a <4 x bfloat> that is loaded from one global (addrspace 1) pointer |
| ; and stored to a second one. For every checked target the expected code is a |
| ; single 64-bit load followed by a single 64-bit store, with only s_waitcnt in |
| ; between (no instruction is expected for the freeze itself). |
| define void @freeze_v4bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4bf16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4bf16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4bf16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4bf16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v4bf16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v4bf16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v4bf16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v4bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x bfloat>, ptr addrspace(1) %ptra |
| %freeze = freeze <4 x bfloat> %a |
| store <4 x bfloat> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Freeze of an <8 x bfloat> that is loaded from one global (addrspace 1) |
| ; pointer and stored to a second one. For every checked target the expected |
| ; code is a single 128-bit load into v[4:7] followed by a single 128-bit store, |
| ; with only s_waitcnt in between (no instruction is expected for the freeze). |
| define void @freeze_v8bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8bf16: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8bf16: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8bf16: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8bf16: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v8bf16: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v8bf16: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v8bf16: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v8bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x bfloat>, ptr addrspace(1) %ptra |
| %freeze = freeze <8 x bfloat> %a |
| store <8 x bfloat> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Freeze of a scalar double that is loaded from one global (addrspace 1) |
| ; pointer and stored to a second one. For every checked target the expected |
| ; code is a single 64-bit load followed by a single 64-bit store, with only |
| ; s_waitcnt in between (no instruction is expected for the freeze itself). |
| define void @freeze_f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_f64: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_f64: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_f64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_f64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_f64: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_f64: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_f64: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load double, ptr addrspace(1) %ptra |
| %freeze = freeze double %a |
| store double %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Freeze of a <2 x double> that is loaded from one global (addrspace 1) pointer |
| ; and stored to a second one. For every checked target the expected code is a |
| ; single 128-bit load into v[4:7] followed by a single 128-bit store, with only |
| ; s_waitcnt in between (no instruction is expected for the freeze itself). |
| define void @freeze_v2f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2f64: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2f64: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2f64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2f64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v2f64: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v2f64: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v2f64: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x double>, ptr addrspace(1) %ptra |
| %freeze = freeze <2 x double> %a |
| store <2 x double> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Freeze of a <3 x double> that is loaded from one global (addrspace 1) pointer |
| ; and stored to a second one. The expected code splits the access in two, |
| ; and the split differs per selector: |
| ;  - SelectionDAG loads 64 bits at byte offset 16 and 128 bits at offset 0, |
| ;    then stores the same two pieces in the same order. |
| ;  - GlobalISel loads two 128-bit pieces (offset 0 and offset 16) but stores |
| ;    128 bits at offset 0 and only 64 bits (v[8:9]) at offset 16. |
| ; No instruction other than the waits (and, on GFX8, address arithmetic) is |
| ; expected between the loads and the stores. |
| ; NOTE(review): this function has no checks for SelectionDAG on gfx803 or |
| ; gfx900. The gfx900 SelectionDAG RUN line at the top of the file passes the |
| ; prefix list GFX9,GFX8-SDAG, which presumably conflicts with the gfx803 |
| ; SelectionDAG run and makes the update script drop the shared prefix. Consider |
| ; switching that line to a GFX9-SDAG prefix and regenerating the assertions. |
| define void @freeze_v3f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3f64: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3f64: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3f64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3f64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3f64: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[8:9] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v3f64: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v3f64: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v3f64: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v3f64: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v3f64: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x double>, ptr addrspace(1) %ptra |
| %freeze = freeze <3 x double> %a |
| store <3 x double> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_v4f64 loads a <4 x double> from %ptra, freezes it, and stores the frozen value to %ptrb (both addrspace 1). |
| ; Every check block below expects two 128-bit loads (offsets 0 and 16) and two matching 128-bit stores, with no separate instruction for the freeze. |
| ; NOTE(review) there are no GFX8-SDAG or GFX9-SDAG check blocks for this function. The gfx900 SDAG RUN line at the top of the file |
| ; lists GFX8-SDAG as its second prefix, the same one used by the gfx803 SDAG run. Presumably the update script dropped the conflicting |
| ; prefix, which would leave the SelectionDAG output for gfx803 and gfx900 unchecked here. Confirm whether GFX9-SDAG was intended. |
| define void @freeze_v4f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4f64: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4f64: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4f64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4f64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v4f64: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v4f64: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v4f64: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v4f64: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v4f64: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v4f64: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x double>, ptr addrspace(1) %ptra |
| %freeze = freeze <4 x double> %a |
| store <4 x double> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_v8f64 loads an <8 x double> from %ptra, freezes it, and stores the frozen value to %ptrb (both addrspace 1). |
| ; Every check block below expects four 128-bit loads covering offsets 0, 16, 32 and 48 and four matching 128-bit stores. |
| ; The GFX8-GISEL block uses flat instructions without an offset field, so it also expects v_add_u32 / v_addc_u32 pairs that form each address. |
| ; NOTE(review) there are no GFX8-SDAG or GFX9-SDAG check blocks for this function. The gfx900 SDAG RUN line at the top of the file |
| ; lists GFX8-SDAG as its second prefix, the same one used by the gfx803 SDAG run. Presumably the update script dropped the conflicting |
| ; prefix, which would leave the SelectionDAG output for gfx803 and gfx900 unchecked here. Confirm whether GFX9-SDAG was intended. |
| define void @freeze_v8f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8f64: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8f64: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8f64: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8f64: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v8f64: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v8f64: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v8f64: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v8f64: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v8f64: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v8f64: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x double>, ptr addrspace(1) %ptra |
| %freeze = freeze <8 x double> %a |
| store <8 x double> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_p0 loads a single ptr value from %ptra, freezes it, and stores the frozen pointer to %ptrb (both addrspace 1). |
| ; Every check block below expects one 64-bit load followed by one 64-bit store, with no separate instruction for the freeze. |
| ; GFX8 through GFX11 use the shared per-target prefixes here, so one block per target covers both instruction selectors. |
| define void @freeze_p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_p0: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_p0: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_p0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_p0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_p0: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_p0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_p0: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_p0: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load ptr, ptr addrspace(1) %ptra |
| %freeze = freeze ptr %a |
| store ptr %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_v2p0 loads a <2 x ptr> from %ptra, freezes it, and stores the frozen vector to %ptrb (both addrspace 1). |
| ; Every check block below expects one 128-bit load followed by one 128-bit store, with no separate instruction for the freeze. |
| ; GFX8 through GFX11 use the shared per-target prefixes here, so one block per target covers both instruction selectors. |
| define void @freeze_v2p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2p0: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2p0: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2p0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2p0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v2p0: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v2p0: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v2p0: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2p0: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x ptr>, ptr addrspace(1) %ptra |
| %freeze = freeze <2 x ptr> %a |
| store <2 x ptr> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3p0: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3p0: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3p0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3p0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3p0: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8 |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v3p0: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off |
| ; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off |
| ; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v3p0: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v3p0: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off |
| ; GFX10-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v5 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v3p0: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v3p0: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[6:9], off |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x ptr>, ptr addrspace(1) %ptra |
| %freeze = freeze <3 x ptr> %a |
| store <3 x ptr> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Test freeze of a <4 x ptr> value. The function loads the vector from %ptra, |
| ; freezes it, and stores the frozen value to %ptrb (both are addrspace(1)). |
| ; Every check block below expects two 128-bit loads and two 128-bit stores |
| ; (byte offsets 0 and 16) and no instruction for the freeze itself. |
| ; Check blocks exist for the SDAG and GISEL runs of GFX6, GFX7, GFX10 and |
| ; GFX11, but only for the GISEL runs of GFX8 and GFX9. |
| ; NOTE(review) the gfx900 SDAG RUN line at the top of this file lists GFX8-SDAG |
| ; as its second prefix. That looks like a typo for GFX9-SDAG and is presumably |
| ; why no SDAG checks are present for gfx803/gfx900 - confirm, fix the RUN line, |
| ; and regenerate with utils/update_llc_test_checks.py. |
| define void @freeze_v4p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4p0: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4p0: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4p0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4p0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v4p0: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v4p0: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v4p0: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v4p0: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v4p0: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v4p0: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x ptr>, ptr addrspace(1) %ptra |
| %freeze = freeze <4 x ptr> %a |
| store <4 x ptr> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Test freeze of an <8 x ptr> value. The function loads the vector from %ptra, |
| ; freezes it, and stores the frozen value to %ptrb (both are addrspace(1)). |
| ; Every check block below expects four 128-bit loads and four 128-bit stores |
| ; (byte offsets 0, 16, 32 and 48) and no instruction for the freeze itself. |
| ; In the GFX8 GISEL block the flat accesses carry no offset operand, so each |
| ; offset address is formed with a v_add_u32/v_addc_u32 pair instead. |
| ; Check blocks exist for the SDAG and GISEL runs of GFX6, GFX7, GFX10 and |
| ; GFX11, but only for the GISEL runs of GFX8 and GFX9. |
| ; NOTE(review) the gfx900 SDAG RUN line at the top of this file lists GFX8-SDAG |
| ; as its second prefix. That looks like a typo for GFX9-SDAG and is presumably |
| ; why no SDAG checks are present for gfx803/gfx900 - confirm, fix the RUN line, |
| ; and regenerate with utils/update_llc_test_checks.py. |
| define void @freeze_v8p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8p0: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8p0: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8p0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8p0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v8p0: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v8p0: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v8p0: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v8p0: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v8p0: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v8p0: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x ptr>, ptr addrspace(1) %ptra |
| %freeze = freeze <8 x ptr> %a |
| store <8 x ptr> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| define void @freeze_v16p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v16p0: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v16p0: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v16p0: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v16p0: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v16p0: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38 |
| ; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v16p0: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v16p0: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x7 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v16p0: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x7 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v16p0: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x7 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v16p0: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x7 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <16 x ptr>, ptr addrspace(1) %ptra |
| %freeze = freeze <16 x ptr> %a |
| store <16 x ptr> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Scalar case: loads one ptr addrspace(1) value from %ptra, freezes it, and |
| ; stores the frozen value to %ptrb. |
| ; Every check block expects one 64-bit load into v[0:1] and one 64-bit store |
| ; of v[0:1]. GFX6/GFX7 keep separate SDAG and GISEL blocks because the two |
| ; selectors zero s[4:5] differently (two s_mov_b32 vs. one s_mov_b64); GFX8 |
| ; and newer use a single shared block per target. |
| define void @freeze_p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_p1: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_p1: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_p1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_p1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_p1: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_p1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_p1: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_p1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load ptr addrspace(1), ptr addrspace(1) %ptra |
| %freeze = freeze ptr addrspace(1) %a |
| store ptr addrspace(1) %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Two-element case: loads a <2 x ptr addrspace(1)> vector from %ptra, freezes |
| ; it, and stores the frozen vector to %ptrb. |
| ; Every check block expects one 128-bit load into v[4:7] and one 128-bit |
| ; store of v[4:7]. As in the scalar test, only GFX6/GFX7 have separate SDAG |
| ; and GISEL blocks (they differ in how s[4:5] is zeroed). |
| define void @freeze_v2p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2p1: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2p1: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2p1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2p1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v2p1: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v2p1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v2p1: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2p1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x ptr addrspace(1)>, ptr addrspace(1) %ptra |
| %freeze = freeze <2 x ptr addrspace(1)> %a |
| store <2 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Three-element case: loads a <3 x ptr addrspace(1)> vector from %ptra, |
| ; freezes it, and stores the frozen vector to %ptrb. |
| ; The two selectors split the 24 bytes differently in the expected output: |
| ;  - SDAG blocks: a 64-bit load at offset 16 into v[8:9] plus a 128-bit load |
| ;    at offset 0 into v[4:7], stored back with the same two widths. |
| ;  - GISEL blocks: two 128-bit loads (offsets 0 and 16); the low two dwords of |
| ;    the offset-16 load are copied into v[0:1] and stored as 64 bits. GFX8 |
| ;    computes the +16 addresses with v_add_u32/v_addc_u32 instead of an |
| ;    immediate offset. |
| ; NOTE(review): there are no GFX8-SDAG or GFX9-SDAG check blocks here. The |
| ; gfx900 SDAG RUN line at the top of the file passes GFX8-SDAG rather than |
| ; GFX9-SDAG, which may be why they were dropped -- confirm. |
| define void @freeze_v3p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3p1: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3p1: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3p1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3p1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3p1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8 |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v3p1: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off |
| ; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off |
| ; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v3p1: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v3p1: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off |
| ; GFX10-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v4 |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v5 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v3p1: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v3p1: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[6:9], off |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x ptr addrspace(1)>, ptr addrspace(1) %ptra |
| %freeze = freeze <3 x ptr addrspace(1)> %a |
| store <3 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Four-element case: loads a <4 x ptr addrspace(1)> vector from %ptra, |
| ; freezes it, and stores the frozen vector to %ptrb. |
| ; Every check block expects two 128-bit loads and two 128-bit stores covering |
| ; offsets 0 and 16. The SDAG blocks issue the offset-16 access first, the |
| ; GISEL blocks the offset-0 access first. GFX8 GISEL computes the +16 |
| ; addresses with v_add_u32/v_addc_u32 instead of an immediate offset. |
| ; NOTE(review): there are no GFX8-SDAG or GFX9-SDAG check blocks here. The |
| ; gfx900 SDAG RUN line at the top of the file passes GFX8-SDAG rather than |
| ; GFX9-SDAG, which may be why they were dropped -- confirm. |
| define void @freeze_v4p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4p1: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4p1: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4p1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4p1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v4p1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v4p1: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v4p1: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x1 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v4p1: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x1 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v4p1: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v4p1: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x ptr addrspace(1)>, ptr addrspace(1) %ptra |
| %freeze = freeze <4 x ptr addrspace(1)> %a |
| store <4 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_v8p1 - load an <8 x ptr addrspace(1)> from %ptra, freeze it, and store |
| ; the frozen vector to %ptrb. |
| ; Every check block below expects four 128-bit loads followed by four 128-bit |
| ; stores of the same data registers. The SDAG blocks access the 16-byte pieces |
| ; starting at offset 32; the GISEL blocks go in ascending order from the base. |
| ; NOTE(review): there is no GFX8-SDAG or GFX9-SDAG block for this function. The |
| ; gfx900 SelectionDAG RUN line at the top of the file passes |
| ; -check-prefixes=GFX9,GFX8-SDAG, sharing GFX8-SDAG with the gfx803 run; |
| ; presumably the two outputs differ and the update script drops the shared |
| ; prefix. TODO confirm, then consider GFX9-SDAG there and regenerate. |
| define void @freeze_v8p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8p1: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8p1: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8p1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8p1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v8p1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v8p1: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v8p1: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x3 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v8p1: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x3 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v8p1: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v8p1: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; The freeze sits between the load and the store, so the value written to %ptrb |
| ; is the frozen copy of the vector read from %ptra. |
| %a = load <8 x ptr addrspace(1)>, ptr addrspace(1) %ptra |
| %freeze = freeze <8 x ptr addrspace(1)> %a |
| store <8 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_v16p1 - load a <16 x ptr addrspace(1)> from %ptra, freeze it, and store |
| ; the frozen vector to %ptrb. |
| ; Every check block below expects eight 128-bit loads followed by eight 128-bit |
| ; stores of the same data registers. The SDAG blocks access the 16-byte pieces |
| ; starting at offset 96; the GISEL blocks that use an offset operand go in |
| ; ascending order from offset 0, while the GFX8-GISEL block forms each address |
| ; with explicit adds and touches the +16 piece first. |
| ; NOTE(review): there is no GFX8-SDAG or GFX9-SDAG block for this function. The |
| ; gfx900 SelectionDAG RUN line at the top of the file passes |
| ; -check-prefixes=GFX9,GFX8-SDAG, sharing GFX8-SDAG with the gfx803 run; |
| ; presumably the two outputs differ and the update script drops the shared |
| ; prefix. TODO confirm, then consider GFX9-SDAG there and regenerate. |
| define void @freeze_v16p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v16p1: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v16p1: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v16p1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v16p1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v16p1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29] |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7] |
| ; GFX8-GISEL-NEXT: s_nop 0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38 |
| ; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5] |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v16p1: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v16p1: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_clause 0x7 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48 |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16 |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v16p1: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_clause 0x7 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v16p1: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x7 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48 |
| ; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off |
| ; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v16p1: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_clause 0x7 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 |
| ; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; The freeze sits between the load and the store, so the value written to %ptrb |
| ; is the frozen copy of the vector read from %ptra. |
| %a = load <16 x ptr addrspace(1)>, ptr addrspace(1) %ptra |
| %freeze = freeze <16 x ptr addrspace(1)> %a |
| store <16 x ptr addrspace(1)> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_p3 - load a single ptr addrspace(3) from %ptra, freeze it, and store it |
| ; to %ptrb; both memory operands are themselves addrspace(3) pointers. |
| ; The checks use the per-target prefixes that the SDAG and GISEL RUN lines have |
| ; in common, so one block covers both selectors. Each block expects one 32-bit |
| ; DS load and one 32-bit DS store, each followed by a wait on lgkmcnt. The GFX6, |
| ; GFX7 and GFX8 blocks also expect "s_mov_b32 m0, -1" before the DS load |
| ; (presumably those targets need M0 set up for DS access - TODO confirm). |
| define void @freeze_p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { |
| ; GFX6-LABEL: freeze_p3: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-NEXT: ds_read_b32 v0, v0 |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: ds_write_b32 v1, v0 |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: freeze_p3: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_mov_b32 m0, -1 |
| ; GFX7-NEXT: ds_read_b32 v0, v0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: ds_write_b32 v1, v0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_p3: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-NEXT: ds_read_b32 v0, v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: ds_write_b32 v1, v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_p3: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: ds_read_b32 v0, v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: ds_write_b32 v1, v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_p3: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: ds_read_b32 v0, v0 |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: ds_write_b32 v1, v0 |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_p3: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: ds_load_b32 v0, v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: ds_store_b32 v1, v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; The freeze sits between the load and the store, so the value written to %ptrb |
| ; is the frozen copy of the pointer read from %ptra. |
| %a = load ptr addrspace(3), ptr addrspace(3) %ptra |
| %freeze = freeze ptr addrspace(3) %a |
| store ptr addrspace(3) %freeze, ptr addrspace(3) %ptrb |
| ret void |
| } |
| |
| ; freeze of <2 x ptr addrspace(3)>: the vector is loaded through %ptra, |
| ; frozen, and stored through %ptrb. All targets are expected to move the whole |
| ; vector with a single 64-bit DS read and a single 64-bit DS write. As in the |
| ; checks below, only GFX6, GFX7 and GFX8 expect the m0 = -1 setup. |
| define void @freeze_v2p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { |
| ; GFX6-LABEL: freeze_v2p3: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: freeze_v2p3: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_mov_b32 m0, -1 |
| ; GFX7-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v2p3: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v2p3: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v2p3: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2p3: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: ds_load_b64 v[2:3], v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: ds_store_b64 v1, v[2:3] |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x ptr addrspace(3)>, ptr addrspace(3) %ptra |
| %freeze = freeze <2 x ptr addrspace(3)> %a |
| store <2 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb |
| ret void |
| } |
| |
| ; freeze of <3 x ptr addrspace(3)>: the vector is loaded through %ptra, |
| ; frozen, and stored through %ptrb. On GFX6 the checks expect the access to be |
| ; split into a 64-bit piece at the base address and a 32-bit piece at base + 8 |
| ; (address computed with an explicit add); SelectionDAG and GlobalISel issue |
| ; the two pieces in a different order, hence the separate check blocks. From |
| ; GFX7 onwards a single 96-bit DS read and write is expected. |
| define void @freeze_v3p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3p3: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-SDAG-NEXT: ds_read_b32 v4, v2 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX6-SDAG-NEXT: ds_write_b32 v0, v4 |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX6-SDAG-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3p3: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v0 |
| ; GFX6-GISEL-NEXT: ds_read_b32 v0, v0 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 8, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX6-GISEL-NEXT: ds_write_b32 v1, v0 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: freeze_v3p3: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_mov_b32 m0, -1 |
| ; GFX7-NEXT: ds_read_b96 v[2:4], v0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: ds_write_b96 v1, v[2:4] |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v3p3: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-NEXT: ds_read_b96 v[2:4], v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: ds_write_b96 v1, v[2:4] |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v3p3: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: ds_read_b96 v[2:4], v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: ds_write_b96 v1, v[2:4] |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v3p3: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: ds_read_b96 v[2:4], v0 |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: ds_write_b96 v1, v[2:4] |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v3p3: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: ds_load_b96 v[2:4], v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: ds_store_b96 v1, v[2:4] |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x ptr addrspace(3)>, ptr addrspace(3) %ptra |
| %freeze = freeze <3 x ptr addrspace(3)> %a |
| store <3 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb |
| ret void |
| } |
| |
| ; freeze of <4 x ptr addrspace(3)>: the vector is loaded through %ptra, |
| ; frozen, and stored through %ptrb. The GFX6 checks expect two 64-bit DS |
| ; reads and writes, with the second half addressed at base + 8 through an |
| ; explicit add. From GFX7 onwards a single 128-bit DS read and write is |
| ; expected. |
| define void @freeze_v4p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { |
| ; GFX6-LABEL: freeze_v4p3: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 8, v0 |
| ; GFX6-NEXT: ds_read_b64 v[4:5], v0 |
| ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 8, v1 |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX6-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX6-NEXT: ds_write_b64 v0, v[4:5] |
| ; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: freeze_v4p3: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_mov_b32 m0, -1 |
| ; GFX7-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX7-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_v4p3: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v4p3: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v4p3: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v4p3: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: ds_load_b128 v[2:5], v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: ds_store_b128 v1, v[2:5] |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x ptr addrspace(3)>, ptr addrspace(3) %ptra |
| %freeze = freeze <4 x ptr addrspace(3)> %a |
| store <4 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb |
| ret void |
| } |
| |
| ; freeze of <8 x ptr addrspace(3)>: the vector is loaded through %ptra, |
| ; frozen, and stored through %ptrb. The GFX6 checks expect four 64-bit DS |
| ; reads and writes, each non-zero offset (8, 16, 24) computed with an explicit |
| ; add. From GFX7 onwards two 128-bit DS reads and writes are expected, the |
| ; second half addressed with the offset field set to 16. SelectionDAG and |
| ; GlobalISel order the pieces differently, hence the per-selector blocks. |
| ; |
| ; NOTE(review) - this function has no GFX8-SDAG or GFX9-SDAG check block. The |
| ; gfx900 SelectionDAG RUN line at the top of the file passes the GFX8-SDAG |
| ; prefix, so gfx803 and gfx900 share it; presumably their outputs differ and |
| ; the update script dropped the conflicting block. Confirm whether that RUN |
| ; line was meant to use GFX9-SDAG, then regenerate the checks. |
| define void @freeze_v8p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8p3: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[4:5], v4 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[6:7], v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[8:9], v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 16, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(2) |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[4:5] |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 24, v1 |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[2:3] |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX6-SDAG-NEXT: ds_write_b64 v1, v[6:7] |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[8:9] |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8p3: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 8, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v0 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[4:5], v4 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[6:7], v6 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[8:9], v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(2) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[4:5] |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 16, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(2) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[6:7] |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v1 |
| ; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[8:9] |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8p3: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 m0, -1 |
| ; GFX7-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:16 |
| ; GFX7-SDAG-NEXT: ds_read_b128 v[6:9], v0 |
| ; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX7-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX7-SDAG-NEXT: ds_write_b128 v1, v[6:9] |
| ; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8p3: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 m0, -1 |
| ; GFX7-GISEL-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX7-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX7-GISEL-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX7-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v8p3: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-GISEL-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX8-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 |
| ; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX8-GISEL-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX8-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 |
| ; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v8p3: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX9-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX9-GISEL-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX9-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v8p3: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:16 |
| ; GFX10-SDAG-NEXT: ds_read_b128 v[6:9], v0 |
| ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX10-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX10-SDAG-NEXT: ds_write_b128 v1, v[6:9] |
| ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v8p3: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX10-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX10-GISEL-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX10-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v8p3: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: ds_load_b128 v[2:5], v0 offset:16 |
| ; GFX11-SDAG-NEXT: ds_load_b128 v[6:9], v0 |
| ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX11-SDAG-NEXT: ds_store_b128 v1, v[2:5] offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX11-SDAG-NEXT: ds_store_b128 v1, v[6:9] |
| ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v8p3: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: ds_load_b128 v[2:5], v0 |
| ; GFX11-GISEL-NEXT: ds_load_b128 v[6:9], v0 offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX11-GISEL-NEXT: ds_store_b128 v1, v[2:5] |
| ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX11-GISEL-NEXT: ds_store_b128 v1, v[6:9] offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x ptr addrspace(3)>, ptr addrspace(3) %ptra |
| %freeze = freeze <8 x ptr addrspace(3)> %a |
| store <8 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb |
| ret void |
| } |
| |
| ; freeze of <16 x ptr addrspace(3)>: the vector is loaded through %ptra, |
| ; frozen, and stored through %ptrb. The GFX6 checks expect eight 64-bit DS |
| ; reads and writes, each non-zero offset (8 through 56) computed with an |
| ; explicit add. From GFX7 onwards four 128-bit DS reads and writes are |
| ; expected, using the offset field for 16, 32 and 48. SelectionDAG and |
| ; GlobalISel order the pieces differently, hence the per-selector blocks. |
| ; |
| ; NOTE(review) - this function has no GFX8-SDAG or GFX9-SDAG check block. The |
| ; gfx900 SelectionDAG RUN line at the top of the file passes the GFX8-SDAG |
| ; prefix, so gfx803 and gfx900 share it; presumably their outputs differ and |
| ; the update script dropped the conflicting block. Confirm whether that RUN |
| ; line was meant to use GFX9-SDAG, then regenerate the checks. |
| define void @freeze_v16p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v16p3: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 40, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 32, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v16, vcc, 56, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 48, v0 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v2 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[4:5], v4 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[6:7], v6 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[8:9], v0 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[10:11], v10 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[12:13], v12 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[14:15], v14 |
| ; GFX6-SDAG-NEXT: ds_read_b64 v[16:17], v16 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 48, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[10:11] |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 56, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1) |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[16:17] |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 32, v1 |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[14:15] |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 40, v1 |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[12:13] |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 16, v1 |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[4:5] |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 24, v1 |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[2:3] |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1 |
| ; GFX6-SDAG-NEXT: ds_write_b64 v1, v[8:9] |
| ; GFX6-SDAG-NEXT: ds_write_b64 v0, v[6:7] |
| ; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v16p3: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 8, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 m0, -1 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[4:5], v4 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[6:7], v6 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[8:9], v8 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 56, v0 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[10:11], v10 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[12:13], v12 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[14:15], v14 |
| ; GFX6-GISEL-NEXT: ds_read_b64 v[16:17], v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[4:5] |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 16, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[6:7] |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[8:9] |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 32, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[10:11] |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 40, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[12:13] |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 48, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[14:15] |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 56, v1 |
| ; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3] |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(7) |
| ; GFX6-GISEL-NEXT: ds_write_b64 v0, v[16:17] |
| ; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v16p3: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 m0, -1 |
| ; GFX7-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:32 |
| ; GFX7-SDAG-NEXT: ds_read_b128 v[6:9], v0 offset:48 |
| ; GFX7-SDAG-NEXT: ds_read_b128 v[10:13], v0 |
| ; GFX7-SDAG-NEXT: ds_read_b128 v[14:17], v0 offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX7-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:32 |
| ; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX7-SDAG-NEXT: ds_write_b128 v1, v[6:9] offset:48 |
| ; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX7-SDAG-NEXT: ds_write_b128 v1, v[10:13] |
| ; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX7-SDAG-NEXT: ds_write_b128 v1, v[14:17] offset:16 |
| ; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v16p3: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 m0, -1 |
| ; GFX7-GISEL-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX7-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 |
| ; GFX7-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32 |
| ; GFX7-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX7-GISEL-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX7-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 |
| ; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX7-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32 |
| ; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX7-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48 |
| ; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v16p3: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-GISEL-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX8-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 |
| ; GFX8-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32 |
| ; GFX8-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48 |
| ; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX8-GISEL-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX8-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 |
| ; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX8-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32 |
| ; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX8-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48 |
| ; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v16p3: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX9-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 |
| ; GFX9-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32 |
| ; GFX9-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX9-GISEL-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX9-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 |
| ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX9-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32 |
| ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX9-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48 |
| ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v16p3: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:32 |
| ; GFX10-SDAG-NEXT: ds_read_b128 v[6:9], v0 offset:48 |
| ; GFX10-SDAG-NEXT: ds_read_b128 v[10:13], v0 |
| ; GFX10-SDAG-NEXT: ds_read_b128 v[14:17], v0 offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX10-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:32 |
| ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX10-SDAG-NEXT: ds_write_b128 v1, v[6:9] offset:48 |
| ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX10-SDAG-NEXT: ds_write_b128 v1, v[10:13] |
| ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX10-SDAG-NEXT: ds_write_b128 v1, v[14:17] offset:16 |
| ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v16p3: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: ds_read_b128 v[2:5], v0 |
| ; GFX10-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 |
| ; GFX10-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32 |
| ; GFX10-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX10-GISEL-NEXT: ds_write_b128 v1, v[2:5] |
| ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX10-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 |
| ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX10-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32 |
| ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX10-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48 |
| ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v16p3: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: ds_load_b128 v[2:5], v0 offset:32 |
| ; GFX11-SDAG-NEXT: ds_load_b128 v[6:9], v0 offset:48 |
| ; GFX11-SDAG-NEXT: ds_load_b128 v[10:13], v0 |
| ; GFX11-SDAG-NEXT: ds_load_b128 v[14:17], v0 offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX11-SDAG-NEXT: ds_store_b128 v1, v[2:5] offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX11-SDAG-NEXT: ds_store_b128 v1, v[6:9] offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX11-SDAG-NEXT: ds_store_b128 v1, v[10:13] |
| ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX11-SDAG-NEXT: ds_store_b128 v1, v[14:17] offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v16p3: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: ds_load_b128 v[2:5], v0 |
| ; GFX11-GISEL-NEXT: ds_load_b128 v[6:9], v0 offset:16 |
| ; GFX11-GISEL-NEXT: ds_load_b128 v[10:13], v0 offset:32 |
| ; GFX11-GISEL-NEXT: ds_load_b128 v[14:17], v0 offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX11-GISEL-NEXT: ds_store_b128 v1, v[2:5] |
| ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX11-GISEL-NEXT: ds_store_b128 v1, v[6:9] offset:16 |
| ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX11-GISEL-NEXT: ds_store_b128 v1, v[10:13] offset:32 |
| ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(3) |
| ; GFX11-GISEL-NEXT: ds_store_b128 v1, v[14:17] offset:48 |
| ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <16 x ptr addrspace(3)>, ptr addrspace(3) %ptra |
| %freeze = freeze <16 x ptr addrspace(3)> %a |
| store <16 x ptr addrspace(3)> %freeze, ptr addrspace(3) %ptrb |
| ret void |
| } |
| |
| ; freeze of a single ptr addrspace(5) value: the pointer is loaded through |
| ; %ptra, frozen, and stored through %ptrb (both addrspace(5) pointers). |
| ; GFX6 through GFX10 are expected to use one buffer_load_dword and one |
| ; buffer_store_dword in offen mode with the s[0:3] resource; GFX11 is expected |
| ; to use scratch_load_b32 and scratch_store_b32 instead. The wait after the |
| ; store differs per target: GFX6 waits on vmcnt and expcnt, GFX7 to GFX9 wait |
| ; on vmcnt only, and the GFX10 and GFX11 checks expect no wait before the |
| ; return. |
| define void @freeze_p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { |
| ; GFX6-LABEL: freeze_p5: |
| ; GFX6: ; %bb.0: |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-LABEL: freeze_p5: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_p5: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_p5: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_p5: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_p5: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: scratch_load_b32 v0, v0, off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_store_b32 v1, v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load ptr addrspace(5), ptr addrspace(5) %ptra |
| %freeze = freeze ptr addrspace(5) %a |
| store ptr addrspace(5) %freeze, ptr addrspace(5) %ptrb |
| ret void |
| } |
| |
| define void @freeze_v2p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2p5: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2p5: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 4, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 4, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2p5: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v1 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2p5: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 4, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 4, v1 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v2p5: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 4, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 4, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v2p5: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(1) |
| ; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v2p5: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x1 |
| ; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v2p5: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: scratch_load_b64 v[2:3], v0, off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_store_b64 v1, v[2:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x ptr addrspace(5)>, ptr addrspace(5) %ptra |
| %freeze = freeze <2 x ptr addrspace(5)> %a |
| store <2 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb |
| ret void |
| } |
| |
| ; freeze_v3p5: loads a <3 x ptr addrspace(5)> from %ptra, freezes it, and stores |
| ; the frozen vector to %ptrb. The autogenerated checks below expect three dword |
| ; buffer loads and stores in the GFX6 through GFX10 blocks, and a single |
| ; scratch_load_b96 / scratch_store_b96 pair in the GFX11 block. |
| ; NOTE(review): no GFX8-SDAG block is present for this function. The gfx900 SDAG |
| ; run line at the top of the file also uses the GFX8-SDAG prefix, which |
| ; presumably conflicts with the gfx803 SDAG output -- confirm whether GFX9-SDAG |
| ; was intended there and regenerate the checks. |
| define void @freeze_v3p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3p5: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 4, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 8, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v5, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3p5: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 4, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 8, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3p5: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 4, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 8, v1 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v5, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3p5: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 4, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 8, v1 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3p5: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 4, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 8, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v3p5: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: s_waitcnt vmcnt(2) |
| ; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v3p5: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x2 |
| ; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v3p5: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: scratch_load_b96 v[2:4], v0, off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_store_b96 v1, v[2:4], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x ptr addrspace(5)>, ptr addrspace(5) %ptra |
| %freeze = freeze <3 x ptr addrspace(5)> %a |
| store <3 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb |
| ret void |
| } |
| |
| ; freeze_v4p5: loads a <4 x ptr addrspace(5)> from %ptra, freezes it, and stores |
| ; the frozen vector to %ptrb. The autogenerated checks below expect four dword |
| ; buffer loads and stores in the GFX6 through GFX10 blocks, and a single |
| ; scratch_load_b128 / scratch_store_b128 pair in the GFX11 block. |
| ; NOTE(review): no GFX8-SDAG block is present for this function. The gfx900 SDAG |
| ; run line at the top of the file also uses the GFX8-SDAG prefix, which |
| ; presumably conflicts with the gfx803 SDAG output -- confirm whether GFX9-SDAG |
| ; was intended there and regenerate the checks. |
| define void @freeze_v4p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4p5: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 12, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 4, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 12, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v7, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4p5: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 4, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 8, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 12, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4p5: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 12, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 4, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 12, v1 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v7, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4p5: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 4, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 8, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 12, v1 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v4p5: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 12, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 4, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 8, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 12, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v4p5: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; GFX9-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: s_waitcnt vmcnt(3) |
| ; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v4p5: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x3 |
| ; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; GFX10-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; GFX10-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_v4p5: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: scratch_load_b128 v[2:5], v0, off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_store_b128 v1, v[2:5], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x ptr addrspace(5)>, ptr addrspace(5) %ptra |
| %freeze = freeze <4 x ptr addrspace(5)> %a |
| store <4 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb |
| ret void |
| } |
| |
| ; freeze_v8p5: loads an <8 x ptr addrspace(5)> from %ptra, freezes it, and stores |
| ; the frozen vector to %ptrb. The autogenerated checks below expect eight dword |
| ; buffer loads and stores in the GFX6 through GFX10 blocks, and two |
| ; scratch_load_b128 / scratch_store_b128 pairs on GFX11. |
| ; The GFX11 checks are split per selector: the SDAG block addresses the upper |
| ; half with an immediate offset of 16, while the GISEL block computes that |
| ; address with a separate v_add_nc_u32. |
| ; NOTE(review): no GFX8-SDAG block is present for this function. The gfx900 SDAG |
| ; run line at the top of the file also uses the GFX8-SDAG prefix, which |
| ; presumably conflicts with the gfx803 SDAG output -- confirm whether GFX9-SDAG |
| ; was intended there and regenerate the checks. |
| define void @freeze_v8p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8p5: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 20, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 12, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 4, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 28, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, 4, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 8, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v11, vcc, 12, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 16, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v13, vcc, 20, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v15, vcc, 28, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v7, v9, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v6, v10, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v5, v11, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v4, v12, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v3, v13, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v2, v14, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v15, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8p5: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 28, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, 4, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 8, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v11, vcc, 12, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 16, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v13, vcc, 20, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 24, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v15, vcc, 28, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8p5: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 20, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 12, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 4, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 28, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v9, vcc, 4, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, 8, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 12, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v12, vcc, 16, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, 20, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v15, vcc, 28, v1 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v7, v9, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v6, v10, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v5, v11, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v4, v12, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v3, v13, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v2, v14, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v15, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8p5: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, 28, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v9, vcc, 4, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v10, vcc, 8, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 12, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 16, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 20, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 24, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v15, vcc, 28, v1 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v8p5: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 12, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 20, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 24, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 28, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v9, vcc, 4, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 8, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 12, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 16, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 20, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 24, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 28, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v8p5: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; GFX9-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; GFX9-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; GFX9-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v8p5: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0x7 |
| ; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; GFX10-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; GFX10-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; GFX10-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; GFX10-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; GFX10-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28 |
| ; GFX10-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX10-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; GFX10-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; GFX10-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v8p5: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-NEXT: scratch_load_b128 v[2:5], v0, off offset:16 |
| ; GFX11-SDAG-NEXT: scratch_load_b128 v[6:9], v0, off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[2:5], off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[6:9], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v8p5: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v6, 16, v0 |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: scratch_load_b128 v[2:5], v0, off |
| ; GFX11-GISEL-NEXT: scratch_load_b128 v[6:9], v6, off |
| ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 16, v1 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: scratch_store_b128 v1, v[2:5], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: scratch_store_b128 v0, v[6:9], off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x ptr addrspace(5)>, ptr addrspace(5) %ptra |
| %freeze = freeze <8 x ptr addrspace(5)> %a |
| store <8 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb |
| ret void |
| } |
| |
| ; Loads a <16 x ptr addrspace(5)> vector from %ptra, freezes it, and stores the |
| ; frozen vector to %ptrb. Both pointer arguments are in addrspace(5). |
| ; |
| ; The expected code below is sixteen buffer_load_dword / buffer_store_dword |
| ; pairs for the GFX6 through GFX10 check blocks (GFX9 and GFX10 fold the element |
| ; offsets into the instruction offset field, the older targets compute the |
| ; non-zero element addresses with separate v_add instructions), and four |
| ; scratch_load_b128 / scratch_store_b128 pairs for the GFX11 check blocks. |
| ; |
| ; NOTE(review) there is no GFX8-SDAG check block for this function, although |
| ; the gfx803 SelectionDAG RUN line uses that prefix. The gfx900 SelectionDAG RUN |
| ; line at the top of the file also lists GFX8-SDAG (not GFX9-SDAG); presumably |
| ; the two runs disagree, so the update script emits no checks for that prefix |
| ; and the gfx803 SelectionDAG output is unchecked here -- confirm, fix the RUN |
| ; line and regenerate the assertions. |
| define void @freeze_v16p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v16p5: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 16, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 12, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 8, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 4, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 56, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 52, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 48, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, 44, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 40, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v11, vcc, 36, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 28, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v13, vcc, 24, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 20, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v16, vcc, 32, v0 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 60, v0 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v17, vcc, 4, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v18, vcc, 8, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v19, vcc, 12, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v6, v19, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v7, v18, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v8, v17, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 16, v1 |
| ; GFX6-SDAG-NEXT: buffer_store_dword v5, v8, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 40, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v17, vcc, 20, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 24, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v18, vcc, 28, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 32, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v19, vcc, 36, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 44, v1 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v14, v17, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v13, v7, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v12, v18, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v16, v6, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: buffer_store_dword v9, v8, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 48, v1 |
| ; GFX6-SDAG-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 52, v1 |
| ; GFX6-SDAG-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 56, v1 |
| ; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, 60, v1 |
| ; GFX6-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v16p5: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 12, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 20, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, 28, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v11, vcc, 36, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v13, vcc, 44, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v15, vcc, 52, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v16, vcc, 56, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 60, v0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v17, vcc, 4, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v18, vcc, 8, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v19, vcc, 12, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v17, vcc, 20, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(12) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 40, v1 |
| ; GFX6-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 24, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v18, vcc, 28, v1 |
| ; GFX6-GISEL-NEXT: buffer_store_dword v5, v19, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt expcnt(0) |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v19, vcc, 36, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 44, v1 |
| ; GFX6-GISEL-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v7, v17, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v9, v18, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v13, v6, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 48, v1 |
| ; GFX6-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 52, v1 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 56, v1 |
| ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 60, v1 |
| ; GFX6-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v16p5: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 16, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 12, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 8, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 4, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 56, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 52, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 48, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v9, vcc, 44, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, 40, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 36, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v12, vcc, 28, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, 24, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, 20, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v16, vcc, 32, v0 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 60, v0 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v17, vcc, 4, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v18, vcc, 8, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v19, vcc, 12, v1 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v6, v19, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v7, v18, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v8, v17, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 16, v1 |
| ; GFX7-SDAG-NEXT: buffer_store_dword v5, v8, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 40, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v17, vcc, 20, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 24, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v18, vcc, 28, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 32, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v19, vcc, 36, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 44, v1 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v14, v17, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v13, v7, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v12, v18, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v16, v6, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: buffer_store_dword v9, v8, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 48, v1 |
| ; GFX7-SDAG-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 52, v1 |
| ; GFX7-SDAG-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 56, v1 |
| ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, 60, v1 |
| ; GFX7-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v16p5: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 12, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 20, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v9, vcc, 28, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 36, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 44, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v15, vcc, 52, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v16, vcc, 56, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 60, v0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, 4, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v18, vcc, 8, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v19, vcc, 12, v1 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, 20, v1 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(12) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 40, v1 |
| ; GFX7-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 24, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v18, vcc, 28, v1 |
| ; GFX7-GISEL-NEXT: buffer_store_dword v5, v19, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v19, vcc, 36, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 44, v1 |
| ; GFX7-GISEL-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v7, v17, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v9, v18, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v13, v6, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 48, v1 |
| ; GFX7-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 52, v1 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 56, v1 |
| ; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 60, v1 |
| ; GFX7-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v16p5: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 12, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 16, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 20, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 24, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v9, vcc, 28, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 32, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 36, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 40, v0 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 44, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 48, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 52, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 56, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 60, v0 |
| ; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, 4, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 8, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v19, vcc, 12, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, 20, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(12) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 40, v1 |
| ; GFX8-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 24, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 28, v1 |
| ; GFX8-GISEL-NEXT: buffer_store_dword v5, v19, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 32, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v19, vcc, 36, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 44, v1 |
| ; GFX8-GISEL-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v7, v17, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v9, v18, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v13, v6, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v1 |
| ; GFX8-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 52, v1 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 56, v1 |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 60, v1 |
| ; GFX8-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_v16p5: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; GFX9-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; GFX9-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; GFX9-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28 |
| ; GFX9-NEXT: buffer_load_dword v10, v0, s[0:3], 0 offen offset:32 |
| ; GFX9-NEXT: buffer_load_dword v11, v0, s[0:3], 0 offen offset:36 |
| ; GFX9-NEXT: buffer_load_dword v12, v0, s[0:3], 0 offen offset:40 |
| ; GFX9-NEXT: buffer_load_dword v13, v0, s[0:3], 0 offen offset:44 |
| ; GFX9-NEXT: buffer_load_dword v14, v0, s[0:3], 0 offen offset:48 |
| ; GFX9-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen offset:52 |
| ; GFX9-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen offset:56 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:60 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen offset:32 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen offset:36 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen offset:40 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen offset:44 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen offset:48 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen offset:52 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen offset:56 |
| ; GFX9-NEXT: s_waitcnt vmcnt(15) |
| ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen offset:60 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_v16p5: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: s_clause 0xf |
| ; GFX10-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen |
| ; GFX10-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; GFX10-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; GFX10-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; GFX10-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; GFX10-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; GFX10-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28 |
| ; GFX10-NEXT: buffer_load_dword v10, v0, s[0:3], 0 offen offset:32 |
| ; GFX10-NEXT: buffer_load_dword v11, v0, s[0:3], 0 offen offset:36 |
| ; GFX10-NEXT: buffer_load_dword v12, v0, s[0:3], 0 offen offset:40 |
| ; GFX10-NEXT: buffer_load_dword v13, v0, s[0:3], 0 offen offset:44 |
| ; GFX10-NEXT: buffer_load_dword v14, v0, s[0:3], 0 offen offset:48 |
| ; GFX10-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen offset:52 |
| ; GFX10-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen offset:56 |
| ; GFX10-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:60 |
| ; GFX10-NEXT: s_waitcnt vmcnt(15) |
| ; GFX10-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen |
| ; GFX10-NEXT: s_waitcnt vmcnt(14) |
| ; GFX10-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; GFX10-NEXT: s_waitcnt vmcnt(13) |
| ; GFX10-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; GFX10-NEXT: s_waitcnt vmcnt(12) |
| ; GFX10-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; GFX10-NEXT: s_waitcnt vmcnt(11) |
| ; GFX10-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; GFX10-NEXT: s_waitcnt vmcnt(10) |
| ; GFX10-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; GFX10-NEXT: s_waitcnt vmcnt(9) |
| ; GFX10-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24 |
| ; GFX10-NEXT: s_waitcnt vmcnt(8) |
| ; GFX10-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28 |
| ; GFX10-NEXT: s_waitcnt vmcnt(7) |
| ; GFX10-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen offset:32 |
| ; GFX10-NEXT: s_waitcnt vmcnt(6) |
| ; GFX10-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen offset:36 |
| ; GFX10-NEXT: s_waitcnt vmcnt(5) |
| ; GFX10-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen offset:40 |
| ; GFX10-NEXT: s_waitcnt vmcnt(4) |
| ; GFX10-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen offset:44 |
| ; GFX10-NEXT: s_waitcnt vmcnt(3) |
| ; GFX10-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen offset:48 |
| ; GFX10-NEXT: s_waitcnt vmcnt(2) |
| ; GFX10-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen offset:52 |
| ; GFX10-NEXT: s_waitcnt vmcnt(1) |
| ; GFX10-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen offset:56 |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen offset:60 |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v16p5: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: s_clause 0x3 |
| ; GFX11-SDAG-NEXT: scratch_load_b128 v[2:5], v0, off offset:32 |
| ; GFX11-SDAG-NEXT: scratch_load_b128 v[6:9], v0, off offset:48 |
| ; GFX11-SDAG-NEXT: scratch_load_b128 v[10:13], v0, off |
| ; GFX11-SDAG-NEXT: scratch_load_b128 v[14:17], v0, off offset:16 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[2:5], off offset:32 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[6:9], off offset:48 |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[10:13], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: scratch_store_b128 v1, v[14:17], off offset:16 |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v16p5: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v6, 16, v0 |
| ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v10, 32, v0 |
| ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v14, 48, v0 |
| ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v18, 32, v1 |
| ; GFX11-GISEL-NEXT: s_clause 0x3 |
| ; GFX11-GISEL-NEXT: scratch_load_b128 v[2:5], v0, off |
| ; GFX11-GISEL-NEXT: scratch_load_b128 v[6:9], v6, off |
| ; GFX11-GISEL-NEXT: scratch_load_b128 v[10:13], v10, off |
| ; GFX11-GISEL-NEXT: scratch_load_b128 v[14:17], v14, off |
| ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 16, v1 |
| ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v19, 48, v1 |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-GISEL-NEXT: scratch_store_b128 v1, v[2:5], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-GISEL-NEXT: scratch_store_b128 v0, v[6:9], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-GISEL-NEXT: scratch_store_b128 v18, v[10:13], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: scratch_store_b128 v19, v[14:17], off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <16 x ptr addrspace(5)>, ptr addrspace(5) %ptra |
| %freeze = freeze <16 x ptr addrspace(5)> %a |
| store <16 x ptr addrspace(5)> %freeze, ptr addrspace(5) %ptrb |
| ret void |
| } |
| |
| ; freeze_i8: loads an i8 through the global pointer %ptra, freezes it, and |
| ; stores the frozen value through the global pointer %ptrb. |
| ; In every checked configuration the expected code is one byte load feeding one |
| ; byte store, with only an s_waitcnt between them; no instruction is expected |
| ; for the freeze itself. |
| define void @freeze_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_i8: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_i8: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_i8: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_i8: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_i8: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_ubyte v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: flat_store_byte v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_i8: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_i8: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_i8: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_i8: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_i8: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load i8, ptr addrspace(1) %ptra |
| %freeze = freeze i8 %a |
| store i8 %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_v2i8: loads a <2 x i8> through the global pointer %ptra, freezes it, |
| ; and stores the frozen vector through the global pointer %ptrb. |
| ; The SelectionDAG configurations checked below expect a 16-bit load feeding a |
| ; 16-bit store directly. The GlobalISel configurations expect the two bytes to |
| ; be separated and re-packed (shift / and / or) before the 16-bit store. |
| ; NOTE(review): this function has no check lines for the gfx803 and gfx900 |
| ; SelectionDAG runs. Presumably that is because the gfx900 SelectionDAG RUN line |
| ; at the top of the file reuses the "GFX8-SDAG" check prefix (likely meant to be |
| ; "GFX9-SDAG") - confirm, fix the RUN line and regenerate the assertions. |
| define void @freeze_v2i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2i8: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2i8: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2i8: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2i8: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v2i8: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_ushort v0, v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v2i8: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_ushort v0, v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 |
| ; GFX9-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v2i8: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_ushort v0, v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v2i8: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_ushort v0, v[0:1], off |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xff |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_v2i8: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_v2i8: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v2i8: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_u16 v0, v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x i8>, ptr addrspace(1) %ptra |
| %freeze = freeze <2 x i8> %a |
| store <2 x i8> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_v3i8: loads a <3 x i8> through the global pointer %ptra, freezes it, |
| ; and stores the frozen vector through the global pointer %ptrb. |
| ; Every checked configuration expects one 32-bit load, and a store split into a |
| ; 16-bit piece at offset 0 plus an 8-bit piece at offset 2. |
| ; The SelectionDAG configurations checked below re-pack only the low two bytes |
| ; and issue the byte store before the short store. The GlobalISel configurations |
| ; re-pack all three bytes into one register and issue the short store first. |
| ; NOTE(review): this function has no check lines for the gfx803 and gfx900 |
| ; SelectionDAG runs. Presumably that is because the gfx900 SelectionDAG RUN line |
| ; at the top of the file reuses the "GFX8-SDAG" check prefix (likely meant to be |
| ; "GFX9-SDAG") - confirm, fix the RUN line and regenerate the assertions. |
| define void @freeze_v3i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3i8: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 8, v0 |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4 |
| ; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX6-SDAG-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3i8: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v4 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3i8: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 8, v0 |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4 |
| ; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX7-SDAG-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3i8: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v4 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3i8: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dword v4, v[0:1] |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0xff |
| ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 2, v2 |
| ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v4 |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v5, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v6, 8, v6 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 |
| ; GFX8-GISEL-NEXT: flat_store_short v[2:3], v4 |
| ; GFX8-GISEL-NEXT: flat_store_byte v[0:1], v5 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v3i8: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0xff |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4 |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 |
| ; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: global_store_byte_d16_hi v[2:3], v0, off offset:2 |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v3i8: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: v_lshrrev_b16 v1, 8, v0 |
| ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX10-SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX10-SDAG-NEXT: global_store_byte v[2:3], v4, off offset:2 |
| ; GFX10-SDAG-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v3i8: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0xff |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v4, v4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 |
| ; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: global_store_byte_d16_hi v[2:3], v0, off offset:2 |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_v3i8: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_b32 v1, v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, 0 |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b16 v0.l, 8, v1.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.h |
| ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.l, 8, v0.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.h, v0.l |
| ; GFX11-SDAG-TRUE16-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v4, off offset:2 |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_v3i8: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b16 v1, 8, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v1, v4, v1 |
| ; GFX11-SDAG-FAKE16-NEXT: s_clause 0x1 |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off offset:2 |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v[2:3], v1, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v3i8: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 |
| ; GFX11-GISEL-NEXT: s_clause 0x1 |
| ; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: global_store_d16_hi_b8 v[2:3], v0, off offset:2 |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x i8>, ptr addrspace(1) %ptra |
| %freeze = freeze <3 x i8> %a |
| store <3 x i8> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_v4i8: loads a <4 x i8> through the global pointer %ptra, freezes it, |
| ; and stores the frozen vector through the global pointer %ptrb. |
| ; The SelectionDAG configurations checked below expect a 32-bit load feeding a |
| ; 32-bit store directly. The GlobalISel configurations expect the four bytes to |
| ; be separated and re-packed into one register before the 32-bit store. |
| ; NOTE(review): this function has no check lines for the gfx803 and gfx900 |
| ; SelectionDAG runs. Presumably that is because the gfx900 SelectionDAG RUN line |
| ; at the top of the file reuses the "GFX8-SDAG" check prefix (likely meant to be |
| ; "GFX9-SDAG") - confirm, fix the RUN line and regenerate the assertions. |
| define void @freeze_v4i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4i8: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4i8: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4i8: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4i8: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v4i8: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dword v0, v[0:1] |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8 |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0xff |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v0 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v6, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v6 |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v0 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v4i8: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 8 |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0xff |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v0 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v6, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_or_b32 v4, v0, v1, v4 |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_or3_b32 v0, v4, v6, v0 |
| ; GFX9-GISEL-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v4i8: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v4i8: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 8 |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0xff |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v4, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0xff, v0, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_or3_b32 v0, v1, v4, v0 |
| ; GFX10-GISEL-NEXT: global_store_dword v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v4i8: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v4i8: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 |
| ; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0xff, v0, v1 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5 |
| ; GFX11-GISEL-NEXT: v_or3_b32 v0, v0, v4, v1 |
| ; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x i8>, ptr addrspace(1) %ptra |
| %freeze = freeze <4 x i8> %a |
| store <4 x i8> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| define void @freeze_v8i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v8i8: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v8i8: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v7 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v5 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v8 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v6 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v9 |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v8i8: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v8i8: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v7 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v5 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v8 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v6 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v9 |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v8i8: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 8 |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xff |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0 |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v5, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v10, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v4, v1, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v8 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v10 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v9 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v4 |
| ; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v8i8: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 8 |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xff |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0 |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v5, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v10, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v11, v1, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, v4, v6 |
| ; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v4, v5 |
| ; GFX9-GISEL-NEXT: v_or3_b32 v0, v0, v8, v9 |
| ; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v10, v11 |
| ; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v8i8: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v8i8: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 8 |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0xff |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0 |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v8, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v9, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v7, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v5, v1, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_or_b32 v0, 0xff, v0, v6 |
| ; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0xff, v1, v4 |
| ; GFX10-GISEL-NEXT: v_or3_b32 v0, v0, v8, v7 |
| ; GFX10-GISEL-NEXT: v_or3_b32 v1, v1, v9, v5 |
| ; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v8i8: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v8i8: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9 |
| ; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0xff, v0, v4 |
| ; GFX11-GISEL-NEXT: v_and_or_b32 v1, 0xff, v1, v7 |
| ; GFX11-GISEL-NEXT: v_or3_b32 v0, v0, v5, v6 |
| ; GFX11-GISEL-NEXT: v_or3_b32 v1, v1, v8, v9 |
| ; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <8 x i8>, ptr addrspace(1) %ptra |
| %freeze = freeze <8 x i8> %a |
| store <8 x i8> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_v16i8: load a <16 x i8> from global memory (%ptra), freeze it, and |
| ; store the frozen value to global memory (%ptrb). |
| ; |
| ; In the SelectionDAG expectations below the value goes straight from one |
| ; 128-bit load to one 128-bit store with no ALU instructions in between. The |
| ; GlobalISel expectations split every loaded dword into its four bytes and |
| ; reassemble it with shift/and/or sequences before the store. |
| ; |
| ; NOTE(review): this function has no checks for the gfx803 and gfx900 |
| ; SelectionDAG runs. The gfx900 SelectionDAG run line at the top of the file |
| ; uses the GFX8-SDAG prefix rather than a GFX9-SDAG one; presumably the two |
| ; runs disagree (flat vs. global memory instructions, as the GlobalISel blocks |
| ; for those targets do), so the update script emitted nothing for the shared |
| ; prefix. Confirm, and consider renaming that prefix and regenerating. |
| define void @freeze_v16i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v16i8: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v16i8: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v15, 8, v15 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v4, v5, v9 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v5, v6, v12 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v6, v7, v15 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v4, v10 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v7, v5, v13 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v9, v6, v16 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v4, v0, v8 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v5, v1, v11 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v6, v7, v14 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v7, v9, v17 |
| ; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v16i8: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v16i8: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v15, 8, v15 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v4, v5, v9 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v5, v6, v12 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v6, v7, v15 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v4, v10 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v7, v5, v13 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v9, v6, v16 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v4, v0, v8 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v5, v1, v11 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v6, v7, v14 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v7, v9, v17 |
| ; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v16i8: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 8 |
| ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xff |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4 |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6 |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v12, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v13, v4, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v14, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v15, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v16, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v17, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v18, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v6, v6, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_and_b32_sdwa v0, v7, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v12 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v5, v14 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v16 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v18 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v13 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v5, v15 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v17 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v7, v1, v0 |
| ; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v16i8: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 8 |
| ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xff |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4 |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6 |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v12, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v13, v4, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v14, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v15, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v16, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v17, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v18, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_b32_sdwa v19, v7, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX9-GISEL-NEXT: v_and_or_b32 v4, v4, v0, v8 |
| ; GFX9-GISEL-NEXT: v_and_or_b32 v5, v5, v0, v9 |
| ; GFX9-GISEL-NEXT: v_and_or_b32 v6, v6, v0, v10 |
| ; GFX9-GISEL-NEXT: v_and_or_b32 v0, v7, v0, v1 |
| ; GFX9-GISEL-NEXT: v_or3_b32 v4, v4, v12, v13 |
| ; GFX9-GISEL-NEXT: v_or3_b32 v5, v5, v14, v15 |
| ; GFX9-GISEL-NEXT: v_or3_b32 v6, v6, v16, v17 |
| ; GFX9-GISEL-NEXT: v_or3_b32 v7, v0, v18, v19 |
| ; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v16i8: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v16i8: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 8 |
| ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0xff |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4 |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6 |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7 |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v12, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v0, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v13, v4, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v14, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v15, v5, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v16, v6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v17, v6, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v18, v7, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_b32_sdwa v1, v7, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD |
| ; GFX10-GISEL-NEXT: v_and_or_b32 v4, 0xff, v4, v8 |
| ; GFX10-GISEL-NEXT: v_and_or_b32 v5, 0xff, v5, v9 |
| ; GFX10-GISEL-NEXT: v_and_or_b32 v6, 0xff, v6, v10 |
| ; GFX10-GISEL-NEXT: v_and_or_b32 v0, 0xff, v7, v0 |
| ; GFX10-GISEL-NEXT: v_or3_b32 v4, v4, v12, v13 |
| ; GFX10-GISEL-NEXT: v_or3_b32 v5, v5, v14, v15 |
| ; GFX10-GISEL-NEXT: v_or3_b32 v6, v6, v16, v17 |
| ; GFX10-GISEL-NEXT: v_or3_b32 v7, v0, v18, v1 |
| ; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-LABEL: freeze_v16i8: |
| ; GFX11-SDAG: ; %bb.0: |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v16i8: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v15, 8, v15 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17 |
| ; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0xff, v4, v0 |
| ; GFX11-GISEL-NEXT: v_and_or_b32 v5, 0xff, v5, v9 |
| ; GFX11-GISEL-NEXT: v_and_or_b32 v6, 0xff, v6, v12 |
| ; GFX11-GISEL-NEXT: v_and_or_b32 v7, 0xff, v7, v15 |
| ; GFX11-GISEL-NEXT: v_or3_b32 v4, v0, v1, v8 |
| ; GFX11-GISEL-NEXT: v_or3_b32 v5, v5, v10, v11 |
| ; GFX11-GISEL-NEXT: v_or3_b32 v6, v6, v13, v14 |
| ; GFX11-GISEL-NEXT: v_or3_b32 v7, v7, v16, v17 |
| ; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <16 x i8>, ptr addrspace(1) %ptra |
| %freeze = freeze <16 x i8> %a |
| store <16 x i8> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; freeze_i1: load an i1 from global memory (%ptra), freeze it, and store the |
| ; frozen value to global memory (%ptrb). |
| ; |
| ; Every expectation below has the same shape: an unsigned byte load, a |
| ; v_and_b32 with 1 that keeps only bit 0, and a byte store. For GFX8 through |
| ; GFX11 a single block under the target-wide prefix is emitted, with no |
| ; separate SelectionDAG and GlobalISel blocks. |
| define void @freeze_i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_i1: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_i1: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_i1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_i1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_i1: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_ubyte v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX8-NEXT: flat_store_byte v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_i1: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX9-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_i1: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX10-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_i1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_u8 v0, v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX11-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load i1, ptr addrspace(1) %ptra |
| %freeze = freeze i1 %a |
| store i1 %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Load a <2 x i1> from %ptra, freeze it, and store the result to %ptrb (both |
| ; pointers are addrspace(1)). In the checks below the SDAG runs mask the loaded |
| ; byte with a single AND against 3, while the GISel runs extract each bit and |
| ; shift/or them back together before the final mask. |
| ; NOTE(review): there are no GFX8 or GFX9 SDAG check blocks for this function. |
| ; The gfx900 SDAG RUN line at the top of the file passes GFX8-SDAG rather than |
| ; GFX9-SDAG as its second prefix, which looks like a typo and presumably makes |
| ; the update script drop the conflicting prefix - confirm and regenerate. |
| define void @freeze_v2i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2i1: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2i1: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2i1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2i1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v2i1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_ubyte v0, v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v2i1: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v2i1: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX10-SDAG-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v2i1: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b16 v1, 1, v1 |
| ; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX10-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_v2i1: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, v0.l, 3 |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_v2i1: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v2i1: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 1, v1 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x i1>, ptr addrspace(1) %ptra |
| %freeze = freeze <2 x i1> %a |
| store <2 x i1> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Load a <3 x i1> from %ptra, freeze it, and store the result to %ptrb (both |
| ; pointers are addrspace(1)). In the checks below the SDAG runs mask the loaded |
| ; byte with a single AND against 7, while the GISel runs extract the three bits |
| ; individually and shift/or them back together before the final mask. |
| ; NOTE(review): there are no GFX8 or GFX9 SDAG check blocks for this function. |
| ; The gfx900 SDAG RUN line at the top of the file passes GFX8-SDAG rather than |
| ; GFX9-SDAG as its second prefix, which looks like a typo and presumably makes |
| ; the update script drop the conflicting prefix - confirm and regenerate. |
| define void @freeze_v3i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3i1: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3i1: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3i1: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3i1: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3i1: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_ubyte v0, v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v3i1: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4 |
| ; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v3i1: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX10-SDAG-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v3i1: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX10-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b16 v1, 1, v1 |
| ; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b16 v1, 2, v4 |
| ; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX10-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_v3i1: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, v0.l, 7 |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_v3i1: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_u8 v0, v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v3i1: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 |
| ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 1, v1 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 2, v4 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x i1>, ptr addrspace(1) %ptra |
| %freeze = freeze <3 x i1> %a |
| store <3 x i1> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Load an i32 from %ptra, compare it for equality with 0, freeze the resulting |
| ; i1, and store it to %ptrb (both pointers are addrspace(1)). Every run below |
| ; checks for a v_cmp_eq_u32 into vcc (vcc_lo on GFX10/GFX11) followed by a |
| ; v_cndmask that materializes 0/1 for the byte store. SDAG and GISel share |
| ; check prefixes from GFX8 onwards; on GFX6/GFX7 they differ only in how |
| ; s[4:5] is zeroed. |
| define void @freeze_i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_i1_vcc: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_i1_vcc: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_i1_vcc: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_i1_vcc: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: freeze_i1_vcc: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: flat_load_dword v0, v[0:1] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX8-NEXT: flat_store_byte v[2:3], v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: freeze_i1_vcc: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX9-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-LABEL: freeze_i1_vcc: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-NEXT: global_load_dword v0, v[0:1], off |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
| ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX10-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: freeze_i1_vcc: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: global_load_b32 v0, v[0:1], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
| ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %a = load i32, ptr addrspace(1) %ptra |
| %cmp = icmp eq i32 %a, 0 |
| %freeze = freeze i1 %cmp |
| store i1 %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Load a <2 x i32> from %ptra, compare each lane for equality with zero, |
| ; freeze the resulting <2 x i1>, and store it to %ptrb (both pointers are |
| ; addrspace(1)). In the checks below each lane is compared and turned into |
| ; 0/1 with v_cndmask, lane 1 is shifted left by one and OR'd with lane 0, and |
| ; the result is masked with 3 before the byte store. The GISel runs also mask |
| ; each lane with 1 before combining. |
| ; NOTE(review): there are no GFX8 or GFX9 SDAG check blocks for this function. |
| ; The gfx900 SDAG RUN line at the top of the file passes GFX8-SDAG rather than |
| ; GFX9-SDAG as its second prefix, which looks like a typo and presumably makes |
| ; the update script drop the conflicting prefix - confirm and regenerate. |
| define void @freeze_v2i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v2i1_vcc: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v2i1_vcc: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v2i1_vcc: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v2i1_vcc: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v2i1_vcc: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 |
| ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v2i1_vcc: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 |
| ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 |
| ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v2i1_vcc: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 |
| ; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
| ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 1, v1 |
| ; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX10-SDAG-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v2i1_vcc: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 |
| ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX10-GISEL-NEXT: v_lshlrev_b16 v1, 1, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX10-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_v2i1_vcc: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_b64 v[4:5], v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.l, 1, v0.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v1.l, v0.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, v0.l, 3 |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_v2i1_vcc: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 |
| ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 1, v1 |
| ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v2i1_vcc: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 |
| ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 1, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 |
| ; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <2 x i32>, ptr addrspace(1) %ptra |
| %cmp = icmp eq <2 x i32> %a, zeroinitializer |
| %freeze = freeze <2 x i1> %cmp |
| store <2 x i1> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| define void @freeze_v3i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v3i1_vcc: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| ; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 2, v4 |
| ; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v3i1_vcc: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v3i1_vcc: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| ; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 2, v4 |
| ; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v3i1_vcc: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v3i1_vcc: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx3 v[4:6], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v3i1_vcc: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx3 v[4:6], v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4 |
| ; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v3i1_vcc: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dwordx3 v[4:6], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX10-SDAG-NEXT: v_lshlrev_b16 v0, 1, v0 |
| ; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 2, v4 |
| ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX10-SDAG-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v3i1_vcc: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dwordx3 v[4:6], v[0:1], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b16 v0, 1, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b16 v1, 2, v4 |
| ; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX10-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_v3i1_vcc: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_b96 v[4:6], v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.l, 1, v0.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v1.l, v0.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 2, v4.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h |
| ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, v0.l, 7 |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_v3i1_vcc: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_b96 v[4:6], v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v0, 1, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 2, v4 |
| ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v3i1_vcc: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b96 v[4:6], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v0, 1, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 2, v4 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 |
| ; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <3 x i32>, ptr addrspace(1) %ptra |
| %cmp = icmp eq <3 x i32> %a, zeroinitializer |
| %freeze = freeze <3 x i1> %cmp |
| store <3 x i1> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| |
| ; Loads a <4 x i32> from %ptra, compares every lane against zero, freezes the |
| ; resulting <4 x i1> and stores it to %ptrb. The checks below show each lane's |
| ; compare result going through vcc (vcc_lo on the gfx1010/gfx1100 runs), being |
| ; turned into 0/1 with v_cndmask, shifted to bit positions 1..3 (lane 0 stays at |
| ; bit 0), OR-ed together, masked with 15 and written with one byte store. |
| ; NOTE(review): this function has no SDAG checks for gfx803/gfx900; presumably |
| ; the check prefixes shared by those two SDAG run lines conflict - confirm. |
| define void @freeze_v4i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { |
| ; GFX6-SDAG-LABEL: freeze_v4i1_vcc: |
| ; GFX6-SDAG: ; %bb.0: |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| ; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v4 |
| ; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 3, v5 |
| ; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX6-GISEL-LABEL: freeze_v4i1_vcc: |
| ; GFX6-GISEL: ; %bb.0: |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 1, v5 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 2, v4 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v5 |
| ; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-SDAG-LABEL: freeze_v4i1_vcc: |
| ; GFX7-SDAG: ; %bb.0: |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 |
| ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 |
| ; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| ; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v4 |
| ; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 3, v5 |
| ; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX7-GISEL-LABEL: freeze_v4i1_vcc: |
| ; GFX7-GISEL: ; %bb.0: |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 |
| ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 1, v5 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 2, v4 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v5 |
| ; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 |
| ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-GISEL-LABEL: freeze_v4i1_vcc: |
| ; GFX8-GISEL: ; %bb.0: |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v5, 1, v5 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 2, v4 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 3, v5 |
| ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 |
| ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-GISEL-LABEL: freeze_v4i1_vcc: |
| ; GFX9-GISEL: ; %bb.0: |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 |
| ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 |
| ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 |
| ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc |
| ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v5, 1, v5 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v4, 2, v4 |
| ; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 3, v5 |
| ; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-SDAG-LABEL: freeze_v4i1_vcc: |
| ; GFX10-SDAG: ; %bb.0: |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX10-SDAG-NEXT: v_lshlrev_b16 v0, 1, v0 |
| ; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 |
| ; GFX10-SDAG-NEXT: v_lshlrev_b16 v4, 2, v4 |
| ; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 3, v1 |
| ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX10-SDAG-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10-GISEL-LABEL: freeze_v4i1_vcc: |
| ; GFX10-GISEL: ; %bb.0: |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off |
| ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b16 v0, 1, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 |
| ; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX10-GISEL-NEXT: v_lshlrev_b16 v4, 2, v4 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 1, v5 |
| ; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX10-GISEL-NEXT: v_lshlrev_b16 v1, 3, v1 |
| ; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX10-GISEL-NEXT: global_store_byte v[2:3], v0, off |
| ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-TRUE16-LABEL: freeze_v4i1_vcc: |
| ; GFX11-SDAG-TRUE16: ; %bb.0: |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.l, 1, v0.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v1.l, v0.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 |
| ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 2, v4.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h |
| ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v5.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 3, v1.l |
| ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h |
| ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, v0.l, 15 |
| ; GFX11-SDAG-TRUE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-SDAG-FAKE16-LABEL: freeze_v4i1_vcc: |
| ; GFX11-SDAG-FAKE16: ; %bb.0: |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v0, 1, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX11-SDAG-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 |
| ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v4, 2, v4 |
| ; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 3, v1 |
| ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX11-SDAG-FAKE16-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-GISEL-LABEL: freeze_v4i1_vcc: |
| ; GFX11-GISEL: ; %bb.0: |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off |
| ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 |
| ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo |
| ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo |
| ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v0, 1, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 |
| ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo |
| ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 |
| ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v4, 2, v4 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 1, v5 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX11-GISEL-NEXT: v_lshlrev_b16 v1, 3, v1 |
| ; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 |
| ; GFX11-GISEL-NEXT: global_store_b8 v[2:3], v0, off |
| ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] |
| %a = load <4 x i32>, ptr addrspace(1) %ptra |
| %cmp = icmp eq <4 x i32> %a, zeroinitializer |
| %freeze = freeze <4 x i1> %cmp |
| store <4 x i1> %freeze, ptr addrspace(1) %ptrb |
| ret void |
| } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; GFX8-SDAG: {{.*}} |