| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 %s -o - | FileCheck %s |
| |
| ; Testing codegen for memcpy with vector operands for all combinations of the following parameters: |
| ; destination address space: 0, 1, 3, 5 |
| ; source address space: 0, 1, 3, 4, 5 |
| ; alignment: 1, 2, 8, 16 |
| ; sizes: 16, 31, 32 |
| |
| |
| ; Copies 16 bytes from addrspace(0) to addrspace(0); both pointers are align 1. |
| ; The assertions expect one flat_load_dwordx4 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p0_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(0) to addrspace(0); both pointers are align 1. |
| ; The assertions expect four flat loads (dwordx4 at offset 0, dwordx3 at 16, |
| ; ushort at 28, ubyte at 30), each written back by the matching flat store. |
| define void @memcpy_p0_p0_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30 |
| ; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28 |
| ; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(0) to addrspace(0); both pointers are align 1. |
| ; The assertions expect two flat_load_dwordx4 (offsets 16 and 0), each followed |
| ; by a flat_store_dwordx4 to the same offset. |
| define void @memcpy_p0_p0_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(0) to addrspace(0); both pointers are align 2. |
| ; The assertions expect one flat_load_dwordx4 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p0_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(0) to addrspace(0); both pointers are align 2. |
| ; The assertions expect four flat loads (dwordx4 at offset 0, dwordx3 at 16, |
| ; ushort at 28, ubyte at 30), each written back by the matching flat store. |
| define void @memcpy_p0_p0_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30 |
| ; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28 |
| ; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(0) to addrspace(0); both pointers are align 2. |
| ; The assertions expect two flat_load_dwordx4 (offsets 16 and 0), each followed |
| ; by a flat_store_dwordx4 to the same offset. |
| define void @memcpy_p0_p0_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(0) to addrspace(0); both pointers are align 8. |
| ; The assertions expect one flat_load_dwordx4 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p0_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(0) to addrspace(0); both pointers are align 8. |
| ; The assertions expect only two flat dwordx4 load/store pairs, at offsets 15 |
| ; and 0; the two 16-byte ranges share byte 15 and together span bytes 0..30. |
| define void @memcpy_p0_p0_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(0) to addrspace(0); both pointers are align 8. |
| ; The assertions expect two flat_load_dwordx4 (offsets 16 and 0), each followed |
| ; by a flat_store_dwordx4 to the same offset. |
| define void @memcpy_p0_p0_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(0) to addrspace(0); both pointers are align 16. |
| ; The assertions expect one flat_load_dwordx4 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p0_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(0) to addrspace(0); both pointers are align 16. |
| ; The assertions expect only two flat dwordx4 load/store pairs, at offsets 15 |
| ; and 0; the two 16-byte ranges share byte 15 and together span bytes 0..30. |
| define void @memcpy_p0_p0_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(0) to addrspace(0); both pointers are align 16. |
| ; The assertions expect two flat_load_dwordx4 (offsets 16 and 0), each followed |
| ; by a flat_store_dwordx4 to the same offset. |
| define void @memcpy_p0_p0_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p0_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(1) to addrspace(0); both pointers are align 1. |
| ; The assertions expect one global_load_dwordx4 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p1_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(1) to addrspace(0); both pointers are align 1. |
| ; The assertions expect four global loads (dwordx4 at offset 0, dwordx3 at 16, |
| ; ushort at 28, ubyte at 30), each written back by the matching flat store. |
| define void @memcpy_p0_p1_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30 |
| ; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28 |
| ; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(1) to addrspace(0); both pointers are align 1. |
| ; The assertions expect two global_load_dwordx4 (offsets 16 and 0), each followed |
| ; by a flat_store_dwordx4 to the same offset. |
| define void @memcpy_p0_p1_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(1) to addrspace(0); both pointers are align 2. |
| ; The assertions expect one global_load_dwordx4 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p1_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(1) to addrspace(0); both pointers are align 2. |
| ; The assertions expect four global loads (dwordx4 at offset 0, dwordx3 at 16, |
| ; ushort at 28, ubyte at 30), each written back by the matching flat store. |
| define void @memcpy_p0_p1_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30 |
| ; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28 |
| ; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(1) to addrspace(0); both pointers are align 2. |
| ; The assertions expect two global_load_dwordx4 (offsets 16 and 0), each followed |
| ; by a flat_store_dwordx4 to the same offset. |
| define void @memcpy_p0_p1_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(1) to addrspace(0); both pointers are align 8. |
| ; The assertions expect one global_load_dwordx4 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p1_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(1) to addrspace(0); both pointers are align 8. |
| ; The assertions expect only two dwordx4 pairs (global load, flat store) at |
| ; offsets 15 and 0; the two 16-byte ranges share byte 15 and span bytes 0..30. |
| define void @memcpy_p0_p1_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(1) to addrspace(0); both pointers are align 8. |
| ; The assertions expect two global_load_dwordx4 (offsets 16 and 0), each followed |
| ; by a flat_store_dwordx4 to the same offset. |
| define void @memcpy_p0_p1_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(1) to addrspace(0); both pointers are align 16. |
| ; The assertions expect one global_load_dwordx4 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p1_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(1) to addrspace(0); both pointers are align 16. |
| ; The assertions expect only two dwordx4 pairs (global load, flat store) at |
| ; offsets 15 and 0; the two 16-byte ranges share byte 15 and span bytes 0..30. |
| define void @memcpy_p0_p1_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(1) to addrspace(0); both pointers are align 16. |
| ; The assertions expect two global_load_dwordx4 (offsets 16 and 0), each followed |
| ; by a flat_store_dwordx4 to the same offset. |
| define void @memcpy_p0_p1_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p1_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(3) to addrspace(0); both pointers are align 1. |
| ; The assertions expect one ds_read2_b64 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p3_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(3) to addrspace(0); both pointers are align 1. |
| ; The assertions expect five DS reads (ds_read2_b64 at 0, ds_read_b64 at 16, |
| ; ds_read_b32 at 24, ds_read_u16 at 28, ds_read_u8 at 30) and four flat stores |
| ; (dwordx4 at 0, dwordx3 at 16, short at 28, byte at 30); the b64 and b32 |
| ; results land in v[6:7] and v8 and are stored together as v[6:8]. |
| define void @memcpy_p0_p3_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_u8 v9, v2 offset:30 |
| ; CHECK-NEXT: ds_read_b32 v8, v2 offset:24 |
| ; CHECK-NEXT: ds_read_u16 v10, v2 offset:28 |
| ; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16 |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(4) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(3) to addrspace(0); both pointers are align 1. |
| ; The assertions expect two ds_read2_b64 (offset0:2 offset1:3, then offset1:1), |
| ; stored with flat_store_dwordx4 at offsets 16 and 0 respectively. |
| define void @memcpy_p0_p3_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 |
| ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(3) to addrspace(0); both pointers are align 2. |
| ; The assertions expect one ds_read2_b64 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p3_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(3) to addrspace(0); both pointers are align 2. |
| ; The assertions expect five DS reads (ds_read2_b64 at 0, ds_read_b64 at 16, |
| ; ds_read_b32 at 24, ds_read_u16 at 28, ds_read_u8 at 30) and four flat stores |
| ; (dwordx4 at 0, dwordx3 at 16, short at 28, byte at 30); the b64 and b32 |
| ; results land in v[6:7] and v8 and are stored together as v[6:8]. |
| define void @memcpy_p0_p3_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_u8 v9, v2 offset:30 |
| ; CHECK-NEXT: ds_read_b32 v8, v2 offset:24 |
| ; CHECK-NEXT: ds_read_u16 v10, v2 offset:28 |
| ; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16 |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(4) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(3) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(3) to addrspace(0); both pointers are align 2. |
| ; The assertions expect two ds_read2_b64 (offset0:2 offset1:3, then offset1:1), |
| ; stored with flat_store_dwordx4 at offsets 16 and 0 respectively. |
| define void @memcpy_p0_p3_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 |
| ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(3) to addrspace(0); both pointers are align 8. |
| ; The assertions expect one ds_read2_b64 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p3_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(3) to addrspace(0); both pointers are align 8. |
| ; The assertions expect a ds_read_b128 at offset 15 plus a ds_read2_b64 at |
| ; offset 0, stored with flat_store_dwordx4 at offsets 15 and 0; the two |
| ; 16-byte ranges share byte 15 and together span bytes 0..30. |
| define void @memcpy_p0_p3_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15 |
| ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(3) to addrspace(0); both pointers are align 8. |
| ; The assertions expect two ds_read2_b64 (offset0:2 offset1:3, then offset1:1), |
| ; stored with flat_store_dwordx4 at offsets 16 and 0 respectively. |
| define void @memcpy_p0_p3_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 |
| ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(3) to addrspace(0); both pointers are align 16. |
| ; The assertions expect one ds_read_b128 feeding one flat_store_dwordx4. |
| define void @memcpy_p0_p3_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[2:5], v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(3) to addrspace(0); both pointers are align 16. |
| ; The assertions expect two ds_read_b128 (offsets 15 and 0), stored with |
| ; flat_store_dwordx4 at offsets 15 and 0; the two 16-byte ranges share byte 15 |
| ; and together span bytes 0..30. |
| define void @memcpy_p0_p3_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15 |
| ; CHECK-NEXT: ds_read_b128 v[7:10], v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; Copies 32 bytes from addrspace(3) to addrspace(0); both pointers are align 16. |
| ; The assertions expect two ds_read_b128 (offsets 16 and 0), each stored with a |
| ; flat_store_dwordx4 to the same offset. |
| define void @memcpy_p0_p3_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p3_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:16 |
| ; CHECK-NEXT: ds_read_b128 v[7:10], v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; Copies 16 bytes from addrspace(4) to addrspace(0); both pointers are align 1. |
| ; The assertions expect two global_load_dwordx2 / flat_store_dwordx2 pairs |
| ; (offsets 0 and 8), with each load waited on and stored before the next load. |
| define void @memcpy_p0_p4_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] |
| ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Copies 31 bytes from addrspace(4) to addrspace(0); both pointers are align 1. |
| ; The assertions expect six global-load / flat-store pairs: dwordx2 at offsets |
| ; 0, 8 and 16, dword at 24, ushort/short at 28 and ubyte/byte at 30, with each |
| ; load waited on and stored before the next load is issued. |
| define void @memcpy_p0_p4_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 |
| ; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24 |
| ; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28 |
| ; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 1.
; The checks pin four sequential global_load_dwordx2 / flat_store_dwordx2
; pairs at offsets 0, 8, 16 and 24.
| define void @memcpy_p0_p4_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 |
| ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 2.
; The pinned sequence is the same as for the align-1 variant: two
; global_load_dwordx2 / flat_store_dwordx2 pairs at offsets 0 and 8.
| define void @memcpy_p0_p4_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] |
| ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 2.
; The checks pin six sequential load/store pairs: dwordx2 at offsets 0, 8
; and 16, a dword at 24, a 16-bit access at 28 and a byte at 30.
| define void @memcpy_p0_p4_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 |
| ; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24 |
| ; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28 |
| ; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 2.
; The checks pin four sequential global_load_dwordx2 / flat_store_dwordx2
; pairs at offsets 0, 8, 16 and 24.
| define void @memcpy_p0_p4_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 |
| ; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 |
| ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 8.
; The checks pin a single global_load_dwordx4 followed by a single
; flat_store_dwordx4.
| define void @memcpy_p0_p4_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 8.
; The checks pin two 16-byte load/store pairs at offsets 0 and 15; the
; second access overlaps the first by one byte (byte 15) to cover 31 bytes.
| define void @memcpy_p0_p4_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 8.
; The checks pin two sequential global_load_dwordx4 / flat_store_dwordx4
; pairs at offsets 0 and 16.
| define void @memcpy_p0_p4_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 16.
; The checks pin a single global_load_dwordx4 followed by a single
; flat_store_dwordx4.
| define void @memcpy_p0_p4_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 16.
; The checks pin two 16-byte load/store pairs at offsets 0 and 15; the
; second access overlaps the first by one byte (byte 15).
| define void @memcpy_p0_p4_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(0) <- src addrspace(4), both align 16.
; The checks pin two sequential global_load_dwordx4 / flat_store_dwordx4
; pairs at offsets 0 and 16.
| define void @memcpy_p0_p4_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p4_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 1.
; The checks pin one clause of four buffer_load_dword (offsets 0, 4, 8, 12)
; whose results are written with a single flat_store_dwordx4.
| define void @memcpy_p0_p5_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 1.
; The checks pin one clause of nine buffer loads (dwords at offsets 0..24,
; a 16-bit load at 28 and a byte load at 30). The stores follow in this
; order: short at 28, byte at 30, dwordx3 at 16, dwordx4 at 0.
| define void @memcpy_p0_p5_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x8 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30 |
| ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 1.
; The checks pin one clause of eight buffer_load_dword (upper 16 bytes
; first, then the lower 16) and two flat_store_dwordx4 stores, offset 16
; before offset 0.
| define void @memcpy_p0_p5_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 2.
; The checks pin one clause of four buffer_load_dword (offsets 0, 4, 8, 12)
; whose results are written with a single flat_store_dwordx4.
| define void @memcpy_p0_p5_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 2.
; The checks pin one clause of nine buffer loads (dwords at offsets 0..24,
; a 16-bit load at 28 and a byte load at 30). The stores follow in this
; order: short at 28, byte at 30, dwordx3 at 16, dwordx4 at 0.
| define void @memcpy_p0_p5_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x8 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30 |
| ; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 2.
; The checks pin one clause of eight buffer_load_dword (upper 16 bytes
; first, then the lower 16) and two flat_store_dwordx4 stores, offset 16
; before offset 0.
| define void @memcpy_p0_p5_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 8.
; The checks pin one clause of four buffer_load_dword (offsets 0, 4, 8, 12)
; whose results are written with a single flat_store_dwordx4.
| define void @memcpy_p0_p5_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 8.
; The checks pin one clause of eight buffer_load_dword at offsets 0, 4, 8,
; 12 and 15, 19, 23, 27, stored as two flat_store_dwordx4 at offsets 0 and
; 15; the second 16-byte group overlaps the first at byte 15.
| define void @memcpy_p0_p5_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 8.
; The checks pin one clause of eight buffer_load_dword in ascending offset
; order (0..28) and two flat_store_dwordx4 stores, offset 0 before
; offset 16.
| define void @memcpy_p0_p5_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 16.
; The checks pin one clause of four buffer_load_dword (offsets 0, 4, 8, 12)
; whose results are written with a single flat_store_dwordx4.
| define void @memcpy_p0_p5_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 16.
; The checks pin one clause of eight buffer_load_dword at offsets 0, 4, 8,
; 12 and 15, 19, 23, 27, stored as two flat_store_dwordx4 at offsets 0 and
; 15; the second 16-byte group overlaps the first at byte 15.
| define void @memcpy_p0_p5_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(0) <- src addrspace(5), both align 16.
; The checks pin one clause of eight buffer_load_dword in ascending offset
; order (0..28) and two flat_store_dwordx4 stores, offset 0 before
; offset 16.
| define void @memcpy_p0_p5_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p0_p5_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(1) <- src addrspace(0), both align 1.
; The checks pin a single flat_load_dwordx4 followed by a single
; global_store_dwordx4.
| define void @memcpy_p1_p0_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(1) <- src addrspace(0), both align 1.
; The checks pin one clause of three flat loads (dwordx2 at offset 23,
; dwordx2 at offset 16, dwordx4 at offset 0) and three matching global
; stores; the two dwordx2 accesses overlap at byte 23.
| define void @memcpy_p1_p0_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23 |
| ; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(1) <- src addrspace(0), both align 1.
; The checks pin one clause of two flat_load_dwordx4 (offsets 16 and 0)
; and two global_store_dwordx4 stores in the same order.
| define void @memcpy_p1_p0_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(1) <- src addrspace(0), both align 2.
; The checks pin a single flat_load_dwordx4 followed by a single
; global_store_dwordx4.
| define void @memcpy_p1_p0_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(1) <- src addrspace(0), both align 2.
; The checks pin one clause of three flat loads (dwordx2 at offset 23,
; dwordx2 at offset 16, dwordx4 at offset 0) and three matching global
; stores; the two dwordx2 accesses overlap at byte 23.
| define void @memcpy_p1_p0_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23 |
| ; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(1) <- src addrspace(0), both align 2.
; The checks pin one clause of two flat_load_dwordx4 (offsets 16 and 0)
; and two global_store_dwordx4 stores in the same order.
| define void @memcpy_p1_p0_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Test: 16-byte memcpy, dst addrspace(1) <- src addrspace(0), both align 8.
; The checks pin a single flat_load_dwordx4 followed by a single
; global_store_dwordx4.
| define void @memcpy_p1_p0_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Test: 31-byte memcpy, dst addrspace(1) <- src addrspace(0), both align 8.
; The checks pin one clause of two flat_load_dwordx4 (offsets 15 and 0)
; and two matching global_store_dwordx4; the two 16-byte accesses overlap
; at byte 15.
| define void @memcpy_p1_p0_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Test: 32-byte memcpy, dst addrspace(1) <- src addrspace(0), both align 8.
; The checks pin one clause of two flat_load_dwordx4 (offsets 16 and 0)
; and two global_store_dwordx4 stores in the same order.
| define void @memcpy_p1_p0_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(0) to addrspace(1); both pointers are align 16. |
| ; The assertions expect one flat_load_dwordx4 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p0_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(0) to addrspace(1); both pointers are align 16. |
| ; The assertions expect two flat_load_dwordx4 / global_store_dwordx4 pairs at |
| ; offsets 0 and 15, i.e. two 16-byte accesses that overlap at byte 15. |
| define void @memcpy_p1_p0_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(0) to addrspace(1); both pointers are align 16. |
| ; The assertions expect two flat_load_dwordx4 / global_store_dwordx4 pairs at |
| ; offsets 0 and 16, with both loads issued before the first store. |
| define void @memcpy_p1_p0_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p0_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(1); both pointers are align 1. |
| ; The assertions expect one global_load_dwordx4 feeding one global_store_dwordx4 |
| ; even though only byte alignment is declared. |
| define void @memcpy_p1_p1_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(1); both pointers are align 1. |
| ; The assertions expect three global load/store pairs: a dwordx4 at offset 0 and |
| ; two dwordx2 at offsets 16 and 23 (the 8-byte accesses overlap at byte 23). |
| ; All three loads are expected in one clause ahead of the stores. |
| define void @memcpy_p1_p1_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(1); both pointers are align 1. |
| ; The assertions expect two global_load_dwordx4 / global_store_dwordx4 pairs at |
| ; offsets 0 and 16, with both loads issued before the first store. |
| define void @memcpy_p1_p1_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(1); both pointers are align 2. |
| ; The assertions expect one global_load_dwordx4 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p1_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(1); both pointers are align 2. |
| ; The assertions expect three global load/store pairs: a dwordx4 at offset 0 and |
| ; two dwordx2 at offsets 16 and 23 (the 8-byte accesses overlap at byte 23). |
| ; All three loads are expected in one clause ahead of the stores. |
| define void @memcpy_p1_p1_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(1); both pointers are align 2. |
| ; The assertions expect two global_load_dwordx4 / global_store_dwordx4 pairs at |
| ; offsets 0 and 16, with both loads issued before the first store. |
| define void @memcpy_p1_p1_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(1); both pointers are align 8. |
| ; The assertions expect one global_load_dwordx4 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p1_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(1); both pointers are align 8. |
| ; The assertions expect two global_load_dwordx4 / global_store_dwordx4 pairs at |
| ; offsets 0 and 15, i.e. two 16-byte accesses that overlap at byte 15. |
| define void @memcpy_p1_p1_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(1); both pointers are align 8. |
| ; The assertions expect two global_load_dwordx4 / global_store_dwordx4 pairs at |
| ; offsets 0 and 16, with both loads issued before the first store. |
| define void @memcpy_p1_p1_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(1); both pointers are align 16. |
| ; The assertions expect one global_load_dwordx4 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p1_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(1); both pointers are align 16. |
| ; The assertions expect two global_load_dwordx4 / global_store_dwordx4 pairs at |
| ; offsets 0 and 15, i.e. two 16-byte accesses that overlap at byte 15. |
| define void @memcpy_p1_p1_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(1); both pointers are align 16. |
| ; The assertions expect two global_load_dwordx4 / global_store_dwordx4 pairs at |
| ; offsets 0 and 16, with both loads issued before the first store. |
| define void @memcpy_p1_p1_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p1_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(3) to addrspace(1); both pointers are align 1. |
| ; The assertions expect one ds_read_b128 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p3_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[2:5], v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(3) to addrspace(1); both pointers are align 1. |
| ; The assertions expect three reads (ds_read_b64 at offset 0, ds_read_b128 at |
| ; offset 8, ds_read_b64 at offset 23) followed by the matching global stores |
| ; (dwordx2, dwordx4, dwordx2) at the same offsets. |
| define void @memcpy_p1_p3_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b64 v[7:8], v2 |
| ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8 |
| ; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(3) to addrspace(1); both pointers are align 1. |
| ; The assertions expect two ds_read_b128 (offsets 0 and 16) followed by two |
| ; global_store_dwordx4 at the same offsets. |
| define void @memcpy_p1_p3_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[3:6], v2 |
| ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(3) to addrspace(1); both pointers are align 2. |
| ; The assertions expect one ds_read_b128 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p3_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[2:5], v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(3) to addrspace(1); both pointers are align 2. |
| ; The assertions expect three reads (ds_read_b64 at offset 0, ds_read_b128 at |
| ; offset 8, ds_read_b64 at offset 23) followed by the matching global stores |
| ; (dwordx2, dwordx4, dwordx2) at the same offsets. |
| define void @memcpy_p1_p3_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b64 v[7:8], v2 |
| ; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8 |
| ; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(3) to addrspace(1); both pointers are align 2. |
| ; The assertions expect two ds_read_b128 (offsets 0 and 16) followed by two |
| ; global_store_dwordx4 at the same offsets. |
| define void @memcpy_p1_p3_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[3:6], v2 |
| ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(3) to addrspace(1); both pointers are align 8. |
| ; Unlike the align 1, 2 and 16 variants of this copy, the assertions here expect |
| ; the source to be read with ds_read2_b64 (offset1:1) rather than ds_read_b128, |
| ; followed by one global_store_dwordx4. |
| define void @memcpy_p1_p3_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(3) to addrspace(1); both pointers are align 8. |
| ; The assertions expect a ds_read2_b64 (offset1:1) for the first 16 bytes and a |
| ; ds_read_b128 at offset 15 for the tail, then two global_store_dwordx4 at |
| ; offsets 0 and 15. |
| define void @memcpy_p1_p3_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1 |
| ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(3) to addrspace(1); both pointers are align 8. |
| ; The assertions expect two ds_read2_b64 instructions (offset1:1, then |
| ; offset0:2 offset1:3) followed by two global_store_dwordx4 at offsets 0 and 16. |
| define void @memcpy_p1_p3_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1 |
| ; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(3) to addrspace(1); both pointers are align 16. |
| ; The assertions expect one ds_read_b128 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p3_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[2:5], v2 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(3) to addrspace(1); both pointers are align 16. |
| ; The assertions expect two ds_read_b128 (offsets 0 and 15) followed by two |
| ; global_store_dwordx4 at the same offsets; the two 16-byte accesses overlap at |
| ; byte 15. |
| define void @memcpy_p1_p3_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[3:6], v2 |
| ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(3) to addrspace(1); both pointers are align 16. |
| ; The assertions expect two ds_read_b128 (offsets 0 and 16) followed by two |
| ; global_store_dwordx4 at the same offsets. |
| define void @memcpy_p1_p3_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p3_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[3:6], v2 |
| ; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(4) to addrspace(1); both pointers are align 1. |
| ; The assertions expect the addrspace(4) source to be read with one |
| ; global_load_dwordx4, feeding one global_store_dwordx4. |
| define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(4) to addrspace(1); both pointers are align 1. |
| ; The assertions expect a dwordx2 at offset 0 and a dwordx4 at offset 8 to be |
| ; loaded together and stored, and only then the final dwordx2 at offset 23 to |
| ; be loaded and stored (the last load is not clustered with the first two). |
| define void @memcpy_p1_p4_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8 |
| ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(4) to addrspace(1); both pointers are align 1. |
| ; The assertions expect two global_load_dwordx4 (offsets 0 and 16) in one clause |
| ; followed by two global_store_dwordx4 at the same offsets. |
| define void @memcpy_p1_p4_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(4) to addrspace(1); both pointers are align 2. |
| ; The assertions expect one global_load_dwordx4 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p4_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(4) to addrspace(1); both pointers are align 2. |
| ; The assertions expect a dwordx2 at offset 0 and a dwordx4 at offset 8 to be |
| ; loaded together and stored, and only then the final dwordx2 at offset 23 to |
| ; be loaded and stored (the last load is not clustered with the first two). |
| define void @memcpy_p1_p4_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8 |
| ; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(4) to addrspace(1); both pointers are align 2. |
| ; The assertions expect two global_load_dwordx4 (offsets 0 and 16) in one clause |
| ; followed by two global_store_dwordx4 at the same offsets. |
| define void @memcpy_p1_p4_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(4) to addrspace(1); both pointers are align 8. |
| ; The assertions expect one global_load_dwordx4 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p4_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(4) to addrspace(1); both pointers are align 8. |
| ; The assertions expect two strictly sequential dwordx4 load/store pairs: the |
| ; pair at offset 0 completes before the load at offset 15 is issued, so the two |
| ; loads are not grouped into a clause. |
| define void @memcpy_p1_p4_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(4) to addrspace(1); both pointers are align 8. |
| ; The assertions expect two strictly sequential dwordx4 load/store pairs: the |
| ; pair at offset 0 completes before the load at offset 16 is issued, so the two |
| ; loads are not grouped into a clause. |
| define void @memcpy_p1_p4_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(4) to addrspace(1); both pointers are align 16. |
| ; The assertions expect one global_load_dwordx4 feeding one global_store_dwordx4. |
| define void @memcpy_p1_p4_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(4) to addrspace(1); both pointers are align 16. |
| ; The assertions expect two strictly sequential dwordx4 load/store pairs: the |
| ; pair at offset 0 completes before the load at offset 15 is issued, so the two |
| ; loads are not grouped into a clause. |
| define void @memcpy_p1_p4_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(4) to addrspace(1); both pointers are align 16. |
| ; The assertions expect two strictly sequential dwordx4 load/store pairs: the |
| ; pair at offset 0 completes before the load at offset 16 is issued, so the two |
| ; loads are not grouped into a clause. |
| define void @memcpy_p1_p4_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p4_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off |
| ; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(5) to addrspace(1); both pointers are align 1. |
| ; The assertions expect the addrspace(5) source to be read one dword at a time |
| ; (four buffer_load_dword with offen addressing at offsets 0, 4, 8, 12) and the |
| ; result to be written with a single global_store_dwordx4. |
| define void @memcpy_p1_p5_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(5) to addrspace(1); both pointers are align 1. |
| ; The assertions expect eight buffer_load_dword in one clause at offsets |
| ; 0, 4, 8, 12, 16, 20, 23 and 27, then three global stores: dwordx4 at offset 0, |
| ; dwordx2 at offset 16 and dwordx2 at offset 23 (the last two overlap at byte 23). |
| define void @memcpy_p1_p5_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(5) to addrspace(1), both pointers align 1. |
| ; Expected lowering: eight buffer_load_dword (offsets 0..28) in one clause, then |
| ; two global_store_dwordx4 at offsets 0 and 16; the first store only waits for |
| ; the first four loads (vmcnt(4)). |
| define void @memcpy_p1_p5_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(5) to addrspace(1), both pointers align 2. |
| ; Expected lowering: four buffer_load_dword (offsets 0, 4, 8, 12) in one clause, |
| ; then a single global_store_dwordx4 after s_waitcnt vmcnt(0). |
| define void @memcpy_p1_p5_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(5) to addrspace(1), both pointers align 2. |
| ; Expected lowering: eight buffer_load_dword in one clause, stored as dwordx4 at |
| ; offset 0, dwordx2 at offset 16 and dwordx2 at offset 23. The last piece starts |
| ; at 23 (23 + 8 = 31), overlapping the previous piece by one byte. |
| define void @memcpy_p1_p5_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(5) to addrspace(1), both pointers align 2. |
| ; Expected lowering: eight buffer_load_dword (offsets 0..28) in one clause, then |
| ; two global_store_dwordx4 at offsets 0 and 16 (waits: vmcnt(4), then vmcnt(0)). |
| define void @memcpy_p1_p5_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(5) to addrspace(1), both pointers align 8. |
| ; Expected lowering: four buffer_load_dword (offsets 0, 4, 8, 12) in one clause, |
| ; then a single global_store_dwordx4 after s_waitcnt vmcnt(0). |
| define void @memcpy_p1_p5_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(5) to addrspace(1), both pointers align 8. |
| ; Expected lowering: eight buffer_load_dword in one clause (offsets 0..12 and |
| ; 15..27), stored as two global_store_dwordx4 at offsets 0 and 15. The second |
| ; 16-byte piece starts at 15 (15 + 16 = 31), overlapping the first by one byte. |
| define void @memcpy_p1_p5_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(5) to addrspace(1), both pointers align 8. |
| ; Expected lowering: eight buffer_load_dword (offsets 0..28) in one clause, then |
| ; two global_store_dwordx4 at offsets 0 and 16 (waits: vmcnt(4), then vmcnt(0)). |
| define void @memcpy_p1_p5_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(5) to addrspace(1), both pointers align 16. |
| ; Expected lowering: four buffer_load_dword (offsets 0, 4, 8, 12) in one clause, |
| ; then a single global_store_dwordx4 after s_waitcnt vmcnt(0). |
| define void @memcpy_p1_p5_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(5) to addrspace(1), both pointers align 16. |
| ; Expected lowering: eight buffer_load_dword in one clause (offsets 0..12 and |
| ; 15..27), stored as two global_store_dwordx4 at offsets 0 and 15. The second |
| ; 16-byte piece starts at 15 (15 + 16 = 31), overlapping the first by one byte. |
| define void @memcpy_p1_p5_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(5) to addrspace(1), both pointers align 16. |
| ; Expected lowering: eight buffer_load_dword (offsets 0..28) in one clause, then |
| ; two global_store_dwordx4 at offsets 0 and 16 (waits: vmcnt(4), then vmcnt(0)). |
| define void @memcpy_p1_p5_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p1_p5_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(0) to addrspace(3), both pointers align 1. |
| ; Expected lowering: one flat_load_dwordx4, a wait on both vmcnt and lgkmcnt, |
| ; then one ds_write2_b64 covering both 8-byte halves (offset1:1). |
| define void @memcpy_p3_p0_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(0) to addrspace(3), both pointers align 1. |
| ; Expected lowering: three flat loads in one clause (dwordx2 at 23, dwordx2 at 16, |
| ; dwordx4 at 0), each followed by its ds_write in the same order. The piece at |
| ; offset 23 (23 + 8 = 31) overlaps the piece at offset 16 by one byte. |
| define void @memcpy_p3_p0_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23 |
| ; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(0) to addrspace(3), both pointers align 1. |
| ; Expected lowering: two flat_load_dwordx4 in one clause (offset 16 first, then |
| ; offset 0), each written back with a ds_write2_b64 (offset0:2 offset1:3 for the |
| ; upper half, offset1:1 for the lower half). |
| define void @memcpy_p3_p0_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(0) to addrspace(3), both pointers align 2. |
| ; Expected lowering: one flat_load_dwordx4, a wait on both vmcnt and lgkmcnt, |
| ; then one ds_write2_b64 covering both 8-byte halves (offset1:1). |
| define void @memcpy_p3_p0_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(0) to addrspace(3), both pointers align 2. |
| ; Expected lowering: three flat loads in one clause (dwordx2 at 23, dwordx2 at 16, |
| ; dwordx4 at 0), each followed by its ds_write in the same order. The piece at |
| ; offset 23 (23 + 8 = 31) overlaps the piece at offset 16 by one byte. |
| define void @memcpy_p3_p0_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23 |
| ; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(0) to addrspace(3), both pointers align 2. |
| ; Expected lowering: two flat_load_dwordx4 in one clause (offset 16 first, then |
| ; offset 0), each written back with a ds_write2_b64 (offset0:2 offset1:3 for the |
| ; upper half, offset1:1 for the lower half). |
| define void @memcpy_p3_p0_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(0) to addrspace(3), both pointers align 8. |
| ; Expected lowering: one flat_load_dwordx4, a wait on both vmcnt and lgkmcnt, |
| ; then one ds_write2_b64 covering both 8-byte halves (offset1:1). |
| define void @memcpy_p3_p0_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(0) to addrspace(3), both pointers align 8. |
| ; Expected lowering: two flat_load_dwordx4 in one clause (offsets 0 and 15), |
| ; written back as ds_write2_b64 at offset 0 and ds_write_b128 at offset 15. The |
| ; second 16-byte piece starts at 15 (15 + 16 = 31), overlapping the first by one byte. |
| ; NOTE(review): the low-offset piece is issued first here, whereas the other |
| ; multi-piece p3_p0 tests nearby issue the high-offset piece first; presumably |
| ; genuine scheduler output -- confirm when regenerating the assertions. |
| define void @memcpy_p3_p0_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(0) to addrspace(3), both pointers align 8. |
| ; Expected lowering: two flat_load_dwordx4 in one clause (offset 16 first, then |
| ; offset 0), each written back with a ds_write2_b64 (offset0:2 offset1:3 for the |
| ; upper half, offset1:1 for the lower half). |
| define void @memcpy_p3_p0_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(0) to addrspace(3), both pointers align 16. |
| ; Expected lowering: one flat_load_dwordx4 and, with 16-byte alignment, a single |
| ; ds_write_b128 (the lower-alignment variants expect ds_write2_b64 instead). |
| define void @memcpy_p3_p0_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[1:4] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(0) to addrspace(3), both pointers align 16. |
| ; Expected lowering: two flat_load_dwordx4 in one clause (offset 15 first, then |
| ; offset 0), each written back with a ds_write_b128. The piece at offset 15 |
| ; (15 + 16 = 31) overlaps the piece at offset 0 by one byte. |
| define void @memcpy_p3_p0_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(0) to addrspace(3), both pointers align 16. |
| ; Expected lowering: two flat_load_dwordx4 in one clause (offset 16 first, then |
| ; offset 0), each written back with a ds_write_b128 at the matching offset. |
| define void @memcpy_p3_p0_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p0_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[7:10] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(3), both pointers align 1. |
| ; Expected lowering: one global_load_dwordx4, s_waitcnt vmcnt(0), then one |
| ; ds_write2_b64 covering both 8-byte halves (offset1:1). |
| define void @memcpy_p3_p1_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(3), both pointers align 1. |
| ; Expected lowering: three global loads in one clause (dwordx4 at 0, dwordx2 at |
| ; 16, dwordx2 at 23), each followed by its ds_write in the same order. The piece |
| ; at offset 23 (23 + 8 = 31) overlaps the piece at offset 16 by one byte. |
| define void @memcpy_p3_p1_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(3), both pointers align 1. |
| ; Expected lowering: two global_load_dwordx4 in one clause (offsets 0 and 16), |
| ; each written back with a ds_write2_b64 (offset1:1 for the lower half, |
| ; offset0:2 offset1:3 for the upper half). |
| define void @memcpy_p3_p1_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(3), both pointers align 2. |
| ; Expected lowering: one global_load_dwordx4, s_waitcnt vmcnt(0), then one |
| ; ds_write2_b64 covering both 8-byte halves (offset1:1). |
| define void @memcpy_p3_p1_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(3), both pointers align 2. |
| ; Expected lowering: three global loads in one clause (dwordx4 at 0, dwordx2 at |
| ; 16, dwordx2 at 23), each followed by its ds_write in the same order. The piece |
| ; at offset 23 (23 + 8 = 31) overlaps the piece at offset 16 by one byte. |
| define void @memcpy_p3_p1_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(3), both pointers align 2. |
| ; Expected lowering: two global_load_dwordx4 in one clause (offsets 0 and 16), |
| ; each written back with a ds_write2_b64 (offset1:1 for the lower half, |
| ; offset0:2 offset1:3 for the upper half). |
| define void @memcpy_p3_p1_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(3), both pointers align 8. |
| ; Expected lowering: one global_load_dwordx4, s_waitcnt vmcnt(0), then one |
| ; ds_write2_b64 covering both 8-byte halves (offset1:1). |
| define void @memcpy_p3_p1_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(3), both pointers align 8. |
| ; Expected lowering: two global_load_dwordx4 in one clause (offsets 0 and 15), |
| ; written back as ds_write2_b64 at offset 0 and ds_write_b128 at offset 15. The |
| ; second 16-byte piece starts at 15 (15 + 16 = 31), overlapping the first by one byte. |
| define void @memcpy_p3_p1_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(3), both pointers align 8. |
| ; Expected lowering: two global_load_dwordx4 in one clause (offsets 0 and 16), |
| ; each written back with a ds_write2_b64 (offset1:1 for the lower half, |
| ; offset0:2 offset1:3 for the upper half). |
| define void @memcpy_p3_p1_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(3), both pointers align 16. |
| ; Expected lowering: one global_load_dwordx4 and, with 16-byte alignment, a |
| ; single ds_write_b128 (the lower-alignment variants expect ds_write2_b64). |
| define void @memcpy_p3_p1_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[1:4] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(3), both pointers align 16. |
| ; Expected lowering: two global_load_dwordx4 in one clause (offsets 0 and 15), |
| ; each written back with a ds_write_b128. The second 16-byte piece starts at 15 |
| ; (15 + 16 = 31), overlapping the first by one byte. |
| define void @memcpy_p3_p1_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[3:6] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(1) to addrspace(3); both pointers are align 16. |
| ; Expected code: two global_load_dwordx4 (offsets 0 and 16) in one s_clause, each |
| ; stored with a ds_write_b128 at the matching offset. |
| define void @memcpy_p3_p1_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p1_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[3:6] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 1. |
| ; Expected code: one ds_read2_b64 followed by one ds_write2_b64. |
| define void @memcpy_p3_p3_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 1. |
| ; Expected code: ds_read_b64 at offsets 23 and 16 plus one ds_read2_b64, written |
| ; back with the matching ds_write_b64 / ds_write2_b64. The 64-bit access at |
| ; offset 23 overlaps the one at offset 16 by one byte. |
| define void @memcpy_p3_p3_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b64 v[5:6], v1 offset:23 |
| ; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16 |
| ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 1. |
| ; Expected code: two ds_read2_b64 (offset0:2 offset1:3, then offset1:1), each |
| ; written back by a ds_write2_b64 with the same offsets. |
| define void @memcpy_p3_p3_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3 |
| ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 2. |
| ; Expected code: one ds_read2_b64 followed by one ds_write2_b64. |
| define void @memcpy_p3_p3_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 2. |
| ; Expected code: ds_read_b64 at offsets 23 and 16 plus one ds_read2_b64, written |
| ; back with the matching ds_write_b64 / ds_write2_b64. The 64-bit access at |
| ; offset 23 overlaps the one at offset 16 by one byte. |
| define void @memcpy_p3_p3_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b64 v[5:6], v1 offset:23 |
| ; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16 |
| ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 2. |
| ; Expected code: two ds_read2_b64 (offset0:2 offset1:3, then offset1:1), each |
| ; written back by a ds_write2_b64 with the same offsets. |
| define void @memcpy_p3_p3_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3 |
| ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 8. |
| ; Expected code: one ds_read2_b64 followed by one ds_write2_b64. |
| define void @memcpy_p3_p3_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 8. |
| ; Expected code: one ds_read2_b64 plus one ds_read_b128 at offset 15, written |
| ; back with a ds_write2_b64 and a ds_write_b128 at offset 15. |
| define void @memcpy_p3_p3_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 |
| ; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 8. |
| ; Expected code: two ds_read2_b64 (offset0:2 offset1:3, then offset1:1), each |
| ; written back by a ds_write2_b64 with the same offsets. |
| define void @memcpy_p3_p3_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3 |
| ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 16. |
| ; Expected code: one ds_read_b128 followed by one ds_write_b128. |
| define void @memcpy_p3_p3_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[1:4], v1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[1:4] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 16. |
| ; Expected code: two ds_read_b128 / ds_write_b128 pairs, one at offset 15 and one |
| ; at offset 0, so the two 128-bit accesses overlap by one byte. |
| define void @memcpy_p3_p3_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:15 |
| ; CHECK-NEXT: ds_read_b128 v[6:9], v1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[6:9] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(3) to addrspace(3); both pointers are align 16. |
| ; Expected code: two ds_read_b128 / ds_write_b128 pairs, one at offset 16 and one |
| ; at offset 0. |
| define void @memcpy_p3_p3_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p3_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:16 |
| ; CHECK-NEXT: ds_read_b128 v[6:9], v1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[6:9] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 1. |
| ; Expected code: one global_load_dwordx4 feeding one ds_write2_b64. |
| define void @memcpy_p3_p4_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 1. |
| ; Expected code: one global_load_dwordx4 and two global_load_dwordx2 (offsets 16 |
| ; and 23) in one s_clause, stored with a ds_write2_b64 and two ds_write_b64 at |
| ; offsets 16 and 23. The access at offset 23 overlaps the one at 16 by one byte. |
| define void @memcpy_p3_p4_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 1. |
| ; Expected code: two global_load_dwordx4 (offsets 0 and 16) in one s_clause, each |
| ; stored with a ds_write2_b64. |
| define void @memcpy_p3_p4_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 2. |
| ; Expected code: one global_load_dwordx4 feeding one ds_write2_b64. |
| define void @memcpy_p3_p4_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 2. |
| ; Expected code: one global_load_dwordx4 and two global_load_dwordx2 (offsets 16 |
| ; and 23) in one s_clause, stored with a ds_write2_b64 and two ds_write_b64 at |
| ; offsets 16 and 23. The access at offset 23 overlaps the one at 16 by one byte. |
| define void @memcpy_p3_p4_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x2 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 |
| ; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 2. |
| ; Expected code: two global_load_dwordx4 (offsets 0 and 16) in one s_clause, each |
| ; stored with a ds_write2_b64. |
| define void @memcpy_p3_p4_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 8. |
| ; Expected code: one global_load_dwordx4 feeding one ds_write2_b64. |
| define void @memcpy_p3_p4_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 8. |
| ; Expected code: two global_load_dwordx4 (offsets 0 and 15) in one s_clause, |
| ; stored with a ds_write2_b64 and a ds_write_b128 at offset 15. |
| define void @memcpy_p3_p4_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 8. |
| ; Expected code: two global_load_dwordx4 (offsets 0 and 16) in one s_clause, each |
| ; stored with a ds_write2_b64. |
| define void @memcpy_p3_p4_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 16. |
| ; Expected code: one global_load_dwordx4 feeding one ds_write_b128. |
| define void @memcpy_p3_p4_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[1:4] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 16. |
| ; Expected code: two global_load_dwordx4 / ds_write_b128 pairs at offsets 0 and 15, |
| ; i.e. the second 16-byte access overlaps the first by one byte. |
| define void @memcpy_p3_p4_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[3:6] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(4) to addrspace(3); both pointers are align 16. |
| ; Expected code: two global_load_dwordx4 (offsets 0 and 16) in one s_clause, each |
| ; stored with a ds_write_b128 at the matching offset. |
| define void @memcpy_p3_p4_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p4_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: ds_write_b128 v0, v[3:6] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 1. |
| ; Expected code: four buffer_load_dword (offsets 0, 4, 8, 12) in one s_clause, |
| ; stored with a single ds_write2_b64. |
| define void @memcpy_p3_p5_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 1. |
| ; Expected code: eight buffer_load_dword in one s_clause at offsets 0, 4, 8, 12, |
| ; 16, 20, 23 and 27 (the dword at 23 overlaps the one at 20 by one byte), stored |
| ; with a ds_write2_b64 and two ds_write_b64 at offsets 16 and 23. |
| define void @memcpy_p3_p5_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 1. |
| ; Expected code: eight buffer_load_dword in one s_clause at offsets 0 through 28 |
| ; in steps of 4, stored with two ds_write2_b64. |
| define void @memcpy_p3_p5_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 2. |
| ; Expected code: four buffer_load_dword (offsets 0, 4, 8, 12) in one s_clause, |
| ; stored with a single ds_write2_b64. |
| define void @memcpy_p3_p5_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 2. |
| ; Expected code: eight buffer_load_dword in one s_clause at offsets 0, 4, 8, 12, |
| ; 16, 20, 23 and 27 (the dword at 23 overlaps the one at 20 by one byte), stored |
| ; with a ds_write2_b64 and two ds_write_b64 at offsets 16 and 23. |
| define void @memcpy_p3_p5_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 2. |
| ; Expected code: eight buffer_load_dword in one s_clause at offsets 0 through 28 |
| ; in steps of 4, stored with two ds_write2_b64. |
| define void @memcpy_p3_p5_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 8. |
| ; Expected code: four buffer_load_dword (offsets 0, 4, 8, 12) in one s_clause, |
| ; stored with a single ds_write2_b64. |
| define void @memcpy_p3_p5_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; 31-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 8. |
| ; Expected code: eight buffer_load_dword in one s_clause at offsets 0, 4, 8, 12, |
| ; 15, 19, 23 and 27 (the dword at 15 overlaps the one at 12 by one byte), stored |
| ; with a ds_write2_b64 and a ds_write_b128 at offset 15. |
| define void @memcpy_p3_p5_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; 32-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 8. |
| ; Expected code: eight buffer_load_dword in one s_clause at offsets 0 through 28 |
| ; in steps of 4, stored with two ds_write2_b64. |
| define void @memcpy_p3_p5_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; 16-byte memcpy from addrspace(5) to addrspace(3); both pointers are align 16. |
| ; Expected code: four buffer_load_dword (offsets 0, 4, 8, 12) in one s_clause, |
| ; stored with a single ds_write_b128. |
| define void @memcpy_p3_p5_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[2:5] |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(5) to addrspace(3); both pointers are 16-byte aligned. |
| ; Expected gfx1030 code: eight buffer_load_dword in one clause and two ds_write_b128. |
| ; The second 16-byte chunk starts at offset 15, so it overlaps the first chunk by one |
| ; byte instead of being split into smaller tail accesses. |
| define void @memcpy_p3_p5_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: ds_write_b128 v0, v[2:5] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(5) to addrspace(3); both pointers are 16-byte aligned. |
| ; Expected gfx1030 code: eight buffer_load_dword (offsets 0..28) in one clause and two |
| ; ds_write_b128 (offsets 0 and 16); the first write only waits for the first four loads. |
| define void @memcpy_p3_p5_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p3_p5_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: ds_write_b128 v0, v[2:5] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(0) to addrspace(5); both pointers are only 1-byte aligned. |
| ; Expected gfx1030 code: one flat_load_dwordx4 followed by four buffer_store_dword |
| ; (offsets 12, 8, 4, 0). |
| define void @memcpy_p5_p0_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(0) to addrspace(5); both pointers are only 1-byte aligned. |
| ; Expected gfx1030 code: the source is read as 16 + 12 + 2 + 1 bytes (flat_load_dwordx4, |
| ; flat_load_dwordx3, flat_load_ushort, flat_load_ubyte) in one clause; the destination is |
| ; written with one byte store, one short store and seven buffer_store_dword, each group |
| ; gated by an s_waitcnt on the load that produces its data. |
| define void @memcpy_p5_p0_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30 |
| ; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28 |
| ; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) |
| ; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) |
| ; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(0) to addrspace(5); both pointers are only 1-byte aligned. |
| ; Expected gfx1030 code: two flat_load_dwordx4 (offsets 16 and 0) in one clause, then |
| ; eight buffer_store_dword; the upper 16 bytes are stored first, after the first load lands. |
| define void @memcpy_p5_p0_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(0) to addrspace(5); both pointers are 2-byte aligned. |
| ; Expected gfx1030 code: one flat_load_dwordx4 followed by four buffer_store_dword |
| ; (offsets 12, 8, 4, 0). |
| define void @memcpy_p5_p0_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(0) to addrspace(5); both pointers are 2-byte aligned. |
| ; Expected gfx1030 code: the source is read as 16 + 12 + 2 + 1 bytes (flat_load_dwordx4, |
| ; flat_load_dwordx3, flat_load_ushort, flat_load_ubyte) in one clause; the destination is |
| ; written with one byte store, one short store and seven buffer_store_dword. |
| define void @memcpy_p5_p0_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30 |
| ; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28 |
| ; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) |
| ; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) |
| ; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(0) to addrspace(5); both pointers are 2-byte aligned. |
| ; Expected gfx1030 code: two flat_load_dwordx4 (offsets 16 and 0) in one clause, then |
| ; eight buffer_store_dword; the upper 16 bytes are stored first. |
| define void @memcpy_p5_p0_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(0) to addrspace(5); both pointers are 8-byte aligned. |
| ; Expected gfx1030 code: one flat_load_dwordx4 followed by four buffer_store_dword |
| ; (offsets 12, 8, 4, 0). |
| define void @memcpy_p5_p0_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(0) to addrspace(5); both pointers are 8-byte aligned. |
| ; Expected gfx1030 code: two flat_load_dwordx4 at offsets 15 and 0, i.e. two 16-byte |
| ; chunks that overlap by one byte, then eight buffer_store_dword at offsets |
| ; 27, 23, 19, 15 and 12, 8, 4, 0. |
| define void @memcpy_p5_p0_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(0) to addrspace(5); both pointers are 8-byte aligned. |
| ; Expected gfx1030 code: two flat_load_dwordx4 (offsets 16 and 0) in one clause, then |
| ; eight buffer_store_dword; the upper 16 bytes are stored first. |
| define void @memcpy_p5_p0_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(0) to addrspace(5); both pointers are 16-byte aligned. |
| ; Expected gfx1030 code: one flat_load_dwordx4 followed by four buffer_store_dword |
| ; (offsets 12, 8, 4, 0). |
| define void @memcpy_p5_p0_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(0) to addrspace(5); both pointers are 16-byte aligned. |
| ; Expected gfx1030 code: two flat_load_dwordx4 at offsets 15 and 0, i.e. two 16-byte |
| ; chunks that overlap by one byte, then eight buffer_store_dword at offsets |
| ; 27, 23, 19, 15 and 12, 8, 4, 0. |
| define void @memcpy_p5_p0_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(0) to addrspace(5); both pointers are 16-byte aligned. |
| ; Expected gfx1030 code: two flat_load_dwordx4 (offsets 16 and 0) in one clause, then |
| ; eight buffer_store_dword; the upper 16 bytes are stored first. |
| define void @memcpy_p5_p0_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p0_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 |
| ; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(5); both pointers are only 1-byte aligned. |
| ; Expected gfx1030 code: one global_load_dwordx4 followed by four buffer_store_dword |
| ; (emitted in offset order 4, 0, 12, 8). |
| define void @memcpy_p5_p1_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(5); both pointers are only 1-byte aligned. |
| ; Expected gfx1030 code: the source is read as 12 + 2 + 1 + 16 bytes (global_load_dwordx3, |
| ; global_load_ushort, global_load_ubyte, global_load_dwordx4) in one clause; the |
| ; destination is written with seven buffer_store_dword, one short store and one byte store. |
| define void @memcpy_p5_p1_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 |
| ; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 |
| ; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(5); both pointers are only 1-byte aligned. |
| ; Expected gfx1030 code: two global_load_dwordx4 (offsets 0 and 16) in one clause, then |
| ; eight buffer_store_dword; the lower 16 bytes are stored first. |
| define void @memcpy_p5_p1_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(5); both pointers are 2-byte aligned. |
| ; Expected gfx1030 code: one global_load_dwordx4 followed by four buffer_store_dword |
| ; (emitted in offset order 4, 0, 12, 8). |
| define void @memcpy_p5_p1_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(5); both pointers are 2-byte aligned. |
| ; Expected gfx1030 code: the source is read as 12 + 2 + 1 + 16 bytes (global_load_dwordx3, |
| ; global_load_ushort, global_load_ubyte, global_load_dwordx4) in one clause; the |
| ; destination is written with seven buffer_store_dword, one short store and one byte store. |
| define void @memcpy_p5_p1_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 |
| ; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 |
| ; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(5); both pointers are 2-byte aligned. |
| ; Expected gfx1030 code: two global_load_dwordx4 (offsets 0 and 16) in one clause, then |
| ; eight buffer_store_dword; the lower 16 bytes are stored first. |
| define void @memcpy_p5_p1_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(5); both pointers are 8-byte aligned. |
| ; Expected gfx1030 code: one global_load_dwordx4 followed by four buffer_store_dword |
| ; (offsets 12, 8, 4, 0). |
| define void @memcpy_p5_p1_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(5); both pointers are 8-byte aligned. |
| ; Expected gfx1030 code: two global_load_dwordx4 at offsets 0 and 15, i.e. two 16-byte |
| ; chunks that overlap by one byte, then eight buffer_store_dword at offsets |
| ; 12, 8, 4, 0 and 27, 23, 19, 15. |
| define void @memcpy_p5_p1_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(5); both pointers are 8-byte aligned. |
| ; Expected gfx1030 code: two global_load_dwordx4 (offsets 0 and 16) in one clause, then |
| ; eight buffer_store_dword; the lower 16 bytes are stored first. |
| define void @memcpy_p5_p1_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(1) to addrspace(5); both pointers are 16-byte aligned. |
| ; Expected gfx1030 code: one global_load_dwordx4 followed by four buffer_store_dword |
| ; (offsets 12, 8, 4, 0). |
| define void @memcpy_p5_p1_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(1) to addrspace(5); both pointers are 16-byte aligned. |
| ; Expected gfx1030 code: two global_load_dwordx4 at offsets 0 and 15, i.e. two 16-byte |
| ; chunks that overlap by one byte, then eight buffer_store_dword at offsets |
| ; 12, 8, 4, 0 and 27, 23, 19, 15. |
| define void @memcpy_p5_p1_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 32 bytes from addrspace(1) to addrspace(5); both pointers are 16-byte aligned. |
| ; Expected gfx1030 code: two global_load_dwordx4 (offsets 0 and 16) in one clause, then |
| ; eight buffer_store_dword; the lower 16 bytes are stored first. |
| define void @memcpy_p5_p1_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p1_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 16 bytes from addrspace(3) to addrspace(5); both pointers are only 1-byte aligned. |
| ; Expected gfx1030 code: one ds_read2_b64 followed by four buffer_store_dword |
| ; (emitted in offset order 4, 0, 12, 8). |
| define void @memcpy_p5_p3_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
| ; memcpy of 31 bytes from addrspace(3) to addrspace(5); both pointers are only 1-byte aligned. |
| ; Expected gfx1030 code: five DS reads (ds_read_b32, ds_read_u16, ds_read_u8, ds_read2_b64, |
| ; ds_read_b64), then seven buffer_store_dword, one short store and one byte store; each |
| ; store group is preceded by an s_waitcnt lgkmcnt that counts down from 4 to 0. |
| define void @memcpy_p5_p3_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b32 v8, v1 offset:24 |
| ; CHECK-NEXT: ds_read_u16 v9, v1 offset:28 |
| ; CHECK-NEXT: ds_read_u8 v10, v1 offset:30 |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 |
| ; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(3) |
| ; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Copies 32 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 1.
; Expected gfx1030 output (autogenerated): two ds_read2_b64 (v[2:5] for bytes
; 0..15, v[6:9] for bytes 16..31). Each half is written with four
; buffer_store_dword once its read has been waited on (lgkmcnt(1), then
; lgkmcnt(0)).
| define void @memcpy_p5_p3_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 |
| ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Copies 16 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 2.
; Expected gfx1030 output (autogenerated): one ds_read2_b64 into v[1:4], then
; four buffer_store_dword at offsets 4, 0, 12 and 8. This is the same
; sequence that is expected for the align_1_1 variant of this size.
| define void @memcpy_p5_p3_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Copies 31 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 2.
; Expected gfx1030 output (autogenerated): five DS reads (b32 @24, u16 @28,
; u8 @30, read2_b64 @0, b64 @16), with the stores in the same order under
; s_waitcnt lgkmcnt 4..0. This is the same sequence that is expected for the
; align_1_1 variant of this size.
| define void @memcpy_p5_p3_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b32 v8, v1 offset:24 |
| ; CHECK-NEXT: ds_read_u16 v9, v1 offset:28 |
| ; CHECK-NEXT: ds_read_u8 v10, v1 offset:30 |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 |
| ; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(3) |
| ; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(2) |
| ; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Copies 32 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 2.
; Expected gfx1030 output (autogenerated): two ds_read2_b64, each followed
; (after its lgkmcnt wait) by four buffer_store_dword. This is the same
; sequence that is expected for the align_1_1 variant of this size.
| define void @memcpy_p5_p3_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 |
| ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Copies 16 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 8.
; Expected gfx1030 output (autogenerated): one ds_read2_b64 into v[1:4], then
; four buffer_store_dword in descending offset order (12, 8, 4, 0).
| define void @memcpy_p5_p3_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Copies 31 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 8.
; Expected gfx1030 output (autogenerated): two 16-byte reads, ds_read2_b64 at
; offset 0 and ds_read_b128 at offset 15, so byte 15 is covered by both and
; no short/byte tail access is emitted. Bytes 15..30 are written with dword
; stores at offsets 15, 19, 23 and 27.
; NOTE(review): those tail accesses sit at offsets that are not multiples of
; 4 from the base pointers; this is what the generator recorded. Regenerate
; with update_llc_test_checks.py instead of hand-editing if lowering changes.
| define void @memcpy_p5_p3_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 |
| ; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Copies 32 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 8.
; Expected gfx1030 output (autogenerated): two ds_read2_b64 (bytes 0..15 and
; 16..31). Each half is stored with four buffer_store_dword in descending
; offset order after its lgkmcnt wait.
| define void @memcpy_p5_p3_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 |
| ; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Copies 16 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 16.
; Expected gfx1030 output (autogenerated): a single ds_read_b128 into v[1:4]
; (the align_8_8 variant expects ds_read2_b64 instead), then four
; buffer_store_dword in descending offset order (12, 8, 4, 0).
| define void @memcpy_p5_p3_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[1:4], v1 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Copies 31 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 16.
; Expected gfx1030 output (autogenerated): two ds_read_b128, at offset 0 and
; at offset 15, so byte 15 is covered by both and no short/byte tail access
; is emitted. Bytes 15..30 are written with dword stores at offsets 15, 19,
; 23 and 27.
; NOTE(review): the second read and its stores sit at offsets that are not
; multiples of 4 from the base pointers; this is what the generator recorded.
; Regenerate with update_llc_test_checks.py instead of hand-editing.
| define void @memcpy_p5_p3_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[2:5], v1 |
| ; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Copies 32 bytes with llvm.memcpy from an addrspace(3) source to an
; addrspace(5) destination; both pointers are marked align 16.
; Expected gfx1030 output (autogenerated): two ds_read_b128 (offset 0 and
; offset 16). Each half is stored with four buffer_store_dword in descending
; offset order after its lgkmcnt wait.
| define void @memcpy_p5_p3_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p3_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: ds_read_b128 v[2:5], v1 |
| ; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:16 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Copies 16 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 1.
; Expected gfx1030 output (autogenerated): one global_load_dwordx4 into
; v[1:4], a wait on vmcnt(0), then four buffer_store_dword at offsets 4, 0,
; 12 and 8.
| define void @memcpy_p5_p4_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Copies 31 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 1.
; Expected gfx1030 output (autogenerated): an s_clause of four global loads
; (dwordx3 @16, ushort @28, ubyte @30, dwordx4 @0). The tail is stored first
; (dword @24, short @28, byte @30) under vmcnt 3..1. After vmcnt(0) come six
; dword stores for bytes 0..23.
| define void @memcpy_p5_p4_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 |
| ; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 |
| ; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Copies 32 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 1.
; Expected gfx1030 output (autogenerated): an s_clause of two
; global_load_dwordx4 (offset 0 into v[3:6], offset 16 into v[7:10]). Each
; half is written with four buffer_store_dword after its vmcnt wait.
| define void @memcpy_p5_p4_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Copies 16 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 2.
; Expected gfx1030 output (autogenerated): one global_load_dwordx4, then four
; buffer_store_dword at offsets 4, 0, 12 and 8. This is the same sequence
; that is expected for the align_1_1 variant of this size.
| define void @memcpy_p5_p4_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Copies 31 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 2.
; Expected gfx1030 output (autogenerated): an s_clause of four global loads
; (dwordx3 @16, ushort @28, ubyte @30, dwordx4 @0), with the tail stored
; first under vmcnt 3..1 and six dword stores after vmcnt(0). This is the
; same sequence that is expected for the align_1_1 variant of this size.
| define void @memcpy_p5_p4_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 |
| ; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 |
| ; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Copies 32 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 2.
; Expected gfx1030 output (autogenerated): an s_clause of two
; global_load_dwordx4, each followed (after its vmcnt wait) by four
; buffer_store_dword. This is the same sequence that is expected for the
; align_1_1 variant of this size.
| define void @memcpy_p5_p4_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Copies 16 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 8.
; Expected gfx1030 output (autogenerated): one global_load_dwordx4 into
; v[1:4], then four buffer_store_dword in descending offset order
; (12, 8, 4, 0).
| define void @memcpy_p5_p4_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Copies 31 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 8.
; Expected gfx1030 output (autogenerated): an s_clause of two
; global_load_dwordx4, at offset 0 and at offset 15, so byte 15 is covered by
; both and no short/byte tail access is emitted. Bytes 15..30 are written
; with dword stores at offsets 15, 19, 23 and 27.
; NOTE(review): the second load and its stores sit at offsets that are not
; multiples of 4 from the base pointers; this is what the generator recorded.
; Regenerate with update_llc_test_checks.py instead of hand-editing.
| define void @memcpy_p5_p4_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Copies 32 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 8.
; Expected gfx1030 output (autogenerated): an s_clause of two
; global_load_dwordx4 (offset 0 and offset 16). Each half is stored with four
; buffer_store_dword in descending offset order after its vmcnt wait.
| define void @memcpy_p5_p4_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Copies 16 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 16.
; Expected gfx1030 output (autogenerated): one global_load_dwordx4, then four
; buffer_store_dword in descending offset order (12, 8, 4, 0). This is the
; same sequence that is expected for the align_8_8 variant of this size.
| define void @memcpy_p5_p4_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Copies 31 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 16.
; Expected gfx1030 output (autogenerated): an s_clause of two
; global_load_dwordx4, at offset 0 and at offset 15, overlapping at byte 15.
; Bytes 15..30 are written with dword stores at offsets 15, 19, 23 and 27.
; This is the same sequence that is expected for the align_8_8 variant.
; NOTE(review): the second load and its stores sit at offsets that are not
; multiples of 4 from the base pointers; this is what the generator recorded.
; Regenerate with update_llc_test_checks.py instead of hand-editing.
| define void @memcpy_p5_p4_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Copies 32 bytes with llvm.memcpy from an addrspace(4) source to an
; addrspace(5) destination; both pointers are marked align 16.
; Expected gfx1030 output (autogenerated): an s_clause of two
; global_load_dwordx4 (offset 0 and offset 16). Each half is stored with four
; buffer_store_dword in descending offset order. This is the same sequence
; that is expected for the align_8_8 variant of this size.
| define void @memcpy_p5_p4_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p4_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x1 |
| ; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off |
| ; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Copies 16 bytes with llvm.memcpy between two addrspace(5) pointers, both
; marked align 1.
; Expected gfx1030 output (autogenerated): an s_clause of four
; buffer_load_dword (offsets 8, 12, 0, 4). Each value is stored back to the
; same offset with buffer_store_dword as vmcnt counts down from 3 to 0.
; The last load reuses v1, the source address register, as its destination.
| define void @memcpy_p5_p5_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz16_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; Copies 31 bytes with llvm.memcpy between two addrspace(5) pointers, both
; marked align 1.
; Expected gfx1030 output (autogenerated): an s_clause of nine buffer loads
; (ushort @28, dwords @24, 16, 20, 8, 12, 0, 4, and ubyte @30). The matching
; short/dword/byte stores are issued in the same order as vmcnt counts down
; from 8 to 0.
; The last load reuses v1, the source address register, as its destination.
| define void @memcpy_p5_p5_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz31_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x8 |
| ; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(8) |
| ; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(7) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(6) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; Copies 32 bytes with llvm.memcpy between two addrspace(5) pointers, both
; marked align 1.
; Expected gfx1030 output (autogenerated): an s_clause of eight
; buffer_load_dword (offsets 24, 28, 16, 20, 8, 12, 0, 4). Each value is
; stored back to the same offset with buffer_store_dword as vmcnt counts down
; from 7 to 0.
; The last load reuses v1, the source address register, as its destination.
| define void @memcpy_p5_p5_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz32_align_1_1: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(7) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(6) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; 16-byte llvm.memcpy from addrspace(5) to addrspace(5); both pointers are align 2.
; Expected output: four buffer_load_dword (offsets 0..12) after s_clause 0x3, then
; four buffer_store_dword, each preceded by an s_waitcnt vmcnt counting down 3..0.
| define void @memcpy_p5_p5_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz16_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; 31-byte llvm.memcpy from addrspace(5) to addrspace(5); both pointers are align 2.
; Expected output covers the 31 bytes without overlap: seven dwords (offsets 0..24),
; one ushort at offset 28 and one ubyte at offset 30 -- nine loads after s_clause 0x8,
; then nine matching stores, each preceded by an s_waitcnt vmcnt counting down 8..0.
| define void @memcpy_p5_p5_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz31_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x8 |
| ; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_waitcnt vmcnt(8) |
| ; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(7) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(6) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; 32-byte llvm.memcpy from addrspace(5) to addrspace(5); both pointers are align 2.
; Expected output: eight buffer_load_dword (offsets 0..28) after s_clause 0x7, then
; eight buffer_store_dword, each preceded by an s_waitcnt vmcnt counting down 7..0.
| define void @memcpy_p5_p5_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz32_align_2_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(7) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(6) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; 16-byte llvm.memcpy from addrspace(5) to addrspace(5); both pointers are align 8.
; Expected output: four buffer_load_dword in ascending offset order (0, 4, 8, 12)
; after s_clause 0x3, then four buffer_store_dword, each preceded by an
; s_waitcnt vmcnt counting down 3..0.
| define void @memcpy_p5_p5_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz16_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; 31-byte llvm.memcpy from addrspace(5) to addrspace(5); both pointers are align 8.
; Expected output copies two 16-byte groups of dwords: offsets 0..12 and offsets
; 15..27. The second group starts at byte 15, so the groups overlap by one byte and
; together cover bytes 0..30. Eight loads after s_clause 0x7, then eight stores,
; each preceded by an s_waitcnt vmcnt counting down 7..0.
| define void @memcpy_p5_p5_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz31_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(7) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(6) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; 32-byte llvm.memcpy from addrspace(5) to addrspace(5); both pointers are align 8.
; Expected output: eight buffer_load_dword after s_clause 0x7 (offsets 16..28 first,
; then 0..12), then eight buffer_store_dword in the same order, each preceded by an
; s_waitcnt vmcnt counting down 7..0.
| define void @memcpy_p5_p5_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz32_align_8_8: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(7) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(6) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; 16-byte llvm.memcpy from addrspace(5) to addrspace(5); both pointers are align 16.
; Expected output: four buffer_load_dword in ascending offset order (0, 4, 8, 12)
; after s_clause 0x3, then four buffer_store_dword, each preceded by an
; s_waitcnt vmcnt counting down 3..0.
| define void @memcpy_p5_p5_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz16_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x3 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false) |
| ret void |
| } |
| |
; 31-byte llvm.memcpy from addrspace(5) to addrspace(5); both pointers are align 16.
; Expected output copies two 16-byte groups of dwords: offsets 0..12 and offsets
; 15..27. The second group starts at byte 15, so the groups overlap by one byte and
; together cover bytes 0..30. Eight loads after s_clause 0x7, then eight stores,
; each preceded by an s_waitcnt vmcnt counting down 7..0.
| define void @memcpy_p5_p5_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz31_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(7) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:15 |
| ; CHECK-NEXT: s_waitcnt vmcnt(6) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:19 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:23 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:27 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false) |
| ret void |
| } |
| |
; 32-byte llvm.memcpy from addrspace(5) to addrspace(5); both pointers are align 16.
; Expected output: eight buffer_load_dword after s_clause 0x7 (offsets 16..28 first,
; then 0..12), then eight buffer_store_dword in the same order, each preceded by an
; s_waitcnt vmcnt counting down 7..0.
| define void @memcpy_p5_p5_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { |
| ; CHECK-LABEL: memcpy_p5_p5_sz32_align_16_16: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen |
| ; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_waitcnt vmcnt(7) |
| ; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:16 |
| ; CHECK-NEXT: s_waitcnt vmcnt(6) |
| ; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:20 |
| ; CHECK-NEXT: s_waitcnt vmcnt(5) |
| ; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:24 |
| ; CHECK-NEXT: s_waitcnt vmcnt(4) |
| ; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:28 |
| ; CHECK-NEXT: s_waitcnt vmcnt(3) |
| ; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4 |
| ; CHECK-NEXT: s_waitcnt vmcnt(1) |
| ; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) |
| ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false) |
| ret void |
| } |
| |
; Declarations of the llvm.memcpy overloads, one per (destination, source)
; address-space pair: destinations 0, 1, 3, 5 and sources 0, 1, 3, 4, 5.
; Every declaration attaches attribute group #2, defined below.
| declare void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 |
| declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2 |
| |
; The group number must match the #2 used by the declarations above; a group
; defined under any other number would not be attached to them.
| attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } |
| |