| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10 |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX12 |
| |
| --- |
| name: merge_s_buffer_load_x2 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x2 |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %3:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY %3.sub0 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed %3.sub1 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x2 |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_BUFFER_LOAD_DWORDX2_IMM]].sub0 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX2_IMM]].sub1 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x1_x2 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; CHECK-LABEL: name: merge_s_buffer_load_x1_x2 |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32)) |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 4, 0 :: (dereferenceable invariant load (s64)) |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) |
| %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s64)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x2_x1 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x2_x1 |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64)) |
| ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (dereferenceable invariant load (s32)) |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x2_x1 |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s96), align 8) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX3_IMM]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX3_IMM]].sub2 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x4 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x4 |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %7:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY %7.sub0_sub1 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY killed %7.sub2_sub3 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY1]].sub0 |
| ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub1 |
| ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0 |
| ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub1 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x4 |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub2 |
| ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 |
| ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) |
| %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) |
| %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x1_x3 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; CHECK-LABEL: name: merge_s_buffer_load_x1_x3 |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32)) |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 4, 0 :: (dereferenceable invariant load (s96), align 16) |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) |
| %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s96)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x3_x1 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; CHECK-LABEL: name: merge_s_buffer_load_x3_x1 |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128)) |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s96)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8 |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %15:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %15.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %15.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1 |
| ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 |
| ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 |
| ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub0 |
| ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub1 |
| ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 |
| ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY2]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY9]].sub0 |
| ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub1 |
| ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY10]].sub0 |
| ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY10]].sub1 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8 |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2 |
| ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub3 |
| ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub2 |
| ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY5]].sub0 |
| ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY5]].sub1 |
| ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sgpr_96 = COPY [[COPY2]].sub0_sub1_sub2 |
| ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub3 |
| ; GFX12-NEXT: [[COPY11:%[0-9]+]]:sreg_64_xexec = COPY [[COPY9]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub2 |
| ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY11]].sub0 |
| ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY11]].sub1 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) |
| %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) |
| %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) |
| %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) |
| %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) |
| %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s32)) |
| %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 28, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_reordered |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_reordered |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %15:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %15.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %15.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1 |
| ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub1 |
| ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub0 |
| ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 |
| ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY2]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY7]].sub1 |
| ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY7]].sub0 |
| ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub1 |
| ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub0 |
| ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY8]].sub1 |
| ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY8]].sub0 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_reordered |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2 |
| ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub3 |
| ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub2 |
| ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY5]].sub1 |
| ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY5]].sub0 |
| ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sgpr_96 = COPY [[COPY2]].sub0_sub1_sub2 |
| ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub3 |
| ; GFX12-NEXT: [[COPY11:%[0-9]+]]:sreg_64_xexec = COPY [[COPY9]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub2 |
| ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY11]].sub1 |
| ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY11]].sub0 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) |
| %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) |
| %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 28, 0 :: (dereferenceable invariant load (s32)) |
| %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) |
| %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) |
| %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) |
| %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_out_of_x2 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2 |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1 |
| ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2 |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3 |
| ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3 |
| ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64)) |
| %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64)) |
| %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) |
| %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_out_of_x4 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4 |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4 |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) |
| %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_mixed |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_mixed |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 |
| ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 |
| ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_mixed |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3 |
| ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 |
| ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) |
| %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64)) |
| %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_sgpr_imm |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_sgpr_imm |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 |
| ; GFX10-NEXT: early-clobber %8:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4) |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY %8.sub0_sub1 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY killed %8.sub2_sub3 |
| ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0 |
| ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub1 |
| ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 |
| ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_sgpr_imm |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4) |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1_sub2 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub3 |
| ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub2 |
| ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub0 |
| ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub1 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32 = COPY $sgpr4 |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) |
| %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32)) |
| %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s32)) |
| %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 12, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: no_merge_for_different_soffsets |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 |
| |
| ; CHECK-LABEL: name: no_merge_for_different_soffsets |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr5 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 4, 0 :: (dereferenceable invariant load (s32)) |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY2]], 8, 0 :: (dereferenceable invariant load (s32)) |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32 = COPY $sgpr4 |
| %2:sreg_32 = COPY $sgpr5 |
| %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32)) |
| %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %2:sreg_32, 8, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: no_merge_for_non_adjacent_offsets |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| |
| ; CHECK-LABEL: name: no_merge_for_non_adjacent_offsets |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 4, 0 :: (dereferenceable invariant load (s32)) |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 12, 0 :: (dereferenceable invariant load (s32)) |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32 = COPY $sgpr4 |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32)) |
| %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 12, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| |
| # The constrained multi-dword buffer load merge tests. |
| |
| --- |
| name: merge_s_buffer_load_x1_x2ec |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; CHECK-LABEL: name: merge_s_buffer_load_x1_x2ec |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32)) |
| ; CHECK-NEXT: early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 4, 0 :: (dereferenceable invariant load (s64)) |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) |
| early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s64)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x2ec_x1 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x2ec_x1 |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s64)) |
| ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (dereferenceable invariant load (s32)) |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x2ec_x1 |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s96), align 8) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX3_IMM]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX3_IMM]].sub2 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x1_x3ec |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; CHECK-LABEL: name: merge_s_buffer_load_x1_x3ec |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32)) |
| ; CHECK-NEXT: early-clobber %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec [[COPY]], 4, 0 :: (dereferenceable invariant load (s96), align 16) |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) |
| early-clobber %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s96)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x3ec_x1 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; CHECK-LABEL: name: merge_s_buffer_load_x3ec_x1 |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128)) |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| early-clobber %1:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s96)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_out_of_x2ec_reordered |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_reordered |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1 |
| ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_reordered |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3 |
| ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3 |
| ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64)) |
| early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64)) |
| early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) |
| early-clobber %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_out_of_x2ec_x2 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_x2 |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1 |
| ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_x2 |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3 |
| ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3 |
| ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64)) |
| early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64)) |
| %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) |
| %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_out_of_x4ec |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) |
| early-clobber %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_out_of_x4ec_x4 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec_x4 |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec_x4 |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) |
| %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_out_of_x4_x4ec |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4_x4ec |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4_x4ec |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) |
| early-clobber %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x8_mixed_including_ec_opcodes |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_x8_mixed_including_ec_opcodes |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub0_sub1_sub2_sub3 |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub4_sub5_sub6_sub7 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 |
| ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3 |
| ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 |
| ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_x8_mixed_including_ec_opcodes |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3 |
| ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 |
| ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) |
| %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) |
| early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64)) |
| %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| |
| ; GFX10-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec |
| ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| ; GFX10-NEXT: {{ $}} |
| ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 |
| ; GFX10-NEXT: early-clobber %4:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 8) |
| ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY %4.sub0_sub1 |
| ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed %4.sub2_sub3 |
| ; GFX10-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec |
| ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 |
| ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 8) |
| ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1 |
| ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32 = COPY $sgpr4 |
| early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s64)) |
| early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s64)) |
| |
| S_ENDPGM 0 |
| ... |
| |
| # No constrained opcode required when the MEM operand has met the required alignment. |
| |
| --- |
| |
| name: merge_s_buffer_load_x2_x2_no_constrained_opc_needed |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; CHECK-LABEL: name: merge_s_buffer_load_x2_x2_no_constrained_opc_needed |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128)) |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub2_sub3 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64), align 16) |
| %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_x4_x4_no_constrained_opc_needed |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| |
| ; CHECK-LABEL: name: merge_s_buffer_load_x4_x4_no_constrained_opc_needed |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256)) |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128), align 32) |
| %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128)) |
| |
| S_ENDPGM 0 |
| ... |
| --- |
| |
| name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec_no_constrained_opc_needed |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| |
| ; CHECK-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec_no_constrained_opc_needed |
| ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 |
| ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128)) |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1 |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3 |
| ; CHECK-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 |
| %1:sreg_32 = COPY $sgpr4 |
| %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s64), align 16) |
| %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s64)) |
| |
| S_ENDPGM 0 |
| ... |