| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -mtriple=arm64-appe-ios -o - -run-pass=machine-combiner %s | FileCheck %s |
| |
| # LLVM IR placeholders: one stub definition per MIR function below (the names |
| # match the `name:` fields), each consisting only of `ret float undef`. @use is |
| # the callee of the TCRETURNdi tail call in bb.2 of every MIR function. |
| # |
| # NOTE(review): the llc invocation at the top of the file spells the triple |
| # `arm64-appe-ios`; presumably `arm64-apple-ios` was intended. Confirm, and |
| # regenerate the assertions if it is changed. |
| --- | |
| define float @reassoicate_some_inputs_in_different_block(ptr %a, i1 %c) { |
| ret float undef |
| } |
| |
| define float @reassoicate_candidates_in_different_blocks(ptr %a, i1 %c) { |
| ret float undef |
| } |
| |
| define float @reassoicate_candidates_in_different_blocks_no_sink(ptr %a, i1 %c) { |
| ret float undef |
| } |
| |
| define float @no_reassociate_different_block(ptr %a, i1 %c) { |
| ret float undef |
| } |
| |
| declare void @use() |
| |
| |
| ... |
| # All four inputs (%0-%3) are loaded in bb.0 and are also used in bb.2, while |
| # the serialized FADDv4f32 reduction %3 + (%1 + (%0 + %2)) lives entirely in |
| # bb.1. The expected output reassociates that chain into the balanced form |
| # (%3 + %1) + (%0 + %2) to improve parallelism. |
| --- |
| name: reassoicate_some_inputs_in_different_block |
| alignment: 4 |
| tracksRegLiveness: true |
| body: | |
| ; CHECK-LABEL: name: reassoicate_some_inputs_in_different_block |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; CHECK-NEXT: liveins: $x0, $w1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 |
| ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4) |
| ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2 |
| ; CHECK-NEXT: B %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], [[LDRQui1]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 killed [[FADDv4f32_1]], killed [[FADDv4f32_]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]] |
| ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr |
| ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]] |
| ; CHECK-NEXT: RET_ReallyLR implicit $s0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: $q0 = COPY [[LDRQui]] |
| ; CHECK-NEXT: $q1 = COPY [[LDRQui2]] |
| ; CHECK-NEXT: $q2 = COPY [[LDRQui1]] |
| ; CHECK-NEXT: $q3 = COPY [[LDRQui3]] |
| ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $x0, $w1 |
| |
| %5:gpr32 = COPY $w1 |
| %4:gpr64common = COPY $x0 |
| %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4) |
| %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4) |
| %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4) |
| %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4) |
| TBZW %5, 0, %bb.2 |
| B %bb.1 |
| |
| bb.1: |
| %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr |
| %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr |
| %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr |
| %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr |
| %10:gpr64all = COPY %9.dsub |
| %12:fpr64 = COPY %10 |
| %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr |
| $s0 = COPY %11 |
| RET_ReallyLR implicit $s0 |
| |
| bb.2: |
| $q0 = COPY %0 |
| $q1 = COPY %2 |
| $q2 = COPY %1 |
| $q3 = COPY %3 |
| TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 |
| |
| ... |
| # Variation of reassoicate_some_inputs_in_different_block where the candidate |
| # instructions are split across 2 blocks: the first two FADDv4f32 of the chain |
| # (%6, %7) are in bb.0 and only the final one (%8) is in bb.1. The expected |
| # output leaves the chain unchanged, i.e. no reassociation is performed. |
| --- |
| name: reassoicate_candidates_in_different_blocks |
| alignment: 4 |
| tracksRegLiveness: true |
| body: | |
| ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; CHECK-NEXT: liveins: $x0, $w1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 |
| ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr |
| ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2 |
| ; CHECK-NEXT: B %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]] |
| ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr |
| ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]] |
| ; CHECK-NEXT: RET_ReallyLR implicit $s0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: $q0 = COPY [[LDRQui]] |
| ; CHECK-NEXT: $q1 = COPY [[LDRQui2]] |
| ; CHECK-NEXT: $q2 = COPY [[LDRQui1]] |
| ; CHECK-NEXT: $q3 = COPY [[LDRQui3]] |
| ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $x0, $w1 |
| |
| %5:gpr32 = COPY $w1 |
| %4:gpr64common = COPY $x0 |
| %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4) |
| %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4) |
| %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4) |
| %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4) |
| %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr |
| %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr |
| TBZW %5, 0, %bb.2 |
| B %bb.1 |
| |
| bb.1: |
| %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr |
| %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr |
| %10:gpr64all = COPY %9.dsub |
| %12:fpr64 = COPY %10 |
| %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr |
| $s0 = COPY %11 |
| RET_ReallyLR implicit $s0 |
| |
| bb.2: |
| $q0 = COPY %0 |
| $q1 = COPY %2 |
| $q2 = COPY %1 |
| $q3 = COPY %3 |
| TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 |
| |
| ... |
| |
| # Variation of reassoicate_candidates_in_different_blocks where the partial sum |
| # %7 computed in bb.0 is used in both successors: by the final FADDv4f32 in bb.1 |
| # and by the copy into $q3 in bb.2 (which takes the place of %3 there). The |
| # expected output leaves the FADDv4f32 chain unchanged. |
| # NOTE(review): the `_no_sink` suffix presumably refers to %7 not being sinkable |
| # into bb.1 because of its extra use in bb.2 - confirm. |
| --- |
| name: reassoicate_candidates_in_different_blocks_no_sink |
| alignment: 4 |
| tracksRegLiveness: true |
| body: | |
| ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks_no_sink |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; CHECK-NEXT: liveins: $x0, $w1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 |
| ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr |
| ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2 |
| ; CHECK-NEXT: B %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]] |
| ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr |
| ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]] |
| ; CHECK-NEXT: RET_ReallyLR implicit $s0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: $q0 = COPY [[LDRQui]] |
| ; CHECK-NEXT: $q1 = COPY [[LDRQui2]] |
| ; CHECK-NEXT: $q2 = COPY [[LDRQui1]] |
| ; CHECK-NEXT: $q3 = COPY [[FADDv4f32_1]] |
| ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $x0, $w1 |
| |
| %5:gpr32 = COPY $w1 |
| %4:gpr64common = COPY $x0 |
| %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4) |
| %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4) |
| %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4) |
| %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4) |
| %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr |
| %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr |
| TBZW %5, 0, %bb.2 |
| B %bb.1 |
| |
| bb.1: |
| %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr |
| %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr |
| %10:gpr64all = COPY %9.dsub |
| %12:fpr64 = COPY %10 |
| %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr |
| $s0 = COPY %11 |
| RET_ReallyLR implicit $s0 |
| |
| bb.2: |
| $q0 = COPY %0 |
| $q1 = COPY %2 |
| $q2 = COPY %1 |
| $q3 = COPY %7 |
| TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 |
| |
| ... |
| |
| # Reassociation of the reduction in bb.1 is not profitable, because LDRQui3 (%3) |
| # has a much larger latency than the other loads: its address is produced by a |
| # chain of two dependent LDRXui loads (%6, %7), whereas %0-%2 are loaded directly |
| # off $x0. The expected output keeps the serialized FADDv4f32 chain as is. |
| --- |
| name: no_reassociate_different_block |
| alignment: 4 |
| tracksRegLiveness: true |
| body: | |
| ; CHECK-LABEL: name: no_reassociate_different_block |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; CHECK-NEXT: liveins: $x0, $w1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 |
| ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4) |
| ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY1]], 8 :: (load (s64)) |
| ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64common = LDRXui killed [[LDRXui]], 0 :: (load (s64)) |
| ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui killed [[LDRXui1]], 0 :: (load (s128), align 4) |
| ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2 |
| ; CHECK-NEXT: B %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr |
| ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]] |
| ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr |
| ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]] |
| ; CHECK-NEXT: RET_ReallyLR implicit $s0 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: $q0 = COPY [[LDRQui]] |
| ; CHECK-NEXT: $q1 = COPY [[LDRQui2]] |
| ; CHECK-NEXT: $q2 = COPY [[LDRQui1]] |
| ; CHECK-NEXT: $q3 = COPY [[LDRQui3]] |
| ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $x0, $w1 |
| |
| %5:gpr32 = COPY $w1 |
| %4:gpr64common = COPY $x0 |
| %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4) |
| %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4) |
| %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4) |
| %6:gpr64common = LDRXui %4, 8 :: (load (s64)) |
| %7:gpr64common = LDRXui killed %6, 0 :: (load (s64)) |
| %3:fpr128 = LDRQui killed %7, 0 :: (load (s128), align 4) |
| TBZW %5, 0, %bb.2 |
| B %bb.1 |
| |
| bb.1: |
| %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr |
| %9:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %8, implicit $fpcr |
| %10:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %9, implicit $fpcr |
| %11:fpr128 = nofpexcept FADDPv4f32 %10, %10, implicit $fpcr |
| %12:gpr64all = COPY %11.dsub |
| %14:fpr64 = COPY %12 |
| %13:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %14, implicit $fpcr |
| $s0 = COPY %13 |
| RET_ReallyLR implicit $s0 |
| |
| bb.2: |
| $q0 = COPY %0 |
| $q1 = COPY %2 |
| $q2 = COPY %1 |
| $q3 = COPY %3 |
| TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3 |
| |
| ... |