| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| # RUN: llc -mtriple=aarch64-linux-gnu -run-pass=aarch64-condopt -verify-machineinstrs %s -o - | FileCheck %s |
| # RUN: llc -mtriple=aarch64-linux-gnu -passes=aarch64-condopt %s -o - | FileCheck %s |
| |
| --- |
| # Three CMP+conditional pairs in sequence, forming a chain. The first and |
| # second pairs are each rewritten from GT #n to GE #n+1, i.e. to the immediate |
| # that the following pair has in the input. Only the last two CMPs end up |
| # identical (#11) and are left for CSE; the first CMP ends at #10. |
| # |
| # The condition operand of CSINCWr is encoded numerically: 12 = GT, 10 = GE. |
| # |
| # Input: CMP #9 GT; CMP #10 GT; CMP #11 GT |
| # Output: CMP #10 GE; CMP #11 GE; CMP #11 GT |
| name: three_pair_chain |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $w0, $w1, $w2, $x3, $x4, $x5 |
| |
| ; CHECK-LABEL: name: three_pair_chain |
| ; CHECK: liveins: $w0, $w1, $w2, $x3, $x4, $x5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 |
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5 |
| ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 10, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 10, implicit $nzcv |
| ; CHECK-NEXT: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 11, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 10, implicit $nzcv |
| ; CHECK-NEXT: [[SUBSWri2:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 11, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr2:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 12, implicit $nzcv |
| ; CHECK-NEXT: STRWui killed [[CSINCWr]], [[COPY3]], 0 |
| ; CHECK-NEXT: STRWui killed [[CSINCWr1]], [[COPY4]], 0 |
| ; CHECK-NEXT: STRWui killed [[CSINCWr2]], [[COPY5]], 0 |
| ; CHECK-NEXT: RET_ReallyLR |
| %0:gpr32common = COPY $w0 |
| %1:gpr32 = COPY $w1 |
| %2:gpr32 = COPY $w2 |
| %3:gpr64common = COPY $x3 |
| %4:gpr64common = COPY $x4 |
| %5:gpr64common = COPY $x5 |
| |
| ; First pair: #9 GT, expected to become #10 GE. |
| %6:gpr32 = SUBSWri %0, 9, 0, implicit-def $nzcv |
| %7:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; Second pair: #10 GT, expected to become #11 GE. |
| %8:gpr32 = SUBSWri %0, 10, 0, implicit-def $nzcv |
| %9:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; Third pair: #11 GT, expected to stay as written. |
| %10:gpr32 = SUBSWri %0, 11, 0, implicit-def $nzcv |
| %11:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; Store every conditional result so none of the CSINCWr defs is dead. |
| STRWui killed %7, %3, 0 |
| STRWui killed %9, %4, 0 |
| STRWui killed %11, %5, 0 |
| RET_ReallyLR |
| |
| ... |
| --- |
| # Two conditionals reading the same CMP's flags cannot both be adjusted safely, |
| # so that pair is discarded. The subsequent independent pair then has no prior |
| # match, so it is also left unchanged: the expected output is identical to the |
| # input. |
| # |
| # Input: CMP #9 GT, GT; CMP #10 GT |
| # Output: CMP #9 GT, GT; CMP #10 GT |
| name: second_consumer_invalidates |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $w0, $w1, $w2, $x3, $x4, $x5 |
| |
| ; CHECK-LABEL: name: second_consumer_invalidates |
| ; CHECK: liveins: $w0, $w1, $w2, $x3, $x4, $x5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 |
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5 |
| ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 9, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 12, implicit $nzcv |
| ; CHECK-NEXT: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 12, implicit $nzcv |
| ; CHECK-NEXT: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 10, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr2:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 12, implicit $nzcv |
| ; CHECK-NEXT: STRWui killed [[CSINCWr]], [[COPY3]], 0 |
| ; CHECK-NEXT: STRWui killed [[CSINCWr1]], [[COPY4]], 0 |
| ; CHECK-NEXT: STRWui killed [[CSINCWr2]], [[COPY5]], 0 |
| ; CHECK-NEXT: RET_ReallyLR |
| %0:gpr32common = COPY $w0 |
| %1:gpr32 = COPY $w1 |
| %2:gpr32 = COPY $w2 |
| %3:gpr64common = COPY $x3 |
| %4:gpr64common = COPY $x4 |
| %5:gpr64common = COPY $x5 |
| |
| ; CMP #9 feeds two GT conditionals (%7 and %8), so this pair is invalidated. |
| %6:gpr32 = SUBSWri %0, 9, 0, implicit-def $nzcv |
| %7:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| %8:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; Independent pair (#10 GT): no prior match, so unchanged. |
| %9:gpr32 = SUBSWri %0, 10, 0, implicit-def $nzcv |
| %10:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; Store every conditional result so none of the CSINCWr defs is dead. |
| STRWui killed %7, %3, 0 |
| STRWui killed %8, %4, 0 |
| STRWui killed %10, %5, 0 |
| RET_ReallyLR |
| |
| ... |
| --- |
| # A non-CMP NZCV clobber between pairs resets the search state. The two pairs |
| # before the clobber are optimised against each other (the first is rewritten |
| # to the second's immediate); the pair after the clobber is not touched. |
| # |
| # Input: CMP #9 GT; CMP #10 GT; SUBS #5 (result live); CMP #10 GT |
| # Output: CMP #10 GE; CMP #10 GT; SUBS #5 (result live); CMP #10 GT |
| # |
| # NOTE(review): the third pair uses the same immediate and condition as the |
| # second (#10 GT), so it would presumably be left unchanged even if the state |
| # were not reset. Consider a differing immediate (e.g. #11) to make the reset |
| # observable - confirm against the pass before changing the test. |
| name: nzcv_clobber_resets_state |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $w0, $w1, $w2, $x3, $x4, $x5, $x6 |
| |
| ; CHECK-LABEL: name: nzcv_clobber_resets_state |
| ; CHECK: liveins: $w0, $w1, $w2, $x3, $x4, $x5, $x6 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 |
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5 |
| ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64common = COPY $x6 |
| ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 10, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 10, implicit $nzcv |
| ; CHECK-NEXT: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 10, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 12, implicit $nzcv |
| ; CHECK-NEXT: [[SUBSWri2:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 5, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[SUBSWri3:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 10, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr2:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 12, implicit $nzcv |
| ; CHECK-NEXT: STRWui killed [[CSINCWr]], [[COPY3]], 0 |
| ; CHECK-NEXT: STRWui killed [[CSINCWr1]], [[COPY4]], 0 |
| ; CHECK-NEXT: STRWui killed [[SUBSWri2]], [[COPY5]], 0 |
| ; CHECK-NEXT: STRWui killed [[CSINCWr2]], [[COPY6]], 0 |
| ; CHECK-NEXT: RET_ReallyLR |
| %0:gpr32common = COPY $w0 |
| %1:gpr32 = COPY $w1 |
| %2:gpr32 = COPY $w2 |
| %3:gpr64common = COPY $x3 |
| %4:gpr64common = COPY $x4 |
| %5:gpr64common = COPY $x5 |
| %6:gpr64common = COPY $x6 |
| |
| ; First pair: #9 GT, expected to become #10 GE. |
| %7:gpr32 = SUBSWri %0, 9, 0, implicit-def $nzcv |
| %8:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; Second pair: #10 GT. It is matched with the first pair, which is the one |
| ; that gets rewritten; this pair itself stays as written. |
| %9:gpr32 = SUBSWri %0, 10, 0, implicit-def $nzcv |
| %10:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; Non-CMP NZCV clobber: live destination means it cannot be treated as a |
| ; pure compare, so it resets the search state. %11 is kept live by the |
| ; store below. |
| %11:gpr32 = SUBSWri %0, 5, 0, implicit-def $nzcv |
| |
| ; Third pair: state was reset by the clobber, so no optimisation. |
| %12:gpr32 = SUBSWri %0, 10, 0, implicit-def $nzcv |
| %13:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| STRWui killed %8, %3, 0 |
| STRWui killed %10, %4, 0 |
| STRWui killed %11, %5, 0 |
| STRWui killed %13, %6, 0 |
| RET_ReallyLR |
| |
| ... |
| --- |
| # After a pair is optimised, the stored CC must reflect the adjusted state. |
| # B (#9 GT) is adjusted to (#10 GE) by the A-B pair. If the map retained the |
| # pre-adjustment CC (GT) for B, the B-C pair would incorrectly fire and |
| # corrupt B. C (#11 GT) must be left unchanged. |
| # |
| # The condition operand of CSINCWr is encoded numerically: 12 = GT, 10 = GE. |
| # |
| # Input: CMP #10 GT; CMP #9 GT; CMP #11 GT |
| # Output: CMP #10 GT; CMP #10 GE; CMP #11 GT |
| name: stale_cc_chain |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $w0, $w1, $w2, $x3, $x4, $x5 |
| |
| ; CHECK-LABEL: name: stale_cc_chain |
| ; CHECK: liveins: $w0, $w1, $w2, $x3, $x4, $x5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3 |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4 |
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x5 |
| ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 10, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 12, implicit $nzcv |
| ; CHECK-NEXT: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 10, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 10, implicit $nzcv |
| ; CHECK-NEXT: [[SUBSWri2:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 11, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr2:%[0-9]+]]:gpr32 = CSINCWr [[COPY1]], [[COPY2]], 12, implicit $nzcv |
| ; CHECK-NEXT: STRWui killed [[CSINCWr]], [[COPY3]], 0 |
| ; CHECK-NEXT: STRWui killed [[CSINCWr1]], [[COPY4]], 0 |
| ; CHECK-NEXT: STRWui killed [[CSINCWr2]], [[COPY5]], 0 |
| ; CHECK-NEXT: RET_ReallyLR |
| %0:gpr32common = COPY $w0 |
| %1:gpr32 = COPY $w1 |
| %2:gpr32 = COPY $w2 |
| %3:gpr64common = COPY $x3 |
| %4:gpr64common = COPY $x4 |
| %5:gpr64common = COPY $x5 |
| |
| ; A: #10 GT - the target of the A-B optimisation; A itself is unchanged. |
| %6:gpr32 = SUBSWri %0, 10, 0, implicit-def $nzcv |
| %7:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; B: #9 GT - adjusted to #10 GE by the A-B pair. |
| %8:gpr32 = SUBSWri %0, 9, 0, implicit-def $nzcv |
| %9:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; C: #11 GT - no valid pair with the adjusted B (GE), so left unchanged. |
| %10:gpr32 = SUBSWri %0, 11, 0, implicit-def $nzcv |
| %11:gpr32 = CSINCWr %1, %2, 12, implicit $nzcv |
| |
| ; Store every conditional result so none of the CSINCWr defs is dead. |
| STRWui killed %7, %3, 0 |
| STRWui killed %9, %4, 0 |
| STRWui killed %11, %5, 0 |
| RET_ReallyLR |
| |
| ... |
| --- |
| # Two CMP+conditional pairs where the CMPs compare the same value through |
| # different virtual registers (%1 is a COPY of %0). Without lookThruCopyLike |
| # the pairs have different keys and the optimisation silently does not fire. |
| # When it fires, the first pair is rewritten to the second pair's immediate; |
| # each CMP keeps the register operand it was written with. |
| # |
| # Input: CMP %0 #9 GT; CMP %1 #10 GT (%1 = COPY %0) |
| # Output: CMP %0 #10 GE; CMP %1 #10 GT |
| name: copy_traced_registers |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $w0, $w1, $w2, $x3, $x4 |
| |
| ; CHECK-LABEL: name: copy_traced_registers |
| ; CHECK: liveins: $w0, $w1, $w2, $x3, $x4 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32common = COPY [[COPY]] |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w1 |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w2 |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x3 |
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64common = COPY $x4 |
| ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 10, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY2]], [[COPY3]], 10, implicit $nzcv |
| ; CHECK-NEXT: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 10, 0, implicit-def $nzcv |
| ; CHECK-NEXT: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr [[COPY2]], [[COPY3]], 12, implicit $nzcv |
| ; CHECK-NEXT: STRWui killed [[CSINCWr]], [[COPY4]], 0 |
| ; CHECK-NEXT: STRWui killed [[CSINCWr1]], [[COPY5]], 0 |
| ; CHECK-NEXT: RET_ReallyLR |
| %0:gpr32common = COPY $w0 |
| %1:gpr32common = COPY %0 |
| %2:gpr32 = COPY $w1 |
| %3:gpr32 = COPY $w2 |
| %4:gpr64common = COPY $x3 |
| %5:gpr64common = COPY $x4 |
| |
| ; First pair: CMP uses %0 directly (#9 GT, expected to become #10 GE). |
| %6:gpr32 = SUBSWri %0, 9, 0, implicit-def $nzcv |
| %7:gpr32 = CSINCWr %2, %3, 12, implicit $nzcv |
| |
| ; Second pair: CMP uses %1 (= COPY %0, same value); expected to stay #10 GT. |
| %8:gpr32 = SUBSWri %1, 10, 0, implicit-def $nzcv |
| %9:gpr32 = CSINCWr %2, %3, 12, implicit $nzcv |
| |
| ; Store both conditional results so neither CSINCWr def is dead. |
| STRWui killed %7, %4, 0 |
| STRWui killed %9, %5, 0 |
| RET_ReallyLR |
| |
| ... |