| # RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown %s -o - | FileCheck %s |
| # RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown -acc-max-width=2 %s -o - | FileCheck %s --check-prefix=NARROW-TREE |
| # RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown -acc-min-depth=100 %s -o - | FileCheck %s --check-prefix=NO-TREE |
| |
| # A chain of UABAL instructions that can be reassociated for better ILP. |
| # Before the optimization, we accumulate in a single long chain. |
| # CHECK-LABEL: name: uabal_accumulation |
| # CHECK: [[START1:%.*]]:fpr128 = UABDLv4i16_v4i32 |
| # CHECK: [[START2:%.*]]:fpr128 = UABDLv4i16_v4i32 |
| # CHECK: [[START3:%.*]]:fpr128 = UABDLv4i16_v4i32 |
| # CHECK: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]] |
| # CHECK: [[B1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START2]] |
| # CHECK: [[C1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START3]] |
| # CHECK: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]] |
| # CHECK: [[B2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B1]] |
| # CHECK: [[C2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[C1]] |
| # CHECK: [[PARTIAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[A2]], killed [[B2]] |
| # CHECK: [[TOTAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[PARTIAL_SUM]], killed [[C2]] |
| # CHECK: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[TOTAL_SUM]] |
| |
| # NARROW-TREE: [[START1:%.*]]:fpr128 = UABDLv4i16_v4i32 |
| # NARROW-TREE: [[START2:%.*]]:fpr128 = UABDLv4i16_v4i32 |
| # NARROW-TREE: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]] |
| # NARROW-TREE: [[B1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START2]] |
| # NARROW-TREE: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]] |
| # NARROW-TREE: [[B2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B1]] |
| # NARROW-TREE: [[A3:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A2]] |
| # NARROW-TREE: [[B3:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B2]] |
| # NARROW-TREE: [[A4:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A3]] |
| # NARROW-TREE: [[PARTIAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[B3]], killed [[A4]] |
| # NARROW-TREE: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[PARTIAL_SUM]] |
| |
| # NO-TREE: [[START1:%.*]]:fpr128 = UABDLv4i16_v4i32 |
| # NO-TREE: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]] |
| # NO-TREE: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]] |
| # NO-TREE: [[A3:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A2]] |
| # NO-TREE: [[A4:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A3]] |
| # NO-TREE: [[A5:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A4]] |
| # NO-TREE: [[A6:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A5]] |
| # NO-TREE: [[A7:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A6]] |
| # NO-TREE: [[A8:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A7]] |
| # NO-TREE: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[A8]] |
| |
| --- |
| # Input for the first test: one serial accumulation chain. |
| # A single UABDLv4i16_v4i32 produces the initial accumulator, followed by eight |
| # UABALv4i16_v4i32 instructions, each of which takes the killed result of the |
| # previous one as its accumulator operand. The 64-bit source operands are loaded |
| # from two pointers (copied from $x0 and $x2) that are advanced by the values |
| # copied from $x1 and $x3 before each subsequent pair of loads. The last accumulator is |
| # reduced with ADDVv4i32v and the scalar result is returned in $w0. |
| name: uabal_accumulation |
| body: | |
| bb.0.entry: |
| liveins: $x0, $x1, $x2, $x3 |
| |
| %3:gpr64 = COPY $x3 |
| %2:gpr64common = COPY $x2 |
| %1:gpr64 = COPY $x1 |
| %0:gpr64common = COPY $x0 |
| %4:fpr64 = LDRDui %0, 0 :: (load (s64)) |
| %5:fpr64 = LDRDui %2, 0 :: (load (s64)) |
| %6:gpr64common = ADDXrr %0, %1 |
| %7:gpr64common = ADDXrr %2, %3 |
| %8:fpr64 = LDRDui %6, 0 :: (load (s64)) |
| %9:fpr64 = LDRDui %7, 0 :: (load (s64)) |
| %10:fpr128 = UABDLv4i16_v4i32 killed %8, killed %9 |
| %11:fpr128 = UABALv4i16_v4i32 killed %10, killed %4, killed %5 |
| %12:gpr64common = ADDXrr %6, %1 |
| %13:gpr64common = ADDXrr %7, %3 |
| %14:fpr64 = LDRDui %12, 0 :: (load (s64)) |
| %15:fpr64 = LDRDui %13, 0 :: (load (s64)) |
| %16:fpr128 = UABALv4i16_v4i32 killed %11, killed %14, killed %15 |
| %17:gpr64common = ADDXrr %12, %1 |
| %18:gpr64common = ADDXrr %13, %3 |
| %19:fpr64 = LDRDui %17, 0 :: (load (s64)) |
| %20:fpr64 = LDRDui %18, 0 :: (load (s64)) |
| %21:fpr128 = UABALv4i16_v4i32 killed %16, killed %19, killed %20 |
| %22:gpr64common = ADDXrr %17, %1 |
| %23:gpr64common = ADDXrr %18, %3 |
| %24:fpr64 = LDRDui %22, 0 :: (load (s64)) |
| %25:fpr64 = LDRDui %23, 0 :: (load (s64)) |
| %26:fpr128 = UABALv4i16_v4i32 killed %21, killed %24, killed %25 |
| %27:gpr64common = ADDXrr %22, %1 |
| %28:gpr64common = ADDXrr %23, %3 |
| %29:fpr64 = LDRDui %27, 0 :: (load (s64)) |
| %30:fpr64 = LDRDui %28, 0 :: (load (s64)) |
| %31:fpr128 = UABALv4i16_v4i32 killed %26, killed %29, killed %30 |
| %32:gpr64common = ADDXrr %27, %1 |
| %33:gpr64common = ADDXrr %28, %3 |
| %34:fpr64 = LDRDui %32, 0 :: (load (s64)) |
| %35:fpr64 = LDRDui %33, 0 :: (load (s64)) |
| %36:fpr128 = UABALv4i16_v4i32 killed %31, killed %34, killed %35 |
| %37:gpr64common = ADDXrr %32, %1 |
| %38:gpr64common = ADDXrr %33, %3 |
| %39:fpr64 = LDRDui %37, 0 :: (load (s64)) |
| %40:fpr64 = LDRDui %38, 0 :: (load (s64)) |
| %41:fpr128 = UABALv4i16_v4i32 killed %36, killed %39, killed %40 |
| %42:gpr64common = ADDXrr %37, %1 |
| %43:gpr64common = ADDXrr %38, %3 |
| %44:fpr64 = LDRDui %42, 0 :: (load (s64)) |
| %45:fpr64 = LDRDui %43, 0 :: (load (s64)) |
| %46:fpr128 = UABALv4i16_v4i32 killed %41, killed %44, killed %45 |
| %47:fpr32 = ADDVv4i32v killed %46 |
| %48:fpr128 = IMPLICIT_DEF |
| %49:fpr128 = INSERT_SUBREG %48, killed %47, %subreg.ssub |
| %50:gpr32all = COPY %49.ssub |
| $w0 = COPY %50 |
| RET_ReallyLR implicit $w0 |
| ... |
| |
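| # In this test case the instruction at the top of the chain is an ADDv4i32 rather |
| # than a UABDL. The checks below still expect the chain to be reassociated: the |
| # ADD result seeds the first sub-chain and new UABDL instructions seed the other two. |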
| # CHECK-LABEL: name: uabal_accumulation_with_different_start |
| # CHECK: [[START1:%.*]]:fpr128 = ADDv4i32 |
| # CHECK: [[START2:%.*]]:fpr128 = UABDLv4i16_v4i32 |
| # CHECK: [[START3:%.*]]:fpr128 = UABDLv4i16_v4i32 |
| # CHECK: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]] |
| # CHECK: [[B1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START2]] |
| # CHECK: [[C1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START3]] |
| # CHECK: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]] |
| # CHECK: [[B2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B1]] |
| # CHECK: [[C2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[C1]] |
| # CHECK: [[PARTIAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[A2]], killed [[B2]] |
| # CHECK: [[TOTAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[PARTIAL_SUM]], killed [[C2]] |
| # CHECK: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[TOTAL_SUM]] |
| |
| --- |
| # Input for the second test: the same serial chain of eight UABALv4i16_v4i32 |
| # instructions, but the initial accumulator is produced by an ADDv4i32 of two |
| # 128-bit loads (LDRQui) instead of a UABDLv4i16_v4i32. Each UABAL takes the |
| # killed result of the previous instruction as its accumulator operand, and the |
| # 64-bit source operands are loaded from two pointers (copied from $x0 and $x2) |
| # that are advanced by the values copied from $x1 and $x3. The last accumulator |
| # is reduced with ADDVv4i32v and the scalar result is returned in $w0. |
| name: uabal_accumulation_with_different_start |
| body: | |
| bb.0.entry: |
| liveins: $x0, $x1, $x2, $x3 |
| |
| %3:gpr64 = COPY $x3 |
| %2:gpr64common = COPY $x2 |
| %1:gpr64 = COPY $x1 |
| %0:gpr64common = COPY $x0 |
| %4:fpr64 = LDRDui %0, 0 :: (load (s64)) |
| %5:fpr64 = LDRDui %2, 0 :: (load (s64)) |
| %6:gpr64common = ADDXrr %0, %1 |
| %7:gpr64common = ADDXrr %2, %3 |
| %8:fpr128 = LDRQui %6, 0 :: (load (s128)) |
| %9:fpr128 = LDRQui %7, 0 :: (load (s128)) |
| %10:fpr128 = ADDv4i32 killed %8, killed %9 |
| %11:fpr128 = UABALv4i16_v4i32 killed %10, killed %4, killed %5 |
| %12:gpr64common = ADDXrr %6, %1 |
| %13:gpr64common = ADDXrr %7, %3 |
| %14:fpr64 = LDRDui %12, 0 :: (load (s64)) |
| %15:fpr64 = LDRDui %13, 0 :: (load (s64)) |
| %16:fpr128 = UABALv4i16_v4i32 killed %11, killed %14, killed %15 |
| %17:gpr64common = ADDXrr %12, %1 |
| %18:gpr64common = ADDXrr %13, %3 |
| %19:fpr64 = LDRDui %17, 0 :: (load (s64)) |
| %20:fpr64 = LDRDui %18, 0 :: (load (s64)) |
| %21:fpr128 = UABALv4i16_v4i32 killed %16, killed %19, killed %20 |
| %22:gpr64common = ADDXrr %17, %1 |
| %23:gpr64common = ADDXrr %18, %3 |
| %24:fpr64 = LDRDui %22, 0 :: (load (s64)) |
| %25:fpr64 = LDRDui %23, 0 :: (load (s64)) |
| %26:fpr128 = UABALv4i16_v4i32 killed %21, killed %24, killed %25 |
| %27:gpr64common = ADDXrr %22, %1 |
| %28:gpr64common = ADDXrr %23, %3 |
| %29:fpr64 = LDRDui %27, 0 :: (load (s64)) |
| %30:fpr64 = LDRDui %28, 0 :: (load (s64)) |
| %31:fpr128 = UABALv4i16_v4i32 killed %26, killed %29, killed %30 |
| %32:gpr64common = ADDXrr %27, %1 |
| %33:gpr64common = ADDXrr %28, %3 |
| %34:fpr64 = LDRDui %32, 0 :: (load (s64)) |
| %35:fpr64 = LDRDui %33, 0 :: (load (s64)) |
| %36:fpr128 = UABALv4i16_v4i32 killed %31, killed %34, killed %35 |
| %37:gpr64common = ADDXrr %32, %1 |
| %38:gpr64common = ADDXrr %33, %3 |
| %39:fpr64 = LDRDui %37, 0 :: (load (s64)) |
| %40:fpr64 = LDRDui %38, 0 :: (load (s64)) |
| %41:fpr128 = UABALv4i16_v4i32 killed %36, killed %39, killed %40 |
| %42:gpr64common = ADDXrr %37, %1 |
| %43:gpr64common = ADDXrr %38, %3 |
| %44:fpr64 = LDRDui %42, 0 :: (load (s64)) |
| %45:fpr64 = LDRDui %43, 0 :: (load (s64)) |
| %46:fpr128 = UABALv4i16_v4i32 killed %41, killed %44, killed %45 |
| %47:fpr32 = ADDVv4i32v killed %46 |
| %48:fpr128 = IMPLICIT_DEF |
| %49:fpr128 = INSERT_SUBREG %48, killed %47, %subreg.ssub |
| %50:gpr32all = COPY %49.ssub |
| $w0 = COPY %50 |
| RET_ReallyLR implicit $w0 |
| |
| ... |