# blob: 888c4027bc2bcc250fe0966b4bd1decbd82fa2e0 [file] [log] [blame]
# RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown %s -o - | FileCheck %s
# RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown -acc-max-width=2 %s -o - | FileCheck %s --check-prefix=NARROW-TREE
# RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown -acc-min-depth=100 %s -o - | FileCheck %s --check-prefix=NO-TREE
# A chain of UABAL instructions that can be reassociated for better ILP.
# Before the optimization, we accumulate in a single long chain.
# CHECK-LABEL: name: uabal_accumulation
# CHECK: [[START1:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[START2:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[START3:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]]
# CHECK: [[B1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START2]]
# CHECK: [[C1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START3]]
# CHECK: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]]
# CHECK: [[B2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B1]]
# CHECK: [[C2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[C1]]
# CHECK: [[PARTIAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[A2]], killed [[B2]]
# CHECK: [[TOTAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[PARTIAL_SUM]], killed [[C2]]
# CHECK: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[TOTAL_SUM]]
# NARROW-TREE: [[START1:%.*]]:fpr128 = UABDLv4i16_v4i32
# NARROW-TREE: [[START2:%.*]]:fpr128 = UABDLv4i16_v4i32
# NARROW-TREE: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]]
# NARROW-TREE: [[B1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START2]]
# NARROW-TREE: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]]
# NARROW-TREE: [[B2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B1]]
# NARROW-TREE: [[A3:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A2]]
# NARROW-TREE: [[B3:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B2]]
# NARROW-TREE: [[A4:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A3]]
# NARROW-TREE: [[PARTIAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[B3]], killed [[A4]]
# NARROW-TREE: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[PARTIAL_SUM]]
# NO-TREE: [[START1:%.*]]:fpr128 = UABDLv4i16_v4i32
# NO-TREE: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]]
# NO-TREE: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]]
# NO-TREE: [[A3:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A2]]
# NO-TREE: [[A4:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A3]]
# NO-TREE: [[A5:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A4]]
# NO-TREE: [[A6:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A5]]
# NO-TREE: [[A7:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A6]]
# NO-TREE: [[A8:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A7]]
# NO-TREE: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[A8]]
---
# Input for the basic case: one UABDLv4i16_v4i32 followed by eight
# UABALv4i16_v4i32 instructions, each of which takes the previous accumulator
# (killed) as its first operand, i.e. a single serial accumulation chain.
# The body must be indented so that it belongs to the `body: |` block scalar.
name: uabal_accumulation
body: |
  bb.0.entry:
    liveins: $x0, $x1, $x2, $x3

    ; %0 and %2 are the two running addresses; %1 and %3 are the values added
    ; to them (ADDXrr) before each subsequent pair of loads.
    %3:gpr64 = COPY $x3
    %2:gpr64common = COPY $x2
    %1:gpr64 = COPY $x1
    %0:gpr64common = COPY $x0
    %4:fpr64 = LDRDui %0, 0 :: (load (s64))
    %5:fpr64 = LDRDui %2, 0 :: (load (s64))
    %6:gpr64common = ADDXrr %0, %1
    %7:gpr64common = ADDXrr %2, %3
    %8:fpr64 = LDRDui %6, 0 :: (load (s64))
    %9:fpr64 = LDRDui %7, 0 :: (load (s64))

    ; Start of the chain: the UABDL result %10 is the initial accumulator.
    %10:fpr128 = UABDLv4i16_v4i32 killed %8, killed %9

    ; Accumulation step 1 of 8 (uses the pair loaded from %0 / %2).
    %11:fpr128 = UABALv4i16_v4i32 killed %10, killed %4, killed %5

    ; Accumulation step 2 of 8.
    %12:gpr64common = ADDXrr %6, %1
    %13:gpr64common = ADDXrr %7, %3
    %14:fpr64 = LDRDui %12, 0 :: (load (s64))
    %15:fpr64 = LDRDui %13, 0 :: (load (s64))
    %16:fpr128 = UABALv4i16_v4i32 killed %11, killed %14, killed %15

    ; Accumulation step 3 of 8.
    %17:gpr64common = ADDXrr %12, %1
    %18:gpr64common = ADDXrr %13, %3
    %19:fpr64 = LDRDui %17, 0 :: (load (s64))
    %20:fpr64 = LDRDui %18, 0 :: (load (s64))
    %21:fpr128 = UABALv4i16_v4i32 killed %16, killed %19, killed %20

    ; Accumulation step 4 of 8.
    %22:gpr64common = ADDXrr %17, %1
    %23:gpr64common = ADDXrr %18, %3
    %24:fpr64 = LDRDui %22, 0 :: (load (s64))
    %25:fpr64 = LDRDui %23, 0 :: (load (s64))
    %26:fpr128 = UABALv4i16_v4i32 killed %21, killed %24, killed %25

    ; Accumulation step 5 of 8.
    %27:gpr64common = ADDXrr %22, %1
    %28:gpr64common = ADDXrr %23, %3
    %29:fpr64 = LDRDui %27, 0 :: (load (s64))
    %30:fpr64 = LDRDui %28, 0 :: (load (s64))
    %31:fpr128 = UABALv4i16_v4i32 killed %26, killed %29, killed %30

    ; Accumulation step 6 of 8.
    %32:gpr64common = ADDXrr %27, %1
    %33:gpr64common = ADDXrr %28, %3
    %34:fpr64 = LDRDui %32, 0 :: (load (s64))
    %35:fpr64 = LDRDui %33, 0 :: (load (s64))
    %36:fpr128 = UABALv4i16_v4i32 killed %31, killed %34, killed %35

    ; Accumulation step 7 of 8.
    %37:gpr64common = ADDXrr %32, %1
    %38:gpr64common = ADDXrr %33, %3
    %39:fpr64 = LDRDui %37, 0 :: (load (s64))
    %40:fpr64 = LDRDui %38, 0 :: (load (s64))
    %41:fpr128 = UABALv4i16_v4i32 killed %36, killed %39, killed %40

    ; Accumulation step 8 of 8.
    %42:gpr64common = ADDXrr %37, %1
    %43:gpr64common = ADDXrr %38, %3
    %44:fpr64 = LDRDui %42, 0 :: (load (s64))
    %45:fpr64 = LDRDui %43, 0 :: (load (s64))
    %46:fpr128 = UABALv4i16_v4i32 killed %41, killed %44, killed %45

    ; End of the chain: reduce the final accumulator with ADDVv4i32v and
    ; return the 32-bit result in $w0.
    %47:fpr32 = ADDVv4i32v killed %46
    %48:fpr128 = IMPLICIT_DEF
    %49:fpr128 = INSERT_SUBREG %48, killed %47, %subreg.ssub
    %50:gpr32all = COPY %49.ssub
    $w0 = COPY %50
    RET_ReallyLR implicit $w0
...
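# In this test case the chain starts with an instruction other than UABDL (an
# ADDv4i32). The reassociation is still performed: the original ADDv4i32 result
# seeds the first accumulator chain and new UABDL instructions start the others.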
# CHECK-LABEL: name: uabal_accumulation_with_different_start
# CHECK: [[START1:%.*]]:fpr128 = ADDv4i32
# CHECK: [[START2:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[START3:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]]
# CHECK: [[B1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START2]]
# CHECK: [[C1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START3]]
# CHECK: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]]
# CHECK: [[B2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B1]]
# CHECK: [[C2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[C1]]
# CHECK: [[PARTIAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[A2]], killed [[B2]]
# CHECK: [[TOTAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[PARTIAL_SUM]], killed [[C2]]
# CHECK: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[TOTAL_SUM]]
---
# Same input shape as uabal_accumulation (eight chained UABALv4i16_v4i32
# instructions), except that the initial accumulator is produced by an
# ADDv4i32 of two 128-bit loads rather than by a UABDLv4i16_v4i32.
# The body must be indented so that it belongs to the `body: |` block scalar.
name: uabal_accumulation_with_different_start
body: |
  bb.0.entry:
    liveins: $x0, $x1, $x2, $x3

    ; %0 and %2 are the two running addresses; %1 and %3 are the values added
    ; to them (ADDXrr) before each subsequent pair of loads.
    %3:gpr64 = COPY $x3
    %2:gpr64common = COPY $x2
    %1:gpr64 = COPY $x1
    %0:gpr64common = COPY $x0
    %4:fpr64 = LDRDui %0, 0 :: (load (s64))
    %5:fpr64 = LDRDui %2, 0 :: (load (s64))
    %6:gpr64common = ADDXrr %0, %1
    %7:gpr64common = ADDXrr %2, %3
    %8:fpr128 = LDRQui %6, 0 :: (load (s128))
    %9:fpr128 = LDRQui %7, 0 :: (load (s128))

    ; Start of the chain: here the initial accumulator %10 comes from ADDv4i32.
    %10:fpr128 = ADDv4i32 killed %8, killed %9

    ; Accumulation step 1 of 8 (uses the pair loaded from %0 / %2).
    %11:fpr128 = UABALv4i16_v4i32 killed %10, killed %4, killed %5

    ; Accumulation step 2 of 8.
    %12:gpr64common = ADDXrr %6, %1
    %13:gpr64common = ADDXrr %7, %3
    %14:fpr64 = LDRDui %12, 0 :: (load (s64))
    %15:fpr64 = LDRDui %13, 0 :: (load (s64))
    %16:fpr128 = UABALv4i16_v4i32 killed %11, killed %14, killed %15

    ; Accumulation step 3 of 8.
    %17:gpr64common = ADDXrr %12, %1
    %18:gpr64common = ADDXrr %13, %3
    %19:fpr64 = LDRDui %17, 0 :: (load (s64))
    %20:fpr64 = LDRDui %18, 0 :: (load (s64))
    %21:fpr128 = UABALv4i16_v4i32 killed %16, killed %19, killed %20

    ; Accumulation step 4 of 8.
    %22:gpr64common = ADDXrr %17, %1
    %23:gpr64common = ADDXrr %18, %3
    %24:fpr64 = LDRDui %22, 0 :: (load (s64))
    %25:fpr64 = LDRDui %23, 0 :: (load (s64))
    %26:fpr128 = UABALv4i16_v4i32 killed %21, killed %24, killed %25

    ; Accumulation step 5 of 8.
    %27:gpr64common = ADDXrr %22, %1
    %28:gpr64common = ADDXrr %23, %3
    %29:fpr64 = LDRDui %27, 0 :: (load (s64))
    %30:fpr64 = LDRDui %28, 0 :: (load (s64))
    %31:fpr128 = UABALv4i16_v4i32 killed %26, killed %29, killed %30

    ; Accumulation step 6 of 8.
    %32:gpr64common = ADDXrr %27, %1
    %33:gpr64common = ADDXrr %28, %3
    %34:fpr64 = LDRDui %32, 0 :: (load (s64))
    %35:fpr64 = LDRDui %33, 0 :: (load (s64))
    %36:fpr128 = UABALv4i16_v4i32 killed %31, killed %34, killed %35

    ; Accumulation step 7 of 8.
    %37:gpr64common = ADDXrr %32, %1
    %38:gpr64common = ADDXrr %33, %3
    %39:fpr64 = LDRDui %37, 0 :: (load (s64))
    %40:fpr64 = LDRDui %38, 0 :: (load (s64))
    %41:fpr128 = UABALv4i16_v4i32 killed %36, killed %39, killed %40

    ; Accumulation step 8 of 8.
    %42:gpr64common = ADDXrr %37, %1
    %43:gpr64common = ADDXrr %38, %3
    %44:fpr64 = LDRDui %42, 0 :: (load (s64))
    %45:fpr64 = LDRDui %43, 0 :: (load (s64))
    %46:fpr128 = UABALv4i16_v4i32 killed %41, killed %44, killed %45

    ; End of the chain: reduce the final accumulator with ADDVv4i32v and
    ; return the 32-bit result in $w0.
    %47:fpr32 = ADDVv4i32v killed %46
    %48:fpr128 = IMPLICIT_DEF
    %49:fpr128 = INSERT_SUBREG %48, killed %47, %subreg.ssub
    %50:gpr32all = COPY %49.ssub
    $w0 = COPY %50
    RET_ReallyLR implicit $w0
...