| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -O0 -march=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s |
| --- |
| name: add_v16s8 |
| tracksRegLiveness: true |
| body: | |
| bb.1: |
| liveins: $x0 |
| |
| ; Add-reduction of a <16 x s8> vector loaded through the pointer in $x0. |
| ; The expected legalizer output mirrors the input instruction for |
| ; instruction: the G_VECREDUCE_ADD keeps its <16 x s8> source and s8 result, |
| ; and the s8 value is any-extended to s32 so it can be returned in $w0. |
| ; CHECK-LABEL: name: add_v16s8 |
| ; CHECK: liveins: $x0 |
| ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>)) |
| ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s8) = G_VECREDUCE_ADD [[LOAD]](<16 x s8>) |
| ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[VECREDUCE_ADD]](s8) |
| ; CHECK: $w0 = COPY [[ANYEXT]](s32) |
| ; CHECK: RET_ReallyLR implicit $w0 |
| %0:_(p0) = COPY $x0 |
| %1:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>)) |
| %2:_(s8) = G_VECREDUCE_ADD %1(<16 x s8>) |
| %3:_(s32) = G_ANYEXT %2(s8) |
| $w0 = COPY %3(s32) |
| RET_ReallyLR implicit $w0 |
| |
| ... |
| --- |
| name: add_v8s16 |
| tracksRegLiveness: true |
| body: | |
| bb.1: |
| liveins: $x0 |
| |
| ; Add-reduction of a <8 x s16> vector loaded through the pointer in $x0. |
| ; The expected legalizer output mirrors the input instruction for |
| ; instruction: the G_VECREDUCE_ADD keeps its <8 x s16> source and s16 result, |
| ; and the s16 value is any-extended to s32 so it can be returned in $w0. |
| ; CHECK-LABEL: name: add_v8s16 |
| ; CHECK: liveins: $x0 |
| ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>)) |
| ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s16) = G_VECREDUCE_ADD [[LOAD]](<8 x s16>) |
| ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[VECREDUCE_ADD]](s16) |
| ; CHECK: $w0 = COPY [[ANYEXT]](s32) |
| ; CHECK: RET_ReallyLR implicit $w0 |
| %0:_(p0) = COPY $x0 |
| %1:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>)) |
| %2:_(s16) = G_VECREDUCE_ADD %1(<8 x s16>) |
| %3:_(s32) = G_ANYEXT %2(s16) |
| $w0 = COPY %3(s32) |
| RET_ReallyLR implicit $w0 |
| |
| ... |
| --- |
| name: add_v4s32 |
| tracksRegLiveness: true |
| body: | |
| bb.1: |
| liveins: $x0 |
| |
| ; Add-reduction of a <4 x s32> vector loaded through the pointer in $x0. |
| ; The expected legalizer output mirrors the input instruction for |
| ; instruction; the s32 result is copied straight into $w0 with no extension. |
| ; CHECK-LABEL: name: add_v4s32 |
| ; CHECK: liveins: $x0 |
| ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) |
| ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[LOAD]](<4 x s32>) |
| ; CHECK: $w0 = COPY [[VECREDUCE_ADD]](s32) |
| ; CHECK: RET_ReallyLR implicit $w0 |
| %0:_(p0) = COPY $x0 |
| %1:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>)) |
| %2:_(s32) = G_VECREDUCE_ADD %1(<4 x s32>) |
| $w0 = COPY %2(s32) |
| RET_ReallyLR implicit $w0 |
| |
| ... |
| --- |
| name: add_v2s64 |
| tracksRegLiveness: true |
| body: | |
| bb.1: |
| liveins: $x0 |
| |
| ; Add-reduction of a <2 x s64> vector loaded through the pointer in $x0. |
| ; The expected legalizer output mirrors the input instruction for |
| ; instruction; the s64 result is returned in $x0. |
| ; CHECK-LABEL: name: add_v2s64 |
| ; CHECK: liveins: $x0 |
| ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) |
| ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[LOAD]](<2 x s64>) |
| ; CHECK: $x0 = COPY [[VECREDUCE_ADD]](s64) |
| ; CHECK: RET_ReallyLR implicit $x0 |
| %0:_(p0) = COPY $x0 |
| %1:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>)) |
| %2:_(s64) = G_VECREDUCE_ADD %1(<2 x s64>) |
| $x0 = COPY %2(s64) |
| RET_ReallyLR implicit $x0 |
| |
| ... |
| --- |
| name: add_v2s32 |
| tracksRegLiveness: true |
| body: | |
| bb.1: |
| liveins: $x0 |
| |
| ; Add-reduction of a <2 x s32> vector loaded through the pointer in $x0. |
| ; The expected legalizer output mirrors the input instruction for |
| ; instruction; the s32 result is copied straight into $w0 with no extension. |
| ; CHECK-LABEL: name: add_v2s32 |
| ; CHECK: liveins: $x0 |
| ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 |
| ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) |
| ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[LOAD]](<2 x s32>) |
| ; CHECK: $w0 = COPY [[VECREDUCE_ADD]](s32) |
| ; CHECK: RET_ReallyLR implicit $w0 |
| %0:_(p0) = COPY $x0 |
| %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>)) |
| %2:_(s32) = G_VECREDUCE_ADD %1(<2 x s32>) |
| $w0 = COPY %2(s32) |
| RET_ReallyLR implicit $w0 |
| |
| ... |
| --- |
| name: test_v8i64 |
| alignment: 4 |
| tracksRegLiveness: true |
| body: | |
| bb.1: |
| liveins: $q0, $q1, $q2, $q3 |
| ; This is a power-of-2 legalization, so use a tree reduction. |
| ; The <8 x s64> input is built by concatenating the four <2 x s64> values |
| ; from $q0-$q3. In the expected output the concats are gone: the four |
| ; pieces are added pairwise as <2 x s64> vectors (three G_ADDs in total) |
| ; and a single G_VECREDUCE_ADD is applied to the final <2 x s64> sum. |
| ; CHECK-LABEL: name: test_v8i64 |
| ; CHECK: liveins: $q0, $q1, $q2, $q3 |
| ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 |
| ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 |
| ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 |
| ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 |
| ; CHECK: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD [[COPY]], [[COPY1]] |
| ; CHECK: [[ADD1:%[0-9]+]]:_(<2 x s64>) = G_ADD [[COPY2]], [[COPY3]] |
| ; CHECK: [[ADD2:%[0-9]+]]:_(<2 x s64>) = G_ADD [[ADD]], [[ADD1]] |
| ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[ADD2]](<2 x s64>) |
| ; CHECK: $x0 = COPY [[VECREDUCE_ADD]](s64) |
| ; CHECK: RET_ReallyLR implicit $x0 |
| %0:_(<2 x s64>) = COPY $q0 |
| %1:_(<2 x s64>) = COPY $q1 |
| %2:_(<2 x s64>) = COPY $q2 |
| %3:_(<2 x s64>) = COPY $q3 |
| %4:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>) |
| %5:_(<4 x s64>) = G_CONCAT_VECTORS %2(<2 x s64>), %3(<2 x s64>) |
| %6:_(<8 x s64>) = G_CONCAT_VECTORS %4(<4 x s64>), %5(<4 x s64>) |
| %7:_(s64) = G_VECREDUCE_ADD %6(<8 x s64>) |
| $x0 = COPY %7(s64) |
| RET_ReallyLR implicit $x0 |
| |
| ... |
| --- |
| name: test_v6i64 |
| alignment: 4 |
| tracksRegLiveness: true |
| body: | |
| bb.1: |
| liveins: $q0, $q1, $q2, $q3 |
| ; This is a non-power-of-2 legalization, generate multiple vector reductions |
| ; and combine them with scalar ops. |
| ; The <6 x s64> input is built by concatenating the three <2 x s64> values |
| ; from $q0-$q2. In the expected output each <2 x s64> piece gets its own |
| ; G_VECREDUCE_ADD, and the three s64 partial sums are then combined with |
| ; two scalar G_ADDs. |
| ; NOTE(review): $q3 is declared live-in but is never read in this function. |
| ; CHECK-LABEL: name: test_v6i64 |
| ; CHECK: liveins: $q0, $q1, $q2, $q3 |
| ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 |
| ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 |
| ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 |
| ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[COPY]](<2 x s64>) |
| ; CHECK: [[VECREDUCE_ADD1:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[COPY1]](<2 x s64>) |
| ; CHECK: [[VECREDUCE_ADD2:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[COPY2]](<2 x s64>) |
| ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VECREDUCE_ADD]], [[VECREDUCE_ADD1]] |
| ; CHECK: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD]], [[VECREDUCE_ADD2]] |
| ; CHECK: $x0 = COPY [[ADD1]](s64) |
| ; CHECK: RET_ReallyLR implicit $x0 |
| %0:_(<2 x s64>) = COPY $q0 |
| %1:_(<2 x s64>) = COPY $q1 |
| %2:_(<2 x s64>) = COPY $q2 |
| %3:_(<6 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>), %2(<2 x s64>) |
| %4:_(s64) = G_VECREDUCE_ADD %3(<6 x s64>) |
| $x0 = COPY %4(s64) |
| RET_ReallyLR implicit $x0 |
| |
| ... |