| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %} |
| ; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %} |
| |
| ; Horizontal add reduction over the four lanes of a single <4 x i32> shuffle. |
| ; Every lane feeds the add chain twice: once directly and once after being |
| ; or'ed with 0, giving eight scalar operands in total. |
| ; The assertions below expect slp-vectorizer to replace the scalar chain with a |
| ; single llvm.vector.reduce.add.v8i32 call under both the x86_64 and aarch64 |
| ; triples; the two expected sequences differ in where the vector `or` is placed |
| ; relative to the widening shuffles. |
| define i32 @test() { |
| ; X86-LABEL: @test( |
| ; X86-NEXT: bb: |
| ; X86-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7> |
| ; X86-NEXT: [[TMP1:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer |
| ; X86-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; X86-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; X86-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> |
| ; X86-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) |
| ; X86-NEXT: ret i32 [[TMP5]] |
| ; |
| ; AARCH64-LABEL: @test( |
| ; AARCH64-NEXT: bb: |
| ; AARCH64-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7> |
| ; AARCH64-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> |
| ; AARCH64-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer |
| ; AARCH64-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) |
| ; AARCH64-NEXT: ret i32 [[TMP3]] |
| ; |
| bb: |
| ; Source vector: a two-input shuffle of all-zero vectors; mask index 7 selects |
| ; lane 3 of the second operand, so this is not a single-source shuffle. |
| %0 = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7> |
| ; Scalarize all four lanes, extracted in descending index order (3, 2, 1, 0). |
| %1 = extractelement <4 x i32> %0, i32 3 |
| %2 = extractelement <4 x i32> %0, i32 2 |
| %3 = extractelement <4 x i32> %0, i32 1 |
| %4 = extractelement <4 x i32> %0, i32 0 |
| ; One `or <lane>, 0` per extracted lane, listed in ascending lane order. |
| %inst514 = or i32 %4, 0 |
| %inst494 = or i32 %3, 0 |
| %inst474 = or i32 %2, 0 |
| %inst454 = or i32 %1, 0 |
| ; Linear add chain over all eight values: for lanes 3, 2, 1, 0 in turn, the raw |
| ; lane and its or'ed copy are accumulated into the running sum. |
| %inst458 = add i32 %1, %inst454 |
| %inst477 = add i32 %inst458, %2 |
| %inst478 = add i32 %inst477, %inst474 |
| %inst497 = add i32 %inst478, %3 |
| %inst498 = add i32 %inst497, %inst494 |
| %inst517 = add i32 %inst498, %4 |
| %inst518 = add i32 %inst517, %inst514 |
| ; The final sum is the function result (the root of the reduction). |
| ret i32 %inst518 |
| } |