; blob: c90e76c6d00f74b04e67ad7370b878b813d9daa3 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %}
; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %}
; Scalar input for the SLP vectorizer: an eight-term i32 add chain built from
; the four lanes of one shuffled vector plus an `or ..., 0` copy of each lane.
; The autogenerated assertions below expect both RUN configurations to replace
; the chain with a single @llvm.vector.reduce.add.v8i32 call. They differ in
; where the `or` lands: for the x86-64 triple it is done on <4 x i32> and the
; result is then concatenated with the un-or'd vector, while for the AArch64
; triple the vector is first widened to <8 x i32> and the `or` is done there.
define i32 @test() {
; X86-LABEL: @test(
; X86-NEXT: bb:
; X86-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; X86-NEXT: [[TMP1:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
; X86-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; X86-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; X86-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; X86-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
; X86-NEXT: ret i32 [[TMP5]]
;
; AARCH64-LABEL: @test(
; AARCH64-NEXT: bb:
; AARCH64-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; AARCH64-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; AARCH64-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
; AARCH64-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
; AARCH64-NEXT: ret i32 [[TMP3]]
;
bb:
; Two-source shuffle: mask indices 0-2 select lanes of the first operand and
; index 7 (= 4 + 3) selects lane 3 of the second; both operands are
; zeroinitializer.
%0 = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; Extract every lane of %0 as a scalar, highest lane index first.
%1 = extractelement <4 x i32> %0, i32 3
%2 = extractelement <4 x i32> %0, i32 2
%3 = extractelement <4 x i32> %0, i32 1
%4 = extractelement <4 x i32> %0, i32 0
; One `or ..., 0` per extracted lane (lane 0 first, lane 3 last).
%inst514 = or i32 %4, 0
%inst494 = or i32 %3, 0
%inst474 = or i32 %2, 0
%inst454 = or i32 %1, 0
; Linear add chain over all eight scalars:
;   %1 + %inst454 + %2 + %inst474 + %3 + %inst494 + %4 + %inst514
; i.e. each extracted lane is summed together with its `or` counterpart.
%inst458 = add i32 %1, %inst454
%inst477 = add i32 %inst458, %2
%inst478 = add i32 %inst477, %inst474
%inst497 = add i32 %inst478, %3
%inst498 = add i32 %inst497, %inst494
%inst517 = add i32 %inst498, %4
%inst518 = add i32 %inst517, %inst514
; The final link of the chain is the function result.
ret i32 %inst518
}