; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s --check-prefix=RISCV
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-100 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -slp-threshold=-100 | FileCheck %s
; REQUIRES: aarch64-registered-target, x86-registered-target, riscv-registered-target
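;
; The function below computes a scalar reduction: the sum of p[0..5], each
; multiplied by 42, plus two extra copies of p[0] * 42 (eight terms in all).
; -slp-threshold=-100 forces SLP vectorization regardless of cost. On x86_64
; and aarch64 the six loads are expected to merge into a single <6 x i64>
; load; on riscv64 with +v they are instead split into a <4 x i64> and a
; <2 x i64> load that are shuffled back together before the multiply and
; reduce.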

define i64 @test(ptr %p) {
; RISCV-LABEL: @test(
; RISCV-NEXT:  entry:
; RISCV-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4
; RISCV-NEXT:    [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4
; RISCV-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4
; RISCV-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 0, i32 0>
; RISCV-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[TMP2]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 6, i32 7>
; RISCV-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; RISCV-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; RISCV-NEXT:    [[TMP6:%.*]] = mul <8 x i64> [[TMP5]], splat (i64 42)
; RISCV-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP6]])
; RISCV-NEXT:    ret i64 [[TMP7]]
;
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 0, i32 0>
; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], splat (i64 42)
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]])
; CHECK-NEXT:    ret i64 [[TMP3]]
;
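; The eight scalar multiply/add terms below match the 8-lane vector
; reduction in the checks above; lanes 6 and 7 reuse the value loaded
; from element 0 of %p.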
entry:
  %arrayidx.1 = getelementptr inbounds i64, ptr %p, i64 1
  %arrayidx.2 = getelementptr inbounds i64, ptr %p, i64 2
  %arrayidx.3 = getelementptr inbounds i64, ptr %p, i64 3
  %arrayidx.4 = getelementptr inbounds i64, ptr %p, i64 4
  %arrayidx.5 = getelementptr inbounds i64, ptr %p, i64 5
  %tmp = load i64, ptr %p, align 4
  %mul = mul i64 %tmp, 42
  %tmp1 = load i64, ptr %arrayidx.1, align 4
  %mul1 = mul i64 %tmp1, 42
  %add = add i64 %mul, %mul1
  %tmp2 = load i64, ptr %arrayidx.2, align 4
  %mul2 = mul i64 %tmp2, 42
  %add1 = add i64 %add, %mul2
  %tmp3 = load i64, ptr %arrayidx.3, align 4
  %mul3 = mul i64 %tmp3, 42
  %add2 = add i64 %add1, %mul3
  %tmp4 = load i64, ptr %arrayidx.4, align 4
  %mul4 = mul i64 %tmp4, 42
  %add3 = add i64 %add2, %mul4
  %tmp5 = load i64, ptr %arrayidx.5, align 4
  %mul5 = mul i64 %tmp5, 42
  %add4 = add i64 %add3, %mul5
  %mul6 = mul i64 %tmp, 42
  %add5 = add i64 %add4, %mul6
  %mul7 = mul i64 %tmp, 42
  %add6 = add i64 %add5, %mul7
  ret i64 %add6
}