blob: 61c8084f5d3e27cef9bcbd5e5419f716bbd5d360 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt --passes=slp-vectorizer -S -slp-revec -mtriple=aarch64-pc-windows-gnu < %s | FileCheck %s
; SLP-vectorizer test run with -slp-revec on an aarch64-pc-windows-gnu triple.
; The input is already written in terms of <8 x ...> vector operations. The
; autogenerated assertions below expect the output to contain the same
; instruction sequence as the input, i.e. the pass is expected to leave the
; function as it is.
; NOTE(review): presumably a reduced reproducer for a revec crash or
; miscompile - confirm against the commit that introduced this test.
define <8 x i64> @test(ptr %0, <8 x i32> %1) {
; CHECK-LABEL: define <8 x i64> @test(
; CHECK-SAME: ptr [[TMP0:%.*]], <8 x i32> [[TMP1:%.*]]) {
; CHECK-NEXT: [[VECTOR_PH:.*:]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 52
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[WIDE_LOAD3]] to <8 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i64 68
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i16>, ptr [[TMP4]], align 2
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 36
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP5]], align 2
; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[WIDE_LOAD2]] to <8 x i32>
; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i32> [[TMP1]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i32> [[TMP7]] to <8 x i64>
; CHECK-NEXT: [[TMP9:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP10:%.*]] = zext <8 x i32> [[TMP9]] to <8 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = add <8 x i64> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i64 20
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 2
; CHECK-NEXT: [[TMP13:%.*]] = zext <8 x i16> [[WIDE_LOAD1]] to <8 x i32>
; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i32> [[TMP1]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = zext <8 x i32> [[TMP14]] to <8 x i64>
; CHECK-NEXT: [[TMP16:%.*]] = add <8 x i64> [[TMP15]], [[TMP8]]
; CHECK-NEXT: [[TMP17:%.*]] = add <8 x i64> [[TMP16]], [[TMP10]]
; CHECK-NEXT: [[TMP18:%.*]] = zext <8 x i16> [[WIDE_LOAD7]] to <8 x i32>
; CHECK-NEXT: [[TMP19:%.*]] = mul <8 x i32> [[TMP1]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = zext <8 x i32> [[TMP19]] to <8 x i64>
; CHECK-NEXT: [[TMP21:%.*]] = add <8 x i64> [[TMP10]], [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = add <8 x i64> [[TMP11]], [[TMP20]]
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i64> [[TMP17]], [[TMP8]]
; CHECK-NEXT: [[BIN_RDX12:%.*]] = add <8 x i64> [[TMP22]], [[BIN_RDX]]
; CHECK-NEXT: [[BIN_RDX13:%.*]] = add <8 x i64> [[TMP21]], [[BIN_RDX12]]
; CHECK-NEXT: ret <8 x i64> [[BIN_RDX13]]
;
vector.ph:
; Four <8 x i16> loads (16 bytes each) are taken at byte offsets 20, 36, 52
; and 68 from %0, so together they read the contiguous range [20, 84). They
; appear here in the order 52, 68, 36, 20, not in address order.
; Each loaded vector goes through the same chain:
;   zext to <8 x i32>, mul by %1, zext to <8 x i64>.
; Load at offset 52; its widened product is %10.
%2 = getelementptr i8, ptr %0, i64 52
%wide.load3 = load <8 x i16>, ptr %2, align 2
%3 = zext <8 x i16> %wide.load3 to <8 x i32>
; Load at offset 68; only the load is issued here, its zext/mul chain is
; further down (%18..%20).
%4 = getelementptr i8, ptr %0, i64 68
%wide.load7 = load <8 x i16>, ptr %4, align 2
; Load at offset 36; its widened product is %8.
%5 = getelementptr i8, ptr %0, i64 36
%wide.load2 = load <8 x i16>, ptr %5, align 2
%6 = zext <8 x i16> %wide.load2 to <8 x i32>
%7 = mul <8 x i32> %1, %6
%8 = zext <8 x i32> %7 to <8 x i64>
%9 = mul <8 x i32> %1, %3
%10 = zext <8 x i32> %9 to <8 x i64>
; %11 = product(36) + product(52)
%11 = add <8 x i64> %8, %10
; Load at offset 20; its widened product is %15.
%12 = getelementptr i8, ptr %0, i64 20
%wide.load1 = load <8 x i16>, ptr %12, align 2
%13 = zext <8 x i16> %wide.load1 to <8 x i32>
%14 = mul <8 x i32> %1, %13
%15 = zext <8 x i32> %14 to <8 x i64>
; %17 = product(20) + product(36) + product(52)
%16 = add <8 x i64> %15, %8
%17 = add <8 x i64> %16, %10
; Deferred chain for the offset-68 load; its widened product is %20.
%18 = zext <8 x i16> %wide.load7 to <8 x i32>
%19 = mul <8 x i32> %1, %18
%20 = zext <8 x i32> %19 to <8 x i64>
; %21 = product(52) + product(68)
%21 = add <8 x i64> %10, %20
; %22 = product(36) + product(52) + product(68)
%22 = add <8 x i64> %11, %20
; Final chain of vector adds folding the partial sums %17, %22 and %21 (plus
; one more %8) into the returned value. The widened products %8, %10 and %20
; each feed several of these adds.
%bin.rdx = add <8 x i64> %17, %8
%bin.rdx12 = add <8 x i64> %22, %bin.rdx
%bin.rdx13 = add <8 x i64> %21, %bin.rdx12
ret <8 x i64> %bin.rdx13
}