; (source-viewer residue, kept as a comment) blob: ad362ef2bf900131d30fb0432ee5322378f705ab [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -mattr=+mve.fp -passes=vector-combine -S | FileCheck %s
target triple = "thumbv8.1m.main-arm-none-eabi"
; Positive test: two llvm.vector.reduce.add calls on same-typed <8 x i16>
; arguments feed a scalar add. The assertions below expect the pass to
; rewrite this as a single reduction over the vector add of %v0 and %v1.
define i16 @add_of_reduce_add(<8 x i16> %v0, <8 x i16> %v1) {
; CHECK-LABEL: define i16 @add_of_reduce_add(
; CHECK-SAME: <8 x i16> [[V0:%.*]], <8 x i16> [[V1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[V0]], [[V1]]
; CHECK-NEXT: [[RES:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP1]])
; CHECK-NEXT: ret i16 [[RES]]
;
%v0_red = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v0)
%v1_red = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v1)
%res = add i16 %v0_red, %v1_red
ret i16 %res
}
; Negative test: the first reduction consumes a zext from <8 x i8>, the
; second consumes a plain <8 x i16> argument. The assertions below match the
; input unchanged: both reductions and the scalar add are expected to remain.
; NOTE(review): presumably the fold is rejected because an extending
; reduction is cheap on this target -- confirm against the ARM cost model.
define i16 @reduce_zext_0(<8 x i8> %v0, <8 x i16> %v1) {
; CHECK-LABEL: define i16 @reduce_zext_0(
; CHECK-SAME: <8 x i8> [[V0:%.*]], <8 x i16> [[V1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT_:%.*]] = zext <8 x i8> [[V0]] to <8 x i16>
; CHECK-NEXT: [[V0_RED:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[ZEXT_]])
; CHECK-NEXT: [[V1_RED:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[V1]])
; CHECK-NEXT: [[RES:%.*]] = add i16 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i16 [[RES]]
;
%zext_ = zext <8 x i8> %v0 to <8 x i16>
%v0_red = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %zext_)
%v1_red = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v1)
%res = add i16 %v0_red, %v1_red
ret i16 %res
}
; Negative test: same shape as the zext-on-first-operand case, but here the
; zext from <8 x i8> feeds the second reduction and the plain <8 x i16>
; argument feeds the first. The assertions below match the input unchanged.
; NOTE(review): presumably the fold is rejected because an extending
; reduction is cheap on this target -- confirm against the ARM cost model.
define i16 @reduce_zext_1(<8 x i16> %v0, <8 x i8> %v1) {
; CHECK-LABEL: define i16 @reduce_zext_1(
; CHECK-SAME: <8 x i16> [[V0:%.*]], <8 x i8> [[V1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT_:%.*]] = zext <8 x i8> [[V1]] to <8 x i16>
; CHECK-NEXT: [[V0_RED:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[V0]])
; CHECK-NEXT: [[V1_RED:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[ZEXT_]])
; CHECK-NEXT: [[RES:%.*]] = add i16 [[V0_RED]], [[V1_RED]]
; CHECK-NEXT: ret i16 [[RES]]
;
%zext_ = zext <8 x i8> %v1 to <8 x i16>
%v0_red = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v0)
%v1_red = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v1)
%res = add i16 %v0_red, %v1_red
ret i16 %res
}
; Negative test: a reduction of zext(mul(zext %v0, zext %v1)) is added to a
; reduction of the <8 x i32> argument %v2, with the multiply reduction as the
; left operand of the scalar add. The assertions below match the input
; unchanged.
; NOTE(review): presumably kept separate so the backend can still select a
; multiply-accumulate reduction -- confirm against the ARM cost model.
define i32 @mul_acc_pattern_0(<8 x i8> %v0, <8 x i8> %v1, <8 x i32> %v2) {
; CHECK-LABEL: define i32 @mul_acc_pattern_0(
; CHECK-SAME: <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <8 x i32> [[V2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[INNER_ZEXT_0:%.*]] = zext <8 x i8> [[V0]] to <8 x i16>
; CHECK-NEXT: [[INNER_ZEXT_1:%.*]] = zext <8 x i8> [[V1]] to <8 x i16>
; CHECK-NEXT: [[MUL_:%.*]] = mul <8 x i16> [[INNER_ZEXT_0]], [[INNER_ZEXT_1]]
; CHECK-NEXT: [[ZEXT_:%.*]] = zext <8 x i16> [[MUL_]] to <8 x i32>
; CHECK-NEXT: [[RED_MUL_ACC_PATTERN:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[ZEXT_]])
; CHECK-NEXT: [[RED:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V2]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[RED_MUL_ACC_PATTERN]], [[RED]]
; CHECK-NEXT: ret i32 [[RES]]
;
%inner_zext_0 = zext <8 x i8> %v0 to <8 x i16>
%inner_zext_1 = zext <8 x i8> %v1 to <8 x i16>
%mul_ = mul <8 x i16> %inner_zext_0, %inner_zext_1
%zext_ = zext <8 x i16> %mul_ to <8 x i32>
%red_mul_acc_pattern = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %zext_)
%red = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v2)
%res = add i32 %red_mul_acc_pattern, %red
ret i32 %res
}
; Negative test: a reduction of zext(mul(zext %v0, zext %v1)) is added to a
; reduction of the <8 x i32> argument %v2, with the multiply reduction as the
; right operand of the scalar add (commuted form). The assertions below match
; the input unchanged.
; NOTE(review): presumably kept separate so the backend can still select a
; multiply-accumulate reduction -- confirm against the ARM cost model.
define i32 @mul_acc_pattern_1(<8 x i8> %v0, <8 x i8> %v1, <8 x i32> %v2) {
; CHECK-LABEL: define i32 @mul_acc_pattern_1(
; CHECK-SAME: <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <8 x i32> [[V2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[INNER_ZEXT_0:%.*]] = zext <8 x i8> [[V0]] to <8 x i16>
; CHECK-NEXT: [[INNER_ZEXT_1:%.*]] = zext <8 x i8> [[V1]] to <8 x i16>
; CHECK-NEXT: [[MUL_:%.*]] = mul <8 x i16> [[INNER_ZEXT_0]], [[INNER_ZEXT_1]]
; CHECK-NEXT: [[ZEXT_:%.*]] = zext <8 x i16> [[MUL_]] to <8 x i32>
; CHECK-NEXT: [[RED_MUL_ACC_PATTERN:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[ZEXT_]])
; CHECK-NEXT: [[RED:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V2]])
; CHECK-NEXT: [[RES:%.*]] = add i32 [[RED]], [[RED_MUL_ACC_PATTERN]]
; CHECK-NEXT: ret i32 [[RES]]
;
%inner_zext_0 = zext <8 x i8> %v0 to <8 x i16>
%inner_zext_1 = zext <8 x i8> %v1 to <8 x i16>
%mul_ = mul <8 x i16> %inner_zext_0, %inner_zext_1
%zext_ = zext <8 x i16> %mul_ to <8 x i32>
%red_mul_acc_pattern = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %zext_)
%red = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v2)
%res = add i32 %red, %red_mul_acc_pattern
ret i32 %res
}