; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX
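
; VectorCombine should fold
;   %r = insertelement (binop %v0, %v1), (binop %s0, %s1), Idx
; into
;   %r = binop (insertelement %v0, %s0, Idx), (insertelement %v1, %s1, Idx)
; whenever the target cost model reports the two scalar insertions as cheap
; enough; the negative tests below cover the cases where it must not fire.
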
define <2 x double> @insert1_v2f64_f64_fdiv(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
; CHECK-LABEL: @insert1_v2f64_f64_fdiv(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[V0:%.*]], double [[S0:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[V1:%.*]], double [[S1:%.*]], i64 1
; CHECK-NEXT: [[R:%.*]] = fdiv <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <2 x double> [[R]]
;
%s = fdiv double %s0, %s1
%v = fdiv <2 x double> %v0, %v1
%r = insertelement <2 x double> %v, double %s, i32 1
ret <2 x double> %r
}

; SSE2 has no fast v4i32 insertelement, so the fold is only profitable on SSE4+.
define <4 x i32> @insert0_v4i32_i32_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) {
; SSE2-LABEL: @insert0_v4i32_i32_add(
; SSE2-NEXT: [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]]
; SSE2-NEXT: [[V:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]
; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0
; SSE2-NEXT: ret <4 x i32> [[R]]
;
; SSE4-LABEL: @insert0_v4i32_i32_add(
; SSE4-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[V0:%.*]], i32 [[S0:%.*]], i64 0
; SSE4-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[V1:%.*]], i32 [[S1:%.*]], i64 0
; SSE4-NEXT: [[R:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; SSE4-NEXT: ret <4 x i32> [[R]]
;
; AVX-LABEL: @insert0_v4i32_i32_add(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[V0:%.*]], i32 [[S0:%.*]], i64 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[V1:%.*]], i32 [[S1:%.*]], i64 0
; AVX-NEXT: [[R:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <4 x i32> [[R]]
;
%s = add i32 %s0, %s1
%v = add <4 x i32> %v0, %v1
%r = insertelement <4 x i32> %v, i32 %s, i32 0
ret <4 x i32> %r
}

; AVX - insertion into the upper 128-bit half of a 256-bit vector is expensive,
; so the fold is skipped; SSE legalizes the op to 128-bit halves and still folds.
define <16 x i16> @insert9_v16i16_i16_add(<16 x i16> %v0, <16 x i16> %v1, i16 %s0, i16 %s1) {
; SSE-LABEL: @insert9_v16i16_i16_add(
; SSE-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> [[V0:%.*]], i16 [[S0:%.*]], i64 9
; SSE-NEXT: [[TMP2:%.*]] = insertelement <16 x i16> [[V1:%.*]], i16 [[S1:%.*]], i64 9
; SSE-NEXT: [[R:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; SSE-NEXT: ret <16 x i16> [[R]]
;
; AVX-LABEL: @insert9_v16i16_i16_add(
; AVX-NEXT: [[S:%.*]] = add i16 [[S0:%.*]], [[S1:%.*]]
; AVX-NEXT: [[V:%.*]] = add <16 x i16> [[V0:%.*]], [[V1:%.*]]
; AVX-NEXT: [[R:%.*]] = insertelement <16 x i16> [[V]], i16 [[S]], i32 9
; AVX-NEXT: ret <16 x i16> [[R]]
;
%s = add i16 %s0, %s1
%v = add <16 x i16> %v0, %v1
%r = insertelement <16 x i16> %v, i16 %s, i32 9
ret <16 x i16> %r
}

; Merge flags - identical fast-math flags on both binops are preserved.
define <4 x float> @insert0_v4f32_f32_fadd_common_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) {
; CHECK-LABEL: @insert0_v4f32_f32_fadd_common_flags(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[V0:%.*]], float [[S0:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[V1:%.*]], float [[S1:%.*]], i64 0
; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = fadd fast float %s0, %s1
%v = fadd fast <4 x float> %v0, %v1
%r = insertelement <4 x float> %v, float %s, i32 0
ret <4 x float> %r
}

; Merge (shared) flags - only the fast-math flags common to both binops
; survive (just nnan here).
define <4 x float> @insert1_v4f32_f32_fsub_mixed_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) {
; CHECK-LABEL: @insert1_v4f32_f32_fsub_mixed_flags(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[V0:%.*]], float [[S0:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[V1:%.*]], float [[S1:%.*]], i64 1
; CHECK-NEXT: [[R:%.*]] = fsub nnan <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%s = fsub nnan nsz float %s0, %s1
%v = fsub nnan ninf <4 x float> %v0, %v1
%r = insertelement <4 x float> %v, float %s, i32 1
ret <4 x float> %r
}

; TODO: Fold equivalent opcodes ('or disjoint' computes the same result as 'add').
define <4 x i32> @insert0_v4i32_i32_or_disjoint_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) {
; CHECK-LABEL: @insert0_v4i32_i32_or_disjoint_add(
; CHECK-NEXT: [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]]
; CHECK-NEXT: [[V:%.*]] = or disjoint <4 x i32> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%s = add i32 %s0, %s1
%v = or disjoint <4 x i32> %v0, %v1
%r = insertelement <4 x i32> %v, i32 %s, i32 0
ret <4 x i32> %r
}
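
; Negative tests - the fold must not fire in any of the cases below.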
; Negative - multi use of the scalar binop.
define <2 x double> @insert0_v2f64_f64_fmul_multiuse(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
; CHECK-LABEL: @insert0_v2f64_f64_fmul_multiuse(
; CHECK-NEXT: [[S:%.*]] = fmul double [[S0:%.*]], [[S1:%.*]]
; CHECK-NEXT: [[V:%.*]] = fmul <2 x double> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0
; CHECK-NEXT: call void @use_f64(double [[S]])
; CHECK-NEXT: ret <2 x double> [[R]]
;
%s = fmul double %s0, %s1
%v = fmul <2 x double> %v0, %v1
%r = insertelement <2 x double> %v, double %s, i32 0
call void @use_f64(double %s)
ret <2 x double> %r
}

declare void @use_f64(double)

; Negative - multi use of the vector binop.
define <2 x i64> @insert0_v2i64_i64_add_multiuse(<2 x i64> %v0, <2 x i64> %v1, i64 %s0, i64 %s1) {
; CHECK-LABEL: @insert0_v2i64_i64_add_multiuse(
; CHECK-NEXT: [[S:%.*]] = add i64 [[S0:%.*]], [[S1:%.*]]
; CHECK-NEXT: [[V:%.*]] = add <2 x i64> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> [[V]], i64 [[S]], i32 0
; CHECK-NEXT: call void @use_v2i64(<2 x i64> [[V]])
; CHECK-NEXT: ret <2 x i64> [[R]]
;
%s = add i64 %s0, %s1
%v = add <2 x i64> %v0, %v1
%r = insertelement <2 x i64> %v, i64 %s, i32 0
call void @use_v2i64(<2 x i64> %v)
ret <2 x i64> %r
}

declare void @use_v2i64(<2 x i64>)

; Negative - binop opcode mismatch (scalar fsub vs vector fadd).
define <2 x double> @insert0_v2f64_f64_fadd_fsub(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) {
; CHECK-LABEL: @insert0_v2f64_f64_fadd_fsub(
; CHECK-NEXT: [[S:%.*]] = fsub double [[S0:%.*]], [[S1:%.*]]
; CHECK-NEXT: [[V:%.*]] = fadd <2 x double> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0
; CHECK-NEXT: ret <2 x double> [[R]]
;
%s = fsub double %s0, %s1
%v = fadd <2 x double> %v0, %v1
%r = insertelement <2 x double> %v, double %s, i32 0
ret <2 x double> %r
}