| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4 |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX |
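
; Fold a scalar binop and a matching vector binop joined by an insertelement:
;   %s = binop %s0, %s1
;   %v = binop %v0, %v1
;   %r = insertelement %v, %s, Idx
;     -->
;   %a = insertelement %v0, %s0, Idx
;   %b = insertelement %v1, %s1, Idx
;   %r = binop %a, %b
; This trades the scalar binop plus one insertelement for two insertelements,
; so it only fires when the target's cost model considers the insertions
; cheap enough - hence the per-target differences below.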
| |
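; Folds on every tested target - the CHECK lines below are shared by all RUN configurations.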
| define <2 x double> @insert1_v2f64_f64_fdiv(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) { |
| ; CHECK-LABEL: @insert1_v2f64_f64_fdiv( |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[V0:%.*]], double [[S0:%.*]], i64 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[V1:%.*]], double [[S1:%.*]], i64 1 |
| ; CHECK-NEXT: [[R:%.*]] = fdiv <2 x double> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: ret <2 x double> [[R]] |
| ; |
| %s = fdiv double %s0, %s1 |
| %v = fdiv <2 x double> %v0, %v1 |
| %r = insertelement <2 x double> %v, double %s, i32 1 |
| ret <2 x double> %r |
| } |
| |
| ; SSE2 has no fast v4i32 insertelement |
| define <4 x i32> @insert0_v4i32_i32_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) { |
| ; SSE2-LABEL: @insert0_v4i32_i32_add( |
| ; SSE2-NEXT: [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]] |
| ; SSE2-NEXT: [[V:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]] |
| ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0 |
| ; SSE2-NEXT: ret <4 x i32> [[R]] |
| ; |
| ; SSE4-LABEL: @insert0_v4i32_i32_add( |
| ; SSE4-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[V0:%.*]], i32 [[S0:%.*]], i64 0 |
| ; SSE4-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[V1:%.*]], i32 [[S1:%.*]], i64 0 |
| ; SSE4-NEXT: [[R:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] |
| ; SSE4-NEXT: ret <4 x i32> [[R]] |
| ; |
| ; AVX-LABEL: @insert0_v4i32_i32_add( |
| ; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[V0:%.*]], i32 [[S0:%.*]], i64 0 |
| ; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[V1:%.*]], i32 [[S1:%.*]], i64 0 |
| ; AVX-NEXT: [[R:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] |
| ; AVX-NEXT: ret <4 x i32> [[R]] |
| ; |
| %s = add i32 %s0, %s1 |
| %v = add <4 x i32> %v0, %v1 |
| %r = insertelement <4 x i32> %v, i32 %s, i32 0 |
| ret <4 x i32> %r |
| } |
| |
; AVX - insertion into the upper 128-bit half is expensive, so the fold is skipped; SSE still folds.
| define <16 x i16> @insert9_v16i16_i16_add(<16 x i16> %v0, <16 x i16> %v1, i16 %s0, i16 %s1) { |
| ; SSE-LABEL: @insert9_v16i16_i16_add( |
| ; SSE-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> [[V0:%.*]], i16 [[S0:%.*]], i64 9 |
| ; SSE-NEXT: [[TMP2:%.*]] = insertelement <16 x i16> [[V1:%.*]], i16 [[S1:%.*]], i64 9 |
| ; SSE-NEXT: [[R:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]] |
| ; SSE-NEXT: ret <16 x i16> [[R]] |
| ; |
| ; AVX-LABEL: @insert9_v16i16_i16_add( |
| ; AVX-NEXT: [[S:%.*]] = add i16 [[S0:%.*]], [[S1:%.*]] |
| ; AVX-NEXT: [[V:%.*]] = add <16 x i16> [[V0:%.*]], [[V1:%.*]] |
| ; AVX-NEXT: [[R:%.*]] = insertelement <16 x i16> [[V]], i16 [[S]], i32 9 |
| ; AVX-NEXT: ret <16 x i16> [[R]] |
| ; |
| %s = add i16 %s0, %s1 |
| %v = add <16 x i16> %v0, %v1 |
| %r = insertelement <16 x i16> %v, i16 %s, i32 9 |
| ret <16 x i16> %r |
| } |
| |
; Merge flags - both ops carry 'fast', so the folded binop keeps it.
| define <4 x float> @insert0_v4f32_f32_fadd_common_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) { |
| ; CHECK-LABEL: @insert0_v4f32_f32_fadd_common_flags( |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[V0:%.*]], float [[S0:%.*]], i64 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[V1:%.*]], float [[S1:%.*]], i64 0 |
| ; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: ret <4 x float> [[R]] |
| ; |
| %s = fadd fast float %s0, %s1 |
| %v = fadd fast <4 x float> %v0, %v1 |
| %r = insertelement <4 x float> %v, float %s, i32 0 |
| ret <4 x float> %r |
| } |
| |
; Merge (shared) flags - only FMF present on both ops (nnan) survives the fold.
| define <4 x float> @insert1_v4f32_f32_fsub_mixed_flags(<4 x float> %v0, <4 x float> %v1, float %s0, float %s1) { |
| ; CHECK-LABEL: @insert1_v4f32_f32_fsub_mixed_flags( |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[V0:%.*]], float [[S0:%.*]], i64 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[V1:%.*]], float [[S1:%.*]], i64 1 |
| ; CHECK-NEXT: [[R:%.*]] = fsub nnan <4 x float> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: ret <4 x float> [[R]] |
| ; |
| %s = fsub nnan nsz float %s0, %s1 |
| %v = fsub nnan ninf <4 x float> %v0, %v1 |
| %r = insertelement <4 x float> %v, float %s, i32 1 |
| ret <4 x float> %r |
| } |
| |
; TODO: Fold equivalent opcodes ('or disjoint' is equivalent to 'add').
| define <4 x i32> @insert0_v4i32_i32_or_disjoint_add(<4 x i32> %v0, <4 x i32> %v1, i32 %s0, i32 %s1) { |
| ; CHECK-LABEL: @insert0_v4i32_i32_or_disjoint_add( |
| ; CHECK-NEXT: [[S:%.*]] = add i32 [[S0:%.*]], [[S1:%.*]] |
| ; CHECK-NEXT: [[V:%.*]] = or disjoint <4 x i32> [[V0:%.*]], [[V1:%.*]] |
| ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> [[V]], i32 [[S]], i32 0 |
| ; CHECK-NEXT: ret <4 x i32> [[R]] |
| ; |
| %s = add i32 %s0, %s1 |
| %v = or disjoint <4 x i32> %v0, %v1 |
| %r = insertelement <4 x i32> %v, i32 %s, i32 0 |
| ret <4 x i32> %r |
| } |
| |
; Negative - multi use (the scalar binop result is also passed to @use_f64).
| define <2 x double> @insert0_v2f64_f64_fmul_multiuse(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) { |
| ; CHECK-LABEL: @insert0_v2f64_f64_fmul_multiuse( |
| ; CHECK-NEXT: [[S:%.*]] = fmul double [[S0:%.*]], [[S1:%.*]] |
| ; CHECK-NEXT: [[V:%.*]] = fmul <2 x double> [[V0:%.*]], [[V1:%.*]] |
| ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0 |
| ; CHECK-NEXT: call void @use_f64(double [[S]]) |
| ; CHECK-NEXT: ret <2 x double> [[R]] |
| ; |
| %s = fmul double %s0, %s1 |
| %v = fmul <2 x double> %v0, %v1 |
| %r = insertelement <2 x double> %v, double %s, i32 0 |
| call void @use_f64(double %s) |
| ret <2 x double> %r |
| } |
declare void @use_f64(double)
| |
; Negative - multi use (the vector binop result is also passed to @use_v2i64).
| define <2 x i64> @insert0_v2i64_i64_add_multiuse(<2 x i64> %v0, <2 x i64> %v1, i64 %s0, i64 %s1) { |
| ; CHECK-LABEL: @insert0_v2i64_i64_add_multiuse( |
| ; CHECK-NEXT: [[S:%.*]] = add i64 [[S0:%.*]], [[S1:%.*]] |
| ; CHECK-NEXT: [[V:%.*]] = add <2 x i64> [[V0:%.*]], [[V1:%.*]] |
| ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> [[V]], i64 [[S]], i32 0 |
| ; CHECK-NEXT: call void @use_v2i64(<2 x i64> [[V]]) |
| ; CHECK-NEXT: ret <2 x i64> [[R]] |
| ; |
| %s = add i64 %s0, %s1 |
| %v = add <2 x i64> %v0, %v1 |
| %r = insertelement <2 x i64> %v, i64 %s, i32 0 |
| call void @use_v2i64(<2 x i64> %v) |
| ret <2 x i64> %r |
| } |
| declare void @use_v2i64(<2 x i64>) |
| |
; Negative - binop mismatch (scalar fsub vs. vector fadd).
| define <2 x double> @insert0_v2f64_f64_fadd_fsub(<2 x double> %v0, <2 x double> %v1, double %s0, double %s1) { |
| ; CHECK-LABEL: @insert0_v2f64_f64_fadd_fsub( |
| ; CHECK-NEXT: [[S:%.*]] = fsub double [[S0:%.*]], [[S1:%.*]] |
| ; CHECK-NEXT: [[V:%.*]] = fadd <2 x double> [[V0:%.*]], [[V1:%.*]] |
| ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 0 |
| ; CHECK-NEXT: ret <2 x double> [[R]] |
| ; |
| %s = fsub double %s0, %s1 |
| %v = fadd <2 x double> %v0, %v1 |
| %r = insertelement <2 x double> %v, double %s, i32 0 |
| ret <2 x double> %r |
| } |
| |