| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 |
| ; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4 |
| ; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX |
| ; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX |
| ; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 |
| ; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4 |
| ; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX |
| ; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX |
| |
| ; Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle. |
| ; That may require some coordination between VectorCombine, SLP, and other passes. |
| ; The end goal is to get a single "vaddsubps" (or "vaddsubpd" for doubles) instruction for x86 with AVX. |
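| ; |
| ; For reference, an "addsub" operation subtracts in the even lanes and adds in |
| ; the odd lanes, i.e. for <4 x float> %a, %b the desired result is |
| ; { a0-b0, a1+b1, a2-b2, a3+b3 }. A minimal hand-written sketch of the IR shape |
| ; the CHECK lines below expect after -O3 (one fsub, one fadd, and one |
| ; interleaving shuffle) is: |
| ; |
| ;   %sub = fsub <4 x float> %a, %b |
| ;   %add = fadd <4 x float> %a, %b |
| ;   %res = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
| ; |
| ; which the x86 backend can then match to a single (v)addsubps. |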
| |
| define <2 x double> @test_addsub_v2f64(<2 x double> %A, <2 x double> %B) { |
| ; CHECK-LABEL: @test_addsub_v2f64( |
| ; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[A]], [[B]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3> |
| ; CHECK-NEXT: ret <2 x double> [[TMP3]] |
| ; |
| %1 = extractelement <2 x double> %A, i32 0 |
| %2 = extractelement <2 x double> %B, i32 0 |
| %sub = fsub double %1, %2 |
| %3 = extractelement <2 x double> %A, i32 1 |
| %4 = extractelement <2 x double> %B, i32 1 |
| %add = fadd double %3, %4 |
| %vecinsert1 = insertelement <2 x double> undef, double %sub, i32 0 |
| %vecinsert2 = insertelement <2 x double> %vecinsert1, double %add, i32 1 |
| ret <2 x double> %vecinsert2 |
| } |
| |
| define <4 x double> @test_addsub_v4f64(<4 x double> %A, <4 x double> %B) { |
| ; CHECK-LABEL: @test_addsub_v4f64( |
| ; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x double> [[A]], [[B]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
| ; CHECK-NEXT: ret <4 x double> [[TMP3]] |
| ; |
| %1 = extractelement <4 x double> %A, i32 0 |
| %2 = extractelement <4 x double> %B, i32 0 |
| %sub = fsub double %1, %2 |
| %3 = extractelement <4 x double> %A, i32 2 |
| %4 = extractelement <4 x double> %B, i32 2 |
| %sub2 = fsub double %3, %4 |
| %5 = extractelement <4 x double> %A, i32 1 |
| %6 = extractelement <4 x double> %B, i32 1 |
| %add = fadd double %5, %6 |
| %7 = extractelement <4 x double> %A, i32 3 |
| %8 = extractelement <4 x double> %B, i32 3 |
| %add2 = fadd double %7, %8 |
| %vecinsert1 = insertelement <4 x double> undef, double %add, i32 1 |
| %vecinsert2 = insertelement <4 x double> %vecinsert1, double %add2, i32 3 |
| %vecinsert3 = insertelement <4 x double> %vecinsert2, double %sub, i32 0 |
| %vecinsert4 = insertelement <4 x double> %vecinsert3, double %sub2, i32 2 |
| ret <4 x double> %vecinsert4 |
| } |
| |
| define <8 x double> @test_addsub_v8f64(<8 x double> %A, <8 x double> %B) { |
| ; SSE2-LABEL: @test_addsub_v8f64( |
| ; SSE2-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]] |
| ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; SSE2-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]] |
| ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; SSE2-NEXT: ret <8 x double> [[TMP5]] |
| ; |
| ; SSE4-LABEL: @test_addsub_v8f64( |
| ; SSE4-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]] |
| ; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x double> [[A]], [[B]] |
| ; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> |
| ; SSE4-NEXT: ret <8 x double> [[TMP3]] |
| ; |
| ; AVX-LABEL: @test_addsub_v8f64( |
| ; AVX-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]] |
| ; AVX-NEXT: [[TMP2:%.*]] = fadd <8 x double> [[A]], [[B]] |
| ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> |
| ; AVX-NEXT: ret <8 x double> [[TMP3]] |
| ; |
| %1 = extractelement <8 x double> %A, i32 0 |
| %2 = extractelement <8 x double> %B, i32 0 |
| %sub = fsub double %1, %2 |
| %3 = extractelement <8 x double> %A, i32 2 |
| %4 = extractelement <8 x double> %B, i32 2 |
| %sub2 = fsub double %3, %4 |
| %5 = extractelement <8 x double> %A, i32 1 |
| %6 = extractelement <8 x double> %B, i32 1 |
| %add = fadd double %5, %6 |
| %7 = extractelement <8 x double> %A, i32 3 |
| %8 = extractelement <8 x double> %B, i32 3 |
| %add2 = fadd double %7, %8 |
| %9 = extractelement <8 x double> %A, i32 4 |
| %10 = extractelement <8 x double> %B, i32 4 |
| %sub3 = fsub double %9, %10 |
| %11 = extractelement <8 x double> %A, i32 6 |
| %12 = extractelement <8 x double> %B, i32 6 |
| %sub4 = fsub double %11, %12 |
| %13 = extractelement <8 x double> %A, i32 5 |
| %14 = extractelement <8 x double> %B, i32 5 |
| %add3 = fadd double %13, %14 |
| %15 = extractelement <8 x double> %A, i32 7 |
| %16 = extractelement <8 x double> %B, i32 7 |
| %add4 = fadd double %15, %16 |
| %vecinsert1 = insertelement <8 x double> undef, double %add, i32 1 |
| %vecinsert2 = insertelement <8 x double> %vecinsert1, double %add2, i32 3 |
| %vecinsert3 = insertelement <8 x double> %vecinsert2, double %sub, i32 0 |
| %vecinsert4 = insertelement <8 x double> %vecinsert3, double %sub2, i32 2 |
| %vecinsert5 = insertelement <8 x double> %vecinsert4, double %add3, i32 5 |
| %vecinsert6 = insertelement <8 x double> %vecinsert5, double %add4, i32 7 |
| %vecinsert7 = insertelement <8 x double> %vecinsert6, double %sub3, i32 4 |
| %vecinsert8 = insertelement <8 x double> %vecinsert7, double %sub4, i32 6 |
| ret <8 x double> %vecinsert8 |
| } |
| |
| define <2 x float> @test_addsub_v2f32(<2 x float> %v0, <2 x float> %v1) { |
| ; CHECK-LABEL: @test_addsub_v2f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> [[V0:%.*]], [[V1:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[V0]], [[V1]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3> |
| ; CHECK-NEXT: ret <2 x float> [[TMP3]] |
| ; |
| %v2 = extractelement <2 x float> %v0, i32 0 |
| %v3 = extractelement <2 x float> %v1, i32 0 |
| %v4 = extractelement <2 x float> %v0, i32 1 |
| %v5 = extractelement <2 x float> %v1, i32 1 |
| %sub = fsub float %v2, %v3 |
| %add = fadd float %v5, %v4 |
| %res0 = insertelement <2 x float> undef, float %sub, i32 0 |
| %res1 = insertelement <2 x float> %res0, float %add, i32 1 |
| ret <2 x float> %res1 |
| } |
| |
| define <4 x float> @test_addsub_v4f32(<4 x float> %A, <4 x float> %B) { |
| ; CHECK-LABEL: @test_addsub_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
| ; CHECK-NEXT: ret <4 x float> [[TMP3]] |
| ; |
| %1 = extractelement <4 x float> %A, i32 0 |
| %2 = extractelement <4 x float> %B, i32 0 |
| %sub = fsub float %1, %2 |
| %3 = extractelement <4 x float> %A, i32 2 |
| %4 = extractelement <4 x float> %B, i32 2 |
| %sub2 = fsub float %3, %4 |
| %5 = extractelement <4 x float> %A, i32 1 |
| %6 = extractelement <4 x float> %B, i32 1 |
| %add = fadd float %5, %6 |
| %7 = extractelement <4 x float> %A, i32 3 |
| %8 = extractelement <4 x float> %B, i32 3 |
| %add2 = fadd float %7, %8 |
| %vecinsert1 = insertelement <4 x float> undef, float %add, i32 1 |
| %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3 |
| %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub, i32 0 |
| %vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2 |
| ret <4 x float> %vecinsert4 |
| } |
| |
| define <8 x float> @test_v8f32(<8 x float> %A, <8 x float> %B) { |
| ; SSE2-LABEL: @test_v8f32( |
| ; SSE2-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]] |
| ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; SSE2-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[A]], [[B]] |
| ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; SSE2-NEXT: ret <8 x float> [[TMP5]] |
| ; |
| ; SSE4-LABEL: @test_v8f32( |
| ; SSE4-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]] |
| ; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] |
| ; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> |
| ; SSE4-NEXT: ret <8 x float> [[TMP3]] |
| ; |
| ; AVX-LABEL: @test_v8f32( |
| ; AVX-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]] |
| ; AVX-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] |
| ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> |
| ; AVX-NEXT: ret <8 x float> [[TMP3]] |
| ; |
| %1 = extractelement <8 x float> %A, i32 0 |
| %2 = extractelement <8 x float> %B, i32 0 |
| %sub = fsub float %1, %2 |
| %3 = extractelement <8 x float> %A, i32 2 |
| %4 = extractelement <8 x float> %B, i32 2 |
| %sub2 = fsub float %3, %4 |
| %5 = extractelement <8 x float> %A, i32 1 |
| %6 = extractelement <8 x float> %B, i32 1 |
| %add = fadd float %5, %6 |
| %7 = extractelement <8 x float> %A, i32 3 |
| %8 = extractelement <8 x float> %B, i32 3 |
| %add2 = fadd float %7, %8 |
| %9 = extractelement <8 x float> %A, i32 4 |
| %10 = extractelement <8 x float> %B, i32 4 |
| %sub3 = fsub float %9, %10 |
| %11 = extractelement <8 x float> %A, i32 6 |
| %12 = extractelement <8 x float> %B, i32 6 |
| %sub4 = fsub float %11, %12 |
| %13 = extractelement <8 x float> %A, i32 5 |
| %14 = extractelement <8 x float> %B, i32 5 |
| %add3 = fadd float %13, %14 |
| %15 = extractelement <8 x float> %A, i32 7 |
| %16 = extractelement <8 x float> %B, i32 7 |
| %add4 = fadd float %15, %16 |
| %vecinsert1 = insertelement <8 x float> undef, float %add, i32 1 |
| %vecinsert2 = insertelement <8 x float> %vecinsert1, float %add2, i32 3 |
| %vecinsert3 = insertelement <8 x float> %vecinsert2, float %sub, i32 0 |
| %vecinsert4 = insertelement <8 x float> %vecinsert3, float %sub2, i32 2 |
| %vecinsert5 = insertelement <8 x float> %vecinsert4, float %add3, i32 5 |
| %vecinsert6 = insertelement <8 x float> %vecinsert5, float %add4, i32 7 |
| %vecinsert7 = insertelement <8 x float> %vecinsert6, float %sub3, i32 4 |
| %vecinsert8 = insertelement <8 x float> %vecinsert7, float %sub4, i32 6 |
| ret <8 x float> %vecinsert8 |
| } |
| |
| define <16 x float> @test_addsub_v16f32(<16 x float> %A, <16 x float> %B) { |
| ; SSE2-LABEL: @test_addsub_v16f32( |
| ; SSE2-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]] |
| ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| ; SSE2-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]] |
| ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| ; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP4]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> |
| ; SSE2-NEXT: ret <16 x float> [[TMP5]] |
| ; |
| ; SSE4-LABEL: @test_addsub_v16f32( |
| ; SSE4-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]] |
| ; SSE4-NEXT: [[TMP2:%.*]] = fadd <16 x float> [[A]], [[B]] |
| ; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> [[TMP2]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> |
| ; SSE4-NEXT: ret <16 x float> [[TMP3]] |
| ; |
| ; AVX-LABEL: @test_addsub_v16f32( |
| ; AVX-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]] |
| ; AVX-NEXT: [[TMP2:%.*]] = fadd <16 x float> [[A]], [[B]] |
| ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> [[TMP2]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> |
| ; AVX-NEXT: ret <16 x float> [[TMP3]] |
| ; |
| %1 = extractelement <16 x float> %A, i32 0 |
| %2 = extractelement <16 x float> %B, i32 0 |
| %sub = fsub float %1, %2 |
| %3 = extractelement <16 x float> %A, i32 2 |
| %4 = extractelement <16 x float> %B, i32 2 |
| %sub2 = fsub float %3, %4 |
| %5 = extractelement <16 x float> %A, i32 1 |
| %6 = extractelement <16 x float> %B, i32 1 |
| %add = fadd float %5, %6 |
| %7 = extractelement <16 x float> %A, i32 3 |
| %8 = extractelement <16 x float> %B, i32 3 |
| %add2 = fadd float %7, %8 |
| %9 = extractelement <16 x float> %A, i32 4 |
| %10 = extractelement <16 x float> %B, i32 4 |
| %sub3 = fsub float %9, %10 |
| %11 = extractelement <16 x float> %A, i32 6 |
| %12 = extractelement <16 x float> %B, i32 6 |
| %sub4 = fsub float %11, %12 |
| %13 = extractelement <16 x float> %A, i32 5 |
| %14 = extractelement <16 x float> %B, i32 5 |
| %add3 = fadd float %13, %14 |
| %15 = extractelement <16 x float> %A, i32 7 |
| %16 = extractelement <16 x float> %B, i32 7 |
| %add4 = fadd float %15, %16 |
| %17 = extractelement <16 x float> %A, i32 8 |
| %18 = extractelement <16 x float> %B, i32 8 |
| %sub5 = fsub float %17, %18 |
| %19 = extractelement <16 x float> %A, i32 10 |
| %20 = extractelement <16 x float> %B, i32 10 |
| %sub6 = fsub float %19, %20 |
| %21 = extractelement <16 x float> %A, i32 9 |
| %22 = extractelement <16 x float> %B, i32 9 |
| %add5 = fadd float %21, %22 |
| %23 = extractelement <16 x float> %A, i32 11 |
| %24 = extractelement <16 x float> %B, i32 11 |
| %add6 = fadd float %23, %24 |
| %25 = extractelement <16 x float> %A, i32 12 |
| %26 = extractelement <16 x float> %B, i32 12 |
| %sub7 = fsub float %25, %26 |
| %27 = extractelement <16 x float> %A, i32 14 |
| %28 = extractelement <16 x float> %B, i32 14 |
| %sub8 = fsub float %27, %28 |
| %29 = extractelement <16 x float> %A, i32 13 |
| %30 = extractelement <16 x float> %B, i32 13 |
| %add7 = fadd float %29, %30 |
| %31 = extractelement <16 x float> %A, i32 15 |
| %32 = extractelement <16 x float> %B, i32 15 |
| %add8 = fadd float %31, %32 |
| %vecinsert1 = insertelement <16 x float> undef, float %add, i32 1 |
| %vecinsert2 = insertelement <16 x float> %vecinsert1, float %add2, i32 3 |
| %vecinsert3 = insertelement <16 x float> %vecinsert2, float %sub, i32 0 |
| %vecinsert4 = insertelement <16 x float> %vecinsert3, float %sub2, i32 2 |
| %vecinsert5 = insertelement <16 x float> %vecinsert4, float %add3, i32 5 |
| %vecinsert6 = insertelement <16 x float> %vecinsert5, float %add4, i32 7 |
| %vecinsert7 = insertelement <16 x float> %vecinsert6, float %sub3, i32 4 |
| %vecinsert8 = insertelement <16 x float> %vecinsert7, float %sub4, i32 6 |
| %vecinsert9 = insertelement <16 x float> %vecinsert8, float %add5, i32 9 |
| %vecinsert10 = insertelement <16 x float> %vecinsert9, float %add6, i32 11 |
| %vecinsert11 = insertelement <16 x float> %vecinsert10, float %sub5, i32 8 |
| %vecinsert12 = insertelement <16 x float> %vecinsert11, float %sub6, i32 10 |
| %vecinsert13 = insertelement <16 x float> %vecinsert12, float %add7, i32 13 |
| %vecinsert14 = insertelement <16 x float> %vecinsert13, float %add8, i32 15 |
| %vecinsert15 = insertelement <16 x float> %vecinsert14, float %sub7, i32 12 |
| %vecinsert16 = insertelement <16 x float> %vecinsert15, float %sub8, i32 14 |
| ret <16 x float> %vecinsert16 |
| } |
| |
| ; Test that non-sequential / partial add-sub patterns are still folded. |
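| ; "Partial" means only a subset of the result lanes is written (the remaining |
| ; lanes stay undef); e.g. @test_addsub_v4f32_partial_23 below only produces |
| ; lane 2 (sub) and lane 3 (add). |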
| |
| define <4 x float> @test_addsub_v4f32_shuffle_1302(<4 x float> %A, <4 x float> %B) { |
| ; CHECK-LABEL: @test_addsub_v4f32_shuffle_1302( |
| ; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
| ; CHECK-NEXT: ret <4 x float> [[TMP3]] |
| ; |
| %1 = extractelement <4 x float> %A, i32 0 |
| %2 = extractelement <4 x float> %B, i32 0 |
| %sub = fsub float %1, %2 |
| %3 = extractelement <4 x float> %A, i32 2 |
| %4 = extractelement <4 x float> %B, i32 2 |
| %sub2 = fsub float %3, %4 |
| %5 = extractelement <4 x float> %A, i32 1 |
| %6 = extractelement <4 x float> %B, i32 1 |
| %add = fadd float %5, %6 |
| %7 = extractelement <4 x float> %A, i32 3 |
| %8 = extractelement <4 x float> %B, i32 3 |
| %add2 = fadd float %7, %8 |
| %vecinsert1 = insertelement <4 x float> undef, float %add, i32 1 |
| %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3 |
| %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub, i32 0 |
| %vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2 |
| ret <4 x float> %vecinsert4 |
| } |
| |
| define <4 x float> @test_addsub_v4f32_partial_23(<4 x float> %A, <4 x float> %B) { |
| ; CHECK-LABEL: @test_addsub_v4f32_partial_23( |
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32> <i32 0, i32 3, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> <float undef, float undef, float poison, float poison>, <4 x i32> <i32 4, i32 5, i32 0, i32 1> |
| ; CHECK-NEXT: ret <4 x float> [[VECINSERT21]] |
| ; |
| %1 = extractelement <4 x float> %A, i32 2 |
| %2 = extractelement <4 x float> %B, i32 2 |
| %sub2 = fsub float %1, %2 |
| %3 = extractelement <4 x float> %A, i32 3 |
| %4 = extractelement <4 x float> %B, i32 3 |
| %add2 = fadd float %3, %4 |
| %vecinsert1 = insertelement <4 x float> undef, float %sub2, i32 2 |
| %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3 |
| ret <4 x float> %vecinsert2 |
| } |
| |
| define <4 x float> @test_addsub_v4f32_partial_03(<4 x float> %A, <4 x float> %B) { |
| ; CHECK-LABEL: @test_addsub_v4f32_partial_03( |
| ; CHECK-NEXT: [[FOLDEXTEXTBINOP:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[FOLDEXTEXTBINOP2:%.*]] = fadd <4 x float> [[A]], [[B]] |
| ; CHECK-NEXT: [[VECINSERT1:%.*]] = shufflevector <4 x float> [[FOLDEXTEXTBINOP]], <4 x float> <float poison, float undef, float undef, float poison>, <4 x i32> <i32 0, i32 5, i32 6, i32 poison> |
| ; CHECK-NEXT: [[VECINSERT2:%.*]] = shufflevector <4 x float> [[VECINSERT1]], <4 x float> [[FOLDEXTEXTBINOP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> |
| ; CHECK-NEXT: ret <4 x float> [[VECINSERT2]] |
| ; |
| %1 = extractelement <4 x float> %A, i32 0 |
| %2 = extractelement <4 x float> %B, i32 0 |
| %sub = fsub float %1, %2 |
| %3 = extractelement <4 x float> %A, i32 3 |
| %4 = extractelement <4 x float> %B, i32 3 |
| %add = fadd float %4, %3 |
| %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 0 |
| %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 3 |
| ret <4 x float> %vecinsert2 |
| } |
| |
| define <4 x float> @test_addsub_v4f32_partial_12(<4 x float> %A, <4 x float> %B) { |
| ; CHECK-LABEL: @test_addsub_v4f32_partial_12( |
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32> <i32 0, i32 3, i32 poison, i32 poison> |
| ; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> <float undef, float poison, float poison, float undef>, <4 x i32> <i32 4, i32 0, i32 1, i32 7> |
| ; CHECK-NEXT: ret <4 x float> [[VECINSERT21]] |
| ; |
| %1 = extractelement <4 x float> %A, i32 2 |
| %2 = extractelement <4 x float> %B, i32 2 |
| %sub = fsub float %1, %2 |
| %3 = extractelement <4 x float> %A, i32 1 |
| %4 = extractelement <4 x float> %B, i32 1 |
| %add = fadd float %3, %4 |
| %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 2 |
| %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 1 |
| ret <4 x float> %vecinsert2 |
| } |
| |
| define <4 x float> @test_addsub_v4f32_partial_01(<4 x float> %A, <4 x float> %B) { |
| ; CHECK-LABEL: @test_addsub_v4f32_partial_01( |
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x i32> <i32 0, i32 3> |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <4 x float> [[TMP6]] |
| ; |
| %1 = extractelement <4 x float> %A, i32 0 |
| %2 = extractelement <4 x float> %B, i32 0 |
| %sub2 = fsub float %1, %2 |
| %3 = extractelement <4 x float> %A, i32 1 |
| %4 = extractelement <4 x float> %B, i32 1 |
| %add2 = fadd float %3, %4 |
| %vecinsert1 = insertelement <4 x float> undef, float %sub2, i32 0 |
| %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 1 |
| ret <4 x float> %vecinsert2 |
| } |
| |
| define <4 x float> @PR45015(<4 x float> %arg, <4 x float> %arg1) { |
| ; CHECK-LABEL: @PR45015( |
| ; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[ARG:%.*]], [[ARG1:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[ARG]], [[ARG1]] |
| ; CHECK-NEXT: [[T16:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> |
| ; CHECK-NEXT: ret <4 x float> [[T16]] |
| ; |
| %t = extractelement <4 x float> %arg, i32 0 |
| %t2 = extractelement <4 x float> %arg1, i32 0 |
| %t3 = fsub float %t, %t2 |
| %t4 = insertelement <4 x float> undef, float %t3, i32 0 |
| %t5 = extractelement <4 x float> %arg, i32 1 |
| %t6 = extractelement <4 x float> %arg1, i32 1 |
| %t7 = fadd float %t5, %t6 |
| %t8 = insertelement <4 x float> %t4, float %t7, i32 1 |
| %t9 = extractelement <4 x float> %arg, i32 2 |
| %t10 = extractelement <4 x float> %arg1, i32 2 |
| %t11 = fsub float %t9, %t10 |
| %t12 = insertelement <4 x float> %t8, float %t11, i32 2 |
| %t13 = extractelement <4 x float> %arg, i32 3 |
| %t14 = extractelement <4 x float> %arg1, i32 3 |
| %t15 = fadd float %t13, %t14 |
| %t16 = insertelement <4 x float> %t12, float %t15, i32 3 |
| ret <4 x float> %t16 |
| } |
| |
| ; PR42022 - https://bugs.llvm.org/show_bug.cgi?id=42022 |
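| ; The two tests below check that fully scalarized aggregate adds are |
| ; re-vectorized into whole-vector fadds, once returned as a |
| ; { <2 x float>, <2 x float> } aggregate and once stored through a pointer. |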
| |
| %struct.Vector4 = type { float, float, float, float } |
| |
| define { <2 x float>, <2 x float> } @add_aggregate(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1) { |
| ; CHECK-LABEL: @add_aggregate( |
| ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]] |
| ; CHECK-NEXT: [[FCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP1]], 0 |
| ; CHECK-NEXT: [[FCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[FCA_0_INSERT]], <2 x float> [[TMP2]], 1 |
| ; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[FCA_1_INSERT]] |
| ; |
| %a00 = extractelement <2 x float> %a0, i32 0 |
| %b00 = extractelement <2 x float> %b0, i32 0 |
| %add = fadd float %a00, %b00 |
| %retval.0.0.insert = insertelement <2 x float> undef, float %add, i32 0 |
| %a01 = extractelement <2 x float> %a0, i32 1 |
| %b01 = extractelement <2 x float> %b0, i32 1 |
| %add4 = fadd float %a01, %b01 |
| %retval.0.1.insert = insertelement <2 x float> %retval.0.0.insert, float %add4, i32 1 |
| %a10 = extractelement <2 x float> %a1, i32 0 |
| %b10 = extractelement <2 x float> %b1, i32 0 |
| %add7 = fadd float %a10, %b10 |
| %retval.1.0.insert = insertelement <2 x float> undef, float %add7, i32 0 |
| %a11 = extractelement <2 x float> %a1, i32 1 |
| %b11 = extractelement <2 x float> %b1, i32 1 |
| %add10 = fadd float %a11, %b11 |
| %retval.1.1.insert = insertelement <2 x float> %retval.1.0.insert, float %add10, i32 1 |
| %fca.0.insert = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> %retval.0.1.insert, 0 |
| %fca.1.insert = insertvalue { <2 x float>, <2 x float> } %fca.0.insert, <2 x float> %retval.1.1.insert, 1 |
| ret { <2 x float>, <2 x float> } %fca.1.insert |
| } |
| |
| define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1, ptr nocapture dereferenceable(16) %r) { |
| ; CHECK-LABEL: @add_aggregate_store( |
| ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[R:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %a00 = extractelement <2 x float> %a0, i32 0 |
| %b00 = extractelement <2 x float> %b0, i32 0 |
| %add = fadd float %a00, %b00 |
| store float %add, ptr %r, align 4 |
| %a01 = extractelement <2 x float> %a0, i32 1 |
| %b01 = extractelement <2 x float> %b0, i32 1 |
| %add4 = fadd float %a01, %b01 |
| %r1 = getelementptr inbounds %struct.Vector4, ptr %r, i64 0, i32 1 |
| store float %add4, ptr %r1, align 4 |
| %a10 = extractelement <2 x float> %a1, i32 0 |
| %b10 = extractelement <2 x float> %b1, i32 0 |
| %add7 = fadd float %a10, %b10 |
| %r2 = getelementptr inbounds %struct.Vector4, ptr %r, i64 0, i32 2 |
| store float %add7, ptr %r2, align 4 |
| %a11 = extractelement <2 x float> %a1, i32 1 |
| %b11 = extractelement <2 x float> %b1, i32 1 |
| %add10 = fadd float %a11, %b11 |
| %r3 = getelementptr inbounds %struct.Vector4, ptr %r, i64 0, i32 3 |
| store float %add10, ptr %r3, align 4 |
| ret void |
| } |
| |
| ; PR58139 |
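| ; Naive complex multiply: lane 0 = a0*b0 - a1*b1 (real part) and |
| ; lane 1 = a0*b1 + a1*b0 (imaginary part), built with scalar fmuladd calls. |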
| define <2 x double> @_mm_complexmult_pd_naive(<2 x double> %a, <2 x double> %b) { |
| ; SSE-LABEL: @_mm_complexmult_pd_naive( |
| ; SSE-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 1 |
| ; SSE-NEXT: [[TMP1:%.*]] = fneg double [[B1]] |
| ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 1> |
| ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 poison, i32 0> |
| ; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TMP1]], i64 0 |
| ; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] |
| ; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; SSE-NEXT: [[TMP7:%.*]] = tail call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> [[B]], <2 x double> [[TMP5]]) |
| ; SSE-NEXT: ret <2 x double> [[TMP7]] |
| ; |
| ; AVX-LABEL: @_mm_complexmult_pd_naive( |
| ; AVX-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 |
| ; AVX-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i64 1 |
| ; AVX-NEXT: [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 |
| ; AVX-NEXT: [[B1:%.*]] = extractelement <2 x double> [[B]], i64 1 |
| ; AVX-NEXT: [[MUL10:%.*]] = fmul double [[A1]], [[B0]] |
| ; AVX-NEXT: [[TMP1:%.*]] = fneg double [[B1]] |
| ; AVX-NEXT: [[NEG11:%.*]] = fmul double [[A1]], [[TMP1]] |
| ; AVX-NEXT: [[MADD0:%.*]] = tail call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[NEG11]]) |
| ; AVX-NEXT: [[MADD1:%.*]] = tail call double @llvm.fmuladd.f64(double [[A0]], double [[B1]], double [[MUL10]]) |
| ; AVX-NEXT: [[RES0:%.*]] = insertelement <2 x double> poison, double [[MADD0]], i64 0 |
| ; AVX-NEXT: [[RES1:%.*]] = insertelement <2 x double> [[RES0]], double [[MADD1]], i64 1 |
| ; AVX-NEXT: ret <2 x double> [[RES1]] |
| ; |
| %a0 = extractelement <2 x double> %a, i32 0 |
| %a1 = extractelement <2 x double> %a, i32 1 |
| %b0 = extractelement <2 x double> %b, i32 0 |
| %b1 = extractelement <2 x double> %b, i32 1 |
| %mul10 = fmul double %a1, %b0 |
| %mul11 = fmul double %a1, %b1 |
| %neg11 = fneg double %mul11 |
| %madd0 = call double @llvm.fmuladd.f64(double %a0, double %b0, double %neg11) |
| %madd1 = call double @llvm.fmuladd.f64(double %a0, double %b1, double %mul10) |
| %res0 = insertelement <2 x double> undef, double %madd0, i32 0 |
| %res1 = insertelement <2 x double> %res0, double %madd1, i32 1 |
| ret <2 x double> %res1 |
| } |
| |
| declare double @llvm.fmuladd.f64(double, double, double) |