| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -passes=lower-matrix-intrinsics -matrix-allow-contract -force-fuse-matrix -fuse-matrix-tile-size=2 %s -S | FileCheck %s |
| |
| ; REQUIRES: aarch64-registered-target |
| |
| ; Module-level target settings. The triple selects AArch64 (Apple iOS), which |
| ; is why this test requires the aarch64-registered-target feature above. |
| ; NOTE(review): the datalayout string carries an f80:128 entry, which looks |
| ; like the x86-64 Darwin layout rather than an AArch64 one -- presumably |
| ; inherited from a sibling test. Confirm before changing it: the CHECK lines |
| ; are autogenerated against this exact configuration. |
| target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" |
| target triple = "aarch64-apple-ios" |
| |
| define void @multiply_6x6x6(ptr noalias %A, ptr noalias %B, ptr noalias %C) { |
| ; CHECK-LABEL: define void @multiply_6x6x6( |
| ; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[A]], i64 0 |
| ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP0]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[B]], i64 0 |
| ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP1]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP7:%.*]] = getelementptr double, ptr [[TMP1]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP7]], align 8 |
| ; CHECK-NEXT: [[BLOCK85:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP62:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT86:%.*]] = insertelement <2 x double> poison, double [[TMP62]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT87:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT86]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP63:%.*]] = fmul contract <2 x double> [[BLOCK85]], [[SPLAT_SPLAT87]] |
| ; CHECK-NEXT: [[BLOCK88:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP64:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT89:%.*]] = insertelement <2 x double> poison, double [[TMP64]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT90:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT89]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP65:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK88]], <2 x double> [[SPLAT_SPLAT90]], <2 x double> [[TMP63]]) |
| ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP65]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK98:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP72:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT99:%.*]] = insertelement <2 x double> poison, double [[TMP72]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT100:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT99]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP73:%.*]] = fmul contract <2 x double> [[BLOCK98]], [[SPLAT_SPLAT100]] |
| ; CHECK-NEXT: [[BLOCK101:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP74:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT102:%.*]] = insertelement <2 x double> poison, double [[TMP74]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT103:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT102]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP75:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK101]], <2 x double> [[SPLAT_SPLAT103]], <2 x double> [[TMP73]]) |
| ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP75]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP12]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[A]], i64 12 |
| ; CHECK-NEXT: [[COL_LOAD14:%.*]] = load <2 x double>, ptr [[TMP14]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP15:%.*]] = getelementptr double, ptr [[TMP14]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD16:%.*]] = load <2 x double>, ptr [[VEC_GEP15]], align 8 |
| ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[B]], i64 2 |
| ; CHECK-NEXT: [[COL_LOAD17:%.*]] = load <2 x double>, ptr [[TMP15]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr double, ptr [[TMP15]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD19:%.*]] = load <2 x double>, ptr [[VEC_GEP18]], align 8 |
| ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <2 x double> [[COL_LOAD14]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP76:%.*]] = extractelement <2 x double> [[COL_LOAD17]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT105:%.*]] = insertelement <2 x double> poison, double [[TMP76]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT106:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT105]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP77:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK21]], <2 x double> [[SPLAT_SPLAT106]], <2 x double> [[BLOCK20]]) |
| ; CHECK-NEXT: [[BLOCK107:%.*]] = shufflevector <2 x double> [[COL_LOAD16]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP78:%.*]] = extractelement <2 x double> [[COL_LOAD17]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT108:%.*]] = insertelement <2 x double> poison, double [[TMP78]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT109:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT108]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP79:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK107]], <2 x double> [[SPLAT_SPLAT109]], <2 x double> [[TMP77]]) |
| ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP79]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP20]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK28:%.*]] = shufflevector <2 x double> [[COL_LOAD14]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD19]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT29:%.*]] = insertelement <2 x double> poison, double [[TMP22]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT29]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP23:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK28]], <2 x double> [[SPLAT_SPLAT30]], <2 x double> [[BLOCK27]]) |
| ; CHECK-NEXT: [[BLOCK31:%.*]] = shufflevector <2 x double> [[COL_LOAD16]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x double> [[COL_LOAD19]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT32:%.*]] = insertelement <2 x double> poison, double [[TMP24]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT32]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP25:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK31]], <2 x double> [[SPLAT_SPLAT33]], <2 x double> [[TMP23]]) |
| ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x double> [[TMP25]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP26]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP82:%.*]] = getelementptr double, ptr [[A]], i64 24 |
| ; CHECK-NEXT: [[COL_LOAD34:%.*]] = load <2 x double>, ptr [[TMP82]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP111:%.*]] = getelementptr double, ptr [[TMP82]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD36:%.*]] = load <2 x double>, ptr [[VEC_GEP111]], align 8 |
| ; CHECK-NEXT: [[TMP83:%.*]] = getelementptr double, ptr [[B]], i64 4 |
| ; CHECK-NEXT: [[COL_LOAD113:%.*]] = load <2 x double>, ptr [[TMP83]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP114:%.*]] = getelementptr double, ptr [[TMP83]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD39:%.*]] = load <2 x double>, ptr [[VEC_GEP114]], align 8 |
| ; CHECK-NEXT: [[BLOCK120:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK121:%.*]] = shufflevector <2 x double> [[COL_LOAD34]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP84:%.*]] = extractelement <2 x double> [[COL_LOAD113]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT122:%.*]] = insertelement <2 x double> poison, double [[TMP84]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT123:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT122]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP85:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK121]], <2 x double> [[SPLAT_SPLAT123]], <2 x double> [[BLOCK120]]) |
| ; CHECK-NEXT: [[BLOCK124:%.*]] = shufflevector <2 x double> [[COL_LOAD36]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP86:%.*]] = extractelement <2 x double> [[COL_LOAD113]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT125:%.*]] = insertelement <2 x double> poison, double [[TMP86]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT126:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT125]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP87:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK124]], <2 x double> [[SPLAT_SPLAT126]], <2 x double> [[TMP85]]) |
| ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <2 x double> [[TMP87]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> [[TMP34]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK127:%.*]] = shufflevector <2 x double> [[TMP27]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK128:%.*]] = shufflevector <2 x double> [[COL_LOAD34]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP90:%.*]] = extractelement <2 x double> [[COL_LOAD39]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT129:%.*]] = insertelement <2 x double> poison, double [[TMP90]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT130:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT129]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP91:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK128]], <2 x double> [[SPLAT_SPLAT130]], <2 x double> [[BLOCK127]]) |
| ; CHECK-NEXT: [[BLOCK131:%.*]] = shufflevector <2 x double> [[COL_LOAD36]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP92:%.*]] = extractelement <2 x double> [[COL_LOAD39]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT132:%.*]] = insertelement <2 x double> poison, double [[TMP92]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT133:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT132]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP93:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK131]], <2 x double> [[SPLAT_SPLAT133]], <2 x double> [[TMP91]]) |
| ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x double> [[TMP93]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <2 x double> [[TMP27]], <2 x double> [[TMP40]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr double, ptr [[C]], i64 0 |
| ; CHECK-NEXT: store <2 x double> [[TMP35]], ptr [[TMP42]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP54:%.*]] = getelementptr double, ptr [[TMP42]], i64 6 |
| ; CHECK-NEXT: store <2 x double> [[TMP41]], ptr [[VEC_GEP54]], align 8 |
| ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[A]], i64 2 |
| ; CHECK-NEXT: [[COL_LOAD55:%.*]] = load <2 x double>, ptr [[TMP43]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP56:%.*]] = getelementptr double, ptr [[TMP43]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD57:%.*]] = load <2 x double>, ptr [[VEC_GEP56]], align 8 |
| ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[B]], i64 0 |
| ; CHECK-NEXT: [[COL_LOAD115:%.*]] = load <2 x double>, ptr [[TMP44]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP59:%.*]] = getelementptr double, ptr [[TMP44]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD60:%.*]] = load <2 x double>, ptr [[VEC_GEP59]], align 8 |
| ; CHECK-NEXT: [[BLOCK61:%.*]] = shufflevector <2 x double> [[COL_LOAD55]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP96:%.*]] = extractelement <2 x double> [[COL_LOAD115]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT136:%.*]] = insertelement <2 x double> poison, double [[TMP96]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT137:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT136]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP97:%.*]] = fmul contract <2 x double> [[BLOCK61]], [[SPLAT_SPLAT137]] |
| ; CHECK-NEXT: [[BLOCK138:%.*]] = shufflevector <2 x double> [[COL_LOAD57]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP98:%.*]] = extractelement <2 x double> [[COL_LOAD115]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT139:%.*]] = insertelement <2 x double> poison, double [[TMP98]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT140:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT139]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP99:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK138]], <2 x double> [[SPLAT_SPLAT140]], <2 x double> [[TMP97]]) |
| ; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <2 x double> [[TMP99]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP49]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK67:%.*]] = shufflevector <2 x double> [[COL_LOAD55]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP102:%.*]] = extractelement <2 x double> [[COL_LOAD60]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT143:%.*]] = insertelement <2 x double> poison, double [[TMP102]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT144:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT143]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP103:%.*]] = fmul contract <2 x double> [[BLOCK67]], [[SPLAT_SPLAT144]] |
| ; CHECK-NEXT: [[BLOCK145:%.*]] = shufflevector <2 x double> [[COL_LOAD57]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP104:%.*]] = extractelement <2 x double> [[COL_LOAD60]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT146:%.*]] = insertelement <2 x double> poison, double [[TMP104]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT147:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT146]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP105:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK145]], <2 x double> [[SPLAT_SPLAT147]], <2 x double> [[TMP103]]) |
| ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <2 x double> [[TMP105]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP55]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr double, ptr [[A]], i64 14 |
| ; CHECK-NEXT: [[COL_LOAD73:%.*]] = load <2 x double>, ptr [[TMP57]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP74:%.*]] = getelementptr double, ptr [[TMP57]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD75:%.*]] = load <2 x double>, ptr [[VEC_GEP74]], align 8 |
| ; CHECK-NEXT: [[TMP58:%.*]] = getelementptr double, ptr [[B]], i64 2 |
| ; CHECK-NEXT: [[COL_LOAD117:%.*]] = load <2 x double>, ptr [[TMP58]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP77:%.*]] = getelementptr double, ptr [[TMP58]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD78:%.*]] = load <2 x double>, ptr [[VEC_GEP77]], align 8 |
| ; CHECK-NEXT: [[BLOCK148:%.*]] = shufflevector <2 x double> [[TMP50]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK164:%.*]] = shufflevector <2 x double> [[COL_LOAD73]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP108:%.*]] = extractelement <2 x double> [[COL_LOAD117]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT150:%.*]] = insertelement <2 x double> poison, double [[TMP108]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT151:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT150]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP109:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK164]], <2 x double> [[SPLAT_SPLAT151]], <2 x double> [[BLOCK148]]) |
| ; CHECK-NEXT: [[BLOCK152:%.*]] = shufflevector <2 x double> [[COL_LOAD75]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP110:%.*]] = extractelement <2 x double> [[COL_LOAD117]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT153:%.*]] = insertelement <2 x double> poison, double [[TMP110]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT154:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT153]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP111:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK152]], <2 x double> [[SPLAT_SPLAT154]], <2 x double> [[TMP109]]) |
| ; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <2 x double> [[TMP111]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <2 x double> [[TMP50]], <2 x double> [[TMP66]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK155:%.*]] = shufflevector <2 x double> [[TMP56]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK156:%.*]] = shufflevector <2 x double> [[COL_LOAD73]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP114:%.*]] = extractelement <2 x double> [[COL_LOAD78]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT157:%.*]] = insertelement <2 x double> poison, double [[TMP114]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT158:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT157]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP115:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK156]], <2 x double> [[SPLAT_SPLAT158]], <2 x double> [[BLOCK155]]) |
| ; CHECK-NEXT: [[BLOCK159:%.*]] = shufflevector <2 x double> [[COL_LOAD75]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP116:%.*]] = extractelement <2 x double> [[COL_LOAD78]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT160:%.*]] = insertelement <2 x double> poison, double [[TMP116]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT161:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT160]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP117:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK159]], <2 x double> [[SPLAT_SPLAT161]], <2 x double> [[TMP115]]) |
| ; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <2 x double> [[TMP117]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <2 x double> [[TMP56]], <2 x double> [[TMP69]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr double, ptr [[A]], i64 26 |
| ; CHECK-NEXT: [[COL_LOAD93:%.*]] = load <2 x double>, ptr [[TMP71]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP94:%.*]] = getelementptr double, ptr [[TMP71]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD95:%.*]] = load <2 x double>, ptr [[VEC_GEP94]], align 8 |
| ; CHECK-NEXT: [[TMP80:%.*]] = getelementptr double, ptr [[B]], i64 4 |
| ; CHECK-NEXT: [[COL_LOAD119:%.*]] = load <2 x double>, ptr [[TMP80]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP97:%.*]] = getelementptr double, ptr [[TMP80]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD98:%.*]] = load <2 x double>, ptr [[VEC_GEP97]], align 8 |
| ; CHECK-NEXT: [[BLOCK162:%.*]] = shufflevector <2 x double> [[TMP67]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK163:%.*]] = shufflevector <2 x double> [[COL_LOAD93]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP120:%.*]] = extractelement <2 x double> [[COL_LOAD119]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT164:%.*]] = insertelement <2 x double> poison, double [[TMP120]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT165:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT164]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP121:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK163]], <2 x double> [[SPLAT_SPLAT165]], <2 x double> [[BLOCK162]]) |
| ; CHECK-NEXT: [[BLOCK166:%.*]] = shufflevector <2 x double> [[COL_LOAD95]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP122:%.*]] = extractelement <2 x double> [[COL_LOAD119]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT167:%.*]] = insertelement <2 x double> poison, double [[TMP122]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT168:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT167]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP123:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK166]], <2 x double> [[SPLAT_SPLAT168]], <2 x double> [[TMP121]]) |
| ; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <2 x double> [[TMP123]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <2 x double> [[TMP67]], <2 x double> [[TMP81]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK169:%.*]] = shufflevector <2 x double> [[TMP70]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK170:%.*]] = shufflevector <2 x double> [[COL_LOAD93]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP126:%.*]] = extractelement <2 x double> [[COL_LOAD98]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT171:%.*]] = insertelement <2 x double> poison, double [[TMP126]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT172:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT171]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP127:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK170]], <2 x double> [[SPLAT_SPLAT172]], <2 x double> [[BLOCK169]]) |
| ; CHECK-NEXT: [[BLOCK173:%.*]] = shufflevector <2 x double> [[COL_LOAD95]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP128:%.*]] = extractelement <2 x double> [[COL_LOAD98]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT174:%.*]] = insertelement <2 x double> poison, double [[TMP128]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT175:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT174]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP129:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK173]], <2 x double> [[SPLAT_SPLAT175]], <2 x double> [[TMP127]]) |
| ; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <2 x double> [[TMP129]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP94:%.*]] = shufflevector <2 x double> [[TMP70]], <2 x double> [[TMP89]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP132:%.*]] = getelementptr double, ptr [[C]], i64 2 |
| ; CHECK-NEXT: store <2 x double> [[TMP88]], ptr [[TMP132]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP176:%.*]] = getelementptr double, ptr [[TMP132]], i64 6 |
| ; CHECK-NEXT: store <2 x double> [[TMP94]], ptr [[VEC_GEP176]], align 8 |
| ; CHECK-NEXT: [[TMP133:%.*]] = getelementptr double, ptr [[A]], i64 4 |
| ; CHECK-NEXT: [[COL_LOAD179:%.*]] = load <2 x double>, ptr [[TMP133]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP180:%.*]] = getelementptr double, ptr [[TMP133]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD181:%.*]] = load <2 x double>, ptr [[VEC_GEP180]], align 8 |
| ; CHECK-NEXT: [[TMP134:%.*]] = getelementptr double, ptr [[B]], i64 0 |
| ; CHECK-NEXT: [[COL_LOAD118:%.*]] = load <2 x double>, ptr [[TMP134]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP187:%.*]] = getelementptr double, ptr [[TMP134]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD120:%.*]] = load <2 x double>, ptr [[VEC_GEP187]], align 8 |
| ; CHECK-NEXT: [[BLOCK193:%.*]] = shufflevector <2 x double> [[COL_LOAD179]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP135:%.*]] = extractelement <2 x double> [[COL_LOAD118]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT194:%.*]] = insertelement <2 x double> poison, double [[TMP135]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT195:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT194]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP136:%.*]] = fmul contract <2 x double> [[BLOCK193]], [[SPLAT_SPLAT195]] |
| ; CHECK-NEXT: [[BLOCK196:%.*]] = shufflevector <2 x double> [[COL_LOAD181]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP137:%.*]] = extractelement <2 x double> [[COL_LOAD118]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT197:%.*]] = insertelement <2 x double> poison, double [[TMP137]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT198:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT197]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP138:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK196]], <2 x double> [[SPLAT_SPLAT198]], <2 x double> [[TMP136]]) |
| ; CHECK-NEXT: [[TMP143:%.*]] = shufflevector <2 x double> [[TMP138]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP144:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP143]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK205:%.*]] = shufflevector <2 x double> [[COL_LOAD179]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP145:%.*]] = extractelement <2 x double> [[COL_LOAD120]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT206:%.*]] = insertelement <2 x double> poison, double [[TMP145]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT207:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT206]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP146:%.*]] = fmul contract <2 x double> [[BLOCK205]], [[SPLAT_SPLAT207]] |
| ; CHECK-NEXT: [[BLOCK208:%.*]] = shufflevector <2 x double> [[COL_LOAD181]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP147:%.*]] = extractelement <2 x double> [[COL_LOAD120]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT209:%.*]] = insertelement <2 x double> poison, double [[TMP147]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT210:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT209]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP148:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK208]], <2 x double> [[SPLAT_SPLAT210]], <2 x double> [[TMP146]]) |
| ; CHECK-NEXT: [[TMP153:%.*]] = shufflevector <2 x double> [[TMP148]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP154:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP153]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP100:%.*]] = getelementptr double, ptr [[A]], i64 16 |
| ; CHECK-NEXT: [[COL_LOAD132:%.*]] = load <2 x double>, ptr [[TMP100]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP133:%.*]] = getelementptr double, ptr [[TMP100]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD134:%.*]] = load <2 x double>, ptr [[VEC_GEP133]], align 8 |
| ; CHECK-NEXT: [[TMP101:%.*]] = getelementptr double, ptr [[B]], i64 2 |
| ; CHECK-NEXT: [[COL_LOAD135:%.*]] = load <2 x double>, ptr [[TMP101]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP136:%.*]] = getelementptr double, ptr [[TMP101]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD137:%.*]] = load <2 x double>, ptr [[VEC_GEP136]], align 8 |
| ; CHECK-NEXT: [[BLOCK140:%.*]] = shufflevector <2 x double> [[TMP144]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK139:%.*]] = shufflevector <2 x double> [[COL_LOAD132]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP155:%.*]] = extractelement <2 x double> [[COL_LOAD135]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT218:%.*]] = insertelement <2 x double> poison, double [[TMP155]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT141:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT218]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP139:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK139]], <2 x double> [[SPLAT_SPLAT141]], <2 x double> [[BLOCK140]]) |
| ; CHECK-NEXT: [[BLOCK142:%.*]] = shufflevector <2 x double> [[COL_LOAD134]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP140:%.*]] = extractelement <2 x double> [[COL_LOAD135]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT144:%.*]] = insertelement <2 x double> poison, double [[TMP140]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT145:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT144]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP141:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK142]], <2 x double> [[SPLAT_SPLAT145]], <2 x double> [[TMP139]]) |
| ; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <2 x double> [[TMP141]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <2 x double> [[TMP144]], <2 x double> [[TMP106]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK147:%.*]] = shufflevector <2 x double> [[TMP154]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK146:%.*]] = shufflevector <2 x double> [[COL_LOAD132]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP142:%.*]] = extractelement <2 x double> [[COL_LOAD137]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT147:%.*]] = insertelement <2 x double> poison, double [[TMP142]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT148:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT147]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP149:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK146]], <2 x double> [[SPLAT_SPLAT148]], <2 x double> [[BLOCK147]]) |
| ; CHECK-NEXT: [[BLOCK149:%.*]] = shufflevector <2 x double> [[COL_LOAD134]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP151:%.*]] = extractelement <2 x double> [[COL_LOAD137]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT151:%.*]] = insertelement <2 x double> poison, double [[TMP151]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT152:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT151]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP152:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK149]], <2 x double> [[SPLAT_SPLAT152]], <2 x double> [[TMP149]]) |
| ; CHECK-NEXT: [[TMP112:%.*]] = shufflevector <2 x double> [[TMP152]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP113:%.*]] = shufflevector <2 x double> [[TMP154]], <2 x double> [[TMP112]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP159:%.*]] = getelementptr double, ptr [[A]], i64 28 |
| ; CHECK-NEXT: [[COL_LOAD152:%.*]] = load <2 x double>, ptr [[TMP159]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP153:%.*]] = getelementptr double, ptr [[TMP159]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD154:%.*]] = load <2 x double>, ptr [[VEC_GEP153]], align 8 |
| ; CHECK-NEXT: [[TMP160:%.*]] = getelementptr double, ptr [[B]], i64 4 |
| ; CHECK-NEXT: [[COL_LOAD155:%.*]] = load <2 x double>, ptr [[TMP160]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP156:%.*]] = getelementptr double, ptr [[TMP160]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD157:%.*]] = load <2 x double>, ptr [[VEC_GEP156]], align 8 |
| ; CHECK-NEXT: [[BLOCK158:%.*]] = shufflevector <2 x double> [[TMP107]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK160:%.*]] = shufflevector <2 x double> [[COL_LOAD152]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP164:%.*]] = extractelement <2 x double> [[COL_LOAD155]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT161:%.*]] = insertelement <2 x double> poison, double [[TMP164]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT162:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT161]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP167:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK160]], <2 x double> [[SPLAT_SPLAT162]], <2 x double> [[BLOCK158]]) |
| ; CHECK-NEXT: [[BLOCK167:%.*]] = shufflevector <2 x double> [[COL_LOAD154]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP118:%.*]] = extractelement <2 x double> [[COL_LOAD155]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT163:%.*]] = insertelement <2 x double> poison, double [[TMP118]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT164:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT163]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP119:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK167]], <2 x double> [[SPLAT_SPLAT164]], <2 x double> [[TMP167]]) |
| ; CHECK-NEXT: [[TMP168:%.*]] = shufflevector <2 x double> [[TMP119]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP169:%.*]] = shufflevector <2 x double> [[TMP107]], <2 x double> [[TMP168]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK165:%.*]] = shufflevector <2 x double> [[TMP113]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK168:%.*]] = shufflevector <2 x double> [[COL_LOAD152]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP170:%.*]] = extractelement <2 x double> [[COL_LOAD157]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT168:%.*]] = insertelement <2 x double> poison, double [[TMP170]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT169:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT168]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP213:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK168]], <2 x double> [[SPLAT_SPLAT169]], <2 x double> [[BLOCK165]]) |
| ; CHECK-NEXT: [[BLOCK171:%.*]] = shufflevector <2 x double> [[COL_LOAD154]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP124:%.*]] = extractelement <2 x double> [[COL_LOAD157]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT170:%.*]] = insertelement <2 x double> poison, double [[TMP124]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT171:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT170]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP125:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK171]], <2 x double> [[SPLAT_SPLAT171]], <2 x double> [[TMP213]]) |
| ; CHECK-NEXT: [[TMP223:%.*]] = shufflevector <2 x double> [[TMP125]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP231:%.*]] = shufflevector <2 x double> [[TMP113]], <2 x double> [[TMP223]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP232:%.*]] = getelementptr double, ptr [[C]], i64 4 |
| ; CHECK-NEXT: store <2 x double> [[TMP169]], ptr [[TMP232]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP172:%.*]] = getelementptr double, ptr [[TMP232]], i64 6 |
| ; CHECK-NEXT: store <2 x double> [[TMP231]], ptr [[VEC_GEP172]], align 8 |
| ; CHECK-NEXT: [[TMP233:%.*]] = getelementptr double, ptr [[A]], i64 0 |
| ; CHECK-NEXT: [[COL_LOAD173:%.*]] = load <2 x double>, ptr [[TMP233]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP174:%.*]] = getelementptr double, ptr [[TMP233]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD185:%.*]] = load <2 x double>, ptr [[VEC_GEP174]], align 8 |
| ; CHECK-NEXT: [[TMP130:%.*]] = getelementptr double, ptr [[B]], i64 12 |
| ; CHECK-NEXT: [[COL_LOAD176:%.*]] = load <2 x double>, ptr [[TMP130]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP177:%.*]] = getelementptr double, ptr [[TMP130]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD178:%.*]] = load <2 x double>, ptr [[VEC_GEP177]], align 8 |
| ; CHECK-NEXT: [[BLOCK217:%.*]] = shufflevector <2 x double> [[COL_LOAD173]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP131:%.*]] = extractelement <2 x double> [[COL_LOAD176]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT180:%.*]] = insertelement <2 x double> poison, double [[TMP131]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT219:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT180]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP156:%.*]] = fmul contract <2 x double> [[BLOCK217]], [[SPLAT_SPLAT219]] |
| ; CHECK-NEXT: [[BLOCK226:%.*]] = shufflevector <2 x double> [[COL_LOAD185]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP161:%.*]] = extractelement <2 x double> [[COL_LOAD176]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT227:%.*]] = insertelement <2 x double> poison, double [[TMP161]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT228:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT227]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP162:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK226]], <2 x double> [[SPLAT_SPLAT228]], <2 x double> [[TMP156]]) |
| ; CHECK-NEXT: [[TMP163:%.*]] = shufflevector <2 x double> [[TMP162]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP308:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP163]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK229:%.*]] = shufflevector <2 x double> [[COL_LOAD173]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP165:%.*]] = extractelement <2 x double> [[COL_LOAD178]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT230:%.*]] = insertelement <2 x double> poison, double [[TMP165]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT231:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT230]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP166:%.*]] = fmul contract <2 x double> [[BLOCK229]], [[SPLAT_SPLAT231]] |
| ; CHECK-NEXT: [[BLOCK238:%.*]] = shufflevector <2 x double> [[COL_LOAD185]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP171:%.*]] = extractelement <2 x double> [[COL_LOAD178]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT239:%.*]] = insertelement <2 x double> poison, double [[TMP171]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT240:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT239]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP172:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK238]], <2 x double> [[SPLAT_SPLAT240]], <2 x double> [[TMP166]]) |
| ; CHECK-NEXT: [[TMP173:%.*]] = shufflevector <2 x double> [[TMP172]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP174:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP173]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP175:%.*]] = getelementptr double, ptr [[A]], i64 12 |
| ; CHECK-NEXT: [[COL_LOAD241:%.*]] = load <2 x double>, ptr [[TMP175]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP242:%.*]] = getelementptr double, ptr [[TMP175]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD243:%.*]] = load <2 x double>, ptr [[VEC_GEP242]], align 8 |
| ; CHECK-NEXT: [[TMP176:%.*]] = getelementptr double, ptr [[B]], i64 14 |
| ; CHECK-NEXT: [[COL_LOAD244:%.*]] = load <2 x double>, ptr [[TMP176]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP245:%.*]] = getelementptr double, ptr [[TMP176]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD246:%.*]] = load <2 x double>, ptr [[VEC_GEP245]], align 8 |
| ; CHECK-NEXT: [[BLOCK251:%.*]] = shufflevector <2 x double> [[TMP308]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK252:%.*]] = shufflevector <2 x double> [[COL_LOAD241]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP177:%.*]] = extractelement <2 x double> [[COL_LOAD244]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT253:%.*]] = insertelement <2 x double> poison, double [[TMP177]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT254:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT253]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP178:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK252]], <2 x double> [[SPLAT_SPLAT254]], <2 x double> [[BLOCK251]]) |
| ; CHECK-NEXT: [[BLOCK255:%.*]] = shufflevector <2 x double> [[COL_LOAD243]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP179:%.*]] = extractelement <2 x double> [[COL_LOAD244]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT256:%.*]] = insertelement <2 x double> poison, double [[TMP179]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT257:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT256]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP180:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK255]], <2 x double> [[SPLAT_SPLAT257]], <2 x double> [[TMP178]]) |
| ; CHECK-NEXT: [[TMP181:%.*]] = shufflevector <2 x double> [[TMP180]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP150:%.*]] = shufflevector <2 x double> [[TMP308]], <2 x double> [[TMP181]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK258:%.*]] = shufflevector <2 x double> [[TMP174]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK259:%.*]] = shufflevector <2 x double> [[COL_LOAD241]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP183:%.*]] = extractelement <2 x double> [[COL_LOAD246]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT260:%.*]] = insertelement <2 x double> poison, double [[TMP183]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT261:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT260]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP184:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK259]], <2 x double> [[SPLAT_SPLAT261]], <2 x double> [[BLOCK258]]) |
| ; CHECK-NEXT: [[BLOCK262:%.*]] = shufflevector <2 x double> [[COL_LOAD243]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP185:%.*]] = extractelement <2 x double> [[COL_LOAD246]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT263:%.*]] = insertelement <2 x double> poison, double [[TMP185]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT264:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT263]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP186:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK262]], <2 x double> [[SPLAT_SPLAT264]], <2 x double> [[TMP184]]) |
| ; CHECK-NEXT: [[TMP187:%.*]] = shufflevector <2 x double> [[TMP186]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP262:%.*]] = shufflevector <2 x double> [[TMP174]], <2 x double> [[TMP187]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP157:%.*]] = getelementptr double, ptr [[A]], i64 24 |
| ; CHECK-NEXT: [[COL_LOAD211:%.*]] = load <2 x double>, ptr [[TMP157]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP212:%.*]] = getelementptr double, ptr [[TMP157]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD213:%.*]] = load <2 x double>, ptr [[VEC_GEP212]], align 8 |
| ; CHECK-NEXT: [[TMP158:%.*]] = getelementptr double, ptr [[B]], i64 16 |
| ; CHECK-NEXT: [[COL_LOAD248:%.*]] = load <2 x double>, ptr [[TMP158]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP215:%.*]] = getelementptr double, ptr [[TMP158]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD250:%.*]] = load <2 x double>, ptr [[VEC_GEP215]], align 8 |
| ; CHECK-NEXT: [[BLOCK265:%.*]] = shufflevector <2 x double> [[TMP150]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK266:%.*]] = shufflevector <2 x double> [[COL_LOAD211]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP189:%.*]] = extractelement <2 x double> [[COL_LOAD248]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT267:%.*]] = insertelement <2 x double> poison, double [[TMP189]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT268:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT267]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP190:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK266]], <2 x double> [[SPLAT_SPLAT268]], <2 x double> [[BLOCK265]]) |
| ; CHECK-NEXT: [[BLOCK269:%.*]] = shufflevector <2 x double> [[COL_LOAD213]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP191:%.*]] = extractelement <2 x double> [[COL_LOAD248]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT270:%.*]] = insertelement <2 x double> poison, double [[TMP191]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT271:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT270]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP192:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK269]], <2 x double> [[SPLAT_SPLAT271]], <2 x double> [[TMP190]]) |
| ; CHECK-NEXT: [[TMP193:%.*]] = shufflevector <2 x double> [[TMP192]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP182:%.*]] = shufflevector <2 x double> [[TMP150]], <2 x double> [[TMP193]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK272:%.*]] = shufflevector <2 x double> [[TMP262]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK273:%.*]] = shufflevector <2 x double> [[COL_LOAD211]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP195:%.*]] = extractelement <2 x double> [[COL_LOAD250]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT274:%.*]] = insertelement <2 x double> poison, double [[TMP195]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT275:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT274]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP196:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK273]], <2 x double> [[SPLAT_SPLAT275]], <2 x double> [[BLOCK272]]) |
| ; CHECK-NEXT: [[BLOCK276:%.*]] = shufflevector <2 x double> [[COL_LOAD213]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP197:%.*]] = extractelement <2 x double> [[COL_LOAD250]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT277:%.*]] = insertelement <2 x double> poison, double [[TMP197]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT278:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT277]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP198:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK276]], <2 x double> [[SPLAT_SPLAT278]], <2 x double> [[TMP196]]) |
| ; CHECK-NEXT: [[TMP199:%.*]] = shufflevector <2 x double> [[TMP198]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP188:%.*]] = shufflevector <2 x double> [[TMP262]], <2 x double> [[TMP199]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP201:%.*]] = getelementptr double, ptr [[C]], i64 12 |
| ; CHECK-NEXT: store <2 x double> [[TMP182]], ptr [[TMP201]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP279:%.*]] = getelementptr double, ptr [[TMP201]], i64 6 |
| ; CHECK-NEXT: store <2 x double> [[TMP188]], ptr [[VEC_GEP279]], align 8 |
| ; CHECK-NEXT: [[TMP263:%.*]] = getelementptr double, ptr [[A]], i64 2 |
| ; CHECK-NEXT: [[COL_LOAD232:%.*]] = load <2 x double>, ptr [[TMP263]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP233:%.*]] = getelementptr double, ptr [[TMP263]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD234:%.*]] = load <2 x double>, ptr [[VEC_GEP233]], align 8 |
| ; CHECK-NEXT: [[TMP268:%.*]] = getelementptr double, ptr [[B]], i64 12 |
| ; CHECK-NEXT: [[COL_LOAD235:%.*]] = load <2 x double>, ptr [[TMP268]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP236:%.*]] = getelementptr double, ptr [[TMP268]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD237:%.*]] = load <2 x double>, ptr [[VEC_GEP236]], align 8 |
| ; CHECK-NEXT: [[BLOCK239:%.*]] = shufflevector <2 x double> [[COL_LOAD232]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP269:%.*]] = extractelement <2 x double> [[COL_LOAD235]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT240:%.*]] = insertelement <2 x double> poison, double [[TMP269]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT241:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT240]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP277:%.*]] = fmul contract <2 x double> [[BLOCK239]], [[SPLAT_SPLAT241]] |
| ; CHECK-NEXT: [[BLOCK241:%.*]] = shufflevector <2 x double> [[COL_LOAD234]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP287:%.*]] = extractelement <2 x double> [[COL_LOAD235]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT242:%.*]] = insertelement <2 x double> poison, double [[TMP287]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT243:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT242]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP288:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK241]], <2 x double> [[SPLAT_SPLAT243]], <2 x double> [[TMP277]]) |
| ; CHECK-NEXT: [[TMP289:%.*]] = shufflevector <2 x double> [[TMP288]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP290:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP289]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK244:%.*]] = shufflevector <2 x double> [[COL_LOAD232]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP309:%.*]] = extractelement <2 x double> [[COL_LOAD237]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT245:%.*]] = insertelement <2 x double> poison, double [[TMP309]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT246:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT245]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP317:%.*]] = fmul contract <2 x double> [[BLOCK244]], [[SPLAT_SPLAT246]] |
| ; CHECK-NEXT: [[BLOCK247:%.*]] = shufflevector <2 x double> [[COL_LOAD234]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP318:%.*]] = extractelement <2 x double> [[COL_LOAD237]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT248:%.*]] = insertelement <2 x double> poison, double [[TMP318]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT249:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT248]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP329:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK247]], <2 x double> [[SPLAT_SPLAT249]], <2 x double> [[TMP317]]) |
| ; CHECK-NEXT: [[TMP344:%.*]] = shufflevector <2 x double> [[TMP329]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP345:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP344]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP346:%.*]] = getelementptr double, ptr [[A]], i64 14 |
| ; CHECK-NEXT: [[COL_LOAD251:%.*]] = load <2 x double>, ptr [[TMP346]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP251:%.*]] = getelementptr double, ptr [[TMP346]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD252:%.*]] = load <2 x double>, ptr [[VEC_GEP251]], align 8 |
| ; CHECK-NEXT: [[TMP347:%.*]] = getelementptr double, ptr [[B]], i64 14 |
| ; CHECK-NEXT: [[COL_LOAD253:%.*]] = load <2 x double>, ptr [[TMP347]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP254:%.*]] = getelementptr double, ptr [[TMP347]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD255:%.*]] = load <2 x double>, ptr [[VEC_GEP254]], align 8 |
| ; CHECK-NEXT: [[BLOCK256:%.*]] = shufflevector <2 x double> [[TMP290]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK257:%.*]] = shufflevector <2 x double> [[COL_LOAD251]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP348:%.*]] = extractelement <2 x double> [[COL_LOAD253]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT258:%.*]] = insertelement <2 x double> poison, double [[TMP348]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT259:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT258]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP349:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK257]], <2 x double> [[SPLAT_SPLAT259]], <2 x double> [[BLOCK256]]) |
| ; CHECK-NEXT: [[BLOCK260:%.*]] = shufflevector <2 x double> [[COL_LOAD252]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP350:%.*]] = extractelement <2 x double> [[COL_LOAD253]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT261:%.*]] = insertelement <2 x double> poison, double [[TMP350]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT262:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT261]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP351:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK260]], <2 x double> [[SPLAT_SPLAT262]], <2 x double> [[TMP349]]) |
| ; CHECK-NEXT: [[TMP352:%.*]] = shufflevector <2 x double> [[TMP351]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP353:%.*]] = shufflevector <2 x double> [[TMP290]], <2 x double> [[TMP352]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK263:%.*]] = shufflevector <2 x double> [[TMP345]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK264:%.*]] = shufflevector <2 x double> [[COL_LOAD251]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP354:%.*]] = extractelement <2 x double> [[COL_LOAD255]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT265:%.*]] = insertelement <2 x double> poison, double [[TMP354]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT266:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT265]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP355:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK264]], <2 x double> [[SPLAT_SPLAT266]], <2 x double> [[BLOCK263]]) |
| ; CHECK-NEXT: [[BLOCK267:%.*]] = shufflevector <2 x double> [[COL_LOAD252]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP356:%.*]] = extractelement <2 x double> [[COL_LOAD255]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT268:%.*]] = insertelement <2 x double> poison, double [[TMP356]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT269:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT268]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP357:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK267]], <2 x double> [[SPLAT_SPLAT269]], <2 x double> [[TMP355]]) |
| ; CHECK-NEXT: [[TMP372:%.*]] = shufflevector <2 x double> [[TMP357]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP373:%.*]] = shufflevector <2 x double> [[TMP345]], <2 x double> [[TMP372]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP374:%.*]] = getelementptr double, ptr [[A]], i64 26 |
| ; CHECK-NEXT: [[COL_LOAD270:%.*]] = load <2 x double>, ptr [[TMP374]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP271:%.*]] = getelementptr double, ptr [[TMP374]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD272:%.*]] = load <2 x double>, ptr [[VEC_GEP271]], align 8 |
| ; CHECK-NEXT: [[TMP375:%.*]] = getelementptr double, ptr [[B]], i64 16 |
| ; CHECK-NEXT: [[COL_LOAD273:%.*]] = load <2 x double>, ptr [[TMP375]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP274:%.*]] = getelementptr double, ptr [[TMP375]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD275:%.*]] = load <2 x double>, ptr [[VEC_GEP274]], align 8 |
| ; CHECK-NEXT: [[BLOCK278:%.*]] = shufflevector <2 x double> [[TMP353]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK277:%.*]] = shufflevector <2 x double> [[COL_LOAD270]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP376:%.*]] = extractelement <2 x double> [[COL_LOAD273]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT278:%.*]] = insertelement <2 x double> poison, double [[TMP376]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT279:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT278]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP377:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK277]], <2 x double> [[SPLAT_SPLAT279]], <2 x double> [[BLOCK278]]) |
| ; CHECK-NEXT: [[BLOCK280:%.*]] = shufflevector <2 x double> [[COL_LOAD272]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP378:%.*]] = extractelement <2 x double> [[COL_LOAD273]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT281:%.*]] = insertelement <2 x double> poison, double [[TMP378]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT282:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT281]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP379:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK280]], <2 x double> [[SPLAT_SPLAT282]], <2 x double> [[TMP377]]) |
| ; CHECK-NEXT: [[TMP380:%.*]] = shufflevector <2 x double> [[TMP379]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP194:%.*]] = shufflevector <2 x double> [[TMP353]], <2 x double> [[TMP380]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK283:%.*]] = shufflevector <2 x double> [[TMP373]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK284:%.*]] = shufflevector <2 x double> [[COL_LOAD270]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP208:%.*]] = extractelement <2 x double> [[COL_LOAD275]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT285:%.*]] = insertelement <2 x double> poison, double [[TMP208]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT286:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT285]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP209:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK284]], <2 x double> [[SPLAT_SPLAT286]], <2 x double> [[BLOCK283]]) |
| ; CHECK-NEXT: [[BLOCK287:%.*]] = shufflevector <2 x double> [[COL_LOAD272]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP210:%.*]] = extractelement <2 x double> [[COL_LOAD275]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT288:%.*]] = insertelement <2 x double> poison, double [[TMP210]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT289:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT288]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP211:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK287]], <2 x double> [[SPLAT_SPLAT289]], <2 x double> [[TMP209]]) |
| ; CHECK-NEXT: [[TMP212:%.*]] = shufflevector <2 x double> [[TMP211]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP200:%.*]] = shufflevector <2 x double> [[TMP373]], <2 x double> [[TMP212]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[VEC_GEP280:%.*]] = getelementptr double, ptr [[C]], i64 14 |
| ; CHECK-NEXT: store <2 x double> [[TMP194]], ptr [[VEC_GEP280]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP281:%.*]] = getelementptr double, ptr [[VEC_GEP280]], i64 6 |
| ; CHECK-NEXT: store <2 x double> [[TMP200]], ptr [[VEC_GEP281]], align 8 |
| ; CHECK-NEXT: [[TMP202:%.*]] = getelementptr double, ptr [[A]], i64 4 |
| ; CHECK-NEXT: [[COL_LOAD291:%.*]] = load <2 x double>, ptr [[TMP202]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP283:%.*]] = getelementptr double, ptr [[TMP202]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD293:%.*]] = load <2 x double>, ptr [[VEC_GEP283]], align 8 |
| ; CHECK-NEXT: [[TMP203:%.*]] = getelementptr double, ptr [[B]], i64 12 |
| ; CHECK-NEXT: [[COL_LOAD294:%.*]] = load <2 x double>, ptr [[TMP203]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP290:%.*]] = getelementptr double, ptr [[TMP203]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD296:%.*]] = load <2 x double>, ptr [[VEC_GEP290]], align 8 |
| ; CHECK-NEXT: [[BLOCK292:%.*]] = shufflevector <2 x double> [[COL_LOAD291]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP204:%.*]] = extractelement <2 x double> [[COL_LOAD294]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT293:%.*]] = insertelement <2 x double> poison, double [[TMP204]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT294:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT293]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP205:%.*]] = fmul contract <2 x double> [[BLOCK292]], [[SPLAT_SPLAT294]] |
| ; CHECK-NEXT: [[BLOCK295:%.*]] = shufflevector <2 x double> [[COL_LOAD293]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP206:%.*]] = extractelement <2 x double> [[COL_LOAD294]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT296:%.*]] = insertelement <2 x double> poison, double [[TMP206]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT297:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT296]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP207:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK295]], <2 x double> [[SPLAT_SPLAT297]], <2 x double> [[TMP205]]) |
| ; CHECK-NEXT: [[TMP381:%.*]] = shufflevector <2 x double> [[TMP207]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP222:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP381]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK305:%.*]] = shufflevector <2 x double> [[COL_LOAD291]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP214:%.*]] = extractelement <2 x double> [[COL_LOAD296]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT306:%.*]] = insertelement <2 x double> poison, double [[TMP214]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT307:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT306]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP215:%.*]] = fmul contract <2 x double> [[BLOCK305]], [[SPLAT_SPLAT307]] |
| ; CHECK-NEXT: [[BLOCK308:%.*]] = shufflevector <2 x double> [[COL_LOAD293]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP216:%.*]] = extractelement <2 x double> [[COL_LOAD296]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT309:%.*]] = insertelement <2 x double> poison, double [[TMP216]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT310:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT309]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP217:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK308]], <2 x double> [[SPLAT_SPLAT310]], <2 x double> [[TMP215]]) |
| ; CHECK-NEXT: [[TMP382:%.*]] = shufflevector <2 x double> [[TMP217]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP228:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP382]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP229:%.*]] = getelementptr double, ptr [[A]], i64 16 |
| ; CHECK-NEXT: [[COL_LOAD309:%.*]] = load <2 x double>, ptr [[TMP229]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP310:%.*]] = getelementptr double, ptr [[TMP229]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD311:%.*]] = load <2 x double>, ptr [[VEC_GEP310]], align 8 |
| ; CHECK-NEXT: [[TMP230:%.*]] = getelementptr double, ptr [[B]], i64 14 |
| ; CHECK-NEXT: [[COL_LOAD312:%.*]] = load <2 x double>, ptr [[TMP230]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP313:%.*]] = getelementptr double, ptr [[TMP230]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD314:%.*]] = load <2 x double>, ptr [[VEC_GEP313]], align 8 |
| ; CHECK-NEXT: [[BLOCK315:%.*]] = shufflevector <2 x double> [[TMP222]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK316:%.*]] = shufflevector <2 x double> [[COL_LOAD309]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP218:%.*]] = extractelement <2 x double> [[COL_LOAD312]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT312:%.*]] = insertelement <2 x double> poison, double [[TMP218]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT313:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT312]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP219:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK316]], <2 x double> [[SPLAT_SPLAT313]], <2 x double> [[BLOCK315]]) |
| ; CHECK-NEXT: [[BLOCK314:%.*]] = shufflevector <2 x double> [[COL_LOAD311]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP220:%.*]] = extractelement <2 x double> [[COL_LOAD312]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT315:%.*]] = insertelement <2 x double> poison, double [[TMP220]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT316:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT315]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP221:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK314]], <2 x double> [[SPLAT_SPLAT316]], <2 x double> [[TMP219]]) |
| ; CHECK-NEXT: [[TMP383:%.*]] = shufflevector <2 x double> [[TMP221]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP384:%.*]] = shufflevector <2 x double> [[TMP222]], <2 x double> [[TMP383]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK322:%.*]] = shufflevector <2 x double> [[TMP228]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK323:%.*]] = shufflevector <2 x double> [[COL_LOAD309]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP224:%.*]] = extractelement <2 x double> [[COL_LOAD314]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT318:%.*]] = insertelement <2 x double> poison, double [[TMP224]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT325:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT318]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP385:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK323]], <2 x double> [[SPLAT_SPLAT325]], <2 x double> [[BLOCK322]]) |
| ; CHECK-NEXT: [[BLOCK326:%.*]] = shufflevector <2 x double> [[COL_LOAD311]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP386:%.*]] = extractelement <2 x double> [[COL_LOAD314]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT327:%.*]] = insertelement <2 x double> poison, double [[TMP386]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT328:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT327]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP387:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK326]], <2 x double> [[SPLAT_SPLAT328]], <2 x double> [[TMP385]]) |
| ; CHECK-NEXT: [[TMP388:%.*]] = shufflevector <2 x double> [[TMP387]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP242:%.*]] = shufflevector <2 x double> [[TMP228]], <2 x double> [[TMP388]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP243:%.*]] = getelementptr double, ptr [[A]], i64 28 |
| ; CHECK-NEXT: [[COL_LOAD329:%.*]] = load <2 x double>, ptr [[TMP243]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP330:%.*]] = getelementptr double, ptr [[TMP243]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD331:%.*]] = load <2 x double>, ptr [[VEC_GEP330]], align 8 |
| ; CHECK-NEXT: [[TMP389:%.*]] = getelementptr double, ptr [[B]], i64 16 |
| ; CHECK-NEXT: [[COL_LOAD332:%.*]] = load <2 x double>, ptr [[TMP389]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP333:%.*]] = getelementptr double, ptr [[TMP389]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD334:%.*]] = load <2 x double>, ptr [[VEC_GEP333]], align 8 |
| ; CHECK-NEXT: [[BLOCK335:%.*]] = shufflevector <2 x double> [[TMP384]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK336:%.*]] = shufflevector <2 x double> [[COL_LOAD329]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP390:%.*]] = extractelement <2 x double> [[COL_LOAD332]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT338:%.*]] = insertelement <2 x double> poison, double [[TMP390]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT339:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT338]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP391:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK336]], <2 x double> [[SPLAT_SPLAT339]], <2 x double> [[BLOCK335]]) |
| ; CHECK-NEXT: [[BLOCK340:%.*]] = shufflevector <2 x double> [[COL_LOAD331]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP392:%.*]] = extractelement <2 x double> [[COL_LOAD332]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT341:%.*]] = insertelement <2 x double> poison, double [[TMP392]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT342:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT341]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP393:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK340]], <2 x double> [[SPLAT_SPLAT342]], <2 x double> [[TMP391]]) |
| ; CHECK-NEXT: [[TMP394:%.*]] = shufflevector <2 x double> [[TMP393]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP250:%.*]] = shufflevector <2 x double> [[TMP384]], <2 x double> [[TMP394]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK342:%.*]] = shufflevector <2 x double> [[TMP242]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK343:%.*]] = shufflevector <2 x double> [[COL_LOAD329]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP251:%.*]] = extractelement <2 x double> [[COL_LOAD334]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT344:%.*]] = insertelement <2 x double> poison, double [[TMP251]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT345:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT344]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP395:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK343]], <2 x double> [[SPLAT_SPLAT345]], <2 x double> [[BLOCK342]]) |
| ; CHECK-NEXT: [[BLOCK346:%.*]] = shufflevector <2 x double> [[COL_LOAD331]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP396:%.*]] = extractelement <2 x double> [[COL_LOAD334]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT347:%.*]] = insertelement <2 x double> poison, double [[TMP396]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT348:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT347]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP397:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK346]], <2 x double> [[SPLAT_SPLAT348]], <2 x double> [[TMP395]]) |
| ; CHECK-NEXT: [[TMP398:%.*]] = shufflevector <2 x double> [[TMP397]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP256:%.*]] = shufflevector <2 x double> [[TMP242]], <2 x double> [[TMP398]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP257:%.*]] = getelementptr double, ptr [[C]], i64 16 |
| ; CHECK-NEXT: store <2 x double> [[TMP250]], ptr [[TMP257]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP349:%.*]] = getelementptr double, ptr [[TMP257]], i64 6 |
| ; CHECK-NEXT: store <2 x double> [[TMP256]], ptr [[VEC_GEP349]], align 8 |
| ; CHECK-NEXT: [[TMP399:%.*]] = getelementptr double, ptr [[A]], i64 0 |
| ; CHECK-NEXT: [[COL_LOAD350:%.*]] = load <2 x double>, ptr [[TMP399]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP351:%.*]] = getelementptr double, ptr [[TMP399]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD352:%.*]] = load <2 x double>, ptr [[VEC_GEP351]], align 8 |
| ; CHECK-NEXT: [[TMP400:%.*]] = getelementptr double, ptr [[B]], i64 24 |
| ; CHECK-NEXT: [[COL_LOAD353:%.*]] = load <2 x double>, ptr [[TMP400]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP354:%.*]] = getelementptr double, ptr [[TMP400]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD355:%.*]] = load <2 x double>, ptr [[VEC_GEP354]], align 8 |
| ; CHECK-NEXT: [[BLOCK317:%.*]] = shufflevector <2 x double> [[COL_LOAD350]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP401:%.*]] = extractelement <2 x double> [[COL_LOAD353]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT358:%.*]] = insertelement <2 x double> poison, double [[TMP401]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT319:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT358]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP225:%.*]] = fmul contract <2 x double> [[BLOCK317]], [[SPLAT_SPLAT319]] |
| ; CHECK-NEXT: [[BLOCK320:%.*]] = shufflevector <2 x double> [[COL_LOAD352]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP226:%.*]] = extractelement <2 x double> [[COL_LOAD353]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT321:%.*]] = insertelement <2 x double> poison, double [[TMP226]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT322:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT321]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP227:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK320]], <2 x double> [[SPLAT_SPLAT322]], <2 x double> [[TMP225]]) |
| ; CHECK-NEXT: [[TMP402:%.*]] = shufflevector <2 x double> [[TMP227]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP403:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP402]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK330:%.*]] = shufflevector <2 x double> [[COL_LOAD350]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP234:%.*]] = extractelement <2 x double> [[COL_LOAD355]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT331:%.*]] = insertelement <2 x double> poison, double [[TMP234]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT332:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT331]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP235:%.*]] = fmul contract <2 x double> [[BLOCK330]], [[SPLAT_SPLAT332]] |
| ; CHECK-NEXT: [[BLOCK333:%.*]] = shufflevector <2 x double> [[COL_LOAD352]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP236:%.*]] = extractelement <2 x double> [[COL_LOAD355]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT334:%.*]] = insertelement <2 x double> poison, double [[TMP236]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT335:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT334]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP237:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK333]], <2 x double> [[SPLAT_SPLAT335]], <2 x double> [[TMP235]]) |
| ; CHECK-NEXT: [[TMP404:%.*]] = shufflevector <2 x double> [[TMP237]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP405:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP404]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP406:%.*]] = getelementptr double, ptr [[A]], i64 12 |
| ; CHECK-NEXT: [[COL_LOAD368:%.*]] = load <2 x double>, ptr [[TMP406]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP369:%.*]] = getelementptr double, ptr [[TMP406]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD370:%.*]] = load <2 x double>, ptr [[VEC_GEP369]], align 8 |
| ; CHECK-NEXT: [[TMP407:%.*]] = getelementptr double, ptr [[B]], i64 26 |
| ; CHECK-NEXT: [[COL_LOAD371:%.*]] = load <2 x double>, ptr [[TMP407]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP372:%.*]] = getelementptr double, ptr [[TMP407]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD373:%.*]] = load <2 x double>, ptr [[VEC_GEP372]], align 8 |
| ; CHECK-NEXT: [[BLOCK374:%.*]] = shufflevector <2 x double> [[TMP403]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK375:%.*]] = shufflevector <2 x double> [[COL_LOAD368]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP238:%.*]] = extractelement <2 x double> [[COL_LOAD371]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT337:%.*]] = insertelement <2 x double> poison, double [[TMP238]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT338:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT337]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP239:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK375]], <2 x double> [[SPLAT_SPLAT338]], <2 x double> [[BLOCK374]]) |
| ; CHECK-NEXT: [[BLOCK339:%.*]] = shufflevector <2 x double> [[COL_LOAD370]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP240:%.*]] = extractelement <2 x double> [[COL_LOAD371]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT340:%.*]] = insertelement <2 x double> poison, double [[TMP240]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT341:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT340]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP241:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK339]], <2 x double> [[SPLAT_SPLAT341]], <2 x double> [[TMP239]]) |
| ; CHECK-NEXT: [[TMP278:%.*]] = shufflevector <2 x double> [[TMP241]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP279:%.*]] = shufflevector <2 x double> [[TMP403]], <2 x double> [[TMP278]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK381:%.*]] = shufflevector <2 x double> [[TMP405]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK382:%.*]] = shufflevector <2 x double> [[COL_LOAD368]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP280:%.*]] = extractelement <2 x double> [[COL_LOAD373]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT383:%.*]] = insertelement <2 x double> poison, double [[TMP280]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT384:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT383]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP408:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK382]], <2 x double> [[SPLAT_SPLAT384]], <2 x double> [[BLOCK381]]) |
| ; CHECK-NEXT: [[BLOCK385:%.*]] = shufflevector <2 x double> [[COL_LOAD370]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP409:%.*]] = extractelement <2 x double> [[COL_LOAD373]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT386:%.*]] = insertelement <2 x double> poison, double [[TMP409]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT387:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT386]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP410:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK385]], <2 x double> [[SPLAT_SPLAT387]], <2 x double> [[TMP408]]) |
| ; CHECK-NEXT: [[TMP411:%.*]] = shufflevector <2 x double> [[TMP410]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP412:%.*]] = shufflevector <2 x double> [[TMP405]], <2 x double> [[TMP411]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP244:%.*]] = getelementptr double, ptr [[A]], i64 24 |
| ; CHECK-NEXT: [[COL_LOAD388:%.*]] = load <2 x double>, ptr [[TMP244]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP343:%.*]] = getelementptr double, ptr [[TMP244]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD390:%.*]] = load <2 x double>, ptr [[VEC_GEP343]], align 8 |
| ; CHECK-NEXT: [[TMP245:%.*]] = getelementptr double, ptr [[B]], i64 28 |
| ; CHECK-NEXT: [[COL_LOAD345:%.*]] = load <2 x double>, ptr [[TMP245]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP346:%.*]] = getelementptr double, ptr [[TMP245]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD393:%.*]] = load <2 x double>, ptr [[VEC_GEP346]], align 8 |
| ; CHECK-NEXT: [[BLOCK348:%.*]] = shufflevector <2 x double> [[TMP279]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK364:%.*]] = shufflevector <2 x double> [[COL_LOAD388]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP246:%.*]] = extractelement <2 x double> [[COL_LOAD345]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT350:%.*]] = insertelement <2 x double> poison, double [[TMP246]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT351:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT350]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP247:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK364]], <2 x double> [[SPLAT_SPLAT351]], <2 x double> [[BLOCK348]]) |
| ; CHECK-NEXT: [[BLOCK352:%.*]] = shufflevector <2 x double> [[COL_LOAD390]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP248:%.*]] = extractelement <2 x double> [[COL_LOAD345]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT353:%.*]] = insertelement <2 x double> poison, double [[TMP248]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT354:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT353]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP249:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK352]], <2 x double> [[SPLAT_SPLAT354]], <2 x double> [[TMP247]]) |
| ; CHECK-NEXT: [[TMP413:%.*]] = shufflevector <2 x double> [[TMP249]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP414:%.*]] = shufflevector <2 x double> [[TMP279]], <2 x double> [[TMP413]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK355:%.*]] = shufflevector <2 x double> [[TMP412]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK356:%.*]] = shufflevector <2 x double> [[COL_LOAD388]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP252:%.*]] = extractelement <2 x double> [[COL_LOAD393]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT357:%.*]] = insertelement <2 x double> poison, double [[TMP252]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT358:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT357]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP253:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK356]], <2 x double> [[SPLAT_SPLAT358]], <2 x double> [[BLOCK355]]) |
| ; CHECK-NEXT: [[BLOCK359:%.*]] = shufflevector <2 x double> [[COL_LOAD390]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP254:%.*]] = extractelement <2 x double> [[COL_LOAD393]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT360:%.*]] = insertelement <2 x double> poison, double [[TMP254]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT361:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT360]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP255:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK359]], <2 x double> [[SPLAT_SPLAT361]], <2 x double> [[TMP253]]) |
| ; CHECK-NEXT: [[TMP415:%.*]] = shufflevector <2 x double> [[TMP255]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP416:%.*]] = shufflevector <2 x double> [[TMP412]], <2 x double> [[TMP415]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP417:%.*]] = getelementptr double, ptr [[C]], i64 24 |
| ; CHECK-NEXT: store <2 x double> [[TMP414]], ptr [[TMP417]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP408:%.*]] = getelementptr double, ptr [[TMP417]], i64 6 |
| ; CHECK-NEXT: store <2 x double> [[TMP416]], ptr [[VEC_GEP408]], align 8 |
| ; CHECK-NEXT: [[TMP418:%.*]] = getelementptr double, ptr [[A]], i64 2 |
| ; CHECK-NEXT: [[COL_LOAD409:%.*]] = load <2 x double>, ptr [[TMP418]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP410:%.*]] = getelementptr double, ptr [[TMP418]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD412:%.*]] = load <2 x double>, ptr [[VEC_GEP410]], align 8 |
| ; CHECK-NEXT: [[TMP419:%.*]] = getelementptr double, ptr [[B]], i64 24 |
| ; CHECK-NEXT: [[COL_LOAD347:%.*]] = load <2 x double>, ptr [[TMP419]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP413:%.*]] = getelementptr double, ptr [[TMP419]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD415:%.*]] = load <2 x double>, ptr [[VEC_GEP413]], align 8 |
| ; CHECK-NEXT: [[BLOCK415:%.*]] = shufflevector <2 x double> [[COL_LOAD409]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP258:%.*]] = extractelement <2 x double> [[COL_LOAD347]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT364:%.*]] = insertelement <2 x double> poison, double [[TMP258]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT365:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT364]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP259:%.*]] = fmul contract <2 x double> [[BLOCK415]], [[SPLAT_SPLAT365]] |
| ; CHECK-NEXT: [[BLOCK366:%.*]] = shufflevector <2 x double> [[COL_LOAD412]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP260:%.*]] = extractelement <2 x double> [[COL_LOAD347]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT367:%.*]] = insertelement <2 x double> poison, double [[TMP260]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT368:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT367]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP261:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK366]], <2 x double> [[SPLAT_SPLAT368]], <2 x double> [[TMP259]]) |
| ; CHECK-NEXT: [[TMP420:%.*]] = shufflevector <2 x double> [[TMP261]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP421:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP420]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK422:%.*]] = shufflevector <2 x double> [[COL_LOAD409]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP264:%.*]] = extractelement <2 x double> [[COL_LOAD415]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT371:%.*]] = insertelement <2 x double> poison, double [[TMP264]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT372:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT371]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP310:%.*]] = fmul contract <2 x double> [[BLOCK422]], [[SPLAT_SPLAT372]] |
| ; CHECK-NEXT: [[BLOCK426:%.*]] = shufflevector <2 x double> [[COL_LOAD412]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP311:%.*]] = extractelement <2 x double> [[COL_LOAD415]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT425:%.*]] = insertelement <2 x double> poison, double [[TMP311]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT426:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT425]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP312:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK426]], <2 x double> [[SPLAT_SPLAT426]], <2 x double> [[TMP310]]) |
| ; CHECK-NEXT: [[TMP313:%.*]] = shufflevector <2 x double> [[TMP312]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP314:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP313]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP315:%.*]] = getelementptr double, ptr [[A]], i64 14 |
| ; CHECK-NEXT: [[COL_LOAD427:%.*]] = load <2 x double>, ptr [[TMP315]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP428:%.*]] = getelementptr double, ptr [[TMP315]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD429:%.*]] = load <2 x double>, ptr [[VEC_GEP428]], align 8 |
| ; CHECK-NEXT: [[TMP316:%.*]] = getelementptr double, ptr [[B]], i64 26 |
| ; CHECK-NEXT: [[COL_LOAD430:%.*]] = load <2 x double>, ptr [[TMP316]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP432:%.*]] = getelementptr double, ptr [[TMP316]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD432:%.*]] = load <2 x double>, ptr [[VEC_GEP432]], align 8 |
| ; CHECK-NEXT: [[TMP265:%.*]] = shufflevector <2 x double> [[TMP421]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK373:%.*]] = shufflevector <2 x double> [[COL_LOAD427]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP266:%.*]] = extractelement <2 x double> [[COL_LOAD430]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT374:%.*]] = insertelement <2 x double> poison, double [[TMP266]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT375:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT374]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP267:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK373]], <2 x double> [[SPLAT_SPLAT375]], <2 x double> [[TMP265]]) |
| ; CHECK-NEXT: [[BLOCK437:%.*]] = shufflevector <2 x double> [[COL_LOAD429]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP319:%.*]] = extractelement <2 x double> [[COL_LOAD430]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT438:%.*]] = insertelement <2 x double> poison, double [[TMP319]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT439:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT438]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP320:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK437]], <2 x double> [[SPLAT_SPLAT439]], <2 x double> [[TMP267]]) |
| ; CHECK-NEXT: [[TMP321:%.*]] = shufflevector <2 x double> [[TMP320]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP322:%.*]] = shufflevector <2 x double> [[TMP421]], <2 x double> [[TMP321]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK440:%.*]] = shufflevector <2 x double> [[TMP314]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK441:%.*]] = shufflevector <2 x double> [[COL_LOAD427]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP323:%.*]] = extractelement <2 x double> [[COL_LOAD432]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT442:%.*]] = insertelement <2 x double> poison, double [[TMP323]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT443:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT442]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP324:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK441]], <2 x double> [[SPLAT_SPLAT443]], <2 x double> [[BLOCK440]]) |
| ; CHECK-NEXT: [[BLOCK444:%.*]] = shufflevector <2 x double> [[COL_LOAD429]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP325:%.*]] = extractelement <2 x double> [[COL_LOAD432]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT445:%.*]] = insertelement <2 x double> poison, double [[TMP325]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT446:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT445]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP326:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK444]], <2 x double> [[SPLAT_SPLAT446]], <2 x double> [[TMP324]]) |
| ; CHECK-NEXT: [[TMP327:%.*]] = shufflevector <2 x double> [[TMP326]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP328:%.*]] = shufflevector <2 x double> [[TMP314]], <2 x double> [[TMP327]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP270:%.*]] = getelementptr double, ptr [[A]], i64 26 |
| ; CHECK-NEXT: [[COL_LOAD447:%.*]] = load <2 x double>, ptr [[TMP270]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP376:%.*]] = getelementptr double, ptr [[TMP270]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD449:%.*]] = load <2 x double>, ptr [[VEC_GEP376]], align 8 |
| ; CHECK-NEXT: [[TMP330:%.*]] = getelementptr double, ptr [[B]], i64 28 |
| ; CHECK-NEXT: [[COL_LOAD450:%.*]] = load <2 x double>, ptr [[TMP330]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP451:%.*]] = getelementptr double, ptr [[TMP330]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD452:%.*]] = load <2 x double>, ptr [[VEC_GEP451]], align 8 |
| ; CHECK-NEXT: [[BLOCK453:%.*]] = shufflevector <2 x double> [[TMP322]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK454:%.*]] = shufflevector <2 x double> [[COL_LOAD447]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP331:%.*]] = extractelement <2 x double> [[COL_LOAD450]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT455:%.*]] = insertelement <2 x double> poison, double [[TMP331]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT456:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT455]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP332:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK454]], <2 x double> [[SPLAT_SPLAT456]], <2 x double> [[BLOCK453]]) |
| ; CHECK-NEXT: [[BLOCK457:%.*]] = shufflevector <2 x double> [[COL_LOAD449]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP333:%.*]] = extractelement <2 x double> [[COL_LOAD450]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT458:%.*]] = insertelement <2 x double> poison, double [[TMP333]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT459:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT458]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP334:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK457]], <2 x double> [[SPLAT_SPLAT459]], <2 x double> [[TMP332]]) |
| ; CHECK-NEXT: [[TMP335:%.*]] = shufflevector <2 x double> [[TMP334]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP336:%.*]] = shufflevector <2 x double> [[TMP322]], <2 x double> [[TMP335]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK460:%.*]] = shufflevector <2 x double> [[TMP328]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK461:%.*]] = shufflevector <2 x double> [[COL_LOAD447]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP337:%.*]] = extractelement <2 x double> [[COL_LOAD452]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT462:%.*]] = insertelement <2 x double> poison, double [[TMP337]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT463:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT462]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP338:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK461]], <2 x double> [[SPLAT_SPLAT463]], <2 x double> [[BLOCK460]]) |
| ; CHECK-NEXT: [[BLOCK464:%.*]] = shufflevector <2 x double> [[COL_LOAD449]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP339:%.*]] = extractelement <2 x double> [[COL_LOAD452]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT465:%.*]] = insertelement <2 x double> poison, double [[TMP339]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT466:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT465]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP340:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK464]], <2 x double> [[SPLAT_SPLAT466]], <2 x double> [[TMP338]]) |
| ; CHECK-NEXT: [[TMP341:%.*]] = shufflevector <2 x double> [[TMP340]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP342:%.*]] = shufflevector <2 x double> [[TMP328]], <2 x double> [[TMP341]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP343:%.*]] = getelementptr double, ptr [[C]], i64 26 |
| ; CHECK-NEXT: store <2 x double> [[TMP336]], ptr [[TMP343]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP467:%.*]] = getelementptr double, ptr [[TMP343]], i64 6 |
| ; CHECK-NEXT: store <2 x double> [[TMP342]], ptr [[VEC_GEP467]], align 8 |
| ; CHECK-NEXT: [[TMP271:%.*]] = getelementptr double, ptr [[A]], i64 4 |
| ; CHECK-NEXT: [[COL_LOAD377:%.*]] = load <2 x double>, ptr [[TMP271]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP378:%.*]] = getelementptr double, ptr [[TMP271]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD379:%.*]] = load <2 x double>, ptr [[VEC_GEP378]], align 8 |
| ; CHECK-NEXT: [[TMP272:%.*]] = getelementptr double, ptr [[B]], i64 24 |
| ; CHECK-NEXT: [[COL_LOAD471:%.*]] = load <2 x double>, ptr [[TMP272]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP385:%.*]] = getelementptr double, ptr [[TMP272]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD473:%.*]] = load <2 x double>, ptr [[VEC_GEP385]], align 8 |
| ; CHECK-NEXT: [[BLOCK387:%.*]] = shufflevector <2 x double> [[COL_LOAD377]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP273:%.*]] = extractelement <2 x double> [[COL_LOAD471]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT388:%.*]] = insertelement <2 x double> poison, double [[TMP273]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT389:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT388]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP274:%.*]] = fmul contract <2 x double> [[BLOCK387]], [[SPLAT_SPLAT389]] |
| ; CHECK-NEXT: [[BLOCK390:%.*]] = shufflevector <2 x double> [[COL_LOAD379]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP275:%.*]] = extractelement <2 x double> [[COL_LOAD471]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT391:%.*]] = insertelement <2 x double> poison, double [[TMP275]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT392:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT391]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP276:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK390]], <2 x double> [[SPLAT_SPLAT392]], <2 x double> [[TMP274]]) |
| ; CHECK-NEXT: [[TMP281:%.*]] = shufflevector <2 x double> [[TMP276]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP282:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP281]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK399:%.*]] = shufflevector <2 x double> [[COL_LOAD377]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP283:%.*]] = extractelement <2 x double> [[COL_LOAD473]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT400:%.*]] = insertelement <2 x double> poison, double [[TMP283]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT401:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT400]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP284:%.*]] = fmul contract <2 x double> [[BLOCK399]], [[SPLAT_SPLAT401]] |
| ; CHECK-NEXT: [[BLOCK402:%.*]] = shufflevector <2 x double> [[COL_LOAD379]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP285:%.*]] = extractelement <2 x double> [[COL_LOAD473]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT403:%.*]] = insertelement <2 x double> poison, double [[TMP285]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT404:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT403]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP286:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK402]], <2 x double> [[SPLAT_SPLAT404]], <2 x double> [[TMP284]]) |
| ; CHECK-NEXT: [[TMP291:%.*]] = shufflevector <2 x double> [[TMP286]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP292:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP291]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP358:%.*]] = getelementptr double, ptr [[A]], i64 16 |
| ; CHECK-NEXT: [[COL_LOAD486:%.*]] = load <2 x double>, ptr [[TMP358]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP487:%.*]] = getelementptr double, ptr [[TMP358]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD488:%.*]] = load <2 x double>, ptr [[VEC_GEP487]], align 8 |
| ; CHECK-NEXT: [[TMP359:%.*]] = getelementptr double, ptr [[B]], i64 26 |
| ; CHECK-NEXT: [[COL_LOAD489:%.*]] = load <2 x double>, ptr [[TMP359]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP490:%.*]] = getelementptr double, ptr [[TMP359]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD491:%.*]] = load <2 x double>, ptr [[VEC_GEP490]], align 8 |
| ; CHECK-NEXT: [[BLOCK492:%.*]] = shufflevector <2 x double> [[TMP282]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK493:%.*]] = shufflevector <2 x double> [[COL_LOAD486]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP360:%.*]] = extractelement <2 x double> [[COL_LOAD489]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT494:%.*]] = insertelement <2 x double> poison, double [[TMP360]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT495:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT494]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP361:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK493]], <2 x double> [[SPLAT_SPLAT495]], <2 x double> [[BLOCK492]]) |
| ; CHECK-NEXT: [[BLOCK496:%.*]] = shufflevector <2 x double> [[COL_LOAD488]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP362:%.*]] = extractelement <2 x double> [[COL_LOAD489]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT497:%.*]] = insertelement <2 x double> poison, double [[TMP362]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT498:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT497]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP363:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK496]], <2 x double> [[SPLAT_SPLAT498]], <2 x double> [[TMP361]]) |
| ; CHECK-NEXT: [[TMP364:%.*]] = shufflevector <2 x double> [[TMP363]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP365:%.*]] = shufflevector <2 x double> [[TMP282]], <2 x double> [[TMP364]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK499:%.*]] = shufflevector <2 x double> [[TMP292]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK500:%.*]] = shufflevector <2 x double> [[COL_LOAD486]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP366:%.*]] = extractelement <2 x double> [[COL_LOAD491]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT501:%.*]] = insertelement <2 x double> poison, double [[TMP366]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT502:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT501]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP367:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK500]], <2 x double> [[SPLAT_SPLAT502]], <2 x double> [[BLOCK499]]) |
| ; CHECK-NEXT: [[BLOCK503:%.*]] = shufflevector <2 x double> [[COL_LOAD488]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP368:%.*]] = extractelement <2 x double> [[COL_LOAD491]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT504:%.*]] = insertelement <2 x double> poison, double [[TMP368]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT505:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT504]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP369:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK503]], <2 x double> [[SPLAT_SPLAT505]], <2 x double> [[TMP367]]) |
| ; CHECK-NEXT: [[TMP370:%.*]] = shufflevector <2 x double> [[TMP369]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP371:%.*]] = shufflevector <2 x double> [[TMP292]], <2 x double> [[TMP370]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP293:%.*]] = getelementptr double, ptr [[A]], i64 28 |
| ; CHECK-NEXT: [[COL_LOAD411:%.*]] = load <2 x double>, ptr [[TMP293]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP412:%.*]] = getelementptr double, ptr [[TMP293]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD413:%.*]] = load <2 x double>, ptr [[VEC_GEP412]], align 8 |
| ; CHECK-NEXT: [[TMP294:%.*]] = getelementptr double, ptr [[B]], i64 28 |
| ; CHECK-NEXT: [[COL_LOAD414:%.*]] = load <2 x double>, ptr [[TMP294]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP415:%.*]] = getelementptr double, ptr [[TMP294]], i64 6 |
| ; CHECK-NEXT: [[COL_LOAD416:%.*]] = load <2 x double>, ptr [[VEC_GEP415]], align 8 |
| ; CHECK-NEXT: [[BLOCK417:%.*]] = shufflevector <2 x double> [[TMP365]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK418:%.*]] = shufflevector <2 x double> [[COL_LOAD411]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP295:%.*]] = extractelement <2 x double> [[COL_LOAD414]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT419:%.*]] = insertelement <2 x double> poison, double [[TMP295]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT420:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT419]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP296:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK418]], <2 x double> [[SPLAT_SPLAT420]], <2 x double> [[BLOCK417]]) |
| ; CHECK-NEXT: [[BLOCK421:%.*]] = shufflevector <2 x double> [[COL_LOAD413]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP297:%.*]] = extractelement <2 x double> [[COL_LOAD414]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT422:%.*]] = insertelement <2 x double> poison, double [[TMP297]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT423:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT422]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP298:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK421]], <2 x double> [[SPLAT_SPLAT423]], <2 x double> [[TMP296]]) |
| ; CHECK-NEXT: [[TMP299:%.*]] = shufflevector <2 x double> [[TMP298]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP300:%.*]] = shufflevector <2 x double> [[TMP365]], <2 x double> [[TMP299]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK424:%.*]] = shufflevector <2 x double> [[TMP371]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK425:%.*]] = shufflevector <2 x double> [[COL_LOAD411]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP301:%.*]] = extractelement <2 x double> [[COL_LOAD416]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT426:%.*]] = insertelement <2 x double> poison, double [[TMP301]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT427:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT426]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP302:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK425]], <2 x double> [[SPLAT_SPLAT427]], <2 x double> [[BLOCK424]]) |
| ; CHECK-NEXT: [[BLOCK428:%.*]] = shufflevector <2 x double> [[COL_LOAD413]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP303:%.*]] = extractelement <2 x double> [[COL_LOAD416]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT429:%.*]] = insertelement <2 x double> poison, double [[TMP303]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT430:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT429]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP304:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK428]], <2 x double> [[SPLAT_SPLAT430]], <2 x double> [[TMP302]]) |
| ; CHECK-NEXT: [[TMP305:%.*]] = shufflevector <2 x double> [[TMP304]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP306:%.*]] = shufflevector <2 x double> [[TMP371]], <2 x double> [[TMP305]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[TMP307:%.*]] = getelementptr double, ptr [[C]], i64 28 |
| ; CHECK-NEXT: store <2 x double> [[TMP300]], ptr [[TMP307]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP431:%.*]] = getelementptr double, ptr [[TMP307]], i64 6 |
| ; CHECK-NEXT: store <2 x double> [[TMP306]], ptr [[VEC_GEP431]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %a = load <36 x double>, ptr %A, align 8 |
| %b = load <36 x double>, ptr %B, align 8 |
| %c = call <36 x double> @llvm.matrix.multiply.v36f64.v36f64.v36f64(<36 x double> %a, <36 x double> %b, i32 6, i32 6, i32 6) |
| store <36 x double> %c, ptr %C, align 8 |
| ret void |
| } |
| |
| ; multiply_8x8x8: loads two <64 x double> operands from %A and %B, multiplies |
| ; them with llvm.matrix.multiply using dimensions 8, 8, 8, and stores the |
| ; <64 x double> result to %C. All three pointers are noalias. |
| ; |
| ; The autogenerated assertions below expect the multiply to be lowered to a |
| ; three-level loop nest (cols / rows / inner). Each induction variable starts |
| ; at 0, advances by 2, and its loop exits once the stepped value equals 8. |
| ;   * The inner header carries two <2 x double> accumulator phis that start |
| ;     at zeroinitializer and are updated through llvm.fmuladd in the body. |
| ;   * Per inner iteration, two <2 x double> vectors are loaded from A at |
| ;     element index inner*8 + rows (and 8 elements further), and two from B |
| ;     at element index cols*8 + inner (and 8 elements further). |
| ;   * The rows latch stores both accumulators to C at element index |
| ;     cols*8 + rows (and 8 elements further). |
| ;   * All three back-edges carry the same !prof node; only the inner |
| ;     back-edge additionally carries !llvm.loop metadata, which is expected |
| ;     to request an unroll count of 4. |
| define void @multiply_8x8x8(ptr noalias %A, ptr noalias %B, ptr noalias %C) { |
| ; CHECK-LABEL: define void @multiply_8x8x8( |
| ; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[COLS_HEADER:.*]] |
| ; CHECK: [[COLS_HEADER]]: |
| ; CHECK-NEXT: [[COLS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[COLS_STEP:%.*]], %[[COLS_LATCH:.*]] ] |
| ; CHECK-NEXT: br label %[[COLS_BODY:.*]] |
| ; CHECK: [[COLS_BODY]]: |
| ; CHECK-NEXT: br label %[[ROWS_HEADER:.*]] |
| ; CHECK: [[ROWS_HEADER]]: |
| ; CHECK-NEXT: [[ROWS_IV:%.*]] = phi i64 [ 0, %[[COLS_BODY]] ], [ [[ROWS_STEP:%.*]], %[[ROWS_LATCH:.*]] ] |
| ; CHECK-NEXT: br label %[[ROWS_BODY:.*]] |
| ; CHECK: [[ROWS_BODY]]: |
| ; CHECK-NEXT: br label %[[INNER_HEADER:.*]] |
| ; CHECK: [[INNER_HEADER]]: |
| ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, %[[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], %[[INNER_LATCH:.*]] ] |
| ; CHECK-NEXT: [[TMP912:%.*]] = phi <2 x double> [ zeroinitializer, %[[ROWS_BODY]] ], [ [[TMP921:%.*]], %[[INNER_LATCH]] ] |
| ; CHECK-NEXT: [[TMP913:%.*]] = phi <2 x double> [ zeroinitializer, %[[ROWS_BODY]] ], [ [[TMP927:%.*]], %[[INNER_LATCH]] ] |
| ; CHECK-NEXT: br label %[[INNER_BODY:.*]] |
| ; CHECK: [[INNER_BODY]]: |
| ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INNER_IV]], 8 |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]] |
| ; CHECK-NEXT: [[TMP914:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP1]] |
| ; CHECK-NEXT: [[COL_LOAD1240:%.*]] = load <2 x double>, ptr [[TMP914]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP1241:%.*]] = getelementptr double, ptr [[TMP914]], i64 8 |
| ; CHECK-NEXT: [[COL_LOAD1243:%.*]] = load <2 x double>, ptr [[VEC_GEP1241]], align 8 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[COLS_IV]], 8 |
| ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]] |
| ; CHECK-NEXT: [[TMP915:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP4]] |
| ; CHECK-NEXT: [[COL_LOAD1245:%.*]] = load <2 x double>, ptr [[TMP915]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP1244:%.*]] = getelementptr double, ptr [[TMP915]], i64 8 |
| ; CHECK-NEXT: [[COL_LOAD1246:%.*]] = load <2 x double>, ptr [[VEC_GEP1244]], align 8 |
| ; CHECK-NEXT: [[BLOCK1247:%.*]] = shufflevector <2 x double> [[TMP912]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK1248:%.*]] = shufflevector <2 x double> [[COL_LOAD1240]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP916:%.*]] = extractelement <2 x double> [[COL_LOAD1245]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT1248:%.*]] = insertelement <2 x double> poison, double [[TMP916]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT1249:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT1248]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP917:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK1248]], <2 x double> [[SPLAT_SPLAT1249]], <2 x double> [[BLOCK1247]]) |
| ; CHECK-NEXT: [[BLOCK1250:%.*]] = shufflevector <2 x double> [[COL_LOAD1243]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP918:%.*]] = extractelement <2 x double> [[COL_LOAD1245]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT1251:%.*]] = insertelement <2 x double> poison, double [[TMP918]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT1252:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT1251]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP919:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK1250]], <2 x double> [[SPLAT_SPLAT1252]], <2 x double> [[TMP917]]) |
| ; CHECK-NEXT: [[TMP920:%.*]] = shufflevector <2 x double> [[TMP919]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP921]] = shufflevector <2 x double> [[TMP912]], <2 x double> [[TMP920]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[BLOCK1254:%.*]] = shufflevector <2 x double> [[TMP913]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[BLOCK1255:%.*]] = shufflevector <2 x double> [[COL_LOAD1240]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP922:%.*]] = extractelement <2 x double> [[COL_LOAD1246]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT1255:%.*]] = insertelement <2 x double> poison, double [[TMP922]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT1256:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT1255]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP923:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK1255]], <2 x double> [[SPLAT_SPLAT1256]], <2 x double> [[BLOCK1254]]) |
| ; CHECK-NEXT: [[BLOCK1257:%.*]] = shufflevector <2 x double> [[COL_LOAD1243]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP924:%.*]] = extractelement <2 x double> [[COL_LOAD1246]], i64 1 |
| ; CHECK-NEXT: [[SPLAT_SPLATINSERT1258:%.*]] = insertelement <2 x double> poison, double [[TMP924]], i64 0 |
| ; CHECK-NEXT: [[SPLAT_SPLAT1259:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT1258]], <2 x double> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP925:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK1257]], <2 x double> [[SPLAT_SPLAT1259]], <2 x double> [[TMP923]]) |
| ; CHECK-NEXT: [[TMP926:%.*]] = shufflevector <2 x double> [[TMP925]], <2 x double> poison, <2 x i32> <i32 0, i32 1> |
| ; CHECK-NEXT: [[TMP927]] = shufflevector <2 x double> [[TMP913]], <2 x double> [[TMP926]], <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: br label %[[INNER_LATCH]] |
| ; CHECK: [[INNER_LATCH]]: |
| ; CHECK-NEXT: [[INNER_STEP]] = add i64 [[INNER_IV]], 2 |
| ; CHECK-NEXT: [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 8 |
| ; CHECK-NEXT: br i1 [[INNER_COND]], label %[[INNER_HEADER]], label %[[ROWS_LATCH]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]] |
| ; CHECK: [[ROWS_LATCH]]: |
| ; CHECK-NEXT: [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2 |
| ; CHECK-NEXT: [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 8 |
| ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[COLS_IV]], 8 |
| ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], [[ROWS_IV]] |
| ; CHECK-NEXT: [[TMP928:%.*]] = getelementptr double, ptr [[C]], i64 [[TMP19]] |
| ; CHECK-NEXT: store <2 x double> [[TMP921]], ptr [[TMP928]], align 8 |
| ; CHECK-NEXT: [[VEC_GEP1260:%.*]] = getelementptr double, ptr [[TMP928]], i64 8 |
| ; CHECK-NEXT: store <2 x double> [[TMP927]], ptr [[VEC_GEP1260]], align 8 |
| ; CHECK-NEXT: br i1 [[ROWS_COND]], label %[[ROWS_HEADER]], label %[[COLS_LATCH]], !prof [[PROF0]] |
| ; CHECK: [[COLS_LATCH]]: |
| ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 |
| ; CHECK-NEXT: [[COLS_COND:%.*]] = icmp ne i64 [[COLS_STEP]], 8 |
| ; CHECK-NEXT: br i1 [[COLS_COND]], label %[[COLS_HEADER]], label %[[CONTINUE:.*]], !prof [[PROF0]] |
| ; CHECK: [[CONTINUE]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Input pattern under test: whole-matrix loads feeding a single multiply whose |
| ; only use is the store to %C. All three dimension arguments are 8. |
| %a = load <64 x double>, ptr %A, align 8 |
| %b = load <64 x double>, ptr %B, align 8 |
| %c = call <64 x double> @llvm.matrix.multiply.v64f64.v64f64.v64f64(<64 x double> %a, <64 x double> %b, i32 8, i32 8, i32 8) |
| store <64 x double> %c, ptr %C, align 8 |
| ret void |
| } |
| ;. |
| ; CHECK: [[PROF0]] = !{!"branch_weights", i32 4, i32 1} |
| ; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]]} |
| ; CHECK: [[META2]] = !{!"llvm.loop.unroll.count", i32 4} |
| ;. |