| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 5 |
| ; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck --check-prefixes=VF2 %s |
| ; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck --check-prefixes=VF4 %s |
| |
| target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" |
| target triple = "arm64-apple-macosx" |
| |
; Input: a 100-iteration loop that negates, in place, the two adjacent doubles
; at data[2*iv] and data[2*iv+1]. %factor is not referenced by the loop body.
; The assertions expect each pair of loads to become one wide load that is
; split with strided shuffles, one fneg per member, and the results recombined
; into a single interleaved store. The width-2 expectations contain two copies
; of that sequence and step the index by 4; the width-4 expectations contain one.
| define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr noalias %factor) { |
| ; VF2-LABEL: define void @test_2xi64_unary_op_load_interleave_group( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 |
| ; VF2-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF2-NEXT: [[TMP12:%.*]] = shl nsw i64 [[TMP10]], 1 |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP1]] |
| ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP12]] |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3> |
| ; VF2-NEXT: [[WIDE_VEC2:%.*]] = load <4 x double>, ptr [[TMP13]], align 8 |
| ; VF2-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 1, i32 3> |
| ; VF2-NEXT: [[TMP3:%.*]] = fneg <2 x double> [[STRIDED_VEC]] |
| ; VF2-NEXT: [[TMP14:%.*]] = fneg <2 x double> [[STRIDED_VEC3]] |
| ; VF2-NEXT: [[TMP4:%.*]] = fneg <2 x double> [[STRIDED_VEC1]] |
| ; VF2-NEXT: [[TMP9:%.*]] = fneg <2 x double> [[STRIDED_VEC4]] |
| ; VF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC5]], ptr [[TMP13]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_unary_op_load_interleave_group( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP1]] |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ; VF4-NEXT: [[TMP3:%.*]] = fneg <4 x double> [[STRIDED_VEC]] |
| ; VF4-NEXT: [[TMP4:%.*]] = fneg <4 x double> [[STRIDED_VEC1]] |
| ; VF4-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; %1 = 2 * iv: index of the first (even) element of the pair.
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds double, ptr %data, i64 %1 |
| %l.0 = load double, ptr %data.0, align 8 |
| %neg.0 = fneg double %l.0 |
| store double %neg.0, ptr %data.0, align 8 |
; %3 = 2 * iv + 1: bit 0 of %1 is zero after the shift, so the disjoint or acts as an add.
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds double, ptr %data, i64 %3 |
| %l.1 = load double, ptr %data.1, align 8 |
| %neg.1 = fneg double %l.1 |
| store double %neg.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
; Input: a 100-iteration loop that stores -A[iv] to data[2*iv] and -B[iv] to
; data[2*iv+1]. Unlike the pair of loads from %data in the preceding test, the
; two loads here read consecutive elements of two separate arrays.
; The assertions expect plain consecutive vector loads from A and B, one fneg
; per loaded vector, and a single interleaved store of both results to %data.
| define void @test_2xi64_unary_op_wide_load(ptr noalias %data, ptr noalias %A, ptr noalias %B) { |
| ; VF2-LABEL: define void @test_2xi64_unary_op_wide_load( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 |
| ; VF2-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF2-NEXT: [[TMP16:%.*]] = shl nsw i64 [[TMP14]], 1 |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0 |
| ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 2 |
| ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8 |
| ; VF2-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP18]], align 8 |
| ; VF2-NEXT: [[TMP4:%.*]] = fneg <2 x double> [[WIDE_LOAD]] |
| ; VF2-NEXT: [[TMP19:%.*]] = fneg <2 x double> [[WIDE_LOAD2]] |
| ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP1]] |
| ; VF2-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP16]] |
| ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 0 |
| ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 2 |
| ; VF2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 |
| ; VF2-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x double>, ptr [[TMP13]], align 8 |
| ; VF2-NEXT: [[TMP8:%.*]] = fneg <2 x double> [[WIDE_LOAD1]] |
| ; VF2-NEXT: [[TMP15:%.*]] = fneg <2 x double> [[WIDE_LOAD3]] |
| ; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8 |
| ; VF2-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC4:%.*]] = shufflevector <4 x double> [[TMP17]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC4]], ptr [[TMP20]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_unary_op_wide_load( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] |
| ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8 |
| ; VF4-NEXT: [[TMP4:%.*]] = fneg <4 x double> [[WIDE_LOAD]] |
| ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[TMP1]] |
| ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDEX]] |
| ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP7]], align 8 |
| ; VF4-NEXT: [[TMP8:%.*]] = fneg <4 x double> [[WIDE_LOAD1]] |
| ; VF4-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; %1 = 2 * iv: index of the even destination slot in %data.
| %1 = shl nsw i64 %iv, 1 |
| %A.0 = getelementptr inbounds double, ptr %A, i64 %iv |
| %l.0 = load double, ptr %A.0, align 8 |
| %neg.0 = fneg double %l.0 |
| %data.0 = getelementptr inbounds double, ptr %data, i64 %1 |
| store double %neg.0, ptr %data.0, align 8 |
; %3 = 2 * iv + 1: index of the odd destination slot in %data.
| %3 = or disjoint i64 %1, 1 |
| %B.0 = getelementptr inbounds double, ptr %B, i64 %iv |
| %l.1 = load double, ptr %B.0, align 8 |
| %neg.1 = fneg double %l.1 |
| %data.1 = getelementptr inbounds double, ptr %data, i64 %3 |
| store double %neg.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
; Input: a 100-iteration loop that multiplies, in place, both i64 elements of
; the pair data[2*iv], data[2*iv+1] by the same value factor[iv].
; The assertions expect one consecutive wide load of the factors, one wide load
; of the pair split with strided shuffles, one mul per member, and a single
; interleaved store back to the address the pair was loaded from.
| define void @test_2xi64(ptr noalias %data, ptr noalias %factor) { |
| ; VF2-LABEL: define void @test_2xi64( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]] |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 |
| ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[TMP23:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> |
| ; VF2-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]] |
| ; VF2-NEXT: [[TMP24:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP23]] |
| ; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP24]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP10:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP10]] |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP11]], align 8 |
| ; VF4-NEXT: [[TMP19:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; VF4-NEXT: [[TMP41:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ; VF4-NEXT: [[TMP20:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP19]] |
| ; VF4-NEXT: [[TMP42:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP41]] |
| ; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP20]], <4 x i64> [[TMP42]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
; One factor per iteration, shared by both members of the pair.
| %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv |
| %l.factor = load i64, ptr %arrayidx, align 8 |
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds i64, ptr %data, i64 %1 |
| %l.0 = load i64, ptr %data.0, align 8 |
| %mul.0 = mul i64 %l.factor, %l.0 |
| store i64 %mul.0, ptr %data.0, align 8 |
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds i64, ptr %data, i64 %3 |
| %l.1 = load i64, ptr %data.1, align 8 |
| %mul.1 = mul i64 %l.factor, %l.1 |
| store i64 %mul.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
; Input: same loop shape as @test_2xi64, except the two members of the pair
; use different operations: data[2*iv] is multiplied by factor[iv] while
; data[2*iv+1] has factor[iv] added to it.
; The assertions expect the wide load with strided shuffles, a vector mul for
; the even member, a vector add for the odd member, and an interleaved store.
| define void @test_2xi64_different_opcodes(ptr noalias %data, ptr noalias %factor) { |
| ; VF2-LABEL: define void @test_2xi64_different_opcodes( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]] |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 |
| ; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> |
| ; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] |
| ; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] |
| ; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 |
| ; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_different_opcodes( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]] |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]] |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP4]], align 8 |
| ; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ; VF4-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] |
| ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] |
| ; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 |
| ; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv |
| %l.factor = load i64, ptr %arrayidx, align 8 |
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds i64, ptr %data, i64 %1 |
| %l.0 = load i64, ptr %data.0, align 8 |
; Even member: multiply.
| %mul.0 = mul i64 %l.factor, %l.0 |
| store i64 %mul.0, ptr %data.0, align 8 |
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds i64, ptr %data, i64 %3 |
| %l.1 = load i64, ptr %data.1, align 8 |
; Odd member: add, i.e. a different opcode from the even member.
| %add.1 = add i64 %l.factor, %l.1 |
| store i64 %add.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
; Input: a 100-iteration loop that loads the pair data[2*iv], data[2*iv+1] and
; writes the products back crosswise: factor[iv] * data[2*iv+1] goes to
; data[2*iv], and factor[iv] * data[2*iv] goes to data[2*iv+1].
; The assertions expect the first operand of the concatenating shuffle to be
; the product of the odd-lane strided vector, and the second operand to be the
; product of the even-lane strided vector.
| define void @test_2xi64_interleave_loads_order_flipped(ptr noalias %data, ptr noalias %factor) { |
| ; VF2-LABEL: define void @test_2xi64_interleave_loads_order_flipped( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP15:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP15]] |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP16]], align 8 |
| ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[TMP20:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> |
| ; VF2-NEXT: [[TMP21:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP20]] |
| ; VF2-NEXT: [[TMP24:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]] |
| ; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP21]], <2 x i64> [[TMP24]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP16]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_interleave_loads_order_flipped( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP27:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP27]] |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP28]], align 8 |
| ; VF4-NEXT: [[TMP19:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; VF4-NEXT: [[TMP36:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ; VF4-NEXT: [[TMP37:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP36]] |
| ; VF4-NEXT: [[TMP42:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP19]] |
| ; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP37]], <4 x i64> [[TMP42]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP28]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv |
| %l.factor = load i64, ptr %arrayidx, align 8 |
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds i64, ptr %data, i64 %1 |
| %l.0 = load i64, ptr %data.0, align 8 |
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds i64, ptr %data, i64 %3 |
| %l.1 = load i64, ptr %data.1, align 8 |
; The value stored to the even slot is computed from the odd load ...
| %mul.0 = mul i64 %l.factor, %l.1 |
| store i64 %mul.0, ptr %data.0, align 8 |
; ... and the value stored to the odd slot is computed from the even load.
| %mul.1 = mul i64 %l.factor, %l.0 |
| store i64 %mul.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
; Input: a 100-iteration loop in which each member of the pair data[2*iv],
; data[2*iv+1] is multiplied by factor[iv] using its own loaded value, but the
; two products are stored to the opposite slots: %mul.1 goes to data[2*iv] and
; %mul.0 goes to data[2*iv+1]. The store to the even slot comes first.
; The assertions expect the multiplies in even/odd order and the concatenating
; shuffle to take the odd-lane product as its first operand.
| define void @test_2xi64_store_order_flipped_1(ptr noalias %data, ptr noalias %factor) { |
| ; VF2-LABEL: define void @test_2xi64_store_order_flipped_1( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]] |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 |
| ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[TMP21:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> |
| ; VF2-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]] |
| ; VF2-NEXT: [[TMP22:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP21]] |
| ; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP22]], <2 x i64> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_store_order_flipped_1( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP10:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP10]] |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP11]], align 8 |
| ; VF4-NEXT: [[TMP19:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; VF4-NEXT: [[TMP37:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ; VF4-NEXT: [[TMP20:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP19]] |
| ; VF4-NEXT: [[TMP38:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP37]] |
| ; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP38]], <4 x i64> [[TMP20]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv |
| %l.factor = load i64, ptr %arrayidx, align 8 |
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds i64, ptr %data, i64 %1 |
| %l.0 = load i64, ptr %data.0, align 8 |
| %mul.0 = mul i64 %l.factor, %l.0 |
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds i64, ptr %data, i64 %3 |
| %l.1 = load i64, ptr %data.1, align 8 |
| %mul.1 = mul i64 %l.factor, %l.1 |
; Stores are crossed: the odd product goes to the even slot and vice versa.
| store i64 %mul.1, ptr %data.0, align 8 |
| store i64 %mul.0, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Each of the 100 iterations loads one i64 from %factor and the adjacent i64 |
| ; pair %data[2*iv] / %data[2*iv+1], multiplies both pair elements by the factor |
| ; and writes the products back crosswise: %mul.0 (from element 0) is stored to |
| ; %data.1 and %mul.1 (from element 1) is stored to %data.0. In this variant the |
| ; store to %data.1 is issued before the store to %data.0. |
| define void @test_2xi64_store_order_flipped_2(ptr noalias %data, ptr noalias %factor) { |
| ; VF2-LABEL: define void @test_2xi64_store_order_flipped_2( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]] |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 |
| ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[TMP21:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> |
| ; VF2-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP11]] |
| ; VF2-NEXT: [[TMP22:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP21]] |
| ; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP22]], <2 x i64> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_store_order_flipped_2( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP10:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP10]] |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP11]], align 8 |
| ; VF4-NEXT: [[TMP19:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; VF4-NEXT: [[TMP37:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| ; VF4-NEXT: [[TMP20:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP19]] |
| ; VF4-NEXT: [[TMP38:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP37]] |
| ; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP38]], <4 x i64> [[TMP20]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv |
| %l.factor = load i64, ptr %arrayidx, align 8 |
| ; %1 = 2 * iv: index of the first element of the pair. |
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds i64, ptr %data, i64 %1 |
| %l.0 = load i64, ptr %data.0, align 8 |
| %mul.0 = mul i64 %l.factor, %l.0 |
| ; %3 = %1 + 1: the low bit of %1 is clear after the shift, so the disjoint or acts as an add. |
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds i64, ptr %data, i64 %3 |
| %l.1 = load i64, ptr %data.1, align 8 |
| %mul.1 = mul i64 %l.factor, %l.1 |
| ; Crosswise write-back, second slot first. |
| store i64 %mul.0, ptr %data.1, align 8 |
| store i64 %mul.1, ptr %data.0, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Stores to the adjacent i64 pair %data[2*iv] / %data[2*iv+1], where the two |
| ; stored products are built from different loads: |
| ;   %mul.0 = %l.src.0 * %l.0, both operands loaded from %gep.src.0 (%src.0[iv]) |
| ;   %mul.1 = %l.src.1 * %l.1, with %l.src.1 from %src.1[iv] and %l.1 from %data.1 |
| ; Despite "fmul" in the name, the multiplies are integer mul on i64. |
| ; The assertions below expect the vector loop to stop at index 98 (width 2) or |
| ; 96 (width 4) and then branch unconditionally to the scalar preheader. |
| ; NOTE(review): %data.0 is only ever stored to; %l.0 reads %gep.src.0 rather than |
| ; %data.0. Confirm this is intended before relying on it. |
| define void @test_2xi64_different_loads_feeding_fmul(ptr noalias %data, ptr noalias %src.0, ptr noalias %src.1) { |
| ; VF2-LABEL: define void @test_2xi64_different_loads_feeding_fmul( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[SRC_0:%.*]], ptr noalias [[SRC_1:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[SRC_0]], i64 [[INDEX]] |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]] |
| ; VF2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; VF2-NEXT: [[TMP14:%.*]] = or disjoint i64 [[TMP6]], 1 |
| ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP14]] |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP15]], align 8 |
| ; VF2-NEXT: [[TMP19:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[SRC_1]], i64 [[INDEX]] |
| ; VF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP21]], align 8 |
| ; VF2-NEXT: [[TMP22:%.*]] = mul <2 x i64> [[WIDE_LOAD2]], [[TMP19]] |
| ; VF2-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP22]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98 |
| ; VF2-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_different_loads_feeding_fmul( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[SRC_0:%.*]], ptr noalias [[SRC_1:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[SRC_0]], i64 [[INDEX]] |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP10:%.*]] = shl nsw i64 [[INDEX]], 1 |
| ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP10]] |
| ; VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP12:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; VF4-NEXT: [[TMP24:%.*]] = or disjoint i64 [[TMP10]], 1 |
| ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP24]] |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP25]], align 8 |
| ; VF4-NEXT: [[TMP33:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[SRC_1]], i64 [[INDEX]] |
| ; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds i64, ptr [[TMP34]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP35]], align 8 |
| ; VF4-NEXT: [[TMP36:%.*]] = mul <4 x i64> [[WIDE_LOAD2]], [[TMP33]] |
| ; VF4-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP36]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP13]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 |
| ; VF4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %gep.src.0 = getelementptr inbounds i64, ptr %src.0, i64 %iv |
| %l.src.0 = load i64, ptr %gep.src.0, align 8 |
| ; %1 = 2 * iv: index of the first element of the pair in %data. |
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds i64, ptr %data, i64 %1 |
| ; Second load of the same %src.0 element; %data.0 itself is not read. |
| %l.0 = load i64, ptr %gep.src.0, align 8 |
| %mul.0 = mul i64 %l.src.0, %l.0 |
| store i64 %mul.0, ptr %data.0, align 8 |
| ; %3 = %1 + 1: index of the second element of the pair. |
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds i64, ptr %data, i64 %3 |
| %l.1 = load i64, ptr %data.1, align 8 |
| %gep.src.1 = getelementptr inbounds i64, ptr %src.1, i64 %iv |
| %l.src.1 = load i64, ptr %gep.src.1, align 8 |
| %mul.1 = mul i64 %l.src.1, %l.1 |
| store i64 %mul.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Treats %data as an array of { i64, i64, i64 } records. Each of the 100 |
| ; iterations loads one i64 from %factor, multiplies all three fields of record |
| ; %iv by it and stores each product back to the field it was loaded from. |
| ; The assertions below expect one wide load and one wide store per vector |
| ; iteration (<6 x i64> at width 2, <12 x i64> at width 4), with shuffles |
| ; splitting the loaded vector per field and re-interleaving the products. |
| define void @test_3xi64(ptr noalias %data, ptr noalias %factor) { |
| ; VF2-LABEL: define void @test_3xi64( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 |
| ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[INDEX]], i32 0 |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <6 x i64>, ptr [[TMP4]], align 8 |
| ; VF2-NEXT: [[TMP9:%.*]] = shufflevector <6 x i64> [[WIDE_VEC]], <6 x i64> poison, <2 x i32> <i32 0, i32 3> |
| ; VF2-NEXT: [[TMP18:%.*]] = shufflevector <6 x i64> [[WIDE_VEC]], <6 x i64> poison, <2 x i32> <i32 1, i32 4> |
| ; VF2-NEXT: [[TMP27:%.*]] = shufflevector <6 x i64> [[WIDE_VEC]], <6 x i64> poison, <2 x i32> <i32 2, i32 5> |
| ; VF2-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP9]] |
| ; VF2-NEXT: [[TMP19:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP18]] |
| ; VF2-NEXT: [[TMP28:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP27]] |
| ; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> [[TMP19]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP28]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> |
| ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <6 x i64> [[TMP11]], <6 x i64> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5> |
| ; VF2-NEXT: store <6 x i64> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_3xi64( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 |
| ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds { i64, i64, i64 }, ptr [[DATA]], i64 [[INDEX]], i32 0 |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP6]], align 8 |
| ; VF4-NEXT: [[TMP17:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9> |
| ; VF4-NEXT: [[TMP34:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10> |
| ; VF4-NEXT: [[TMP51:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11> |
| ; VF4-NEXT: [[TMP18:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP17]] |
| ; VF4-NEXT: [[TMP35:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP34]] |
| ; VF4-NEXT: [[TMP52:%.*]] = mul <4 x i64> [[WIDE_LOAD]], [[TMP51]] |
| ; VF4-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP18]], <4 x i64> [[TMP35]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP52]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; VF4-NEXT: [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> [[TMP8]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP9]], <12 x i64> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> |
| ; VF4-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv |
| %l.factor = load i64, ptr %arrayidx, align 8 |
| ; Field 0 of record %iv: load, scale, store back in place. |
| %data.0 = getelementptr inbounds { i64 , i64, i64 }, ptr %data, i64 %iv, i32 0 |
| %l.0 = load i64, ptr %data.0, align 8 |
| %mul.0 = mul i64 %l.factor, %l.0 |
| store i64 %mul.0, ptr %data.0, align 8 |
| ; Field 1. |
| %data.1 = getelementptr inbounds { i64 , i64, i64 }, ptr %data, i64 %iv, i32 1 |
| %l.1 = load i64, ptr %data.1, align 8 |
| %mul.1 = mul i64 %l.factor, %l.1 |
| store i64 %mul.1, ptr %data.1, align 8 |
| ; Field 2. |
| %data.2 = getelementptr inbounds { i64 , i64, i64 }, ptr %data, i64 %iv, i32 2 |
| %l.2 = load i64, ptr %data.2, align 8 |
| %mul.2 = mul i64 %l.factor, %l.2 |
| store i64 %mul.2, ptr %data.2, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Treats %data as an array of { i32, i32, i32 } records but only touches fields |
| ; 0 and 1: each of the 100 iterations multiplies both fields by an i32 factor |
| ; and stores the products back in place. Field 2 is never accessed. |
| ; The factor address is computed with an i64-typed GEP while the load is i32, |
| ; so consecutive factor loads are 8 bytes apart and 4 bytes wide. |
| ; The assertions below expect wide loads followed by shuffles for both the |
| ; factor and the records, stores emitted one lane at a time via extractelement, |
| ; and a vector loop that stops at index 98 (width 2) or 96 (width 4) before |
| ; branching unconditionally to the scalar preheader. |
| define void @test_2xi32(ptr noalias %data, ptr noalias %factor) { |
| ; VF2-LABEL: define void @test_2xi32( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 |
| ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]] |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i32>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[WIDE_VEC]], <4 x i32> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0 |
| ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0 |
| ; VF2-NEXT: [[WIDE_VEC1:%.*]] = load <6 x i32>, ptr [[TMP8]], align 8 |
| ; VF2-NEXT: [[TMP13:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> <i32 0, i32 3> |
| ; VF2-NEXT: [[TMP22:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> <i32 1, i32 4> |
| ; VF2-NEXT: [[TMP14:%.*]] = mul <2 x i32> [[TMP7]], [[TMP13]] |
| ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0 |
| ; VF2-NEXT: store i32 [[TMP15]], ptr [[TMP8]], align 8 |
| ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1 |
| ; VF2-NEXT: store i32 [[TMP16]], ptr [[TMP9]], align 8 |
| ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1 |
| ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1 |
| ; VF2-NEXT: [[TMP23:%.*]] = mul <2 x i32> [[TMP7]], [[TMP22]] |
| ; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP23]], i32 0 |
| ; VF2-NEXT: store i32 [[TMP24]], ptr [[TMP17]], align 8 |
| ; VF2-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP23]], i32 1 |
| ; VF2-NEXT: store i32 [[TMP25]], ptr [[TMP18]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98 |
| ; VF2-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi32( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 |
| ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 |
| ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 |
| ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 |
| ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]] |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 8 |
| ; VF4-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0 |
| ; VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0 |
| ; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 0 |
| ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 0 |
| ; VF4-NEXT: [[WIDE_VEC1:%.*]] = load <12 x i32>, ptr [[TMP16]], align 8 |
| ; VF4-NEXT: [[TMP27:%.*]] = shufflevector <12 x i32> [[WIDE_VEC1]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9> |
| ; VF4-NEXT: [[TMP44:%.*]] = shufflevector <12 x i32> [[WIDE_VEC1]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10> |
| ; VF4-NEXT: [[TMP28:%.*]] = mul <4 x i32> [[TMP15]], [[TMP27]] |
| ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP28]], i32 0 |
| ; VF4-NEXT: store i32 [[TMP29]], ptr [[TMP16]], align 8 |
| ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP28]], i32 1 |
| ; VF4-NEXT: store i32 [[TMP30]], ptr [[TMP17]], align 8 |
| ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP28]], i32 2 |
| ; VF4-NEXT: store i32 [[TMP31]], ptr [[TMP18]], align 8 |
| ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP28]], i32 3 |
| ; VF4-NEXT: store i32 [[TMP32]], ptr [[TMP19]], align 8 |
| ; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1 |
| ; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1 |
| ; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 1 |
| ; VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 1 |
| ; VF4-NEXT: [[TMP45:%.*]] = mul <4 x i32> [[TMP15]], [[TMP44]] |
| ; VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP45]], i32 0 |
| ; VF4-NEXT: store i32 [[TMP46]], ptr [[TMP33]], align 8 |
| ; VF4-NEXT: [[TMP47:%.*]] = extractelement <4 x i32> [[TMP45]], i32 1 |
| ; VF4-NEXT: store i32 [[TMP47]], ptr [[TMP34]], align 8 |
| ; VF4-NEXT: [[TMP48:%.*]] = extractelement <4 x i32> [[TMP45]], i32 2 |
| ; VF4-NEXT: store i32 [[TMP48]], ptr [[TMP35]], align 8 |
| ; VF4-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[TMP45]], i32 3 |
| ; VF4-NEXT: store i32 [[TMP49]], ptr [[TMP36]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 |
| ; VF4-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; i64-stride address, but only an i32 is read from it. |
| %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv |
| %l.factor = load i32 , ptr %arrayidx, align 8 |
| ; Field 0 of record %iv: load, scale, store back in place. |
| %data.0 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 0 |
| %l.0 = load i32, ptr %data.0, align 8 |
| %mul.0 = mul i32 %l.factor, %l.0 |
| store i32 %mul.0, ptr %data.0, align 8 |
| ; Field 1; field 2 of the record is left untouched. |
| %data.1 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 1 |
| %l.1 = load i32, ptr %data.1, align 8 |
| %mul.1 = mul i32 %l.factor, %l.1 |
| store i32%mul.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Treats %data as an array of { i32, i32, i32 } records. Each of the 100 |
| ; iterations multiplies all three fields of record %iv by an i32 factor and |
| ; stores the products back in place. |
| ; The factor address is computed with an i64-typed GEP while the load is i32, |
| ; so consecutive factor loads are 8 bytes apart and 4 bytes wide. |
| ; The assertions below expect a wide factor load with a shuffle selecting every |
| ; other lane, one wide load and one wide interleaved store for the records |
| ; (<6 x i32> at width 2, <12 x i32> at width 4), and a vector loop that stops |
| ; at index 98 / 96 before branching unconditionally to the scalar preheader. |
| define void @test_3xi32(ptr noalias %data, ptr noalias %factor) { |
| ; VF2-LABEL: define void @test_3xi32( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i32>, ptr [[TMP3]], align 8 |
| ; VF2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[WIDE_VEC]], <4 x i32> poison, <2 x i32> <i32 0, i32 2> |
| ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0 |
| ; VF2-NEXT: [[WIDE_VEC1:%.*]] = load <6 x i32>, ptr [[TMP9]], align 8 |
| ; VF2-NEXT: [[TMP13:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> <i32 0, i32 3> |
| ; VF2-NEXT: [[TMP22:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> <i32 1, i32 4> |
| ; VF2-NEXT: [[TMP31:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> <i32 2, i32 5> |
| ; VF2-NEXT: [[TMP14:%.*]] = mul <2 x i32> [[TMP7]], [[TMP13]] |
| ; VF2-NEXT: [[TMP23:%.*]] = mul <2 x i32> [[TMP7]], [[TMP22]] |
| ; VF2-NEXT: [[TMP32:%.*]] = mul <2 x i32> [[TMP7]], [[TMP31]] |
| ; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP32]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> |
| ; VF2-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP11]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <6 x i32> [[TMP10]], <6 x i32> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5> |
| ; VF2-NEXT: store <6 x i32> [[INTERLEAVED_VEC]], ptr [[TMP9]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98 |
| ; VF2-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_3xi32( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] |
| ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP7]], align 8 |
| ; VF4-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0 |
| ; VF4-NEXT: [[WIDE_VEC1:%.*]] = load <12 x i32>, ptr [[TMP19]], align 8 |
| ; VF4-NEXT: [[TMP27:%.*]] = shufflevector <12 x i32> [[WIDE_VEC1]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9> |
| ; VF4-NEXT: [[TMP44:%.*]] = shufflevector <12 x i32> [[WIDE_VEC1]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10> |
| ; VF4-NEXT: [[TMP61:%.*]] = shufflevector <12 x i32> [[WIDE_VEC1]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11> |
| ; VF4-NEXT: [[TMP28:%.*]] = mul <4 x i32> [[TMP15]], [[TMP27]] |
| ; VF4-NEXT: [[TMP45:%.*]] = mul <4 x i32> [[TMP15]], [[TMP44]] |
| ; VF4-NEXT: [[TMP62:%.*]] = mul <4 x i32> [[TMP15]], [[TMP61]] |
| ; VF4-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP28]], <4 x i32> [[TMP45]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP62]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| ; VF4-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i32> [[TMP10]], <12 x i32> poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> |
| ; VF4-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP19]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF4-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 |
| ; VF4-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; i64-stride address, but only an i32 is read from it. |
| %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv |
| %l.factor = load i32 , ptr %arrayidx, align 8 |
| ; Field 0 of record %iv: load, scale, store back in place. |
| %data.0 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 0 |
| %l.0 = load i32, ptr %data.0, align 8 |
| %mul.0 = mul i32 %l.factor, %l.0 |
| store i32 %mul.0, ptr %data.0, align 8 |
| ; Field 1. |
| %data.1 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 1 |
| %l.1 = load i32, ptr %data.1, align 8 |
| %mul.1 = mul i32 %l.factor, %l.1 |
| store i32%mul.1, ptr %data.1, align 8 |
| ; Field 2. |
| %data.2 = getelementptr inbounds { i32, i32, i32 }, ptr %data, i64 %iv, i32 2 |
| %l.2 = load i32, ptr %data.2, align 8 |
| %mul.2 = mul i32 %l.factor, %l.2 |
| store i32 %mul.2, ptr %data.2, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| define void @test_2xi64_sub_of_wide_loads(ptr noalias %data, ptr noalias %A, ptr noalias %B) { |
| ; VF2-LABEL: define void @test_2xi64_sub_of_wide_loads( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 |
| ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 |
| ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 |
| ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 |
| ; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 |
| ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 |
| ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 2 |
| ; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8 |
| ; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8 |
| ; VF2-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] |
| ; VF2-NEXT: [[TMP13:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]] |
| ; VF2-NEXT: [[TMP19:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF2-NEXT: [[TMP20:%.*]] = shl nsw i64 [[TMP1]], 1 |
| ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP19]] |
| ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP20]] |
| ; VF2-NEXT: [[TMP14:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] |
| ; VF2-NEXT: [[TMP15:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]] |
| ; VF2-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP16]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[DATA_0]], align 8 |
| ; VF2-NEXT: [[TMP17:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC4:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC4]], ptr [[DATA_1]], align 8 |
| ; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[EC]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_sub_of_wide_loads( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] |
| ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 |
| ; VF4-NEXT: [[TMP5:%.*]] = sub <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; VF4-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]] |
| ; VF4-NEXT: [[TMP8:%.*]] = sub <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; VF4-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 |
| ; VF4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %gep.A = getelementptr inbounds i64, ptr %A, i64 %iv |
| %l.A = load i64, ptr %gep.A, align 8 |
| %gep.B = getelementptr inbounds i64, ptr %B, i64 %iv |
| %l.B = load i64, ptr %gep.B, align 8 |
| %sub.0 = sub i64 %l.A, %l.B |
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds i64, ptr %data, i64 %1 |
| store i64 %sub.0, ptr %data.0, align 8 |
| %sub.1 = sub i64 %l.A, %l.B |
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds i64, ptr %data, i64 %3 |
| store i64 %sub.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Each iteration loads A[iv] and B[iv], then stores A[iv] - B[iv] to data[2*iv] |
| ; and B[iv] - A[iv] to data[2*iv + 1]: the two adjacent stores are fed by the |
| ; same loads, but the second sub has its operands swapped. The loop exits once |
| ; iv.next reaches 100. |
| ; The assertions below expect wide loads of A and B, two distinct vector subs, |
| ; and the two results combined via shufflevector into a wide interleaved store. |
| ; The assertions are autogenerated (see the NOTE at the top of this file); |
| ; regenerate them rather than editing them by hand. |
| define void @test_2xi64_sub_of_wide_loads_ops_swapped(ptr noalias %data, ptr noalias %A, ptr noalias %B) { |
| ; VF2-LABEL: define void @test_2xi64_sub_of_wide_loads_ops_swapped( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 |
| ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 |
| ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 |
| ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 |
| ; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 |
| ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 |
| ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 2 |
| ; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8 |
| ; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8 |
| ; VF2-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] |
| ; VF2-NEXT: [[TMP13:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]] |
| ; VF2-NEXT: [[TMP19:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF2-NEXT: [[TMP20:%.*]] = shl nsw i64 [[TMP1]], 1 |
| ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP19]] |
| ; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP20]] |
| ; VF2-NEXT: [[TMP14:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT]] |
| ; VF2-NEXT: [[TMP15:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT6]], [[BROADCAST_SPLAT4]] |
| ; VF2-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP16]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[DATA_0]], align 8 |
| ; VF2-NEXT: [[TMP17:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC4:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC4]], ptr [[DATA_1]], align 8 |
| ; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[EC]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_sub_of_wide_loads_ops_swapped( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] |
| ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 |
| ; VF4-NEXT: [[TMP5:%.*]] = sub <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; VF4-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]] |
| ; VF4-NEXT: [[TMP8:%.*]] = sub <4 x i64> [[WIDE_LOAD1]], [[WIDE_LOAD]] |
| ; VF4-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 |
| ; VF4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %gep.A = getelementptr inbounds i64, ptr %A, i64 %iv |
| %l.A = load i64, ptr %gep.A, align 8 |
| %gep.B = getelementptr inbounds i64, ptr %B, i64 %iv |
| %l.B = load i64, ptr %gep.B, align 8 |
| %sub.0 = sub i64 %l.A, %l.B |
| ; %1 = 2 * iv: index of the first of the two adjacent i64 slots in %data. |
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds i64, ptr %data, i64 %1 |
| store i64 %sub.0, ptr %data.0, align 8 |
| ; Same two loads as %sub.0, but with the operands swapped. |
| %sub.1 = sub i64 %l.B, %l.A |
| ; Bit 0 of %1 is clear after the shift, so this is 2 * iv + 1. |
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds i64, ptr %data, i64 %3 |
| store i64 %sub.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Each iteration loads A[iv], B[iv] and C[iv], then stores A[iv] - B[iv] to |
| ; data[2*iv] and A[iv] - C[iv] to data[2*iv + 1]: the second operand of the two |
| ; subs comes from different base pointers (%B vs. %C). The loop exits once |
| ; iv.next reaches 100. |
| ; The assertions below expect three wide loads (A, B and C), two vector subs, |
| ; and the two results combined via shufflevector into a wide interleaved store. |
| ; The assertions are autogenerated (see the NOTE at the top of this file); |
| ; regenerate them rather than editing them by hand. |
| define void @test_2xi64_sub_of_wide_loads_with_different_base_ptrs(ptr noalias %data, ptr noalias %A, ptr noalias %B, ptr noalias %C) { |
| ; VF2-LABEL: define void @test_2xi64_sub_of_wide_loads_with_different_base_ptrs( |
| ; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { |
| ; VF2-NEXT: [[ENTRY:.*:]] |
| ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF2: [[VECTOR_PH]]: |
| ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF2: [[VECTOR_BODY]]: |
| ; VF2-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 |
| ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 |
| ; VF2-NEXT: [[TMP5:%.*]] = sub <2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]] |
| ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[C]], i64 [[TMP0]] |
| ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 |
| ; VF2-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8 |
| ; VF2-NEXT: [[TMP10:%.*]] = sub <2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; VF2-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 |
| ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 |
| ; VF2-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF2-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] |
| ; VF2: [[MIDDLE_BLOCK]]: |
| ; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF2: [[SCALAR_PH]]: |
| ; |
| ; VF4-LABEL: define void @test_2xi64_sub_of_wide_loads_with_different_base_ptrs( |
| ; VF4-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { |
| ; VF4-NEXT: [[ENTRY:.*:]] |
| ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; VF4: [[VECTOR_PH]]: |
| ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; VF4: [[VECTOR_BODY]]: |
| ; VF4-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] |
| ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 |
| ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] |
| ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 |
| ; VF4-NEXT: [[TMP5:%.*]] = sub <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; VF4-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1 |
| ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]] |
| ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[C]], i64 [[TMP0]] |
| ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 |
| ; VF4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 |
| ; VF4-NEXT: [[TMP10:%.*]] = sub <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; VF4-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 |
| ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 |
| ; VF4-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 |
| ; VF4-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] |
| ; VF4: [[MIDDLE_BLOCK]]: |
| ; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; VF4: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %gep.A = getelementptr inbounds i64, ptr %A, i64 %iv |
| %l.A = load i64, ptr %gep.A, align 8 |
| %gep.B = getelementptr inbounds i64, ptr %B, i64 %iv |
| %l.B = load i64, ptr %gep.B, align 8 |
| %sub.0 = sub i64 %l.A, %l.B |
| ; %1 = 2 * iv: index of the first of the two adjacent i64 slots in %data. |
| %1 = shl nsw i64 %iv, 1 |
| %data.0 = getelementptr inbounds i64, ptr %data, i64 %1 |
| store i64 %sub.0, ptr %data.0, align 8 |
| ; The second stored value subtracts a load from %C instead of %B. |
| %gep.C = getelementptr inbounds i64, ptr %C, i64 %iv |
| %l.C = load i64, ptr %gep.C, align 8 |
| %sub.1 = sub i64 %l.A, %l.C |
| ; Bit 0 of %1 is clear after the shift, so this is 2 * iv + 1. |
| %3 = or disjoint i64 %1, 1 |
| %data.1 = getelementptr inbounds i64, ptr %data, i64 %3 |
| store i64 %sub.1, ptr %data.1, align 8 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |