| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -passes=loop-vectorize -mtriple=thumbv7s-apple-ios6.0.0 -S -enable-interleaved-mem-accesses=false < %s | FileCheck %s |
| |
| target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" |
| |
| @kernel = global [512 x float] zeroinitializer, align 4 |
| @kernel2 = global [512 x float] zeroinitializer, align 4 |
| @kernel3 = global [512 x float] zeroinitializer, align 4 |
| @kernel4 = global [512 x float] zeroinitializer, align 4 |
| @src_data = global [1536 x float] zeroinitializer, align 4 |
| |
| ; We don't want to vectorize most loops containing gathers because they are |
| ; expensive. |
| ; Make sure the loop below is left unvectorized. |
| |
| define float @_Z4testmm(i64 %size, i64 %offset) { |
| ; CHECK-LABEL: define float @_Z4testmm( |
| ; CHECK-SAME: i64 [[SIZE:%.*]], i64 [[OFFSET:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_0:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_0_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_1:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_1_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_2:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_2_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV]], [[OFFSET]] |
| ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[ADD]], 3 |
| ; CHECK-NEXT: [[GEP_SRC_DATA:%.*]] = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 [[MUL]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[GEP_SRC_DATA]], align 4 |
| ; CHECK-NEXT: [[GEP_KERNEL:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[GEP_KERNEL]], align 4 |
| ; CHECK-NEXT: [[MUL3:%.*]] = fmul fast float [[TMP0]], [[TMP1]] |
| ; CHECK-NEXT: [[GEP_KERNEL2:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_KERNEL2]], align 4 |
| ; CHECK-NEXT: [[MUL5:%.*]] = fmul fast float [[MUL3]], [[TMP2]] |
| ; CHECK-NEXT: [[GEP_KERNEL3:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_KERNEL3]], align 4 |
| ; CHECK-NEXT: [[MUL7:%.*]] = fmul fast float [[MUL5]], [[TMP3]] |
| ; CHECK-NEXT: [[GEP_KERNEL4:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_KERNEL4]], align 4 |
| ; CHECK-NEXT: [[MUL9:%.*]] = fmul fast float [[MUL7]], [[TMP4]] |
| ; CHECK-NEXT: [[RDX_0_NEXT]] = fadd fast float [[RDX_0]], [[MUL9]] |
| ; CHECK-NEXT: [[GEP_SRC_DATA_SUM:%.*]] = add i64 [[MUL]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 [[GEP_SRC_DATA_SUM]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX11]], align 4 |
| ; CHECK-NEXT: [[MUL13:%.*]] = fmul fast float [[TMP1]], [[TMP5]] |
| ; CHECK-NEXT: [[MUL15:%.*]] = fmul fast float [[TMP2]], [[MUL13]] |
| ; CHECK-NEXT: [[MUL17:%.*]] = fmul fast float [[TMP3]], [[MUL15]] |
| ; CHECK-NEXT: [[MUL19:%.*]] = fmul fast float [[TMP4]], [[MUL17]] |
| ; CHECK-NEXT: [[RDX_1_NEXT]] = fadd fast float [[RDX_1]], [[MUL19]] |
| ; CHECK-NEXT: [[GEP_SRC_DATA_SUM52:%.*]] = add i64 [[MUL]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 [[GEP_SRC_DATA_SUM52]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX21]], align 4 |
| ; CHECK-NEXT: [[MUL23:%.*]] = fmul fast float [[TMP1]], [[TMP6]] |
| ; CHECK-NEXT: [[MUL25:%.*]] = fmul fast float [[TMP2]], [[MUL23]] |
| ; CHECK-NEXT: [[MUL27:%.*]] = fmul fast float [[TMP3]], [[MUL25]] |
| ; CHECK-NEXT: [[MUL29:%.*]] = fmul fast float [[TMP4]], [[MUL27]] |
| ; CHECK-NEXT: [[RDX_2_NEXT]] = fadd fast float [[RDX_2]], [[MUL29]] |
| ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[SIZE]] |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RDX_0_NEXT_LCSSA:%.*]] = phi float [ [[RDX_0_NEXT]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_1_NEXT_LCSSA:%.*]] = phi float [ [[RDX_1_NEXT]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RDX_2_NEXT_LCSSA:%.*]] = phi float [ [[RDX_2_NEXT]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RES_0:%.*]] = fadd float [[RDX_0_NEXT_LCSSA]], [[RDX_1_NEXT_LCSSA]] |
| ; CHECK-NEXT: [[RES_1:%.*]] = fadd float [[RES_0]], [[RDX_2_NEXT_LCSSA]] |
| ; CHECK-NEXT: ret float [[RES_1]] |
| ; |
| ; Input loop: three fast-math float reductions (%rdx.0, %rdx.1, %rdx.2) driven |
| ; by one induction variable. Each iteration reads three consecutive elements of |
| ; @src_data starting at index 3 * (%iv + %offset) and multiplies each of them by |
| ; the %iv-th elements of @kernel, @kernel2, @kernel3 and @kernel4. |
| ; The assertions above expect the loop to come out of the pass still scalar. |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx.0 = phi float [ 0.000000e+00, %entry ], [ %rdx.0.next, %loop ] |
| %rdx.1 = phi float [ 0.000000e+00, %entry ], [ %rdx.1.next, %loop ] |
| %rdx.2 = phi float [ 0.000000e+00, %entry ], [ %rdx.2.next, %loop ] |
| ; Base index into @src_data advances by 3 per iteration; the +1 and +2 |
| ; neighbours are loaded further down. |
| %add = add i64 %iv, %offset |
| %mul = mul i64 %add, 3 |
| %gep.src_data = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %mul |
| %0 = load float, ptr %gep.src_data, align 4 |
| ; The four kernel arrays are indexed directly by %iv; their loaded values |
| ; (%1..%4) are reused by all three reductions. |
| %gep.kernel = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 %iv |
| %1 = load float, ptr %gep.kernel, align 4 |
| %mul3 = fmul fast float %0, %1 |
| %gep.kernel2 = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 %iv |
| %2 = load float, ptr %gep.kernel2, align 4 |
| %mul5 = fmul fast float %mul3, %2 |
| %gep.kernel3 = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 %iv |
| %3 = load float, ptr %gep.kernel3, align 4 |
| %mul7 = fmul fast float %mul5, %3 |
| %gep.kernel4 = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 %iv |
| %4 = load float, ptr %gep.kernel4, align 4 |
| %mul9 = fmul fast float %mul7, %4 |
| %rdx.0.next = fadd fast float %rdx.0, %mul9 |
| ; Second reduction: @src_data element at base index + 1. |
| %gep.src_data.sum = add i64 %mul, 1 |
| %arrayidx11 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %gep.src_data.sum |
| %5 = load float, ptr %arrayidx11, align 4 |
| %mul13 = fmul fast float %1, %5 |
| %mul15 = fmul fast float %2, %mul13 |
| %mul17 = fmul fast float %3, %mul15 |
| %mul19 = fmul fast float %4, %mul17 |
| %rdx.1.next = fadd fast float %rdx.1, %mul19 |
| ; Third reduction: @src_data element at base index + 2. |
| %gep.src_data.sum52 = add i64 %mul, 2 |
| %arrayidx21 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %gep.src_data.sum52 |
| %6 = load float, ptr %arrayidx21, align 4 |
| %mul23 = fmul fast float %1, %6 |
| %mul25 = fmul fast float %2, %mul23 |
| %mul27 = fmul fast float %3, %mul25 |
| %mul29 = fmul fast float %4, %mul27 |
| %rdx.2.next = fadd fast float %rdx.2, %mul29 |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, %size |
| br i1 %exitcond, label %loop, label %exit |
| |
| exit: |
| ; The three partial sums are combined after the loop; these fadds carry no |
| ; fast-math flags. |
| %res.0 = fadd float %rdx.0.next, %rdx.1.next |
| %res.1 = fadd float %res.0, %rdx.2.next |
| ret float %res.1 |
| } |