; blob: f2cc0a5a6f8ffb693124712e33a9e380617debf0 [file] [log] [blame]
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
; RUN: opt -mtriple=thumbv7em %s -S -loop-reduce -lsr-complexity-limit=65536 -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
; RUN: opt -mtriple=thumbv7em %s -S -loop-reduce -lsr-complexity-limit=2147483647 -o - | FileCheck %s --check-prefix=CHECK-COMPLEX
; CHECK-DEFAULT: phi i32
; CHECK-DEFAULT: [[LSR_IV:%[^ ]+]] = phi i32 [ [[LSR_IV_NEXT:%[^ ]+]], ], [ 0, ]
; CHECK-DEFAULT: phi i32
; CHECK-DEFAULT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 8
; CHECK-COMPLEX: phi i32
; CHECK-COMPLEX: [[LSR_IV6:%[^ ]+]] = phi i16* [ [[SCEVGEP7:%[^ ]+]], ], [ [[SCEVGEP5:%[^ ]+]], ]
; CHECK-COMPLEX: [[LSR_IV:%[^ ]+]] = phi i16* [ [[SCEVGEP1:%[^ ]+]], ], [ [[SCEVGEP:%[^ ]+]], ]
; CHECK-COMPLEX: phi i32
; CHECK-COMPLEX: [[SCEVGEP1]] = getelementptr i16, i16* [[LSR_IV]], i32 4
; CHECK-COMPLEX: [[SCEVGEP7]] = getelementptr i16, i16* [[LSR_IV6]], i32 4
; @convolve - input function for the loop-strength-reduction checks in this file.
;
; Parameters (all pointer arguments are nocapture readonly):
;   %input_image : i16**  - indexed to fetch an i16* (see the getelementptr on it below)
;   %filter      : i16**  - indexed to fetch an i16* (see the getelementptr on it below)
;   %filter_dim  : i32    - loop bound; also split into a multiple-of-4 part and a remainder
;   %out_width   : i32    - loop bound compared by %exitcond99
;   %out_height  : i32    - outermost loop bound; 0 means return immediately
;   %convolved   : i32**  - indexed by the outer counter to fetch an i32* (%tmp3)
; Returns: void.
;
; NOTE(review): many block labels and SSA value names in this body are missing
; (for example "label ;", "= phi i32 [, ]", "load i16*, i16**, align 4"), and
; several instructions have been run together on one line or folded into
; trailing comments. As written the text will not parse as LLVM IR; restore it
; from the original test before relying on it. The comments below describe only
; what the surviving tokens show.
define void @convolve(i16** nocapture readonly %input_image, i16** nocapture readonly %filter, i32 %filter_dim, i32 %out_width, i32 %out_height, i32** nocapture readonly %convolved) {
; Guard: when %out_height is 0, branch straight to for.cond.cleanup (ret void).
%cmp92 = icmp eq i32 %out_height, 0
br i1 %cmp92, label %for.cond.cleanup, label ; preds = %entry
; %xtraiter = %filter_dim & 3; %unroll_iter = %filter_dim - %xtraiter, i.e.
; %filter_dim rounded down to a multiple of 4. %unroll_iter seeds %niter below.
%xtraiter = and i32 %filter_dim, 3
%unroll_iter = sub i32 %filter_dim, %xtraiter
br label %for.cond1.preheader
; Outer loop header: %res_y.093 starts at 0 and takes %add28 on the back edge
; from for.cond.cleanup3. %tmp3 is the i32* loaded from %convolved[%res_y.093].
for.cond1.preheader: ; preds = %for.cond.cleanup3,
%res_y.093 = phi i32 [ 0, ], [ %add28, %for.cond.cleanup3 ]
%arrayidx22 = getelementptr inbounds i32*, i32** %convolved, i32 %res_y.093
%tmp3 = load i32*, i32** %arrayidx22, align 4
br label ; preds =, = phi i32 [, ], [ 0, %for.cond1.preheader ]
; NOTE(review): the next lines presumably set up the middle loops; %tmp5 looks
; like the i16* loaded via the getelementptr on %filter and %tmp6 the one
; loaded via the getelementptr on %input_image - the load operands are missing,
; so confirm against the original test.
br label ; preds =, = phi i32 [, ], [ 0, ] = phi i32 [, ], [ 0, ] = add i32, %res_y.093 = getelementptr inbounds i16*, i16** %filter, i32
%tmp5 = load i16*, i16**, align 4 = getelementptr inbounds i16*, i16** %input_image, i32
%tmp6 = load i16*, i16**, align 4
br label ; preds =, = phi i32 [, ], [ 0, ] = phi i32 [, ], [, ]
; Innermost loop, unrolled by four: %niter starts at %unroll_iter and is
; reduced by 4 per iteration (%niter.nsub.3) until it reaches 0. Each of the
; four steps loads one i16 through %tmp5 and one through %tmp6, sign-extends
; both to i32, multiplies them (mul nsw) and accumulates (add nsw). The
; "or i32, 1/2/3" and "add i32, 4" fragments step the index between loads.
%niter = phi i32 [ %niter.nsub.3, ], [ %unroll_iter, ] = add i32, = getelementptr inbounds i16, i16* %tmp5, i32
%tmp9 = load i16, i16*, align 2 = sext i16 %tmp9 to i32 = getelementptr inbounds i16, i16* %tmp6, i32
%tmp10 = load i16, i16*, align 2 = sext i16 %tmp10 to i32 = mul nsw i32, = add nsw i32, = or i32, 1 = add i32, = getelementptr inbounds i16, i16* %tmp5, i32
%tmp11 = load i16, i16*, align 2 = sext i16 %tmp11 to i32 = getelementptr inbounds i16, i16* %tmp6, i32
%tmp12 = load i16, i16*, align 2 = sext i16 %tmp12 to i32 = mul nsw i32, = add nsw i32, = or i32, 2 = add i32, = getelementptr inbounds i16, i16* %tmp5, i32
%tmp13 = load i16, i16*, align 2 = sext i16 %tmp13 to i32 = getelementptr inbounds i16, i16* %tmp6, i32
%tmp14 = load i16, i16*, align 2 = sext i16 %tmp14 to i32 = mul nsw i32, = add nsw i32, = or i32, 3 = add i32, = getelementptr inbounds i16, i16* %tmp5, i32
%tmp15 = load i16, i16*, align 2 = sext i16 %tmp15 to i32 = getelementptr inbounds i16, i16* %tmp6, i32
%tmp16 = load i16, i16*, align 2 = sext i16 %tmp16 to i32 = mul nsw i32, = add nsw i32, = add i32, 4
%niter.nsub.3 = add i32 %niter, -4
%niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
br i1 %niter.ncmp.3, label, label ; preds =, = add nuw i32, 1
; %exitcond98 compares a counter (name missing) against %filter_dim.
%exitcond98 = icmp eq i32, %filter_dim
br i1 %exitcond98, label, label ; preds = = getelementptr inbounds i32, i32* %tmp3, i32
; An i32 is stored at a 4-byte-aligned address; presumably the element of
; %tmp3 addressed by the getelementptr above - operands are missing, confirm.
store i32, i32*, align 4 = add nuw i32, 1
; %exitcond99 compares a counter (name missing) against %out_width; when equal,
; control leaves for for.cond.cleanup3.
%exitcond99 = icmp eq i32, %out_width
br i1 %exitcond99, label %for.cond.cleanup3, label
; Outer loop latch: advance %res_y.093 by one and repeat until it equals
; %out_height.
for.cond.cleanup3: ; preds =, %for.cond5.preheader.preheader, %for.cond1.preheader
%add28 = add nuw i32 %res_y.093, 1
%exitcond100 = icmp eq i32 %add28, %out_height
br i1 %exitcond100, label %for.cond.cleanup, label %for.cond1.preheader
; Common exit, reached from the entry guard or from the outer loop latch.
for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
ret void