| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
 | ; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s | 
 | ; Increment in loop bb.i28.i adjusted to 2, to prevent loop reversal from | 
 | ; kicking in. | 
 |  | 
 | ; External fastcc routine, declared here without a body; mp_sqrt calls it once, in block mp_sqrt_init.exit. | 
 | declare fastcc void @rdft(i32, i32, ptr, ptr, ptr) | 
 |  | 
 | ; mp_sqrt: codegen test input for llc on i686 with SSE2 (see the command line at the top of the file). | 
 | ; The assertion lines inside the function are autogenerated by utils/update_llc_test_checks.py; | 
 | ; rerun that script instead of editing them by hand. | 
 | ; The IR is a control-flow skeleton: every conditional branch uses %foo or %exitcond40.i as its | 
 | ; condition, and many call/store operands are the constants 0 or null. | 
 | define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, ptr %tmp2, i32 %nfft, ptr %tmp1fft, ptr %tmp2fft, ptr %ip, ptr %w) nounwind { | 
 | ; CHECK-LABEL: mp_sqrt: | 
 | ; CHECK:       # %bb.0: # %entry | 
 | ; CHECK-NEXT:    pushl %ebp | 
 | ; CHECK-NEXT:    pushl %ebx | 
 | ; CHECK-NEXT:    pushl %edi | 
 | ; CHECK-NEXT:    pushl %esi | 
 | ; CHECK-NEXT:    pushl %eax | 
 | ; CHECK-NEXT:    movb $1, %cl | 
 | ; CHECK-NEXT:    movl $1, %ebx | 
 | ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi | 
 | ; CHECK-NEXT:    .p2align 4 | 
 | ; CHECK-NEXT:  .LBB0_1: # %bb.i5 | 
 | ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1 | 
 | ; CHECK-NEXT:    movl %ecx, %eax | 
 | ; CHECK-NEXT:    addl %ebx, %ebx | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    testb $1, %al | 
 | ; CHECK-NEXT:    jne .LBB0_1 | 
 | ; CHECK-NEXT:  # %bb.2: # %mp_unexp_mp2d.exit.i | 
 | ; CHECK-NEXT:    je .LBB0_3 | 
 | ; CHECK-NEXT:  # %bb.5: # %cond_next.i | 
 | ; CHECK-NEXT:    testb $1, %al | 
 | ; CHECK-NEXT:    jne .LBB0_3 | 
 | ; CHECK-NEXT:  # %bb.6: # %cond_next36.i | 
 | ; CHECK-NEXT:    movl $0, 0 | 
 | ; CHECK-NEXT:    movzbl %al, %ebp | 
 | ; CHECK-NEXT:    andl $1, %ebp | 
 | ; CHECK-NEXT:    xorpd %xmm0, %xmm0 | 
 | ; CHECK-NEXT:    xorl %eax, %eax | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    xorpd %xmm1, %xmm1 | 
 | ; CHECK-NEXT:    .p2align 4 | 
 | ; CHECK-NEXT:  .LBB0_7: # %bb.i28.i | 
 | ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1 | 
 | ; CHECK-NEXT:    cvttsd2si %xmm1, %edi | 
 | ; CHECK-NEXT:    cmpl %edx, %edi | 
 | ; CHECK-NEXT:    cmovgel %eax, %edi | 
 | ; CHECK-NEXT:    addl $2, %ecx | 
 | ; CHECK-NEXT:    xorps %xmm2, %xmm2 | 
 | ; CHECK-NEXT:    cvtsi2sd %edi, %xmm2 | 
 | ; CHECK-NEXT:    xorpd %xmm1, %xmm1 | 
 | ; CHECK-NEXT:    subsd %xmm2, %xmm1 | 
 | ; CHECK-NEXT:    mulsd %xmm0, %xmm1 | 
 | ; CHECK-NEXT:    addl $-2, %ebp | 
 | ; CHECK-NEXT:    jne .LBB0_7 | 
 | ; CHECK-NEXT:  # %bb.8: # %mp_unexp_d2mp.exit29.i | 
 | ; CHECK-NEXT:    movl $0, 0 | 
 | ; CHECK-NEXT:    je .LBB0_9 | 
 | ; CHECK-NEXT:  # %bb.10: # %mp_sqrt_init.exit | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    movl %edx, %edi | 
 | ; CHECK-NEXT:    movl %esi, %edx | 
 | ; CHECK-NEXT:    calll mp_mul_csqu@PLT | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    movl $-1, %edx | 
 | ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp) | 
 | ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp) | 
 | ; CHECK-NEXT:    pushl $0 | 
 | ; CHECK-NEXT:    calll rdft@PLT | 
 | ; CHECK-NEXT:    addl $12, %esp | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    movl %edi, (%esp) # 4-byte Spill | 
 | ; CHECK-NEXT:    movl %edi, %edx | 
 | ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi | 
 | ; CHECK-NEXT:    pushl %edi | 
 | ; CHECK-NEXT:    pushl %esi | 
 | ; CHECK-NEXT:    pushl $0 | 
 | ; CHECK-NEXT:    calll mp_mul_d2i@PLT | 
 | ; CHECK-NEXT:    addl $12, %esp | 
 | ; CHECK-NEXT:    testl %ebp, %ebp | 
 | ; CHECK-NEXT:    je .LBB0_11 | 
 | ; CHECK-NEXT:  .LBB0_3: # %cond_true.i | 
 | ; CHECK-NEXT:    addl $4, %esp | 
 | ; CHECK-NEXT:  .LBB0_4: # %cond_true.i | 
 | ; CHECK-NEXT:    popl %esi | 
 | ; CHECK-NEXT:    popl %edi | 
 | ; CHECK-NEXT:    popl %ebx | 
 | ; CHECK-NEXT:    popl %ebp | 
 | ; CHECK-NEXT:    retl | 
 | ; CHECK-NEXT:    .p2align 4 | 
 | ; CHECK-NEXT:  .LBB0_9: # %bb.i.i | 
 | ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1 | 
 | ; CHECK-NEXT:    jmp .LBB0_9 | 
 | ; CHECK-NEXT:  .LBB0_11: # %cond_false.i | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    movl (%esp), %esi # 4-byte Reload | 
 | ; CHECK-NEXT:    movl %esi, %edx | 
 | ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp) | 
 | ; CHECK-NEXT:    pushl $0 | 
 | ; CHECK-NEXT:    calll mp_round@PLT | 
 | ; CHECK-NEXT:    addl $8, %esp | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    movl %esi, %edx | 
 | ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp | 
 | ; CHECK-NEXT:    pushl %ebp | 
 | ; CHECK-NEXT:    pushl %edi | 
 | ; CHECK-NEXT:    pushl %ebp | 
 | ; CHECK-NEXT:    calll mp_add@PLT | 
 | ; CHECK-NEXT:    addl $12, %esp | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    movl %esi, %edx | 
 | ; CHECK-NEXT:    pushl %edi | 
 | ; CHECK-NEXT:    pushl %edi | 
 | ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp) | 
 | ; CHECK-NEXT:    calll mp_sub@PLT | 
 | ; CHECK-NEXT:    addl $12, %esp | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    movl %esi, %edx | 
 | ; CHECK-NEXT:    pushl %ebp | 
 | ; CHECK-NEXT:    pushl $0 | 
 | ; CHECK-NEXT:    calll mp_round@PLT | 
 | ; CHECK-NEXT:    addl $8, %esp | 
 | ; CHECK-NEXT:    xorl %ecx, %ecx | 
 | ; CHECK-NEXT:    movl %esi, %edx | 
 | ; CHECK-NEXT:    pushl %edi | 
 | ; CHECK-NEXT:    pushl {{[0-9]+}}(%esp) | 
 | ; CHECK-NEXT:    pushl %ebx | 
 | ; CHECK-NEXT:    calll mp_mul_d2i@PLT | 
 | ; CHECK-NEXT:    addl $16, %esp | 
 | ; CHECK-NEXT:    jmp .LBB0_4 | 
 | entry: | 
 | 	br label %bb.i5 | 
 |  | 
 | bb.i5:		; preds = %bb.i5, %entry | 
 | 	; Self-loop. %foo is 1 when arriving from entry and 0 when arriving over the back edge. | 
 | 	; %tmp7.i3 doubles the running value each pass (shl by 1); it is consumed by the last | 
 | 	; mp_mul_d2i call in cond_false.i. | 
 | 	%nfft_init.0.i = phi i32 [ 1, %entry ], [ %tmp7.i3, %bb.i5 ]		; <i32> [#uses=1] | 
 | 	%foo = phi i1 [1, %entry], [0, %bb.i5] | 
 | 	%tmp7.i3 = shl i32 %nfft_init.0.i, 1		; <i32> [#uses=2] | 
 | 	br i1 %foo, label %bb.i5, label %mp_unexp_mp2d.exit.i | 
 |  | 
 | mp_unexp_mp2d.exit.i:		; preds = %bb.i5 | 
 | 	; Branches on the same %foo value that controlled the loop exit above. | 
 | 	br i1 %foo, label %cond_next.i, label %cond_true.i | 
 |  | 
 | cond_true.i:		; preds = %mp_unexp_mp2d.exit.i | 
 | 	ret void | 
 |  | 
 | cond_next.i:		; preds = %mp_unexp_mp2d.exit.i | 
 | 	; Both sdiv operands are constants; the result is stored in cond_next36.i and negated | 
 | 	; in mp_unexp_d2mp.exit29.i. | 
 | 	%tmp22.i = sdiv i32 0, 2		; <i32> [#uses=2] | 
 | 	br i1 %foo, label %cond_true29.i, label %cond_next36.i | 
 |  | 
 | cond_true29.i:		; preds = %cond_next.i | 
 | 	ret void | 
 |  | 
 | cond_next36.i:		; preds = %cond_next.i | 
 | 	; The store targets the null pointer. %tmp8.i14.i (1 if %foo, else 0) is the bound the | 
 | 	; loop below compares its induction variable against. | 
 | 	store i32 %tmp22.i, ptr null, align 4 | 
 | 	%tmp8.i14.i = select i1 %foo, i32 1, i32 0		; <i32> [#uses=1] | 
 | 	br label %bb.i28.i | 
 |  | 
 | bb.i28.i:		; preds = %bb.i28.i, %cond_next36.i | 
 | 	; Loop whose induction variable advances by 2 per iteration (the file header explains | 
 | 	; that the step of 2 keeps loop reversal from kicking in). | 
 | 	; Per iteration: convert the carried double to i32, keep it only if it is signed-less-than | 
 | 	; %radix (otherwise use 0), convert back to double, subtract from 0.0, multiply by 0.0. | 
 | 	; %tmp41.sum.i is computed but has no uses. | 
 | 	%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ]		; <i32> [#uses=2] | 
 | 	%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ]		; <double> [#uses=1] | 
 | 	%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32		; <i32> [#uses=2] | 
 | 	%tmp4.i19.i = icmp slt i32 %tmp1.i18.i, %radix		; <i1> [#uses=1] | 
 | 	%x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0		; <i32> [#uses=1] | 
 | 	%tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2		; <i32> [#uses=0] | 
 | 	%tmp1213.i23.i = sitofp i32 %x.0.i21.i to double		; <double> [#uses=1] | 
 | 	%tmp15.i24.i = fsub double 0.000000e+00, %tmp1213.i23.i		; <double> [#uses=1] | 
 | 	%tmp16.i25.i = fmul double 0.000000e+00, %tmp15.i24.i		; <double> [#uses=1] | 
 | 	%indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 2		; <i32> [#uses=2] | 
 | 	%exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i		; <i1> [#uses=1] | 
 | 	br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i | 
 |  | 
 | mp_unexp_d2mp.exit29.i:		; preds = %bb.i28.i | 
 | 	; Stores 0 - %tmp22.i to the null pointer, then branches on the loop's exit condition again. | 
 | 	%tmp46.i = sub i32 0, %tmp22.i		; <i32> [#uses=1] | 
 | 	store i32 %tmp46.i, ptr null, align 4 | 
 | 	br i1 %exitcond40.i, label %bb.i.i, label %mp_sqrt_init.exit | 
 |  | 
 | bb.i.i:		; preds = %bb.i.i, %mp_unexp_d2mp.exit29.i | 
 | 	; Unconditional self-branch: this block has no exit edge. | 
 | 	br label %bb.i.i | 
 |  | 
 | mp_sqrt_init.exit:		; preds = %mp_unexp_d2mp.exit29.i | 
 | 	; Three fastcc calls marked tail, followed by another branch on %exitcond40.i. | 
 | 	tail call fastcc void @mp_mul_csqu( i32 0, ptr %tmp1fft ) | 
 | 	tail call fastcc void @rdft( i32 0, i32 -1, ptr null, ptr %ip, ptr %w ) | 
 | 	tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 0, ptr %tmp1fft, ptr %tmp2 ) | 
 | 	br i1 %exitcond40.i, label %cond_false.i, label %cond_true36.i | 
 |  | 
 | cond_true36.i:		; preds = %mp_sqrt_init.exit | 
 | 	ret void | 
 |  | 
 | cond_false.i:		; preds = %mp_sqrt_init.exit | 
 | 	; Five more fastcc calls, each passing 0 and %radix as the first two arguments; the last | 
 | 	; one also passes %tmp7.i3 from the first loop. | 
 | 	tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, ptr %out ) | 
 | 	tail call fastcc void @mp_add( i32 0, i32 %radix, ptr %tmp1, ptr %tmp2, ptr %tmp1 ) | 
 | 	tail call fastcc void @mp_sub( i32 0, i32 %radix, ptr %in, ptr %tmp2, ptr %tmp2 ) | 
 | 	tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, ptr %tmp1 ) | 
 | 	tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 %tmp7.i3, ptr %tmp2fft, ptr %tmp2 ) | 
 | 	ret void | 
 | } | 
 |  | 
 | ; External fastcc helpers called from mp_sqrt; they are declared here without bodies. | 
 | declare fastcc void @mp_add(i32, i32, ptr, ptr, ptr) | 
 |  | 
 | declare fastcc void @mp_sub(i32, i32, ptr, ptr, ptr) | 
 |  | 
 | declare fastcc void @mp_round(i32, i32, i32, ptr) | 
 |  | 
 | declare fastcc void @mp_mul_csqu(i32, ptr) | 
 |  | 
 | declare fastcc void @mp_mul_d2i(i32, i32, i32, ptr, ptr) |