| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s |
| ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -x86-cmov-converter-force-all=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FORCEALL |
| |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| ;; This test checks that the x86-cmov-converter optimization transforms CMOV |
| ;; instructions into branches when it is profitable. |
| ;; There are 5 cases below: |
| ;; 1. CmovInHotPath: |
| ;; CMOV depends on the condition and it is in the hot path. |
| ;; Thus, it is worth transforming. |
| ;; |
| ;; 2. CmovNotInHotPath: |
| ;; Similar to (1), except that the CMOV is not in the hot path. |
| ;; Thus, it is not worth transforming. |
| ;; |
| ;; 3. MaxIndex: |
| ;; Maximum calculation algorithm that is looking for the max index, |
| ;; calculating CMOV value is cheaper than calculating CMOV condition. |
| ;; Thus, it is worth transforming. |
| ;; |
| ;; 4. MaxValue: |
| ;; Maximum calculation algorithm that is looking for the max value, |
| ;; calculating CMOV value is not cheaper than calculating CMOV condition. |
| ;; Thus, it is not worth transforming. |
| ;; |
| ;; 5. BinarySearch: |
| ;; Usually, the binary search CMOV is not well predicted. |
| ;; Thus, it is not worth transforming. |
| ;; |
| ;; Test was created using the following command line: |
| ;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o - |
| ;; Where foo.c is: |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| ;;void CmovInHotPath(int n, int a, int b, int *c, int *d) { |
| ;; for (int i = 0; i < n; i++) { |
| ;; int t = c[i] + 1; |
| ;; if (c[i] * a > b) |
| ;; t = 10; |
| ;; c[i] = (c[i] + 1) * t; |
| ;; } |
| ;;} |
| ;; |
| ;; |
| ;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) { |
| ;; for (int i = 0; i < n; i++) { |
| ;; int t = c[i]; |
| ;; if (c[i] * a > b) |
| ;; t = 10; |
| ;; c[i] = t; |
| ;; d[i] /= b; |
| ;; } |
| ;;} |
| ;; |
| ;; |
| ;;int MaxIndex(int n, int *a) { |
| ;; int t = 0; |
| ;; for (int i = 1; i < n; i++) { |
| ;; if (a[i] > a[t]) |
| ;; t = i; |
| ;; } |
| ;; return t; |
| ;;} |
| ;; |
| ;; |
| ;;int MaxValue(int n, int *a) { |
| ;; int t = a[0]; |
| ;; for (int i = 1; i < n; i++) { |
| ;; if (a[i] > t) |
| ;; t = a[i]; |
| ;; } |
| ;; return t; |
| ;;} |
| ;; |
| ;;typedef struct Node Node; |
| ;;struct Node { |
| ;; unsigned Val; |
| ;; Node *Right; |
| ;; Node *Left; |
| ;;}; |
| ;; |
| ;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) { |
| ;; while (Curr->Val > Next->Val) { |
| ;; Curr = Next; |
| ;; if (Mask & (0x1 << Curr->Val)) |
| ;; Next = Curr->Right; |
| ;; else |
| ;; Next = Curr->Left; |
| ;; } |
| ;; return Curr->Val; |
| ;;} |
| ;; |
| ;; |
| ;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) { |
| ;; for (int i = 0; i < n; i++) { |
| ;; int t = c[i]; |
| ;; if (c[i] * a > b) |
| ;; t = 10; |
| ;; c[i] = t; |
| ;; } |
| ;;} |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| |
| ; IR form of the C `struct Node` from the header comment: field 0 is Val, |
| ; field 1 is Right and field 2 is Left (see the GEPs in @BinarySearch). |
| %struct.Node = type { i32, ptr, ptr } |
| |
| ; Case 1 (CmovInHotPath): the select result %. is multiplied by %add and the |
| ; product is stored back to c[i] in the same iteration. |
| ; Both RUN lines expect the select to be emitted as a branch (jg), not a cmov. |
| define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture readnone %d) #0 { |
| ; CHECK-LABEL: CmovInHotPath: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: testl %edi, %edi |
| ; CHECK-NEXT: jle .LBB0_5 |
| ; CHECK-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-NEXT: movl %edi, %r8d |
| ; CHECK-NEXT: xorl %edi, %edi |
| ; CHECK-NEXT: .LBB0_2: # %for.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: movl (%rcx,%rdi,4), %eax |
| ; CHECK-NEXT: leal 1(%rax), %r9d |
| ; CHECK-NEXT: imull %esi, %eax |
| ; CHECK-NEXT: movl $10, %r10d |
| ; CHECK-NEXT: cmpl %edx, %eax |
| ; CHECK-NEXT: jg .LBB0_4 |
| ; CHECK-NEXT: # %bb.3: # %for.body |
| ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 |
| ; CHECK-NEXT: movl %r9d, %r10d |
| ; CHECK-NEXT: .LBB0_4: # %for.body |
| ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 |
| ; CHECK-NEXT: imull %r9d, %r10d |
| ; CHECK-NEXT: movl %r10d, (%rcx,%rdi,4) |
| ; CHECK-NEXT: addq $1, %rdi |
| ; CHECK-NEXT: cmpq %rdi, %r8 |
| ; CHECK-NEXT: jne .LBB0_2 |
| ; CHECK-NEXT: .LBB0_5: # %for.cond.cleanup |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: CmovInHotPath: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: testl %edi, %edi |
| ; CHECK-FORCEALL-NEXT: jle .LBB0_5 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-FORCEALL-NEXT: movl %edi, %r8d |
| ; CHECK-FORCEALL-NEXT: xorl %edi, %edi |
| ; CHECK-FORCEALL-NEXT: .LBB0_2: # %for.body |
| ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl (%rcx,%rdi,4), %eax |
| ; CHECK-FORCEALL-NEXT: leal 1(%rax), %r9d |
| ; CHECK-FORCEALL-NEXT: imull %esi, %eax |
| ; CHECK-FORCEALL-NEXT: movl $10, %r10d |
| ; CHECK-FORCEALL-NEXT: cmpl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: jg .LBB0_4 |
| ; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB0_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl %r9d, %r10d |
| ; CHECK-FORCEALL-NEXT: .LBB0_4: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB0_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: imull %r9d, %r10d |
| ; CHECK-FORCEALL-NEXT: movl %r10d, (%rcx,%rdi,4) |
| ; CHECK-FORCEALL-NEXT: addq $1, %rdi |
| ; CHECK-FORCEALL-NEXT: cmpq %rdi, %r8 |
| ; CHECK-FORCEALL-NEXT: jne .LBB0_2 |
| ; CHECK-FORCEALL-NEXT: .LBB0_5: # %for.cond.cleanup |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %cmp14 = icmp sgt i32 %n, 0 |
| br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup |
| |
| for.body.preheader: ; preds = %entry |
| %wide.trip.count = zext i32 %n to i64 |
| br label %for.body |
| |
| for.cond.cleanup: ; preds = %for.body, %entry |
| ret void |
| |
| for.body: ; preds = %for.body.preheader, %for.body |
| %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] |
| %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv |
| %0 = load i32, ptr %arrayidx, align 4 |
| %add = add nsw i32 %0, 1 |
| %mul = mul nsw i32 %0, %a |
| %cmp3 = icmp sgt i32 %mul, %b |
| %. = select i1 %cmp3, i32 10, i32 %add |
| %mul7 = mul nsw i32 %., %add |
| store i32 %mul7, ptr %arrayidx, align 4 |
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count |
| br i1 %exitcond, label %for.cond.cleanup, label %for.body |
| } |
| |
| ; Case 2 (CmovNotInHotPath): the select result is only stored to c[i]; the |
| ; same loop iteration also performs an sdiv on d[i]. |
| ; The default RUN line expects the cmov to be kept (cmovgl), while the |
| ; -x86-cmov-converter-force-all RUN line expects a branch (jg). |
| define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture %d) #0 { |
| ; CHECK-LABEL: CmovNotInHotPath: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: testl %edi, %edi |
| ; CHECK-NEXT: jle .LBB1_3 |
| ; CHECK-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-NEXT: movl %edx, %r9d |
| ; CHECK-NEXT: movl %edi, %r10d |
| ; CHECK-NEXT: xorl %edi, %edi |
| ; CHECK-NEXT: movl $10, %r11d |
| ; CHECK-NEXT: .LBB1_2: # %for.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: movl (%rcx,%rdi,4), %eax |
| ; CHECK-NEXT: movl %eax, %edx |
| ; CHECK-NEXT: imull %esi, %edx |
| ; CHECK-NEXT: cmpl %r9d, %edx |
| ; CHECK-NEXT: cmovgl %r11d, %eax |
| ; CHECK-NEXT: movl %eax, (%rcx,%rdi,4) |
| ; CHECK-NEXT: movl (%r8,%rdi,4), %eax |
| ; CHECK-NEXT: cltd |
| ; CHECK-NEXT: idivl %r9d |
| ; CHECK-NEXT: movl %eax, (%r8,%rdi,4) |
| ; CHECK-NEXT: addq $1, %rdi |
| ; CHECK-NEXT: cmpq %rdi, %r10 |
| ; CHECK-NEXT: jne .LBB1_2 |
| ; CHECK-NEXT: .LBB1_3: # %for.cond.cleanup |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: CmovNotInHotPath: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: testl %edi, %edi |
| ; CHECK-FORCEALL-NEXT: jle .LBB1_5 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-FORCEALL-NEXT: movl %edx, %r9d |
| ; CHECK-FORCEALL-NEXT: movl %edi, %r10d |
| ; CHECK-FORCEALL-NEXT: xorl %edi, %edi |
| ; CHECK-FORCEALL-NEXT: .LBB1_2: # %for.body |
| ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl (%rcx,%rdi,4), %r11d |
| ; CHECK-FORCEALL-NEXT: movl %r11d, %eax |
| ; CHECK-FORCEALL-NEXT: imull %esi, %eax |
| ; CHECK-FORCEALL-NEXT: movl $10, %edx |
| ; CHECK-FORCEALL-NEXT: cmpl %r9d, %eax |
| ; CHECK-FORCEALL-NEXT: jg .LBB1_4 |
| ; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB1_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl %r11d, %edx |
| ; CHECK-FORCEALL-NEXT: .LBB1_4: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB1_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl %edx, (%rcx,%rdi,4) |
| ; CHECK-FORCEALL-NEXT: movl (%r8,%rdi,4), %eax |
| ; CHECK-FORCEALL-NEXT: cltd |
| ; CHECK-FORCEALL-NEXT: idivl %r9d |
| ; CHECK-FORCEALL-NEXT: movl %eax, (%r8,%rdi,4) |
| ; CHECK-FORCEALL-NEXT: addq $1, %rdi |
| ; CHECK-FORCEALL-NEXT: cmpq %rdi, %r10 |
| ; CHECK-FORCEALL-NEXT: jne .LBB1_2 |
| ; CHECK-FORCEALL-NEXT: .LBB1_5: # %for.cond.cleanup |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %cmp18 = icmp sgt i32 %n, 0 |
| br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup |
| |
| for.body.preheader: ; preds = %entry |
| %wide.trip.count = zext i32 %n to i64 |
| br label %for.body |
| |
| for.cond.cleanup: ; preds = %for.body, %entry |
| ret void |
| |
| for.body: ; preds = %for.body.preheader, %for.body |
| %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] |
| %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv |
| %0 = load i32, ptr %arrayidx, align 4 |
| %mul = mul nsw i32 %0, %a |
| %cmp3 = icmp sgt i32 %mul, %b |
| %. = select i1 %cmp3, i32 10, i32 %0 |
| store i32 %., ptr %arrayidx, align 4 |
| %arrayidx7 = getelementptr inbounds i32, ptr %d, i64 %indvars.iv |
| %1 = load i32, ptr %arrayidx7, align 4 |
| %div = sdiv i32 %1, %b |
| store i32 %div, ptr %arrayidx7, align 4 |
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count |
| br i1 %exitcond, label %for.cond.cleanup, label %for.body |
| } |
| |
| ; Case 3 (MaxIndex): the select picks between the current index and the |
| ; running max index %t.015, which is then used to address a[t] in the next |
| ; iteration. Both RUN lines expect a branch (jg) instead of a cmov. |
| define i32 @MaxIndex(i32 %n, ptr nocapture readonly %a) #0 { |
| ; CHECK-LABEL: MaxIndex: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl $2, %edi |
| ; CHECK-NEXT: jl .LBB2_5 |
| ; CHECK-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-NEXT: movl %edi, %r8d |
| ; CHECK-NEXT: xorl %edi, %edi |
| ; CHECK-NEXT: movl $1, %edx |
| ; CHECK-NEXT: .LBB2_2: # %for.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: movl (%rsi,%rdx,4), %r9d |
| ; CHECK-NEXT: movslq %edi, %rcx |
| ; CHECK-NEXT: movl %edx, %eax |
| ; CHECK-NEXT: cmpl (%rsi,%rcx,4), %r9d |
| ; CHECK-NEXT: jg .LBB2_4 |
| ; CHECK-NEXT: # %bb.3: # %for.body |
| ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: .LBB2_4: # %for.body |
| ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 |
| ; CHECK-NEXT: addq $1, %rdx |
| ; CHECK-NEXT: movl %eax, %edi |
| ; CHECK-NEXT: cmpq %rdx, %r8 |
| ; CHECK-NEXT: jne .LBB2_2 |
| ; CHECK-NEXT: .LBB2_5: # %for.cond.cleanup |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: MaxIndex: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: xorl %eax, %eax |
| ; CHECK-FORCEALL-NEXT: cmpl $2, %edi |
| ; CHECK-FORCEALL-NEXT: jl .LBB2_5 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-FORCEALL-NEXT: movl %edi, %r8d |
| ; CHECK-FORCEALL-NEXT: xorl %edi, %edi |
| ; CHECK-FORCEALL-NEXT: movl $1, %edx |
| ; CHECK-FORCEALL-NEXT: .LBB2_2: # %for.body |
| ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %r9d |
| ; CHECK-FORCEALL-NEXT: movslq %edi, %rcx |
| ; CHECK-FORCEALL-NEXT: movl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: cmpl (%rsi,%rcx,4), %r9d |
| ; CHECK-FORCEALL-NEXT: jg .LBB2_4 |
| ; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB2_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl %edi, %eax |
| ; CHECK-FORCEALL-NEXT: .LBB2_4: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB2_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: addq $1, %rdx |
| ; CHECK-FORCEALL-NEXT: movl %eax, %edi |
| ; CHECK-FORCEALL-NEXT: cmpq %rdx, %r8 |
| ; CHECK-FORCEALL-NEXT: jne .LBB2_2 |
| ; CHECK-FORCEALL-NEXT: .LBB2_5: # %for.cond.cleanup |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %cmp14 = icmp sgt i32 %n, 1 |
| br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup |
| |
| for.body.preheader: ; preds = %entry |
| %wide.trip.count = zext i32 %n to i64 |
| br label %for.body |
| |
| for.cond.cleanup: ; preds = %for.body, %entry |
| %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ] |
| ret i32 %t.0.lcssa |
| |
| for.body: ; preds = %for.body.preheader, %for.body |
| %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ] |
| %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ] |
| %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv |
| %0 = load i32, ptr %arrayidx, align 4 |
| %idxprom1 = sext i32 %t.015 to i64 |
| %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1 |
| %1 = load i32, ptr %arrayidx2, align 4 |
| %cmp3 = icmp sgt i32 %0, %1 |
| %2 = trunc i64 %indvars.iv to i32 |
| %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015 |
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count |
| br i1 %exitcond, label %for.cond.cleanup, label %for.body |
| } |
| |
| ; TODO: If a cmov instruction is marked as unpredictable, do not convert it to a branch. |
| ; Same IR as @MaxIndex, except that the select carries !unpredictable metadata. |
| ; Both RUN lines currently still expect a branch (jg) instead of a cmov. |
| define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 { |
| ; CHECK-LABEL: MaxIndex_unpredictable: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl $2, %edi |
| ; CHECK-NEXT: jl .LBB3_5 |
| ; CHECK-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-NEXT: movl %edi, %r8d |
| ; CHECK-NEXT: xorl %edi, %edi |
| ; CHECK-NEXT: movl $1, %edx |
| ; CHECK-NEXT: .LBB3_2: # %for.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: movl (%rsi,%rdx,4), %r9d |
| ; CHECK-NEXT: movslq %edi, %rcx |
| ; CHECK-NEXT: movl %edx, %eax |
| ; CHECK-NEXT: cmpl (%rsi,%rcx,4), %r9d |
| ; CHECK-NEXT: jg .LBB3_4 |
| ; CHECK-NEXT: # %bb.3: # %for.body |
| ; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: .LBB3_4: # %for.body |
| ; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 |
| ; CHECK-NEXT: addq $1, %rdx |
| ; CHECK-NEXT: movl %eax, %edi |
| ; CHECK-NEXT: cmpq %rdx, %r8 |
| ; CHECK-NEXT: jne .LBB3_2 |
| ; CHECK-NEXT: .LBB3_5: # %for.cond.cleanup |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: MaxIndex_unpredictable: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: xorl %eax, %eax |
| ; CHECK-FORCEALL-NEXT: cmpl $2, %edi |
| ; CHECK-FORCEALL-NEXT: jl .LBB3_5 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-FORCEALL-NEXT: movl %edi, %r8d |
| ; CHECK-FORCEALL-NEXT: xorl %edi, %edi |
| ; CHECK-FORCEALL-NEXT: movl $1, %edx |
| ; CHECK-FORCEALL-NEXT: .LBB3_2: # %for.body |
| ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %r9d |
| ; CHECK-FORCEALL-NEXT: movslq %edi, %rcx |
| ; CHECK-FORCEALL-NEXT: movl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: cmpl (%rsi,%rcx,4), %r9d |
| ; CHECK-FORCEALL-NEXT: jg .LBB3_4 |
| ; CHECK-FORCEALL-NEXT: # %bb.3: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB3_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl %edi, %eax |
| ; CHECK-FORCEALL-NEXT: .LBB3_4: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB3_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: addq $1, %rdx |
| ; CHECK-FORCEALL-NEXT: movl %eax, %edi |
| ; CHECK-FORCEALL-NEXT: cmpq %rdx, %r8 |
| ; CHECK-FORCEALL-NEXT: jne .LBB3_2 |
| ; CHECK-FORCEALL-NEXT: .LBB3_5: # %for.cond.cleanup |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %cmp14 = icmp sgt i32 %n, 1 |
| br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup |
| |
| for.body.preheader: ; preds = %entry |
| %wide.trip.count = zext i32 %n to i64 |
| br label %for.body |
| |
| for.cond.cleanup: ; preds = %for.body, %entry |
| %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ] |
| ret i32 %t.0.lcssa |
| |
| for.body: ; preds = %for.body.preheader, %for.body |
| %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ] |
| %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ] |
| %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv |
| %0 = load i32, ptr %arrayidx, align 4 |
| %idxprom1 = sext i32 %t.015 to i64 |
| %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1 |
| %1 = load i32, ptr %arrayidx2, align 4 |
| %cmp3 = icmp sgt i32 %0, %1 |
| %2 = trunc i64 %indvars.iv to i32 |
| %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015, !unpredictable !0 |
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count |
| br i1 %exitcond, label %for.cond.cleanup, label %for.body |
| } |
| |
| ; Case 4 (MaxValue): the select picks between the freshly loaded a[i] and the |
| ; running maximum %t.014, and the compare uses the same two values. |
| ; The default RUN line expects the cmov to be kept (cmovgl), while the |
| ; -x86-cmov-converter-force-all RUN line expects a branch (jg). |
| define i32 @MaxValue(i32 %n, ptr nocapture readonly %a) #0 { |
| ; CHECK-LABEL: MaxValue: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl (%rsi), %eax |
| ; CHECK-NEXT: cmpl $2, %edi |
| ; CHECK-NEXT: jl .LBB4_3 |
| ; CHECK-NEXT: # %bb.1: # %for.body.preheader |
| ; CHECK-NEXT: movl %edi, %ecx |
| ; CHECK-NEXT: movl $1, %edx |
| ; CHECK-NEXT: .LBB4_2: # %for.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: movl (%rsi,%rdx,4), %edi |
| ; CHECK-NEXT: cmpl %eax, %edi |
| ; CHECK-NEXT: cmovgl %edi, %eax |
| ; CHECK-NEXT: addq $1, %rdx |
| ; CHECK-NEXT: cmpq %rdx, %rcx |
| ; CHECK-NEXT: jne .LBB4_2 |
| ; CHECK-NEXT: .LBB4_3: # %for.cond.cleanup |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: MaxValue: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movl (%rsi), %ecx |
| ; CHECK-FORCEALL-NEXT: cmpl $2, %edi |
| ; CHECK-FORCEALL-NEXT: jge .LBB4_3 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: |
| ; CHECK-FORCEALL-NEXT: movl %ecx, %eax |
| ; CHECK-FORCEALL-NEXT: .LBB4_2: # %for.cond.cleanup |
| ; CHECK-FORCEALL-NEXT: retq |
| ; CHECK-FORCEALL-NEXT: .LBB4_3: # %for.body.preheader |
| ; CHECK-FORCEALL-NEXT: movl %edi, %edi |
| ; CHECK-FORCEALL-NEXT: movl $1, %edx |
| ; CHECK-FORCEALL-NEXT: .LBB4_4: # %for.body |
| ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl (%rsi,%rdx,4), %eax |
| ; CHECK-FORCEALL-NEXT: cmpl %ecx, %eax |
| ; CHECK-FORCEALL-NEXT: jg .LBB4_6 |
| ; CHECK-FORCEALL-NEXT: # %bb.5: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB4_4 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl %ecx, %eax |
| ; CHECK-FORCEALL-NEXT: .LBB4_6: # %for.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB4_4 Depth=1 |
| ; CHECK-FORCEALL-NEXT: addq $1, %rdx |
| ; CHECK-FORCEALL-NEXT: movl %eax, %ecx |
| ; CHECK-FORCEALL-NEXT: cmpq %rdx, %rdi |
| ; CHECK-FORCEALL-NEXT: je .LBB4_2 |
| ; CHECK-FORCEALL-NEXT: jmp .LBB4_4 |
| entry: |
| %0 = load i32, ptr %a, align 4 |
| %cmp13 = icmp sgt i32 %n, 1 |
| br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup |
| |
| for.body.preheader: ; preds = %entry |
| %wide.trip.count = zext i32 %n to i64 |
| br label %for.body |
| |
| for.cond.cleanup: ; preds = %for.body, %entry |
| %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ] |
| ret i32 %t.0.lcssa |
| |
| for.body: ; preds = %for.body.preheader, %for.body |
| %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ] |
| %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ] |
| %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv |
| %1 = load i32, ptr %arrayidx1, align 4 |
| %cmp2 = icmp sgt i32 %1, %t.014 |
| %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014 |
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count |
| br i1 %exitcond, label %for.cond.cleanup, label %for.body |
| } |
| |
| ; Case 5 (BinarySearch): the select chooses between the addresses of the Left |
| ; and Right fields of the current node, and the chosen address is then loaded. |
| ; Both RUN lines expect identical output in which the choice is folded into the |
| ; load address (btl + setae feeding a scaled-index movq); no cmov is emitted and |
| ; the only conditional branch is the loop back-edge (ja). |
| define i32 @BinarySearch(i32 %Mask, ptr nocapture readonly %Curr, ptr nocapture readonly %Next) #0 { |
| ; CHECK-LABEL: BinarySearch: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl (%rsi), %eax |
| ; CHECK-NEXT: jmp .LBB5_2 |
| ; CHECK-NEXT: .LBB5_1: # %while.body |
| ; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 |
| ; CHECK-NEXT: movl %ecx, %eax |
| ; CHECK-NEXT: xorl %ecx, %ecx |
| ; CHECK-NEXT: btl %eax, %edi |
| ; CHECK-NEXT: setae %cl |
| ; CHECK-NEXT: movq 8(%rdx,%rcx,8), %rdx |
| ; CHECK-NEXT: .LBB5_2: # %while.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: movl (%rdx), %ecx |
| ; CHECK-NEXT: cmpl %ecx, %eax |
| ; CHECK-NEXT: ja .LBB5_1 |
| ; CHECK-NEXT: # %bb.3: # %while.end |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: BinarySearch: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movl (%rsi), %eax |
| ; CHECK-FORCEALL-NEXT: jmp .LBB5_2 |
| ; CHECK-FORCEALL-NEXT: .LBB5_1: # %while.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB5_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl %ecx, %eax |
| ; CHECK-FORCEALL-NEXT: xorl %ecx, %ecx |
| ; CHECK-FORCEALL-NEXT: btl %eax, %edi |
| ; CHECK-FORCEALL-NEXT: setae %cl |
| ; CHECK-FORCEALL-NEXT: movq 8(%rdx,%rcx,8), %rdx |
| ; CHECK-FORCEALL-NEXT: .LBB5_2: # %while.body |
| ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl (%rdx), %ecx |
| ; CHECK-FORCEALL-NEXT: cmpl %ecx, %eax |
| ; CHECK-FORCEALL-NEXT: ja .LBB5_1 |
| ; CHECK-FORCEALL-NEXT: # %bb.3: # %while.end |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %0 = load i32, ptr %Curr, align 8 |
| %1 = load i32, ptr %Next, align 8 |
| %cmp10 = icmp ugt i32 %0, %1 |
| br i1 %cmp10, label %while.body, label %while.end |
| |
| while.body: ; preds = %entry, %while.body |
| %2 = phi i32 [ %4, %while.body ], [ %1, %entry ] |
| %Next.addr.011 = phi ptr [ %3, %while.body ], [ %Next, %entry ] |
| %shl = shl i32 1, %2 |
| %and = and i32 %shl, %Mask |
| %tobool = icmp eq i32 %and, 0 |
| %Left = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 2 |
| %Right = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 1 |
| %Left.sink = select i1 %tobool, ptr %Left, ptr %Right |
| %3 = load ptr, ptr %Left.sink, align 8 |
| %4 = load i32, ptr %3, align 8 |
| %cmp = icmp ugt i32 %2, %4 |
| br i1 %cmp, label %while.body, label %while.end |
| |
| while.end: ; preds = %while.body, %entry |
| %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ] |
| ret i32 %.lcssa |
| } |
| |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| ;; The following test checks that the x86-cmov-converter optimization |
| ;; transforms CMOV instructions into branches correctly. |
| ;; |
| ;; MBB: |
| ;; cond = cmp ... |
| ;; v1 = CMOVgt t1, f1, cond |
| ;; v2 = CMOVle s1, f2, cond |
| ;; |
| ;; Where: t1 = 11, f1 = 22, f2 = a |
| ;; |
| ;; After CMOV transformation |
| ;; ------------------------- |
| ;; MBB: |
| ;; cond = cmp ... |
| ;; ja %SinkMBB |
| ;; |
| ;; FalseMBB: |
| ;; jmp %SinkMBB |
| ;; |
| ;; SinkMBB: |
| ;; %v1 = phi[%f1, %FalseMBB], [%t1, %MBB] |
| ;; %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch |
| ;; ; true-value with false-value |
| ;; ; Phi instruction cannot use |
| ;; ; previous Phi instruction result |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| |
| ; %s1 is selected on %cond (ugt) and %s2 on the opposite condition %condOpp |
| ; (ule), with %s1 itself as one of %s2's operands. |
| ; Both RUN lines expect a single `ja`, with both values (22 into %eax and %ecx) |
| ; materialised in the fall-through block. |
| define void @Transform(ptr%arr, ptr%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 { |
| ; CHECK-LABEL: Transform: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movb $1, %al |
| ; CHECK-NEXT: testb %al, %al |
| ; CHECK-NEXT: jne .LBB6_5 |
| ; CHECK-NEXT: # %bb.1: # %while.body.preheader |
| ; CHECK-NEXT: movl %edx, %r8d |
| ; CHECK-NEXT: xorl %esi, %esi |
| ; CHECK-NEXT: .LBB6_2: # %while.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: movslq %esi, %rsi |
| ; CHECK-NEXT: movl (%rdi,%rsi,4), %eax |
| ; CHECK-NEXT: xorl %edx, %edx |
| ; CHECK-NEXT: divl %r8d |
| ; CHECK-NEXT: movl %eax, %edx |
| ; CHECK-NEXT: movl $11, %eax |
| ; CHECK-NEXT: movl %r8d, %ecx |
| ; CHECK-NEXT: cmpl %r8d, %edx |
| ; CHECK-NEXT: ja .LBB6_4 |
| ; CHECK-NEXT: # %bb.3: # %while.body |
| ; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 |
| ; CHECK-NEXT: movl $22, %eax |
| ; CHECK-NEXT: movl $22, %ecx |
| ; CHECK-NEXT: .LBB6_4: # %while.body |
| ; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 |
| ; CHECK-NEXT: xorl %edx, %edx |
| ; CHECK-NEXT: divl %ecx |
| ; CHECK-NEXT: movl %edx, (%rdi,%rsi,4) |
| ; CHECK-NEXT: addl $1, %esi |
| ; CHECK-NEXT: cmpl %r9d, %esi |
| ; CHECK-NEXT: ja .LBB6_2 |
| ; CHECK-NEXT: .LBB6_5: # %while.end |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: Transform: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movb $1, %al |
| ; CHECK-FORCEALL-NEXT: testb %al, %al |
| ; CHECK-FORCEALL-NEXT: jne .LBB6_5 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %while.body.preheader |
| ; CHECK-FORCEALL-NEXT: movl %edx, %r8d |
| ; CHECK-FORCEALL-NEXT: xorl %esi, %esi |
| ; CHECK-FORCEALL-NEXT: .LBB6_2: # %while.body |
| ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-FORCEALL-NEXT: movslq %esi, %rsi |
| ; CHECK-FORCEALL-NEXT: movl (%rdi,%rsi,4), %eax |
| ; CHECK-FORCEALL-NEXT: xorl %edx, %edx |
| ; CHECK-FORCEALL-NEXT: divl %r8d |
| ; CHECK-FORCEALL-NEXT: movl %eax, %edx |
| ; CHECK-FORCEALL-NEXT: movl $11, %eax |
| ; CHECK-FORCEALL-NEXT: movl %r8d, %ecx |
| ; CHECK-FORCEALL-NEXT: cmpl %r8d, %edx |
| ; CHECK-FORCEALL-NEXT: ja .LBB6_4 |
| ; CHECK-FORCEALL-NEXT: # %bb.3: # %while.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB6_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl $22, %eax |
| ; CHECK-FORCEALL-NEXT: movl $22, %ecx |
| ; CHECK-FORCEALL-NEXT: .LBB6_4: # %while.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB6_2 Depth=1 |
| ; CHECK-FORCEALL-NEXT: xorl %edx, %edx |
| ; CHECK-FORCEALL-NEXT: divl %ecx |
| ; CHECK-FORCEALL-NEXT: movl %edx, (%rdi,%rsi,4) |
| ; CHECK-FORCEALL-NEXT: addl $1, %esi |
| ; CHECK-FORCEALL-NEXT: cmpl %r9d, %esi |
| ; CHECK-FORCEALL-NEXT: ja .LBB6_2 |
| ; CHECK-FORCEALL-NEXT: .LBB6_5: # %while.end |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %cmp10 = icmp ugt i32 0, %n |
| br i1 %cmp10, label %while.body, label %while.end |
| |
| while.body: ; preds = %entry, %while.body |
| %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ] |
| %arr_i = getelementptr inbounds i32, ptr %arr, i32 %i |
| %x = load i32, ptr %arr_i, align 4 |
| %div = udiv i32 %x, %a |
| %cond = icmp ugt i32 %div, %a |
| %condOpp = icmp ule i32 %div, %a |
| %s1 = select i1 %cond, i32 11, i32 22 |
| %s2 = select i1 %condOpp, i32 %s1, i32 %a |
| %sum = urem i32 %s1, %s2 |
| store i32 %sum, ptr %arr_i, align 4 |
| %i_inc = add i32 %i, 1 |
| %cmp = icmp ugt i32 %i_inc, %n |
| br i1 %cmp, label %while.body, label %while.end |
| |
| while.end: ; preds = %while.body, %entry |
| ret void |
| } |
| |
| ; Test that we always will convert a cmov with a memory operand into a branch, |
| ; even outside of a loop. |
| ; The false operand of the select is the loaded value; both RUN lines expect a |
| ; `ja` that skips the load `movl (%rcx), %eax` when the condition holds. |
| define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, ptr %y) #0 { |
| ; CHECK-LABEL: test_cmov_memoperand: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl %edx, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: ja .LBB7_2 |
| ; CHECK-NEXT: # %bb.1: # %entry |
| ; CHECK-NEXT: movl (%rcx), %eax |
| ; CHECK-NEXT: .LBB7_2: # %entry |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: test_cmov_memoperand: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi |
| ; CHECK-FORCEALL-NEXT: ja .LBB7_2 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry |
| ; CHECK-FORCEALL-NEXT: movl (%rcx), %eax |
| ; CHECK-FORCEALL-NEXT: .LBB7_2: # %entry |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %cond = icmp ugt i32 %a, %b |
| %load = load i32, ptr %y |
| %z = select i1 %cond, i32 %x, i32 %load |
| ret i32 %z |
| } |
| |
| ; TODO: If a cmov instruction is marked as unpredictable, do not convert it to a branch. |
| ; Same IR as @test_cmov_memoperand, except that the select carries |
| ; !unpredictable metadata. Both RUN lines currently still expect a branch (ja). |
| define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, ptr %y) #0 { |
| ; CHECK-LABEL: test_cmov_memoperand_unpredictable: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl %edx, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: ja .LBB8_2 |
| ; CHECK-NEXT: # %bb.1: # %entry |
| ; CHECK-NEXT: movl (%rcx), %eax |
| ; CHECK-NEXT: .LBB8_2: # %entry |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_unpredictable: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi |
| ; CHECK-FORCEALL-NEXT: ja .LBB8_2 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry |
| ; CHECK-FORCEALL-NEXT: movl (%rcx), %eax |
| ; CHECK-FORCEALL-NEXT: .LBB8_2: # %entry |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %cond = icmp ugt i32 %a, %b |
| %load = load i32, ptr %y |
| %z = select i1 %cond, i32 %x, i32 %load, !unpredictable !0 |
| ret i32 %z |
| } |
| |
| ; Test that we can convert a group of cmovs where only one has a memory |
| ; operand. |
| ; All three selects share %cond and have %x as the true operand; only %z2 uses |
| ; the loaded value (as its false operand). Both RUN lines expect one `ja` with |
| ; the load and the two register moves in the fall-through block. |
| define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 { |
| ; CHECK-LABEL: test_cmov_memoperand_in_group: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl %edx, %eax |
| ; CHECK-NEXT: movl %edx, %r8d |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: ja .LBB9_2 |
| ; CHECK-NEXT: # %bb.1: # %entry |
| ; CHECK-NEXT: movl (%rcx), %r8d |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: movl %esi, %edx |
| ; CHECK-NEXT: .LBB9_2: # %entry |
| ; CHECK-NEXT: addl %r8d, %eax |
| ; CHECK-NEXT: addl %edx, %eax |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: movl %edx, %r8d |
| ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi |
| ; CHECK-FORCEALL-NEXT: ja .LBB9_2 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry |
| ; CHECK-FORCEALL-NEXT: movl (%rcx), %r8d |
| ; CHECK-FORCEALL-NEXT: movl %edi, %eax |
| ; CHECK-FORCEALL-NEXT: movl %esi, %edx |
| ; CHECK-FORCEALL-NEXT: .LBB9_2: # %entry |
| ; CHECK-FORCEALL-NEXT: addl %r8d, %eax |
| ; CHECK-FORCEALL-NEXT: addl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %cond = icmp ugt i32 %a, %b |
| %y = load i32, ptr %y.ptr |
| %z1 = select i1 %cond, i32 %x, i32 %a |
| %z2 = select i1 %cond, i32 %x, i32 %y |
| %z3 = select i1 %cond, i32 %x, i32 %b |
| %s1 = add i32 %z1, %z2 |
| %s2 = add i32 %s1, %z3 |
| ret i32 %s2 |
| } |
| |
| ; Same as @test_cmov_memoperand_in_group, but with the select operands |
| ; reversed: %x is now the false operand of every select and the loaded value is |
| ; the true operand of %z1. Both RUN lines expect the inverted branch (`jbe`). |
| define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 { |
| ; CHECK-LABEL: test_cmov_memoperand_in_group2: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl %edx, %eax |
| ; CHECK-NEXT: movl %edx, %r8d |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: jbe .LBB10_2 |
| ; CHECK-NEXT: # %bb.1: # %entry |
| ; CHECK-NEXT: movl (%rcx), %r8d |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: movl %esi, %edx |
| ; CHECK-NEXT: .LBB10_2: # %entry |
| ; CHECK-NEXT: addl %r8d, %eax |
| ; CHECK-NEXT: addl %edx, %eax |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group2: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: movl %edx, %r8d |
| ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi |
| ; CHECK-FORCEALL-NEXT: jbe .LBB10_2 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry |
| ; CHECK-FORCEALL-NEXT: movl (%rcx), %r8d |
| ; CHECK-FORCEALL-NEXT: movl %edi, %eax |
| ; CHECK-FORCEALL-NEXT: movl %esi, %edx |
| ; CHECK-FORCEALL-NEXT: .LBB10_2: # %entry |
| ; CHECK-FORCEALL-NEXT: addl %r8d, %eax |
| ; CHECK-FORCEALL-NEXT: addl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| %cond = icmp ugt i32 %a, %b |
| %y = load i32, ptr %y.ptr |
| %z2 = select i1 %cond, i32 %a, i32 %x |
| %z1 = select i1 %cond, i32 %y, i32 %x |
| %z3 = select i1 %cond, i32 %b, i32 %x |
| %s1 = add i32 %z1, %z2 |
| %s2 = add i32 %s1, %z3 |
| ret i32 %s2 |
| } |
| |
| ; Test that we don't convert a group of cmovs with conflicting directions of |
| ; loads. |
| ; %y1 is the false operand of %z1 while %y2 is the true operand of %z2. The |
| ; expected code keeps both cmoval instructions (one with a memory operand) and |
| ; contains no branch, for both check prefixes, i.e. also when the RUN line |
| ; passes -x86-cmov-converter-force-all=true. |
| define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, ptr %y1.ptr, ptr %y2.ptr) #0 { |
| ; CHECK-LABEL: test_cmov_memoperand_conflicting_dir: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: movl (%rcx), %eax |
| ; CHECK-NEXT: cmoval %edx, %eax |
| ; CHECK-NEXT: cmoval (%r8), %edx |
| ; CHECK-NEXT: addl %edx, %eax |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_conflicting_dir: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi |
| ; CHECK-FORCEALL-NEXT: movl (%rcx), %eax |
| ; CHECK-FORCEALL-NEXT: cmoval %edx, %eax |
| ; CHECK-FORCEALL-NEXT: cmoval (%r8), %edx |
| ; CHECK-FORCEALL-NEXT: addl %edx, %eax |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| ; Unsigned a > b; shared by both selects. |
| %cond = icmp ugt i32 %a, %b |
| ; Two independent unconditional loads, one per select. |
| %y1 = load i32, ptr %y1.ptr |
| %y2 = load i32, ptr %y2.ptr |
| ; Loaded value on the false side here ... |
| %z1 = select i1 %cond, i32 %x, i32 %y1 |
| ; ... and on the true side here: the conflicting direction under test. |
| %z2 = select i1 %cond, i32 %y2, i32 %x |
| %s1 = add i32 %z1, %z2 |
| ret i32 %s1 |
| } |
| |
| ; Test that we can convert a group of cmovs where only one has a memory |
| ; operand and where that memory operand's registers come from a prior cmov in |
| ; the group. |
| ; The load address %p is itself a select on the same condition as %z. The |
| ; expected code has a single ja branch and, on the fall-through path, one load |
| ; straight from (%rcx); no cmov instruction remains for either check prefix. |
| define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, ptr %x, ptr %y) #0 { |
| ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: ja .LBB12_2 |
| ; CHECK-NEXT: # %bb.1: # %entry |
| ; CHECK-NEXT: movl (%rcx), %eax |
| ; CHECK-NEXT: .LBB12_2: # %entry |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movl %edi, %eax |
| ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi |
| ; CHECK-FORCEALL-NEXT: ja .LBB12_2 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry |
| ; CHECK-FORCEALL-NEXT: movl (%rcx), %eax |
| ; CHECK-FORCEALL-NEXT: .LBB12_2: # %entry |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| ; Unsigned a > b; shared by the pointer select and the value select. |
| %cond = icmp ugt i32 %a, %b |
| ; First select of the group: picks the address that the load reads from. |
| %p = select i1 %cond, ptr %x, ptr %y |
| %load = load i32, ptr %p |
| ; Second select: the loaded value is the false operand. |
| %z = select i1 %cond, i32 %a, i32 %load |
| ret i32 %z |
| } |
| |
| ; Test that we can convert a group of two cmovs with memory operands where one |
| ; uses the result of the other as part of the address. |
| ; %load1 (read from %y) is the false operand of the pointer select %p, and |
| ; %load2 (read through %p) is the false operand of %z. The expected code puts |
| ; both loads on the fall-through path of a single ja branch, with the second |
| ; load addressing through the register written by the first. |
| define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, ptr %x, ptr %y) #0 { |
| ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: ja .LBB13_2 |
| ; CHECK-NEXT: # %bb.1: # %entry |
| ; CHECK-NEXT: movq (%rcx), %rax |
| ; CHECK-NEXT: movl (%rax), %eax |
| ; CHECK-NEXT: .LBB13_2: # %entry |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movl %edi, %eax |
| ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi |
| ; CHECK-FORCEALL-NEXT: ja .LBB13_2 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry |
| ; CHECK-FORCEALL-NEXT: movq (%rcx), %rax |
| ; CHECK-FORCEALL-NEXT: movl (%rax), %eax |
| ; CHECK-FORCEALL-NEXT: .LBB13_2: # %entry |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| ; Unsigned a > b; shared by both selects. |
| %cond = icmp ugt i32 %a, %b |
| ; First memory operand: a pointer loaded from %y. |
| %load1 = load ptr, ptr %y |
| %p = select i1 %cond, ptr %x, ptr %load1 |
| ; Second memory operand: an i32 loaded through the selected pointer. |
| %load2 = load i32, ptr %p |
| %z = select i1 %cond, i32 %a, i32 %load2 |
| ret i32 %z |
| } |
| |
| ; Test that we can convert a group of cmovs where only one has a memory |
| ; operand and where that memory operand's registers come from a prior cmov and |
| ; where that cmov gets *its* input from a prior cmov in the group. |
| ; The chain is %p -> %p2 -> %load -> %r, all selects using the same condition. |
| ; The expected code is a single ja branch whose fall-through path loads once |
| ; from (%rcx); the register holding %z (%r8) does not appear in it. |
| define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, ptr %x, ptr %y, ptr %z) #0 { |
| ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: ja .LBB14_2 |
| ; CHECK-NEXT: # %bb.1: # %entry |
| ; CHECK-NEXT: movl (%rcx), %eax |
| ; CHECK-NEXT: .LBB14_2: # %entry |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movl %edi, %eax |
| ; CHECK-FORCEALL-NEXT: cmpl %esi, %edi |
| ; CHECK-FORCEALL-NEXT: ja .LBB14_2 |
| ; CHECK-FORCEALL-NEXT: # %bb.1: # %entry |
| ; CHECK-FORCEALL-NEXT: movl (%rcx), %eax |
| ; CHECK-FORCEALL-NEXT: .LBB14_2: # %entry |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| ; Unsigned a > b; shared by all three selects. |
| %cond = icmp ugt i32 %a, %b |
| ; First pointer select. |
| %p = select i1 %cond, ptr %x, ptr %y |
| ; Second pointer select; its false operand is the result of the first. |
| %p2 = select i1 %cond, ptr %z, ptr %p |
| %load = load i32, ptr %p2 |
| ; Value select; the loaded value is the false operand. |
| %r = select i1 %cond, i32 %a, i32 %load |
| ret i32 %r |
| } |
| |
| ; External pointer globals read by test_memoperand_loop below. |
| @begin = external global ptr |
| @end = external global ptr |
| |
| ; Loop with two pointer selects per iteration, each choosing between an |
| ; advanced pointer and the begin pointer. The first select's false operand is |
| ; re-loaded from @begin inside the loop (%begin_dup); the second uses %begin, |
| ; loaded once in the entry block. The expected code turns both selects into ja |
| ; branches: the re-load sits in the fall-through block of the first branch, |
| ; and the second becomes a plain register copy. Both check prefixes expect the |
| ; same code. |
| define void @test_memoperand_loop(i32 %data) #0 { |
| ; CHECK-LABEL: test_memoperand_loop: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movq begin@GOTPCREL(%rip), %r8 |
| ; CHECK-NEXT: movq (%r8), %rax |
| ; CHECK-NEXT: movq end@GOTPCREL(%rip), %rcx |
| ; CHECK-NEXT: movq (%rcx), %rdx |
| ; CHECK-NEXT: xorl %esi, %esi |
| ; CHECK-NEXT: movq %rax, %rcx |
| ; CHECK-NEXT: .LBB15_1: # %loop.body |
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: addq $8, %rcx |
| ; CHECK-NEXT: cmpq %rdx, %rcx |
| ; CHECK-NEXT: ja .LBB15_3 |
| ; CHECK-NEXT: # %bb.2: # %loop.body |
| ; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1 |
| ; CHECK-NEXT: movq (%r8), %rcx |
| ; CHECK-NEXT: .LBB15_3: # %loop.body |
| ; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1 |
| ; CHECK-NEXT: movl %edi, (%rcx) |
| ; CHECK-NEXT: addq $8, %rcx |
| ; CHECK-NEXT: cmpq %rdx, %rcx |
| ; CHECK-NEXT: ja .LBB15_5 |
| ; CHECK-NEXT: # %bb.4: # %loop.body |
| ; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1 |
| ; CHECK-NEXT: movq %rax, %rcx |
| ; CHECK-NEXT: .LBB15_5: # %loop.body |
| ; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1 |
| ; CHECK-NEXT: movl %edi, (%rcx) |
| ; CHECK-NEXT: addl $1, %esi |
| ; CHECK-NEXT: cmpl $1024, %esi # imm = 0x400 |
| ; CHECK-NEXT: jl .LBB15_1 |
| ; CHECK-NEXT: # %bb.6: # %exit |
| ; CHECK-NEXT: retq |
| ; |
| ; CHECK-FORCEALL-LABEL: test_memoperand_loop: |
| ; CHECK-FORCEALL: # %bb.0: # %entry |
| ; CHECK-FORCEALL-NEXT: movq begin@GOTPCREL(%rip), %r8 |
| ; CHECK-FORCEALL-NEXT: movq (%r8), %rax |
| ; CHECK-FORCEALL-NEXT: movq end@GOTPCREL(%rip), %rcx |
| ; CHECK-FORCEALL-NEXT: movq (%rcx), %rdx |
| ; CHECK-FORCEALL-NEXT: xorl %esi, %esi |
| ; CHECK-FORCEALL-NEXT: movq %rax, %rcx |
| ; CHECK-FORCEALL-NEXT: .LBB15_1: # %loop.body |
| ; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; CHECK-FORCEALL-NEXT: addq $8, %rcx |
| ; CHECK-FORCEALL-NEXT: cmpq %rdx, %rcx |
| ; CHECK-FORCEALL-NEXT: ja .LBB15_3 |
| ; CHECK-FORCEALL-NEXT: # %bb.2: # %loop.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movq (%r8), %rcx |
| ; CHECK-FORCEALL-NEXT: .LBB15_3: # %loop.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl %edi, (%rcx) |
| ; CHECK-FORCEALL-NEXT: addq $8, %rcx |
| ; CHECK-FORCEALL-NEXT: cmpq %rdx, %rcx |
| ; CHECK-FORCEALL-NEXT: ja .LBB15_5 |
| ; CHECK-FORCEALL-NEXT: # %bb.4: # %loop.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movq %rax, %rcx |
| ; CHECK-FORCEALL-NEXT: .LBB15_5: # %loop.body |
| ; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1 |
| ; CHECK-FORCEALL-NEXT: movl %edi, (%rcx) |
| ; CHECK-FORCEALL-NEXT: addl $1, %esi |
| ; CHECK-FORCEALL-NEXT: cmpl $1024, %esi # imm = 0x400 |
| ; CHECK-FORCEALL-NEXT: jl .LBB15_1 |
| ; CHECK-FORCEALL-NEXT: # %bb.6: # %exit |
| ; CHECK-FORCEALL-NEXT: retq |
| entry: |
| ; Loop-invariant values: both globals are read once before the loop. |
| %begin = load ptr, ptr @begin, align 8 |
| %end = load ptr, ptr @end, align 8 |
| br label %loop.body |
| loop.body: |
| ; Iteration counter and the pointer carried over from the previous iteration. |
| %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ] |
| %phi.ptr = phi ptr [ %begin, %entry ], [ %dst2, %loop.body ] |
| ; Advance by two i32 elements (the addq $8 in the expected code). |
| %gep1 = getelementptr inbounds i32, ptr%phi.ptr, i64 2 |
| %cmp1 = icmp ugt ptr %gep1, %end |
| ; In-loop re-load of @begin; this is the first select's memory operand. |
| %begin_dup = load ptr, ptr @begin, align 8 |
| %dst1 = select i1 %cmp1, ptr %gep1, ptr %begin_dup |
| store i32 %data, ptr%dst1, align 4 |
| ; Same pattern again, but the false operand is the %begin value from entry. |
| %gep2 = getelementptr inbounds i32, ptr%dst1, i64 2 |
| %cmp2 = icmp ugt ptr %gep2, %end |
| %dst2 = select i1 %cmp2, ptr %gep2, ptr %begin |
| store i32 %data, ptr%dst2, align 4 |
| ; Loop back while the incremented counter is below 1024 (signed compare). |
| %iv.next = add i32 %phi.iv, 1 |
| %cond = icmp slt i32 %iv.next, 1024 |
| br i1 %cond, label %loop.body, label %exit |
| exit: |
| ret void |
| } |
| |
| ; Attribute group #0, referenced by the test functions above: sets both |
| ; target-cpu and tune-cpu to x86-64. |
| attributes #0 = {"target-cpu"="x86-64" "tune-cpu"="x86-64"} |
| ; Empty metadata node. NOTE(review): no use of !0 is visible in this part of |
| ; the file; confirm it is referenced elsewhere before removing it. |
| !0 = !{} |