| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK,X64 |
| ; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=CHECK,X86 |
| |
| ; Pin the peek-through paths in combineBTToBitOpFlag: the bit positions of |
| ; BT and the bit-modify can differ by an (and x, BW-1) mask or by trunc/zext |
| ; wrappers, and the combine should still fuse them into a single BTR/BTS/BTC. |
| |
| ; (and x, 31) on the modify side, bare on the BT side. |
| ; Positive test: toggle one bit of an i32 word in memory and return whether |
| ; that bit was set before the toggle. The bit index is masked with 31 before |
| ; the shift. |
| ; In this IR the xor (modify) and the and (test) share the same masked %bit. |
| ; NOTE(review): the "mask only on the modify side" shape presumably arises |
| ; later, once the redundant mask is stripped from the BT operand - confirm. |
| ; Expected on both triples: one btcl followed by setb, with no separate btl |
| ; and no explicit mask instruction. |
| define i1 @fuse_and_mask_on_modify(ptr %word, i32 %position) nounwind { |
| ; X64-LABEL: fuse_and_mask_on_modify: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %ecx |
| ; X64-NEXT: btcl %esi, %ecx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movl %ecx, (%rdi) |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: fuse_and_mask_on_modify: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl (%ecx), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: btcl %eax, %edx |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: movl %edx, (%ecx) |
| ; X86-NEXT: retl |
| ; Single-bit mask built from the index reduced to the range 0..31. |
| %ofs = and i32 %position, 31 |
| %bit = shl i32 1, %ofs |
| %ld = load i32, ptr %word |
| ; Modify: flip the selected bit of the loaded value. |
| %res = xor i32 %ld, %bit |
| ; Test: inspect the same bit of the value as it was before the flip. |
| %test = and i32 %ld, %bit |
| %cmp = icmp ne i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; (and x, 63) on the BT side, bare on the modify side. |
| ; Positive test: set one bit of an i64 word in memory and return whether that |
| ; bit was already set. The modify mask is shifted by the raw %position while |
| ; the test mask is shifted by %position masked with 63. |
| ; Expected on the x86_64 triple: one btsq followed by setb, no separate btq. |
| ; On the i686 triple the expected output is an expansion into 32-bit halves |
| ; (shldl/shll mask construction, a btl on one loaded half, and two orl stores) |
| ; rather than a single bit-test-and-set. |
| define i1 @fuse_and_mask_on_bt(ptr %word, i64 %position) nounwind { |
| ; X64-LABEL: fuse_and_mask_on_bt: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movq (%rdi), %rcx |
| ; X64-NEXT: btsq %rsi, %rcx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: fuse_and_mask_on_bt: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl $1, %esi |
| ; X86-NEXT: xorl %edi, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: shll %cl, %esi |
| ; X86-NEXT: testb $32, %cl |
| ; X86-NEXT: je .LBB1_2 |
| ; X86-NEXT: # %bb.1: |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: xorl %esi, %esi |
| ; X86-NEXT: .LBB1_2: |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: andl $32, %eax |
| ; X86-NEXT: shrl $3, %eax |
| ; X86-NEXT: movl (%edx,%eax), %eax |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: orl %esi, (%edx) |
| ; X86-NEXT: orl %edi, 4(%edx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; Modify side: mask built from the unmasked index. |
| %bit = shl i64 1, %position |
| %ld = load i64, ptr %word |
| %res = or i64 %ld, %bit |
| ; Test side: mask built from the index reduced to the range 0..63. |
| %ofs = and i64 %position, 63 |
| %tbit = shl i64 1, %ofs |
| %test = and i64 %ld, %tbit |
| %cmp = icmp ne i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; ZERO_EXTEND wrapper on the bit position. |
| ; Positive test: clear one bit of an i64 word in memory and return whether |
| ; that bit was clear beforehand (icmp eq ... 0). The i32 %position is |
| ; zero-extended to i64 before it is used as the shift amount. |
| ; Expected on the x86_64 triple: one btrq followed by setae, no separate btq. |
| ; On the i686 triple the expected output is an expansion into 32-bit halves |
| ; (shldl/shll mask construction, notl on both halves, a btl on the selected |
| ; half) rather than a single bit-test-and-reset. |
| define i1 @fuse_zext_pos(ptr %word, i32 %position) nounwind { |
| ; X64-LABEL: fuse_zext_pos: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $esi killed $esi def $rsi |
| ; X64-NEXT: movq (%rdi), %rcx |
| ; X64-NEXT: btrq %rsi, %rcx |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: fuse_zext_pos: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl $1, %eax |
| ; X86-NEXT: xorl %esi, %esi |
| ; X86-NEXT: shldl %cl, %eax, %esi |
| ; X86-NEXT: shll %cl, %eax |
| ; X86-NEXT: testb $32, %cl |
| ; X86-NEXT: je .LBB2_2 |
| ; X86-NEXT: # %bb.1: |
| ; X86-NEXT: movl %eax, %esi |
| ; X86-NEXT: movl $0, %eax |
| ; X86-NEXT: .LBB2_2: |
| ; X86-NEXT: notl %esi |
| ; X86-NEXT: notl %eax |
| ; X86-NEXT: movl (%edx), %ebx |
| ; X86-NEXT: movl 4(%edx), %edi |
| ; X86-NEXT: movl %edi, %ebp |
| ; X86-NEXT: jne .LBB2_4 |
| ; X86-NEXT: # %bb.3: |
| ; X86-NEXT: movl %ebx, %ebp |
| ; X86-NEXT: .LBB2_4: |
| ; X86-NEXT: andl %esi, %edi |
| ; X86-NEXT: andl %eax, %ebx |
| ; X86-NEXT: btl %ecx, %ebp |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: movl %ebx, (%edx) |
| ; X86-NEXT: movl %edi, 4(%edx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| ; Widen the 32-bit index, then build the single-bit mask and its complement. |
| %zext = zext i32 %position to i64 |
| %bit = shl i64 1, %zext |
| %mask = xor i64 %bit, -1 |
| %ld = load i64, ptr %word |
| ; Modify: clear the selected bit. |
| %res = and i64 %ld, %mask |
| ; Test: the result is true when the bit was clear in the loaded value. |
| %test = and i64 %ld, %bit |
| %cmp = icmp eq i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; Negative: bit positions differ, BTC and BT must both remain. |
| ; The i32 word is toggled at bit %b while bit %a of the originally loaded |
| ; value is tested, so the two operations use unrelated indices. |
| ; Expected on both triples: a btcl on a copy of the loaded value plus a |
| ; separate btl on the original, with setb taken after the btl. |
| define i1 @no_fuse_diff_pos_xor(ptr %word, i32 %a, i32 %b) nounwind { |
| ; X64-LABEL: no_fuse_diff_pos_xor: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: btcl %edx, %ecx |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movl %ecx, (%rdi) |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: no_fuse_diff_pos_xor: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl (%ecx), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: btcl %eax, %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: btl %eax, %edx |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: movl %esi, (%ecx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; %bita selects the tested bit, %bitb selects the toggled bit. |
| %bita = shl i32 1, %a |
| %bitb = shl i32 1, %b |
| %ld = load i32, ptr %word |
| %res = xor i32 %ld, %bitb |
| %test = and i32 %ld, %bita |
| %cmp = icmp ne i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; Negative: bit positions differ, BTS and BT must both remain. |
| ; The i64 word has bit %b set while bit %a of the originally loaded value is |
| ; tested, so the two operations use unrelated indices. |
| ; Expected on the x86_64 triple: a btsq on a copy of the loaded value plus a |
| ; separate btq on the original. On the i686 triple the expected output is an |
| ; expansion into 32-bit halves with a btl on the half selected by bit 5 of |
| ; the tested index (testb $32). |
| define i1 @no_fuse_diff_pos_or(ptr %word, i64 %a, i64 %b) nounwind { |
| ; X64-LABEL: no_fuse_diff_pos_or: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: btsq %rdx, %rcx |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: no_fuse_diff_pos_or: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl $1, %esi |
| ; X86-NEXT: xorl %edi, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: shll %cl, %esi |
| ; X86-NEXT: testb $32, %cl |
| ; X86-NEXT: je .LBB4_2 |
| ; X86-NEXT: # %bb.1: |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: xorl %esi, %esi |
| ; X86-NEXT: .LBB4_2: |
| ; X86-NEXT: movl (%edx), %ebx |
| ; X86-NEXT: movl 4(%edx), %ecx |
| ; X86-NEXT: orl %ecx, %edi |
| ; X86-NEXT: orl %ebx, %esi |
| ; X86-NEXT: testb $32, %al |
| ; X86-NEXT: jne .LBB4_4 |
| ; X86-NEXT: # %bb.3: |
| ; X86-NEXT: movl %ebx, %ecx |
| ; X86-NEXT: .LBB4_4: |
| ; X86-NEXT: btl %eax, %ecx |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: movl %esi, (%edx) |
| ; X86-NEXT: movl %edi, 4(%edx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; %bita selects the tested bit, %bitb selects the bit that gets set. |
| %bita = shl i64 1, %a |
| %bitb = shl i64 1, %b |
| %ld = load i64, ptr %word |
| %res = or i64 %ld, %bitb |
| %test = and i64 %ld, %bita |
| %cmp = icmp ne i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; Negative: mask on the modify side drops a low bit (bit 0 of and-30 is clear), |
| ; so peek-through must leave the masked position in place and BTC/BT remain. |
| ; In this IR the and-30 result feeds the single %bit that both the xor and the |
| ; test use. The expected output on both triples keeps a mask in front of each |
| ; bit instruction (andb $30 before btcl, andl $30 before btl) and keeps the |
| ; btcl and btl as two separate instructions. |
| define i1 @no_fuse_partial_mask_on_modify(ptr %word, i32 %position) nounwind { |
| ; X64-LABEL: no_fuse_partial_mask_on_modify: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %eax |
| ; X64-NEXT: andb $30, %al |
| ; X64-NEXT: movl (%rdi), %ecx |
| ; X64-NEXT: movl %ecx, %edx |
| ; X64-NEXT: btcl %eax, %edx |
| ; X64-NEXT: andl $30, %esi |
| ; X64-NEXT: btl %esi, %ecx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movl %edx, (%rdi) |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: no_fuse_partial_mask_on_modify: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %eax, %edx |
| ; X86-NEXT: andb $30, %dl |
| ; X86-NEXT: movl (%ecx), %esi |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: btcl %edx, %edi |
| ; X86-NEXT: andl $30, %eax |
| ; X86-NEXT: btl %eax, %esi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: movl %edi, (%ecx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; Mask 30 clears bit 0 of the index, so it is not the full (BW-1) mask of 31. |
| %ofs = and i32 %position, 30 |
| %bit = shl i32 1, %ofs |
| %ld = load i32, ptr %word |
| %res = xor i32 %ld, %bit |
| %test = and i32 %ld, %bit |
| %cmp = icmp ne i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; Negative: bit positions differ, BTR and BT must both remain. |
| ; The i64 word has bit %b cleared while bit %a of the originally loaded value |
| ; is tested for being clear (icmp eq ... 0), so the two operations use |
| ; unrelated indices. |
| ; Expected on the x86_64 triple: a btrq on a copy of the loaded value plus a |
| ; separate btq on the original, followed by setae. On the i686 triple the |
| ; expected output is an expansion into 32-bit halves with a btl on the half |
| ; selected by bit 5 of the tested index (testb $32). |
| define i1 @no_fuse_diff_pos_and(ptr %word, i64 %a, i64 %b) nounwind { |
| ; X64-LABEL: no_fuse_diff_pos_and: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: btrq %rdx, %rcx |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: no_fuse_diff_pos_and: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl $1, %esi |
| ; X86-NEXT: xorl %edi, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: shll %cl, %esi |
| ; X86-NEXT: testb $32, %cl |
| ; X86-NEXT: je .LBB6_2 |
| ; X86-NEXT: # %bb.1: |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: xorl %esi, %esi |
| ; X86-NEXT: .LBB6_2: |
| ; X86-NEXT: notl %edi |
| ; X86-NEXT: notl %esi |
| ; X86-NEXT: movl (%edx), %ebx |
| ; X86-NEXT: movl 4(%edx), %ecx |
| ; X86-NEXT: andl %ecx, %edi |
| ; X86-NEXT: andl %ebx, %esi |
| ; X86-NEXT: testb $32, %al |
| ; X86-NEXT: jne .LBB6_4 |
| ; X86-NEXT: # %bb.3: |
| ; X86-NEXT: movl %ebx, %ecx |
| ; X86-NEXT: .LBB6_4: |
| ; X86-NEXT: btl %eax, %ecx |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: movl %esi, (%edx) |
| ; X86-NEXT: movl %edi, 4(%edx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; %bita selects the tested bit; %notb is the complement of the cleared bit. |
| %bita = shl i64 1, %a |
| %bitb = shl i64 1, %b |
| %notb = xor i64 %bitb, -1 |
| %ld = load i64, ptr %word |
| %res = and i64 %ld, %notb |
| %test = and i64 %ld, %bita |
| %cmp = icmp eq i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; The bit index may be narrower than log2(bitwidth) bits (zext from i1); |
| ; the peek-through must stop there instead of asserting (PR201444). |
| ; This function has no memory operand and no bit-modify: it shifts %c right by |
| ; a 0-or-1 amount, keeps bit 0, and negates it, so the result is 0 or -1. |
| ; Expected on both triples: the index is masked with andl $1, then a plain btl |
| ; is followed by sbbl of a zeroed %eax with itself to produce the result. |
| define i32 @bt_index_zext_i1(i32 %c, i1 %t) nounwind { |
| ; X64-LABEL: bt_index_zext_i1: |
| ; X64: # %bb.0: |
| ; X64-NEXT: andl $1, %esi |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: btl %esi, %edi |
| ; X64-NEXT: sbbl %eax, %eax |
| ; X64-NEXT: retq |
| ; |
| ; X86-LABEL: bt_index_zext_i1: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: andl $1, %edx |
| ; X86-NEXT: xorl %eax, %eax |
| ; X86-NEXT: btl %edx, %ecx |
| ; X86-NEXT: sbbl %eax, %eax |
| ; X86-NEXT: retl |
| ; The shift amount comes from an i1, so it can only be 0 or 1. |
| %sh = zext i1 %t to i32 |
| %shr = lshr i32 %c, %sh |
| %and = and i32 %shr, 1 |
| ; 0 - (0 or 1) yields 0 or -1. |
| %sub = sub i32 0, %and |
| ret i32 %sub |
| } |
| |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; CHECK: {{.*}} |