; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=CHECK,X86
; Pin the peek-through paths in combineBTToBitOpFlag: the bit position used by
; BT and the one used by the bit-modify operation may differ by an
; (and x, BW-1) mask, where BW is the operand bit width, or by trunc/zext
; wrappers, and the combine should still fuse them into a single BTR/BTS/BTC.
; Positive: (and x, 31) on the modify side, bare on the BT side.
; Toggles one bit of the i32 at %word and returns whether that bit was set
; before the toggle. Both targets are expected to emit a single BTC whose
; flag result is consumed directly by SETB, with no separate BT.
; NOTE(review): in the IR below the xor and the test share the same masked
; %bit, so the modify/BT asymmetry described above presumably only arises
; during DAG combining - confirm.
define i1 @fuse_and_mask_on_modify(ptr %word, i32 %position) nounwind {
; X64-LABEL: fuse_and_mask_on_modify:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: btcl %esi, %ecx
; X64-NEXT: setb %al
; X64-NEXT: movl %ecx, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: fuse_and_mask_on_modify:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btcl %eax, %edx
; X86-NEXT: setb %al
; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: retl
; Bit index masked with BW-1 (31 for i32).
%ofs = and i32 %position, 31
; Single-bit mask selecting the requested position.
%bit = shl i32 1, %ofs
%ld = load i32, ptr %word
; Modify side: toggle the selected bit.
%res = xor i32 %ld, %bit
; Test side: inspect the same bit in the value as loaded (before the toggle).
%test = and i32 %ld, %bit
%cmp = icmp ne i32 %test, 0
store i32 %res, ptr %word
ret i1 %cmp
}
; Positive: (and x, 63) on the BT side, bare on the modify side.
; Sets one bit of the i64 at %word and returns whether it was already set.
; The x86-64 assertions expect a single BTS feeding SETB. The i686 assertions
; record the current split-i64 lowering instead: a 64-bit shift sequence, a
; plain BTL on the selected 32-bit half, and two ORs to memory.
define i1 @fuse_and_mask_on_bt(ptr %word, i64 %position) nounwind {
; X64-LABEL: fuse_and_mask_on_bt:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: btsq %rsi, %rcx
; X64-NEXT: setb %al
; X64-NEXT: movq %rcx, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: fuse_and_mask_on_bt:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %esi
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: shll %cl, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB1_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %esi, %edi
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: .LBB1_2:
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andl $32, %eax
; X86-NEXT: shrl $3, %eax
; X86-NEXT: movl (%edx,%eax), %eax
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setb %al
; X86-NEXT: orl %esi, (%edx)
; X86-NEXT: orl %edi, 4(%edx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
; Modify side: mask bit built from the unmasked position.
%bit = shl i64 1, %position
%ld = load i64, ptr %word
%res = or i64 %ld, %bit
; Test side: same position, but explicitly masked with BW-1 (63 for i64).
%ofs = and i64 %position, 63
%tbit = shl i64 1, %ofs
; Test the bit in the value as loaded (before the OR).
%test = and i64 %ld, %tbit
%cmp = icmp ne i64 %test, 0
store i64 %res, ptr %word
ret i1 %cmp
}
; Positive: ZERO_EXTEND wrapper on the bit position.
; Clears one bit of the i64 at %word, with the i32 position zero-extended to
; i64, and returns true when the bit was already clear. The x86-64 assertions
; expect a single BTR feeding SETAE. The i686 assertions record the current
; split-i64 lowering (shift/NOT/AND sequence plus a plain BTL).
define i1 @fuse_zext_pos(ptr %word, i32 %position) nounwind {
; X64-LABEL: fuse_zext_pos:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: btrq %rsi, %rcx
; X64-NEXT: setae %al
; X64-NEXT: movq %rcx, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: fuse_zext_pos:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: shldl %cl, %eax, %esi
; X86-NEXT: shll %cl, %eax
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB2_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %eax, %esi
; X86-NEXT: movl $0, %eax
; X86-NEXT: .LBB2_2:
; X86-NEXT: notl %esi
; X86-NEXT: notl %eax
; X86-NEXT: movl (%edx), %ebx
; X86-NEXT: movl 4(%edx), %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: jne .LBB2_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: .LBB2_4:
; X86-NEXT: andl %esi, %edi
; X86-NEXT: andl %eax, %ebx
; X86-NEXT: btl %ecx, %ebp
; X86-NEXT: setae %al
; X86-NEXT: movl %ebx, (%edx)
; X86-NEXT: movl %edi, 4(%edx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
; The wrapper under test: the i32 position is widened to the i64 operand width.
%zext = zext i32 %position to i64
%bit = shl i64 1, %zext
; Inverted single-bit mask, used to clear the selected bit.
%mask = xor i64 %bit, -1
%ld = load i64, ptr %word
; Modify side: clear the selected bit.
%res = and i64 %ld, %mask
; Test side: inspect the same bit in the value as loaded (before the clear).
%test = and i64 %ld, %bit
; Result is true when the bit was clear, hence SETAE in the assertions.
%cmp = icmp eq i64 %test, 0
store i64 %res, ptr %word
ret i1 %cmp
}
; Negative: bit positions differ, BTC and BT must both remain.
; Toggles bit %b of the i32 at %word but tests bit %a of the loaded value, so
; the two operations address unrelated positions. The assertions on both
; targets keep a BTC (indexed by %b) and a separate BT (indexed by %a).
define i1 @no_fuse_diff_pos_xor(ptr %word, i32 %a, i32 %b) nounwind {
; X64-LABEL: no_fuse_diff_pos_xor:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: btcl %edx, %ecx
; X64-NEXT: btl %esi, %eax
; X64-NEXT: setb %al
; X64-NEXT: movl %ecx, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: no_fuse_diff_pos_xor:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: btcl %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btl %eax, %edx
; X86-NEXT: setb %al
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: retl
; Mask for the tested position (%a).
%bita = shl i32 1, %a
; Mask for the toggled position (%b).
%bitb = shl i32 1, %b
%ld = load i32, ptr %word
; Modify side uses %b ...
%res = xor i32 %ld, %bitb
; ... while the test side uses %a, on the value as loaded.
%test = and i32 %ld, %bita
%cmp = icmp ne i32 %test, 0
store i32 %res, ptr %word
ret i1 %cmp
}
; Negative: bit positions differ, BTS and BT must both remain.
; Sets bit %b of the i64 at %word but tests bit %a of the loaded value. The
; x86-64 assertions keep a BTS (indexed by %b) and a separate BT (indexed by
; %a). The i686 assertions record the split-i64 lowering (shift sequence, two
; ORs and a BTL on the selected 32-bit half).
define i1 @no_fuse_diff_pos_or(ptr %word, i64 %a, i64 %b) nounwind {
; X64-LABEL: no_fuse_diff_pos_or:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: btsq %rdx, %rcx
; X64-NEXT: btq %rsi, %rax
; X64-NEXT: setb %al
; X64-NEXT: movq %rcx, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: no_fuse_diff_pos_or:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %esi
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: shll %cl, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB4_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %esi, %edi
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: .LBB4_2:
; X86-NEXT: movl (%edx), %ebx
; X86-NEXT: movl 4(%edx), %ecx
; X86-NEXT: orl %ecx, %edi
; X86-NEXT: orl %ebx, %esi
; X86-NEXT: testb $32, %al
; X86-NEXT: jne .LBB4_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: .LBB4_4:
; X86-NEXT: btl %eax, %ecx
; X86-NEXT: setb %al
; X86-NEXT: movl %esi, (%edx)
; X86-NEXT: movl %edi, 4(%edx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
; Mask for the tested position (%a).
%bita = shl i64 1, %a
; Mask for the position being set (%b).
%bitb = shl i64 1, %b
%ld = load i64, ptr %word
; Modify side uses %b ...
%res = or i64 %ld, %bitb
; ... while the test side uses %a, on the value as loaded.
%test = and i64 %ld, %bita
%cmp = icmp ne i64 %test, 0
store i64 %res, ptr %word
ret i1 %cmp
}
; Negative: the mask on the modify side is not BW-1. 30 is 0b11110, so the
; mask also clears bit 0 of the position; peek-through must therefore leave
; the masked position in place, and BTC and BT both remain.
; The assertions on both targets keep an explicit AND with 30 ahead of the
; BTC (byte-sized) and ahead of the BT (32-bit).
; NOTE(review): in the IR below the xor and the test share the same masked
; %bit; the two differently sized ANDs presumably come from separate lowering
; of the shift amount - confirm.
define i1 @no_fuse_partial_mask_on_modify(ptr %word, i32 %position) nounwind {
; X64-LABEL: no_fuse_partial_mask_on_modify:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: andb $30, %al
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: btcl %eax, %edx
; X64-NEXT: andl $30, %esi
; X64-NEXT: btl %esi, %ecx
; X64-NEXT: setb %al
; X64-NEXT: movl %edx, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: no_fuse_partial_mask_on_modify:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andb $30, %dl
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: btcl %edx, %edi
; X86-NEXT: andl $30, %eax
; X86-NEXT: btl %eax, %esi
; X86-NEXT: setb %al
; X86-NEXT: movl %edi, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
; Partial mask: keeps bits 1-4 of the position and drops bit 0.
%ofs = and i32 %position, 30
%bit = shl i32 1, %ofs
%ld = load i32, ptr %word
; Modify side: toggle the selected bit.
%res = xor i32 %ld, %bit
; Test side: inspect the same bit in the value as loaded (before the toggle).
%test = and i32 %ld, %bit
%cmp = icmp ne i32 %test, 0
store i32 %res, ptr %word
ret i1 %cmp
}
; Negative: bit positions differ, BTR and BT must both remain.
; Clears bit %b of the i64 at %word but tests bit %a of the loaded value, and
; returns true when bit %a is clear. The x86-64 assertions keep a BTR (indexed
; by %b) and a separate BT (indexed by %a) feeding SETAE. The i686 assertions
; record the split-i64 lowering (shift/NOT/AND sequence plus a BTL).
define i1 @no_fuse_diff_pos_and(ptr %word, i64 %a, i64 %b) nounwind {
; X64-LABEL: no_fuse_diff_pos_and:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: btrq %rdx, %rcx
; X64-NEXT: btq %rsi, %rax
; X64-NEXT: setae %al
; X64-NEXT: movq %rcx, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: no_fuse_diff_pos_and:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %esi
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: shll %cl, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB6_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %esi, %edi
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: .LBB6_2:
; X86-NEXT: notl %edi
; X86-NEXT: notl %esi
; X86-NEXT: movl (%edx), %ebx
; X86-NEXT: movl 4(%edx), %ecx
; X86-NEXT: andl %ecx, %edi
; X86-NEXT: andl %ebx, %esi
; X86-NEXT: testb $32, %al
; X86-NEXT: jne .LBB6_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: .LBB6_4:
; X86-NEXT: btl %eax, %ecx
; X86-NEXT: setae %al
; X86-NEXT: movl %esi, (%edx)
; X86-NEXT: movl %edi, 4(%edx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
; Mask for the tested position (%a).
%bita = shl i64 1, %a
; Mask for the position being cleared (%b), then inverted for the AND.
%bitb = shl i64 1, %b
%notb = xor i64 %bitb, -1
%ld = load i64, ptr %word
; Modify side uses %b ...
%res = and i64 %ld, %notb
; ... while the test side uses %a, on the value as loaded.
%test = and i64 %ld, %bita
; Result is true when the tested bit was clear, hence SETAE in the assertions.
%cmp = icmp eq i64 %test, 0
store i64 %res, ptr %word
ret i1 %cmp
}
; The bit index may be narrower than log2(bitwidth) bits (zext from i1);
; the peek-through must stop there instead of asserting (PR201444).
; The function extracts bit %t (0 or 1) of %c and returns 0 minus that bit.
; The assertions on both targets keep an explicit AND with 1 on the index,
; followed by BT and SBB.
define i32 @bt_index_zext_i1(i32 %c, i1 %t) nounwind {
; X64-LABEL: bt_index_zext_i1:
; X64: # %bb.0:
; X64-NEXT: andl $1, %esi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: btl %esi, %edi
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: retq
;
; X86-LABEL: bt_index_zext_i1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: andl $1, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: btl %edx, %ecx
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: retl
; One-bit index widened to the i32 shift-amount type.
%sh = zext i1 %t to i32
; Move the selected bit down to bit 0 ...
%shr = lshr i32 %c, %sh
; ... and isolate it.
%and = and i32 %shr, 1
; Negate: yields 0 when the bit is clear and -1 when it is set.
%sub = sub i32 0, %and
ret i32 %sub
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}