; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
; bt/btc/btr/bts patterns + 'init' to set a single bit to a given value in large integers
;
; i32 bt/btc/btr/bts + init (reference)
;
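;
; A rough C-level sketch of the i32 patterns exercised below (function and
; variable names are illustrative, not taken from any original source):
;
;   #include <stdbool.h>
;   #include <stdint.h>
;
;   bool test_eq_i32(uint32_t *word, uint32_t position) {
;     uint32_t bit = 1u << (position & 31);
;     return (*word & bit) == 0;                    // btl + setae
;   }
;
;   bool complement_ne_i32(uint32_t *word, uint32_t position) {
;     uint32_t bit = 1u << (position & 31);
;     bool was_set = (*word & bit) != 0;            // btl + setb
;     *word ^= bit;                                 // btcl
;     return was_set;
;   }
;
;   bool init_eq_i32(uint32_t *word, uint32_t position, bool value) {
;     uint32_t bit = 1u << (position & 31);
;     bool was_clear = (*word & bit) == 0;          // btl + setae
;     *word = (*word & ~bit)                        // btrl
;           | ((uint32_t)value << (position & 31)); // shll + orl
;     return was_clear;
;   }
;
; reset_eq/set_ne follow the same shape with '&= ~bit' (btrl) and '|= bit' (btsl).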
define i1 @test_eq_i32(ptr %word, i32 %position) nounwind {
; X86-LABEL: test_eq_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setae %al
; X86-NEXT: retl
;
; X64-LABEL: test_eq_i32:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: btl %esi, %eax
; X64-NEXT: setae %al
; X64-NEXT: retq
%rem = and i32 %position, 31
%bit = shl nuw i32 1, %rem
%ld = load i32, ptr %word
%test = and i32 %ld, %bit
%cmp = icmp eq i32 %test, 0
ret i1 %cmp
}
define i1 @complement_ne_i32(ptr %word, i32 %position) nounwind {
; X86-LABEL: complement_ne_i32:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: btcl %eax, %esi
; X86-NEXT: btl %eax, %edx
; X86-NEXT: setb %al
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: complement_ne_i32:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: btcl %esi, %ecx
; X64-NEXT: btl %esi, %eax
; X64-NEXT: setb %al
; X64-NEXT: movl %ecx, (%rdi)
; X64-NEXT: retq
%ofs = and i32 %position, 31
%bit = shl nuw i32 1, %ofs
%ld = load i32, ptr %word
%test = and i32 %ld, %bit
%res = xor i32 %ld, %bit
%cmp = icmp ne i32 %test, 0
store i32 %res, ptr %word
ret i1 %cmp
}
define i1 @reset_eq_i32(ptr %word, i32 %position) nounwind {
; X86-LABEL: reset_eq_i32:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: btrl %eax, %esi
; X86-NEXT: btl %eax, %edx
; X86-NEXT: setae %al
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: reset_eq_i32:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: btrl %esi, %ecx
; X64-NEXT: btl %esi, %eax
; X64-NEXT: setae %al
; X64-NEXT: movl %ecx, (%rdi)
; X64-NEXT: retq
%ofs = and i32 %position, 31
%bit = shl nuw i32 1, %ofs
%mask = xor i32 %bit, -1
%ld = load i32, ptr %word
%test = and i32 %ld, %bit
%res = and i32 %ld, %mask
%cmp = icmp eq i32 %test, 0
store i32 %res, ptr %word
ret i1 %cmp
}
define i1 @set_ne_i32(ptr %word, i32 %position) nounwind {
; X86-LABEL: set_ne_i32:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: btsl %eax, %esi
; X86-NEXT: btl %eax, %edx
; X86-NEXT: setb %al
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: set_ne_i32:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: btsl %esi, %ecx
; X64-NEXT: btl %esi, %eax
; X64-NEXT: setb %al
; X64-NEXT: movl %ecx, (%rdi)
; X64-NEXT: retq
%ofs = and i32 %position, 31
%bit = shl nuw i32 1, %ofs
%ld = load i32, ptr %word
%test = and i32 %ld, %bit
%res = or i32 %ld, %bit
%cmp = icmp ne i32 %test, 0
store i32 %res, ptr %word
ret i1 %cmp
}
define i1 @init_eq_i32(ptr %word, i32 %position, i1 zeroext %value) nounwind {
; X86-LABEL: init_eq_i32:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movl (%edx), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: btrl %ecx, %edi
; X86-NEXT: orl %eax, %edi
; X86-NEXT: btl %ecx, %esi
; X86-NEXT: setae %al
; X86-NEXT: movl %edi, (%edx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; SSE-LABEL: init_eq_i32:
; SSE: # %bb.0:
; SSE-NEXT: movl %esi, %ecx
; SSE-NEXT: shll %cl, %edx
; SSE-NEXT: movl (%rdi), %eax
; SSE-NEXT: movl %eax, %esi
; SSE-NEXT: btrl %ecx, %esi
; SSE-NEXT: orl %edx, %esi
; SSE-NEXT: btl %ecx, %eax
; SSE-NEXT: setae %al
; SSE-NEXT: movl %esi, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: init_eq_i32:
; AVX: # %bb.0:
; AVX-NEXT: shlxl %esi, %edx, %eax
; AVX-NEXT: movl (%rdi), %ecx
; AVX-NEXT: movl %ecx, %edx
; AVX-NEXT: btrl %esi, %edx
; AVX-NEXT: orl %eax, %edx
; AVX-NEXT: btl %esi, %ecx
; AVX-NEXT: setae %al
; AVX-NEXT: movl %edx, (%rdi)
; AVX-NEXT: retq
%ofs = and i32 %position, 31
%bit = shl nuw i32 1, %ofs
%mask = xor i32 %bit, -1
%val0 = zext i1 %value to i32
%val = shl nuw i32 %val0, %ofs
%ld = load i32, ptr %word
%test = and i32 %ld, %bit
%res0 = and i32 %ld, %mask
%res = or i32 %res0, %val
%cmp = icmp eq i32 %test, 0
store i32 %res, ptr %word
ret i1 %cmp
}
;
; i64 bt/btc/btr/bts + init
;
define i1 @test_ne_i64(ptr %word, i32 %position) nounwind {
; X86-LABEL: test_ne_i64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $32, %edx
; X86-NEXT: shrl $3, %edx
; X86-NEXT: movl (%eax,%edx), %eax
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: test_ne_i64:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: btq %rsi, %rax
; X64-NEXT: setb %al
; X64-NEXT: retq
%rem = and i32 %position, 63
%ofs = zext nneg i32 %rem to i64
%bit = shl nuw i64 1, %ofs
%ld = load i64, ptr %word
%test = and i64 %ld, %bit
%cmp = icmp ne i64 %test, 0
ret i1 %cmp
}
define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind {
; X86-LABEL: complement_ne_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $32, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: btl %edx, %edi
; X86-NEXT: setb %al
; X86-NEXT: btcl %edx, %edi
; X86-NEXT: movl %edi, (%ecx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: complement_ne_i64:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: btcq %rsi, %rcx
; X64-NEXT: btq %rsi, %rax
; X64-NEXT: setb %al
; X64-NEXT: movq %rcx, (%rdi)
; X64-NEXT: retq
%rem = and i32 %position, 63
%ofs = zext nneg i32 %rem to i64
%bit = shl nuw i64 1, %ofs
%ld = load i64, ptr %word
%test = and i64 %ld, %bit
%res = xor i64 %ld, %bit
%cmp = icmp ne i64 %test, 0
store i64 %res, ptr %word
ret i1 %cmp
}
define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind {
; X86-LABEL: reset_eq_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $32, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: btl %edx, %edi
; X86-NEXT: setae %al
; X86-NEXT: btrl %edx, %edi
; X86-NEXT: movl %edi, (%ecx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: reset_eq_i64:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: btrq %rsi, %rcx
; X64-NEXT: btq %rsi, %rax
; X64-NEXT: setae %al
; X64-NEXT: movq %rcx, (%rdi)
; X64-NEXT: retq
%rem = and i32 %position, 63
%ofs = zext nneg i32 %rem to i64
%bit = shl nuw i64 1, %ofs
%mask = xor i64 %bit, -1
%ld = load i64, ptr %word
%test = and i64 %ld, %bit
%res = and i64 %ld, %mask
%cmp = icmp eq i64 %test, 0
store i64 %res, ptr %word
ret i1 %cmp
}
define i1 @set_ne_i64(ptr %word, i32 %position) nounwind {
; X86-LABEL: set_ne_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $32, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: btl %edx, %edi
; X86-NEXT: setb %al
; X86-NEXT: btsl %edx, %edi
; X86-NEXT: movl %edi, (%ecx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: set_ne_i64:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: btsq %rsi, %rcx
; X64-NEXT: btq %rsi, %rax
; X64-NEXT: setb %al
; X64-NEXT: movq %rcx, (%rdi)
; X64-NEXT: retq
%rem = and i32 %position, 63
%ofs = zext nneg i32 %rem to i64
%bit = shl nuw i64 1, %ofs
%ld = load i64, ptr %word
%test = and i64 %ld, %bit
%res = or i64 %ld, %bit
%cmp = icmp ne i64 %test, 0
store i64 %res, ptr %word
ret i1 %cmp
}
define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind {
; X86-LABEL: init_eq_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: andl $32, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%edx,%esi), %edi
; X86-NEXT: btl %ecx, %edi
; X86-NEXT: setae %al
; X86-NEXT: btrl %ecx, %edi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shll %cl, %ebx
; X86-NEXT: orl %edi, %ebx
; X86-NEXT: movl %ebx, (%edx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; SSE-LABEL: init_eq_i64:
; SSE: # %bb.0:
; SSE-NEXT: movl %esi, %ecx
; SSE-NEXT: movl %edx, %eax
; SSE-NEXT: shlq %cl, %rax
; SSE-NEXT: movq (%rdi), %rdx
; SSE-NEXT: movq %rdx, %rsi
; SSE-NEXT: btrq %rcx, %rsi
; SSE-NEXT: orq %rax, %rsi
; SSE-NEXT: btq %rcx, %rdx
; SSE-NEXT: setae %al
; SSE-NEXT: movq %rsi, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: init_eq_i64:
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $esi killed $esi def $rsi
; AVX-NEXT: movl %edx, %eax
; AVX-NEXT: shlxq %rsi, %rax, %rax
; AVX-NEXT: movq (%rdi), %rcx
; AVX-NEXT: movq %rcx, %rdx
; AVX-NEXT: btrq %rsi, %rdx
; AVX-NEXT: orq %rax, %rdx
; AVX-NEXT: btq %rsi, %rcx
; AVX-NEXT: setae %al
; AVX-NEXT: movq %rdx, (%rdi)
; AVX-NEXT: retq
%rem = and i32 %position, 63
%ofs = zext nneg i32 %rem to i64
%bit = shl nuw i64 1, %ofs
%mask = xor i64 %bit, -1
%val0 = zext i1 %value to i64
%val = shl nuw i64 %val0, %ofs
%ld = load i64, ptr %word
%test = and i64 %ld, %bit
%res0 = and i64 %ld, %mask
%res = or i64 %res0, %val
%cmp = icmp eq i64 %test, 0
store i64 %res, ptr %word
ret i1 %cmp
}
;
; i128
;
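; For types wider than 64 bits the backend narrows the single-bit test to one
; 32-bit word: the masked position supplies both the dword offset and the bit
; index within it. A rough C sketch of the addressing (illustrative names,
; not from any original source):
;
;   #include <stdbool.h>
;   #include <stdint.h>
;
;   bool test_ne_i128(const uint32_t *word, uint32_t position) {
;     uint32_t idx = (position & 96) >> 5;          // dword index 0..3
;                                                   // (asm: andl $96 + shrl $3 = byte offset)
;     return (word[idx] >> (position & 31)) & 1;    // btl + setb
;   }
;
; e.g. position 70: byte offset (70 & 96) >> 3 = 8, bit index 70 & 31 = 6.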
define i1 @test_ne_i128(ptr %word, i32 %position) nounwind {
; X86-LABEL: test_ne_i128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $96, %edx
; X86-NEXT: shrl $3, %edx
; X86-NEXT: movl (%eax,%edx), %eax
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: test_ne_i128:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: andl $96, %eax
; X64-NEXT: shrl $3, %eax
; X64-NEXT: movl (%rdi,%rax), %eax
; X64-NEXT: btl %esi, %eax
; X64-NEXT: setb %al
; X64-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs
%ld = load i128, ptr %word
%test = and i128 %ld, %bit
%cmp = icmp ne i128 %test, 0
ret i1 %cmp
}
define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind {
; X86-LABEL: complement_ne_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $96, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: btl %edx, %edi
; X86-NEXT: setb %al
; X86-NEXT: btcl %edx, %edi
; X86-NEXT: movl %edi, (%ecx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: complement_ne_i128:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: andl $96, %ecx
; X64-NEXT: shrl $3, %ecx
; X64-NEXT: movl (%rdi,%rcx), %edx
; X64-NEXT: btl %esi, %edx
; X64-NEXT: setb %al
; X64-NEXT: btcl %esi, %edx
; X64-NEXT: movl %edx, (%rdi,%rcx)
; X64-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs
%ld = load i128, ptr %word
%test = and i128 %ld, %bit
%res = xor i128 %ld, %bit
%cmp = icmp ne i128 %test, 0
store i128 %res, ptr %word
ret i1 %cmp
}
define i1 @reset_eq_i128(ptr %word, i32 %position) nounwind {
; X86-LABEL: reset_eq_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $96, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: btl %edx, %edi
; X86-NEXT: setae %al
; X86-NEXT: btrl %edx, %edi
; X86-NEXT: movl %edi, (%ecx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: reset_eq_i128:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: andl $96, %ecx
; X64-NEXT: shrl $3, %ecx
; X64-NEXT: movl (%rdi,%rcx), %edx
; X64-NEXT: btl %esi, %edx
; X64-NEXT: setae %al
; X64-NEXT: btrl %esi, %edx
; X64-NEXT: movl %edx, (%rdi,%rcx)
; X64-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs
%mask = xor i128 %bit, -1
%ld = load i128, ptr %word
%test = and i128 %ld, %bit
%res = and i128 %ld, %mask
%cmp = icmp eq i128 %test, 0
store i128 %res, ptr %word
ret i1 %cmp
}
define i1 @set_ne_i128(ptr %word, i32 %position) nounwind {
; X86-LABEL: set_ne_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $96, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: btl %edx, %edi
; X86-NEXT: setb %al
; X86-NEXT: btsl %edx, %edi
; X86-NEXT: movl %edi, (%ecx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: set_ne_i128:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: andl $96, %ecx
; X64-NEXT: shrl $3, %ecx
; X64-NEXT: movl (%rdi,%rcx), %edx
; X64-NEXT: btl %esi, %edx
; X64-NEXT: setb %al
; X64-NEXT: btsl %esi, %edx
; X64-NEXT: movl %edx, (%rdi,%rcx)
; X64-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs
%ld = load i128, ptr %word
%test = and i128 %ld, %bit
%res = or i128 %ld, %bit
%cmp = icmp ne i128 %test, 0
store i128 %res, ptr %word
ret i1 %cmp
}
define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind {
; X86-LABEL: init_eq_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: andl $96, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%edx,%esi), %edi
; X86-NEXT: btl %ecx, %edi
; X86-NEXT: setae %al
; X86-NEXT: btrl %ecx, %edi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shll %cl, %ebx
; X86-NEXT: orl %edi, %ebx
; X86-NEXT: movl %ebx, (%edx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; SSE-LABEL: init_eq_i128:
; SSE: # %bb.0:
; SSE-NEXT: movl %esi, %ecx
; SSE-NEXT: andl $96, %esi
; SSE-NEXT: shrl $3, %esi
; SSE-NEXT: movl (%rdi,%rsi), %r8d
; SSE-NEXT: btl %ecx, %r8d
; SSE-NEXT: setae %al
; SSE-NEXT: shll %cl, %edx
; SSE-NEXT: btrl %ecx, %r8d
; SSE-NEXT: orl %r8d, %edx
; SSE-NEXT: movl %edx, (%rdi,%rsi)
; SSE-NEXT: retq
;
; AVX-LABEL: init_eq_i128:
; AVX: # %bb.0:
; AVX-NEXT: movl %esi, %ecx
; AVX-NEXT: andl $96, %ecx
; AVX-NEXT: shrl $3, %ecx
; AVX-NEXT: movl (%rdi,%rcx), %r8d
; AVX-NEXT: btl %esi, %r8d
; AVX-NEXT: setae %al
; AVX-NEXT: btrl %esi, %r8d
; AVX-NEXT: shlxl %esi, %edx, %edx
; AVX-NEXT: orl %r8d, %edx
; AVX-NEXT: movl %edx, (%rdi,%rcx)
; AVX-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs
%mask = xor i128 %bit, -1
%val0 = zext i1 %value to i128
%val = shl nuw i128 %val0, %ofs
%ld = load i128, ptr %word
%test = and i128 %ld, %bit
%res0 = and i128 %ld, %mask
%res = or i128 %res0, %val
%cmp = icmp eq i128 %test, 0
store i128 %res, ptr %word
ret i1 %cmp
}
;
; i512
;
define i1 @test_ne_i512(ptr %word, i32 %position) nounwind {
; X86-LABEL: test_ne_i512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: shrl $3, %edx
; X86-NEXT: andl $60, %edx
; X86-NEXT: movl (%eax,%edx), %eax
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: test_ne_i512:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: shrl $3, %eax
; X64-NEXT: andl $60, %eax
; X64-NEXT: movl (%rdi,%rax), %eax
; X64-NEXT: btl %esi, %eax
; X64-NEXT: setb %al
; X64-NEXT: retq
%rem = and i32 %position, 511
%ofs = zext nneg i32 %rem to i512
%bit = shl nuw i512 1, %ofs
%ld = load i512, ptr %word
%test = and i512 %ld, %bit
%cmp = icmp ne i512 %test, 0
ret i1 %cmp
}
define i1 @complement_ne_i512(ptr %word, i32 %position) nounwind {
; X86-LABEL: complement_ne_i512:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: andl $60, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: btl %edx, %edi
; X86-NEXT: setb %al
; X86-NEXT: btcl %edx, %edi
; X86-NEXT: movl %edi, (%ecx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: complement_ne_i512:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: shrl $3, %ecx
; X64-NEXT: andl $60, %ecx
; X64-NEXT: movl (%rdi,%rcx), %edx
; X64-NEXT: btl %esi, %edx
; X64-NEXT: setb %al
; X64-NEXT: btcl %esi, %edx
; X64-NEXT: movl %edx, (%rdi,%rcx)
; X64-NEXT: retq
%rem = and i32 %position, 511
%ofs = zext nneg i32 %rem to i512
%bit = shl nuw i512 1, %ofs
%ld = load i512, ptr %word
%test = and i512 %ld, %bit
%res = xor i512 %ld, %bit
%cmp = icmp ne i512 %test, 0
store i512 %res, ptr %word
ret i1 %cmp
}
define i1 @reset_eq_i512(ptr %word, i32 %position) nounwind {
; X86-LABEL: reset_eq_i512:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: andl $60, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: btl %edx, %edi
; X86-NEXT: setae %al
; X86-NEXT: btrl %edx, %edi
; X86-NEXT: movl %edi, (%ecx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: reset_eq_i512:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: shrl $3, %ecx
; X64-NEXT: andl $60, %ecx
; X64-NEXT: movl (%rdi,%rcx), %edx
; X64-NEXT: btl %esi, %edx
; X64-NEXT: setae %al
; X64-NEXT: btrl %esi, %edx
; X64-NEXT: movl %edx, (%rdi,%rcx)
; X64-NEXT: retq
%rem = and i32 %position, 511
%ofs = zext nneg i32 %rem to i512
%bit = shl nuw i512 1, %ofs
%mask = xor i512 %bit, -1
%ld = load i512, ptr %word
%test = and i512 %ld, %bit
%res = and i512 %ld, %mask
%cmp = icmp eq i512 %test, 0
store i512 %res, ptr %word
ret i1 %cmp
}
define i1 @set_ne_i512(ptr %word, i32 %position) nounwind {
; X86-LABEL: set_ne_i512:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: andl $60, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: btl %edx, %edi
; X86-NEXT: setb %al
; X86-NEXT: btsl %edx, %edi
; X86-NEXT: movl %edi, (%ecx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: set_ne_i512:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: shrl $3, %ecx
; X64-NEXT: andl $60, %ecx
; X64-NEXT: movl (%rdi,%rcx), %edx
; X64-NEXT: btl %esi, %edx
; X64-NEXT: setb %al
; X64-NEXT: btsl %esi, %edx
; X64-NEXT: movl %edx, (%rdi,%rcx)
; X64-NEXT: retq
%rem = and i32 %position, 511
%ofs = zext nneg i32 %rem to i512
%bit = shl nuw i512 1, %ofs
%ld = load i512, ptr %word
%test = and i512 %ld, %bit
%res = or i512 %ld, %bit
%cmp = icmp ne i512 %test, 0
store i512 %res, ptr %word
ret i1 %cmp
}
define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {
; X86-LABEL: init_eq_i512:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: andl $60, %esi
; X86-NEXT: movl (%edx,%esi), %edi
; X86-NEXT: btl %ecx, %edi
; X86-NEXT: setae %al
; X86-NEXT: btrl %ecx, %edi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shll %cl, %ebx
; X86-NEXT: orl %edi, %ebx
; X86-NEXT: movl %ebx, (%edx,%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; SSE-LABEL: init_eq_i512:
; SSE: # %bb.0:
; SSE-NEXT: movl %esi, %ecx
; SSE-NEXT: shrl $3, %esi
; SSE-NEXT: andl $60, %esi
; SSE-NEXT: movl (%rdi,%rsi), %r8d
; SSE-NEXT: btl %ecx, %r8d
; SSE-NEXT: setae %al
; SSE-NEXT: shll %cl, %edx
; SSE-NEXT: btrl %ecx, %r8d
; SSE-NEXT: orl %r8d, %edx
; SSE-NEXT: movl %edx, (%rdi,%rsi)
; SSE-NEXT: retq
;
; AVX-LABEL: init_eq_i512:
; AVX: # %bb.0:
; AVX-NEXT: movl %esi, %ecx
; AVX-NEXT: shrl $3, %ecx
; AVX-NEXT: andl $60, %ecx
; AVX-NEXT: movl (%rdi,%rcx), %r8d
; AVX-NEXT: btl %esi, %r8d
; AVX-NEXT: setae %al
; AVX-NEXT: btrl %esi, %r8d
; AVX-NEXT: shlxl %esi, %edx, %edx
; AVX-NEXT: orl %r8d, %edx
; AVX-NEXT: movl %edx, (%rdi,%rcx)
; AVX-NEXT: retq
%rem = and i32 %position, 511
%ofs = zext nneg i32 %rem to i512
%bit = shl nuw i512 1, %ofs
%mask = xor i512 %bit, -1
%val0 = zext i1 %value to i512
%val = shl nuw i512 %val0, %ofs
%ld = load i512, ptr %word
%test = and i512 %ld, %bit
%res0 = and i512 %ld, %mask
%res = or i512 %res0, %val
%cmp = icmp eq i512 %test, 0
store i512 %res, ptr %word
ret i1 %cmp
}
;
; i4096
;
define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {
; X86-LABEL: test_ne_i4096:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $4064, %edx # imm = 0xFE0
; X86-NEXT: shrl $3, %edx
; X86-NEXT: movl (%eax,%edx), %eax
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: test_ne_i4096:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: andl $4064, %eax # imm = 0xFE0
; X64-NEXT: shrl $3, %eax
; X64-NEXT: movl (%rdi,%rax), %eax
; X64-NEXT: btl %esi, %eax
; X64-NEXT: setb %al
; X64-NEXT: retq
%rem = and i32 %position, 4095
%ofs = zext nneg i32 %rem to i4096
%bit = shl nuw i4096 1, %ofs
%ld = load i4096, ptr %word
%test = and i4096 %ld, %bit
%cmp = icmp ne i4096 %test, 0
ret i1 %cmp
}
;
; Special Cases
;
; Multiple uses of the stored value
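;
; Because the xor result is both stored and compared against zero, the whole
; 128-bit value stays live; a rough C equivalent (illustrative names):
;
;   #include <stdbool.h>
;   typedef unsigned __int128 u128;   // Clang/GCC extension
;
;   bool complement_cmpz_i128(u128 *word, unsigned position) {
;     u128 res = *word ^ ((u128)1 << (position & 127));
;     *word = res;
;     return res != 0;                // whole value tested, not one bit
;   }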
define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
; X86-LABEL: complement_cmpz_i128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: andl $96, %ecx
; X86-NEXT: shrl $3, %ecx
; X86-NEXT: xorl %edx, (%eax,%ecx)
; X86-NEXT: movl (%eax), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: orl 12(%eax), %edx
; X86-NEXT: orl 8(%eax), %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; SSE-LABEL: complement_cmpz_i128:
; SSE: # %bb.0:
; SSE-NEXT: movl %esi, %ecx
; SSE-NEXT: movl $1, %eax
; SSE-NEXT: shll %cl, %eax
; SSE-NEXT: andl $96, %ecx
; SSE-NEXT: shrl $3, %ecx
; SSE-NEXT: xorl %eax, (%rdi,%rcx)
; SSE-NEXT: movq (%rdi), %rax
; SSE-NEXT: orq 8(%rdi), %rax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX-LABEL: complement_cmpz_i128:
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $esi killed $esi def $rsi
; AVX-NEXT: movl $1, %eax
; AVX-NEXT: shlxl %esi, %eax, %eax
; AVX-NEXT: andl $96, %esi
; AVX-NEXT: shrl $3, %esi
; AVX-NEXT: xorl %eax, (%rdi,%rsi)
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: orq 8(%rdi), %rax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs
%ld = load i128, ptr %word
%res = xor i128 %ld, %bit
store i128 %res, ptr %word
%cmp = icmp ne i128 %res, 0
ret i1 %cmp
}
; Multiple loads in store chain
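;
; Rough C sketch (illustrative names): the value loaded from %p is consumed
; on only one arm of the select, and that load precedes the store to %word,
; which may alias it:
;
;   #include <stdint.h>
;   typedef unsigned __int128 u128;   // Clang/GCC extension
;
;   int32_t reset_multiload_i128(u128 *word, uint32_t position, int32_t *p) {
;     u128 bit = (u128)1 << (position & 127);
;     int32_t sel = *p;
;     int32_t ret = (*word & bit) == 0 ? sel : 0;
;     *word &= ~bit;
;     return ret;
;   }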
define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
; X86-LABEL: reset_multiload_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $96, %esi
; X86-NEXT: shrl $3, %esi
; X86-NEXT: movl (%ecx,%esi), %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: btrl %edx, %ebx
; X86-NEXT: btl %edx, %edi
; X86-NEXT: movl %ebx, (%ecx,%esi)
; X86-NEXT: jae .LBB22_2
; X86-NEXT: # %bb.1:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .LBB22_2:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: reset_multiload_i128:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: andl $96, %ecx
; X64-NEXT: shrl $3, %ecx
; X64-NEXT: movl (%rdi,%rcx), %r9d
; X64-NEXT: movl %r9d, %r8d
; X64-NEXT: btrl %esi, %r8d
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: btl %esi, %r9d
; X64-NEXT: jb .LBB22_2
; X64-NEXT: # %bb.1:
; X64-NEXT: movl (%rdx), %eax
; X64-NEXT: .LBB22_2:
; X64-NEXT: movl %r8d, (%rdi,%rcx)
; X64-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs
%mask = xor i128 %bit, -1
%ld = load i128, ptr %word
%sel = load i32, ptr %p
%test = and i128 %ld, %bit
%res = and i128 %ld, %mask
%cmp = icmp eq i128 %test, 0
store i128 %res, ptr %word
%ret = select i1 %cmp, i32 %sel, i32 0
ret i32 %ret
}
; Multiple uses of the store chain AND stored value
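;
; Rough C sketch (illustrative names), assuming Clang's C23 _BitInt lowers to
; the same wide-integer IR: the masked result is stored, truncated and
; re-stored, and also drives a whole-value zero test:
;
;   typedef unsigned _BitInt(256) u256;   // C23, supported by Clang
;
;   int chain_reset_i256(u256 *p0, int *p1, int *p2, unsigned position) {
;     u256 res = *p0 & ~((u256)1 << (position & 255));
;     *p0 = res;
;     int ld1 = *p1;
;     *p1 = (int)res;                     // trunc of the stored value
;     int ld2 = *p2;
;     return res != 0 ? ld2 : ld1 + ld2;
;   }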
define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind {
; X86-LABEL: chain_reset_i256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-2, %edi
; X86-NEXT: roll %cl, %edi
; X86-NEXT: shrl $3, %ecx
; X86-NEXT: andl $28, %ecx
; X86-NEXT: andl %edi, (%esi,%ecx)
; X86-NEXT: movl 8(%esi), %ebx
; X86-NEXT: movl (%esi), %edi
; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 12(%esi), %ebp
; X86-NEXT: orl 28(%esi), %ebp
; X86-NEXT: orl 20(%esi), %ecx
; X86-NEXT: orl %ebp, %ecx
; X86-NEXT: orl 24(%esi), %ebx
; X86-NEXT: movl 16(%esi), %ebp
; X86-NEXT: orl %edi, %ebp
; X86-NEXT: orl %ebx, %ebp
; X86-NEXT: movl (%edx), %esi
; X86-NEXT: movl %edi, (%edx)
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: orl %ecx, %ebp
; X86-NEXT: jne .LBB23_2
; X86-NEXT: # %bb.1:
; X86-NEXT: addl %esi, %eax
; X86-NEXT: .LBB23_2:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; SSE-LABEL: chain_reset_i256:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
; SSE-NEXT: movl $-2, %eax
; SSE-NEXT: roll %cl, %eax
; SSE-NEXT: shrl $3, %ecx
; SSE-NEXT: andl $28, %ecx
; SSE-NEXT: andl %eax, (%rdi,%rcx)
; SSE-NEXT: movq (%rdi), %rcx
; SSE-NEXT: movq 8(%rdi), %r8
; SSE-NEXT: orq 24(%rdi), %r8
; SSE-NEXT: movq 16(%rdi), %rdi
; SSE-NEXT: orq %rcx, %rdi
; SSE-NEXT: movl (%rsi), %eax
; SSE-NEXT: movl %ecx, (%rsi)
; SSE-NEXT: movl (%rdx), %ecx
; SSE-NEXT: addl %ecx, %eax
; SSE-NEXT: orq %r8, %rdi
; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: chain_reset_i256:
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $ecx killed $ecx def $rcx
; AVX-NEXT: movl $-2, %eax
; AVX-NEXT: roll %cl, %eax
; AVX-NEXT: shrl $3, %ecx
; AVX-NEXT: andl $28, %ecx
; AVX-NEXT: andl %eax, (%rdi,%rcx)
; AVX-NEXT: vmovdqu (%rdi), %ymm0
; AVX-NEXT: movl (%rdi), %ecx
; AVX-NEXT: movl (%rsi), %eax
; AVX-NEXT: movl %ecx, (%rsi)
; AVX-NEXT: movl (%rdx), %ecx
; AVX-NEXT: addl %ecx, %eax
; AVX-NEXT: vptest %ymm0, %ymm0
; AVX-NEXT: cmovnel %ecx, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%rem = and i32 %position, 255
%ofs = zext nneg i32 %rem to i256
%bit = shl nuw i256 1, %ofs
%ld0 = load i256, ptr %p0
%msk = xor i256 %bit, -1
%res = and i256 %ld0, %msk
store i256 %res, ptr %p0
%cmp = icmp ne i256 %res, 0
%ld1 = load i32, ptr %p1
%trunc = trunc i256 %res to i32
store i32 %trunc, ptr %p1
%ld2 = load i32, ptr %p2
%add = add i32 %ld1, %ld2
%sel = select i1 %cmp, i32 %ld2, i32 %add
ret i32 %sel
}
; BTC/BT/BTS sequence on same i128
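;
; Rough C sketch (illustrative names): a single load feeds a complement, a
; bit test, and a set at three independent positions before the one store,
; so the full 128-bit value is materialized rather than narrowed to a word:
;
;   #include <stdbool.h>
;   typedef unsigned __int128 u128;   // Clang/GCC extension
;
;   bool sequence_i128(u128 *word, unsigned p0, unsigned p1, unsigned p2) {
;     u128 ld = *word;
;     u128 res0 = ld ^ ((u128)1 << (p0 & 127));            // btc
;     bool cmp1 = (res0 & ((u128)1 << (p1 & 127))) == 0;   // bt + setae
;     *word = res0 | ((u128)1 << (p2 & 127));              // bts
;     return cmp1;
;   }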
define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind {
; X86-LABEL: sequence_i128:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $144, %esp
; X86-NEXT: movb 20(%ebp), %ch
; X86-NEXT: movb 12(%ebp), %cl
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: movl 56(%esp,%eax), %edx
; X86-NEXT: movl 60(%esp,%eax), %esi
; X86-NEXT: shldl %cl, %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 48(%esp,%eax), %edi
; X86-NEXT: movl 52(%esp,%eax), %ebx
; X86-NEXT: shldl %cl, %ebx, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl %cl, %edi, %ebx
; X86-NEXT: shll %cl, %edi
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movb %ch, %al
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: movl 84(%esp,%eax), %edx
; X86-NEXT: movl 88(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movzbl 20(%ebp), %ecx
; X86-NEXT: shldl %cl, %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 80(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 92(%esp,%eax), %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: shldl %cl, %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: xorl 8(%eax), %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: xorl 12(%eax), %esi
; X86-NEXT: xorl (%eax), %edi
; X86-NEXT: xorl 4(%eax), %ebx
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: andb $96, %al
; X86-NEXT: shrb $3, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movl 96(%esp,%eax), %eax
; X86-NEXT: movl 16(%ebp), %ecx
; X86-NEXT: btl %ecx, %eax
; X86-NEXT: setae %al
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movl %edx, 8(%ecx)
; X86-NEXT: movl %esi, 12(%ecx)
; X86-NEXT: movl %edi, (%ecx)
; X86-NEXT: movl %ebx, 4(%ecx)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; SSE-LABEL: sequence_i128:
; SSE: # %bb.0:
; SSE-NEXT: movl %ecx, %eax
; SSE-NEXT: movl %esi, %ecx
; SSE-NEXT: movl $1, %r8d
; SSE-NEXT: xorl %esi, %esi
; SSE-NEXT: shldq %cl, %r8, %rsi
; SSE-NEXT: movl $1, %r9d
; SSE-NEXT: shlq %cl, %r9
; SSE-NEXT: xorl %r11d, %r11d
; SSE-NEXT: testb $64, %cl
; SSE-NEXT: cmovneq %r9, %rsi
; SSE-NEXT: cmovneq %r11, %r9
; SSE-NEXT: xorl %r10d, %r10d
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: shldq %cl, %r8, %r10
; SSE-NEXT: shlq %cl, %r8
; SSE-NEXT: testb $64, %al
; SSE-NEXT: cmovneq %r8, %r10
; SSE-NEXT: cmovneq %r11, %r8
; SSE-NEXT: xorq 8(%rdi), %rsi
; SSE-NEXT: xorq (%rdi), %r9
; SSE-NEXT: movl %edx, %ecx
; SSE-NEXT: andb $32, %cl
; SSE-NEXT: movq %r9, %rax
; SSE-NEXT: shrdq %cl, %rsi, %rax
; SSE-NEXT: movq %rsi, %r11
; SSE-NEXT: shrq %cl, %r11
; SSE-NEXT: testb $64, %dl
; SSE-NEXT: cmoveq %rax, %r11
; SSE-NEXT: btl %edx, %r11d
; SSE-NEXT: setae %al
; SSE-NEXT: orq %r10, %rsi
; SSE-NEXT: orq %r8, %r9
; SSE-NEXT: movq %r9, (%rdi)
; SSE-NEXT: movq %rsi, 8(%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: sequence_i128:
; AVX2: # %bb.0:
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: movl %esi, %ecx
; AVX2-NEXT: xorl %r9d, %r9d
; AVX2-NEXT: movl $1, %r10d
; AVX2-NEXT: xorl %esi, %esi
; AVX2-NEXT: shldq %cl, %r10, %rsi
; AVX2-NEXT: shlxq %rcx, %r10, %r8
; AVX2-NEXT: testb $64, %cl
; AVX2-NEXT: cmovneq %r8, %rsi
; AVX2-NEXT: cmovneq %r9, %r8
; AVX2-NEXT: xorl %r11d, %r11d
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shldq %cl, %r10, %r11
; AVX2-NEXT: shlxq %rax, %r10, %r10
; AVX2-NEXT: testb $64, %al
; AVX2-NEXT: cmovneq %r10, %r11
; AVX2-NEXT: cmovneq %r9, %r10
; AVX2-NEXT: xorq 8(%rdi), %rsi
; AVX2-NEXT: xorq (%rdi), %r8
; AVX2-NEXT: movl %edx, %ecx
; AVX2-NEXT: andb $32, %cl
; AVX2-NEXT: movq %r8, %rax
; AVX2-NEXT: shrdq %cl, %rsi, %rax
; AVX2-NEXT: shrxq %rcx, %rsi, %rcx
; AVX2-NEXT: testb $64, %dl
; AVX2-NEXT: cmoveq %rax, %rcx
; AVX2-NEXT: btl %edx, %ecx
; AVX2-NEXT: setae %al
; AVX2-NEXT: orq %r11, %rsi
; AVX2-NEXT: orq %r10, %r8
; AVX2-NEXT: movq %r8, (%rdi)
; AVX2-NEXT: movq %rsi, 8(%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: sequence_i128:
; AVX512: # %bb.0:
; AVX512-NEXT: movl %ecx, %eax
; AVX512-NEXT: movl %esi, %ecx
; AVX512-NEXT: movl $1, %r9d
; AVX512-NEXT: xorl %esi, %esi
; AVX512-NEXT: shldq %cl, %r9, %rsi
; AVX512-NEXT: xorl %r10d, %r10d
; AVX512-NEXT: shlxq %rcx, %r9, %r8
; AVX512-NEXT: testb $64, %cl
; AVX512-NEXT: cmovneq %r8, %rsi
; AVX512-NEXT: cmovneq %r10, %r8
; AVX512-NEXT: xorl %r11d, %r11d
; AVX512-NEXT: movl %eax, %ecx
; AVX512-NEXT: shldq %cl, %r9, %r11
; AVX512-NEXT: shlxq %rax, %r9, %r9
; AVX512-NEXT: testb $64, %al
; AVX512-NEXT: cmovneq %r9, %r11
; AVX512-NEXT: cmovneq %r10, %r9
; AVX512-NEXT: xorq 8(%rdi), %rsi
; AVX512-NEXT: xorq (%rdi), %r8
; AVX512-NEXT: movl %edx, %ecx
; AVX512-NEXT: andb $32, %cl
; AVX512-NEXT: movq %r8, %rax
; AVX512-NEXT: shrdq %cl, %rsi, %rax
; AVX512-NEXT: shrxq %rcx, %rsi, %rcx
; AVX512-NEXT: testb $64, %dl
; AVX512-NEXT: cmoveq %rax, %rcx
; AVX512-NEXT: btl %edx, %ecx
; AVX512-NEXT: setae %al
; AVX512-NEXT: orq %r11, %rsi
; AVX512-NEXT: orq %r9, %r8
; AVX512-NEXT: movq %r8, (%rdi)
; AVX512-NEXT: movq %rsi, 8(%rdi)
; AVX512-NEXT: retq
%rem0 = and i32 %pos0, 127
%rem1 = and i32 %pos1, 127
%rem2 = and i32 %pos2, 127
%ofs0 = zext nneg i32 %rem0 to i128
%ofs1 = zext nneg i32 %rem1 to i128
%ofs2 = zext nneg i32 %rem2 to i128
%bit0 = shl nuw i128 1, %ofs0
%bit1 = shl nuw i128 1, %ofs1
%bit2 = shl nuw i128 1, %ofs2
%ld = load i128, ptr %word
%res0 = xor i128 %ld, %bit0
%test1 = and i128 %res0, %bit1
%cmp1 = icmp eq i128 %test1, 0
%res2 = or i128 %res0, %bit2
store i128 %res2, ptr %word
ret i1 %cmp1
}