| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86 |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512 |
| |
| ; bt/btc/btr/bts patterns + 'init' to set single bit value in large integers |
| |
| ; |
| ; i32 bt/btc/btr/bts + init (reference) |
| ; |
| |
; Tests bit (%position & 31) of the i32 loaded from %word and returns true when
; that bit is clear. Nothing is stored back. The expected output on both
; targets is a single BTL on the loaded value followed by SETAE.
| define i1 @test_eq_i32(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_eq_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl (%eax), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_eq_i32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 31 |
| %bit = shl nuw i32 1, %rem |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %cmp = icmp eq i32 %test, 0 |
| ret i1 %cmp |
| } |
| |
; Toggles bit (%position & 31) of the i32 at %word (xor with the single-bit
; mask), stores the result back, and returns true when the bit was set before
; the toggle. The expected output computes the new value with BTCL on a copy
; and tests the old value with BTL + SETB.
| define i1 @complement_ne_i32(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_ne_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl (%ecx), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: btcl %eax, %esi |
| ; X86-NEXT: btl %eax, %edx |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: movl %esi, (%ecx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: complement_ne_i32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: btcl %esi, %ecx |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movl %ecx, (%rdi) |
| ; X64-NEXT: retq |
| %ofs = and i32 %position, 31 |
| %bit = shl nuw i32 1, %ofs |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %res = xor i32 %ld, %bit |
| %cmp = icmp ne i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Clears bit (%position & 31) of the i32 at %word (and with the inverted
; single-bit mask), stores the result back, and returns true when the bit was
; already clear. The expected output computes the new value with BTRL on a
; copy and tests the old value with BTL + SETAE.
| define i1 @reset_eq_i32(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: reset_eq_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl (%ecx), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: btrl %eax, %esi |
| ; X86-NEXT: btl %eax, %edx |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: movl %esi, (%ecx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_eq_i32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: btrl %esi, %ecx |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: movl %ecx, (%rdi) |
| ; X64-NEXT: retq |
| %ofs = and i32 %position, 31 |
| %bit = shl nuw i32 1, %ofs |
| %mask = xor i32 %bit, -1 |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %res = and i32 %ld, %mask |
| %cmp = icmp eq i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Sets bit (%position & 31) of the i32 at %word (or with the single-bit mask),
; stores the result back, and returns true when the bit was already set. The
; expected output computes the new value with BTSL on a copy and tests the old
; value with BTL + SETB.
| define i1 @set_ne_i32(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: set_ne_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl (%ecx), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: btsl %eax, %esi |
| ; X86-NEXT: btl %eax, %edx |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: movl %esi, (%ecx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: set_ne_i32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: btsl %esi, %ecx |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movl %ecx, (%rdi) |
| ; X64-NEXT: retq |
| %ofs = and i32 %position, 31 |
| %bit = shl nuw i32 1, %ofs |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %res = or i32 %ld, %bit |
| %cmp = icmp ne i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Writes %value into bit (%position & 31) of the i32 at %word: the bit is first
; cleared with the inverted mask, then the zero-extended %value shifted into
; place is or'ed in and the result is stored. Returns true when the bit was
; clear before the update. The expected output uses BTRL + ORL for the new
; value and BTL + SETAE for the test; the x86-64-v3/v4 runs shift with SHLX
; instead of SHL by %cl.
| define i1 @init_eq_i32(ptr %word, i32 %position, i1 zeroext %value) nounwind { |
| ; X86-LABEL: init_eq_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: shll %cl, %eax |
| ; X86-NEXT: movl (%edx), %esi |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: btrl %ecx, %edi |
| ; X86-NEXT: orl %eax, %edi |
| ; X86-NEXT: btl %ecx, %esi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: movl %edi, (%edx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: init_eq_i32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: shll %cl, %edx |
| ; SSE-NEXT: movl (%rdi), %eax |
| ; SSE-NEXT: movl %eax, %esi |
| ; SSE-NEXT: btrl %ecx, %esi |
| ; SSE-NEXT: orl %edx, %esi |
| ; SSE-NEXT: btl %ecx, %eax |
| ; SSE-NEXT: setae %al |
| ; SSE-NEXT: movl %esi, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: init_eq_i32: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: shlxl %esi, %edx, %eax |
| ; AVX-NEXT: movl (%rdi), %ecx |
| ; AVX-NEXT: movl %ecx, %edx |
| ; AVX-NEXT: btrl %esi, %edx |
| ; AVX-NEXT: orl %eax, %edx |
| ; AVX-NEXT: btl %esi, %ecx |
| ; AVX-NEXT: setae %al |
| ; AVX-NEXT: movl %edx, (%rdi) |
| ; AVX-NEXT: retq |
| %ofs = and i32 %position, 31 |
| %bit = shl nuw i32 1, %ofs |
| %mask = xor i32 %bit, -1 |
| %val0 = zext i1 %value to i32 |
| %val = shl nuw i32 %val0, %ofs |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %res0 = and i32 %ld, %mask |
| %res = or i32 %res0, %val |
| %cmp = icmp eq i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; |
| ; i64 bt/btc/btr/bts + init |
| ; |
| |
; Tests bit (%position & 63) of the i64 loaded from %word and returns true when
; that bit is set. Nothing is stored back. The expected 64-bit output is a
; single BTQ + SETB; the expected 32-bit output loads only the 32-bit half that
; holds the bit (byte offset (%position & 32) >> 3) and tests it with BTL.
| define i1 @test_ne_i64(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_ne_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: andl $32, %edx |
| ; X86-NEXT: shrl $3, %edx |
| ; X86-NEXT: movl (%eax,%edx), %eax |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_ne_i64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $esi killed $esi def $rsi |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %cmp = icmp ne i64 %test, 0 |
| ret i1 %cmp |
| } |
| |
; Toggles bit (%position & 63) of the i64 at %word, stores the result back, and
; returns true when the bit was set before the toggle. The expected 64-bit
; output uses BTCQ for the new value and BTQ + SETB for the test; the expected
; 32-bit output only reads and rewrites the 32-bit half holding the bit (byte
; offset (%position & 32) >> 3) using BTL/BTCL.
| define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_ne_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $32, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btcl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: complement_ne_i64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $esi killed $esi def $rsi |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: btcq %rsi, %rcx |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %res = xor i64 %ld, %bit |
| %cmp = icmp ne i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Clears bit (%position & 63) of the i64 at %word, stores the result back, and
; returns true when the bit was already clear. The expected 64-bit output uses
; BTRQ for the new value and BTQ + SETAE for the test; the expected 32-bit
; output only reads and rewrites the 32-bit half holding the bit (byte offset
; (%position & 32) >> 3) using BTL/BTRL.
| define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: reset_eq_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $32, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_eq_i64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $esi killed $esi def $rsi |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: btrq %rsi, %rcx |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %mask = xor i64 %bit, -1 |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %res = and i64 %ld, %mask |
| %cmp = icmp eq i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Sets bit (%position & 63) of the i64 at %word, stores the result back, and
; returns true when the bit was already set. The expected 64-bit output uses
; BTSQ for the new value and BTQ + SETB for the test; the expected 32-bit
; output only reads and rewrites the 32-bit half holding the bit (byte offset
; (%position & 32) >> 3) using BTL/BTSL.
| define i1 @set_ne_i64(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: set_ne_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $32, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btsl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: set_ne_i64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $esi killed $esi def $rsi |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: btsq %rsi, %rcx |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %res = or i64 %ld, %bit |
| %cmp = icmp ne i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Writes %value into bit (%position & 63) of the i64 at %word: the bit is
; cleared with the inverted mask, the zero-extended %value shifted into place is
; or'ed in, and the result is stored. Returns true when the bit was clear before
; the update. The expected 64-bit output works on the whole quadword (BTRQ +
; ORQ, BTQ + SETAE; SHLXQ on the x86-64-v3/v4 runs); the expected 32-bit output
; only touches the 32-bit half holding the bit (byte offset
; (%position & 32) >> 3).
| define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind { |
| ; X86-LABEL: init_eq_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %esi |
| ; X86-NEXT: andl $32, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%edx,%esi), %edi |
| ; X86-NEXT: btl %ecx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %ecx, %edi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shll %cl, %ebx |
| ; X86-NEXT: orl %edi, %ebx |
| ; X86-NEXT: movl %ebx, (%edx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: init_eq_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: movl %edx, %eax |
| ; SSE-NEXT: shlq %cl, %rax |
| ; SSE-NEXT: movq (%rdi), %rdx |
| ; SSE-NEXT: movq %rdx, %rsi |
| ; SSE-NEXT: btrq %rcx, %rsi |
| ; SSE-NEXT: orq %rax, %rsi |
| ; SSE-NEXT: btq %rcx, %rdx |
| ; SSE-NEXT: setae %al |
| ; SSE-NEXT: movq %rsi, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: init_eq_i64: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: # kill: def $esi killed $esi def $rsi |
| ; AVX-NEXT: movl %edx, %eax |
| ; AVX-NEXT: shlxq %rsi, %rax, %rax |
| ; AVX-NEXT: movq (%rdi), %rcx |
| ; AVX-NEXT: movq %rcx, %rdx |
| ; AVX-NEXT: btrq %rsi, %rdx |
| ; AVX-NEXT: orq %rax, %rdx |
| ; AVX-NEXT: btq %rsi, %rcx |
| ; AVX-NEXT: setae %al |
| ; AVX-NEXT: movq %rdx, (%rdi) |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %mask = xor i64 %bit, -1 |
| %val0 = zext i1 %value to i64 |
| %val = shl nuw i64 %val0, %ofs |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %res0 = and i64 %ld, %mask |
| %res = or i64 %res0, %val |
| %cmp = icmp eq i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; |
| ; i128 bt/btc/btr/bts + init |
| ; |
| |
; Tests bit (%position & 127) of the i128 loaded from %word and returns true
; when that bit is set. Nothing is stored back. On both targets the expected
; output loads only the 32-bit word that holds the bit (byte offset
; (%position & 96) >> 3) and tests it with BTL + SETB.
| define i1 @test_ne_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_ne_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: andl $96, %edx |
| ; X86-NEXT: shrl $3, %edx |
| ; X86-NEXT: movl (%eax,%edx), %eax |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_ne_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %eax |
| ; X64-NEXT: andl $96, %eax |
| ; X64-NEXT: shrl $3, %eax |
| ; X64-NEXT: movl (%rdi,%rax), %eax |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %cmp = icmp ne i128 %test, 0 |
| ret i1 %cmp |
| } |
| |
; Toggles bit (%position & 127) of the i128 at %word, stores the result back,
; and returns true when the bit was set before the toggle. On both targets the
; expected output reads and rewrites only the 32-bit word holding the bit (byte
; offset (%position & 96) >> 3): BTL + SETB for the test, BTCL for the update.
| define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_ne_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btcl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: complement_ne_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: andl $96, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: btcl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %res = xor i128 %ld, %bit |
| %cmp = icmp ne i128 %test, 0 |
| store i128 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Clears bit (%position & 127) of the i128 at %word, stores the result back,
; and returns true when the bit was already clear. On both targets the expected
; output reads and rewrites only the 32-bit word holding the bit (byte offset
; (%position & 96) >> 3): BTL + SETAE for the test, BTRL for the update.
| define i1 @reset_eq_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: reset_eq_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_eq_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: andl $96, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: btrl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %mask = xor i128 %bit, -1 |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %res = and i128 %ld, %mask |
| %cmp = icmp eq i128 %test, 0 |
| store i128 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Sets bit (%position & 127) of the i128 at %word, stores the result back, and
; returns true when the bit was already set. On both targets the expected
; output reads and rewrites only the 32-bit word holding the bit (byte offset
; (%position & 96) >> 3): BTL + SETB for the test, BTSL for the update.
| define i1 @set_ne_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: set_ne_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btsl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: set_ne_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: andl $96, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: btsl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %res = or i128 %ld, %bit |
| %cmp = icmp ne i128 %test, 0 |
| store i128 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Writes %value into bit (%position & 127) of the i128 at %word: the bit is
; cleared with the inverted mask, the zero-extended %value shifted into place is
; or'ed in, and the result is stored. Returns true when the bit was clear before
; the update. On every run the expected output only touches the 32-bit word
; holding the bit (byte offset (%position & 96) >> 3): BTL + SETAE, BTRL, a
; 32-bit shift of %value (SHLX on the x86-64-v3/v4 runs) and ORL.
| define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind { |
| ; X86-LABEL: init_eq_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%edx,%esi), %edi |
| ; X86-NEXT: btl %ecx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %ecx, %edi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shll %cl, %ebx |
| ; X86-NEXT: orl %edi, %ebx |
| ; X86-NEXT: movl %ebx, (%edx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: init_eq_i128: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: andl $96, %esi |
| ; SSE-NEXT: shrl $3, %esi |
| ; SSE-NEXT: movl (%rdi,%rsi), %r8d |
| ; SSE-NEXT: btl %ecx, %r8d |
| ; SSE-NEXT: setae %al |
| ; SSE-NEXT: shll %cl, %edx |
| ; SSE-NEXT: btrl %ecx, %r8d |
| ; SSE-NEXT: orl %r8d, %edx |
| ; SSE-NEXT: movl %edx, (%rdi,%rsi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: init_eq_i128: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movl %esi, %ecx |
| ; AVX-NEXT: andl $96, %ecx |
| ; AVX-NEXT: shrl $3, %ecx |
| ; AVX-NEXT: movl (%rdi,%rcx), %r8d |
| ; AVX-NEXT: btl %esi, %r8d |
| ; AVX-NEXT: setae %al |
| ; AVX-NEXT: btrl %esi, %r8d |
| ; AVX-NEXT: shlxl %esi, %edx, %edx |
| ; AVX-NEXT: orl %r8d, %edx |
| ; AVX-NEXT: movl %edx, (%rdi,%rcx) |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %mask = xor i128 %bit, -1 |
| %val0 = zext i1 %value to i128 |
| %val = shl nuw i128 %val0, %ofs |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %res0 = and i128 %ld, %mask |
| %res = or i128 %res0, %val |
| %cmp = icmp eq i128 %test, 0 |
| store i128 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; i512 bt/btc/btr/bts + init |
| |
; Tests bit (%position & 511) of the i512 loaded from %word and returns true
; when that bit is set. Nothing is stored back. On both targets the expected
; output loads only the 32-bit word that holds the bit (byte offset
; (%position >> 3) & 60) and tests it with BTL + SETB.
| define i1 @test_ne_i512(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_ne_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: shrl $3, %edx |
| ; X86-NEXT: andl $60, %edx |
| ; X86-NEXT: movl (%eax,%edx), %eax |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_ne_i512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %eax |
| ; X64-NEXT: shrl $3, %eax |
| ; X64-NEXT: andl $60, %eax |
| ; X64-NEXT: movl (%rdi,%rax), %eax |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %cmp = icmp ne i512 %test, 0 |
| ret i1 %cmp |
| } |
| |
; Toggles bit (%position & 511) of the i512 at %word, stores the result back,
; and returns true when the bit was set before the toggle. On both targets the
; expected output reads and rewrites only the 32-bit word holding the bit (byte
; offset (%position >> 3) & 60): BTL + SETB for the test, BTCL for the update.
| define i1 @complement_ne_i512(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_ne_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: andl $60, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btcl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: complement_ne_i512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: andl $60, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: btcl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %res = xor i512 %ld, %bit |
| %cmp = icmp ne i512 %test, 0 |
| store i512 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Clears bit (%position & 511) of the i512 at %word, stores the result back,
; and returns true when the bit was already clear. On both targets the expected
; output reads and rewrites only the 32-bit word holding the bit (byte offset
; (%position >> 3) & 60): BTL + SETAE for the test, BTRL for the update.
| define i1 @reset_eq_i512(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: reset_eq_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: andl $60, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_eq_i512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: andl $60, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: btrl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %mask = xor i512 %bit, -1 |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %res = and i512 %ld, %mask |
| %cmp = icmp eq i512 %test, 0 |
| store i512 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Sets bit (%position & 511) of the i512 at %word, stores the result back, and
; returns true when the bit was already set. On both targets the expected
; output reads and rewrites only the 32-bit word holding the bit (byte offset
; (%position >> 3) & 60): BTL + SETB for the test, BTSL for the update.
| define i1 @set_ne_i512(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: set_ne_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: andl $60, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btsl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: set_ne_i512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: andl $60, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: btsl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %res = or i512 %ld, %bit |
| %cmp = icmp ne i512 %test, 0 |
| store i512 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
; Writes %value into bit (%position & 511) of the i512 at %word: the bit is
; cleared with the inverted mask, the zero-extended %value shifted into place is
; or'ed in, and the result is stored. Returns true when the bit was clear before
; the update. On every run the expected output only touches the 32-bit word
; holding the bit (byte offset (%position >> 3) & 60): BTL + SETAE, BTRL, a
; 32-bit shift of %value (SHLX on the x86-64-v3/v4 runs) and ORL.
| define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { |
| ; X86-LABEL: init_eq_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: andl $60, %esi |
| ; X86-NEXT: movl (%edx,%esi), %edi |
| ; X86-NEXT: btl %ecx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %ecx, %edi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shll %cl, %ebx |
| ; X86-NEXT: orl %edi, %ebx |
| ; X86-NEXT: movl %ebx, (%edx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: init_eq_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: shrl $3, %esi |
| ; SSE-NEXT: andl $60, %esi |
| ; SSE-NEXT: movl (%rdi,%rsi), %r8d |
| ; SSE-NEXT: btl %ecx, %r8d |
| ; SSE-NEXT: setae %al |
| ; SSE-NEXT: shll %cl, %edx |
| ; SSE-NEXT: btrl %ecx, %r8d |
| ; SSE-NEXT: orl %r8d, %edx |
| ; SSE-NEXT: movl %edx, (%rdi,%rsi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: init_eq_i512: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movl %esi, %ecx |
| ; AVX-NEXT: shrl $3, %ecx |
| ; AVX-NEXT: andl $60, %ecx |
| ; AVX-NEXT: movl (%rdi,%rcx), %r8d |
| ; AVX-NEXT: btl %esi, %r8d |
| ; AVX-NEXT: setae %al |
| ; AVX-NEXT: btrl %esi, %r8d |
| ; AVX-NEXT: shlxl %esi, %edx, %edx |
| ; AVX-NEXT: orl %r8d, %edx |
| ; AVX-NEXT: movl %edx, (%rdi,%rcx) |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %mask = xor i512 %bit, -1 |
| %val0 = zext i1 %value to i512 |
| %val = shl nuw i512 %val0, %ofs |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %res0 = and i512 %ld, %mask |
| %res = or i512 %res0, %val |
| %cmp = icmp eq i512 %test, 0 |
| store i512 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; i4096 bt (test only) |
| |
; Tests bit (%position & 4095) of the i4096 loaded from %word and returns true
; when that bit is set. Nothing is stored back. On both targets the expected
; output loads only the 32-bit word that holds the bit (byte offset
; (%position & 4064) >> 3) and tests it with BTL + SETB.
| define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_ne_i4096: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: andl $4064, %edx # imm = 0xFE0 |
| ; X86-NEXT: shrl $3, %edx |
| ; X86-NEXT: movl (%eax,%edx), %eax |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_ne_i4096: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %eax |
| ; X64-NEXT: andl $4064, %eax # imm = 0xFE0 |
| ; X64-NEXT: shrl $3, %eax |
| ; X64-NEXT: movl (%rdi,%rax), %eax |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 4095 |
| %ofs = zext nneg i32 %rem to i4096 |
| %bit = shl nuw i4096 1, %ofs |
| %ld = load i4096, ptr %word |
| %test = and i4096 %ld, %bit |
| %cmp = icmp ne i4096 %test, 0 |
| ret i1 %cmp |
| } |
| |
| ; Special Cases |
| |
| ; Multiple uses of the stored value |
; Toggles bit (%position & 127) of the i128 at %word and stores the result; the
; same xor result is then compared against zero, so it has a second use besides
; the store. Returns true when the updated i128 is non-zero. The expected
; output still performs the toggle as a 32-bit in-memory XOR on the word holding
; the bit (byte offset (%position & 96) >> 3) and afterwards re-reads the whole
; i128 from memory, or-reducing it for the SETNE.
| define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_cmpz_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl $1, %edx |
| ; X86-NEXT: shll %cl, %edx |
| ; X86-NEXT: andl $96, %ecx |
| ; X86-NEXT: shrl $3, %ecx |
| ; X86-NEXT: xorl %edx, (%eax,%ecx) |
| ; X86-NEXT: movl (%eax), %ecx |
| ; X86-NEXT: movl 4(%eax), %edx |
| ; X86-NEXT: orl 12(%eax), %edx |
| ; X86-NEXT: orl 8(%eax), %ecx |
| ; X86-NEXT: orl %edx, %ecx |
| ; X86-NEXT: setne %al |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: complement_cmpz_i128: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: movl $1, %eax |
| ; SSE-NEXT: shll %cl, %eax |
| ; SSE-NEXT: andl $96, %ecx |
| ; SSE-NEXT: shrl $3, %ecx |
| ; SSE-NEXT: xorl %eax, (%rdi,%rcx) |
| ; SSE-NEXT: movq (%rdi), %rax |
| ; SSE-NEXT: orq 8(%rdi), %rax |
| ; SSE-NEXT: setne %al |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: complement_cmpz_i128: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: # kill: def $esi killed $esi def $rsi |
| ; AVX-NEXT: movl $1, %eax |
| ; AVX-NEXT: shlxl %esi, %eax, %eax |
| ; AVX-NEXT: andl $96, %esi |
| ; AVX-NEXT: shrl $3, %esi |
| ; AVX-NEXT: xorl %eax, (%rdi,%rsi) |
| ; AVX-NEXT: movq (%rdi), %rax |
| ; AVX-NEXT: orq 8(%rdi), %rax |
| ; AVX-NEXT: setne %al |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %ld = load i128, ptr %word |
| %res = xor i128 %ld, %bit |
| store i128 %res, ptr %word |
| %cmp = icmp ne i128 %res, 0 |
| ret i1 %cmp |
| } |
| |
| ; Multiple loads in store chain |
; Clears bit (%position & 127) of the i128 at %word and stores the result, with
; an unrelated i32 load from %p placed between the i128 load and the store.
; Returns the value loaded from %p when the bit was clear before the update,
; otherwise 0. The expected output still narrows the bit test/reset to the
; 32-bit word holding the bit (byte offset (%position & 96) >> 3) using
; BTL/BTRL and selects the result with a branch.
| define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind { |
| ; X86-LABEL: reset_multiload_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl (%eax), %eax |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: movl %edi, %ebx |
| ; X86-NEXT: btrl %edx, %ebx |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: movl %ebx, (%ecx,%esi) |
| ; X86-NEXT: jae .LBB22_2 |
| ; X86-NEXT: # %bb.1: |
| ; X86-NEXT: xorl %eax, %eax |
| ; X86-NEXT: .LBB22_2: |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_multiload_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: andl $96, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %r9d |
| ; X64-NEXT: movl %r9d, %r8d |
| ; X64-NEXT: btrl %esi, %r8d |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: btl %esi, %r9d |
| ; X64-NEXT: jb .LBB22_2 |
| ; X64-NEXT: # %bb.1: |
| ; X64-NEXT: movl (%rdx), %eax |
| ; X64-NEXT: .LBB22_2: |
| ; X64-NEXT: movl %r8d, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %mask = xor i128 %bit, -1 |
| %ld = load i128, ptr %word |
| %sel = load i32, ptr %p |
| %test = and i128 %ld, %bit |
| %res = and i128 %ld, %mask |
| %cmp = icmp eq i128 %test, 0 |
| store i128 %res, ptr %word |
| %ret = select i1 %cmp, i32 %sel, i32 0 |
| ret i32 %ret |
| } |
| |
| ; Multiple uses of the store chain AND stored value |
; Clears bit (%position & 255) of the i256 at %p0 and stores the result. The
; stored value is reused twice (compared against zero, and truncated to i32 and
; stored to %p1), and further memory operations follow the i256 store: an i32
; load from %p1 before the truncated store, then an i32 load from %p2. Returns
; the %p2 value when the updated i256 is non-zero, otherwise the sum of the old
; %p1 value and the %p2 value. The expected output performs the reset as a
; 32-bit in-memory AND with a rotated -2 mask on the word holding the bit (byte
; offset (%position >> 3) & 28) and re-reads the i256 for the zero test.
| define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind { |
| ; X86-LABEL: chain_reset_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl $-2, %edi |
| ; X86-NEXT: roll %cl, %edi |
| ; X86-NEXT: shrl $3, %ecx |
| ; X86-NEXT: andl $28, %ecx |
| ; X86-NEXT: andl %edi, (%esi,%ecx) |
| ; X86-NEXT: movl 8(%esi), %ebx |
| ; X86-NEXT: movl (%esi), %edi |
| ; X86-NEXT: movl 4(%esi), %ecx |
| ; X86-NEXT: movl 12(%esi), %ebp |
| ; X86-NEXT: orl 28(%esi), %ebp |
| ; X86-NEXT: orl 20(%esi), %ecx |
| ; X86-NEXT: orl %ebp, %ecx |
| ; X86-NEXT: orl 24(%esi), %ebx |
| ; X86-NEXT: movl 16(%esi), %ebp |
| ; X86-NEXT: orl %edi, %ebp |
| ; X86-NEXT: orl %ebx, %ebp |
| ; X86-NEXT: movl (%edx), %esi |
| ; X86-NEXT: movl %edi, (%edx) |
| ; X86-NEXT: movl (%eax), %eax |
| ; X86-NEXT: orl %ecx, %ebp |
| ; X86-NEXT: jne .LBB23_2 |
| ; X86-NEXT: # %bb.1: |
| ; X86-NEXT: addl %esi, %eax |
| ; X86-NEXT: .LBB23_2: |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: chain_reset_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: # kill: def $ecx killed $ecx def $rcx |
| ; SSE-NEXT: movl $-2, %eax |
| ; SSE-NEXT: roll %cl, %eax |
| ; SSE-NEXT: shrl $3, %ecx |
| ; SSE-NEXT: andl $28, %ecx |
| ; SSE-NEXT: andl %eax, (%rdi,%rcx) |
| ; SSE-NEXT: movq (%rdi), %rcx |
| ; SSE-NEXT: movq 8(%rdi), %r8 |
| ; SSE-NEXT: orq 24(%rdi), %r8 |
| ; SSE-NEXT: movq 16(%rdi), %rdi |
| ; SSE-NEXT: orq %rcx, %rdi |
| ; SSE-NEXT: movl (%rsi), %eax |
| ; SSE-NEXT: movl %ecx, (%rsi) |
| ; SSE-NEXT: movl (%rdx), %ecx |
| ; SSE-NEXT: addl %ecx, %eax |
| ; SSE-NEXT: orq %r8, %rdi |
| ; SSE-NEXT: cmovnel %ecx, %eax |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: chain_reset_i256: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: # kill: def $ecx killed $ecx def $rcx |
| ; AVX-NEXT: movl $-2, %eax |
| ; AVX-NEXT: roll %cl, %eax |
| ; AVX-NEXT: shrl $3, %ecx |
| ; AVX-NEXT: andl $28, %ecx |
| ; AVX-NEXT: andl %eax, (%rdi,%rcx) |
| ; AVX-NEXT: vmovdqu (%rdi), %ymm0 |
| ; AVX-NEXT: movl (%rdi), %ecx |
| ; AVX-NEXT: movl (%rsi), %eax |
| ; AVX-NEXT: movl %ecx, (%rsi) |
| ; AVX-NEXT: movl (%rdx), %ecx |
| ; AVX-NEXT: addl %ecx, %eax |
| ; AVX-NEXT: vptest %ymm0, %ymm0 |
| ; AVX-NEXT: cmovnel %ecx, %eax |
| ; AVX-NEXT: vzeroupper |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 255 |
| %ofs = zext nneg i32 %rem to i256 |
| %bit = shl nuw i256 1, %ofs |
| %ld0 = load i256, ptr %p0 |
| %msk = xor i256 %bit, -1 |
| %res = and i256 %ld0, %msk |
| store i256 %res, ptr %p0 |
| %cmp = icmp ne i256 %res, 0 |
| %ld1 = load i32, ptr %p1 |
| %trunc = trunc i256 %res to i32 |
| store i32 %trunc, ptr %p1 |
| %ld2 = load i32, ptr %p2 |
| %add = add i32 %ld1, %ld2 |
| %sel = select i1 %cmp, i32 %ld2, i32 %add |
| ret i32 %sel |
| } |
| |
| ; BTC/BT/BTS sequence on same i128
| ;
| ; Loads the i128 at %word once, complements bit (%pos0 & 127), tests bit
| ; (%pos1 & 127) of the complemented value, sets bit (%pos2 & 127), and stores
| ; the final value back to %word. Returns true when the tested bit is clear.
| define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind {
| ; X86-LABEL: sequence_i128:
| ; X86: # %bb.0:
| ; X86-NEXT: pushl %ebp
| ; X86-NEXT: movl %esp, %ebp
| ; X86-NEXT: pushl %ebx
| ; X86-NEXT: pushl %edi
| ; X86-NEXT: pushl %esi
| ; X86-NEXT: andl $-16, %esp
| ; X86-NEXT: subl $144, %esp
| ; X86-NEXT: movb 20(%ebp), %ch
| ; X86-NEXT: movb 12(%ebp), %cl
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl %ecx, %eax
| ; X86-NEXT: shrb $3, %al
| ; X86-NEXT: andb $12, %al
| ; X86-NEXT: negb %al
| ; X86-NEXT: movsbl %al, %eax
| ; X86-NEXT: movl 56(%esp,%eax), %edx
| ; X86-NEXT: movl 60(%esp,%eax), %esi
| ; X86-NEXT: shldl %cl, %edx, %esi
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
| ; X86-NEXT: movl 48(%esp,%eax), %edi
| ; X86-NEXT: movl 52(%esp,%eax), %ebx
| ; X86-NEXT: shldl %cl, %ebx, %edx
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
| ; X86-NEXT: shldl %cl, %edi, %ebx
| ; X86-NEXT: shll %cl, %edi
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movb %ch, %al
| ; X86-NEXT: shrb $3, %al
| ; X86-NEXT: andb $12, %al
| ; X86-NEXT: negb %al
| ; X86-NEXT: movsbl %al, %eax
| ; X86-NEXT: movl 84(%esp,%eax), %edx
| ; X86-NEXT: movl 88(%esp,%eax), %esi
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
| ; X86-NEXT: movzbl 20(%ebp), %ecx
| ; X86-NEXT: shldl %cl, %edx, %esi
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
| ; X86-NEXT: movl 80(%esp,%eax), %esi
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
| ; X86-NEXT: movl 92(%esp,%eax), %eax
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
| ; X86-NEXT: shldl %cl, %esi, %eax
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
| ; X86-NEXT: movl %esi, %eax
| ; X86-NEXT: shll %cl, %eax
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
| ; X86-NEXT: shldl %cl, %esi, %edx
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
| ; X86-NEXT: movl 8(%ebp), %eax
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
| ; X86-NEXT: xorl 8(%eax), %edx
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
| ; X86-NEXT: xorl 12(%eax), %esi
| ; X86-NEXT: xorl (%eax), %edi
| ; X86-NEXT: xorl 4(%eax), %ebx
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
| ; X86-NEXT: movl 16(%ebp), %eax
| ; X86-NEXT: # kill: def $al killed $al killed $eax
| ; X86-NEXT: andb $96, %al
| ; X86-NEXT: shrb $3, %al
| ; X86-NEXT: movzbl %al, %eax
| ; X86-NEXT: movl 96(%esp,%eax), %eax
| ; X86-NEXT: movl 16(%ebp), %ecx
| ; X86-NEXT: btl %ecx, %eax
| ; X86-NEXT: setae %al
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
| ; X86-NEXT: movl 8(%ebp), %ecx
| ; X86-NEXT: movl %edx, 8(%ecx)
| ; X86-NEXT: movl %esi, 12(%ecx)
| ; X86-NEXT: movl %edi, (%ecx)
| ; X86-NEXT: movl %ebx, 4(%ecx)
| ; X86-NEXT: leal -12(%ebp), %esp
| ; X86-NEXT: popl %esi
| ; X86-NEXT: popl %edi
| ; X86-NEXT: popl %ebx
| ; X86-NEXT: popl %ebp
| ; X86-NEXT: retl
| ;
| ; SSE-LABEL: sequence_i128:
| ; SSE: # %bb.0:
| ; SSE-NEXT: movl %ecx, %eax
| ; SSE-NEXT: movl %esi, %ecx
| ; SSE-NEXT: movl $1, %r8d
| ; SSE-NEXT: xorl %esi, %esi
| ; SSE-NEXT: shldq %cl, %r8, %rsi
| ; SSE-NEXT: movl $1, %r9d
| ; SSE-NEXT: shlq %cl, %r9
| ; SSE-NEXT: xorl %r11d, %r11d
| ; SSE-NEXT: testb $64, %cl
| ; SSE-NEXT: cmovneq %r9, %rsi
| ; SSE-NEXT: cmovneq %r11, %r9
| ; SSE-NEXT: xorl %r10d, %r10d
| ; SSE-NEXT: movl %eax, %ecx
| ; SSE-NEXT: shldq %cl, %r8, %r10
| ; SSE-NEXT: shlq %cl, %r8
| ; SSE-NEXT: testb $64, %al
| ; SSE-NEXT: cmovneq %r8, %r10
| ; SSE-NEXT: cmovneq %r11, %r8
| ; SSE-NEXT: xorq 8(%rdi), %rsi
| ; SSE-NEXT: xorq (%rdi), %r9
| ; SSE-NEXT: movl %edx, %ecx
| ; SSE-NEXT: andb $32, %cl
| ; SSE-NEXT: movq %r9, %rax
| ; SSE-NEXT: shrdq %cl, %rsi, %rax
| ; SSE-NEXT: movq %rsi, %r11
| ; SSE-NEXT: shrq %cl, %r11
| ; SSE-NEXT: testb $64, %dl
| ; SSE-NEXT: cmoveq %rax, %r11
| ; SSE-NEXT: btl %edx, %r11d
| ; SSE-NEXT: setae %al
| ; SSE-NEXT: orq %r10, %rsi
| ; SSE-NEXT: orq %r8, %r9
| ; SSE-NEXT: movq %r9, (%rdi)
| ; SSE-NEXT: movq %rsi, 8(%rdi)
| ; SSE-NEXT: retq
| ;
| ; AVX2-LABEL: sequence_i128:
| ; AVX2: # %bb.0:
| ; AVX2-NEXT: movl %ecx, %eax
| ; AVX2-NEXT: movl %esi, %ecx
| ; AVX2-NEXT: xorl %r9d, %r9d
| ; AVX2-NEXT: movl $1, %r10d
| ; AVX2-NEXT: xorl %esi, %esi
| ; AVX2-NEXT: shldq %cl, %r10, %rsi
| ; AVX2-NEXT: shlxq %rcx, %r10, %r8
| ; AVX2-NEXT: testb $64, %cl
| ; AVX2-NEXT: cmovneq %r8, %rsi
| ; AVX2-NEXT: cmovneq %r9, %r8
| ; AVX2-NEXT: xorl %r11d, %r11d
| ; AVX2-NEXT: movl %eax, %ecx
| ; AVX2-NEXT: shldq %cl, %r10, %r11
| ; AVX2-NEXT: shlxq %rax, %r10, %r10
| ; AVX2-NEXT: testb $64, %al
| ; AVX2-NEXT: cmovneq %r10, %r11
| ; AVX2-NEXT: cmovneq %r9, %r10
| ; AVX2-NEXT: xorq 8(%rdi), %rsi
| ; AVX2-NEXT: xorq (%rdi), %r8
| ; AVX2-NEXT: movl %edx, %ecx
| ; AVX2-NEXT: andb $32, %cl
| ; AVX2-NEXT: movq %r8, %rax
| ; AVX2-NEXT: shrdq %cl, %rsi, %rax
| ; AVX2-NEXT: shrxq %rcx, %rsi, %rcx
| ; AVX2-NEXT: testb $64, %dl
| ; AVX2-NEXT: cmoveq %rax, %rcx
| ; AVX2-NEXT: btl %edx, %ecx
| ; AVX2-NEXT: setae %al
| ; AVX2-NEXT: orq %r11, %rsi
| ; AVX2-NEXT: orq %r10, %r8
| ; AVX2-NEXT: movq %r8, (%rdi)
| ; AVX2-NEXT: movq %rsi, 8(%rdi)
| ; AVX2-NEXT: retq
| ;
| ; AVX512-LABEL: sequence_i128:
| ; AVX512: # %bb.0:
| ; AVX512-NEXT: movl %ecx, %eax
| ; AVX512-NEXT: movl %esi, %ecx
| ; AVX512-NEXT: movl $1, %r9d
| ; AVX512-NEXT: xorl %esi, %esi
| ; AVX512-NEXT: shldq %cl, %r9, %rsi
| ; AVX512-NEXT: xorl %r10d, %r10d
| ; AVX512-NEXT: shlxq %rcx, %r9, %r8
| ; AVX512-NEXT: testb $64, %cl
| ; AVX512-NEXT: cmovneq %r8, %rsi
| ; AVX512-NEXT: cmovneq %r10, %r8
| ; AVX512-NEXT: xorl %r11d, %r11d
| ; AVX512-NEXT: movl %eax, %ecx
| ; AVX512-NEXT: shldq %cl, %r9, %r11
| ; AVX512-NEXT: shlxq %rax, %r9, %r9
| ; AVX512-NEXT: testb $64, %al
| ; AVX512-NEXT: cmovneq %r9, %r11
| ; AVX512-NEXT: cmovneq %r10, %r9
| ; AVX512-NEXT: xorq 8(%rdi), %rsi
| ; AVX512-NEXT: xorq (%rdi), %r8
| ; AVX512-NEXT: movl %edx, %ecx
| ; AVX512-NEXT: andb $32, %cl
| ; AVX512-NEXT: movq %r8, %rax
| ; AVX512-NEXT: shrdq %cl, %rsi, %rax
| ; AVX512-NEXT: shrxq %rcx, %rsi, %rcx
| ; AVX512-NEXT: testb $64, %dl
| ; AVX512-NEXT: cmoveq %rax, %rcx
| ; AVX512-NEXT: btl %edx, %ecx
| ; AVX512-NEXT: setae %al
| ; AVX512-NEXT: orq %r11, %rsi
| ; AVX512-NEXT: orq %r9, %r8
| ; AVX512-NEXT: movq %r8, (%rdi)
| ; AVX512-NEXT: movq %rsi, 8(%rdi)
| ; AVX512-NEXT: retq
| ; Mask each position to the range 0..127 and build one single-bit i128 per position.
| %rem0 = and i32 %pos0, 127
| %rem1 = and i32 %pos1, 127
| %rem2 = and i32 %pos2, 127
| %ofs0 = zext nneg i32 %rem0 to i128
| %ofs1 = zext nneg i32 %rem1 to i128
| %ofs2 = zext nneg i32 %rem2 to i128
| %bit0 = shl nuw i128 1, %ofs0
| %bit1 = shl nuw i128 1, %ofs1
| %bit2 = shl nuw i128 1, %ofs2
| %ld = load i128, ptr %word
| ; Complement the bit selected by %pos0.
| %res0 = xor i128 %ld, %bit0
| ; Test the bit selected by %pos1 in the already-complemented value.
| %test1 = and i128 %res0, %bit1
| %cmp1 = icmp eq i128 %test1, 0
| ; Set the bit selected by %pos2 and write the combined result back to %word.
| %res2 = or i128 %res0, %bit2
| store i128 %res2, ptr %word
| ret i1 %cmp1
| }