| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86 |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE,SSE2 |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE4 |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512 |
| |
| ; bt/btc/btr/bts patterns + 'init' to set a single bit value in large integers |
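| ; |
| ; Each test below follows the same IR shape (a sketch of the shared pattern, |
| ; not an exact copy of any single test): mask the position to the bit width, |
| ; build a single-bit mask, then test/flip/clear/set it in the loaded word: |
| ;   %ofs  = and i32 %position, <bitwidth - 1> |
| ;   %bit  = shl nuw iN 1, %ofs |
| ;   %ld   = load iN, ptr %word |
| ;   %test = and iN %ld, %bit              ; bt |
| ;   %res  = xor/and-not/or iN %ld, %bit   ; btc / btr / bts |
| ;   store iN %res, ptr %word |
| ; The 'init' variants additionally shift a zero-extended i1 %value into the |
| ; cleared position and merge it before the store. |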
| |
| ; |
| ; i32 bt/btc/btr/bts + init (reference) |
| ; |
| |
| define i1 @test_eq_i32(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_eq_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl (%eax), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_eq_i32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 31 |
| %bit = shl nuw i32 1, %rem |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %cmp = icmp eq i32 %test, 0 |
| ret i1 %cmp |
| } |
| |
| define i1 @complement_ne_i32(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_ne_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl (%ecx), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: btcl %eax, %esi |
| ; X86-NEXT: btl %eax, %edx |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: movl %esi, (%ecx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: complement_ne_i32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: btcl %esi, %ecx |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movl %ecx, (%rdi) |
| ; X64-NEXT: retq |
| %ofs = and i32 %position, 31 |
| %bit = shl nuw i32 1, %ofs |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %res = xor i32 %ld, %bit |
| %cmp = icmp ne i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @reset_eq_i32(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: reset_eq_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl (%ecx), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: btrl %eax, %esi |
| ; X86-NEXT: btl %eax, %edx |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: movl %esi, (%ecx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_eq_i32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: btrl %esi, %ecx |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: movl %ecx, (%rdi) |
| ; X64-NEXT: retq |
| %ofs = and i32 %position, 31 |
| %bit = shl nuw i32 1, %ofs |
| %mask = xor i32 %bit, -1 |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %res = and i32 %ld, %mask |
| %cmp = icmp eq i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @set_ne_i32(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: set_ne_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl (%ecx), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: btsl %eax, %esi |
| ; X86-NEXT: btl %eax, %edx |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: movl %esi, (%ecx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: set_ne_i32: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %eax |
| ; X64-NEXT: movl %eax, %ecx |
| ; X64-NEXT: btsl %esi, %ecx |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movl %ecx, (%rdi) |
| ; X64-NEXT: retq |
| %ofs = and i32 %position, 31 |
| %bit = shl nuw i32 1, %ofs |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %res = or i32 %ld, %bit |
| %cmp = icmp ne i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @init_eq_i32(ptr %word, i32 %position, i1 zeroext %value) nounwind { |
| ; X86-LABEL: init_eq_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: shll %cl, %eax |
| ; X86-NEXT: movl (%edx), %esi |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: btrl %ecx, %edi |
| ; X86-NEXT: orl %eax, %edi |
| ; X86-NEXT: btl %ecx, %esi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: movl %edi, (%edx) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: init_eq_i32: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: shll %cl, %edx |
| ; SSE-NEXT: movl (%rdi), %eax |
| ; SSE-NEXT: movl %eax, %esi |
| ; SSE-NEXT: btrl %ecx, %esi |
| ; SSE-NEXT: orl %edx, %esi |
| ; SSE-NEXT: btl %ecx, %eax |
| ; SSE-NEXT: setae %al |
| ; SSE-NEXT: movl %esi, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: init_eq_i32: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: shlxl %esi, %edx, %eax |
| ; AVX-NEXT: movl (%rdi), %ecx |
| ; AVX-NEXT: movl %ecx, %edx |
| ; AVX-NEXT: btrl %esi, %edx |
| ; AVX-NEXT: orl %eax, %edx |
| ; AVX-NEXT: btl %esi, %ecx |
| ; AVX-NEXT: setae %al |
| ; AVX-NEXT: movl %edx, (%rdi) |
| ; AVX-NEXT: retq |
| %ofs = and i32 %position, 31 |
| %bit = shl nuw i32 1, %ofs |
| %mask = xor i32 %bit, -1 |
| %val0 = zext i1 %value to i32 |
| %val = shl nuw i32 %val0, %ofs |
| %ld = load i32, ptr %word |
| %test = and i32 %ld, %bit |
| %res0 = and i32 %ld, %mask |
| %res = or i32 %res0, %val |
| %cmp = icmp eq i32 %test, 0 |
| store i32 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; |
| ; i64 bt/btc/btr/bts + init |
| ; |
| |
| define i1 @test_ne_i64(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_ne_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: andl $32, %edx |
| ; X86-NEXT: shrl $3, %edx |
| ; X86-NEXT: movl (%eax,%edx), %eax |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_ne_i64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $esi killed $esi def $rsi |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %cmp = icmp ne i64 %test, 0 |
| ret i1 %cmp |
| } |
| |
| define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_ne_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $32, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btcl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: complement_ne_i64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $esi killed $esi def $rsi |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: btcq %rsi, %rcx |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %res = xor i64 %ld, %bit |
| %cmp = icmp ne i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: reset_eq_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $32, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_eq_i64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $esi killed $esi def $rsi |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: btrq %rsi, %rcx |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %mask = xor i64 %bit, -1 |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %res = and i64 %ld, %mask |
| %cmp = icmp eq i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @set_ne_i64(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: set_ne_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $32, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btsl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: set_ne_i64: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $esi killed $esi def $rsi |
| ; X64-NEXT: movq (%rdi), %rax |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: btsq %rsi, %rcx |
| ; X64-NEXT: btq %rsi, %rax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: movq %rcx, (%rdi) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %res = or i64 %ld, %bit |
| %cmp = icmp ne i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind { |
| ; X86-LABEL: init_eq_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %esi |
| ; X86-NEXT: andl $32, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%edx,%esi), %edi |
| ; X86-NEXT: btl %ecx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %ecx, %edi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shll %cl, %ebx |
| ; X86-NEXT: orl %edi, %ebx |
| ; X86-NEXT: movl %ebx, (%edx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: init_eq_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: movl %edx, %eax |
| ; SSE-NEXT: shlq %cl, %rax |
| ; SSE-NEXT: movq (%rdi), %rdx |
| ; SSE-NEXT: movq %rdx, %rsi |
| ; SSE-NEXT: btrq %rcx, %rsi |
| ; SSE-NEXT: orq %rax, %rsi |
| ; SSE-NEXT: btq %rcx, %rdx |
| ; SSE-NEXT: setae %al |
| ; SSE-NEXT: movq %rsi, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: init_eq_i64: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: # kill: def $esi killed $esi def $rsi |
| ; AVX-NEXT: movl %edx, %eax |
| ; AVX-NEXT: shlxq %rsi, %rax, %rax |
| ; AVX-NEXT: movq (%rdi), %rcx |
| ; AVX-NEXT: movq %rcx, %rdx |
| ; AVX-NEXT: btrq %rsi, %rdx |
| ; AVX-NEXT: orq %rax, %rdx |
| ; AVX-NEXT: btq %rsi, %rcx |
| ; AVX-NEXT: setae %al |
| ; AVX-NEXT: movq %rdx, (%rdi) |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 63 |
| %ofs = zext nneg i32 %rem to i64 |
| %bit = shl nuw i64 1, %ofs |
| %mask = xor i64 %bit, -1 |
| %val0 = zext i1 %value to i64 |
| %val = shl nuw i64 %val0, %ofs |
| %ld = load i64, ptr %word |
| %test = and i64 %ld, %bit |
| %res0 = and i64 %ld, %mask |
| %res = or i64 %res0, %val |
| %cmp = icmp eq i64 %test, 0 |
| store i64 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; |
| ; i128 |
| ; |
| |
| define i1 @test_ne_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_ne_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: andl $96, %edx |
| ; X86-NEXT: shrl $3, %edx |
| ; X86-NEXT: movl (%eax,%edx), %eax |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_ne_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %eax |
| ; X64-NEXT: andl $96, %eax |
| ; X64-NEXT: shrl $3, %eax |
| ; X64-NEXT: movl (%rdi,%rax), %eax |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %cmp = icmp ne i128 %test, 0 |
| ret i1 %cmp |
| } |
| |
| define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_ne_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btcl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: complement_ne_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: andl $96, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: btcl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %res = xor i128 %ld, %bit |
| %cmp = icmp ne i128 %test, 0 |
| store i128 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @reset_eq_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: reset_eq_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_eq_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: andl $96, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: btrl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %mask = xor i128 %bit, -1 |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %res = and i128 %ld, %mask |
| %cmp = icmp eq i128 %test, 0 |
| store i128 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @set_ne_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: set_ne_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btsl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: set_ne_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: andl $96, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: btsl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %res = or i128 %ld, %bit |
| %cmp = icmp ne i128 %test, 0 |
| store i128 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind { |
| ; X86-LABEL: init_eq_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%edx,%esi), %edi |
| ; X86-NEXT: btl %ecx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %ecx, %edi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shll %cl, %ebx |
| ; X86-NEXT: orl %edi, %ebx |
| ; X86-NEXT: movl %ebx, (%edx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: init_eq_i128: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: andl $96, %esi |
| ; SSE-NEXT: shrl $3, %esi |
| ; SSE-NEXT: movl (%rdi,%rsi), %r8d |
| ; SSE-NEXT: btl %ecx, %r8d |
| ; SSE-NEXT: setae %al |
| ; SSE-NEXT: shll %cl, %edx |
| ; SSE-NEXT: btrl %ecx, %r8d |
| ; SSE-NEXT: orl %r8d, %edx |
| ; SSE-NEXT: movl %edx, (%rdi,%rsi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: init_eq_i128: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movl %esi, %ecx |
| ; AVX-NEXT: andl $96, %ecx |
| ; AVX-NEXT: shrl $3, %ecx |
| ; AVX-NEXT: movl (%rdi,%rcx), %r8d |
| ; AVX-NEXT: btl %esi, %r8d |
| ; AVX-NEXT: setae %al |
| ; AVX-NEXT: btrl %esi, %r8d |
| ; AVX-NEXT: shlxl %esi, %edx, %edx |
| ; AVX-NEXT: orl %r8d, %edx |
| ; AVX-NEXT: movl %edx, (%rdi,%rcx) |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %mask = xor i128 %bit, -1 |
| %val0 = zext i1 %value to i128 |
| %val = shl nuw i128 %val0, %ofs |
| %ld = load i128, ptr %word |
| %test = and i128 %ld, %bit |
| %res0 = and i128 %ld, %mask |
| %res = or i128 %res0, %val |
| %cmp = icmp eq i128 %test, 0 |
| store i128 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; |
| ; i512 |
| ; |
| |
| define i1 @test_ne_i512(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_ne_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: shrl $3, %edx |
| ; X86-NEXT: andl $60, %edx |
| ; X86-NEXT: movl (%eax,%edx), %eax |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_ne_i512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %eax |
| ; X64-NEXT: shrl $3, %eax |
| ; X64-NEXT: andl $60, %eax |
| ; X64-NEXT: movl (%rdi,%rax), %eax |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %cmp = icmp ne i512 %test, 0 |
| ret i1 %cmp |
| } |
| |
| define i1 @complement_ne_i512(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_ne_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: andl $60, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btcl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: complement_ne_i512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: andl $60, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: btcl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %res = xor i512 %ld, %bit |
| %cmp = icmp ne i512 %test, 0 |
| store i512 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @reset_eq_i512(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: reset_eq_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: andl $60, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_eq_i512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: andl $60, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setae %al |
| ; X64-NEXT: btrl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %mask = xor i512 %bit, -1 |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %res = and i512 %ld, %mask |
| %cmp = icmp eq i512 %test, 0 |
| store i512 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @set_ne_i512(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: set_ne_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: andl $60, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: btsl %edx, %edi |
| ; X86-NEXT: movl %edi, (%ecx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: set_ne_i512: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: andl $60, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %edx |
| ; X64-NEXT: btl %esi, %edx |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: btsl %esi, %edx |
| ; X64-NEXT: movl %edx, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %res = or i512 %ld, %bit |
| %cmp = icmp ne i512 %test, 0 |
| store i512 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind { |
| ; X86-LABEL: init_eq_i512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: andl $60, %esi |
| ; X86-NEXT: movl (%edx,%esi), %edi |
| ; X86-NEXT: btl %ecx, %edi |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: btrl %ecx, %edi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shll %cl, %ebx |
| ; X86-NEXT: orl %edi, %ebx |
| ; X86-NEXT: movl %ebx, (%edx,%esi) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: init_eq_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: shrl $3, %esi |
| ; SSE-NEXT: andl $60, %esi |
| ; SSE-NEXT: movl (%rdi,%rsi), %r8d |
| ; SSE-NEXT: btl %ecx, %r8d |
| ; SSE-NEXT: setae %al |
| ; SSE-NEXT: shll %cl, %edx |
| ; SSE-NEXT: btrl %ecx, %r8d |
| ; SSE-NEXT: orl %r8d, %edx |
| ; SSE-NEXT: movl %edx, (%rdi,%rsi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: init_eq_i512: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: movl %esi, %ecx |
| ; AVX-NEXT: shrl $3, %ecx |
| ; AVX-NEXT: andl $60, %ecx |
| ; AVX-NEXT: movl (%rdi,%rcx), %r8d |
| ; AVX-NEXT: btl %esi, %r8d |
| ; AVX-NEXT: setae %al |
| ; AVX-NEXT: btrl %esi, %r8d |
| ; AVX-NEXT: shlxl %esi, %edx, %edx |
| ; AVX-NEXT: orl %r8d, %edx |
| ; AVX-NEXT: movl %edx, (%rdi,%rcx) |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 511 |
| %ofs = zext nneg i32 %rem to i512 |
| %bit = shl nuw i512 1, %ofs |
| %mask = xor i512 %bit, -1 |
| %val0 = zext i1 %value to i512 |
| %val = shl nuw i512 %val0, %ofs |
| %ld = load i512, ptr %word |
| %test = and i512 %ld, %bit |
| %res0 = and i512 %ld, %mask |
| %res = or i512 %res0, %val |
| %cmp = icmp eq i512 %test, 0 |
| store i512 %res, ptr %word |
| ret i1 %cmp |
| } |
| |
| ; |
| ; i4096 |
| ; |
| |
| define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: test_ne_i4096: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: andl $4064, %edx # imm = 0xFE0 |
| ; X86-NEXT: shrl $3, %edx |
| ; X86-NEXT: movl (%eax,%edx), %eax |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setb %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: test_ne_i4096: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %eax |
| ; X64-NEXT: andl $4064, %eax # imm = 0xFE0 |
| ; X64-NEXT: shrl $3, %eax |
| ; X64-NEXT: movl (%rdi,%rax), %eax |
| ; X64-NEXT: btl %esi, %eax |
| ; X64-NEXT: setb %al |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 4095 |
| %ofs = zext nneg i32 %rem to i4096 |
| %bit = shl nuw i4096 1, %ofs |
| %ld = load i4096, ptr %word |
| %test = and i4096 %ld, %bit |
| %cmp = icmp ne i4096 %test, 0 |
| ret i1 %cmp |
| } |
| |
| ; Special Cases |
| |
| ; Multiple uses of the stored value |
| define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_cmpz_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl $1, %edx |
| ; X86-NEXT: shll %cl, %edx |
| ; X86-NEXT: andl $96, %ecx |
| ; X86-NEXT: shrl $3, %ecx |
| ; X86-NEXT: xorl %edx, (%eax,%ecx) |
| ; X86-NEXT: movl (%eax), %ecx |
| ; X86-NEXT: movl 4(%eax), %edx |
| ; X86-NEXT: orl 12(%eax), %edx |
| ; X86-NEXT: orl 8(%eax), %ecx |
| ; X86-NEXT: orl %edx, %ecx |
| ; X86-NEXT: setne %al |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: complement_cmpz_i128: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: movl $1, %eax |
| ; SSE-NEXT: shll %cl, %eax |
| ; SSE-NEXT: andl $96, %ecx |
| ; SSE-NEXT: shrl $3, %ecx |
| ; SSE-NEXT: xorl %eax, (%rdi,%rcx) |
| ; SSE-NEXT: movq (%rdi), %rax |
| ; SSE-NEXT: orq 8(%rdi), %rax |
| ; SSE-NEXT: setne %al |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: complement_cmpz_i128: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: # kill: def $esi killed $esi def $rsi |
| ; AVX-NEXT: movl $1, %eax |
| ; AVX-NEXT: shlxl %esi, %eax, %eax |
| ; AVX-NEXT: andl $96, %esi |
| ; AVX-NEXT: shrl $3, %esi |
| ; AVX-NEXT: xorl %eax, (%rdi,%rsi) |
| ; AVX-NEXT: movq (%rdi), %rax |
| ; AVX-NEXT: orq 8(%rdi), %rax |
| ; AVX-NEXT: setne %al |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %ld = load i128, ptr %word |
| %res = xor i128 %ld, %bit |
| store i128 %res, ptr %word |
| %cmp = icmp ne i128 %res, 0 |
| ret i1 %cmp |
| } |
| |
| ; Load hidden behind bitcast |
| define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind { |
| ; X86-LABEL: complement_ne_i128_bitcast: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $80, %esp |
| ; X86-NEXT: movl 12(%ebp), %eax |
| ; X86-NEXT: movzwl (%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movzwl 12(%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movzwl 14(%eax), %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shll $16, %edi |
| ; X86-NEXT: orl %ecx, %edi |
| ; X86-NEXT: movzwl 2(%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movzwl 4(%eax), %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movzwl 6(%eax), %esi |
| ; X86-NEXT: movzwl 8(%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movzwl 10(%eax), %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: shll $16, %eax |
| ; X86-NEXT: orl %ecx, %eax |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, %eax |
| ; X86-NEXT: shll $16, %eax |
| ; X86-NEXT: orl %edx, %eax |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shll $16, %eax |
| ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 16(%ebp), %eax |
| ; X86-NEXT: movl %eax, %ebx |
| ; X86-NEXT: andb $96, %bl |
| ; X86-NEXT: shrb $3, %bl |
| ; X86-NEXT: movzbl %bl, %edi |
| ; X86-NEXT: movl 32(%esp,%edi), %edi |
| ; X86-NEXT: btcl %eax, %edi |
| ; X86-NEXT: andl $96, %eax |
| ; X86-NEXT: shrl $3, %eax |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl %edi, (%ecx,%eax) |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movw %dx, 14(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movw %dx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movw %cx, 10(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movw %cx, 8(%eax) |
| ; X86-NEXT: movw %si, 6(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movw %cx, 4(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movw %cx, 2(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movw %cx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| ; |
| ; SSE2-LABEL: complement_ne_i128_bitcast: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: # kill: def $esi killed $esi def $rsi |
| ; SSE2-NEXT: movdqa (%rdi), %xmm0 |
| ; SSE2-NEXT: movq 8(%rdi), %rax |
| ; SSE2-NEXT: movq %xmm0, %rdx |
| ; SSE2-NEXT: movl %esi, %ecx |
| ; SSE2-NEXT: andb $32, %cl |
| ; SSE2-NEXT: shrdq %cl, %rax, %rdx |
| ; SSE2-NEXT: shrq %cl, %rax |
| ; SSE2-NEXT: testb $64, %sil |
| ; SSE2-NEXT: cmoveq %rdx, %rax |
| ; SSE2-NEXT: btcl %esi, %eax |
| ; SSE2-NEXT: andl $96, %esi |
| ; SSE2-NEXT: shrl $3, %esi |
| ; SSE2-NEXT: movl %eax, (%rdi,%rsi) |
| ; SSE2-NEXT: retq |
| ; |
| ; SSE4-LABEL: complement_ne_i128_bitcast: |
| ; SSE4: # %bb.0: |
| ; SSE4-NEXT: # kill: def $esi killed $esi def $rsi |
| ; SSE4-NEXT: movdqa (%rdi), %xmm0 |
| ; SSE4-NEXT: pextrq $1, %xmm0, %rax |
| ; SSE4-NEXT: movq %xmm0, %rdx |
| ; SSE4-NEXT: movl %esi, %ecx |
| ; SSE4-NEXT: andb $32, %cl |
| ; SSE4-NEXT: shrdq %cl, %rax, %rdx |
| ; SSE4-NEXT: shrq %cl, %rax |
| ; SSE4-NEXT: testb $64, %sil |
| ; SSE4-NEXT: cmoveq %rdx, %rax |
| ; SSE4-NEXT: btcl %esi, %eax |
| ; SSE4-NEXT: andl $96, %esi |
| ; SSE4-NEXT: shrl $3, %esi |
| ; SSE4-NEXT: movl %eax, (%rdi,%rsi) |
| ; SSE4-NEXT: retq |
| ; |
| ; AVX-LABEL: complement_ne_i128_bitcast: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: # kill: def $esi killed $esi def $rsi |
| ; AVX-NEXT: vmovdqa (%rdi), %xmm0 |
| ; AVX-NEXT: vpextrq $1, %xmm0, %rax |
| ; AVX-NEXT: vmovq %xmm0, %rdx |
| ; AVX-NEXT: movl %esi, %ecx |
| ; AVX-NEXT: andb $32, %cl |
| ; AVX-NEXT: shrdq %cl, %rax, %rdx |
| ; AVX-NEXT: shrxq %rcx, %rax, %rax |
| ; AVX-NEXT: testb $64, %sil |
| ; AVX-NEXT: cmoveq %rdx, %rax |
| ; AVX-NEXT: btcl %esi, %eax |
| ; AVX-NEXT: andl $96, %esi |
| ; AVX-NEXT: shrl $3, %esi |
| ; AVX-NEXT: movl %eax, (%rdi,%rsi) |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %ldv = load <8 x i16>, ptr %word |
| %ld = bitcast <8 x i16> %ldv to i128 |
| %test = and i128 %ld, %bit |
| %res = xor i128 %ld, %bit |
| store i128 %res, ptr %word |
| ret <8 x i16> %ldv |
| } |
| |
| ; Multiple loads in store chain |
| define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind { |
| ; X86-LABEL: reset_multiload_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl (%eax), %eax |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: andl $96, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: movl (%ecx,%esi), %edi |
| ; X86-NEXT: movl %edi, %ebx |
| ; X86-NEXT: btrl %edx, %ebx |
| ; X86-NEXT: btl %edx, %edi |
| ; X86-NEXT: movl %ebx, (%ecx,%esi) |
| ; X86-NEXT: jae .LBB23_2 |
| ; X86-NEXT: # %bb.1: |
| ; X86-NEXT: xorl %eax, %eax |
| ; X86-NEXT: .LBB23_2: |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: reset_multiload_i128: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %ecx |
| ; X64-NEXT: andl $96, %ecx |
| ; X64-NEXT: shrl $3, %ecx |
| ; X64-NEXT: movl (%rdi,%rcx), %r9d |
| ; X64-NEXT: movl %r9d, %r8d |
| ; X64-NEXT: btrl %esi, %r8d |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: btl %esi, %r9d |
| ; X64-NEXT: jb .LBB23_2 |
| ; X64-NEXT: # %bb.1: |
| ; X64-NEXT: movl (%rdx), %eax |
| ; X64-NEXT: .LBB23_2: |
| ; X64-NEXT: movl %r8d, (%rdi,%rcx) |
| ; X64-NEXT: retq |
| %rem = and i32 %position, 127 |
| %ofs = zext nneg i32 %rem to i128 |
| %bit = shl nuw i128 1, %ofs |
| %mask = xor i128 %bit, -1 |
| %ld = load i128, ptr %word |
| %sel = load i32, ptr %p |
| %test = and i128 %ld, %bit |
| %res = and i128 %ld, %mask |
| %cmp = icmp eq i128 %test, 0 |
| store i128 %res, ptr %word |
| %ret = select i1 %cmp, i32 %sel, i32 0 |
| ret i32 %ret |
| } |
| |
| ; Multiple uses of the store chain AND stored value |
| define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind { |
| ; X86-LABEL: chain_reset_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl $-2, %edi |
| ; X86-NEXT: roll %cl, %edi |
| ; X86-NEXT: shrl $3, %ecx |
| ; X86-NEXT: andl $28, %ecx |
| ; X86-NEXT: andl %edi, (%esi,%ecx) |
| ; X86-NEXT: movl 8(%esi), %ebx |
| ; X86-NEXT: movl (%esi), %edi |
| ; X86-NEXT: movl 4(%esi), %ecx |
| ; X86-NEXT: movl 12(%esi), %ebp |
| ; X86-NEXT: orl 28(%esi), %ebp |
| ; X86-NEXT: orl 20(%esi), %ecx |
| ; X86-NEXT: orl %ebp, %ecx |
| ; X86-NEXT: orl 24(%esi), %ebx |
| ; X86-NEXT: movl 16(%esi), %ebp |
| ; X86-NEXT: orl %edi, %ebp |
| ; X86-NEXT: orl %ebx, %ebp |
| ; X86-NEXT: movl (%edx), %esi |
| ; X86-NEXT: movl %edi, (%edx) |
| ; X86-NEXT: movl (%eax), %eax |
| ; X86-NEXT: orl %ecx, %ebp |
| ; X86-NEXT: jne .LBB24_2 |
| ; X86-NEXT: # %bb.1: |
| ; X86-NEXT: addl %esi, %eax |
| ; X86-NEXT: .LBB24_2: |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: chain_reset_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: # kill: def $ecx killed $ecx def $rcx |
| ; SSE-NEXT: movl $-2, %eax |
| ; SSE-NEXT: roll %cl, %eax |
| ; SSE-NEXT: shrl $3, %ecx |
| ; SSE-NEXT: andl $28, %ecx |
| ; SSE-NEXT: andl %eax, (%rdi,%rcx) |
| ; SSE-NEXT: movq (%rdi), %rcx |
| ; SSE-NEXT: movq 8(%rdi), %r8 |
| ; SSE-NEXT: orq 24(%rdi), %r8 |
| ; SSE-NEXT: movq 16(%rdi), %rdi |
| ; SSE-NEXT: orq %rcx, %rdi |
| ; SSE-NEXT: movl (%rsi), %eax |
| ; SSE-NEXT: movl %ecx, (%rsi) |
| ; SSE-NEXT: movl (%rdx), %ecx |
| ; SSE-NEXT: addl %ecx, %eax |
| ; SSE-NEXT: orq %r8, %rdi |
| ; SSE-NEXT: cmovnel %ecx, %eax |
| ; SSE-NEXT: retq |
| ; |
| ; AVX-LABEL: chain_reset_i256: |
| ; AVX: # %bb.0: |
| ; AVX-NEXT: # kill: def $ecx killed $ecx def $rcx |
| ; AVX-NEXT: movl $-2, %eax |
| ; AVX-NEXT: roll %cl, %eax |
| ; AVX-NEXT: shrl $3, %ecx |
| ; AVX-NEXT: andl $28, %ecx |
| ; AVX-NEXT: andl %eax, (%rdi,%rcx) |
| ; AVX-NEXT: vmovdqu (%rdi), %ymm0 |
| ; AVX-NEXT: movl (%rdi), %ecx |
| ; AVX-NEXT: movl (%rsi), %eax |
| ; AVX-NEXT: movl %ecx, (%rsi) |
| ; AVX-NEXT: movl (%rdx), %ecx |
| ; AVX-NEXT: addl %ecx, %eax |
| ; AVX-NEXT: vptest %ymm0, %ymm0 |
| ; AVX-NEXT: cmovnel %ecx, %eax |
| ; AVX-NEXT: vzeroupper |
| ; AVX-NEXT: retq |
| %rem = and i32 %position, 255 |
| %ofs = zext nneg i32 %rem to i256 |
| %bit = shl nuw i256 1, %ofs |
| %ld0 = load i256, ptr %p0 |
| %msk = xor i256 %bit, -1 |
| %res = and i256 %ld0, %msk |
| store i256 %res, ptr %p0 |
| %cmp = icmp ne i256 %res, 0 |
| %ld1 = load i32, ptr %p1 |
| %trunc = trunc i256 %res to i32 |
| store i32 %trunc, ptr %p1 |
| %ld2 = load i32, ptr %p2 |
| %add = add i32 %ld1, %ld2 |
| %sel = select i1 %cmp, i32 %ld2, i32 %add |
| ret i32 %sel |
| } |
| |
| ; BTC/BT/BTS sequence on same i128 |
| define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind { |
| ; X86-LABEL: sequence_i128: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $144, %esp |
| ; X86-NEXT: movb 20(%ebp), %ch |
| ; X86-NEXT: movb 12(%ebp), %cl |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $3, %al |
| ; X86-NEXT: andb $12, %al |
| ; X86-NEXT: negb %al |
| ; X86-NEXT: movsbl %al, %eax |
| ; X86-NEXT: movl 56(%esp,%eax), %edx |
| ; X86-NEXT: movl 60(%esp,%eax), %esi |
| ; X86-NEXT: shldl %cl, %edx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 48(%esp,%eax), %edi |
| ; X86-NEXT: movl 52(%esp,%eax), %ebx |
| ; X86-NEXT: shldl %cl, %ebx, %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shldl %cl, %edi, %ebx |
| ; X86-NEXT: shll %cl, %edi |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movb %ch, %al |
| ; X86-NEXT: shrb $3, %al |
| ; X86-NEXT: andb $12, %al |
| ; X86-NEXT: negb %al |
| ; X86-NEXT: movsbl %al, %eax |
| ; X86-NEXT: movl 84(%esp,%eax), %edx |
| ; X86-NEXT: movl 88(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movzbl 20(%ebp), %ecx |
| ; X86-NEXT: shldl %cl, %edx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 80(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 92(%esp,%eax), %eax |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: shldl %cl, %esi, %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: movl %esi, %eax |
| ; X86-NEXT: shll %cl, %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shldl %cl, %esi, %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: xorl 8(%eax), %edx |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: xorl 12(%eax), %esi |
| ; X86-NEXT: xorl (%eax), %edi |
| ; X86-NEXT: xorl 4(%eax), %ebx |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 16(%ebp), %eax |
| ; X86-NEXT: # kill: def $al killed $al killed $eax |
| ; X86-NEXT: andb $96, %al |
| ; X86-NEXT: shrb $3, %al |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: movl 96(%esp,%eax), %eax |
| ; X86-NEXT: movl 16(%ebp), %ecx |
| ; X86-NEXT: btl %ecx, %eax |
| ; X86-NEXT: setae %al |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload |
| ; X86-NEXT: movl 8(%ebp), %ecx |
| ; X86-NEXT: movl %edx, 8(%ecx) |
| ; X86-NEXT: movl %esi, 12(%ecx) |
| ; X86-NEXT: movl %edi, (%ecx) |
| ; X86-NEXT: movl %ebx, 4(%ecx) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: sequence_i128: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: movl $1, %r8d |
| ; SSE-NEXT: xorl %esi, %esi |
| ; SSE-NEXT: shldq %cl, %r8, %rsi |
| ; SSE-NEXT: movl $1, %r9d |
| ; SSE-NEXT: shlq %cl, %r9 |
| ; SSE-NEXT: xorl %r11d, %r11d |
| ; SSE-NEXT: testb $64, %cl |
| ; SSE-NEXT: cmovneq %r9, %rsi |
| ; SSE-NEXT: cmovneq %r11, %r9 |
| ; SSE-NEXT: xorl %r10d, %r10d |
| ; SSE-NEXT: movl %eax, %ecx |
| ; SSE-NEXT: shldq %cl, %r8, %r10 |
| ; SSE-NEXT: shlq %cl, %r8 |
| ; SSE-NEXT: testb $64, %al |
| ; SSE-NEXT: cmovneq %r8, %r10 |
| ; SSE-NEXT: cmovneq %r11, %r8 |
| ; SSE-NEXT: xorq 8(%rdi), %rsi |
| ; SSE-NEXT: xorq (%rdi), %r9 |
| ; SSE-NEXT: movl %edx, %ecx |
| ; SSE-NEXT: andb $32, %cl |
| ; SSE-NEXT: movq %r9, %rax |
| ; SSE-NEXT: shrdq %cl, %rsi, %rax |
| ; SSE-NEXT: movq %rsi, %r11 |
| ; SSE-NEXT: shrq %cl, %r11 |
| ; SSE-NEXT: testb $64, %dl |
| ; SSE-NEXT: cmoveq %rax, %r11 |
| ; SSE-NEXT: btl %edx, %r11d |
| ; SSE-NEXT: setae %al |
| ; SSE-NEXT: orq %r10, %rsi |
| ; SSE-NEXT: orq %r8, %r9 |
| ; SSE-NEXT: movq %r9, (%rdi) |
| ; SSE-NEXT: movq %rsi, 8(%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: sequence_i128: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: movl %esi, %ecx |
| ; AVX2-NEXT: xorl %r9d, %r9d |
| ; AVX2-NEXT: movl $1, %r10d |
| ; AVX2-NEXT: xorl %esi, %esi |
| ; AVX2-NEXT: shldq %cl, %r10, %rsi |
| ; AVX2-NEXT: shlxq %rcx, %r10, %r8 |
| ; AVX2-NEXT: testb $64, %cl |
| ; AVX2-NEXT: cmovneq %r8, %rsi |
| ; AVX2-NEXT: cmovneq %r9, %r8 |
| ; AVX2-NEXT: xorl %r11d, %r11d |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: shldq %cl, %r10, %r11 |
| ; AVX2-NEXT: shlxq %rax, %r10, %r10 |
| ; AVX2-NEXT: testb $64, %al |
| ; AVX2-NEXT: cmovneq %r10, %r11 |
| ; AVX2-NEXT: cmovneq %r9, %r10 |
| ; AVX2-NEXT: xorq 8(%rdi), %rsi |
| ; AVX2-NEXT: xorq (%rdi), %r8 |
| ; AVX2-NEXT: movl %edx, %ecx |
| ; AVX2-NEXT: andb $32, %cl |
| ; AVX2-NEXT: movq %r8, %rax |
| ; AVX2-NEXT: shrdq %cl, %rsi, %rax |
| ; AVX2-NEXT: shrxq %rcx, %rsi, %rcx |
| ; AVX2-NEXT: testb $64, %dl |
| ; AVX2-NEXT: cmoveq %rax, %rcx |
| ; AVX2-NEXT: btl %edx, %ecx |
| ; AVX2-NEXT: setae %al |
| ; AVX2-NEXT: orq %r11, %rsi |
| ; AVX2-NEXT: orq %r10, %r8 |
| ; AVX2-NEXT: movq %r8, (%rdi) |
| ; AVX2-NEXT: movq %rsi, 8(%rdi) |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: sequence_i128: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: movl %ecx, %eax |
| ; AVX512-NEXT: movl %esi, %ecx |
| ; AVX512-NEXT: movl $1, %r9d |
| ; AVX512-NEXT: xorl %esi, %esi |
| ; AVX512-NEXT: shldq %cl, %r9, %rsi |
| ; AVX512-NEXT: xorl %r10d, %r10d |
| ; AVX512-NEXT: shlxq %rcx, %r9, %r8 |
| ; AVX512-NEXT: testb $64, %cl |
| ; AVX512-NEXT: cmovneq %r8, %rsi |
| ; AVX512-NEXT: cmovneq %r10, %r8 |
| ; AVX512-NEXT: xorl %r11d, %r11d |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: shldq %cl, %r9, %r11 |
| ; AVX512-NEXT: shlxq %rax, %r9, %r9 |
| ; AVX512-NEXT: testb $64, %al |
| ; AVX512-NEXT: cmovneq %r9, %r11 |
| ; AVX512-NEXT: cmovneq %r10, %r9 |
| ; AVX512-NEXT: xorq 8(%rdi), %rsi |
| ; AVX512-NEXT: xorq (%rdi), %r8 |
| ; AVX512-NEXT: movl %edx, %ecx |
| ; AVX512-NEXT: andb $32, %cl |
| ; AVX512-NEXT: movq %r8, %rax |
| ; AVX512-NEXT: shrdq %cl, %rsi, %rax |
| ; AVX512-NEXT: shrxq %rcx, %rsi, %rcx |
| ; AVX512-NEXT: testb $64, %dl |
| ; AVX512-NEXT: cmoveq %rax, %rcx |
| ; AVX512-NEXT: btl %edx, %ecx |
| ; AVX512-NEXT: setae %al |
| ; AVX512-NEXT: orq %r11, %rsi |
| ; AVX512-NEXT: orq %r9, %r8 |
| ; AVX512-NEXT: movq %r8, (%rdi) |
| ; AVX512-NEXT: movq %rsi, 8(%rdi) |
| ; AVX512-NEXT: retq |
| %rem0 = and i32 %pos0, 127 |
| %rem1 = and i32 %pos1, 127 |
| %rem2 = and i32 %pos2, 127 |
| %ofs0 = zext nneg i32 %rem0 to i128 |
| %ofs1 = zext nneg i32 %rem1 to i128 |
| %ofs2 = zext nneg i32 %rem2 to i128 |
| %bit0 = shl nuw i128 1, %ofs0 |
| %bit1 = shl nuw i128 1, %ofs1 |
| %bit2 = shl nuw i128 1, %ofs2 |
| %ld = load i128, ptr %word |
| %res0 = xor i128 %ld, %bit0 |
| %test1 = and i128 %res0, %bit1 |
| %cmp1 = icmp eq i128 %test1, 0 |
| %res2 = or i128 %res0, %bit2 |
| store i128 %res2, ptr %word |
| ret i1 %cmp1 |
| } |
| |
| define i32 @blsr_u512(ptr %word) nounwind { |
| ; X86-LABEL: blsr_u512: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $240, %esp |
| ; X86-NEXT: movl 8(%ebp), %ebx |
| ; X86-NEXT: movl 12(%ebx), %esi |
| ; X86-NEXT: movl 28(%ebx), %eax |
| ; X86-NEXT: movl 60(%ebx), %ecx |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: orl %ecx, %eax |
| ; X86-NEXT: movl 44(%ebx), %edx |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %esi, %ecx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: orl %edx, %ecx |
| ; X86-NEXT: orl %eax, %ecx |
| ; X86-NEXT: movl 20(%ebx), %edx |
| ; X86-NEXT: movl 52(%ebx), %eax |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: orl %eax, %edx |
| ; X86-NEXT: movl 4(%ebx), %edi |
| ; X86-NEXT: movl 36(%ebx), %esi |
| ; X86-NEXT: movl %edi, %eax |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: orl %esi, %eax |
| ; X86-NEXT: orl %edx, %eax |
| ; X86-NEXT: orl %ecx, %eax |
| ; X86-NEXT: movl 24(%ebx), %edx |
| ; X86-NEXT: movl 56(%ebx), %ecx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: orl %ecx, %edx |
| ; X86-NEXT: movl 8(%ebx), %ecx |
| ; X86-NEXT: movl 40(%ebx), %esi |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: orl %esi, %ecx |
| ; X86-NEXT: orl %edx, %ecx |
| ; X86-NEXT: movl 16(%ebx), %edx |
| ; X86-NEXT: movl 48(%ebx), %esi |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: orl %esi, %edx |
| ; X86-NEXT: movl (%ebx), %esi |
| ; X86-NEXT: movl 32(%ebx), %ebx |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: orl %ebx, %esi |
| ; X86-NEXT: orl %edx, %esi |
| ; X86-NEXT: orl %ecx, %esi |
| ; X86-NEXT: orl %eax, %esi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: je .LBB26_1 |
| ; X86-NEXT: # %bb.2: # %cond.false |
| ; X86-NEXT: testl %ebx, %ebx |
| ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: jne .LBB26_3 |
| ; X86-NEXT: # %bb.4: # %cond.false |
| ; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload |
| ; X86-NEXT: addl $32, %eax |
| ; X86-NEXT: jmp .LBB26_5 |
| ; X86-NEXT: .LBB26_1: |
| ; X86-NEXT: movl $512, %ecx # imm = 0x200 |
| ; X86-NEXT: jmp .LBB26_41 |
| ; X86-NEXT: .LBB26_3: |
| ; X86-NEXT: rep bsfl %ebx, %eax |
| ; X86-NEXT: .LBB26_5: # %cond.false |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: testl %ecx, %ecx |
| ; X86-NEXT: jne .LBB26_6 |
| ; X86-NEXT: # %bb.7: # %cond.false |
| ; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload |
| ; X86-NEXT: addl $32, %ecx |
| ; X86-NEXT: jmp .LBB26_8 |
| ; X86-NEXT: .LBB26_6: |
| ; X86-NEXT: rep bsfl %ecx, %ecx |
| ; X86-NEXT: .LBB26_8: # %cond.false |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload |
| ; X86-NEXT: jne .LBB26_10 |
| ; X86-NEXT: # %bb.9: # %cond.false |
| ; X86-NEXT: addl $64, %ecx |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: .LBB26_10: # %cond.false |
| ; X86-NEXT: testl %esi, %esi |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: jne .LBB26_11 |
| ; X86-NEXT: # %bb.12: # %cond.false |
| ; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload |
| ; X86-NEXT: addl $32, %ecx |
| ; X86-NEXT: testl %edx, %edx |
| ; X86-NEXT: je .LBB26_15 |
| ; X86-NEXT: .LBB26_14: |
| ; X86-NEXT: rep bsfl %edx, %edx |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: je .LBB26_17 |
| ; X86-NEXT: jmp .LBB26_18 |
| ; X86-NEXT: .LBB26_11: |
| ; X86-NEXT: rep bsfl %esi, %ecx |
| ; X86-NEXT: testl %edx, %edx |
| ; X86-NEXT: jne .LBB26_14 |
| ; X86-NEXT: .LBB26_15: # %cond.false |
| ; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload |
| ; X86-NEXT: addl $32, %edx |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: jne .LBB26_18 |
| ; X86-NEXT: .LBB26_17: # %cond.false |
| ; X86-NEXT: addl $64, %edx |
| ; X86-NEXT: movl %edx, %ecx |
| ; X86-NEXT: .LBB26_18: # %cond.false |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: orl %edx, %esi |
| ; X86-NEXT: jne .LBB26_20 |
| ; X86-NEXT: # %bb.19: # %cond.false |
| ; X86-NEXT: subl $-128, %ecx |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: .LBB26_20: # %cond.false |
| ; X86-NEXT: addl $256, %eax # imm = 0x100 |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: testl %edx, %edx |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: jne .LBB26_21 |
| ; X86-NEXT: # %bb.22: # %cond.false |
| ; X86-NEXT: rep bsfl %edi, %ebx |
| ; X86-NEXT: addl $32, %ebx |
| ; X86-NEXT: jmp .LBB26_23 |
| ; X86-NEXT: .LBB26_21: |
| ; X86-NEXT: rep bsfl %edx, %ebx |
| ; X86-NEXT: .LBB26_23: # %cond.false |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: testl %ecx, %ecx |
| ; X86-NEXT: jne .LBB26_24 |
| ; X86-NEXT: # %bb.25: # %cond.false |
| ; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload |
| ; X86-NEXT: addl $32, %ecx |
| ; X86-NEXT: orl %edi, %edx |
| ; X86-NEXT: je .LBB26_27 |
| ; X86-NEXT: jmp .LBB26_28 |
| ; X86-NEXT: .LBB26_24: |
| ; X86-NEXT: rep bsfl %ecx, %ecx |
| ; X86-NEXT: orl %edi, %edx |
| ; X86-NEXT: jne .LBB26_28 |
| ; X86-NEXT: .LBB26_27: # %cond.false |
| ; X86-NEXT: addl $64, %ecx |
| ; X86-NEXT: movl %ecx, %ebx |
| ; X86-NEXT: .LBB26_28: # %cond.false |
| ; X86-NEXT: testl %esi, %esi |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: jne .LBB26_29 |
| ; X86-NEXT: # %bb.30: # %cond.false |
| ; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload |
| ; X86-NEXT: addl $32, %ecx |
| ; X86-NEXT: testl %edx, %edx |
| ; X86-NEXT: je .LBB26_33 |
| ; X86-NEXT: .LBB26_32: |
| ; X86-NEXT: rep bsfl %edx, %edx |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: je .LBB26_35 |
| ; X86-NEXT: jmp .LBB26_36 |
| ; X86-NEXT: .LBB26_29: |
| ; X86-NEXT: rep bsfl %esi, %ecx |
| ; X86-NEXT: testl %edx, %edx |
| ; X86-NEXT: jne .LBB26_32 |
| ; X86-NEXT: .LBB26_33: # %cond.false |
| ; X86-NEXT: rep bsfl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload |
| ; X86-NEXT: addl $32, %edx |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: jne .LBB26_36 |
| ; X86-NEXT: .LBB26_35: # %cond.false |
| ; X86-NEXT: addl $64, %edx |
| ; X86-NEXT: movl %edx, %ecx |
| ; X86-NEXT: .LBB26_36: # %cond.false |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload |
| ; X86-NEXT: movl %edi, %esi |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: orl %edx, %esi |
| ; X86-NEXT: jne .LBB26_38 |
| ; X86-NEXT: # %bb.37: # %cond.false |
| ; X86-NEXT: subl $-128, %ecx |
| ; X86-NEXT: movl %ecx, %ebx |
| ; X86-NEXT: .LBB26_38: # %cond.false |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload |
| ; X86-NEXT: orl %ecx, %edx |
| ; X86-NEXT: movl %edi, %ecx |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: orl %ecx, %esi |
| ; X86-NEXT: orl %edx, %esi |
| ; X86-NEXT: movl %ebx, %ecx |
| ; X86-NEXT: jne .LBB26_40 |
| ; X86-NEXT: # %bb.39: # %cond.false |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: .LBB26_40: # %cond.false |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload |
| ; X86-NEXT: .LBB26_41: # %cond.end |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %ecx, %esi |
| ; X86-NEXT: shrl $3, %esi |
| ; X86-NEXT: andl $60, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: leal {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: subl %esi, %edx |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: andl $31, %ecx |
| ; X86-NEXT: movl 56(%edx), %edi |
| ; X86-NEXT: movl 60(%edx), %esi |
| ; X86-NEXT: shldl %cl, %edi, %esi |
| ; X86-NEXT: notl %esi |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 52(%edx), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: notl %edi |
| ; X86-NEXT: andl %eax, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 40(%edx), %eax |
| ; X86-NEXT: movl 44(%edx), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shldl %cl, %eax, %esi |
| ; X86-NEXT: notl %esi |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 36(%edx), %esi |
| ; X86-NEXT: shldl %cl, %esi, %eax |
| ; X86-NEXT: notl %eax |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 32(%edx), %eax |
| ; X86-NEXT: shldl %cl, %eax, %esi |
| ; X86-NEXT: notl %esi |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 28(%edx), %esi |
| ; X86-NEXT: shldl %cl, %esi, %eax |
| ; X86-NEXT: notl %eax |
| ; X86-NEXT: andl %ebx, %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 24(%edx), %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shldl %cl, %eax, %esi |
| ; X86-NEXT: notl %esi |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 4(%edx), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 8(%edx), %eax |
| ; X86-NEXT: movl %edx, %ebx |
| ; X86-NEXT: movl %eax, %edx |
| ; X86-NEXT: shldl %cl, %esi, %edx |
| ; X86-NEXT: notl %edx |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 12(%ebx), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: shldl %cl, %eax, %esi |
| ; X86-NEXT: notl %esi |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 16(%ebx), %eax |
| ; X86-NEXT: movl %eax, %esi |
| ; X86-NEXT: shldl %cl, %edx, %esi |
| ; X86-NEXT: notl %esi |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 20(%ebx), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: shldl %cl, %eax, %esi |
| ; X86-NEXT: notl %esi |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shldl %cl, %edx, %eax |
| ; X86-NEXT: notl %eax |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: negl %eax |
| ; X86-NEXT: movl 208(%esp,%eax), %edx |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shldl %cl, %edx, %eax |
| ; X86-NEXT: notl %eax |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shldl %cl, %eax, %edx |
| ; X86-NEXT: notl %edx |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload |
| ; X86-NEXT: movl (%ebx), %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shll %cl, %eax |
| ; X86-NEXT: notl %eax |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload |
| ; X86-NEXT: shldl %cl, %edi, %ebx |
| ; X86-NEXT: notl %ebx |
| ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload |
| ; X86-NEXT: movl %ebx, %edi |
| ; X86-NEXT: movl 8(%ebp), %ecx |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload |
| ; X86-NEXT: movl %ebx, 24(%ecx) |
| ; X86-NEXT: movl %esi, 20(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: movl %esi, 16(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: movl %esi, 12(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: movl %esi, 8(%ecx) |
| ; X86-NEXT: movl %edi, 4(%ecx) |
| ; X86-NEXT: movl %eax, (%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 28(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 32(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 36(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 40(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 44(%ecx) |
| ; X86-NEXT: movl %edx, 48(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 52(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 56(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 60(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| ; |
| ; SSE-LABEL: blsr_u512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r15 |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: movq 48(%rdi), %r11 |
| ; SSE-NEXT: movq 40(%rdi), %r9 |
| ; SSE-NEXT: movq 24(%rdi), %r8 |
| ; SSE-NEXT: movq 16(%rdi), %rdx |
| ; SSE-NEXT: movq (%rdi), %rcx |
| ; SSE-NEXT: movq 8(%rdi), %rsi |
| ; SSE-NEXT: rep bsfq %rcx, %rax |
| ; SSE-NEXT: rep bsfq %rsi, %rbx |
| ; SSE-NEXT: addq $64, %rbx |
| ; SSE-NEXT: testq %rcx, %rcx |
| ; SSE-NEXT: cmovneq %rax, %rbx |
| ; SSE-NEXT: rep bsfq %rdx, %rax |
| ; SSE-NEXT: rep bsfq %r8, %r10 |
| ; SSE-NEXT: addq $64, %r10 |
| ; SSE-NEXT: testq %rdx, %rdx |
| ; SSE-NEXT: cmovneq %rax, %r10 |
| ; SSE-NEXT: movq 32(%rdi), %r14 |
| ; SSE-NEXT: subq $-128, %r10 |
| ; SSE-NEXT: movq %rcx, %rax |
| ; SSE-NEXT: orq %rsi, %rax |
| ; SSE-NEXT: cmovneq %rbx, %r10 |
| ; SSE-NEXT: rep bsfq %r14, %rax |
| ; SSE-NEXT: rep bsfq %r9, %rbx |
| ; SSE-NEXT: addq $64, %rbx |
| ; SSE-NEXT: testq %r14, %r14 |
| ; SSE-NEXT: cmovneq %rax, %rbx |
| ; SSE-NEXT: rep bsfq %r11, %r15 |
| ; SSE-NEXT: movl $64, %eax |
| ; SSE-NEXT: rep bsfq 56(%rdi), %rax |
| ; SSE-NEXT: addq $64, %rax |
| ; SSE-NEXT: testq %r11, %r11 |
| ; SSE-NEXT: cmovneq %r15, %rax |
| ; SSE-NEXT: subq $-128, %rax |
| ; SSE-NEXT: orq %r9, %r14 |
| ; SSE-NEXT: cmovneq %rbx, %rax |
| ; SSE-NEXT: addq $256, %rax # imm = 0x100 |
| ; SSE-NEXT: orq %r8, %rsi |
| ; SSE-NEXT: orq %rdx, %rcx |
| ; SSE-NEXT: orq %rsi, %rcx |
| ; SSE-NEXT: cmovneq %r10, %rax |
| ; SSE-NEXT: movl $-2, %edx |
| ; SSE-NEXT: movl %eax, %ecx |
| ; SSE-NEXT: roll %cl, %edx |
| ; SSE-NEXT: movl %eax, %ecx |
| ; SSE-NEXT: shrl $3, %ecx |
| ; SSE-NEXT: andl $60, %ecx |
| ; SSE-NEXT: andl %edx, (%rdi,%rcx) |
| ; SSE-NEXT: # kill: def $eax killed $eax killed $rax |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: popq %r15 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: blsr_u512: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r15 |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: movq 40(%rdi), %r9 |
| ; AVX2-NEXT: movq 32(%rdi), %r10 |
| ; AVX2-NEXT: movq 24(%rdi), %r8 |
| ; AVX2-NEXT: movq 16(%rdi), %rdx |
| ; AVX2-NEXT: movq (%rdi), %rcx |
| ; AVX2-NEXT: movq 8(%rdi), %rsi |
| ; AVX2-NEXT: tzcntq %rcx, %rax |
| ; AVX2-NEXT: xorl %ebx, %ebx |
| ; AVX2-NEXT: tzcntq %rsi, %rbx |
| ; AVX2-NEXT: addq $64, %rbx |
| ; AVX2-NEXT: testq %rcx, %rcx |
| ; AVX2-NEXT: cmovneq %rax, %rbx |
| ; AVX2-NEXT: xorl %eax, %eax |
| ; AVX2-NEXT: tzcntq %rdx, %rax |
| ; AVX2-NEXT: tzcntq %r8, %r11 |
| ; AVX2-NEXT: addq $64, %r11 |
| ; AVX2-NEXT: testq %rdx, %rdx |
| ; AVX2-NEXT: cmovneq %rax, %r11 |
| ; AVX2-NEXT: subq $-128, %r11 |
| ; AVX2-NEXT: movq %rcx, %rax |
| ; AVX2-NEXT: orq %rsi, %rax |
| ; AVX2-NEXT: cmovneq %rbx, %r11 |
| ; AVX2-NEXT: xorl %eax, %eax |
| ; AVX2-NEXT: tzcntq %r10, %rax |
| ; AVX2-NEXT: xorl %ebx, %ebx |
| ; AVX2-NEXT: tzcntq %r9, %rbx |
| ; AVX2-NEXT: addq $64, %rbx |
| ; AVX2-NEXT: testq %r10, %r10 |
| ; AVX2-NEXT: cmovneq %rax, %rbx |
| ; AVX2-NEXT: movq 48(%rdi), %r14 |
| ; AVX2-NEXT: xorl %r15d, %r15d |
| ; AVX2-NEXT: tzcntq %r14, %r15 |
| ; AVX2-NEXT: xorl %eax, %eax |
| ; AVX2-NEXT: tzcntq 56(%rdi), %rax |
| ; AVX2-NEXT: addq $64, %rax |
| ; AVX2-NEXT: testq %r14, %r14 |
| ; AVX2-NEXT: cmovneq %r15, %rax |
| ; AVX2-NEXT: subq $-128, %rax |
| ; AVX2-NEXT: orq %r9, %r10 |
| ; AVX2-NEXT: cmovneq %rbx, %rax |
| ; AVX2-NEXT: addq $256, %rax # imm = 0x100 |
| ; AVX2-NEXT: orq %r8, %rsi |
| ; AVX2-NEXT: orq %rdx, %rcx |
| ; AVX2-NEXT: orq %rsi, %rcx |
| ; AVX2-NEXT: cmovneq %r11, %rax |
| ; AVX2-NEXT: movl $-2, %edx |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: roll %cl, %edx |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: shrl $3, %ecx |
| ; AVX2-NEXT: andl $60, %ecx |
| ; AVX2-NEXT: andl %edx, (%rdi,%rcx) |
| ; AVX2-NEXT: # kill: def $eax killed $eax killed $rax |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: popq %r15 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: blsr_u512: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 |
| ; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm1 |
| ; AVX512-NEXT: vpandnq %zmm1, %zmm0, %zmm1 |
| ; AVX512-NEXT: vplzcntq %zmm1, %zmm1 |
| ; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm2 = [64,128,192,256,320,384,448,512] |
| ; AVX512-NEXT: vpsubq %zmm1, %zmm2, %zmm1 |
| ; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k1 |
| ; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm0 = [512,512,512,512,512,512,512,512] |
| ; AVX512-NEXT: vpcompressq %zmm1, %zmm0 {%k1} |
| ; AVX512-NEXT: vmovq %xmm0, %rax |
| ; AVX512-NEXT: movl $-2, %edx |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: roll %cl, %edx |
| ; AVX512-NEXT: movl %eax, %ecx |
| ; AVX512-NEXT: shrl $3, %ecx |
| ; AVX512-NEXT: andl $60, %ecx |
| ; AVX512-NEXT: andl %edx, (%rdi,%rcx) |
| ; AVX512-NEXT: # kill: def $eax killed $eax killed $rax |
| ; AVX512-NEXT: vzeroupper |
| ; AVX512-NEXT: retq |
| %ld = load i512, ptr %word |
| %tz = tail call range(i512 0, 513) i512 @llvm.cttz.i512(i512 %ld, i1 false) |
| %tz.cast = trunc nuw nsw i512 %tz to i32 |
| %tz.mask = and i512 %tz, 511 |
| %mask = shl nuw i512 1, %tz.mask |
| %mask.not = xor i512 %mask, -1 |
| %blsr = and i512 %ld, %mask.not |
| store i512 %blsr, ptr %word |
| ret i32 %tz.cast |
| } |