| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86 |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64 |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64 |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64 |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64 |
| |
; Tests showing how analysis of non-constant shift amounts can improve load address math
| |
| ; Alignment of shift amounts should allow sub-integer loads. |
| |
; Extract an i16 from an i64 load: the shift amount is bounded (idx & 63) and
; aligned to a 16-bit boundary (& -16), so the dynamic shift folds into the
; load address as a byte offset ((idx & 48) >> 3) and only a 16-bit zero-extending
; load (movzwl) is emitted instead of a 64-bit load plus variable shift.
define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub64_16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $48, %ecx
; X86-NEXT:    shrl $3, %ecx
; X86-NEXT:    movzwl (%eax,%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: extractSub64_16:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    andl $48, %esi
; X64-NEXT:    shrl $3, %esi
; X64-NEXT:    movzwl (%rdi,%rsi), %eax
; X64-NEXT:    retq
  %idx_bounds = and i32 %idx, 63
  %idx_align = and i32 %idx_bounds, -16
  %sh = zext nneg i32 %idx_align to i64
  %ld = load i64, ptr %word, align 8
  %sub = lshr i64 %ld, %sh
  %res = trunc i64 %sub to i16
  ret i16 %res
}
| |
; Extract an i16 from an i128 load: shift amount bounded to [0,127] and
; 16-bit aligned, so the wide i128 load + variable lshr is reduced to a
; single movzwl from byte offset (idx & 112) >> 3.
define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub128_16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $112, %ecx
; X86-NEXT:    shrl $3, %ecx
; X86-NEXT:    movzwl (%eax,%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: extractSub128_16:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    andl $112, %esi
; X64-NEXT:    shrl $3, %esi
; X64-NEXT:    movzwl (%rdi,%rsi), %eax
; X64-NEXT:    retq
  %idx_bounds = and i32 %idx, 127
  %idx_align = and i32 %idx_bounds, -16
  %sh = zext nneg i32 %idx_align to i128
  %ld = load i128, ptr %word, align 8
  %sub = lshr i128 %ld, %sh
  %res = trunc i128 %sub to i16
  ret i16 %res
}
| |
; Extract an i32 from an i128 load: 32-bit aligned shift amount
; (idx & 127 & -32) folds to a single 32-bit load (movl) at byte
; offset (idx & 96) >> 3.
define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub128_32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $96, %ecx
; X86-NEXT:    shrl $3, %ecx
; X86-NEXT:    movl (%eax,%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: extractSub128_32:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    andl $96, %esi
; X64-NEXT:    shrl $3, %esi
; X64-NEXT:    movl (%rdi,%rsi), %eax
; X64-NEXT:    retq
  %idx_bounds = and i32 %idx, 127
  %idx_align = and i32 %idx_bounds, -32
  %sh = zext nneg i32 %idx_align to i128
  %ld = load i128, ptr %word, align 8
  %sub = lshr i128 %ld, %sh
  %res = trunc i128 %sub to i32
  ret i32 %res
}
| |
; Extract an i64 from an i128 load: 64-bit aligned shift (idx & 127 & -64)
; folds to a single 64-bit load on x64 (movq), and a pair of 32-bit loads
; on x86 (i64 returned in edx:eax).
define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub128_64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    andl $64, %edx
; X86-NEXT:    shrl $3, %edx
; X86-NEXT:    movl (%ecx,%edx), %eax
; X86-NEXT:    movl 4(%ecx,%edx), %edx
; X86-NEXT:    retl
;
; X64-LABEL: extractSub128_64:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    andl $64, %esi
; X64-NEXT:    shrl $3, %esi
; X64-NEXT:    movq (%rdi,%rsi), %rax
; X64-NEXT:    retq
  %idx_bounds = and i32 %idx, 127
  %idx_align = and i32 %idx_bounds, -64
  %sh = zext nneg i32 %idx_align to i128
  %ld = load i128, ptr %word, align 8
  %sub = lshr i128 %ld, %sh
  %res = trunc i128 %sub to i64
  ret i64 %res
}
| |
; Extract an i8 from an i512 load: byte-aligned shift (idx & 511 & -8)
; folds the huge load into a single movzbl; codegen hoists the shift
; before the mask (shrl $3 then andl $63 computes the same byte offset).
define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub512_8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shrl $3, %ecx
; X86-NEXT:    andl $63, %ecx
; X86-NEXT:    movzbl (%eax,%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: extractSub512_8:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    shrl $3, %esi
; X64-NEXT:    andl $63, %esi
; X64-NEXT:    movzbl (%rdi,%rsi), %eax
; X64-NEXT:    retq
  %idx_bounds = and i32 %idx, 511
  %idx_align = and i32 %idx_bounds, -8
  %ld = load i512, ptr %word, align 8
  %sh = zext nneg i32 %idx_align to i512
  %sub = lshr i512 %ld, %sh
  %res = trunc i512 %sub to i8
  ret i8 %res
}
| |
; Extract an i64 from an i512 load: 64-bit aligned shift (idx & 511 & -64)
; becomes byte offset (idx >> 3) & 56; a single movq on x64, two movl on x86.
define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub512_64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shrl $3, %edx
; X86-NEXT:    andl $56, %edx
; X86-NEXT:    movl (%ecx,%edx), %eax
; X86-NEXT:    movl 4(%ecx,%edx), %edx
; X86-NEXT:    retl
;
; X64-LABEL: extractSub512_64:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    shrl $3, %esi
; X64-NEXT:    andl $56, %esi
; X64-NEXT:    movq (%rdi,%rsi), %rax
; X64-NEXT:    retq
  %idx_bounds = and i32 %idx, 511
  %idx_align = and i32 %idx_bounds, -64
  %sh = zext nneg i32 %idx_align to i512
  %ld = load i512, ptr %word, align 8
  %sub = lshr i512 %ld, %sh
  %res = trunc i512 %sub to i64
  ret i64 %res
}
| |
; Extract an i128 from an i512 load: 128-bit aligned shift (idx & 511 & -128)
; becomes byte offset (idx >> 3) & 48. x64 returns the i128 in rdx:rax via two
; movq loads; x86 returns it through a hidden sret pointer (four movl loads,
; retl $4 pops the sret argument).
define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub512_128:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shrl $3, %edx
; X86-NEXT:    andl $48, %edx
; X86-NEXT:    movl (%ecx,%edx), %esi
; X86-NEXT:    movl 4(%ecx,%edx), %edi
; X86-NEXT:    movl 8(%ecx,%edx), %ebx
; X86-NEXT:    movl 12(%ecx,%edx), %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %ebx, 8(%eax)
; X86-NEXT:    movl %edi, 4(%eax)
; X86-NEXT:    movl %esi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl $4
;
; X64-LABEL: extractSub512_128:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    shrl $3, %esi
; X64-NEXT:    andl $48, %esi
; X64-NEXT:    movq (%rdi,%rsi), %rax
; X64-NEXT:    movq 8(%rdi,%rsi), %rdx
; X64-NEXT:    retq
  %idx_bounds = and i32 %idx, 511
  %idx_align = and i32 %idx_bounds, -128
  %sh = zext nneg i32 %idx_align to i512
  %ld = load i512, ptr %word, align 8
  %sub = lshr i512 %ld, %sh
  %res = trunc i512 %sub to i128
  ret i128 %res
}
| |
; Extract an i64 from an i4096 load: the bounds mask and alignment mask
; combine into a single constant (4095 & -64 = 4032 = 0xFC0) applied before
; the >>3, yielding one movq on x64 and an edx:eax pair on x86.
define i64 @extractSub4096_64(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub4096_64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $4032, %edx # imm = 0xFC0
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shrl $3, %edx
; X86-NEXT:    movl (%ecx,%edx), %eax
; X86-NEXT:    movl 4(%ecx,%edx), %edx
; X86-NEXT:    retl
;
; X64-LABEL: extractSub4096_64:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    andl $4032, %esi # imm = 0xFC0
; X64-NEXT:    shrl $3, %esi
; X64-NEXT:    movq (%rdi,%rsi), %rax
; X64-NEXT:    retq
  %idx_bounds = and i32 %idx, 4095
  %idx_align = and i32 %idx_bounds, -64
  %sh = zext nneg i32 %idx_align to i4096
  %ld = load i4096, ptr %word, align 8
  %sub = lshr i4096 %ld, %sh
  %res = trunc i4096 %sub to i64
  ret i64 %res
}