| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O0 %s |
| ; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O3 %s |
| |
| ; Unordered atomic load of an i8 (align 1). |
| ; Both the -O0 and -O3 runs expect a single movb from (%rdi) into %al. |
| define i8 @load_i8(i8* %ptr) { |
| ; CHECK-O0-LABEL: load_i8: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movb (%rdi), %al |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: load_i8: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movb (%rdi), %al |
| ; CHECK-O3-NEXT: retq |
| %v = load atomic i8, i8* %ptr unordered, align 1 |
| ret i8 %v |
| } |
| |
| ; Unordered atomic store of an i8 (align 1). |
| ; The -O0 run expects %sil to be copied into %al before the movb store; the |
| ; -O3 run expects %sil to be stored to (%rdi) directly. |
| define void @store_i8(i8* %ptr, i8 %v) { |
| ; CHECK-O0-LABEL: store_i8: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movb %sil, %al |
| ; CHECK-O0-NEXT: movb %al, (%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: store_i8: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movb %sil, (%rdi) |
| ; CHECK-O3-NEXT: retq |
| store atomic i8 %v, i8* %ptr unordered, align 1 |
| ret void |
| } |
| |
| ; Unordered atomic load of an i16 (align 2). |
| ; The -O0 run expects a 16-bit movw into %ax, while the -O3 run expects a |
| ; zero-extending movzwl into %eax. |
| define i16 @load_i16(i16* %ptr) { |
| ; CHECK-O0-LABEL: load_i16: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movw (%rdi), %ax |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: load_i16: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movzwl (%rdi), %eax |
| ; CHECK-O3-NEXT: retq |
| %v = load atomic i16, i16* %ptr unordered, align 2 |
| ret i16 %v |
| } |
| |
| |
| ; Unordered atomic store of an i16 (align 2). |
| ; The -O0 run expects %si to be copied into %ax before the movw store; the |
| ; -O3 run expects %si to be stored to (%rdi) directly. |
| define void @store_i16(i16* %ptr, i16 %v) { |
| ; CHECK-O0-LABEL: store_i16: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movw %si, %ax |
| ; CHECK-O0-NEXT: movw %ax, (%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: store_i16: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movw %si, (%rdi) |
| ; CHECK-O3-NEXT: retq |
| store atomic i16 %v, i16* %ptr unordered, align 2 |
| ret void |
| } |
| |
| ; Unordered atomic load of an i32 (align 4). |
| ; Both the -O0 and -O3 runs expect a single movl from (%rdi) into %eax. |
| define i32 @load_i32(i32* %ptr) { |
| ; CHECK-O0-LABEL: load_i32: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movl (%rdi), %eax |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: load_i32: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movl (%rdi), %eax |
| ; CHECK-O3-NEXT: retq |
| %v = load atomic i32, i32* %ptr unordered, align 4 |
| ret i32 %v |
| } |
| |
| ; Unordered atomic store of an i32 (align 4). |
| ; Both the -O0 and -O3 runs expect a single movl of %esi to (%rdi). |
| define void @store_i32(i32* %ptr, i32 %v) { |
| ; CHECK-O0-LABEL: store_i32: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movl %esi, (%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: store_i32: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movl %esi, (%rdi) |
| ; CHECK-O3-NEXT: retq |
| store atomic i32 %v, i32* %ptr unordered, align 4 |
| ret void |
| } |
| |
| ; Unordered atomic load of an i64 (align 8). |
| ; Both the -O0 and -O3 runs expect a single movq from (%rdi) into %rax. |
| define i64 @load_i64(i64* %ptr) { |
| ; CHECK-O0-LABEL: load_i64: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movq (%rdi), %rax |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: load_i64: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movq (%rdi), %rax |
| ; CHECK-O3-NEXT: retq |
| %v = load atomic i64, i64* %ptr unordered, align 8 |
| ret i64 %v |
| } |
| |
| ; Unordered atomic store of an i64 (align 8). |
| ; Both the -O0 and -O3 runs expect a single movq of %rsi to (%rdi). |
| define void @store_i64(i64* %ptr, i64 %v) { |
| ; CHECK-O0-LABEL: store_i64: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movq %rsi, (%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: store_i64: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movq %rsi, (%rdi) |
| ; CHECK-O3-NEXT: retq |
| store atomic i64 %v, i64* %ptr unordered, align 8 |
| ret void |
| } |
| |
| ;; The next batch of tests is intended to show transforms which we |
| ;; either *can't* do for legality reasons, or don't currently implement. The |
| ;; latter are noted carefully where relevant. |
| |
| ; Must use a full width op, not a byte op. |
| ; Unordered atomic i64 load, OR with 7, unordered atomic i64 store back to the |
| ; same address. The constant only touches the low byte, but the expected code |
| ; stays 64 bits wide: separate movq/orq/movq at -O0, and a single |
| ; memory-destination orq at -O3. |
| define void @narrow_writeback_or(i64* %ptr) { |
| ; CHECK-O0-LABEL: narrow_writeback_or: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movq (%rdi), %rax |
| ; CHECK-O0-NEXT: orq $7, %rax |
| ; CHECK-O0-NEXT: movq %rax, (%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: narrow_writeback_or: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: orq $7, (%rdi) |
| ; CHECK-O3-NEXT: retq |
| %v = load atomic i64, i64* %ptr unordered, align 8 |
| %v.new = or i64 %v, 7 |
| store atomic i64 %v.new, i64* %ptr unordered, align 8 |
| ret void |
| } |
| |
| ; Must use a full width op, not a byte op. |
| ; Unordered atomic i64 load, AND with 0x00000000FFFFFF00 (clears the low byte |
| ; and the upper 32 bits), unordered atomic i64 store back to the same address. |
| ; The -O0 run expects the AND to be done on the 32-bit subregister (andl $-256) |
| ; followed by a full 64-bit movq store; the -O3 run expects the mask to be |
| ; materialized in %eax and applied with a memory-destination andq. |
| define void @narrow_writeback_and(i64* %ptr) { |
| ; CHECK-O0-LABEL: narrow_writeback_and: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movq (%rdi), %rax |
| ; CHECK-O0-NEXT: movl %eax, %ecx |
| ; CHECK-O0-NEXT: andl $-256, %ecx |
| ; CHECK-O0-NEXT: movl %ecx, %eax |
| ; CHECK-O0-NEXT: movq %rax, (%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: narrow_writeback_and: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00 |
| ; CHECK-O3-NEXT: andq %rax, (%rdi) |
| ; CHECK-O3-NEXT: retq |
| %v = load atomic i64, i64* %ptr unordered, align 8 |
| %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00 |
| store atomic i64 %v.new, i64* %ptr unordered, align 8 |
| ret void |
| } |
| |
| ; Must use a full width op, not a byte op. |
| ; Unordered atomic i64 load, XOR with 7, unordered atomic i64 store back to |
| ; the same address. The constant only touches the low byte, but the expected |
| ; code stays 64 bits wide: separate movq/xorq/movq at -O0, and a single |
| ; memory-destination xorq at -O3. |
| define void @narrow_writeback_xor(i64* %ptr) { |
| ; CHECK-O0-LABEL: narrow_writeback_xor: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movq (%rdi), %rax |
| ; CHECK-O0-NEXT: xorq $7, %rax |
| ; CHECK-O0-NEXT: movq %rax, (%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: narrow_writeback_xor: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: xorq $7, (%rdi) |
| ; CHECK-O3-NEXT: retq |
| %v = load atomic i64, i64* %ptr unordered, align 8 |
| %v.new = xor i64 %v, 7 |
| store atomic i64 %v.new, i64* %ptr unordered, align 8 |
| ret void |
| } |
| |
| ; Legal if wider type is also atomic (TODO) |
| ; Two unordered atomic i32 stores of different values to adjacent slots: |
| ; %p0 (marked align 8) and %p0 + 1 element (align 4). The expected output at |
| ; both -O0 and -O3 keeps two separate movl stores, i.e. the pair is not |
| ; currently merged into one wider store. |
| define void @widen_store(i32* %p0, i32 %v1, i32 %v2) { |
| ; CHECK-O0-LABEL: widen_store: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movl %esi, (%rdi) |
| ; CHECK-O0-NEXT: movl %edx, 4(%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: widen_store: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movl %esi, (%rdi) |
| ; CHECK-O3-NEXT: movl %edx, 4(%rdi) |
| ; CHECK-O3-NEXT: retq |
| %p1 = getelementptr i32, i32* %p0, i64 1 |
| store atomic i32 %v1, i32* %p0 unordered, align 8 |
| store atomic i32 %v2, i32* %p1 unordered, align 4 |
| ret void |
| } |
| |
| ; Legal if wider type is also atomic (TODO) |
| ; Same shape as a two-slot adjacent store, but the same value %v is written to |
| ; both %p0 (marked align 8) and %p0 + 1 element (align 4). The expected output |
| ; at both -O0 and -O3 keeps two separate movl stores of %esi. |
| define void @widen_broadcast(i32* %p0, i32 %v) { |
| ; CHECK-O0-LABEL: widen_broadcast: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movl %esi, (%rdi) |
| ; CHECK-O0-NEXT: movl %esi, 4(%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: widen_broadcast: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movl %esi, (%rdi) |
| ; CHECK-O3-NEXT: movl %esi, 4(%rdi) |
| ; CHECK-O3-NEXT: retq |
| %p1 = getelementptr i32, i32* %p0, i64 1 |
| store atomic i32 %v, i32* %p0 unordered, align 8 |
| store atomic i32 %v, i32* %p1 unordered, align 4 |
| ret void |
| } |
| |
| ; Legal if wider type is also atomic (TODO) |
| ; Extracts both lanes of a <2 x i32> argument and writes them with two |
| ; unordered atomic i32 stores to %p0 (marked align 8) and %p0 + 1 element |
| ; (align 4). The expected output at both -O0 and -O3 moves each lane into a |
| ; general-purpose register (movd / pshufd / movd) and then issues two separate |
| ; movl stores. |
| define void @vec_store(i32* %p0, <2 x i32> %vec) { |
| ; CHECK-O0-LABEL: vec_store: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movd %xmm0, %eax |
| ; CHECK-O0-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] |
| ; CHECK-O0-NEXT: movd %xmm0, %ecx |
| ; CHECK-O0-NEXT: movl %eax, (%rdi) |
| ; CHECK-O0-NEXT: movl %ecx, 4(%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: vec_store: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movd %xmm0, %eax |
| ; CHECK-O3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] |
| ; CHECK-O3-NEXT: movd %xmm0, %ecx |
| ; CHECK-O3-NEXT: movl %eax, (%rdi) |
| ; CHECK-O3-NEXT: movl %ecx, 4(%rdi) |
| ; CHECK-O3-NEXT: retq |
| %v1 = extractelement <2 x i32> %vec, i32 0 |
| %v2 = extractelement <2 x i32> %vec, i32 1 |
| %p1 = getelementptr i32, i32* %p0, i64 1 |
| store atomic i32 %v1, i32* %p0 unordered, align 8 |
| store atomic i32 %v2, i32* %p1 unordered, align 4 |
| ret void |
| } |
| |
| |
| ; Legal if wider type is also atomic (TODO) |
| ; Also, can avoid register move from xmm to eax (TODO) |
| ; Extracts lane 0 of a <2 x i32> argument and writes that same value with two |
| ; unordered atomic i32 stores to %p0 (marked align 8) and %p0 + 1 element |
| ; (align 4). The expected output at both -O0 and -O3 is one movd from %xmm0 to |
| ; %eax followed by two separate movl stores of %eax. |
| define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) { |
| ; CHECK-O0-LABEL: widen_broadcast2: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movd %xmm0, %eax |
| ; CHECK-O0-NEXT: movl %eax, (%rdi) |
| ; CHECK-O0-NEXT: movl %eax, 4(%rdi) |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: widen_broadcast2: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movd %xmm0, %eax |
| ; CHECK-O3-NEXT: movl %eax, (%rdi) |
| ; CHECK-O3-NEXT: movl %eax, 4(%rdi) |
| ; CHECK-O3-NEXT: retq |
| %v1 = extractelement <2 x i32> %vec, i32 0 |
| %p1 = getelementptr i32, i32* %p0, i64 1 |
| store atomic i32 %v1, i32* %p0 unordered, align 8 |
| store atomic i32 %v1, i32* %p1 unordered, align 4 |
| ret void |
| } |
| |
| |
| ; Legal if wider type is also atomic (TODO) |
| ; Writes constant zero with two unordered atomic i32 stores to %p0 (marked |
| ; align 8) and %p0 + 1 element (align 4). The expected output at both -O0 and |
| ; -O3 keeps two separate movl $0 stores. |
| ; NOTE(review): %v1 and %v2 are never used by the body. The -O0 expectations |
| ; also contain 4-byte spills of %esi and %edx, presumably caused only by these |
| ; unused arguments -- confirm before dropping them and regenerating the checks. |
| define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) { |
| ; CHECK-O0-LABEL: widen_zero_init: |
| ; CHECK-O0: # %bb.0: |
| ; CHECK-O0-NEXT: movl $0, (%rdi) |
| ; CHECK-O0-NEXT: movl $0, 4(%rdi) |
| ; CHECK-O0-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; CHECK-O0-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| ; CHECK-O0-NEXT: retq |
| ; |
| ; CHECK-O3-LABEL: widen_zero_init: |
| ; CHECK-O3: # %bb.0: |
| ; CHECK-O3-NEXT: movl $0, (%rdi) |
| ; CHECK-O3-NEXT: movl $0, 4(%rdi) |
| ; CHECK-O3-NEXT: retq |
| %p1 = getelementptr i32, i32* %p0, i64 1 |
| store atomic i32 0, i32* %p0 unordered, align 8 |
| store atomic i32 0, i32* %p1 unordered, align 4 |
| ret void |
| } |