| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mcpu cortex-a53 -mtriple=aarch64-eabi | FileCheck %s --check-prefix=A53 |
| |
| ; PR26827 - Merging stores causes a wrong dependency. |
| ; Aggregate addressed through %fde in @test; fields 2, 3, 4, 6 and 7 are the ones stored to there. |
| %struct1 = type { ptr, ptr, i32, i32, i16, i16, ptr, ptr } |
| ; Globals referenced by @test: @gv0 is loaded and compared by address, @gv1 receives the result of @foo. |
| @gv0 = internal unnamed_addr global i32 0, align 4 |
| @gv1 = internal unnamed_addr global ptr null, align 8 |
| |
| ; Zeroes 40 bytes at %fde, stores into several %struct1 fields, calls the variadic @fcntl with |
| ; %fde as an argument, and then reloads field 2 (%fd1) after the call. The checked output keeps |
| ; that reload ("ldr w8, [x19]") after "bl fcntl". |
| ; NOTE(review): presumably this ordering is what the PR mentioned at the top of the file is about - confirm. |
| ; NOTE(review): the ";CHECK-LABEL" line below uses the CHECK prefix, while the llc invocation at the |
| ; top of this file only passes --check-prefix=A53, so that line appears to be inert - confirm. |
| define void @test(ptr %fde, i32 %fd, ptr %func, ptr %arg) uwtable { |
| ;CHECK-LABEL: test |
| ; A53-LABEL: test: |
| ; A53: // %bb.0: // %entry |
| ; A53-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill |
| ; A53-NEXT: .cfi_def_cfa_offset 16 |
| ; A53-NEXT: .cfi_offset w19, -8 |
| ; A53-NEXT: .cfi_offset w30, -16 |
| ; A53-NEXT: .cfi_remember_state |
| ; A53-NEXT: movi v0.2d, #0000000000000000 |
| ; A53-NEXT: mov x8, x0 |
| ; A53-NEXT: mov x19, x8 |
| ; A53-NEXT: mov w0, w1 |
| ; A53-NEXT: mov w9, #256 |
| ; A53-NEXT: stp x2, x3, [x8, #32] |
| ; A53-NEXT: mov x2, x8 |
| ; A53-NEXT: str q0, [x19, #16]! |
| ; A53-NEXT: str w1, [x19] |
| ; A53-NEXT: mov w1, #4 |
| ; A53-NEXT: str q0, [x8] |
| ; A53-NEXT: strh w9, [x8, #24] |
| ; A53-NEXT: str wzr, [x8, #20] |
| ; A53-NEXT: bl fcntl |
| ; A53-NEXT: adrp x9, gv0 |
| ; A53-NEXT: add x9, x9, :lo12:gv0 |
| ; A53-NEXT: cmp x19, x9 |
| ; A53-NEXT: b.eq .LBB0_4 |
| ; A53-NEXT: // %bb.1: |
| ; A53-NEXT: ldr w8, [x19] |
| ; A53-NEXT: ldr w9, [x9] |
| ; A53-NEXT: .p2align 4, , 8 |
| ; A53-NEXT: .LBB0_2: // %while.body.i.split.ver.us |
| ; A53-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; A53-NEXT: lsl w9, w9, #1 |
| ; A53-NEXT: cmp w9, w8 |
| ; A53-NEXT: b.le .LBB0_2 |
| ; A53-NEXT: // %bb.3: // %while.end.i |
| ; A53-NEXT: bl foo |
| ; A53-NEXT: adrp x8, gv1 |
| ; A53-NEXT: str x0, [x8, :lo12:gv1] |
| ; A53-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload |
| ; A53-NEXT: .cfi_def_cfa_offset 0 |
| ; A53-NEXT: .cfi_restore w19 |
| ; A53-NEXT: .cfi_restore w30 |
| ; A53-NEXT: ret |
| ; A53-NEXT: .p2align 4, , 8 |
| ; A53-NEXT: .LBB0_4: // %while.body.i.split |
| ; A53-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; A53-NEXT: .cfi_restore_state |
| ; A53-NEXT: b .LBB0_4 |
| entry: |
| ; Zero the first 40 bytes of %fde, then overwrite individual %struct1 fields. |
| tail call void @llvm.memset.p0.i64(ptr align 8 %fde, i8 0, i64 40, i1 false) |
| %state = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 4 |
| store i16 256, ptr %state, align 8 |
| %fd1 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 2 |
| store i32 %fd, ptr %fd1, align 8 |
| %force_eof = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 3 |
| store i32 0, ptr %force_eof, align 4 |
| %func2 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 6 |
| store ptr %func, ptr %func2, align 8 |
| %arg3 = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 7 |
| store ptr %arg, ptr %arg3, align 8 |
| ; Variadic call that is handed %fde itself. |
| ; NOTE(review): attribute group #6 is not defined in the visible part of this file - confirm it is intentional. |
| %call = tail call i32 (i32, i32, ...) @fcntl(i32 %fd, i32 4, ptr %fde) #6 |
| ; Field 2 is read back from memory after the call rather than reusing %fd. |
| %0 = load i32, ptr %fd1, align 8 |
| %cmp.i = icmp slt i32 %0, 0 |
| br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader |
| if.then.i: |
| unreachable |
| |
| while.body.i.preheader: |
| ; Pick a loop version depending on whether the field-2 address equals the address of @gv0. |
| %1 = load i32, ptr @gv0, align 4 |
| %2 = icmp eq ptr %fd1, @gv0 |
| br i1 %2, label %while.body.i.split, label %while.body.i.split.ver.us.preheader |
| |
| while.body.i.split.ver.us.preheader: |
| br label %while.body.i.split.ver.us |
| |
| while.body.i.split.ver.us: |
| ; Keep doubling the value loaded from @gv0 until it is signed-greater than the reloaded field 2. |
| %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %1, %while.body.i.split.ver.us.preheader ] |
| %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1 |
| %3 = icmp sgt i32 %mul.i.ver.us, %0 |
| br i1 %3, label %while.end.i, label %while.body.i.split.ver.us |
| |
| while.body.i.split: |
| ; Self-loop with no exit edge. |
| br label %while.body.i.split |
| |
| while.end.i: |
| ; Publish the pointer returned by @foo in @gv1. |
| %call.i = tail call ptr @foo() |
| store ptr %call.i, ptr @gv1, align 8 |
| br label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; TODO: Could this in-place byte swap be a halfword load, rev16, and halfword store? |
| |
| ; Loads the two adjacent bytes at %p and %p+1 and stores them back to the same locations swapped. |
| ; The checks currently expect two byte loads followed by two byte stores. |
| define void @rotate16_in_place(ptr %p) { |
| ; A53-LABEL: rotate16_in_place: |
| ; A53: // %bb.0: |
| ; A53-NEXT: ldrb w8, [x0, #1] |
| ; A53-NEXT: ldrb w9, [x0] |
| ; A53-NEXT: strb w8, [x0] |
| ; A53-NEXT: strb w9, [x0, #1] |
| ; A53-NEXT: ret |
| %p1 = getelementptr i8, ptr %p, i64 1 |
| %i0 = load i8, ptr %p, align 1 |
| %i1 = load i8, ptr %p1, align 1 |
| ; Byte 1 goes to offset 0 and byte 0 goes to offset 1. |
| store i8 %i1, ptr %p, align 1 |
| store i8 %i0, ptr %p1, align 1 |
| ret void |
| } |
| |
| ; TODO: Could this byte swap be a halfword load from %p, rev16, and halfword store to %q? |
| |
| ; Loads the two adjacent bytes at %p and %p+1 and stores them swapped to %q and %q+1. |
| ; The checks currently expect two byte loads followed by two byte stores. |
| define void @rotate16(ptr %p, ptr %q) { |
| ; A53-LABEL: rotate16: |
| ; A53: // %bb.0: |
| ; A53-NEXT: ldrb w8, [x0, #1] |
| ; A53-NEXT: ldrb w9, [x0] |
| ; A53-NEXT: strb w8, [x1] |
| ; A53-NEXT: strb w9, [x1, #1] |
| ; A53-NEXT: ret |
| %p1 = getelementptr i8, ptr %p, i64 1 |
| %q1 = getelementptr i8, ptr %q, i64 1 |
| %i0 = load i8, ptr %p, align 1 |
| %i1 = load i8, ptr %p1, align 1 |
| ; Source byte 1 goes to destination offset 0 and source byte 0 to destination offset 1. |
| store i8 %i1, ptr %q, align 1 |
| store i8 %i0, ptr %q1, align 1 |
| ret void |
| } |
| |
| ; Loads i16 elements 0 and 1 of %p and stores them back to the same locations swapped. |
| ; The checks expect a single 32-bit load, a rotate right by 16, and a single 32-bit store. |
| define void @rotate32_in_place(ptr %p) { |
| ; A53-LABEL: rotate32_in_place: |
| ; A53: // %bb.0: |
| ; A53-NEXT: ldr w8, [x0] |
| ; A53-NEXT: ror w8, w8, #16 |
| ; A53-NEXT: str w8, [x0] |
| ; A53-NEXT: ret |
| %p1 = getelementptr i16, ptr %p, i64 1 |
| %i0 = load i16, ptr %p, align 2 |
| %i1 = load i16, ptr %p1, align 2 |
| ; Element 1 goes to slot 0 and element 0 goes to slot 1. |
| store i16 %i1, ptr %p, align 2 |
| store i16 %i0, ptr %p1, align 2 |
| ret void |
| } |
| |
| ; Loads i16 elements 0 and 1 of %p and stores them swapped into i16 elements 42 and 43 of %p. |
| ; The checks expect a single 32-bit load, a rotate right by 16, and a single 32-bit store at byte offset 84. |
| define void @rotate32(ptr %p) { |
| ; A53-LABEL: rotate32: |
| ; A53: // %bb.0: |
| ; A53-NEXT: ldr w8, [x0] |
| ; A53-NEXT: ror w8, w8, #16 |
| ; A53-NEXT: str w8, [x0, #84] |
| ; A53-NEXT: ret |
| %p1 = getelementptr i16, ptr %p, i64 1 |
| %p42 = getelementptr i16, ptr %p, i64 42 |
| %p43 = getelementptr i16, ptr %p, i64 43 |
| %i0 = load i16, ptr %p, align 2 |
| %i1 = load i16, ptr %p1, align 2 |
| ; Element 1 goes to slot 42 and element 0 goes to slot 43. |
| store i16 %i1, ptr %p42, align 2 |
| store i16 %i0, ptr %p43, align 2 |
| ret void |
| } |
| |
| ; Prefer paired memops over rotate. |
| |
| ; Loads i32 elements 0 and 1 of %p and stores them back to the same locations swapped. |
| ; The checks expect a paired load and a paired store with the registers exchanged, with no rotate instruction. |
| define void @rotate64_in_place(ptr %p) { |
| ; A53-LABEL: rotate64_in_place: |
| ; A53: // %bb.0: |
| ; A53-NEXT: ldp w9, w8, [x0] |
| ; A53-NEXT: stp w8, w9, [x0] |
| ; A53-NEXT: ret |
| %p1 = getelementptr i32, ptr %p, i64 1 |
| %i0 = load i32, ptr %p, align 4 |
| %i1 = load i32, ptr %p1, align 4 |
| ; Element 1 goes to slot 0 and element 0 goes to slot 1. |
| store i32 %i1, ptr %p, align 4 |
| store i32 %i0, ptr %p1, align 4 |
| ret void |
| } |
| |
| ; Prefer paired memops over rotate. |
| |
| ; Loads i32 elements 0 and 1 of %p and stores them swapped into i32 elements 2 and 3 of %p. |
| ; The checks expect a paired load and a paired store at byte offset 8, with no rotate instruction. |
| define void @rotate64(ptr %p) { |
| ; A53-LABEL: rotate64: |
| ; A53: // %bb.0: |
| ; A53-NEXT: ldp w9, w8, [x0] |
| ; A53-NEXT: stp w8, w9, [x0, #8] |
| ; A53-NEXT: ret |
| %p1 = getelementptr i32, ptr %p, i64 1 |
| %p2 = getelementptr i32, ptr %p, i64 2 |
| %p3 = getelementptr i32, ptr %p, i64 3 |
| %i0 = load i32, ptr %p, align 4 |
| %i1 = load i32, ptr %p1, align 4 |
| ; Element 1 goes to slot 2 and element 0 goes to slot 3. |
| store i32 %i1, ptr %p2, align 4 |
| store i32 %i0, ptr %p3, align 4 |
| ret void |
| } |
| |
| ; External declarations for the callees used by @test. |
| declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) |
| declare i32 @fcntl(i32, i32, ...) |
| declare noalias ptr @foo() |