| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -o -| FileCheck %s |
| |
| target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" |
| target triple = "aarch64-unknown-linux-gnu" |
| |
| ; This test covers a case where an AArch64 DUP instruction is generated with an |
| ; invalid encoding, resulting in a crash. We don't care about the specific output |
| ; here, only that this case no longer causes said crash. |
| define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr { |
| ; CHECK-LABEL: dupext_crashtest: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: dup v0.2s, w0 |
| ; CHECK-NEXT: .LBB0_1: // %vector.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d1, [x8] |
| ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s |
| ; CHECK-NEXT: xtn v1.2s, v1.2d |
| ; CHECK-NEXT: str d1, [x8] |
| ; CHECK-NEXT: b .LBB0_1 |
| ; The assertions above are autogenerated (see the note at the top of this file). |
| ; There is no 'ret' in this function: %vector.body branches unconditionally |
| ; back to itself, so the expected output ends in a self-loop. |
| entry: |
| ; Zero-extend the i32 argument to i64; this is the scalar that gets splatted. |
| %conv314 = zext i32 %e to i64 |
| br label %vector.memcheck |
| |
| ; Empty block: it only forwards control to %vector.ph. |
| vector.memcheck: ; preds = %entry |
| br label %vector.ph |
| |
| vector.ph: ; preds = %vector.memcheck |
| ; Splat %conv314 into both lanes of a <2 x i64>: insert into lane 0, then |
| ; shuffle with an all-zero mask so every result lane reads lane 0. |
| %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %conv314, i32 0 |
| %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: ; preds = %vector.body, %vector.ph |
| ; Load <2 x i32> through an undef pointer, zero-extend to <2 x i64>, multiply |
| ; by the splat (nuw), and truncate the product back to <2 x i32>. |
| %wide.load = load <2 x i32>, ptr undef, align 4 |
| %0 = zext <2 x i32> %wide.load to <2 x i64> |
| %1 = mul nuw <2 x i64> %broadcast.splat, %0 |
| %2 = trunc <2 x i64> %1 to <2 x i32> |
| ; The select's condition and true operand are both undef; only the false |
| ; operand carries the computed product. |
| %3 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> %2 |
| ; ptr-to-ptr bitcast of undef: source and destination types are identical. |
| ; NOTE(review): presumably a leftover from test reduction - not verified. |
| %4 = bitcast ptr undef to ptr |
| store <2 x i32> %3, ptr %4, align 4 |
| ; Unconditional back-edge: this loop has no exit. |
| br label %vector.body |
| } |
| |
| ; This test previously made the compiler get stuck in a loop, repeatedly hoisting the 'and' up to the load and then sinking it back down to the mull. |
| define i32 @dup_and_load(ptr %p, i1 %c) { |
| ; CHECK-LABEL: dup_and_load: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: ldrb w0, [x0] |
| ; CHECK-NEXT: tbz w1, #0, .LBB1_3 |
| ; CHECK-NEXT: // %bb.1: // %ph |
| ; CHECK-NEXT: dup v0.8h, w0 |
| ; CHECK-NEXT: mov w9, wzr |
| ; CHECK-NEXT: .LBB1_2: // %vector.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d1, [x8] |
| ; CHECK-NEXT: add w9, w9, #1 |
| ; CHECK-NEXT: cmp w9, #100 |
| ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 |
| ; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h |
| ; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h |
| ; CHECK-NEXT: stp q1, q2, [x8] |
| ; CHECK-NEXT: b.lt .LBB1_2 |
| ; CHECK-NEXT: .LBB1_3: // %end |
| ; CHECK-NEXT: ret |
| ; The assertions above are autogenerated (see the note at the top of this file). |
| ; The function returns the loaded i32 masked to its low 8 bits. When %c is |
| ; true, that masked value is also splatted and used as a multiplier in a loop |
| ; that runs 100 times before reaching %end. |
| entry: |
| ; The masked scalar is used in two places: the splat in %ph and the 'ret' in %end. |
| %l = load i32, ptr %p |
| %and255 = and i32 %l, 255 |
| br i1 %c, label %ph, label %end |
| |
| ph: ; preds = %entry |
| ; Splat %and255 into all eight lanes of a <8 x i32>: insert into lane 0, then |
| ; shuffle with an all-zero mask so every result lane reads lane 0. |
| %broadcast.splatinsert = insertelement <8 x i32> poison, i32 %and255, i32 0 |
| %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: ; preds = %vector.body, %ph |
| ; %iv starts at 0 on entry from %ph and advances by 1 on each back-edge. |
| %iv = phi i32 [ 0, %ph ], [ %iv.next, %vector.body ] |
| ; Load <8 x i8> from %p, zero-extend to <8 x i32>, multiply by the splat, and |
| ; store the <8 x i32> product back through the same pointer %p. |
| %wide.load = load <8 x i8>, ptr %p, align 4 |
| %0 = zext <8 x i8> %wide.load to <8 x i32> |
| %1 = mul <8 x i32> %broadcast.splat, %0 |
| store <8 x i32> %1, ptr %p, align 4 |
| ; Keep looping while the incremented counter is (signed) less than 100. |
| %iv.next = add i32 %iv, 1 |
| %e = icmp slt i32 %iv.next, 100 |
| br i1 %e, label %vector.body, label %end |
| |
| end: ; preds = %vector.body, %entry |
| ; Both paths return the masked scalar computed in %entry. |
| ret i32 %and255 |
| } |