| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s |
| |
| ; Extend-then-splat, i8 -> i16: the <8 x i8> loaded from %x is sign-extended and |
| ; lane 3 of the widened vector is broadcast. The loop multiplies that splat by |
| ; the sign-extended <8 x i8> loaded from %y and accumulates into %q. |
| ; The expected output keeps the broadcast in the narrow type (dup v1.8b) and |
| ; folds both extends, the multiply and the add into a single smlal. |
| define <8 x i16> @mul_splat_sext_v8i16(ptr %x, ptr %y) { |
| ; CHECK-LABEL: mul_splat_sext_v8i16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ldr d1, [x0] |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: dup v1.8b, v1.b[3] |
| ; CHECK-NEXT: .LBB0_1: // %l1 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d2, [x1, x8] |
| ; CHECK-NEXT: add x8, x8, #4 |
| ; CHECK-NEXT: cmp w8, #4 |
| ; CHECK-NEXT: smlal v0.8h, v2.8b, v1.8b |
| ; CHECK-NEXT: b.eq .LBB0_1 |
| ; CHECK-NEXT: // %bb.2: // %l2 |
| ; CHECK-NEXT: ret |
| entry: |
| %x.val = load <8 x i8>, ptr %x |
| %x.ext = sext <8 x i8> %x.val to <8 x i16> |
| ; Every mask lane selects element 3 of the first operand, so the second |
| ; operand is never read; poison is used as the placeholder instead of the |
| ; deprecated undef. |
| %a = shufflevector <8 x i16> %x.ext, <8 x i16> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
| br label %l1 |
| |
| l1: |
| ; %p is the iteration counter, %q the running <8 x i16> accumulator. |
| %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| %q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| ; The load address advances by 4 i8 elements per iteration. |
| %y.idx = mul nuw nsw i32 %p, 4 |
| %y.ptr = getelementptr i8, ptr %y, i32 %y.idx |
| %y.val = load <8 x i8>, ptr %y.ptr |
| %y.ext = sext <8 x i8> %y.val to <8 x i16> |
| %b = mul <8 x i16> %y.ext, %a |
| %c = add <8 x i16> %q, %b |
| %pa = add i32 %p, 1 |
| ; The back edge is taken only while %p == 0, so the body runs exactly twice. |
| %c1 = icmp eq i32 %p, 0 |
| br i1 %c1, label %l1, label %l2 |
| |
| l2: |
| ret <8 x i16> %c |
| } |
| |
| ; Extend-then-splat, i16 -> i32: the <4 x i16> loaded from %x is sign-extended |
| ; and lane 3 of the widened vector is broadcast. The loop multiplies that splat |
| ; by the sign-extended <4 x i16> loaded from %y and accumulates into %q. |
| ; The expected output needs no separate dup: the broadcast is expressed through |
| ; the by-element form of smlal (v1.h[3]). |
| define <4 x i32> @mul_splat_sext_v4i32(ptr %x, ptr %y) { |
| ; CHECK-LABEL: mul_splat_sext_v4i32: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: ldr d1, [x0] |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB1_1: // %l1 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d2, [x1, x8] |
| ; CHECK-NEXT: add x8, x8, #8 |
| ; CHECK-NEXT: cmp w8, #8 |
| ; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3] |
| ; CHECK-NEXT: b.eq .LBB1_1 |
| ; CHECK-NEXT: // %bb.2: // %l2 |
| ; CHECK-NEXT: ret |
| entry: |
| %x.val = load <4 x i16>, ptr %x |
| %x.ext = sext <4 x i16> %x.val to <4 x i32> |
| ; Every mask lane selects element 3 of the first operand, so the second |
| ; operand is never read; poison is used as the placeholder instead of the |
| ; deprecated undef. |
| %a = shufflevector <4 x i32> %x.ext, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| br label %l1 |
| |
| l1: |
| ; %p is the iteration counter, %q the running <4 x i32> accumulator. |
| %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| ; The load address advances by 4 i16 elements per iteration. |
| %y.idx = mul nuw nsw i32 %p, 4 |
| %y.ptr = getelementptr i16, ptr %y, i32 %y.idx |
| %y.val = load <4 x i16>, ptr %y.ptr |
| %y.ext = sext <4 x i16> %y.val to <4 x i32> |
| %b = mul <4 x i32> %y.ext, %a |
| %c = add <4 x i32> %q, %b |
| %pa = add i32 %p, 1 |
| ; The back edge is taken only while %p == 0, so the body runs exactly twice. |
| %c1 = icmp eq i32 %p, 0 |
| br i1 %c1, label %l1, label %l2 |
| |
| l2: |
| ret <4 x i32> %c |
| } |
| |
| ; Extend-then-splat, i32 -> i64: the <2 x i32> loaded from %x is sign-extended |
| ; and lane 1 of the widened vector is broadcast. The loop multiplies that splat |
| ; by the sign-extended <2 x i32> loaded from %y and accumulates into %q. |
| ; The expected output needs no separate dup: the broadcast is expressed through |
| ; the by-element form of smlal (v1.s[1]). |
| define <2 x i64> @mul_splat_sext_v2i64(ptr %x, ptr %y) { |
| ; CHECK-LABEL: mul_splat_sext_v2i64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: ldr d1, [x0] |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB2_1: // %l1 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d2, [x1, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp w8, #16 |
| ; CHECK-NEXT: smlal v0.2d, v2.2s, v1.s[1] |
| ; CHECK-NEXT: b.eq .LBB2_1 |
| ; CHECK-NEXT: // %bb.2: // %l2 |
| ; CHECK-NEXT: ret |
| entry: |
| %x.val = load <2 x i32>, ptr %x |
| %x.ext = sext <2 x i32> %x.val to <2 x i64> |
| ; Both mask lanes select element 1 of the first operand, so the second |
| ; operand is never read; poison is used as the placeholder instead of the |
| ; deprecated undef. |
| %a = shufflevector <2 x i64> %x.ext, <2 x i64> poison, <2 x i32> <i32 1, i32 1> |
| br label %l1 |
| |
| l1: |
| ; %p is the iteration counter, %q the running <2 x i64> accumulator. |
| %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| %q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| ; The load address advances by 4 i32 elements per iteration. |
| %y.idx = mul nuw nsw i32 %p, 4 |
| %y.ptr = getelementptr i32, ptr %y, i32 %y.idx |
| %y.val = load <2 x i32>, ptr %y.ptr |
| %y.ext = sext <2 x i32> %y.val to <2 x i64> |
| %b = mul <2 x i64> %y.ext, %a |
| %c = add <2 x i64> %q, %b |
| %pa = add i32 %p, 1 |
| ; The back edge is taken only while %p == 0, so the body runs exactly twice. |
| %c1 = icmp eq i32 %p, 0 |
| br i1 %c1, label %l1, label %l2 |
| |
| l2: |
| ret <2 x i64> %c |
| } |
| |
| ; Splat-then-extend, i8 -> i16: lane 3 of the <8 x i8> loaded from %x is |
| ; broadcast in the narrow type and the result is then sign-extended. The loop |
| ; multiplies it by the sign-extended <8 x i8> loaded from %y and accumulates |
| ; into %q. |
| ; The expected output is a narrow dup (v1.8b) feeding a single smlal. |
| define <8 x i16> @mul_sext_splat_v8i16(ptr %x, ptr %y) { |
| ; CHECK-LABEL: mul_sext_splat_v8i16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: ldr d1, [x0] |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: dup v1.8b, v1.b[3] |
| ; CHECK-NEXT: .LBB3_1: // %l1 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d2, [x1, x8] |
| ; CHECK-NEXT: add x8, x8, #4 |
| ; CHECK-NEXT: cmp w8, #4 |
| ; CHECK-NEXT: smlal v0.8h, v2.8b, v1.8b |
| ; CHECK-NEXT: b.eq .LBB3_1 |
| ; CHECK-NEXT: // %bb.2: // %l2 |
| ; CHECK-NEXT: ret |
| entry: |
| %x.val = load <8 x i8>, ptr %x |
| ; Every mask lane selects element 3 of the first operand, so the second |
| ; operand is never read; poison is used as the placeholder instead of the |
| ; deprecated undef. |
| %x.spt = shufflevector <8 x i8> %x.val, <8 x i8> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
| %a = sext <8 x i8> %x.spt to <8 x i16> |
| br label %l1 |
| |
| l1: |
| ; %p is the iteration counter, %q the running <8 x i16> accumulator. |
| %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| %q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| ; The load address advances by 4 i8 elements per iteration. |
| %y.idx = mul nuw nsw i32 %p, 4 |
| %y.ptr = getelementptr i8, ptr %y, i32 %y.idx |
| %y.val = load <8 x i8>, ptr %y.ptr |
| %y.ext = sext <8 x i8> %y.val to <8 x i16> |
| %b = mul <8 x i16> %y.ext, %a |
| %c = add <8 x i16> %q, %b |
| %pa = add i32 %p, 1 |
| ; The back edge is taken only while %p == 0, so the body runs exactly twice. |
| %c1 = icmp eq i32 %p, 0 |
| br i1 %c1, label %l1, label %l2 |
| |
| l2: |
| ret <8 x i16> %c |
| } |
| |
| ; Splat-then-extend, i16 -> i32: lane 3 of the <4 x i16> loaded from %x is |
| ; broadcast in the narrow type and the result is then sign-extended. The loop |
| ; multiplies it by the sign-extended <4 x i16> loaded from %y and accumulates |
| ; into %q. |
| ; The expected output needs no separate dup: the broadcast is expressed through |
| ; the by-element form of smlal (v1.h[3]). |
| define <4 x i32> @mul_sext_splat_v4i32(ptr %x, ptr %y) { |
| ; CHECK-LABEL: mul_sext_splat_v4i32: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: ldr d1, [x0] |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB4_1: // %l1 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d2, [x1, x8] |
| ; CHECK-NEXT: add x8, x8, #8 |
| ; CHECK-NEXT: cmp w8, #8 |
| ; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3] |
| ; CHECK-NEXT: b.eq .LBB4_1 |
| ; CHECK-NEXT: // %bb.2: // %l2 |
| ; CHECK-NEXT: ret |
| entry: |
| %x.val = load <4 x i16>, ptr %x |
| ; Every mask lane selects element 3 of the first operand, so the second |
| ; operand is never read; poison is used as the placeholder instead of the |
| ; deprecated undef. |
| %x.spt = shufflevector <4 x i16> %x.val, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| %a = sext <4 x i16> %x.spt to <4 x i32> |
| br label %l1 |
| |
| l1: |
| ; %p is the iteration counter, %q the running <4 x i32> accumulator. |
| %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| ; The load address advances by 4 i16 elements per iteration. |
| %y.idx = mul nuw nsw i32 %p, 4 |
| %y.ptr = getelementptr i16, ptr %y, i32 %y.idx |
| %y.val = load <4 x i16>, ptr %y.ptr |
| %y.ext = sext <4 x i16> %y.val to <4 x i32> |
| %b = mul <4 x i32> %y.ext, %a |
| %c = add <4 x i32> %q, %b |
| %pa = add i32 %p, 1 |
| ; The back edge is taken only while %p == 0, so the body runs exactly twice. |
| %c1 = icmp eq i32 %p, 0 |
| br i1 %c1, label %l1, label %l2 |
| |
| l2: |
| ret <4 x i32> %c |
| } |
| |
| ; Splat-then-extend, i32 -> i64: lane 1 of the <2 x i32> loaded from %x is |
| ; broadcast in the narrow type and the result is then sign-extended. The loop |
| ; multiplies it by the sign-extended <2 x i32> loaded from %y and accumulates |
| ; into %q. |
| ; The expected output needs no separate dup: the broadcast is expressed through |
| ; the by-element form of smlal (v1.s[1]). |
| define <2 x i64> @mul_sext_splat_v2i64(ptr %x, ptr %y) { |
| ; CHECK-LABEL: mul_sext_splat_v2i64: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: ldr d1, [x0] |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB5_1: // %l1 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d2, [x1, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp w8, #16 |
| ; CHECK-NEXT: smlal v0.2d, v2.2s, v1.s[1] |
| ; CHECK-NEXT: b.eq .LBB5_1 |
| ; CHECK-NEXT: // %bb.2: // %l2 |
| ; CHECK-NEXT: ret |
| entry: |
| %x.val = load <2 x i32>, ptr %x |
| ; Both mask lanes select element 1 of the first operand, so the second |
| ; operand is never read; poison is used as the placeholder instead of the |
| ; deprecated undef. |
| %x.spt = shufflevector <2 x i32> %x.val, <2 x i32> poison, <2 x i32> <i32 1, i32 1> |
| %a = sext <2 x i32> %x.spt to <2 x i64> |
| br label %l1 |
| |
| l1: |
| ; %p is the iteration counter, %q the running <2 x i64> accumulator. |
| %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| %q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| ; The load address advances by 4 i32 elements per iteration. |
| %y.idx = mul nuw nsw i32 %p, 4 |
| %y.ptr = getelementptr i32, ptr %y, i32 %y.idx |
| %y.val = load <2 x i32>, ptr %y.ptr |
| %y.ext = sext <2 x i32> %y.val to <2 x i64> |
| %b = mul <2 x i64> %y.ext, %a |
| %c = add <2 x i64> %q, %b |
| %pa = add i32 %p, 1 |
| ; The back edge is taken only while %p == 0, so the body runs exactly twice. |
| %c1 = icmp eq i32 %p, 0 |
| br i1 %c1, label %l1, label %l2 |
| |
| l2: |
| ret <2 x i64> %c |
| } |