blob: d52ac7847f814684ca3e0e6393853d4109ef140e [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
; Splat applied AFTER the sign extension, i8 -> i16 lanes.
;
; Lane 3 of the sign-extended <8 x i8> loaded from %x is broadcast to all eight
; lanes and multiply-accumulated against sign-extended <8 x i8> loads from %y.
; The loop body runs exactly twice (it branches back only while %p == 0).
;
; Expected lowering: A64 has no byte-lane by-element form of smlal, so the splat
; is materialised once on the narrow vector with dup ahead of the loop and the
; multiply-add inside the loop becomes a plain vector smlal.
define <8 x i16> @mul_splat_sext_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: mul_splat_sext_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: dup v1.8b, v1.b[3]
; CHECK-NEXT: .LBB0_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d2, [x1, x8]
; CHECK-NEXT: add x8, x8, #4
; CHECK-NEXT: cmp w8, #4
; CHECK-NEXT: smlal v0.8h, v2.8b, v1.8b
; CHECK-NEXT: b.eq .LBB0_1
; CHECK-NEXT: // %bb.2: // %l2
; CHECK-NEXT: ret
entry:
  %x.val = load <8 x i8>, ptr %x
  %x.ext = sext <8 x i8> %x.val to <8 x i16>
  ; The mask only selects lane 3 of the first operand, so the second operand is
  ; a pure placeholder; poison is used because undef is deprecated.
  %a = shufflevector <8 x i16> %x.ext, <8 x i16> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  br label %l1
l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Byte offset into %y advances by 4 per iteration (i8 elements, index p*4).
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i8, ptr %y, i32 %y.idx
  %y.val = load <8 x i8>, ptr %y.ptr
  %y.ext = sext <8 x i8> %y.val to <8 x i16>
  %b = mul <8 x i16> %y.ext, %a
  %c = add <8 x i16> %q, %b
  %pa = add i32 %p, 1
  ; Loop again only when the counter was 0, i.e. two iterations in total.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2
l2:
  ret <8 x i16> %c
}
; Splat applied AFTER the sign extension, i16 -> i32 lanes.
;
; Lane 3 of the sign-extended <4 x i16> loaded from %x is broadcast to all four
; lanes and multiply-accumulated against sign-extended <4 x i16> loads from %y.
; The loop body runs exactly twice (it branches back only while %p == 0).
;
; Expected lowering: the splat folds into the by-element form of smlal
; (v1.h[3]), so no separate dup is emitted.
define <4 x i32> @mul_splat_sext_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: mul_splat_sext_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB1_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d2, [x1, x8]
; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: cmp w8, #8
; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3]
; CHECK-NEXT: b.eq .LBB1_1
; CHECK-NEXT: // %bb.2: // %l2
; CHECK-NEXT: ret
entry:
  %x.val = load <4 x i16>, ptr %x
  %x.ext = sext <4 x i16> %x.val to <4 x i32>
  ; The mask only selects lane 3 of the first operand, so the second operand is
  ; a pure placeholder; poison is used because undef is deprecated.
  %a = shufflevector <4 x i32> %x.ext, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  br label %l1
l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Byte offset into %y advances by 8 per iteration (i16 elements, index p*4).
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i16, ptr %y, i32 %y.idx
  %y.val = load <4 x i16>, ptr %y.ptr
  %y.ext = sext <4 x i16> %y.val to <4 x i32>
  %b = mul <4 x i32> %y.ext, %a
  %c = add <4 x i32> %q, %b
  %pa = add i32 %p, 1
  ; Loop again only when the counter was 0, i.e. two iterations in total.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2
l2:
  ret <4 x i32> %c
}
; Splat applied AFTER the sign extension, i32 -> i64 lanes.
;
; Lane 1 of the sign-extended <2 x i32> loaded from %x is broadcast to both
; lanes and multiply-accumulated against sign-extended <2 x i32> loads from %y.
; The loop body runs exactly twice (it branches back only while %p == 0).
;
; Expected lowering: the splat folds into the by-element form of smlal
; (v1.s[1]), so no separate dup is emitted.
define <2 x i64> @mul_splat_sext_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: mul_splat_sext_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB2_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d2, [x1, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp w8, #16
; CHECK-NEXT: smlal v0.2d, v2.2s, v1.s[1]
; CHECK-NEXT: b.eq .LBB2_1
; CHECK-NEXT: // %bb.2: // %l2
; CHECK-NEXT: ret
entry:
  %x.val = load <2 x i32>, ptr %x
  %x.ext = sext <2 x i32> %x.val to <2 x i64>
  ; The mask only selects lane 1 of the first operand, so the second operand is
  ; a pure placeholder; poison is used because undef is deprecated.
  %a = shufflevector <2 x i64> %x.ext, <2 x i64> poison, <2 x i32> <i32 1, i32 1>
  br label %l1
l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Byte offset into %y advances by 16 per iteration (i32 elements, index p*4).
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i32, ptr %y, i32 %y.idx
  %y.val = load <2 x i32>, ptr %y.ptr
  %y.ext = sext <2 x i32> %y.val to <2 x i64>
  %b = mul <2 x i64> %y.ext, %a
  %c = add <2 x i64> %q, %b
  %pa = add i32 %p, 1
  ; Loop again only when the counter was 0, i.e. two iterations in total.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2
l2:
  ret <2 x i64> %c
}
; Splat applied BEFORE the sign extension, i8 -> i16 lanes.
;
; Lane 3 of the <8 x i8> loaded from %x is broadcast on the narrow type, then
; sign-extended and multiply-accumulated against sign-extended <8 x i8> loads
; from %y. The loop body runs exactly twice (it branches back only while
; %p == 0).
;
; Expected lowering: A64 has no byte-lane by-element form of smlal, so the splat
; is materialised once with dup ahead of the loop and the multiply-add inside
; the loop becomes a plain vector smlal.
define <8 x i16> @mul_sext_splat_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: mul_sext_splat_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: dup v1.8b, v1.b[3]
; CHECK-NEXT: .LBB3_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d2, [x1, x8]
; CHECK-NEXT: add x8, x8, #4
; CHECK-NEXT: cmp w8, #4
; CHECK-NEXT: smlal v0.8h, v2.8b, v1.8b
; CHECK-NEXT: b.eq .LBB3_1
; CHECK-NEXT: // %bb.2: // %l2
; CHECK-NEXT: ret
entry:
  %x.val = load <8 x i8>, ptr %x
  ; The mask only selects lane 3 of the first operand, so the second operand is
  ; a pure placeholder; poison is used because undef is deprecated.
  %x.spt = shufflevector <8 x i8> %x.val, <8 x i8> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %a = sext <8 x i8> %x.spt to <8 x i16>
  br label %l1
l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Byte offset into %y advances by 4 per iteration (i8 elements, index p*4).
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i8, ptr %y, i32 %y.idx
  %y.val = load <8 x i8>, ptr %y.ptr
  %y.ext = sext <8 x i8> %y.val to <8 x i16>
  %b = mul <8 x i16> %y.ext, %a
  %c = add <8 x i16> %q, %b
  %pa = add i32 %p, 1
  ; Loop again only when the counter was 0, i.e. two iterations in total.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2
l2:
  ret <8 x i16> %c
}
; Splat applied BEFORE the sign extension, i16 -> i32 lanes.
;
; Lane 3 of the <4 x i16> loaded from %x is broadcast on the narrow type, then
; sign-extended and multiply-accumulated against sign-extended <4 x i16> loads
; from %y. The loop body runs exactly twice (it branches back only while
; %p == 0).
;
; Expected lowering: the splat folds into the by-element form of smlal
; (v1.h[3]), so no separate dup is emitted.
define <4 x i32> @mul_sext_splat_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: mul_sext_splat_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB4_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d2, [x1, x8]
; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: cmp w8, #8
; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3]
; CHECK-NEXT: b.eq .LBB4_1
; CHECK-NEXT: // %bb.2: // %l2
; CHECK-NEXT: ret
entry:
  %x.val = load <4 x i16>, ptr %x
  ; The mask only selects lane 3 of the first operand, so the second operand is
  ; a pure placeholder; poison is used because undef is deprecated.
  %x.spt = shufflevector <4 x i16> %x.val, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %a = sext <4 x i16> %x.spt to <4 x i32>
  br label %l1
l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Byte offset into %y advances by 8 per iteration (i16 elements, index p*4).
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i16, ptr %y, i32 %y.idx
  %y.val = load <4 x i16>, ptr %y.ptr
  %y.ext = sext <4 x i16> %y.val to <4 x i32>
  %b = mul <4 x i32> %y.ext, %a
  %c = add <4 x i32> %q, %b
  %pa = add i32 %p, 1
  ; Loop again only when the counter was 0, i.e. two iterations in total.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2
l2:
  ret <4 x i32> %c
}
; Splat applied BEFORE the sign extension, i32 -> i64 lanes.
;
; Lane 1 of the <2 x i32> loaded from %x is broadcast on the narrow type, then
; sign-extended and multiply-accumulated against sign-extended <2 x i32> loads
; from %y. The loop body runs exactly twice (it branches back only while
; %p == 0).
;
; Expected lowering: the splat folds into the by-element form of smlal
; (v1.s[1]), so no separate dup is emitted.
define <2 x i64> @mul_sext_splat_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: mul_sext_splat_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB5_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d2, [x1, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp w8, #16
; CHECK-NEXT: smlal v0.2d, v2.2s, v1.s[1]
; CHECK-NEXT: b.eq .LBB5_1
; CHECK-NEXT: // %bb.2: // %l2
; CHECK-NEXT: ret
entry:
  %x.val = load <2 x i32>, ptr %x
  ; The mask only selects lane 1 of the first operand, so the second operand is
  ; a pure placeholder; poison is used because undef is deprecated.
  %x.spt = shufflevector <2 x i32> %x.val, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %a = sext <2 x i32> %x.spt to <2 x i64>
  br label %l1
l1:
  ; %p is the iteration counter, %q the running accumulator.
  %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
  %q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ]
  ; Byte offset into %y advances by 16 per iteration (i32 elements, index p*4).
  %y.idx = mul nuw nsw i32 %p, 4
  %y.ptr = getelementptr i32, ptr %y, i32 %y.idx
  %y.val = load <2 x i32>, ptr %y.ptr
  %y.ext = sext <2 x i32> %y.val to <2 x i64>
  %b = mul <2 x i64> %y.ext, %a
  %c = add <2 x i64> %q, %b
  %pa = add i32 %p, 1
  ; Loop again only when the counter was 0, i.e. two iterations in total.
  %c1 = icmp eq i32 %p, 0
  br i1 %c1, label %l1, label %l2
l2:
  ret <2 x i64> %c
}