blob: dda610e5dd3cb1481fee027caf3b5af01495580a [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
define <8 x i8> @shadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: shadd.8b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A, align 8
%tmp2 = load <8 x i8>, ptr %B, align 8
%tmp3 = tail call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
define <16 x i8> @shadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shadd.16b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A, align 16
%tmp2 = load <16 x i8>, ptr %B, align 16
%tmp3 = tail call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
define <4 x i16> @shadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: shadd.4h v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A, align 8
%tmp2 = load <4 x i16>, ptr %B, align 8
%tmp3 = tail call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
define <8 x i16> @shadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shadd.8h v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A, align 16
%tmp2 = load <8 x i16>, ptr %B, align 16
%tmp3 = tail call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
define <2 x i32> @shadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: shadd.2s v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A, align 8
%tmp2 = load <2 x i32>, ptr %B, align 8
%tmp3 = tail call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
define <4 x i32> @shadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shadd.4s v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A, align 16
%tmp2 = load <4 x i32>, ptr %B, align 16
%tmp3 = tail call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
define <8 x i8> @uhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uhadd.8b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A, align 8
%tmp2 = load <8 x i8>, ptr %B, align 8
%tmp3 = tail call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
define <16 x i8> @uhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uhadd.16b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A, align 16
%tmp2 = load <16 x i8>, ptr %B, align 16
%tmp3 = tail call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
define <4 x i16> @uhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uhadd.4h v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A, align 8
%tmp2 = load <4 x i16>, ptr %B, align 8
%tmp3 = tail call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
define <8 x i16> @uhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uhadd.8h v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A, align 16
%tmp2 = load <8 x i16>, ptr %B, align 16
%tmp3 = tail call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
define <2 x i32> @uhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A, align 8
%tmp2 = load <2 x i32>, ptr %B, align 8
%tmp3 = tail call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
define <4 x i32> @uhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uhadd.4s v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A, align 16
%tmp2 = load <4 x i32>, ptr %B, align 16
%tmp3 = tail call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>)
define <8 x i8> @srhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srhadd.8b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A, align 8
%tmp2 = load <8 x i8>, ptr %B, align 8
%tmp3 = tail call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
define <16 x i8> @srhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srhadd.16b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A, align 16
%tmp2 = load <16 x i8>, ptr %B, align 16
%tmp3 = tail call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
define <4 x i16> @srhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srhadd.4h v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A, align 8
%tmp2 = load <4 x i16>, ptr %B, align 8
%tmp3 = tail call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
define <8 x i16> @srhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srhadd.8h v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A, align 16
%tmp2 = load <8 x i16>, ptr %B, align 16
%tmp3 = tail call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
define <2 x i32> @srhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A, align 8
%tmp2 = load <2 x i32>, ptr %B, align 8
%tmp3 = tail call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
define <4 x i32> @srhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srhadd.4s v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A, align 16
%tmp2 = load <4 x i32>, ptr %B, align 16
%tmp3 = tail call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
define <8 x i8> @urhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urhadd.8b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A, align 8
%tmp2 = load <8 x i8>, ptr %B, align 8
%tmp3 = tail call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
define <16 x i8> @urhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urhadd.16b v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A, align 16
%tmp2 = load <16 x i8>, ptr %B, align 16
%tmp3 = tail call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
define <4 x i16> @urhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urhadd.4h v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A, align 8
%tmp2 = load <4 x i16>, ptr %B, align 8
%tmp3 = tail call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
define <8 x i16> @urhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urhadd.8h v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A, align 16
%tmp2 = load <8 x i16>, ptr %B, align 16
%tmp3 = tail call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
define <2 x i32> @urhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A, align 8
%tmp2 = load <2 x i32>, ptr %B, align 8
%tmp3 = tail call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
define <4 x i32> @urhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urhadd.4s v0, v0, v1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A, align 16
%tmp2 = load <4 x i32>, ptr %B, align 16
%tmp3 = tail call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
define void @testLowerToSRHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD8b:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd.8b v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
%sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
%add1 = add nsw <8 x i16> %sextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%add2 = add nsw <8 x i16> %add1, %sextsrc2
%resulti16 = lshr <8 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
store <8 x i8> %result, ptr %dest, align 8
ret void
}
define void @testLowerToSRHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD4h:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd.4h v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
%sextsrc2 = sext <4 x i16> %src2 to <4 x i32>
%add1 = add nsw <4 x i32> %sextsrc1, <i32 1, i32 1, i32 1, i32 1>
%add2 = add nsw <4 x i32> %add1, %sextsrc2
%resulti16 = lshr <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
%result = trunc <4 x i32> %resulti16 to <4 x i16>
store <4 x i16> %result, ptr %dest, align 8
ret void
}
define void @testLowerToSRHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD2s:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd.2s v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
%sextsrc2 = sext <2 x i32> %src2 to <2 x i64>
%add1 = add nsw <2 x i64> %sextsrc1, <i64 1, i64 1>
%add2 = add nsw <2 x i64> %add1, %sextsrc2
%resulti16 = lshr <2 x i64> %add2, <i64 1, i64 1>
%result = trunc <2 x i64> %resulti16 to <2 x i32>
store <2 x i32> %result, ptr %dest, align 8
ret void
}
define void @testLowerToSRHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD16b:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd.16b v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
%sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
%add1 = add nsw <16 x i16> %sextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%add2 = add nsw <16 x i16> %add1, %sextsrc2
%resulti16 = lshr <16 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
store <16 x i8> %result, ptr %dest, align 16
ret void
}
define void @testLowerToSRHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD8h:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd.8h v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sextsrc2 = sext <8 x i16> %src2 to <8 x i32>
%add1 = add nsw <8 x i32> %sextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%add2 = add nsw <8 x i32> %add1, %sextsrc2
%resulti16 = lshr <8 x i32> %add2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%result = trunc <8 x i32> %resulti16 to <8 x i16>
store <8 x i16> %result, ptr %dest, align 16
ret void
}
define void @testLowerToSRHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD4s:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd.4s v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
%sextsrc2 = sext <4 x i32> %src2 to <4 x i64>
%add1 = add nsw <4 x i64> %sextsrc1, <i64 1, i64 1, i64 1, i64 1>
%add2 = add nsw <4 x i64> %add1, %sextsrc2
%resulti16 = lshr <4 x i64> %add2, <i64 1, i64 1, i64 1, i64 1>
%result = trunc <4 x i64> %resulti16 to <4 x i32>
store <4 x i32> %result, ptr %dest, align 16
ret void
}
define void @testLowerToSHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD8b:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd.8b v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
%sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
%add = add nsw <8 x i16> %sextsrc1, %sextsrc2
%resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
store <8 x i8> %result, ptr %dest, align 8
ret void
}
define void @testLowerToSHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD4h:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd.4h v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
%sextsrc2 = sext <4 x i16> %src2 to <4 x i32>
%add = add nsw <4 x i32> %sextsrc1, %sextsrc2
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
%result = trunc <4 x i32> %resulti16 to <4 x i16>
store <4 x i16> %result, ptr %dest, align 8
ret void
}
define void @testLowerToSHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD2s:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd.2s v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
%sextsrc2 = sext <2 x i32> %src2 to <2 x i64>
%add = add nsw <2 x i64> %sextsrc1, %sextsrc2
%resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
%result = trunc <2 x i64> %resulti16 to <2 x i32>
store <2 x i32> %result, ptr %dest, align 8
ret void
}
define void @testLowerToSHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD16b:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd.16b v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
%sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
%add = add nsw <16 x i16> %sextsrc1, %sextsrc2
%resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
store <16 x i8> %result, ptr %dest, align 16
ret void
}
define void @testLowerToSHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD8h:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd.8h v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sextsrc2 = sext <8 x i16> %src2 to <8 x i32>
%add = add nsw <8 x i32> %sextsrc1, %sextsrc2
%resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%result = trunc <8 x i32> %resulti16 to <8 x i16>
store <8 x i16> %result, ptr %dest, align 16
ret void
}
define void @testLowerToSHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD4s:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd.4s v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
%sextsrc2 = sext <4 x i32> %src2 to <4 x i64>
%add = add nsw <4 x i64> %sextsrc1, %sextsrc2
%resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
%result = trunc <4 x i64> %resulti16 to <4 x i32>
store <4 x i32> %result, ptr %dest, align 16
ret void
}
define void @testLowerToURHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD8b:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd.8b v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
%zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
%add1 = add nuw nsw <8 x i16> %zextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%add2 = add nuw nsw <8 x i16> %add1, %zextsrc2
%resulti16 = lshr <8 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
store <8 x i8> %result, ptr %dest, align 8
ret void
}
define void @testLowerToURHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD4h:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd.4h v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
%add1 = add nuw nsw <4 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1>
%add2 = add nuw nsw <4 x i32> %add1, %zextsrc2
%resulti16 = lshr <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
%result = trunc <4 x i32> %resulti16 to <4 x i16>
store <4 x i16> %result, ptr %dest, align 8
ret void
}
define void @testLowerToURHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD2s:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
%zextsrc2 = zext <2 x i32> %src2 to <2 x i64>
%add1 = add nuw nsw <2 x i64> %zextsrc1, <i64 1, i64 1>
%add2 = add nuw nsw <2 x i64> %add1, %zextsrc2
%resulti16 = lshr <2 x i64> %add2, <i64 1, i64 1>
%result = trunc <2 x i64> %resulti16 to <2 x i32>
store <2 x i32> %result, ptr %dest, align 8
ret void
}
define void @testLowerToURHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD16b:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd.16b v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
%zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
%add1 = add nuw nsw <16 x i16> %zextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%add2 = add nuw nsw <16 x i16> %add1, %zextsrc2
%resulti16 = lshr <16 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
store <16 x i8> %result, ptr %dest, align 16
ret void
}
define void @testLowerToURHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD8h:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd.8h v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
%add1 = add nuw nsw <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%add2 = add nuw nsw <8 x i32> %add1, %zextsrc2
%resulti16 = lshr <8 x i32> %add2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%result = trunc <8 x i32> %resulti16 to <8 x i16>
store <8 x i16> %result, ptr %dest, align 16
ret void
}
define void @testLowerToURHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD4s:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd.4s v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
%add1 = add nuw nsw <4 x i64> %zextsrc1, <i64 1, i64 1, i64 1, i64 1>
%add2 = add nuw nsw <4 x i64> %add1, %zextsrc2
%resulti16 = lshr <4 x i64> %add2, <i64 1, i64 1, i64 1, i64 1>
%result = trunc <4 x i64> %resulti16 to <4 x i32>
store <4 x i32> %result, ptr %dest, align 16
ret void
}
define void @testLowerToUHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD8b:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.8b v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
%zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
%add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2
%resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
store <8 x i8> %result, ptr %dest, align 8
ret void
}
define void @testLowerToUHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD4h:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.4h v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
%add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
%result = trunc <4 x i32> %resulti16 to <4 x i16>
store <4 x i16> %result, ptr %dest, align 8
ret void
}
define void @testLowerToUHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD2s:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
%zextsrc2 = zext <2 x i32> %src2 to <2 x i64>
%add = add nuw nsw <2 x i64> %zextsrc1, %zextsrc2
%resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
%result = trunc <2 x i64> %resulti16 to <2 x i32>
store <2 x i32> %result, ptr %dest, align 8
ret void
}
define void @testLowerToUHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD16b:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.16b v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
%zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
%add = add nuw nsw <16 x i16> %zextsrc1, %zextsrc2
%resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
store <16 x i8> %result, ptr %dest, align 16
ret void
}
define void @testLowerToUHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD8h:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.8h v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
%add = add nuw nsw <8 x i32> %zextsrc1, %zextsrc2
%resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%result = trunc <8 x i32> %resulti16 to <8 x i16>
store <8 x i16> %result, ptr %dest, align 16
ret void
}
define void @testLowerToUHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD4s:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.4s v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
%add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
%result = trunc <4 x i64> %resulti16 to <4 x i32>
store <4 x i32> %result, ptr %dest, align 16
ret void
}
define <4 x i32> @hadd16_sext_asr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-LABEL: hadd16_sext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd.4h v0, v0, v1
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = sext <4 x i16> %src2 to <4 x i32>
%add = add nsw <4 x i32> %zextsrc1, %zextsrc2
%resulti16 = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %resulti16
}
define <4 x i32> @hadd16_zext_asr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-LABEL: hadd16_zext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.4h v0, v0, v1
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
%add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %resulti16
}
define <4 x i32> @hadd16_sext_lsr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-LABEL: hadd16_sext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: saddl.4s v0, v0, v1
; CHECK-NEXT: ushr.4s v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = sext <4 x i16> %src2 to <4 x i32>
%add = add nsw <4 x i32> %zextsrc1, %zextsrc2
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %resulti16
}
define <4 x i32> @hadd16_zext_lsr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-LABEL: hadd16_zext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.4h v0, v0, v1
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
%add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %resulti16
}
define <4 x i64> @hadd32_sext_asr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: hadd32_sext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd.4s v0, v0, v1
; CHECK-NEXT: sshll2.2d v1, v0, #0
; CHECK-NEXT: sshll.2d v0, v0, #0
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = sext <4 x i32> %src2 to <4 x i64>
%add = add nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti32 = ashr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %resulti32
}
define <4 x i64> @hadd32_zext_asr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: hadd32_zext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.4s v0, v0, v1
; CHECK-NEXT: ushll2.2d v1, v0, #0
; CHECK-NEXT: ushll.2d v0, v0, #0
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
%add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %resulti32
}
define <4 x i64> @hadd32_sext_lsr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: hadd32_sext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: saddl.2d v2, v0, v1
; CHECK-NEXT: saddl2.2d v0, v0, v1
; CHECK-NEXT: ushr.2d v1, v0, #1
; CHECK-NEXT: ushr.2d v0, v2, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = sext <4 x i32> %src2 to <4 x i64>
%add = add nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %resulti32
}
define <4 x i64> @hadd32_zext_lsr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: hadd32_zext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd.4s v0, v0, v1
; CHECK-NEXT: ushll2.2d v1, v0, #0
; CHECK-NEXT: ushll.2d v0, v0, #0
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
%add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %resulti32
}
define <4 x i16> @hadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: hadd8_sext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.4h v0, v0, #8
; CHECK-NEXT: shl.4h v1, v1, #8
; CHECK-NEXT: sshr.4h v0, v0, #8
; CHECK-NEXT: sshr.4h v1, v1, #8
; CHECK-NEXT: shadd.4h v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
%add = add nsw <4 x i16> %zextsrc1, %zextsrc2
%resulti8 = ashr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
define <4 x i16> @hadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: hadd8_zext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: bic.4h v1, #255, lsl #8
; CHECK-NEXT: uhadd.4h v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
%add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
define <4 x i16> @hadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: hadd8_sext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.4h v0, v0, #8
; CHECK-NEXT: shl.4h v1, v1, #8
; CHECK-NEXT: sshr.4h v0, v0, #8
; CHECK-NEXT: ssra.4h v0, v1, #8
; CHECK-NEXT: ushr.4h v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
%add = add nsw <4 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
define <4 x i16> @hadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: hadd8_zext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: bic.4h v1, #255, lsl #8
; CHECK-NEXT: uhadd.4h v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
%add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
define <2 x i16> @hadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: hadd8x2_sext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: sshr.2s v1, v1, #24
; CHECK-NEXT: shadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
%add = add nsw <2 x i16> %zextsrc1, %zextsrc2
%resulti8 = ashr <2 x i16> %add, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
define <2 x i16> @hadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: hadd8x2_zext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
%add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
define <2 x i16> @hadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: hadd8x2_sext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: ssra.2s v0, v1, #24
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: ushr.2s v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
%add = add nsw <2 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
define <2 x i16> @hadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: hadd8x2_zext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
%add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
define <4 x i16> @rhadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: rhadd8_sext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.4h v0, v0, #8
; CHECK-NEXT: shl.4h v1, v1, #8
; CHECK-NEXT: sshr.4h v0, v0, #8
; CHECK-NEXT: sshr.4h v1, v1, #8
; CHECK-NEXT: srhadd.4h v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
%add = add nsw <4 x i16> %zextsrc1, %zextsrc2
%add2 = add nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
%resulti8 = ashr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
define <4 x i16> @rhadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: rhadd8_zext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: bic.4h v1, #255, lsl #8
; CHECK-NEXT: urhadd.4h v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
%add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
%add2 = add nuw nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
%resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
define <4 x i16> @rhadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: rhadd8_sext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.4h v0, v0, #8
; CHECK-NEXT: shl.4h v1, v1, #8
; CHECK-NEXT: sshr.4h v0, v0, #8
; CHECK-NEXT: ssra.4h v0, v1, #8
; CHECK-NEXT: movi.4h v1, #1
; CHECK-NEXT: add.4h v0, v0, v1
; CHECK-NEXT: ushr.4h v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
%add = add nsw <4 x i16> %zextsrc1, %zextsrc2
%add2 = add nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
%resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
define <4 x i16> @rhadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: rhadd8_zext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: bic.4h v1, #255, lsl #8
; CHECK-NEXT: urhadd.4h v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
%add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
%add2 = add nuw nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
%resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: rhadd8x2_sext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: sshr.2s v1, v1, #24
; CHECK-NEXT: srhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
%add = add nsw <2 x i16> %zextsrc1, %zextsrc2
%add2 = add nsw <2 x i16> %add, <i16 1, i16 1>
%resulti8 = ashr <2 x i16> %add2, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: rhadd8x2_zext_asr:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
%add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
%add2 = add nuw nsw <2 x i16> %add, <i16 1, i16 1>
%resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
define <2 x i16> @rhadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: rhadd8x2_sext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: sshr.2s v1, v1, #24
; CHECK-NEXT: mvn.8b v0, v0
; CHECK-NEXT: sub.2s v0, v1, v0
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: ushr.2s v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
%add = add nsw <2 x i16> %zextsrc1, %zextsrc2
%add2 = add nsw <2 x i16> %add, <i16 1, i16 1>
%resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: rhadd8x2_zext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
%add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
%add2 = add nuw nsw <2 x i16> %add, <i16 1, i16 1>
%resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
define void @testLowerToSHADD8b_c(<8 x i8> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD8b_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.8b v1, #10
; CHECK-NEXT: shadd.8b v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
%add = add nsw <8 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
%resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
store <8 x i8> %result, ptr %dest, align 8
ret void
}
define void @testLowerToSHADD4h_c(<4 x i16> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD4h_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.4h v1, #10
; CHECK-NEXT: shadd.4h v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
%add = add nsw <4 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10>
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
%result = trunc <4 x i32> %resulti16 to <4 x i16>
store <4 x i16> %result, ptr %dest, align 8
ret void
}
define void @testLowerToSHADD2s_c(<2 x i32> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD2s_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.2s v1, #10
; CHECK-NEXT: shadd.2s v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
%add = add nsw <2 x i64> %sextsrc1, <i64 10, i64 10>
%resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
%result = trunc <2 x i64> %resulti16 to <2 x i32>
store <2 x i32> %result, ptr %dest, align 8
ret void
}
define void @testLowerToSHADD16b_c(<16 x i8> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD16b_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: shadd.16b v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
%add = add nsw <16 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
%resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
store <16 x i8> %result, ptr %dest, align 16
ret void
}
define void @testLowerToSHADD8h_c(<8 x i16> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD8h_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.8h v1, #10
; CHECK-NEXT: shadd.8h v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%add = add nsw <8 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
%resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%result = trunc <8 x i32> %resulti16 to <8 x i16>
store <8 x i16> %result, ptr %dest, align 16
ret void
}
define void @testLowerToSHADD4s_c(<4 x i32> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD4s_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: shadd.4s v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
%add = add nsw <4 x i64> %sextsrc1, <i64 10, i64 10, i64 10, i64 10>
%resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
%result = trunc <4 x i64> %resulti16 to <4 x i32>
store <4 x i32> %result, ptr %dest, align 16
ret void
}
define void @testLowerToUHADD8b_c(<8 x i8> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD8b_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.8b v1, #10
; CHECK-NEXT: uhadd.8b v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
%add = add nuw nsw <8 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
%resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
store <8 x i8> %result, ptr %dest, align 8
ret void
}
define void @testLowerToUHADD4h_c(<4 x i16> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD4h_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.4h v1, #10
; CHECK-NEXT: uhadd.4h v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
%add = add nuw nsw <4 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10>
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
%result = trunc <4 x i32> %resulti16 to <4 x i16>
store <4 x i16> %result, ptr %dest, align 8
ret void
}
define void @testLowerToUHADD2s_c(<2 x i32> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD2s_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.2s v1, #10
; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
%add = add nuw nsw <2 x i64> %zextsrc1, <i64 10, i64 10>
%resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
%result = trunc <2 x i64> %resulti16 to <2 x i32>
store <2 x i32> %result, ptr %dest, align 8
ret void
}
define void @testLowerToUHADD16b_c(<16 x i8> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD16b_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: uhadd.16b v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
%add = add nuw nsw <16 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
%resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
store <16 x i8> %result, ptr %dest, align 16
ret void
}
define void @testLowerToUHADD8h_c(<8 x i16> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD8h_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.8h v1, #10
; CHECK-NEXT: uhadd.8h v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%add = add nuw nsw <8 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
%resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%result = trunc <8 x i32> %resulti16 to <8 x i16>
store <8 x i16> %result, ptr %dest, align 16
ret void
}
define void @testLowerToUHADD4s_c(<4 x i32> %src1, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD4s_c:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: uhadd.4s v0, v0, v1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
%add = add nuw nsw <4 x i64> %zextsrc1, <i64 10, i64 10, i64 10, i64 10>
%resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
%result = trunc <4 x i64> %resulti16 to <4 x i32>
store <4 x i32> %result, ptr %dest, align 16
ret void
}
define <8 x i8> @andmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) {
; CHECK-LABEL: andmaskv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.8b v2, #7
; CHECK-NEXT: xtn.8b v0, v0
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: uhadd.8b v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
%add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2
%resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
ret <8 x i8> %result
}
define <16 x i8> @andmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) {
; CHECK-LABEL: andmaskv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.16b v3, #7
; CHECK-NEXT: uzp1.16b v0, v0, v1
; CHECK-NEXT: and.16b v0, v0, v3
; CHECK-NEXT: uhadd.16b v0, v0, v2
; CHECK-NEXT: ret
%zextsrc1 = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
%add = add nuw nsw <16 x i16> %zextsrc1, %zextsrc2
%resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
ret <16 x i8> %result
}
define <16 x i8> @andmask2v16i8(<16 x i16> %src1, <16 x i16> %src2) {
; CHECK-LABEL: andmask2v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1.16b v2, v2, v3
; CHECK-NEXT: movi.16b v3, #3
; CHECK-NEXT: uzp1.16b v0, v0, v1
; CHECK-NEXT: movi.16b v1, #7
; CHECK-NEXT: and.16b v2, v2, v3
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: uhadd.16b v0, v0, v2
; CHECK-NEXT: ret
%zextsrc1 = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%zextsrc2 = and <16 x i16> %src2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
%add = add nuw nsw <16 x i16> %zextsrc1, %zextsrc2
%resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
ret <16 x i8> %result
}
define <8 x i8> @andmask2v8i8(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: andmask2v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.8b v2, #7
; CHECK-NEXT: xtn.8b v0, v0
; CHECK-NEXT: xtn.8b v1, v1
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: uhadd.8b v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%zextsrc2 = and <8 x i16> %src2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2
%resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
ret <8 x i8> %result
}
define <8 x i16> @andmask3v8i8(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: andmask3v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.8h v2, #7
; CHECK-NEXT: bic.8h v1, #254, lsl #8
; CHECK-NEXT: and.16b v0, v0, v2
; CHECK-NEXT: uhadd.8h v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%zextsrc2 = and <8 x i16> %src2, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
%add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2
%resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %resulti16
}
define <16 x i8> @sextmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sextmaskv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sshr.8h v1, v1, #11
; CHECK-NEXT: sshr.8h v0, v0, #11
; CHECK-NEXT: uzp1.16b v0, v0, v1
; CHECK-NEXT: shadd.16b v0, v0, v2
; CHECK-NEXT: ret
%sextsrc1 = ashr <16 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
%sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
%add = add nsw <16 x i16> %sextsrc1, %sextsrc2
%1 = ashr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %1 to <16 x i8>
ret <16 x i8> %result
}
define <8 x i8> @sextmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) {
; CHECK-LABEL: sextmaskv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sshr.8h v0, v0, #11
; CHECK-NEXT: xtn.8b v0, v0
; CHECK-NEXT: shadd.8b v0, v0, v1
; CHECK-NEXT: ret
%sextsrc1 = ashr <8 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
%sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
%add = add nsw <8 x i16> %sextsrc1, %sextsrc2
%1 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %1 to <8 x i8>
ret <8 x i8> %result
}
define <8 x i8> @sextmask2v8i8(<8 x i16> %src1, <8 x i8> %src2) {
; CHECK-LABEL: sextmask2v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shrn.8b v0, v0, #8
; CHECK-NEXT: shadd.8b v0, v0, v1
; CHECK-NEXT: ret
%sextsrc1 = ashr <8 x i16> %src1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
%add = add nsw <8 x i16> %sextsrc1, %sextsrc2
%1 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %1 to <8 x i8>
ret <8 x i8> %result
}
define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) {
; CHECK-LABEL: sextmask3v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sshr.8h v0, v0, #7
; CHECK-NEXT: sshll.8h v1, v1, #0
; CHECK-NEXT: shadd.8h v0, v0, v1
; CHECK-NEXT: xtn.8b v0, v0
; CHECK-NEXT: ret
%1 = ashr <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
%add = add nsw <8 x i16> %1, %sextsrc2
%2 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %2 to <8 x i8>
ret <8 x i8> %result
}
define <4 x i16> @ext_via_i19(<4 x i16> %a) {
; CHECK-LABEL: ext_via_i19:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.4s v1, #1
; CHECK-NEXT: uaddw.4s v0, v1, v0
; CHECK-NEXT: uhadd.4s v0, v0, v1
; CHECK-NEXT: xtn.4h v0, v0
; CHECK-NEXT: ret
%t3 = zext <4 x i16> %a to <4 x i32>
%t4 = add <4 x i32> %t3, <i32 1, i32 1, i32 1, i32 1>
%t5 = trunc <4 x i32> %t4 to <4 x i19>
%new0 = add <4 x i19> %t5, <i19 1, i19 1, i19 1, i19 1>
%new1 = lshr <4 x i19> %new0, <i19 1, i19 1, i19 1, i19 1>
%last = zext <4 x i19> %new1 to <4 x i32>
%t6 = trunc <4 x i32> %last to <4 x i16>
ret <4 x i16> %t6
}
declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)