; blob: 09ea9eeb03914bb5a9f785db03e83b0a1a097c79 [file]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for ext_via_i19
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srhadd_v2i32_trunc
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urhadd_v2i32_trunc
; Intrinsic selection: llvm.aarch64.neon.shadd on <8 x i8> values loaded from
; %A and %B is expected to become one shadd.8b after the two d-register loads.
define <8 x i8> @shadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: shadd.8b v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %A, align 8
  %rhs = load <8 x i8>, ptr %B, align 8
  %res = tail call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
; Intrinsic selection: llvm.aarch64.neon.shadd on <16 x i8> values loaded from
; %A and %B is expected to become one shadd.16b after the two q-register loads.
define <16 x i8> @shadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shadd.16b v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <16 x i8>, ptr %A, align 16
  %rhs = load <16 x i8>, ptr %B, align 16
  %res = tail call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
; Intrinsic selection: llvm.aarch64.neon.shadd on <4 x i16> values loaded from
; %A and %B is expected to become one shadd.4h after the two d-register loads.
define <4 x i16> @shadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: shadd.4h v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %A, align 8
  %rhs = load <4 x i16>, ptr %B, align 8
  %res = tail call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
; Intrinsic selection: llvm.aarch64.neon.shadd on <8 x i16> values loaded from
; %A and %B is expected to become one shadd.8h after the two q-register loads.
define <8 x i16> @shadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shadd.8h v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <8 x i16>, ptr %A, align 16
  %rhs = load <8 x i16>, ptr %B, align 16
  %res = tail call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
; Intrinsic selection: llvm.aarch64.neon.shadd on <2 x i32> values loaded from
; %A and %B is expected to become one shadd.2s after the two d-register loads.
define <2 x i32> @shadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: shadd.2s v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %A, align 8
  %rhs = load <2 x i32>, ptr %B, align 8
  %res = tail call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
; Intrinsic selection: llvm.aarch64.neon.shadd on <4 x i32> values loaded from
; %A and %B is expected to become one shadd.4s after the two q-register loads.
define <4 x i32> @shadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shadd.4s v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <4 x i32>, ptr %A, align 16
  %rhs = load <4 x i32>, ptr %B, align 16
  %res = tail call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
; Intrinsic selection: llvm.aarch64.neon.uhadd on <8 x i8> values loaded from
; %A and %B is expected to become one uhadd.8b after the two d-register loads.
define <8 x i8> @uhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uhadd.8b v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %A, align 8
  %rhs = load <8 x i8>, ptr %B, align 8
  %res = tail call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
; Intrinsic selection: llvm.aarch64.neon.uhadd on <16 x i8> values loaded from
; %A and %B is expected to become one uhadd.16b after the two q-register loads.
define <16 x i8> @uhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uhadd.16b v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <16 x i8>, ptr %A, align 16
  %rhs = load <16 x i8>, ptr %B, align 16
  %res = tail call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
; Intrinsic selection: llvm.aarch64.neon.uhadd on <4 x i16> values loaded from
; %A and %B is expected to become one uhadd.4h after the two d-register loads.
define <4 x i16> @uhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uhadd.4h v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %A, align 8
  %rhs = load <4 x i16>, ptr %B, align 8
  %res = tail call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
; Intrinsic selection: llvm.aarch64.neon.uhadd on <8 x i16> values loaded from
; %A and %B is expected to become one uhadd.8h after the two q-register loads.
define <8 x i16> @uhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uhadd.8h v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <8 x i16>, ptr %A, align 16
  %rhs = load <8 x i16>, ptr %B, align 16
  %res = tail call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
; Intrinsic selection: llvm.aarch64.neon.uhadd on <2 x i32> values loaded from
; %A and %B is expected to become one uhadd.2s after the two d-register loads.
define <2 x i32> @uhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %A, align 8
  %rhs = load <2 x i32>, ptr %B, align 8
  %res = tail call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
; Intrinsic selection: llvm.aarch64.neon.uhadd on <4 x i32> values loaded from
; %A and %B is expected to become one uhadd.4s after the two q-register loads.
define <4 x i32> @uhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uhadd.4s v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <4 x i32>, ptr %A, align 16
  %rhs = load <4 x i32>, ptr %B, align 16
  %res = tail call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
; Declarations for the shadd/uhadd NEON intrinsics called by the tests above,
; grouped per intrinsic and listed by element width (i8, i16, i32).

; llvm.aarch64.neon.shadd
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>)

; llvm.aarch64.neon.uhadd
declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>)
declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>)
; Intrinsic selection: llvm.aarch64.neon.srhadd on <8 x i8> values loaded from
; %A and %B is expected to become one srhadd.8b after the two d-register loads.
define <8 x i8> @srhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srhadd.8b v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %A, align 8
  %rhs = load <8 x i8>, ptr %B, align 8
  %res = tail call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
; Intrinsic selection: llvm.aarch64.neon.srhadd on <16 x i8> values loaded from
; %A and %B is expected to become one srhadd.16b after the two q-register loads.
define <16 x i8> @srhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srhadd.16b v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <16 x i8>, ptr %A, align 16
  %rhs = load <16 x i8>, ptr %B, align 16
  %res = tail call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
; Intrinsic selection: llvm.aarch64.neon.srhadd on <4 x i16> values loaded from
; %A and %B is expected to become one srhadd.4h after the two d-register loads.
define <4 x i16> @srhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srhadd.4h v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %A, align 8
  %rhs = load <4 x i16>, ptr %B, align 8
  %res = tail call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
; Intrinsic selection: llvm.aarch64.neon.srhadd on <8 x i16> values loaded from
; %A and %B is expected to become one srhadd.8h after the two q-register loads.
define <8 x i16> @srhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srhadd.8h v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <8 x i16>, ptr %A, align 16
  %rhs = load <8 x i16>, ptr %B, align 16
  %res = tail call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
; Intrinsic selection: llvm.aarch64.neon.srhadd on <2 x i32> values loaded from
; %A and %B is expected to become one srhadd.2s after the two d-register loads.
define <2 x i32> @srhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srhadd.2s v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %A, align 8
  %rhs = load <2 x i32>, ptr %B, align 8
  %res = tail call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
; Intrinsic selection: llvm.aarch64.neon.srhadd on <4 x i32> values loaded from
; %A and %B is expected to become one srhadd.4s after the two q-register loads.
define <4 x i32> @srhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srhadd.4s v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <4 x i32>, ptr %A, align 16
  %rhs = load <4 x i32>, ptr %B, align 16
  %res = tail call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
; Intrinsic selection: llvm.aarch64.neon.urhadd on <8 x i8> values loaded from
; %A and %B is expected to become one urhadd.8b after the two d-register loads.
define <8 x i8> @urhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urhadd.8b v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %A, align 8
  %rhs = load <8 x i8>, ptr %B, align 8
  %res = tail call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
; Intrinsic selection: llvm.aarch64.neon.urhadd on <16 x i8> values loaded from
; %A and %B is expected to become one urhadd.16b after the two q-register loads.
define <16 x i8> @urhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urhadd.16b v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <16 x i8>, ptr %A, align 16
  %rhs = load <16 x i8>, ptr %B, align 16
  %res = tail call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
; Intrinsic selection: llvm.aarch64.neon.urhadd on <4 x i16> values loaded from
; %A and %B is expected to become one urhadd.4h after the two d-register loads.
define <4 x i16> @urhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urhadd.4h v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %A, align 8
  %rhs = load <4 x i16>, ptr %B, align 8
  %res = tail call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
; Intrinsic selection: llvm.aarch64.neon.urhadd on <8 x i16> values loaded from
; %A and %B is expected to become one urhadd.8h after the two q-register loads.
define <8 x i16> @urhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urhadd.8h v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <8 x i16>, ptr %A, align 16
  %rhs = load <8 x i16>, ptr %B, align 16
  %res = tail call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
; Intrinsic selection: llvm.aarch64.neon.urhadd on <2 x i32> values loaded from
; %A and %B is expected to become one urhadd.2s after the two d-register loads.
define <2 x i32> @urhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %A, align 8
  %rhs = load <2 x i32>, ptr %B, align 8
  %res = tail call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
; Intrinsic selection: llvm.aarch64.neon.urhadd on <4 x i32> values loaded from
; %A and %B is expected to become one urhadd.4s after the two q-register loads.
define <4 x i32> @urhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urhadd.4s v0, v0, v1
; CHECK-NEXT: ret
  %lhs = load <4 x i32>, ptr %A, align 16
  %rhs = load <4 x i32>, ptr %B, align 16
  %res = tail call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
; Open-coded signed rounding halving add on <8 x i8>: sign-extend both inputs
; to i16, compute (src1 + 1) + src2, shift right by one, truncate back to i8 and
; store to %dest. The SelectionDAG run expects a single srhadd.8b; the
; GlobalISel run expects the widened saddl/add/shrn sequence.
define void @testLowerToSRHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSRHADD8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: srhadd.8b v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSRHADD8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v2, #1
; CHECK-GI-NEXT: saddl.8h v0, v0, v1
; CHECK-GI-NEXT: add.8h v0, v0, v2
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <8 x i8> %src1 to <8 x i16>
  %wide2 = sext <8 x i8> %src2 to <8 x i16>
  %plus1 = add nsw <8 x i16> %wide1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sum = add nsw <8 x i16> %plus1, %wide2
  %halved = lshr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <8 x i16> %halved to <8 x i8>
  store <8 x i8> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded signed rounding halving add on <4 x i16>: sign-extend both inputs
; to i32, compute (src1 + 1) + src2, shift right by one, truncate back to i16
; and store to %dest. The SelectionDAG run expects a single srhadd.4h; the
; GlobalISel run expects the widened saddl/add/shrn sequence.
define void @testLowerToSRHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSRHADD4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: srhadd.4h v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSRHADD4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.4s v2, #1
; CHECK-GI-NEXT: saddl.4s v0, v0, v1
; CHECK-GI-NEXT: add.4s v0, v0, v2
; CHECK-GI-NEXT: shrn.4h v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <4 x i16> %src1 to <4 x i32>
  %wide2 = sext <4 x i16> %src2 to <4 x i32>
  %plus1 = add nsw <4 x i32> %wide1, <i32 1, i32 1, i32 1, i32 1>
  %sum = add nsw <4 x i32> %plus1, %wide2
  %halved = lshr <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  %narrow = trunc <4 x i32> %halved to <4 x i16>
  store <4 x i16> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded signed rounding halving add on <2 x i32>: sign-extend both inputs
; to i64, compute (src1 + 1) + src2, shift right by one, truncate back to i32
; and store to %dest. The SelectionDAG run expects a single srhadd.2s; the
; GlobalISel run expects saddl/add/shrn with the vector of ones loaded from a
; constant-pool entry.
define void @testLowerToSRHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSRHADD2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: srhadd.2s v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSRHADD2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI26_0
; CHECK-GI-NEXT: saddl.2d v0, v0, v1
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI26_0]
; CHECK-GI-NEXT: add.2d v0, v0, v1
; CHECK-GI-NEXT: shrn.2s v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <2 x i32> %src1 to <2 x i64>
  %wide2 = sext <2 x i32> %src2 to <2 x i64>
  %plus1 = add nsw <2 x i64> %wide1, <i64 1, i64 1>
  %sum = add nsw <2 x i64> %plus1, %wide2
  %halved = lshr <2 x i64> %sum, <i64 1, i64 1>
  %narrow = trunc <2 x i64> %halved to <2 x i32>
  store <2 x i32> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded signed rounding halving add on <16 x i8>: sign-extend both inputs
; to i16, compute (src1 + 1) + src2, shift right by one, truncate back to i8 and
; store to %dest. The SelectionDAG run expects a single srhadd.16b; the
; GlobalISel run expects the low/high halves to be widened, incremented and
; narrowed separately (saddl/saddl2, add, shrn/shrn2).
define void @testLowerToSRHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSRHADD16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: srhadd.16b v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSRHADD16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v2, #1
; CHECK-GI-NEXT: saddl.8h v3, v0, v1
; CHECK-GI-NEXT: saddl2.8h v0, v0, v1
; CHECK-GI-NEXT: add.8h v1, v3, v2
; CHECK-GI-NEXT: add.8h v0, v0, v2
; CHECK-GI-NEXT: shrn.8b v1, v1, #1
; CHECK-GI-NEXT: shrn2.16b v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <16 x i8> %src1 to <16 x i16>
  %wide2 = sext <16 x i8> %src2 to <16 x i16>
  %plus1 = add nsw <16 x i16> %wide1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sum = add nsw <16 x i16> %plus1, %wide2
  %halved = lshr <16 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <16 x i16> %halved to <16 x i8>
  store <16 x i8> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded signed rounding halving add on <8 x i16>: sign-extend both inputs
; to i32, compute (src1 + 1) + src2, shift right by one, truncate back to i16
; and store to %dest. The SelectionDAG run expects a single srhadd.8h; the
; GlobalISel run expects the low/high halves to be widened, incremented and
; narrowed separately (saddl/saddl2, add, shrn/shrn2).
define void @testLowerToSRHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSRHADD8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: srhadd.8h v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSRHADD8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.4s v2, #1
; CHECK-GI-NEXT: saddl.4s v3, v0, v1
; CHECK-GI-NEXT: saddl2.4s v0, v0, v1
; CHECK-GI-NEXT: add.4s v1, v3, v2
; CHECK-GI-NEXT: add.4s v0, v0, v2
; CHECK-GI-NEXT: shrn.4h v1, v1, #1
; CHECK-GI-NEXT: shrn2.8h v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <8 x i16> %src1 to <8 x i32>
  %wide2 = sext <8 x i16> %src2 to <8 x i32>
  %plus1 = add nsw <8 x i32> %wide1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %sum = add nsw <8 x i32> %plus1, %wide2
  %halved = lshr <8 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %narrow = trunc <8 x i32> %halved to <8 x i16>
  store <8 x i16> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded signed rounding halving add on <4 x i32>: sign-extend both inputs
; to i64, compute (src1 + 1) + src2, shift right by one, truncate back to i32
; and store to %dest. The SelectionDAG run expects a single srhadd.4s; the
; GlobalISel run expects saddl/saddl2, add and shrn/shrn2 with the vector of
; ones loaded from a constant-pool entry.
define void @testLowerToSRHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSRHADD4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: srhadd.4s v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSRHADD4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI29_0
; CHECK-GI-NEXT: saddl.2d v2, v0, v1
; CHECK-GI-NEXT: saddl2.2d v0, v0, v1
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI29_0]
; CHECK-GI-NEXT: add.2d v1, v2, v3
; CHECK-GI-NEXT: add.2d v0, v0, v3
; CHECK-GI-NEXT: shrn.2s v1, v1, #1
; CHECK-GI-NEXT: shrn2.4s v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <4 x i32> %src1 to <4 x i64>
  %wide2 = sext <4 x i32> %src2 to <4 x i64>
  %plus1 = add nsw <4 x i64> %wide1, <i64 1, i64 1, i64 1, i64 1>
  %sum = add nsw <4 x i64> %plus1, %wide2
  %halved = lshr <4 x i64> %sum, <i64 1, i64 1, i64 1, i64 1>
  %narrow = trunc <4 x i64> %halved to <4 x i32>
  store <4 x i32> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded signed halving add on <8 x i8>: sign-extend both inputs to i16,
; add, shift right by one, truncate back to i8 and store to %dest. The
; SelectionDAG run expects a single shadd.8b; the GlobalISel run expects
; saddl followed by shrn.
define void @testLowerToSHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shadd.8b v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: saddl.8h v0, v0, v1
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <8 x i8> %src1 to <8 x i16>
  %wide2 = sext <8 x i8> %src2 to <8 x i16>
  %sum = add nsw <8 x i16> %wide1, %wide2
  %halved = lshr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <8 x i16> %halved to <8 x i8>
  store <8 x i8> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded signed halving add on <4 x i16>: sign-extend both inputs to i32,
; add, shift right by one, truncate back to i16 and store to %dest. The
; SelectionDAG run expects a single shadd.4h; the GlobalISel run expects
; saddl followed by shrn.
define void @testLowerToSHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shadd.4h v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: saddl.4s v0, v0, v1
; CHECK-GI-NEXT: shrn.4h v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <4 x i16> %src1 to <4 x i32>
  %wide2 = sext <4 x i16> %src2 to <4 x i32>
  %sum = add nsw <4 x i32> %wide1, %wide2
  %halved = lshr <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  %narrow = trunc <4 x i32> %halved to <4 x i16>
  store <4 x i16> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded signed halving add on <2 x i32>: sign-extend both inputs to i64,
; add, shift right by one, truncate back to i32 and store to %dest. The
; SelectionDAG run expects a single shadd.2s; the GlobalISel run expects
; saddl followed by shrn.
define void @testLowerToSHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shadd.2s v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: saddl.2d v0, v0, v1
; CHECK-GI-NEXT: shrn.2s v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <2 x i32> %src1 to <2 x i64>
  %wide2 = sext <2 x i32> %src2 to <2 x i64>
  %sum = add nsw <2 x i64> %wide1, %wide2
  %halved = lshr <2 x i64> %sum, <i64 1, i64 1>
  %narrow = trunc <2 x i64> %halved to <2 x i32>
  store <2 x i32> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded signed halving add on <16 x i8>: sign-extend both inputs to i16,
; add, shift right by one, truncate back to i8 and store to %dest. The
; SelectionDAG run expects a single shadd.16b; the GlobalISel run expects
; the two halves to be handled separately (saddl/saddl2, shrn/shrn2).
define void @testLowerToSHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shadd.16b v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: saddl.8h v2, v0, v1
; CHECK-GI-NEXT: saddl2.8h v0, v0, v1
; CHECK-GI-NEXT: shrn.8b v1, v2, #1
; CHECK-GI-NEXT: shrn2.16b v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <16 x i8> %src1 to <16 x i16>
  %wide2 = sext <16 x i8> %src2 to <16 x i16>
  %sum = add nsw <16 x i16> %wide1, %wide2
  %halved = lshr <16 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <16 x i16> %halved to <16 x i8>
  store <16 x i8> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded signed halving add on <8 x i16>: sign-extend both inputs to i32,
; add, shift right by one, truncate back to i16 and store to %dest. The
; SelectionDAG run expects a single shadd.8h; the GlobalISel run expects
; the two halves to be handled separately (saddl/saddl2, shrn/shrn2).
define void @testLowerToSHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shadd.8h v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: saddl.4s v2, v0, v1
; CHECK-GI-NEXT: saddl2.4s v0, v0, v1
; CHECK-GI-NEXT: shrn.4h v1, v2, #1
; CHECK-GI-NEXT: shrn2.8h v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <8 x i16> %src1 to <8 x i32>
  %wide2 = sext <8 x i16> %src2 to <8 x i32>
  %sum = add nsw <8 x i32> %wide1, %wide2
  %halved = lshr <8 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %narrow = trunc <8 x i32> %halved to <8 x i16>
  store <8 x i16> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded signed halving add on <4 x i32>: sign-extend both inputs to i64,
; add, shift right by one, truncate back to i32 and store to %dest. The
; SelectionDAG run expects a single shadd.4s; the GlobalISel run expects
; the two halves to be handled separately (saddl/saddl2, shrn/shrn2).
define void @testLowerToSHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shadd.4s v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: saddl.2d v2, v0, v1
; CHECK-GI-NEXT: saddl2.2d v0, v0, v1
; CHECK-GI-NEXT: shrn.2s v1, v2, #1
; CHECK-GI-NEXT: shrn2.4s v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = sext <4 x i32> %src1 to <4 x i64>
  %wide2 = sext <4 x i32> %src2 to <4 x i64>
  %sum = add nsw <4 x i64> %wide1, %wide2
  %halved = lshr <4 x i64> %sum, <i64 1, i64 1, i64 1, i64 1>
  %narrow = trunc <4 x i64> %halved to <4 x i32>
  store <4 x i32> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded unsigned rounding halving add on <8 x i8>: zero-extend both inputs
; to i16, compute (src1 + 1) + src2, shift right by one, truncate back to i8 and
; store to %dest. The SelectionDAG run expects a single urhadd.8b; the
; GlobalISel run expects the widened uaddl/add/shrn sequence.
define void @testLowerToURHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToURHADD8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: urhadd.8b v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToURHADD8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v2, #1
; CHECK-GI-NEXT: uaddl.8h v0, v0, v1
; CHECK-GI-NEXT: add.8h v0, v0, v2
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <8 x i8> %src1 to <8 x i16>
  %wide2 = zext <8 x i8> %src2 to <8 x i16>
  %plus1 = add nuw nsw <8 x i16> %wide1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sum = add nuw nsw <8 x i16> %plus1, %wide2
  %halved = lshr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <8 x i16> %halved to <8 x i8>
  store <8 x i8> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded unsigned rounding halving add on <4 x i16>: zero-extend both
; inputs to i32, compute (src1 + 1) + src2, shift right by one, truncate back to
; i16 and store to %dest. The SelectionDAG run expects a single urhadd.4h; the
; GlobalISel run expects the widened uaddl/add/shrn sequence.
define void @testLowerToURHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToURHADD4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: urhadd.4h v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToURHADD4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.4s v2, #1
; CHECK-GI-NEXT: uaddl.4s v0, v0, v1
; CHECK-GI-NEXT: add.4s v0, v0, v2
; CHECK-GI-NEXT: shrn.4h v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <4 x i16> %src1 to <4 x i32>
  %wide2 = zext <4 x i16> %src2 to <4 x i32>
  %plus1 = add nuw nsw <4 x i32> %wide1, <i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <4 x i32> %plus1, %wide2
  %halved = lshr <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  %narrow = trunc <4 x i32> %halved to <4 x i16>
  store <4 x i16> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded unsigned rounding halving add on <2 x i32>: zero-extend both
; inputs to i64, compute (src1 + 1) + src2, shift right by one, truncate back to
; i32 and store to %dest. The SelectionDAG run expects a single urhadd.2s; the
; GlobalISel run expects uaddl/add/shrn with the vector of ones loaded from a
; constant-pool entry.
define void @testLowerToURHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToURHADD2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: urhadd.2s v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToURHADD2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: uaddl.2d v0, v0, v1
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: add.2d v0, v0, v1
; CHECK-GI-NEXT: shrn.2s v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <2 x i32> %src1 to <2 x i64>
  %wide2 = zext <2 x i32> %src2 to <2 x i64>
  %plus1 = add nuw nsw <2 x i64> %wide1, <i64 1, i64 1>
  %sum = add nuw nsw <2 x i64> %plus1, %wide2
  %halved = lshr <2 x i64> %sum, <i64 1, i64 1>
  %narrow = trunc <2 x i64> %halved to <2 x i32>
  store <2 x i32> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded unsigned rounding halving add on <16 x i8>: zero-extend both
; inputs to i16, compute (src1 + 1) + src2, shift right by one, truncate back to
; i8 and store to %dest. The SelectionDAG run expects a single urhadd.16b; the
; GlobalISel run expects the low/high halves to be widened, incremented and
; narrowed separately (uaddl/uaddl2, add, shrn/shrn2).
define void @testLowerToURHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToURHADD16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: urhadd.16b v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToURHADD16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v2, #1
; CHECK-GI-NEXT: uaddl.8h v3, v0, v1
; CHECK-GI-NEXT: uaddl2.8h v0, v0, v1
; CHECK-GI-NEXT: add.8h v1, v3, v2
; CHECK-GI-NEXT: add.8h v0, v0, v2
; CHECK-GI-NEXT: shrn.8b v1, v1, #1
; CHECK-GI-NEXT: shrn2.16b v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <16 x i8> %src1 to <16 x i16>
  %wide2 = zext <16 x i8> %src2 to <16 x i16>
  %plus1 = add nuw nsw <16 x i16> %wide1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sum = add nuw nsw <16 x i16> %plus1, %wide2
  %halved = lshr <16 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <16 x i16> %halved to <16 x i8>
  store <16 x i8> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded unsigned rounding halving add on <8 x i16>: zero-extend both
; inputs to i32, compute (src1 + 1) + src2, shift right by one, truncate back to
; i16 and store to %dest. The SelectionDAG run expects a single urhadd.8h; the
; GlobalISel run expects the low/high halves to be widened, incremented and
; narrowed separately (uaddl/uaddl2, add, shrn/shrn2).
define void @testLowerToURHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToURHADD8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: urhadd.8h v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToURHADD8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.4s v2, #1
; CHECK-GI-NEXT: uaddl.4s v3, v0, v1
; CHECK-GI-NEXT: uaddl2.4s v0, v0, v1
; CHECK-GI-NEXT: add.4s v1, v3, v2
; CHECK-GI-NEXT: add.4s v0, v0, v2
; CHECK-GI-NEXT: shrn.4h v1, v1, #1
; CHECK-GI-NEXT: shrn2.8h v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <8 x i16> %src1 to <8 x i32>
  %wide2 = zext <8 x i16> %src2 to <8 x i32>
  %plus1 = add nuw nsw <8 x i32> %wide1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <8 x i32> %plus1, %wide2
  %halved = lshr <8 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %narrow = trunc <8 x i32> %halved to <8 x i16>
  store <8 x i16> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded unsigned rounding halving add on <4 x i32>: zero-extend both
; inputs to i64, compute (src1 + 1) + src2, shift right by one, truncate back to
; i32 and store to %dest. The SelectionDAG run expects a single urhadd.4s; the
; GlobalISel run expects uaddl/uaddl2, add and shrn/shrn2 with the vector of
; ones loaded from a constant-pool entry.
define void @testLowerToURHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToURHADD4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: urhadd.4s v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToURHADD4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI41_0
; CHECK-GI-NEXT: uaddl.2d v2, v0, v1
; CHECK-GI-NEXT: uaddl2.2d v0, v0, v1
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI41_0]
; CHECK-GI-NEXT: add.2d v1, v2, v3
; CHECK-GI-NEXT: add.2d v0, v0, v3
; CHECK-GI-NEXT: shrn.2s v1, v1, #1
; CHECK-GI-NEXT: shrn2.4s v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <4 x i32> %src1 to <4 x i64>
  %wide2 = zext <4 x i32> %src2 to <4 x i64>
  %plus1 = add nuw nsw <4 x i64> %wide1, <i64 1, i64 1, i64 1, i64 1>
  %sum = add nuw nsw <4 x i64> %plus1, %wide2
  %halved = lshr <4 x i64> %sum, <i64 1, i64 1, i64 1, i64 1>
  %narrow = trunc <4 x i64> %halved to <4 x i32>
  store <4 x i32> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded unsigned halving add on <8 x i8>: zero-extend both inputs to i16,
; add, shift right by one, truncate back to i8 and store to %dest. The
; SelectionDAG run expects a single uhadd.8b; the GlobalISel run expects
; uaddl followed by shrn.
define void @testLowerToUHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.8b v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.8h v0, v0, v1
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <8 x i8> %src1 to <8 x i16>
  %wide2 = zext <8 x i8> %src2 to <8 x i16>
  %sum = add nuw nsw <8 x i16> %wide1, %wide2
  %halved = lshr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <8 x i16> %halved to <8 x i8>
  store <8 x i8> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded unsigned halving add on <4 x i16>: zero-extend both inputs to i32,
; add, shift right by one, truncate back to i16 and store to %dest. The
; SelectionDAG run expects a single uhadd.4h; the GlobalISel run expects
; uaddl followed by shrn.
define void @testLowerToUHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.4h v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.4s v0, v0, v1
; CHECK-GI-NEXT: shrn.4h v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <4 x i16> %src1 to <4 x i32>
  %wide2 = zext <4 x i16> %src2 to <4 x i32>
  %sum = add nuw nsw <4 x i32> %wide1, %wide2
  %halved = lshr <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  %narrow = trunc <4 x i32> %halved to <4 x i16>
  store <4 x i16> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded unsigned halving add on <2 x i32>: zero-extend both inputs to i64,
; add, shift right by one, truncate back to i32 and store to %dest. The
; SelectionDAG run expects a single uhadd.2s; the GlobalISel run expects
; uaddl followed by shrn.
define void @testLowerToUHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.2s v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.2d v0, v0, v1
; CHECK-GI-NEXT: shrn.2s v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <2 x i32> %src1 to <2 x i64>
  %wide2 = zext <2 x i32> %src2 to <2 x i64>
  %sum = add nuw nsw <2 x i64> %wide1, %wide2
  %halved = lshr <2 x i64> %sum, <i64 1, i64 1>
  %narrow = trunc <2 x i64> %halved to <2 x i32>
  store <2 x i32> %narrow, ptr %dest, align 8
  ret void
}
; Open-coded unsigned halving add on <16 x i8>: zero-extend both inputs to i16,
; add, shift right by one, truncate back to i8 and store to %dest. The
; SelectionDAG run expects a single uhadd.16b; the GlobalISel run expects
; the two halves to be handled separately (uaddl/uaddl2, shrn/shrn2).
define void @testLowerToUHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.16b v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.8h v2, v0, v1
; CHECK-GI-NEXT: uaddl2.8h v0, v0, v1
; CHECK-GI-NEXT: shrn.8b v1, v2, #1
; CHECK-GI-NEXT: shrn2.16b v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <16 x i8> %src1 to <16 x i16>
  %wide2 = zext <16 x i8> %src2 to <16 x i16>
  %sum = add nuw nsw <16 x i16> %wide1, %wide2
  %halved = lshr <16 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <16 x i16> %halved to <16 x i8>
  store <16 x i8> %narrow, ptr %dest, align 16
  ret void
}
; Open-coded unsigned halving add on <8 x i16>: zero-extend both inputs to i32,
; add, shift right by one, truncate back to i16 and store to %dest. The
; SelectionDAG run expects a single uhadd.8h; the GlobalISel run expects
; the two halves to be handled separately (uaddl/uaddl2, shrn/shrn2).
define void @testLowerToUHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.8h v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.4s v2, v0, v1
; CHECK-GI-NEXT: uaddl2.4s v0, v0, v1
; CHECK-GI-NEXT: shrn.4h v1, v2, #1
; CHECK-GI-NEXT: shrn2.8h v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
  %wide1 = zext <8 x i16> %src1 to <8 x i32>
  %wide2 = zext <8 x i16> %src2 to <8 x i32>
  %sum = add nuw nsw <8 x i32> %wide1, %wide2
  %halved = lshr <8 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %narrow = trunc <8 x i32> %halved to <8 x i16>
  store <8 x i16> %narrow, ptr %dest, align 16
  ret void
}
; Unsigned halving add on <4 x i32>: zext to i64 lanes, add, lshr by one,
; trunc back to i32 and store to %dest.
; SelectionDAG is expected to emit one uhadd.4s; GlobalISel is expected to
; split the widened add into uaddl/uaddl2 and narrow with shrn/shrn2.
; Note: %resulti16 holds i64 lanes here; the name is a leftover.
define void @testLowerToUHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.4s v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.2d v2, v0, v1
; CHECK-GI-NEXT: uaddl2.2d v0, v0, v1
; CHECK-GI-NEXT: shrn.2s v1, v2, #1
; CHECK-GI-NEXT: shrn2.4s v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
%add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
%result = trunc <4 x i64> %resulti16 to <4 x i32>
store <4 x i32> %result, ptr %dest, align 16
ret void
}
; Halving add whose wide result is returned (no trunc): sext <4 x i16> to i32
; lanes, add nsw, ashr by one.
; SelectionDAG is expected to do the halving add at the narrow width
; (shadd.4h) and then widen with sshll #0; GlobalISel is expected to emit
; saddl.4s + sshr.4s.
; Note: the %zextsrc* values are sign extensions despite their names, and
; %resulti16 holds i32 lanes.
define <4 x i32> @hadd16_sext_asr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-SD-LABEL: hadd16_sext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shadd.4h v0, v0, v1
; CHECK-SD-NEXT: sshll.4s v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd16_sext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: saddl.4s v0, v0, v1
; CHECK-GI-NEXT: sshr.4s v0, v0, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = sext <4 x i16> %src2 to <4 x i32>
%add = add nsw <4 x i32> %zextsrc1, %zextsrc2
%resulti16 = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %resulti16
}
; Halving add whose wide result is returned (no trunc): zext <4 x i16> to i32
; lanes, add nuw nsw, shift right by one.
; NOTE(review): despite the _asr suffix, the shift in this IR is lshr, so the
; body is identical to hadd16_zext_lsr. The assertions below were generated
; for the lshr form; changing the shift requires regenerating them.
; SelectionDAG is expected to emit uhadd.4h + ushll #0; GlobalISel is
; expected to emit uaddl.4s + ushr.4s.
define <4 x i32> @hadd16_zext_asr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-SD-LABEL: hadd16_zext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.4h v0, v0, v1
; CHECK-SD-NEXT: ushll.4s v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd16_zext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.4s v0, v0, v1
; CHECK-GI-NEXT: ushr.4s v0, v0, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
%add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %resulti16
}
; Negative case: sext <4 x i16> to i32 lanes, add nsw, then a *logical* shift
; right by one, with the wide result returned.
; Both selectors share one set of expectations: saddl.4s + ushr.4s, i.e. no
; halving-add instruction is formed. Presumably this is because a logical
; shift of a sign-extended sum is not a sign-extended halving add.
; Note: the %zextsrc* values are sign extensions despite their names.
define <4 x i32> @hadd16_sext_lsr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-LABEL: hadd16_sext_lsr:
; CHECK: // %bb.0:
; CHECK-NEXT: saddl.4s v0, v0, v1
; CHECK-NEXT: ushr.4s v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = sext <4 x i16> %src2 to <4 x i32>
%add = add nsw <4 x i32> %zextsrc1, %zextsrc2
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %resulti16
}
; Halving add whose wide result is returned (no trunc): zext <4 x i16> to i32
; lanes, add nuw nsw, lshr by one.
; SelectionDAG is expected to emit uhadd.4h + ushll #0; GlobalISel is
; expected to emit uaddl.4s + ushr.4s.
define <4 x i32> @hadd16_zext_lsr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-SD-LABEL: hadd16_zext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.4h v0, v0, v1
; CHECK-SD-NEXT: ushll.4s v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd16_zext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.4s v0, v0, v1
; CHECK-GI-NEXT: ushr.4s v0, v0, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
%zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
%add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %resulti16
}
; Halving add whose wide result is returned: sext <4 x i32> to i64 lanes,
; add nsw, ashr by one. The <4 x i64> result occupies two vector registers.
; SelectionDAG is expected to emit shadd.4s and then widen both halves with
; sshll2/sshll #0; GlobalISel is expected to emit saddl/saddl2 + two sshr.2d.
; Note: the %zextsrc* values are sign extensions despite their names.
define <4 x i64> @hadd32_sext_asr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-SD-LABEL: hadd32_sext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shadd.4s v0, v0, v1
; CHECK-SD-NEXT: sshll2.2d v1, v0, #0
; CHECK-SD-NEXT: sshll.2d v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd32_sext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: saddl.2d v2, v0, v1
; CHECK-GI-NEXT: saddl2.2d v1, v0, v1
; CHECK-GI-NEXT: sshr.2d v0, v2, #1
; CHECK-GI-NEXT: sshr.2d v1, v1, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = sext <4 x i32> %src2 to <4 x i64>
%add = add nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti32 = ashr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %resulti32
}
; Halving add whose wide result is returned: zext <4 x i32> to i64 lanes,
; add nuw nsw, shift right by one.
; NOTE(review): despite the _asr suffix, the shift in this IR is lshr, so the
; body is identical to hadd32_zext_lsr. The assertions below were generated
; for the lshr form; changing the shift requires regenerating them.
; SelectionDAG is expected to emit uhadd.4s + ushll2/ushll #0; GlobalISel is
; expected to emit uaddl/uaddl2 + two ushr.2d.
define <4 x i64> @hadd32_zext_asr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-SD-LABEL: hadd32_zext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.4s v0, v0, v1
; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
; CHECK-SD-NEXT: ushll.2d v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd32_zext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.2d v2, v0, v1
; CHECK-GI-NEXT: uaddl2.2d v1, v0, v1
; CHECK-GI-NEXT: ushr.2d v0, v2, #1
; CHECK-GI-NEXT: ushr.2d v1, v1, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
%add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %resulti32
}
; Negative case: sext <4 x i32> to i64 lanes, add nsw, then a *logical* shift
; right by one, with the wide result returned.
; Neither selector is expected to form a halving add: both emit
; saddl/saddl2 followed by two ushr.2d. The expectations are kept separate
; because the register assignment differs between the two selectors.
; Note: the %zextsrc* values are sign extensions despite their names.
define <4 x i64> @hadd32_sext_lsr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-SD-LABEL: hadd32_sext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: saddl.2d v2, v0, v1
; CHECK-SD-NEXT: saddl2.2d v0, v0, v1
; CHECK-SD-NEXT: ushr.2d v1, v0, #1
; CHECK-SD-NEXT: ushr.2d v0, v2, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd32_sext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: saddl.2d v2, v0, v1
; CHECK-GI-NEXT: saddl2.2d v1, v0, v1
; CHECK-GI-NEXT: ushr.2d v0, v2, #1
; CHECK-GI-NEXT: ushr.2d v1, v1, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = sext <4 x i32> %src2 to <4 x i64>
%add = add nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %resulti32
}
; Halving add whose wide result is returned: zext <4 x i32> to i64 lanes,
; add nuw nsw, lshr by one.
; SelectionDAG is expected to emit uhadd.4s + ushll2/ushll #0; GlobalISel is
; expected to emit uaddl/uaddl2 + two ushr.2d.
define <4 x i64> @hadd32_zext_lsr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-SD-LABEL: hadd32_zext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd.4s v0, v0, v1
; CHECK-SD-NEXT: ushll2.2d v1, v0, #0
; CHECK-SD-NEXT: ushll.2d v0, v0, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd32_zext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uaddl.2d v2, v0, v1
; CHECK-GI-NEXT: uaddl2.2d v1, v0, v1
; CHECK-GI-NEXT: ushr.2d v0, v2, #1
; CHECK-GI-NEXT: ushr.2d v1, v1, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
%zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
%add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
%resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %resulti32
}
; Halving add with <4 x i8> inputs: sext to i16 lanes, add nsw, ashr by one,
; returning <4 x i16>.
; The expected code works on .4h lanes and re-creates the sign extension with
; shl/sshr #8 pairs. SelectionDAG is then expected to emit shadd.4h, while
; GlobalISel is expected to accumulate with ssra and shift with sshr #1.
; Note: the %zextsrc* values are sign extensions despite their names, and
; %resulti8 holds i16 lanes.
define <4 x i16> @hadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-SD-LABEL: hadd8_sext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shl.4h v1, v1, #8
; CHECK-SD-NEXT: shl.4h v0, v0, #8
; CHECK-SD-NEXT: sshr.4h v1, v1, #8
; CHECK-SD-NEXT: sshr.4h v0, v0, #8
; CHECK-SD-NEXT: shadd.4h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd8_sext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl.4h v1, v1, #8
; CHECK-GI-NEXT: shl.4h v0, v0, #8
; CHECK-GI-NEXT: sshr.4h v1, v1, #8
; CHECK-GI-NEXT: ssra.4h v1, v0, #8
; CHECK-GI-NEXT: sshr.4h v0, v1, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
%add = add nsw <4 x i16> %zextsrc1, %zextsrc2
%resulti8 = ashr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
; Halving add with <4 x i8> inputs: zext to i16 lanes, add nuw nsw, shift
; right by one, returning <4 x i16>.
; NOTE(review): despite the _asr suffix, the shift in this IR is lshr, so the
; body is identical to hadd8_zext_lsr. The assertions below were generated
; for the lshr form; changing the shift requires regenerating them.
; SelectionDAG is expected to clear the high byte of each .4h lane with bic
; and emit uhadd.4h; GlobalISel is expected to mask with and, then add + ushr.
define <4 x i16> @hadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-SD-LABEL: hadd8_zext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: bic.4h v1, #255, lsl #8
; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8
; CHECK-SD-NEXT: uhadd.4h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd8_zext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
; CHECK-GI-NEXT: add.4h v0, v0, v1
; CHECK-GI-NEXT: ushr.4h v0, v0, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
%add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
; Negative case with <4 x i8> inputs: sext to i16 lanes, add nsw, then a
; *logical* shift right by one, returning <4 x i16>.
; Neither selector is expected to form a halving add: both sign-extend with
; shl/sshr #8, accumulate with ssra and finish with ushr #1. Only the
; register assignment differs.
; Note: the %zextsrc* values are sign extensions despite their names.
define <4 x i16> @hadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-SD-LABEL: hadd8_sext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shl.4h v0, v0, #8
; CHECK-SD-NEXT: shl.4h v1, v1, #8
; CHECK-SD-NEXT: sshr.4h v0, v0, #8
; CHECK-SD-NEXT: ssra.4h v0, v1, #8
; CHECK-SD-NEXT: ushr.4h v0, v0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd8_sext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl.4h v1, v1, #8
; CHECK-GI-NEXT: shl.4h v0, v0, #8
; CHECK-GI-NEXT: sshr.4h v1, v1, #8
; CHECK-GI-NEXT: ssra.4h v1, v0, #8
; CHECK-GI-NEXT: ushr.4h v0, v1, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
%add = add nsw <4 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
; Halving add with <4 x i8> inputs: zext to i16 lanes, add nuw nsw, lshr by
; one, returning <4 x i16>.
; SelectionDAG is expected to clear the high byte of each .4h lane with bic
; and emit uhadd.4h; GlobalISel is expected to mask with and, then add + ushr.
define <4 x i16> @hadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-SD-LABEL: hadd8_zext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: bic.4h v1, #255, lsl #8
; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8
; CHECK-SD-NEXT: uhadd.4h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd8_zext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
; CHECK-GI-NEXT: add.4h v0, v0, v1
; CHECK-GI-NEXT: ushr.4h v0, v0, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
%add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
; Halving add with <2 x i8> inputs: sext to i16 lanes, add nsw, ashr by one,
; returning <2 x i16>.
; The expected code works on .2s lanes and re-creates the sign extension with
; shl/sshr #24. SelectionDAG is then expected to emit shadd.2s. GlobalISel is
; expected to emit a longer sequence that builds the shift amount in a
; register and shifts with neg + sshl on .4h lanes.
; Note: the %zextsrc* values are sign extensions despite their names.
define <2 x i16> @hadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-SD-LABEL: hadd8x2_sext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shl.2s v1, v1, #24
; CHECK-SD-NEXT: shl.2s v0, v0, #24
; CHECK-SD-NEXT: sshr.2s v1, v1, #24
; CHECK-SD-NEXT: sshr.2s v0, v0, #24
; CHECK-SD-NEXT: shadd.2s v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd8x2_sext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl.2s v1, v1, #24
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: shl.2s v0, v0, #24
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: sshr.2s v1, v1, #24
; CHECK-GI-NEXT: mov.h v2[1], w8
; CHECK-GI-NEXT: ssra.2s v1, v0, #24
; CHECK-GI-NEXT: uzp1.4h v0, v1, v0
; CHECK-GI-NEXT: neg.4h v1, v2
; CHECK-GI-NEXT: sshl.4h v0, v0, v1
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
%add = add nsw <2 x i16> %zextsrc1, %zextsrc2
%resulti8 = ashr <2 x i16> %add, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
; Halving add with <2 x i8> inputs: zext to i16 lanes, add nuw nsw, shift
; right by one, returning <2 x i16>.
; NOTE(review): despite the _asr suffix, the shift in this IR is lshr, so the
; body is identical to hadd8x2_zext_lsr. The assertions below were generated
; for the lshr form; changing the shift requires regenerating them.
; SelectionDAG is expected to mask each .2s lane to its low byte and emit
; uhadd.2s; GlobalISel is expected to emit a longer sequence ending in
; neg + ushl on .4h lanes.
define <2 x i16> @hadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-SD-LABEL: hadd8x2_zext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff
; CHECK-SD-NEXT: and.8b v1, v1, v2
; CHECK-SD-NEXT: and.8b v0, v0, v2
; CHECK-SD-NEXT: uhadd.2s v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd8x2_zext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d2, #0x0000ff000000ff
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: add.2s v0, v0, v1
; CHECK-GI-NEXT: mov.h v2[1], w8
; CHECK-GI-NEXT: uzp1.4h v0, v0, v0
; CHECK-GI-NEXT: neg.4h v1, v2
; CHECK-GI-NEXT: ushl.4h v0, v0, v1
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
%add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
; Negative case with <2 x i8> inputs: sext to i16 lanes, add nsw, then a
; *logical* shift right by one, returning <2 x i16>.
; Neither selector is expected to form a halving add. SelectionDAG is
; expected to sign-extend with shl/sshr #24, accumulate with ssra, mask each
; .2s lane to 16 bits and shift with ushr #1. GlobalISel is expected to emit
; a longer sequence ending in neg + ushl on .4h lanes.
; Note: the %zextsrc* values are sign extensions despite their names.
define <2 x i16> @hadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-SD-LABEL: hadd8x2_sext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shl.2s v0, v0, #24
; CHECK-SD-NEXT: shl.2s v1, v1, #24
; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-SD-NEXT: sshr.2s v0, v0, #24
; CHECK-SD-NEXT: ssra.2s v0, v1, #24
; CHECK-SD-NEXT: and.8b v0, v0, v2
; CHECK-SD-NEXT: ushr.2s v0, v0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd8x2_sext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl.2s v1, v1, #24
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: shl.2s v0, v0, #24
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: sshr.2s v1, v1, #24
; CHECK-GI-NEXT: mov.h v2[1], w8
; CHECK-GI-NEXT: ssra.2s v1, v0, #24
; CHECK-GI-NEXT: uzp1.4h v0, v1, v0
; CHECK-GI-NEXT: neg.4h v1, v2
; CHECK-GI-NEXT: ushl.4h v0, v0, v1
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
%add = add nsw <2 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
; Halving add with <2 x i8> inputs: zext to i16 lanes, add nuw nsw, lshr by
; one, returning <2 x i16>.
; SelectionDAG is expected to mask each .2s lane to its low byte and emit
; uhadd.2s; GlobalISel is expected to emit a longer sequence ending in
; neg + ushl on .4h lanes.
define <2 x i16> @hadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-SD-LABEL: hadd8x2_zext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff
; CHECK-SD-NEXT: and.8b v1, v1, v2
; CHECK-SD-NEXT: and.8b v0, v0, v2
; CHECK-SD-NEXT: uhadd.2s v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadd8x2_zext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d2, #0x0000ff000000ff
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: add.2s v0, v0, v1
; CHECK-GI-NEXT: mov.h v2[1], w8
; CHECK-GI-NEXT: uzp1.4h v0, v0, v0
; CHECK-GI-NEXT: neg.4h v1, v2
; CHECK-GI-NEXT: ushl.4h v0, v0, v1
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
%add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
%resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
; Rounding halving add with <4 x i8> inputs: sext to i16 lanes, add nsw, add
; one more (%add2), ashr by one, returning <4 x i16>.
; SelectionDAG is expected to sign-extend with shl/sshr #8 and emit
; srhadd.4h; GlobalISel is expected to keep the explicit +1 (movi #1 + add)
; and shift with sshr #1.
; Note: the %zextsrc* values are sign extensions despite their names.
define <4 x i16> @rhadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-SD-LABEL: rhadd8_sext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shl.4h v1, v1, #8
; CHECK-SD-NEXT: shl.4h v0, v0, #8
; CHECK-SD-NEXT: sshr.4h v1, v1, #8
; CHECK-SD-NEXT: sshr.4h v0, v0, #8
; CHECK-SD-NEXT: srhadd.4h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rhadd8_sext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl.4h v1, v1, #8
; CHECK-GI-NEXT: shl.4h v0, v0, #8
; CHECK-GI-NEXT: movi.4h v2, #1
; CHECK-GI-NEXT: sshr.4h v1, v1, #8
; CHECK-GI-NEXT: ssra.4h v1, v0, #8
; CHECK-GI-NEXT: add.4h v0, v1, v2
; CHECK-GI-NEXT: sshr.4h v0, v0, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
%add = add nsw <4 x i16> %zextsrc1, %zextsrc2
%add2 = add nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
%resulti8 = ashr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
; Rounding halving add with <4 x i8> inputs: zext to i16 lanes, add nuw nsw,
; add one more (%add2), shift right by one, returning <4 x i16>.
; NOTE(review): despite the _asr suffix, the shift in this IR is lshr, so the
; body is identical to rhadd8_zext_lsr. The assertions below were generated
; for the lshr form; changing the shift requires regenerating them.
; SelectionDAG is expected to clear the high bytes with bic and emit
; urhadd.4h; GlobalISel is expected to mask, add twice and shift with ushr #1.
define <4 x i16> @rhadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-SD-LABEL: rhadd8_zext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: bic.4h v1, #255, lsl #8
; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8
; CHECK-SD-NEXT: urhadd.4h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rhadd8_zext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
; CHECK-GI-NEXT: movi.4h v2, #1
; CHECK-GI-NEXT: add.4h v0, v0, v1
; CHECK-GI-NEXT: add.4h v0, v0, v2
; CHECK-GI-NEXT: ushr.4h v0, v0, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
%add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
%add2 = add nuw nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
%resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
; Negative case with <4 x i8> inputs: sext to i16 lanes, add nsw, add one
; more (%add2), then a *logical* shift right by one, returning <4 x i16>.
; Neither selector is expected to form a rounding halving add: both
; sign-extend with shl/sshr #8, accumulate with ssra, add the constant 1 and
; finish with ushr #1. Only the register assignment differs.
; Note: the %zextsrc* values are sign extensions despite their names.
define <4 x i16> @rhadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-SD-LABEL: rhadd8_sext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shl.4h v0, v0, #8
; CHECK-SD-NEXT: shl.4h v1, v1, #8
; CHECK-SD-NEXT: movi.4h v2, #1
; CHECK-SD-NEXT: sshr.4h v0, v0, #8
; CHECK-SD-NEXT: ssra.4h v0, v1, #8
; CHECK-SD-NEXT: add.4h v0, v0, v2
; CHECK-SD-NEXT: ushr.4h v0, v0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rhadd8_sext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl.4h v1, v1, #8
; CHECK-GI-NEXT: shl.4h v0, v0, #8
; CHECK-GI-NEXT: movi.4h v2, #1
; CHECK-GI-NEXT: sshr.4h v1, v1, #8
; CHECK-GI-NEXT: ssra.4h v1, v0, #8
; CHECK-GI-NEXT: add.4h v0, v1, v2
; CHECK-GI-NEXT: ushr.4h v0, v0, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
%add = add nsw <4 x i16> %zextsrc1, %zextsrc2
%add2 = add nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
%resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
; Rounding halving add with <4 x i8> inputs: zext to i16 lanes, add nuw nsw,
; add one more (%add2), lshr by one, returning <4 x i16>.
; SelectionDAG is expected to clear the high bytes with bic and emit
; urhadd.4h; GlobalISel is expected to mask, add twice and shift with ushr #1.
define <4 x i16> @rhadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-SD-LABEL: rhadd8_zext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: bic.4h v1, #255, lsl #8
; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8
; CHECK-SD-NEXT: urhadd.4h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rhadd8_zext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
; CHECK-GI-NEXT: movi.4h v2, #1
; CHECK-GI-NEXT: add.4h v0, v0, v1
; CHECK-GI-NEXT: add.4h v0, v0, v2
; CHECK-GI-NEXT: ushr.4h v0, v0, #1
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
%zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
%add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
%add2 = add nuw nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
%resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
ret <4 x i16> %resulti8
}
; Rounding halving add with <2 x i8> inputs: sext to i16 lanes, add nsw, add
; one more (%add2), ashr by one, returning <2 x i16>.
; SelectionDAG is expected to sign-extend on .2s lanes with shl/sshr #24 and
; emit srhadd.2s; GlobalISel is expected to emit a longer sequence with an
; explicit +1 and a neg + sshl shift on .4h lanes.
; Note: the %zextsrc* values are sign extensions despite their names.
define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-SD-LABEL: rhadd8x2_sext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shl.2s v1, v1, #24
; CHECK-SD-NEXT: shl.2s v0, v0, #24
; CHECK-SD-NEXT: sshr.2s v1, v1, #24
; CHECK-SD-NEXT: sshr.2s v0, v0, #24
; CHECK-SD-NEXT: srhadd.2s v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rhadd8x2_sext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl.2s v1, v1, #24
; CHECK-GI-NEXT: shl.2s v0, v0, #24
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: dup.2s v2, w8
; CHECK-GI-NEXT: sshr.2s v1, v1, #24
; CHECK-GI-NEXT: ssra.2s v1, v0, #24
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov.h v0[1], w8
; CHECK-GI-NEXT: add.2s v1, v1, v2
; CHECK-GI-NEXT: uzp1.4h v1, v1, v0
; CHECK-GI-NEXT: neg.4h v0, v0
; CHECK-GI-NEXT: sshl.4h v0, v1, v0
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
%add = add nsw <2 x i16> %zextsrc1, %zextsrc2
%add2 = add nsw <2 x i16> %add, <i16 1, i16 1>
%resulti8 = ashr <2 x i16> %add2, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
; Rounding halving add with <2 x i8> inputs: zext to i16 lanes, add nuw nsw,
; add one more (%add2), shift right by one, returning <2 x i16>.
; NOTE(review): despite the _asr suffix, the shift in this IR is lshr, so the
; body is identical to rhadd8x2_zext_lsr. The assertions below were generated
; for the lshr form; changing the shift requires regenerating them.
; SelectionDAG is expected to mask each .2s lane to its low byte and emit
; urhadd.2s; GlobalISel is expected to emit a longer sequence with an
; explicit +1 and a neg + ushl shift on .4h lanes.
define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-SD-LABEL: rhadd8x2_zext_asr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff
; CHECK-SD-NEXT: and.8b v1, v1, v2
; CHECK-SD-NEXT: and.8b v0, v0, v2
; CHECK-SD-NEXT: urhadd.2s v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rhadd8x2_zext_asr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d2, #0x0000ff000000ff
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
; CHECK-GI-NEXT: dup.2s v2, w8
; CHECK-GI-NEXT: add.2s v0, v0, v1
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: add.2s v0, v0, v2
; CHECK-GI-NEXT: mov.h v1[1], w8
; CHECK-GI-NEXT: uzp1.4h v0, v0, v0
; CHECK-GI-NEXT: neg.4h v1, v1
; CHECK-GI-NEXT: ushl.4h v0, v0, v1
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
%add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
%add2 = add nuw nsw <2 x i16> %add, <i16 1, i16 1>
%resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
; Negative case with <2 x i8> inputs: sext to i16 lanes, add nsw, add one
; more (%add2), then a *logical* shift right by one, returning <2 x i16>.
; Neither selector is expected to form a rounding halving add. SelectionDAG
; is expected to sign-extend with shl/sshr #24, compute the sum plus one as
; mvn + sub, mask each .2s lane to 16 bits and shift with ushr #1. GlobalISel
; is expected to emit a longer sequence with an explicit +1 and a neg + ushl
; shift on .4h lanes.
; Note: the %zextsrc* values are sign extensions despite their names.
define <2 x i16> @rhadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-SD-LABEL: rhadd8x2_sext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shl.2s v0, v0, #24
; CHECK-SD-NEXT: shl.2s v1, v1, #24
; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-SD-NEXT: sshr.2s v0, v0, #24
; CHECK-SD-NEXT: sshr.2s v1, v1, #24
; CHECK-SD-NEXT: mvn.8b v0, v0
; CHECK-SD-NEXT: sub.2s v0, v1, v0
; CHECK-SD-NEXT: and.8b v0, v0, v2
; CHECK-SD-NEXT: ushr.2s v0, v0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rhadd8x2_sext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl.2s v1, v1, #24
; CHECK-GI-NEXT: shl.2s v0, v0, #24
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: dup.2s v2, w8
; CHECK-GI-NEXT: sshr.2s v1, v1, #24
; CHECK-GI-NEXT: ssra.2s v1, v0, #24
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov.h v0[1], w8
; CHECK-GI-NEXT: add.2s v1, v1, v2
; CHECK-GI-NEXT: uzp1.4h v1, v1, v0
; CHECK-GI-NEXT: neg.4h v0, v0
; CHECK-GI-NEXT: ushl.4h v0, v1, v0
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
%add = add nsw <2 x i16> %zextsrc1, %zextsrc2
%add2 = add nsw <2 x i16> %add, <i16 1, i16 1>
%resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
; Rounding halving add with <2 x i8> inputs: zext to i16 lanes, add nuw nsw,
; add one more (%add2), lshr by one, returning <2 x i16>.
; SelectionDAG is expected to mask each .2s lane to its low byte and emit
; urhadd.2s; GlobalISel is expected to emit a longer sequence with an
; explicit +1 and a neg + ushl shift on .4h lanes.
define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-SD-LABEL: rhadd8x2_zext_lsr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff
; CHECK-SD-NEXT: and.8b v1, v1, v2
; CHECK-SD-NEXT: and.8b v0, v0, v2
; CHECK-SD-NEXT: urhadd.2s v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rhadd8x2_zext_lsr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d2, #0x0000ff000000ff
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
; CHECK-GI-NEXT: dup.2s v2, w8
; CHECK-GI-NEXT: add.2s v0, v0, v1
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: add.2s v0, v0, v2
; CHECK-GI-NEXT: mov.h v1[1], w8
; CHECK-GI-NEXT: uzp1.4h v0, v0, v0
; CHECK-GI-NEXT: neg.4h v1, v1
; CHECK-GI-NEXT: ushl.4h v0, v0, v1
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
%add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
%add2 = add nuw nsw <2 x i16> %add, <i16 1, i16 1>
%resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1>
ret <2 x i16> %resulti8
}
; Signed halving add of <8 x i8> with a constant: sext to i16 lanes, add a
; splat of 10, lshr by one, trunc back to i8 and store to %dest.
; SelectionDAG is expected to materialise the constant at the narrow width
; (movi.8b #10) and emit shadd.8b; GlobalISel is expected to emit the widened
; saddw.8h + shrn.8b sequence.
define void @testLowerToSHADD8b_c(<8 x i8> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD8b_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.8b v1, #10
; CHECK-SD-NEXT: shadd.8b v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD8b_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v1, #10
; CHECK-GI-NEXT: saddw.8h v0, v1, v0
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
%sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
%add = add nsw <8 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
%resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
store <8 x i8> %result, ptr %dest, align 8
ret void
}
; Signed halving add of <4 x i16> with a constant: sext to i32 lanes, add a
; splat of 10, lshr by one, trunc back to i16 and store to %dest.
; SelectionDAG is expected to emit movi.4h #10 + shadd.4h; GlobalISel is
; expected to emit the widened saddw.4s + shrn.4h sequence.
; Note: %resulti16 holds i32 lanes here; the name is a leftover.
define void @testLowerToSHADD4h_c(<4 x i16> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD4h_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.4h v1, #10
; CHECK-SD-NEXT: shadd.4h v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD4h_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.4s v1, #10
; CHECK-GI-NEXT: saddw.4s v0, v1, v0
; CHECK-GI-NEXT: shrn.4h v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
%sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
%add = add nsw <4 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10>
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
%result = trunc <4 x i32> %resulti16 to <4 x i16>
store <4 x i16> %result, ptr %dest, align 8
ret void
}
; Signed halving add of <2 x i32> with a constant: sext to i64 lanes, add a
; splat of 10, lshr by one, trunc back to i32 and store to %dest.
; SelectionDAG is expected to emit movi.2s #10 + shadd.2s; GlobalISel is
; expected to load the wide constant from the constant pool (adrp + ldr) and
; emit saddw.2d + shrn.2s.
; Note: %resulti16 holds i64 lanes here; the name is a leftover.
define void @testLowerToSHADD2s_c(<2 x i32> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD2s_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.2s v1, #10
; CHECK-SD-NEXT: shadd.2s v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD2s_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI74_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI74_0]
; CHECK-GI-NEXT: saddw.2d v0, v1, v0
; CHECK-GI-NEXT: shrn.2s v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
%sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
%add = add nsw <2 x i64> %sextsrc1, <i64 10, i64 10>
%resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
%result = trunc <2 x i64> %resulti16 to <2 x i32>
store <2 x i32> %result, ptr %dest, align 8
ret void
}
; Signed halving add of <16 x i8> with a constant: sext to i16 lanes, add a
; splat of 10, lshr by one, trunc back to i8 and store to %dest.
; SelectionDAG is expected to emit movi.16b #10 + shadd.16b; GlobalISel is
; expected to split the widened add into saddw/saddw2 and narrow with
; shrn/shrn2.
define void @testLowerToSHADD16b_c(<16 x i8> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD16b_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.16b v1, #10
; CHECK-SD-NEXT: shadd.16b v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD16b_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v1, #10
; CHECK-GI-NEXT: saddw.8h v2, v1, v0
; CHECK-GI-NEXT: saddw2.8h v0, v1, v0
; CHECK-GI-NEXT: shrn.8b v1, v2, #1
; CHECK-GI-NEXT: shrn2.16b v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
%sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
%add = add nsw <16 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
%resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
store <16 x i8> %result, ptr %dest, align 16
ret void
}
; Signed halving add of <8 x i16> with a constant: sext to i32 lanes, add a
; splat of 10, lshr by one, trunc back to i16 and store to %dest.
; SelectionDAG is expected to emit movi.8h #10 + shadd.8h; GlobalISel is
; expected to split the widened add into saddw/saddw2 and narrow with
; shrn/shrn2.
; Note: %resulti16 holds i32 lanes here; the name is a leftover.
define void @testLowerToSHADD8h_c(<8 x i16> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD8h_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.8h v1, #10
; CHECK-SD-NEXT: shadd.8h v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD8h_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.4s v1, #10
; CHECK-GI-NEXT: saddw.4s v2, v1, v0
; CHECK-GI-NEXT: saddw2.4s v0, v1, v0
; CHECK-GI-NEXT: shrn.4h v1, v2, #1
; CHECK-GI-NEXT: shrn2.8h v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
%sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%add = add nsw <8 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
%resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%result = trunc <8 x i32> %resulti16 to <8 x i16>
store <8 x i16> %result, ptr %dest, align 16
ret void
}
; Signed halving add of <4 x i32> with a constant: sext to i64 lanes, add a
; splat of 10, lshr by one, trunc back to i32 and store to %dest.
; SelectionDAG is expected to emit movi.4s #10 + shadd.4s; GlobalISel is
; expected to load the wide constant from the constant pool (adrp + ldr),
; split the add into saddw/saddw2 and narrow with shrn/shrn2.
; Note: %resulti16 holds i64 lanes here; the name is a leftover.
define void @testLowerToSHADD4s_c(<4 x i32> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToSHADD4s_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.4s v1, #10
; CHECK-SD-NEXT: shadd.4s v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToSHADD4s_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI77_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI77_0]
; CHECK-GI-NEXT: saddw.2d v2, v1, v0
; CHECK-GI-NEXT: saddw2.2d v0, v1, v0
; CHECK-GI-NEXT: shrn.2s v1, v2, #1
; CHECK-GI-NEXT: shrn2.4s v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
%sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
%add = add nsw <4 x i64> %sextsrc1, <i64 10, i64 10, i64 10, i64 10>
%resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
%result = trunc <4 x i64> %resulti16 to <4 x i32>
store <4 x i32> %result, ptr %dest, align 16
ret void
}
; Unsigned halving add of <8 x i8> with a constant: zext to i16 lanes, add a
; splat of 10, lshr by one, trunc back to i8 and store to %dest.
; SelectionDAG is expected to emit movi.8b #10 + uhadd.8b; GlobalISel is
; expected to emit the widened uaddw.8h + shrn.8b sequence.
define void @testLowerToUHADD8b_c(<8 x i8> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD8b_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.8b v1, #10
; CHECK-SD-NEXT: uhadd.8b v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD8b_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v1, #10
; CHECK-GI-NEXT: uaddw.8h v0, v1, v0
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
%add = add nuw nsw <8 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
%resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <8 x i16> %resulti16 to <8 x i8>
store <8 x i8> %result, ptr %dest, align 8
ret void
}
; Unsigned halving add of <4 x i16> with a constant: zext to i32 lanes, add a
; splat of 10, lshr by one, trunc back to i16 and store to %dest.
; SelectionDAG is expected to emit movi.4h #10 + uhadd.4h; GlobalISel is
; expected to emit the widened uaddw.4s + shrn.4h sequence.
; Note: %resulti16 holds i32 lanes here; the name is a leftover.
define void @testLowerToUHADD4h_c(<4 x i16> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD4h_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.4h v1, #10
; CHECK-SD-NEXT: uhadd.4h v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD4h_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.4s v1, #10
; CHECK-GI-NEXT: uaddw.4s v0, v1, v0
; CHECK-GI-NEXT: shrn.4h v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
%add = add nuw nsw <4 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10>
%resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
%result = trunc <4 x i32> %resulti16 to <4 x i16>
store <4 x i16> %result, ptr %dest, align 8
ret void
}
; Unsigned halving add of <2 x i32> with a constant: zext to i64 lanes, add a
; splat of 10, lshr by one, trunc back to i32 and store to %dest.
; SelectionDAG is expected to emit movi.2s #10 + uhadd.2s; GlobalISel is
; expected to load the wide constant from the constant pool (adrp + ldr) and
; emit uaddw.2d + shrn.2s.
; Note: %resulti16 holds i64 lanes here; the name is a leftover.
define void @testLowerToUHADD2s_c(<2 x i32> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD2s_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.2s v1, #10
; CHECK-SD-NEXT: uhadd.2s v0, v0, v1
; CHECK-SD-NEXT: str d0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD2s_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI80_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI80_0]
; CHECK-GI-NEXT: uaddw.2d v0, v1, v0
; CHECK-GI-NEXT: shrn.2s v0, v0, #1
; CHECK-GI-NEXT: str d0, [x0]
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
%add = add nuw nsw <2 x i64> %zextsrc1, <i64 10, i64 10>
%resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
%result = trunc <2 x i64> %resulti16 to <2 x i32>
store <2 x i32> %result, ptr %dest, align 8
ret void
}
; Unsigned halving add of <16 x i8> with a constant: zext to i16 lanes, add a
; splat of 10, lshr by one, trunc back to i8 and store to %dest.
; SelectionDAG is expected to emit movi.16b #10 + uhadd.16b; GlobalISel is
; expected to split the widened add into uaddw/uaddw2 and narrow with
; shrn/shrn2.
define void @testLowerToUHADD16b_c(<16 x i8> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD16b_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.16b v1, #10
; CHECK-SD-NEXT: uhadd.16b v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD16b_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v1, #10
; CHECK-GI-NEXT: uaddw.8h v2, v1, v0
; CHECK-GI-NEXT: uaddw2.8h v0, v1, v0
; CHECK-GI-NEXT: shrn.8b v1, v2, #1
; CHECK-GI-NEXT: shrn2.16b v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
%add = add nuw nsw <16 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
%resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%result = trunc <16 x i16> %resulti16 to <16 x i8>
store <16 x i8> %result, ptr %dest, align 16
ret void
}
; Unsigned halving add of <8 x i16> with a constant: zext to i32 lanes, add a
; splat of 10, lshr by one, trunc back to i16 and store to %dest.
; SelectionDAG is expected to emit movi.8h #10 + uhadd.8h; GlobalISel is
; expected to split the widened add into uaddw/uaddw2 and narrow with
; shrn/shrn2.
; Note: %resulti16 holds i32 lanes here; the name is a leftover.
define void @testLowerToUHADD8h_c(<8 x i16> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD8h_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.8h v1, #10
; CHECK-SD-NEXT: uhadd.8h v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD8h_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.4s v1, #10
; CHECK-GI-NEXT: uaddw.4s v2, v1, v0
; CHECK-GI-NEXT: uaddw2.4s v0, v1, v0
; CHECK-GI-NEXT: shrn.4h v1, v2, #1
; CHECK-GI-NEXT: shrn2.8h v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%add = add nuw nsw <8 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
%resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%result = trunc <8 x i32> %resulti16 to <8 x i16>
store <8 x i16> %result, ptr %dest, align 16
ret void
}
; Unsigned halving add of <4 x i32> with a constant: zext to i64 lanes, add a
; splat of 10, lshr by one, trunc back to i32 and store to %dest.
; SelectionDAG is expected to emit movi.4s #10 + uhadd.4s; GlobalISel is
; expected to load the wide constant from the constant pool (adrp + ldr),
; split the add into uaddw/uaddw2 and narrow with shrn/shrn2.
; Note: %resulti16 holds i64 lanes here; the name is a leftover.
define void @testLowerToUHADD4s_c(<4 x i32> %src1, ptr nocapture writeonly %dest) {
; CHECK-SD-LABEL: testLowerToUHADD4s_c:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.4s v1, #10
; CHECK-SD-NEXT: uhadd.4s v0, v0, v1
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testLowerToUHADD4s_c:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI83_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI83_0]
; CHECK-GI-NEXT: uaddw.2d v2, v1, v0
; CHECK-GI-NEXT: uaddw2.2d v0, v1, v0
; CHECK-GI-NEXT: shrn.2s v1, v2, #1
; CHECK-GI-NEXT: shrn2.4s v1, v0, #1
; CHECK-GI-NEXT: str q1, [x0]
; CHECK-GI-NEXT: ret
%zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
%add = add nuw nsw <4 x i64> %zextsrc1, <i64 10, i64 10, i64 10, i64 10>
%resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
%result = trunc <4 x i64> %resulti16 to <4 x i32>
store <4 x i32> %result, ptr %dest, align 16
ret void
}
; Halving add where one operand is an i16 vector masked with 7 (so only its low
; three bits can be set) and the other is a zero-extended <8 x i8>. The default
; llc run expects the masked operand to be narrowed (xtn + and.8b) and a single
; uhadd.8b; the GlobalISel run expects uaddw.8h followed by shrn.8b.
define <8 x i8> @andmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) {
; CHECK-SD-LABEL: andmaskv8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.8b v2, #7
; CHECK-SD-NEXT: xtn.8b v0, v0
; CHECK-SD-NEXT: and.8b v0, v0, v2
; CHECK-SD-NEXT: uhadd.8b v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: andmaskv8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v2, #7
; CHECK-GI-NEXT: and.16b v0, v0, v2
; CHECK-GI-NEXT: uaddw.8h v0, v0, v1
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: ret
%lhs = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%rhs = zext <8 x i8> %src2 to <8 x i16>
%sum = add nuw nsw <8 x i16> %lhs, %rhs
%half = lshr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%narrow = trunc <8 x i16> %half to <8 x i8>
ret <8 x i8> %narrow
}
; 16-lane variant of the masked halving add: a <16 x i16> operand masked with 7
; is added to a zero-extended <16 x i8>, shifted right by one and truncated.
; The default llc run expects uzp1.16b + and.16b feeding one uhadd.16b; the
; GlobalISel run expects the widened uaddw/uaddw2 + shrn/shrn2 form.
define <16 x i8> @andmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) {
; CHECK-SD-LABEL: andmaskv16i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.16b v3, #7
; CHECK-SD-NEXT: uzp1.16b v0, v0, v1
; CHECK-SD-NEXT: and.16b v0, v0, v3
; CHECK-SD-NEXT: uhadd.16b v0, v0, v2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: andmaskv16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v3, #7
; CHECK-GI-NEXT: and.16b v0, v0, v3
; CHECK-GI-NEXT: and.16b v1, v1, v3
; CHECK-GI-NEXT: uaddw.8h v0, v0, v2
; CHECK-GI-NEXT: uaddw2.8h v1, v1, v2
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: shrn2.16b v0, v1, #1
; CHECK-GI-NEXT: ret
%lhs = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%rhs = zext <16 x i8> %src2 to <16 x i16>
%sum = add nuw nsw <16 x i16> %lhs, %rhs
%half = lshr <16 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%narrow = trunc <16 x i16> %half to <16 x i8>
ret <16 x i8> %narrow
}
; Masked halving add where both <16 x i16> operands are masked (with 7 and 3
; respectively) rather than zero-extended, then added, shifted right by one and
; truncated to i8. The default llc run expects both operands narrowed with
; uzp1.16b, masked, and combined by uhadd.16b; the GlobalISel run expects plain
; add.8h on each half followed by shrn/shrn2.
define <16 x i8> @andmask2v16i8(<16 x i16> %src1, <16 x i16> %src2) {
; CHECK-SD-LABEL: andmask2v16i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uzp1.16b v2, v2, v3
; CHECK-SD-NEXT: movi.16b v3, #3
; CHECK-SD-NEXT: uzp1.16b v0, v0, v1
; CHECK-SD-NEXT: movi.16b v1, #7
; CHECK-SD-NEXT: and.16b v2, v2, v3
; CHECK-SD-NEXT: and.16b v0, v0, v1
; CHECK-SD-NEXT: uhadd.16b v0, v0, v2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: andmask2v16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v4, #7
; CHECK-GI-NEXT: movi.8h v5, #3
; CHECK-GI-NEXT: and.16b v0, v0, v4
; CHECK-GI-NEXT: and.16b v2, v2, v5
; CHECK-GI-NEXT: and.16b v1, v1, v4
; CHECK-GI-NEXT: and.16b v3, v3, v5
; CHECK-GI-NEXT: add.8h v0, v0, v2
; CHECK-GI-NEXT: add.8h v1, v1, v3
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: shrn2.16b v0, v1, #1
; CHECK-GI-NEXT: ret
%lhs = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%rhs = and <16 x i16> %src2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
%sum = add nuw nsw <16 x i16> %lhs, %rhs
%half = lshr <16 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%narrow = trunc <16 x i16> %half to <16 x i8>
ret <16 x i8> %narrow
}
; Masked halving add on <8 x i16> operands where the second mask is 255, i.e.
; exactly the low byte of each lane. The default llc run expects both operands
; narrowed with xtn.8b, only the first one masked with 7, and one uhadd.8b;
; the GlobalISel run expects both masks applied at 16 bits, add.8h and shrn.8b.
define <8 x i8> @andmask2v8i8(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-SD-LABEL: andmask2v8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.8b v2, #7
; CHECK-SD-NEXT: xtn.8b v0, v0
; CHECK-SD-NEXT: xtn.8b v1, v1
; CHECK-SD-NEXT: and.8b v0, v0, v2
; CHECK-SD-NEXT: uhadd.8b v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: andmask2v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v2, #7
; CHECK-GI-NEXT: movi.2d v3, #0xff00ff00ff00ff
; CHECK-GI-NEXT: and.16b v0, v0, v2
; CHECK-GI-NEXT: and.16b v1, v1, v3
; CHECK-GI-NEXT: add.8h v0, v0, v1
; CHECK-GI-NEXT: shrn.8b v0, v0, #1
; CHECK-GI-NEXT: ret
%lhs = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%rhs = and <8 x i16> %src2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%sum = add nuw nsw <8 x i16> %lhs, %rhs
%half = lshr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%narrow = trunc <8 x i16> %half to <8 x i8>
ret <8 x i8> %narrow
}
; Masked halving add whose second mask is 511 (nine bits), so that operand no
; longer fits in a byte; the function returns the <8 x i16> shifted sum without
; truncating. The default llc run expects the masks applied at 16 bits
; (movi/and and bic) feeding uhadd.8h; the GlobalISel run expects add.8h
; followed by ushr.8h.
define <8 x i16> @andmask3v8i8(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-SD-LABEL: andmask3v8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.8h v2, #7
; CHECK-SD-NEXT: bic.8h v1, #254, lsl #8
; CHECK-SD-NEXT: and.16b v0, v0, v2
; CHECK-SD-NEXT: uhadd.8h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: andmask3v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.8h v2, #7
; CHECK-GI-NEXT: mvni.8h v3, #254, lsl #8
; CHECK-GI-NEXT: and.16b v1, v1, v3
; CHECK-GI-NEXT: and.16b v0, v0, v2
; CHECK-GI-NEXT: add.8h v0, v0, v1
; CHECK-GI-NEXT: ushr.8h v0, v0, #1
; CHECK-GI-NEXT: ret
%lhs = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%rhs = and <8 x i16> %src2, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
%sum = add nuw nsw <8 x i16> %lhs, %rhs
%half = lshr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %half
}
; Signed halving add where the first operand is a <16 x i16> arithmetically
; shifted right by 11 (leaving a sign-extended 5-bit value per lane) and the
; second is a sign-extended <16 x i8>. The default llc run expects the shifted
; operand to be narrowed with uzp1.16b and combined by shadd.16b; the GlobalISel
; run expects ssra/saddw2 at 16 bits followed by shrn/shrn2.
define <16 x i8> @sextmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) {
; CHECK-SD-LABEL: sextmaskv16i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshr.8h v1, v1, #11
; CHECK-SD-NEXT: sshr.8h v0, v0, #11
; CHECK-SD-NEXT: uzp1.16b v0, v0, v1
; CHECK-SD-NEXT: shadd.16b v0, v0, v2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sextmaskv16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.8h v3, v2, #0
; CHECK-GI-NEXT: sshr.8h v1, v1, #11
; CHECK-GI-NEXT: ssra.8h v3, v0, #11
; CHECK-GI-NEXT: saddw2.8h v1, v1, v2
; CHECK-GI-NEXT: shrn.8b v0, v3, #1
; CHECK-GI-NEXT: shrn2.16b v0, v1, #1
; CHECK-GI-NEXT: ret
%lhs = ashr <16 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
%rhs = sext <16 x i8> %src2 to <16 x i16>
%sum = add nsw <16 x i16> %lhs, %rhs
%half = ashr <16 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%narrow = trunc <16 x i16> %half to <16 x i8>
ret <16 x i8> %narrow
}
; 8-lane variant of the signed case: an <8 x i16> shifted right arithmetically
; by 11 is added to a sign-extended <8 x i8>, halved with ashr and truncated.
; The default llc run expects sshr + xtn feeding shadd.8b; the GlobalISel run
; expects sshll + ssra at 16 bits followed by shrn.8b.
define <8 x i8> @sextmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) {
; CHECK-SD-LABEL: sextmaskv8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshr.8h v0, v0, #11
; CHECK-SD-NEXT: xtn.8b v0, v0
; CHECK-SD-NEXT: shadd.8b v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sextmaskv8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.8h v1, v1, #0
; CHECK-GI-NEXT: ssra.8h v1, v0, #11
; CHECK-GI-NEXT: shrn.8b v0, v1, #1
; CHECK-GI-NEXT: ret
%lhs = ashr <8 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
%rhs = sext <8 x i8> %src2 to <8 x i16>
%sum = add nsw <8 x i16> %lhs, %rhs
%half = ashr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%narrow = trunc <8 x i16> %half to <8 x i8>
ret <8 x i8> %narrow
}
; Signed case with a shift of exactly 8: the arithmetic shift leaves the high
; byte of each i16 lane, sign-extended. The default llc run expects that
; operand to be produced directly by shrn.8b #8 and combined by shadd.8b; the
; GlobalISel run expects sshll + ssra at 16 bits followed by shrn.8b.
define <8 x i8> @sextmask2v8i8(<8 x i16> %src1, <8 x i8> %src2) {
; CHECK-SD-LABEL: sextmask2v8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shrn.8b v0, v0, #8
; CHECK-SD-NEXT: shadd.8b v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sextmask2v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.8h v1, v1, #0
; CHECK-GI-NEXT: ssra.8h v1, v0, #8
; CHECK-GI-NEXT: shrn.8b v0, v1, #1
; CHECK-GI-NEXT: ret
%lhs = ashr <8 x i16> %src1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%rhs = sext <8 x i8> %src2 to <8 x i16>
%sum = add nsw <8 x i16> %lhs, %rhs
%half = ashr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%narrow = trunc <8 x i16> %half to <8 x i8>
ret <8 x i8> %narrow
}
; Signed case with a shift of only 7, which leaves nine significant bits per
; lane, one more than fits in an i8. The default llc run therefore expects the
; halving add to stay at 16 bits (shadd.8h) with xtn.8b applied afterwards; the
; GlobalISel run expects sshll + ssra followed by shrn.8b.
define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) {
; CHECK-SD-LABEL: sextmask3v8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushr.8h v0, v0, #7
; CHECK-SD-NEXT: sshll.8h v1, v1, #0
; CHECK-SD-NEXT: shadd.8h v0, v0, v1
; CHECK-SD-NEXT: xtn.8b v0, v0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sextmask3v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.8h v1, v1, #0
; CHECK-GI-NEXT: ssra.8h v1, v0, #7
; CHECK-GI-NEXT: shrn.8b v0, v1, #1
; CHECK-GI-NEXT: ret
%lhs = ashr <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%rhs = sext <8 x i8> %src2 to <8 x i16>
%sum = add nsw <8 x i16> %lhs, %rhs
%half = ashr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%narrow = trunc <8 x i16> %half to <8 x i8>
ret <8 x i8> %narrow
}
; Rounding halving add routed through an odd intermediate width: the input is
; zero-extended to i32 and incremented, truncated to i19, incremented again,
; shifted right by one, and brought back to i16 via i32. Both llc runs share
; one expectation here: a single urhadd.4h against a splat of 1.
define <4 x i16> @ext_via_i19(<4 x i16> %a) {
; CHECK-LABEL: ext_via_i19:
; CHECK: // %bb.0:
; CHECK-NEXT: movi.4h v1, #1
; CHECK-NEXT: urhadd.4h v0, v0, v1
; CHECK-NEXT: ret
%wide = zext <4 x i16> %a to <4 x i32>
%inc32 = add <4 x i32> %wide, <i32 1, i32 1, i32 1, i32 1>
%as19 = trunc <4 x i32> %inc32 to <4 x i19>
%inc19 = add <4 x i19> %as19, <i19 1, i19 1, i19 1, i19 1>
%half19 = lshr <4 x i19> %inc19, <i19 1, i19 1, i19 1, i19 1>
%back32 = zext <4 x i19> %half19 to <4 x i32>
%narrow = trunc <4 x i32> %back32 to <4 x i16>
ret <4 x i16> %narrow
}
; Both <8 x i8> inputs are sign-extended to i16, passed to the *unsigned*
; rounding-halving-add intrinsic at 16 bits, and the result is truncated back
; to i8. The default llc run expects this to collapse to one srhadd.8b; the
; GlobalISel run expects the literal sshll + urhadd.8h + xtn sequence.
define <8 x i8> @srhadd_v8i8_trunc(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-SD-LABEL: srhadd_v8i8_trunc:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: srhadd.8b v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srhadd_v8i8_trunc:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.8h v0, v0, #0
; CHECK-GI-NEXT: sshll.8h v1, v1, #0
; CHECK-GI-NEXT: urhadd.8h v0, v0, v1
; CHECK-GI-NEXT: xtn.8b v0, v0
; CHECK-GI-NEXT: ret
%lhs = sext <8 x i8> %s0 to <8 x i16>
%rhs = sext <8 x i8> %s1 to <8 x i16>
%avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
%narrow = trunc <8 x i16> %avg to <8 x i8>
ret <8 x i8> %narrow
}
; Both <4 x i16> inputs are sign-extended to i32, passed to the *unsigned*
; rounding-halving-add intrinsic at 32 bits, and the result is truncated back
; to i16. The default llc run expects this to collapse to one srhadd.4h; the
; GlobalISel run expects the literal sshll + urhadd.4s + xtn sequence.
define <4 x i16> @srhadd_v4i16_trunc(<4 x i16> %s0, <4 x i16> %s1) {
; CHECK-SD-LABEL: srhadd_v4i16_trunc:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: srhadd.4h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srhadd_v4i16_trunc:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.4s v0, v0, #0
; CHECK-GI-NEXT: sshll.4s v1, v1, #0
; CHECK-GI-NEXT: urhadd.4s v0, v0, v1
; CHECK-GI-NEXT: xtn.4h v0, v0
; CHECK-GI-NEXT: ret
%lhs = sext <4 x i16> %s0 to <4 x i32>
%rhs = sext <4 x i16> %s1 to <4 x i32>
%avg = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
%narrow = trunc <4 x i32> %avg to <4 x i16>
ret <4 x i16> %narrow
}
; Both <2 x i32> inputs are sign-extended to i64, passed to the unsigned
; rounding-halving-add intrinsic at 64 bits, and the result is truncated back
; to i32. Both llc runs share one expectation here: the operation is expanded
; at 64 bits into eor/orr/ushr/sub and narrowed with xtn.2s.
define <2 x i32> @srhadd_v2i32_trunc(<2 x i32> %s0, <2 x i32> %s1) {
; CHECK-LABEL: srhadd_v2i32_trunc:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll.2d v0, v0, #0
; CHECK-NEXT: sshll.2d v1, v1, #0
; CHECK-NEXT: eor.16b v2, v0, v1
; CHECK-NEXT: orr.16b v0, v0, v1
; CHECK-NEXT: ushr.2d v1, v2, #1
; CHECK-NEXT: sub.2d v0, v0, v1
; CHECK-NEXT: xtn.2s v0, v0
; CHECK-NEXT: ret
%lhs = sext <2 x i32> %s0 to <2 x i64>
%rhs = sext <2 x i32> %s1 to <2 x i64>
%avg = call <2 x i64> @llvm.aarch64.neon.urhadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
%narrow = trunc <2 x i64> %avg to <2 x i32>
ret <2 x i32> %narrow
}
; Both <8 x i8> inputs are zero-extended to i16, passed to the *signed*
; rounding-halving-add intrinsic at 16 bits, and the result is truncated back
; to i8. The default llc run expects this to collapse to one urhadd.8b; the
; GlobalISel run expects the literal ushll + srhadd.8h + xtn sequence.
define <8 x i8> @urhadd_v8i8_trunc(<8 x i8> %s0, <8 x i8> %s1) {
; CHECK-SD-LABEL: urhadd_v8i8_trunc:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: urhadd.8b v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urhadd_v8i8_trunc:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.8h v0, v0, #0
; CHECK-GI-NEXT: ushll.8h v1, v1, #0
; CHECK-GI-NEXT: srhadd.8h v0, v0, v1
; CHECK-GI-NEXT: xtn.8b v0, v0
; CHECK-GI-NEXT: ret
%lhs = zext <8 x i8> %s0 to <8 x i16>
%rhs = zext <8 x i8> %s1 to <8 x i16>
%avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
%narrow = trunc <8 x i16> %avg to <8 x i8>
ret <8 x i8> %narrow
}
; Both <4 x i16> inputs are zero-extended to i32, passed to the *signed*
; rounding-halving-add intrinsic at 32 bits, and the result is truncated back
; to i16. The default llc run expects this to collapse to one urhadd.4h; the
; GlobalISel run expects the literal ushll + srhadd.4s + xtn sequence.
define <4 x i16> @urhadd_v4i16_trunc(<4 x i16> %s0, <4 x i16> %s1) {
; CHECK-SD-LABEL: urhadd_v4i16_trunc:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: urhadd.4h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urhadd_v4i16_trunc:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: ushll.4s v1, v1, #0
; CHECK-GI-NEXT: srhadd.4s v0, v0, v1
; CHECK-GI-NEXT: xtn.4h v0, v0
; CHECK-GI-NEXT: ret
%lhs = zext <4 x i16> %s0 to <4 x i32>
%rhs = zext <4 x i16> %s1 to <4 x i32>
%avg = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
%narrow = trunc <4 x i32> %avg to <4 x i16>
ret <4 x i16> %narrow
}
; Both <2 x i32> inputs are zero-extended to i64, passed to the signed
; rounding-halving-add intrinsic at 64 bits, and the result is truncated back
; to i32. Both llc runs share one expectation here: a widening uaddl.2d, an
; explicit add of a splat 1, and a narrowing shrn.2s by one.
define <2 x i32> @urhadd_v2i32_trunc(<2 x i32> %s0, <2 x i32> %s1) {
; CHECK-LABEL: urhadd_v2i32_trunc:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: uaddl.2d v0, v0, v1
; CHECK-NEXT: dup.2d v1, x8
; CHECK-NEXT: add.2d v0, v0, v1
; CHECK-NEXT: shrn.2s v0, v0, #1
; CHECK-NEXT: ret
%lhs = zext <2 x i32> %s0 to <2 x i64>
%rhs = zext <2 x i32> %s1 to <2 x i64>
%avg = call <2 x i64> @llvm.aarch64.neon.srhadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
%narrow = trunc <2 x i64> %avg to <2 x i32>
ret <2 x i32> %narrow
}
; Declarations of the signed (srhadd) and unsigned (urhadd) rounding halving
; add intrinsics: first the 64-bit vector forms, then the 128-bit vector forms.
; NOTE(review): the v2i64 variants called by srhadd_v2i32_trunc and
; urhadd_v2i32_trunc are not declared in this list - presumably they are
; declared elsewhere or left to the IR parser; confirm before relying on it.
declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)