; blob: 8d17836a2b761bc7927b46a7576d35ed49fe5b4f [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; sqshl intrinsic on <8 x i8>, both operands loaded from memory; a single
; register-form sqshl on the .8b arrangement is expected.
define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %vals = load <8 x i8>, ptr %A
  %shifts = load <8 x i8>, ptr %B
  %out = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %vals, <8 x i8> %shifts)
  ret <8 x i8> %out
}
; sqshl intrinsic on <4 x i16>, both operands loaded from memory; a single
; register-form sqshl on the .4h arrangement is expected.
define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %vals = load <4 x i16>, ptr %A
  %shifts = load <4 x i16>, ptr %B
  %out = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %vals, <4 x i16> %shifts)
  ret <4 x i16> %out
}
; sqshl intrinsic on <2 x i32>, both operands loaded from memory; a single
; register-form sqshl on the .2s arrangement is expected.
define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %vals = load <2 x i32>, ptr %A
  %shifts = load <2 x i32>, ptr %B
  %out = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %vals, <2 x i32> %shifts)
  ret <2 x i32> %out
}
; sqshl intrinsic on <1 x i64>, both operands loaded from memory; the expected
; code uses the d-register form of sqshl.
define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %amt = load <1 x i64>, ptr %B
  %out = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %val, <1 x i64> %amt)
  ret <1 x i64> %out
}
; sqshl on <1 x i64> with a constant second operand of 1. With -global-isel=0
; the constant becomes the immediate "#1"; with -global-isel=1 it is first
; materialised through w8 into d1 and the register form is used.
define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshl1d_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshl d0, d0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshl1d_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: sqshl d0, d0, d1
; CHECK-GI-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %out = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %out
}
; Scalar i64 sqshl intrinsic: both operands are loaded straight into d0/d1 and
; the result is moved back to x0 with fmov.
define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %amt = load i64, ptr %B
  %out = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %val, i64 %amt)
  ret i64 %out
}
; Scalar i64 sqshl with a constant second operand of 1. With -global-isel=0 the
; immediate form "#1" is selected; with -global-isel=1 the constant goes
; through w8/d1 and the register form is used.
define i64 @sqshl_scalar_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshl_scalar_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshl d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshl_scalar_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: sqshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %out = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %val, i64 1)
  ret i64 %out
}
; uqshl intrinsic on <8 x i8>, both operands loaded from memory; a single
; register-form uqshl on the .8b arrangement is expected.
define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %vals = load <8 x i8>, ptr %A
  %shifts = load <8 x i8>, ptr %B
  %out = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %vals, <8 x i8> %shifts)
  ret <8 x i8> %out
}
; uqshl intrinsic on <4 x i16>, both operands loaded from memory; a single
; register-form uqshl on the .4h arrangement is expected.
define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %vals = load <4 x i16>, ptr %A
  %shifts = load <4 x i16>, ptr %B
  %out = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %vals, <4 x i16> %shifts)
  ret <4 x i16> %out
}
; uqshl intrinsic on <2 x i32>, both operands loaded from memory; a single
; register-form uqshl on the .2s arrangement is expected.
define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %vals = load <2 x i32>, ptr %A
  %shifts = load <2 x i32>, ptr %B
  %out = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %vals, <2 x i32> %shifts)
  ret <2 x i32> %out
}
; 128-bit variant: sqshl intrinsic on <16 x i8> with q-register loads and a
; single sqshl on the .16b arrangement.
define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %vals = load <16 x i8>, ptr %A
  %shifts = load <16 x i8>, ptr %B
  %out = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %vals, <16 x i8> %shifts)
  ret <16 x i8> %out
}
; 128-bit variant: sqshl intrinsic on <8 x i16> with q-register loads and a
; single sqshl on the .8h arrangement.
define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %vals = load <8 x i16>, ptr %A
  %shifts = load <8 x i16>, ptr %B
  %out = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %vals, <8 x i16> %shifts)
  ret <8 x i16> %out
}
; 128-bit variant: sqshl intrinsic on <4 x i32> with q-register loads and a
; single sqshl on the .4s arrangement.
define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %vals = load <4 x i32>, ptr %A
  %shifts = load <4 x i32>, ptr %B
  %out = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %vals, <4 x i32> %shifts)
  ret <4 x i32> %out
}
; 128-bit variant: sqshl intrinsic on <2 x i64> with q-register loads and a
; single sqshl on the .2d arrangement.
define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %vals = load <2 x i64>, ptr %A
  %shifts = load <2 x i64>, ptr %B
  %out = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vals, <2 x i64> %shifts)
  ret <2 x i64> %out
}
; 128-bit variant: uqshl intrinsic on <16 x i8> with q-register loads and a
; single uqshl on the .16b arrangement.
define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %vals = load <16 x i8>, ptr %A
  %shifts = load <16 x i8>, ptr %B
  %out = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %vals, <16 x i8> %shifts)
  ret <16 x i8> %out
}
; 128-bit variant: uqshl intrinsic on <8 x i16> with q-register loads and a
; single uqshl on the .8h arrangement.
define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %vals = load <8 x i16>, ptr %A
  %shifts = load <8 x i16>, ptr %B
  %out = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %vals, <8 x i16> %shifts)
  ret <8 x i16> %out
}
; 128-bit variant: uqshl intrinsic on <4 x i32> with q-register loads and a
; single uqshl on the .4s arrangement.
define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %vals = load <4 x i32>, ptr %A
  %shifts = load <4 x i32>, ptr %B
  %out = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %vals, <4 x i32> %shifts)
  ret <4 x i32> %out
}
; 128-bit variant: uqshl intrinsic on <2 x i64> with q-register loads and a
; single uqshl on the .2d arrangement.
define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %vals = load <2 x i64>, ptr %A
  %shifts = load <2 x i64>, ptr %B
  %out = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vals, <2 x i64> %shifts)
  ret <2 x i64> %out
}
; uqshl intrinsic on <1 x i64>, both operands loaded from memory; the expected
; code uses the d-register form of uqshl.
define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %amt = load <1 x i64>, ptr %B
  %out = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %val, <1 x i64> %amt)
  ret <1 x i64> %out
}
; uqshl on <1 x i64> with a constant second operand of 1. With -global-isel=0
; the constant becomes the immediate "#1"; with -global-isel=1 it is first
; materialised through w8 into d1 and the register form is used.
define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshl1d_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: uqshl d0, d0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshl1d_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: uqshl d0, d0, d1
; CHECK-GI-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %out = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %out
}
; Scalar i64 uqshl intrinsic: both operands are loaded straight into d0/d1 and
; the result is moved back to x0 with fmov.
define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %amt = load i64, ptr %B
  %out = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %val, i64 %amt)
  ret i64 %out
}
; Scalar i64 uqshl with a constant second operand of 1. With -global-isel=0 the
; immediate form "#1" is selected; with -global-isel=1 the constant goes
; through w8/d1 and the register form is used.
define i64 @uqshl_scalar_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshl_scalar_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: uqshl d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshl_scalar_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: uqshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %out = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %val, i64 1)
  ret i64 %out
}
; Intrinsic declarations used by the sqshl/uqshl tests above, grouped by
; intrinsic: scalar first, then 64-bit vectors, then 128-bit vectors.

; sqshl
declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; uqshl
declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; srshl intrinsic on <8 x i8>, both operands loaded from memory; a single
; register-form srshl on the .8b arrangement is expected.
define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %vals = load <8 x i8>, ptr %A
  %shifts = load <8 x i8>, ptr %B
  %out = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %vals, <8 x i8> %shifts)
  ret <8 x i8> %out
}
; srshl intrinsic on <4 x i16>, both operands loaded from memory; a single
; register-form srshl on the .4h arrangement is expected.
define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %vals = load <4 x i16>, ptr %A
  %shifts = load <4 x i16>, ptr %B
  %out = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %vals, <4 x i16> %shifts)
  ret <4 x i16> %out
}
; srshl intrinsic on <2 x i32>, both operands loaded from memory; a single
; register-form srshl on the .2s arrangement is expected.
define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %vals = load <2 x i32>, ptr %A
  %shifts = load <2 x i32>, ptr %B
  %out = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %vals, <2 x i32> %shifts)
  ret <2 x i32> %out
}
; srshl intrinsic on <1 x i64>, both operands loaded from memory; the expected
; code uses the d-register form of srshl.
define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: srshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %amt = load <1 x i64>, ptr %B
  %out = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %val, <1 x i64> %amt)
  ret <1 x i64> %out
}
; srshl on <1 x i64> with a constant second operand of 1. Both llc invocations
; share one expectation: the constant is materialised through w8 into d1 and
; the register form of srshl is used.
define <1 x i64> @srshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: srshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: srshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %out = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %out
}
; Scalar i64 srshl intrinsic. With -global-isel=0 the operands are loaded into
; x8/x9 and copied to d0/d1 with fmov; with -global-isel=1 they are loaded
; directly into d0/d1.
define i64 @srshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srshl_scalar:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr x8, [x0]
; CHECK-SD-NEXT: ldr x9, [x1]
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: fmov d1, x9
; CHECK-SD-NEXT: srshl d0, d0, d1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshl_scalar:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: srshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %amt = load i64, ptr %B
  %out = call i64 @llvm.aarch64.neon.srshl.i64(i64 %val, i64 %amt)
  ret i64 %out
}
; Scalar i64 srshl with a constant second operand of 1. Neither configuration
; uses an immediate form; they differ in whether the loaded value travels
; through x9 (-global-isel=0) or is loaded directly into d0 (-global-isel=1).
define i64 @srshl_scalar_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: srshl_scalar_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr x9, [x0]
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: fmov d1, x8
; CHECK-SD-NEXT: fmov d0, x9
; CHECK-SD-NEXT: srshl d0, d0, d1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshl_scalar_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: srshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %out = call i64 @llvm.aarch64.neon.srshl.i64(i64 %val, i64 1)
  ret i64 %out
}
; urshl intrinsic on <8 x i8>, both operands loaded from memory; a single
; register-form urshl on the .8b arrangement is expected.
define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %vals = load <8 x i8>, ptr %A
  %shifts = load <8 x i8>, ptr %B
  %out = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %vals, <8 x i8> %shifts)
  ret <8 x i8> %out
}
; urshl intrinsic on <4 x i16>, both operands loaded from memory; a single
; register-form urshl on the .4h arrangement is expected.
define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %vals = load <4 x i16>, ptr %A
  %shifts = load <4 x i16>, ptr %B
  %out = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %vals, <4 x i16> %shifts)
  ret <4 x i16> %out
}
; urshl intrinsic on <2 x i32>, both operands loaded from memory; a single
; register-form urshl on the .2s arrangement is expected.
define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %vals = load <2 x i32>, ptr %A
  %shifts = load <2 x i32>, ptr %B
  %out = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %vals, <2 x i32> %shifts)
  ret <2 x i32> %out
}
; urshl intrinsic on <1 x i64>, both operands loaded from memory; the expected
; code uses the d-register form of urshl.
define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: urshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %amt = load <1 x i64>, ptr %B
  %out = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %val, <1 x i64> %amt)
  ret <1 x i64> %out
}
; urshl on <1 x i64> with a constant second operand of 1. Both llc invocations
; share one expectation: the constant is materialised through w8 into d1 and
; the register form of urshl is used.
define <1 x i64> @urshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: urshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: urshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %out = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %out
}
; Scalar i64 urshl intrinsic. With -global-isel=0 the operands are loaded into
; x8/x9 and copied to d0/d1 with fmov; with -global-isel=1 they are loaded
; directly into d0/d1.
define i64 @urshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: urshl_scalar:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr x8, [x0]
; CHECK-SD-NEXT: ldr x9, [x1]
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: fmov d1, x9
; CHECK-SD-NEXT: urshl d0, d0, d1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshl_scalar:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: urshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %amt = load i64, ptr %B
  %out = call i64 @llvm.aarch64.neon.urshl.i64(i64 %val, i64 %amt)
  ret i64 %out
}
; Scalar i64 urshl with a constant second operand of 1. Neither configuration
; uses an immediate form; they differ in whether the loaded value travels
; through x9 (-global-isel=0) or is loaded directly into d0 (-global-isel=1).
define i64 @urshl_scalar_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: urshl_scalar_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr x9, [x0]
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: fmov d1, x8
; CHECK-SD-NEXT: fmov d0, x9
; CHECK-SD-NEXT: urshl d0, d0, d1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshl_scalar_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: urshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %out = call i64 @llvm.aarch64.neon.urshl.i64(i64 %val, i64 1)
  ret i64 %out
}
; 128-bit variant: srshl intrinsic on <16 x i8> with q-register loads and a
; single srshl on the .16b arrangement.
define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %vals = load <16 x i8>, ptr %A
  %shifts = load <16 x i8>, ptr %B
  %out = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %vals, <16 x i8> %shifts)
  ret <16 x i8> %out
}
; 128-bit variant: srshl intrinsic on <8 x i16> with q-register loads and a
; single srshl on the .8h arrangement.
define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %vals = load <8 x i16>, ptr %A
  %shifts = load <8 x i16>, ptr %B
  %out = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %vals, <8 x i16> %shifts)
  ret <8 x i16> %out
}
; 128-bit variant: srshl intrinsic on <4 x i32> with q-register loads and a
; single srshl on the .4s arrangement.
define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %vals = load <4 x i32>, ptr %A
  %shifts = load <4 x i32>, ptr %B
  %out = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %vals, <4 x i32> %shifts)
  ret <4 x i32> %out
}
; 128-bit variant: srshl intrinsic on <2 x i64> with q-register loads and a
; single srshl on the .2d arrangement.
define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: srshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: srshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %vals = load <2 x i64>, ptr %A
  %shifts = load <2 x i64>, ptr %B
  %out = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vals, <2 x i64> %shifts)
  ret <2 x i64> %out
}
; 128-bit variant: urshl intrinsic on <16 x i8> with q-register loads and a
; single urshl on the .16b arrangement.
define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %vals = load <16 x i8>, ptr %A
  %shifts = load <16 x i8>, ptr %B
  %out = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %vals, <16 x i8> %shifts)
  ret <16 x i8> %out
}
; 128-bit variant: urshl intrinsic on <8 x i16> with q-register loads and a
; single urshl on the .8h arrangement.
define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %vals = load <8 x i16>, ptr %A
  %shifts = load <8 x i16>, ptr %B
  %out = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %vals, <8 x i16> %shifts)
  ret <8 x i16> %out
}
; 128-bit variant: urshl intrinsic on <4 x i32> with q-register loads and a
; single urshl on the .4s arrangement.
define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %vals = load <4 x i32>, ptr %A
  %shifts = load <4 x i32>, ptr %B
  %out = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %vals, <4 x i32> %shifts)
  ret <4 x i32> %out
}
; 128-bit variant: urshl intrinsic on <2 x i64> with q-register loads and a
; single urshl on the .2d arrangement.
define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: urshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: urshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %vals = load <2 x i64>, ptr %A
  %shifts = load <2 x i64>, ptr %B
  %out = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vals, <2 x i64> %shifts)
  ret <2 x i64> %out
}
; Intrinsic declarations for the srshl/urshl tests in this file, grouped by
; intrinsic: scalar first, then 64-bit vectors, then 128-bit vectors.

; srshl
declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; urshl
declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; sqrshl intrinsic on <8 x i8>, both operands loaded from memory; a single
; register-form sqrshl on the .8b arrangement is expected.
define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %vals = load <8 x i8>, ptr %A
  %shifts = load <8 x i8>, ptr %B
  %out = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %vals, <8 x i8> %shifts)
  ret <8 x i8> %out
}
; sqrshl intrinsic on <4 x i16>, both operands loaded from memory; a single
; register-form sqrshl on the .4h arrangement is expected.
define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %vals = load <4 x i16>, ptr %A
  %shifts = load <4 x i16>, ptr %B
  %out = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %vals, <4 x i16> %shifts)
  ret <4 x i16> %out
}
; sqrshl intrinsic on <2 x i32>, both operands loaded from memory; a single
; register-form sqrshl on the .2s arrangement is expected.
define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %vals = load <2 x i32>, ptr %A
  %shifts = load <2 x i32>, ptr %B
  %out = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %vals, <2 x i32> %shifts)
  ret <2 x i32> %out
}
; uqrshl intrinsic on <8 x i8>, both operands loaded from memory; a single
; register-form uqrshl on the .8b arrangement is expected.
define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %vals = load <8 x i8>, ptr %A
  %shifts = load <8 x i8>, ptr %B
  %out = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %vals, <8 x i8> %shifts)
  ret <8 x i8> %out
}
; uqrshl intrinsic on <4 x i16>, both operands loaded from memory; a single
; register-form uqrshl on the .4h arrangement is expected.
define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %vals = load <4 x i16>, ptr %A
  %shifts = load <4 x i16>, ptr %B
  %out = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %vals, <4 x i16> %shifts)
  ret <4 x i16> %out
}
; uqrshl intrinsic on <2 x i32>, both operands loaded from memory; a single
; register-form uqrshl on the .2s arrangement is expected.
define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %vals = load <2 x i32>, ptr %A
  %shifts = load <2 x i32>, ptr %B
  %out = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %vals, <2 x i32> %shifts)
  ret <2 x i32> %out
}
; 128-bit variant: sqrshl intrinsic on <16 x i8> with q-register loads and a
; single sqrshl on the .16b arrangement.
define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %vals = load <16 x i8>, ptr %A
  %shifts = load <16 x i8>, ptr %B
  %out = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %vals, <16 x i8> %shifts)
  ret <16 x i8> %out
}
; 128-bit variant: sqrshl intrinsic on <8 x i16> with q-register loads and a
; single sqrshl on the .8h arrangement.
define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %vals = load <8 x i16>, ptr %A
  %shifts = load <8 x i16>, ptr %B
  %out = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %vals, <8 x i16> %shifts)
  ret <8 x i16> %out
}
; 128-bit variant: sqrshl intrinsic on <4 x i32> with q-register loads and a
; single sqrshl on the .4s arrangement.
define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %vals = load <4 x i32>, ptr %A
  %shifts = load <4 x i32>, ptr %B
  %out = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %vals, <4 x i32> %shifts)
  ret <4 x i32> %out
}
; 128-bit variant: sqrshl intrinsic on <2 x i64> with q-register loads and a
; single sqrshl on the .2d arrangement.
define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %vals = load <2 x i64>, ptr %A
  %shifts = load <2 x i64>, ptr %B
  %out = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %vals, <2 x i64> %shifts)
  ret <2 x i64> %out
}
; sqrshl intrinsic on <1 x i64>, both operands loaded from memory; the expected
; code uses the d-register form of sqrshl.
define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %amt = load <1 x i64>, ptr %B
  %out = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %val, <1 x i64> %amt)
  ret <1 x i64> %out
}
; sqrshl on <1 x i64> with a constant second operand of 1. Both llc invocations
; share one expectation: the constant is materialised through w8 into d1 and
; the register form of sqrshl is used.
define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: sqrshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %out = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %out
}
; Scalar i64 sqrshl intrinsic: both operands are loaded straight into d0/d1 and
; the result is moved back to x0 with fmov.
define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sqrshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %amt = load i64, ptr %B
  %out = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %val, i64 %amt)
  ret i64 %out
}
; Scalar i64 sqrshl with a constant second operand of 1. Both configurations
; use the register form; the only difference in the expectations is the
; register width used to materialise the constant (x8 vs. w8).
define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: sqrshl_scalar_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov x8, #1 // =0x1
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: fmov d1, x8
; CHECK-SD-NEXT: sqrshl d0, d0, d1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqrshl_scalar_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: sqrshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %out = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %val, i64 1)
  ret i64 %out
}
; 128-bit variant: uqrshl intrinsic on <16 x i8> with q-register loads and a
; single uqrshl on the .16b arrangement.
define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %vals = load <16 x i8>, ptr %A
  %shifts = load <16 x i8>, ptr %B
  %out = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %vals, <16 x i8> %shifts)
  ret <16 x i8> %out
}
; 128-bit variant: uqrshl intrinsic on <8 x i16> with q-register loads and a
; single uqrshl on the .8h arrangement.
define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %vals = load <8 x i16>, ptr %A
  %shifts = load <8 x i16>, ptr %B
  %out = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %vals, <8 x i16> %shifts)
  ret <8 x i16> %out
}
; 128-bit variant: uqrshl intrinsic on <4 x i32> with q-register loads and a
; single uqrshl on the .4s arrangement.
define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %vals = load <4 x i32>, ptr %A
  %shifts = load <4 x i32>, ptr %B
  %out = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %vals, <4 x i32> %shifts)
  ret <4 x i32> %out
}
; 128-bit variant: uqrshl intrinsic on <2 x i64> with q-register loads and a
; single uqrshl on the .2d arrangement.
define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %vals = load <2 x i64>, ptr %A
  %shifts = load <2 x i64>, ptr %B
  %out = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %vals, <2 x i64> %shifts)
  ret <2 x i64> %out
}
; uqrshl intrinsic on <1 x i64>, both operands loaded from memory; the expected
; code uses the d-register form of uqrshl.
define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %amt = load <1 x i64>, ptr %B
  %out = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %val, <1 x i64> %amt)
  ret <1 x i64> %out
}
; uqrshl on <1 x i64> with a constant second operand of 1. Both llc invocations
; share one expectation: the constant is materialised through w8 into d1 and
; the register form of uqrshl is used.
define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind {
; CHECK-LABEL: uqrshl1d_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: ret
  %val = load <1 x i64>, ptr %A
  %out = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %val, <1 x i64> <i64 1>)
  ret <1 x i64> %out
}
; Scalar i64 uqrshl intrinsic: both operands are loaded straight into d0/d1 and
; the result is moved back to x0 with fmov.
define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uqrshl_scalar:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %val = load i64, ptr %A
  %amt = load i64, ptr %B
  %out = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %val, i64 %amt)
  ret i64 %out
}
; Scalar i64 uqrshl with a constant second operand of 1. Both configurations
; use the register form; the only difference in the expectations is the
; register width used to materialise the constant (x8 vs. w8).
define i64 @uqrshl_scalar_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: uqrshl_scalar_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov x8, #1 // =0x1
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: fmov d1, x8
; CHECK-SD-NEXT: uqrshl d0, d0, d1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqrshl_scalar_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: uqrshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %out = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %val, i64 1)
  ret i64 %out
}
; Intrinsic declarations for the sqrshl/uqrshl tests above, grouped by
; intrinsic: scalar first, then 64-bit vectors, then 128-bit vectors.

; sqrshl
declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; uqrshl
declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; urshl by a splat of -1 on <8 x i8>: SelectionDAG selects URSHR #1, GlobalISel
; keeps URSHL with an all-ones shift vector.
define <8 x i8> @urshr8b(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: urshr v0.8b, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
  %vec = load <8 x i8>, ptr %A
  %res = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %vec, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <8 x i8> %res
}
; urshl by a splat of -1 on <4 x i16>: SelectionDAG selects URSHR #1, GlobalISel
; keeps URSHL with an all-ones shift vector.
define <4 x i16> @urshr4h(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: urshr v0.4h, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
  %vec = load <4 x i16>, ptr %A
  %res = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %vec, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <4 x i16> %res
}
; urshl by a splat of -1 on <2 x i32>: SelectionDAG selects URSHR #1, GlobalISel
; keeps URSHL with an all-ones shift vector.
define <2 x i32> @urshr2s(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: urshr v0.2s, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
  %vec = load <2 x i32>, ptr %A
  %res = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %vec, <2 x i32> <i32 -1, i32 -1>)
  ret <2 x i32> %res
}
; urshl by a splat of -1 on <16 x i8>: SelectionDAG selects URSHR #1, GlobalISel
; keeps URSHL with an all-ones shift vector.
define <16 x i8> @urshr16b(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: urshr v0.16b, v0.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
  %vec = load <16 x i8>, ptr %A
  %res = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %vec, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %res
}
; urshl by a splat of -1 on <8 x i16>: SelectionDAG selects URSHR #1, GlobalISel
; keeps URSHL with an all-ones shift vector.
define <8 x i16> @urshr8h(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: urshr v0.8h, v0.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
  %vec = load <8 x i16>, ptr %A
  %res = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %vec, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <8 x i16> %res
}
; urshl by a splat of -1 on <4 x i32>: SelectionDAG selects URSHR #1, GlobalISel
; keeps URSHL with an all-ones shift vector.
define <4 x i32> @urshr4s(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: urshr v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
  %vec = load <4 x i32>, ptr %A
  %res = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %vec, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %res
}
; urshl by a splat of -1 on <2 x i64>: SelectionDAG selects URSHR #1, GlobalISel
; keeps URSHL with an all-ones shift vector.
define <2 x i64> @urshr2d(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: urshr v0.2d, v0.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
  %vec = load <2 x i64>, ptr %A
  %res = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vec, <2 x i64> <i64 -1, i64 -1>)
  ret <2 x i64> %res
}
; urshl by -1 on <1 x i64>: SelectionDAG selects scalar URSHR #1, GlobalISel
; moves -1 from a GPR into d1 and keeps the register-form URSHL.
define <1 x i64> @urshr1d(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr1d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: urshr d0, d0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr1d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: urshl d0, d0, d1
; CHECK-GI-NEXT: ret
  %vec = load <1 x i64>, ptr %A
  %res = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %vec, <1 x i64> <i64 -1>)
  ret <1 x i64> %res
}
; Scalar i64 urshl by -1: SelectionDAG selects URSHR #1, GlobalISel moves -1
; from a GPR into d1 and keeps URSHL; both return the result through x0.
define i64 @urshr_scalar(ptr %A) nounwind {
; CHECK-SD-LABEL: urshr_scalar:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: urshr d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshr_scalar:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: urshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %res = call i64 @llvm.aarch64.neon.urshl.i64(i64 %val, i64 -1)
  ret i64 %res
}
; srshl by a splat of -1 on <8 x i8>: SelectionDAG selects SRSHR #1, GlobalISel
; keeps SRSHL with an all-ones shift vector.
define <8 x i8> @srshr8b(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: srshr v0.8b, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
  %vec = load <8 x i8>, ptr %A
  %res = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %vec, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <8 x i8> %res
}
; srshl by a splat of -1 on <4 x i16>: SelectionDAG selects SRSHR #1, GlobalISel
; keeps SRSHL with an all-ones shift vector.
define <4 x i16> @srshr4h(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: srshr v0.4h, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
  %vec = load <4 x i16>, ptr %A
  %res = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %vec, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <4 x i16> %res
}
; srshl by a splat of -1 on <2 x i32>: SelectionDAG selects SRSHR #1, GlobalISel
; keeps SRSHL with an all-ones shift vector.
define <2 x i32> @srshr2s(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: srshr v0.2s, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
  %vec = load <2 x i32>, ptr %A
  %res = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %vec, <2 x i32> <i32 -1, i32 -1>)
  ret <2 x i32> %res
}
; srshl by a splat of -1 on <16 x i8>: SelectionDAG selects SRSHR #1, GlobalISel
; keeps SRSHL with an all-ones shift vector.
define <16 x i8> @srshr16b(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: srshr v0.16b, v0.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
  %vec = load <16 x i8>, ptr %A
  %res = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %vec, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %res
}
; srshl by a splat of -1 on <8 x i16>: SelectionDAG selects SRSHR #1, GlobalISel
; keeps SRSHL with an all-ones shift vector.
define <8 x i16> @srshr8h(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: srshr v0.8h, v0.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
  %vec = load <8 x i16>, ptr %A
  %res = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %vec, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <8 x i16> %res
}
; srshl by a splat of -1 on <4 x i32>: SelectionDAG selects SRSHR #1, GlobalISel
; keeps SRSHL with an all-ones shift vector.
define <4 x i32> @srshr4s(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: srshr v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
  %vec = load <4 x i32>, ptr %A
  %res = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %vec, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %res
}
; srshl by a splat of -1 on <2 x i64>: SelectionDAG selects SRSHR #1, GlobalISel
; keeps SRSHL with an all-ones shift vector.
define <2 x i64> @srshr2d(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: srshr v0.2d, v0.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
  %vec = load <2 x i64>, ptr %A
  %res = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vec, <2 x i64> <i64 -1, i64 -1>)
  ret <2 x i64> %res
}
; srshl by -1 on <1 x i64>: SelectionDAG selects scalar SRSHR #1, GlobalISel
; moves -1 from a GPR into d1 and keeps the register-form SRSHL.
define <1 x i64> @srshr1d(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr1d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: srshr d0, d0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr1d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: srshl d0, d0, d1
; CHECK-GI-NEXT: ret
  %vec = load <1 x i64>, ptr %A
  %res = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %vec, <1 x i64> <i64 -1>)
  ret <1 x i64> %res
}
; Scalar i64 srshl by -1: SelectionDAG selects SRSHR #1, GlobalISel moves -1
; from a GPR into d1 and keeps SRSHL; both return the result through x0.
define i64 @srshr_scalar(ptr %A) nounwind {
; CHECK-SD-LABEL: srshr_scalar:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: srshr d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshr_scalar:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: srshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %res = call i64 @llvm.aarch64.neon.srshl.i64(i64 %val, i64 -1)
  ret i64 %res
}
; sqshlu by a splat of 1 on <8 x i8> selects the immediate form SQSHLU #1.
define <8 x i8> @sqshlu8b(ptr %A) nounwind {
; CHECK-LABEL: sqshlu8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshlu v0.8b, v0.8b, #1
; CHECK-NEXT: ret
  %vec = load <8 x i8>, ptr %A
  %res = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %vec, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <8 x i8> %res
}
; sqshlu by a splat of 1 on <4 x i16> selects the immediate form SQSHLU #1.
define <4 x i16> @sqshlu4h(ptr %A) nounwind {
; CHECK-LABEL: sqshlu4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshlu v0.4h, v0.4h, #1
; CHECK-NEXT: ret
  %vec = load <4 x i16>, ptr %A
  %res = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %vec, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  ret <4 x i16> %res
}
; sqshlu by a splat of 1 on <2 x i32> selects the immediate form SQSHLU #1.
define <2 x i32> @sqshlu2s(ptr %A) nounwind {
; CHECK-LABEL: sqshlu2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sqshlu v0.2s, v0.2s, #1
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %A
  %res = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %vec, <2 x i32> <i32 1, i32 1>)
  ret <2 x i32> %res
}
; sqshlu by a splat of 1 on <16 x i8> selects the immediate form SQSHLU #1.
define <16 x i8> @sqshlu16b(ptr %A) nounwind {
; CHECK-LABEL: sqshlu16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshlu v0.16b, v0.16b, #1
; CHECK-NEXT: ret
  %vec = load <16 x i8>, ptr %A
  %res = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %vec, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %res
}
; sqshlu by a splat of 1 on <8 x i16> selects the immediate form SQSHLU #1.
define <8 x i16> @sqshlu8h(ptr %A) nounwind {
; CHECK-LABEL: sqshlu8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshlu v0.8h, v0.8h, #1
; CHECK-NEXT: ret
  %vec = load <8 x i16>, ptr %A
  %res = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %vec, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %res
}
; sqshlu by a splat of 1 on <4 x i32> selects the immediate form SQSHLU #1.
define <4 x i32> @sqshlu4s(ptr %A) nounwind {
; CHECK-LABEL: sqshlu4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshlu v0.4s, v0.4s, #1
; CHECK-NEXT: ret
  %vec = load <4 x i32>, ptr %A
  %res = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %vec, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %res
}
; sqshlu by a splat of 1 on <2 x i64> selects the immediate form SQSHLU #1.
define <2 x i64> @sqshlu2d(ptr %A) nounwind {
; CHECK-LABEL: sqshlu2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshlu v0.2d, v0.2d, #1
; CHECK-NEXT: ret
  %vec = load <2 x i64>, ptr %A
  %res = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vec, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %res
}
; sqshlu by constant 1 on <1 x i64> selects scalar SQSHLU #1 in both selectors;
; GlobalISel loads the operand via x8 + fmov, SelectionDAG loads d0 directly.
define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshlu1d_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshlu d0, d0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshlu1d_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr x8, [x0]
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: sqshlu d0, d0, #1
; CHECK-GI-NEXT: ret
  %vec = load <1 x i64>, ptr %A
  %res = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %vec, <1 x i64> <i64 1>)
  ret <1 x i64> %res
}
; Scalar i64 sqshlu by constant 1 selects SQSHLU #1 in both selectors;
; GlobalISel loads the operand via x8 + fmov, SelectionDAG loads d0 directly.
define i64 @sqshlu_i64_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshlu_i64_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshlu d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshlu_i64_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr x8, [x0]
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: sqshlu d0, d0, #1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
  %val = load i64, ptr %A
  %res = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %val, i64 1)
  ret i64 %res
}
; Scalar i32 sqshlu by constant 1: the operand is loaded into w8, moved to s0,
; shifted with SQSHLU #1 and moved back to w0.
define i32 @sqshlu_i32_constant(ptr %A) nounwind {
; CHECK-LABEL: sqshlu_i32_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: sqshlu s0, s0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %val = load i32, ptr %A
  %res = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %val, i32 1)
  ret i32 %res
}
declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; rshrn intrinsic, <8 x i16> -> <8 x i8>, shift 1: selects RSHRN #1.
define <8 x i8> @rshrn8b(ptr %A) nounwind {
; CHECK-LABEL: rshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %wide, i32 1)
  ret <8 x i8> %narrow
}
; rshrn intrinsic, <4 x i32> -> <4 x i16>, shift 1: selects RSHRN #1.
define <4 x i16> @rshrn4h(ptr %A) nounwind {
; CHECK-LABEL: rshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %wide, i32 1)
  ret <4 x i16> %narrow
}
; rshrn intrinsic, <2 x i64> -> <2 x i32>, shift 1: selects RSHRN #1.
define <2 x i32> @rshrn2s(ptr %A) nounwind {
; CHECK-LABEL: rshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: rshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %wide, i32 1)
  ret <2 x i32> %narrow
}
; Concatenating a loaded low half with the rshrn result selects RSHRN2, which
; writes the narrowed lanes into the upper half of v0.
define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: rshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: rshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %lo = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %hi = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %wide, i32 1)
  %res = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}
; Concatenating a loaded low half with the rshrn result selects RSHRN2, which
; writes the narrowed lanes into the upper half of v0.
define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: rshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: rshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %lo = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %hi = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %wide, i32 1)
  %res = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %res
}
; Concatenating a loaded low half with the rshrn result selects RSHRN2, which
; writes the narrowed lanes into the upper half of v0.
define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: rshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: rshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %lo = load <2 x i32>, ptr %ret
  %wide = load <2 x i64>, ptr %A
  %hi = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %wide, i32 1)
  %res = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res
}
declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Plain IR lshr by 1 followed by trunc <8 x i16> -> <8 x i8> is matched to SHRN #1.
define <8 x i8> @shrn8b(ptr %A) nounwind {
; CHECK-LABEL: shrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: shrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %wide = load <8 x i16>, ptr %A
  %shifted = lshr <8 x i16> %wide, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %narrow = trunc <8 x i16> %shifted to <8 x i8>
  ret <8 x i8> %narrow
}
; Plain IR lshr by 1 followed by trunc <4 x i32> -> <4 x i16> is matched to SHRN #1.
define <4 x i16> @shrn4h(ptr %A) nounwind {
; CHECK-LABEL: shrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: shrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %shifted = lshr <4 x i32> %wide, <i32 1, i32 1, i32 1, i32 1>
  %narrow = trunc <4 x i32> %shifted to <4 x i16>
  ret <4 x i16> %narrow
}
; Plain IR lshr by 1 followed by trunc <2 x i64> -> <2 x i32> is matched to SHRN #1.
define <2 x i32> @shrn2s(ptr %A) nounwind {
; CHECK-LABEL: shrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: shrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %shifted = lshr <2 x i64> %wide, <i64 1, i64 1>
  %narrow = trunc <2 x i64> %shifted to <2 x i32>
  ret <2 x i32> %narrow
}
; lshr + trunc concatenated onto a loaded low half is matched to SHRN2, which
; writes the narrowed lanes into the upper half of v0.
define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: shrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %lo = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %shifted = lshr <8 x i16> %wide, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %hi = trunc <8 x i16> %shifted to <8 x i8>
  %res = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}
; lshr + trunc concatenated onto a loaded low half is matched to SHRN2, which
; writes the narrowed lanes into the upper half of v0.
define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: shrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %lo = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %shifted = lshr <4 x i32> %wide, <i32 1, i32 1, i32 1, i32 1>
  %hi = trunc <4 x i32> %shifted to <4 x i16>
  %res = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %res
}
; lshr + trunc concatenated onto a loaded low half is matched to SHRN2, which
; writes the narrowed lanes into the upper half of v0.
define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: shrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: shrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %lo = load <2 x i32>, ptr %ret
  %wide = load <2 x i64>, ptr %A
  %shifted = lshr <2 x i64> %wide, <i64 1, i64 1>
  %hi = trunc <2 x i64> %shifted to <2 x i32>
  %res = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res
}
declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqshrn i64 -> i32 by 1: the GPR argument is moved to d0, narrowed with
; SQSHRN #1 and moved back to w0.
define i32 @sqshrn1s(i64 %A) nounwind {
; CHECK-LABEL: sqshrn1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqshrn s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
  ret i32 %res
}
; sqshrn intrinsic, <8 x i16> -> <8 x i8>, shift 1: selects SQSHRN #1.
define <8 x i8> @sqshrn8b(ptr %A) nounwind {
; CHECK-LABEL: sqshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %wide, i32 1)
  ret <8 x i8> %narrow
}
; sqshrn intrinsic, <4 x i32> -> <4 x i16>, shift 1: selects SQSHRN #1.
define <4 x i16> @sqshrn4h(ptr %A) nounwind {
; CHECK-LABEL: sqshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %wide, i32 1)
  ret <4 x i16> %narrow
}
; sqshrn intrinsic, <2 x i64> -> <2 x i32>, shift 1: selects SQSHRN #1.
define <2 x i32> @sqshrn2s(ptr %A) nounwind {
; CHECK-LABEL: sqshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %wide, i32 1)
  ret <2 x i32> %narrow
}
; Concatenating a loaded low half with the sqshrn result selects SQSHRN2, which
; writes the narrowed lanes into the upper half of v0.
define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %lo = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %hi = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %wide, i32 1)
  %res = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}
; Concatenating a loaded low half with the sqshrn result selects SQSHRN2, which
; writes the narrowed lanes into the upper half of v0.
define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %lo = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %hi = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %wide, i32 1)
  %res = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %res
}
; Concatenating a loaded low half with the sqshrn result selects SQSHRN2, which
; writes the narrowed lanes into the upper half of v0.
define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %lo = load <2 x i32>, ptr %ret
  %wide = load <2 x i64>, ptr %A
  %hi = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %wide, i32 1)
  %res = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res
}
declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqshrun i64 -> i32 by 1: the GPR argument is moved to d0, narrowed with
; SQSHRUN #1 and moved back to w0.
define i32 @sqshrun1s(i64 %A) nounwind {
; CHECK-LABEL: sqshrun1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqshrun s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
  ret i32 %res
}
; sqshrun intrinsic, <8 x i16> -> <8 x i8>, shift 1: selects SQSHRUN #1.
define <8 x i8> @sqshrun8b(ptr %A) nounwind {
; CHECK-LABEL: sqshrun8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrun v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %wide, i32 1)
  ret <8 x i8> %narrow
}
; sqshrun intrinsic, <4 x i32> -> <4 x i16>, shift 1: selects SQSHRUN #1.
define <4 x i16> @sqshrun4h(ptr %A) nounwind {
; CHECK-LABEL: sqshrun4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrun v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %wide, i32 1)
  ret <4 x i16> %narrow
}
; sqshrun intrinsic, <2 x i64> -> <2 x i32>, shift 1: selects SQSHRUN #1.
define <2 x i32> @sqshrun2s(ptr %A) nounwind {
; CHECK-LABEL: sqshrun2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqshrun v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %wide, i32 1)
  ret <2 x i32> %narrow
}
; Concatenating a loaded low half with the sqshrun result selects SQSHRUN2,
; which writes the narrowed lanes into the upper half of v0.
define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrun16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %lo = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %hi = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %wide, i32 1)
  %res = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}
; Concatenating a loaded low half with the sqshrun result selects SQSHRUN2,
; which writes the narrowed lanes into the upper half of v0.
define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrun8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %lo = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %hi = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %wide, i32 1)
  %res = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %res
}
; Concatenating a loaded low half with the sqshrun result selects SQSHRUN2,
; which writes the narrowed lanes into the upper half of v0.
define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqshrun4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %lo = load <2 x i32>, ptr %ret
  %wide = load <2 x i64>, ptr %A
  %hi = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %wide, i32 1)
  %res = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res
}
declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqrshrn i64 -> i32 by 1: the GPR argument is moved to d0, narrowed with
; SQRSHRN #1 and moved back to w0.
define i32 @sqrshrn1s(i64 %A) nounwind {
; CHECK-LABEL: sqrshrn1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqrshrn s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
  ret i32 %res
}
; sqrshrn intrinsic, <8 x i16> -> <8 x i8>, shift 1: selects SQRSHRN #1.
define <8 x i8> @sqrshrn8b(ptr %A) nounwind {
; CHECK-LABEL: sqrshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %wide, i32 1)
  ret <8 x i8> %narrow
}
; sqrshrn intrinsic, <4 x i32> -> <4 x i16>, shift 1: selects SQRSHRN #1.
define <4 x i16> @sqrshrn4h(ptr %A) nounwind {
; CHECK-LABEL: sqrshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %wide, i32 1)
  ret <4 x i16> %narrow
}
; sqrshrn intrinsic, <2 x i64> -> <2 x i32>, shift 1: selects SQRSHRN #1.
define <2 x i32> @sqrshrn2s(ptr %A) nounwind {
; CHECK-LABEL: sqrshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %wide, i32 1)
  ret <2 x i32> %narrow
}
; Concatenating a loaded low half with the sqrshrn result selects SQRSHRN2,
; which writes the narrowed lanes into the upper half of v0.
define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %lo = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %hi = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %wide, i32 1)
  %res = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}
; Concatenating a loaded low half with the sqrshrn result selects SQRSHRN2,
; which writes the narrowed lanes into the upper half of v0.
define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %lo = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %hi = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %wide, i32 1)
  %res = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %res
}
; Concatenating a loaded low half with the sqrshrn result selects SQRSHRN2,
; which writes the narrowed lanes into the upper half of v0.
define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
  %lo = load <2 x i32>, ptr %ret
  %wide = load <2 x i64>, ptr %A
  %hi = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %wide, i32 1)
  %res = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res
}
declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; Scalar sqrshrun i64 -> i32 by 1: the GPR argument is moved to d0, narrowed
; with SQRSHRUN #1 and moved back to w0.
define i32 @sqrshrun1s(i64 %A) nounwind {
; CHECK-LABEL: sqrshrun1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: sqrshrun s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
  ret i32 %res
}
; sqrshrun intrinsic, <8 x i16> -> <8 x i8>, shift 1: selects SQRSHRUN #1.
define <8 x i8> @sqrshrun8b(ptr %A) nounwind {
; CHECK-LABEL: sqrshrun8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrun v0.8b, v0.8h, #1
; CHECK-NEXT: ret
  %wide = load <8 x i16>, ptr %A
  %narrow = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %wide, i32 1)
  ret <8 x i8> %narrow
}
; sqrshrun intrinsic, <4 x i32> -> <4 x i16>, shift 1: selects SQRSHRUN #1.
define <4 x i16> @sqrshrun4h(ptr %A) nounwind {
; CHECK-LABEL: sqrshrun4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrun v0.4h, v0.4s, #1
; CHECK-NEXT: ret
  %wide = load <4 x i32>, ptr %A
  %narrow = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %wide, i32 1)
  ret <4 x i16> %narrow
}
; sqrshrun intrinsic, <2 x i64> -> <2 x i32>, shift 1: selects SQRSHRUN #1.
define <2 x i32> @sqrshrun2s(ptr %A) nounwind {
; CHECK-LABEL: sqrshrun2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: sqrshrun v0.2s, v0.2d, #1
; CHECK-NEXT: ret
  %wide = load <2 x i64>, ptr %A
  %narrow = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %wide, i32 1)
  ret <2 x i32> %narrow
}
; Concatenating a loaded low half with the sqrshrun result selects SQRSHRUN2,
; which writes the narrowed lanes into the upper half of v0.
define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrun16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrun2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
  %lo = load <8 x i8>, ptr %ret
  %wide = load <8 x i16>, ptr %A
  %hi = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %wide, i32 1)
  %res = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}
; Concatenating a loaded low half with the sqrshrun result selects SQRSHRUN2,
; which writes the narrowed lanes into the upper half of v0.
define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrun8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrun2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
  %lo = load <4 x i16>, ptr %ret
  %wide = load <4 x i32>, ptr %A
  %hi = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %wide, i32 1)
  %res = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %res
}
; sqrshrun4s: concatenation of a loaded <2 x i32> with the sqrshrun result of
; a <2 x i64>; expected to select sqrshrun2 on v0.4s.
define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: sqrshrun4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sqrshrun2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
%out = load <2 x i32>, ptr %ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
}
; Overloads of llvm.aarch64.neon.sqrshrun used by the sqrshrun* tests: one
; scalar (i64 -> i32) and three vector forms. The second operand is the i32
; shift amount.
declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
; uqrshrn1s: scalar i64 -> i32 form of llvm.aarch64.neon.uqrshrn with shift 1.
; The expected code moves the argument into d0 with fmov, applies the scalar
; uqrshrn (d0 -> s0), and moves the result back to w0.
define i32 @uqrshrn1s(i64 %A) nounwind {
; CHECK-LABEL: uqrshrn1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: uqrshrn s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
ret i32 %tmp
}
; uqrshrn8b: llvm.aarch64.neon.uqrshrn with shift 1 on a loaded <8 x i16>;
; a single uqrshrn narrowing .8h to .8b is expected.
define <8 x i8> @uqrshrn8b(ptr %A) nounwind {
; CHECK-LABEL: uqrshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqrshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
; uqrshrn4h: same pattern for <4 x i32> -> <4 x i16>
; (uqrshrn narrowing .4s to .4h, shift 1).
define <4 x i16> @uqrshrn4h(ptr %A) nounwind {
; CHECK-LABEL: uqrshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqrshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
; uqrshrn2s: same pattern for <2 x i64> -> <2 x i32>
; (uqrshrn narrowing .2d to .2s, shift 1).
define <2 x i32> @uqrshrn2s(ptr %A) nounwind {
; CHECK-LABEL: uqrshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqrshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
; uqrshrn16b: the shufflevector concatenates a loaded <8 x i8> (lanes 0-7)
; with the uqrshrn result (lanes 8-15); expected to select uqrshrn2 on v0.16b
; with the low half loaded directly into d0.
define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqrshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
%out = load <8 x i8>, ptr %ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
}
; uqrshrn8h: concatenation of a loaded <4 x i16> with the uqrshrn result of a
; <4 x i32>; expected to select uqrshrn2 on v0.8h.
define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqrshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
%out = load <4 x i16>, ptr %ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
}
; uqrshrn4s: concatenation of a loaded <2 x i32> with the uqrshrn result of a
; <2 x i64>; expected to select uqrshrn2 on v0.4s.
define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqrshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqrshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
%out = load <2 x i32>, ptr %ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
}
; Overloads of llvm.aarch64.neon.uqrshrn used by the uqrshrn* tests: one
; scalar (i64 -> i32) and three vector forms. The second operand is the i32
; shift amount.
declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
; uqshrn1s: scalar i64 -> i32 form of llvm.aarch64.neon.uqshrn with shift 1.
; The expected code moves the argument into d0 with fmov, applies the scalar
; uqshrn (d0 -> s0), and moves the result back to w0.
define i32 @uqshrn1s(i64 %A) nounwind {
; CHECK-LABEL: uqshrn1s:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: uqshrn s0, d0, #1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
ret i32 %tmp
}
; uqshrn8b: llvm.aarch64.neon.uqshrn with shift 1 on a loaded <8 x i16>;
; a single uqshrn narrowing .8h to .8b is expected.
define <8 x i8> @uqshrn8b(ptr %A) nounwind {
; CHECK-LABEL: uqshrn8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqshrn v0.8b, v0.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
; uqshrn4h: same pattern for <4 x i32> -> <4 x i16>
; (uqshrn narrowing .4s to .4h, shift 1).
define <4 x i16> @uqshrn4h(ptr %A) nounwind {
; CHECK-LABEL: uqshrn4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqshrn v0.4h, v0.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
; uqshrn2s: same pattern for <2 x i64> -> <2 x i32>
; (uqshrn narrowing .2d to .2s, shift 1).
define <2 x i32> @uqshrn2s(ptr %A) nounwind {
; CHECK-LABEL: uqshrn2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: uqshrn v0.2s, v0.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
; uqshrn16b: the shufflevector concatenates a loaded <8 x i8> (lanes 0-7)
; with the uqshrn result (lanes 8-15); expected to select uqshrn2 on v0.16b
; with the low half loaded directly into d0.
define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqshrn16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #1
; CHECK-NEXT: ret
%out = load <8 x i8>, ptr %ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
}
; uqshrn8h: concatenation of a loaded <4 x i16> with the uqshrn result of a
; <4 x i32>; expected to select uqshrn2 on v0.8h.
define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqshrn8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #1
; CHECK-NEXT: ret
%out = load <4 x i16>, ptr %ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
}
; uqshrn4s: concatenation of a loaded <2 x i32> with the uqshrn result of a
; <2 x i64>; expected to select uqshrn2 on v0.4s.
define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind {
; CHECK-LABEL: uqshrn4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #1
; CHECK-NEXT: ret
%out = load <2 x i32>, ptr %ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
}
; Overloads of llvm.aarch64.neon.uqshrn used by the uqshrn* tests: one scalar
; (i64 -> i32) and three vector forms. The second operand is the i32 shift
; amount.
declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; ushll8h: zext <8 x i8> to <8 x i16> followed by a plain IR shl by 1.
; The extend and shift are expected to combine into one ushll #1.
define <8 x i16> @ushll8h(ptr %A) nounwind {
; CHECK-LABEL: ushll8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %tmp3
}
; ushll4s: zext <4 x i16> to <4 x i32> followed by shl by 1; expected to
; combine into one ushll #1 (.4h -> .4s).
define <4 x i32> @ushll4s(ptr %A) nounwind {
; CHECK-LABEL: ushll4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %tmp3
}
; ushll2d: zext <2 x i32> to <2 x i64> followed by shl by 1; expected to
; combine into one ushll #1 (.2s -> .2d).
define <2 x i64> @ushll2d(ptr %A) nounwind {
; CHECK-LABEL: ushll2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.2d, v0.2s, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
ret <2 x i64> %tmp3
}
; ushll2_8h: zext + shl by 1 applied to the high 8 lanes (indices 8-15) of a
; loaded <16 x i8>. The two selectors are expected to differ: SelectionDAG
; loads only the upper 8 bytes (ldr d0, [x0, #8]), while GlobalISel loads the
; full q register and moves d[1] down before the ushll.
define <8 x i16> @ushll2_8h(ptr %A) nounwind {
; CHECK-SD-LABEL: ushll2_8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ushll2_8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #1
; CHECK-GI-NEXT: ret
%load1 = load <16 x i8>, ptr %A
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %tmp3
}
; ushll2_4s: zext + shl by 1 applied to the high 4 lanes (indices 4-7) of a
; loaded <8 x i16>. SelectionDAG loads only the upper 8 bytes; GlobalISel
; loads the full q register and extracts d[1] first.
define <4 x i32> @ushll2_4s(ptr %A) nounwind {
; CHECK-SD-LABEL: ushll2_4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ushll2_4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-GI-NEXT: ret
%load1 = load <8 x i16>, ptr %A
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %tmp3
}
; ushll2_2d: zext + shl by 1 applied to the high 2 lanes (indices 2-3) of a
; loaded <4 x i32>. SelectionDAG loads only the upper 8 bytes; GlobalISel
; loads the full q register and extracts d[1] first.
define <2 x i64> @ushll2_2d(ptr %A) nounwind {
; CHECK-SD-LABEL: ushll2_2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ushll2_2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #1
; CHECK-GI-NEXT: ret
%load1 = load <4 x i32>, ptr %A
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
ret <2 x i64> %tmp3
}
; Overloads of llvm.aarch64.neon.ushl: four 128-bit vector forms, the
; <1 x i64> form and the scalar i64 form. Both operands (value and shift
; amount) have the same type.
declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>)
declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64)
; neon_ushll8h_constant_shift: the ushl intrinsic with a splat-1 shift vector
; applied to a zero-extended <8 x i8>. SelectionDAG is expected to fold the
; extend and the shift into a single ushll #1. GlobalISel keeps the extend
; (ushll #0) and uses the register form of ushl with a movi-built shift vector.
define <8 x i16> @neon_ushll8h_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushll8h_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll8h_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.8h, #1
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushl v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
; neon_ushl8h_no_constant_shift: the shift amount is the zero-extended value
; itself rather than a constant, so no immediate form is expected. Both
; selectors emit the extend (ushll #0) followed by a register ushl.
define <8 x i16> @neon_ushl8h_no_constant_shift(ptr %A) nounwind {
; CHECK-LABEL: neon_ushl8h_no_constant_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushl v0.8h, v0.8h, v0.8h
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
; neon_ushl8h_constant_shift_extend_not_2x: the zext widens i8 lanes to i32
; (4x rather than 2x) before a ushl by splat 1. SelectionDAG is expected to
; extend in two ushll steps with the shift folded into the second one.
; GlobalISel currently extracts and extends the four bytes individually
; through general-purpose registers before a register ushl.
define <4 x i32> @neon_ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushl8h_constant_shift_extend_not_2x:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushl8h_constant_shift_extend_not_2x:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w8, [x0]
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: uxtb w8, w8
; CHECK-GI-NEXT: mov b2, v1.b[2]
; CHECK-GI-NEXT: mov b3, v1.b[1]
; CHECK-GI-NEXT: mov b4, v1.b[3]
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: fmov w10, s3
; CHECK-GI-NEXT: fmov w11, s4
; CHECK-GI-NEXT: uxtb w9, w9
; CHECK-GI-NEXT: uxtb w10, w10
; CHECK-GI-NEXT: uxtb w11, w11
; CHECK-GI-NEXT: fmov s2, w9
; CHECK-GI-NEXT: mov v1.h[1], w10
; CHECK-GI-NEXT: mov v2.h[1], w11
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i8>, ptr %A
%tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
%tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; neon_ushl8_noext_constant_shift: ushl by splat 1 on a loaded <8 x i16> with
; no extend in front. SelectionDAG is expected to emit the shift-by-one as
; add v0, v0, v0; GlobalISel emits a register ushl with a movi-built shift.
define <8 x i16> @neon_ushl8_noext_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushl8_noext_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushl8_noext_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8h, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ushl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
; neon_ushll4s_constant_shift: ushl by splat 1 on a zero-extended <4 x i16>.
; SelectionDAG folds extend + shift into ushll #1; GlobalISel emits the
; extend (ushll #0) and a register ushl.
define <4 x i32> @neon_ushll4s_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushll4s_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll4s_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.4s, #1
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; neon_ushll4s_neg_constant_shift: ushl with a splat of -1 as the shift amount
; on a zero-extended <4 x i16>. SelectionDAG is expected to emit the extend
; followed by ushr #1; GlobalISel uses an all-ones vector as the register
; shift operand of ushl.
; FIXME: unnecessary ushll.4s v0, v0, #0?
define <4 x i32> @neon_ushll4s_neg_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushll4s_neg_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll4s_neg_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
ret <4 x i32> %tmp3
}
; neon_ushll4s_constant_fold: both ushl operands are constants
; (<0, 1, 2, 3> shifted by splat 1). The expected output still loads the
; value from the constant pool and shifts it at run time (add in
; SelectionDAG, register ushl in GlobalISel).
; FIXME: should be constant folded.
define <4 x i32> @neon_ushll4s_constant_fold() nounwind {
; CHECK-SD-LABEL: neon_ushll4s_constant_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI160_0
; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI160_0]
; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll4s_constant_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: adrp x8, .LCPI160_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI160_0]
; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; neon_ushll2d_constant_shift: ushl by splat 1 on a zero-extended <2 x i32>.
; SelectionDAG folds extend + shift into ushll #1. GlobalISel loads the
; <2 x i64> shift vector from the constant pool and uses a register ushl.
define <2 x i64> @neon_ushll2d_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushll2d_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushll2d_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI161_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI161_0]
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
; neon_ushl_vscalar_constant_shift: <1 x i64> form of ushl with a constant
; shift of 1 on a zero-extended <1 x i32>. SelectionDAG builds the extended
; value with movi + zip1 and uses the scalar immediate shl. GlobalISel moves
; the value and the shift amount through general-purpose registers and uses
; the scalar register ushl.
define <1 x i64> @neon_ushl_vscalar_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushl_vscalar_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-SD-NEXT: ldr s1, [x0]
; CHECK-SD-NEXT: zip1 v0.2s, v1.2s, v0.2s
; CHECK-SD-NEXT: shl d0, d0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushl_vscalar_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w9, [x0]
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: fmov d0, x9
; CHECK-GI-NEXT: ushl d0, d0, d1
; CHECK-GI-NEXT: ret
%tmp1 = load <1 x i32>, ptr %A
%tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
%tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
ret <1 x i64> %tmp3
}
; neon_ushl_scalar_constant_shift: scalar i64 form of ushl with a constant
; shift of 1 on a zero-extended i32 load. SelectionDAG loads straight into s0
; and uses the scalar immediate shl; GlobalISel materializes the shift amount
; in a register and uses the scalar register ushl. Both return via fmov x0, d0.
define i64 @neon_ushl_scalar_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_ushl_scalar_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: shl d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_ushl_scalar_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w9, [x0]
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: fmov d0, x9
; CHECK-GI-NEXT: ushl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
%tmp1 = load i32, ptr %A
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1)
ret i64 %tmp3
}
; sshll8h: sext <8 x i8> to <8 x i16> followed by a plain IR shl by 1.
; The extend and shift are expected to combine into one sshll #1.
define <8 x i16> @sshll8h(ptr %A) nounwind {
; CHECK-LABEL: sshll8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sshll v0.8h, v0.8b, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %tmp3
}
; sshll2d: sext <2 x i32> to <2 x i64> followed by shl by 1; expected to
; combine into one sshll #1 (.2s -> .2d).
define <2 x i64> @sshll2d(ptr %A) nounwind {
; CHECK-LABEL: sshll2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sshll v0.2d, v0.2s, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
ret <2 x i64> %tmp3
}
; Overloads of llvm.aarch64.neon.sshl: four 128-bit vector forms, the
; <1 x i64> form and the scalar i64 form. Both operands (value and shift
; amount) have the same type.
declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>)
declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64)
; neon_sshl16b_constant_shift: sshl by splat 1 on a loaded <16 x i8>.
; SelectionDAG is expected to emit the shift-by-one as add v0, v0, v0;
; GlobalISel emits a register sshl with a movi-built shift vector.
define <16 x i8> @neon_sshl16b_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl16b_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl16b_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.16b, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp2
}
; neon_sshl16b_non_splat_constant_shift: the constant shift vector is not a
; splat (lane 0 is 6, the rest are 1), so no immediate form is expected. Both
; selectors load the shift vector from the constant pool and use register sshl.
define <16 x i8> @neon_sshl16b_non_splat_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl16b_non_splat_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI167_0
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI167_0]
; CHECK-SD-NEXT: sshl v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl16b_non_splat_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI167_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI167_0]
; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp2
}
; neon_sshl16b_neg_constant_shift: sshl with a splat of -2 as the shift
; amount. SelectionDAG is expected to turn it into sshr #2; GlobalISel keeps
; the register sshl with the shift vector built by movi #254 (the byte
; encoding of -2).
define <16 x i8> @neon_sshl16b_neg_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl16b_neg_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl16b_neg_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.16b, #254
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
ret <16 x i8> %tmp2
}
; neon_sshll8h_constant_shift: sshl by splat 1 on a sign-extended <8 x i8>.
; SelectionDAG folds extend + shift into sshll #1; GlobalISel emits the
; extend (sshll #0) and a register sshl.
define <8 x i16> @neon_sshll8h_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll8h_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll8h_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.8h, #1
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
; neon_sshl4s_wrong_ext_constant_shift: the sext widens i8 lanes to i32 (4x)
; before an sshl by splat 1. SelectionDAG is expected to extend in two sshll
; steps with the shift folded into the second one. GlobalISel currently
; extracts and sign-extends the four bytes individually through
; general-purpose registers before a register sshl.
define <4 x i32> @neon_sshl4s_wrong_ext_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl4s_wrong_ext_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl4s_wrong_ext_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w8, [x0]
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: sxtb w8, w8
; CHECK-GI-NEXT: mov b2, v1.b[2]
; CHECK-GI-NEXT: mov b3, v1.b[1]
; CHECK-GI-NEXT: mov b4, v1.b[3]
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: fmov w10, s3
; CHECK-GI-NEXT: fmov w11, s4
; CHECK-GI-NEXT: sxtb w9, w9
; CHECK-GI-NEXT: sxtb w10, w10
; CHECK-GI-NEXT: sxtb w11, w11
; CHECK-GI-NEXT: fmov s2, w9
; CHECK-GI-NEXT: mov v1.h[1], w10
; CHECK-GI-NEXT: mov v2.h[1], w11
; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i8>, ptr %A
%tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; neon_sshll4s_constant_shift: sshl by splat 1 on a sign-extended <4 x i16>.
; SelectionDAG folds extend + shift into sshll #1; GlobalISel emits the
; extend (sshll #0) and a register sshl.
define <4 x i32> @neon_sshll4s_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll4s_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll4s_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.4s, #1
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; neon_sshll4s_neg_constant_shift: sshl with a splat of -1 as the shift
; amount on a sign-extended <4 x i16>. SelectionDAG is expected to emit the
; extend (sshll #0) followed by sshr #1; GlobalISel uses an all-ones vector as
; the register shift operand of sshl.
define <4 x i32> @neon_sshll4s_neg_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll4s_neg_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll4s_neg_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
ret <4 x i32> %tmp3
}
; neon_sshl4s_constant_fold: both sshl operands are constants
; (<0, 1, 2, 3> shifted by splat 2). The expected output still loads the
; value from the constant pool and shifts it at run time (shl #2 in
; SelectionDAG, register sshl in GlobalISel).
; FIXME: should be constant folded.
define <4 x i32> @neon_sshl4s_constant_fold() nounwind {
; CHECK-SD-LABEL: neon_sshl4s_constant_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI173_0
; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI173_0]
; CHECK-SD-NEXT: shl v0.4s, v0.4s, #2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl4s_constant_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #2
; CHECK-GI-NEXT: adrp x8, .LCPI173_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI173_0]
; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
ret <4 x i32> %tmp3
}
; neon_sshl4s_no_fold: sshl by splat 1 on a loaded (non-constant) <4 x i32>,
; so nothing can be folded at compile time. SelectionDAG emits
; add v0, v0, v0; GlobalISel emits a register sshl.
define <4 x i32> @neon_sshl4s_no_fold(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl4s_no_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl4s_no_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; neon_sshll2d_constant_shift: sshl by splat 1 on a sign-extended <2 x i32>.
; SelectionDAG folds extend + shift into sshll #1. GlobalISel loads the
; <2 x i64> shift vector from the constant pool and uses a register sshl.
define <2 x i64> @neon_sshll2d_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll2d_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll2d_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI175_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI175_0]
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
; neon_sshll_vscalar_constant_shift: <1 x i64> form of sshl with a constant
; shift of 1. SelectionDAG builds the widened value with movi + zip1 and uses
; the scalar immediate shl. GlobalISel moves the value and the shift amount
; through general-purpose registers and uses the scalar register sshl.
; NOTE(review): the input is widened with zext even though the test is named
; sshll and exercises the signed intrinsic -- confirm zext (not sext) is
; intended before regenerating the assertions.
define <1 x i64> @neon_sshll_vscalar_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll_vscalar_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-SD-NEXT: ldr s1, [x0]
; CHECK-SD-NEXT: zip1 v0.2s, v1.2s, v0.2s
; CHECK-SD-NEXT: shl d0, d0, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll_vscalar_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w9, [x0]
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: fmov d0, x9
; CHECK-GI-NEXT: sshl d0, d0, d1
; CHECK-GI-NEXT: ret
%tmp1 = load <1 x i32>, ptr %A
%tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
%tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
ret <1 x i64> %tmp3
}
; neon_sshll_scalar_constant_shift: scalar i64 form of sshl with a constant
; shift of 1 on a widened i32 load. SelectionDAG loads straight into s0 and
; uses the scalar immediate shl; GlobalISel materializes the shift amount in
; a register and uses the scalar register sshl.
; NOTE(review): the input is widened with zext even though the test is named
; sshll -- confirm zext (not sext) is intended.
define i64 @neon_sshll_scalar_constant_shift(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll_scalar_constant_shift:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: shl d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll_scalar_constant_shift:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w9, [x0]
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: fmov d0, x9
; CHECK-GI-NEXT: sshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
%tmp1 = load i32, ptr %A
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1)
ret i64 %tmp3
}
; neon_sshll_scalar_constant_shift_m1: scalar i64 sshl with a constant shift
; of -1. SelectionDAG is expected to turn it into the scalar sshr #1;
; GlobalISel materializes -1 in a register and uses the scalar register sshl.
; NOTE(review): the input is widened with zext even though the test is named
; sshll -- confirm zext (not sext) is intended.
define i64 @neon_sshll_scalar_constant_shift_m1(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshll_scalar_constant_shift_m1:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: sshr d0, d0, #1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshll_scalar_constant_shift_m1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr w9, [x0]
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: fmov d0, x9
; CHECK-GI-NEXT: sshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
%tmp1 = load i32, ptr %A
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 -1)
ret i64 %tmp3
}
; neon_sshl2d_constant_fold: both sshl operands are constants
; (<99, 1000> shifted by splat 1). The expected output still shifts at run
; time: SelectionDAG loads one constant and doubles it with add, and
; GlobalISel loads both the value and the shift vector from the constant pool
; and uses register sshl.
; FIXME: should be constant folded.
define <2 x i64> @neon_sshl2d_constant_fold() nounwind {
; CHECK-SD-LABEL: neon_sshl2d_constant_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI179_0
; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI179_0]
; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl2d_constant_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI179_1
; CHECK-GI-NEXT: adrp x9, .LCPI179_0
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI179_1]
; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI179_0]
; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
; neon_sshl2d_no_fold: sshl by splat 2 on a loaded (non-constant) <2 x i64>.
; SelectionDAG is expected to use the immediate shl #2; GlobalISel loads the
; shift vector from the constant pool and uses register sshl.
define <2 x i64> @neon_sshl2d_no_fold(ptr %A) nounwind {
; CHECK-SD-LABEL: neon_sshl2d_no_fold:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: neon_sshl2d_no_fold:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI180_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI180_0]
; CHECK-GI-NEXT: sshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
%tmp2 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 2, i64 2>)
ret <2 x i64> %tmp3
}
; sshll2_8h: sext + shl by 1 applied to the high 8 lanes (indices 8-15) of a
; loaded <16 x i8>. SelectionDAG loads only the upper 8 bytes
; (ldr d0, [x0, #8]); GlobalISel loads the full q register and moves d[1]
; down before the sshll.
define <8 x i16> @sshll2_8h(ptr %A) nounwind {
; CHECK-SD-LABEL: sshll2_8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sshll2_8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #1
; CHECK-GI-NEXT: ret
%load1 = load <16 x i8>, ptr %A
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %tmp3
}
; sshll2_4s: sext + shl by 1 applied to the high 4 lanes (indices 4-7) of a
; loaded <8 x i16>. SelectionDAG loads only the upper 8 bytes; GlobalISel
; loads the full q register and extracts d[1] first.
define <4 x i32> @sshll2_4s(ptr %A) nounwind {
; CHECK-SD-LABEL: sshll2_4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sshll2_4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #1
; CHECK-GI-NEXT: ret
%load1 = load <8 x i16>, ptr %A
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %tmp3
}
; sshll2_2d: sext + shl by 1 applied to the high 2 lanes (indices 2-3) of a
; loaded <4 x i32>. SelectionDAG loads only the upper 8 bytes; GlobalISel
; loads the full q register and extracts d[1] first.
define <2 x i64> @sshll2_2d(ptr %A) nounwind {
; CHECK-SD-LABEL: sshll2_2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0, #8]
; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sshll2_2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d0, v0.d[1]
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #1
; CHECK-GI-NEXT: ret
%load1 = load <4 x i32>, ptr %A
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
ret <2 x i64> %tmp3
}
; sqshli8b: llvm.aarch64.neon.sqshl with a splat-1 shift vector on a loaded
; <8 x i8>. SelectionDAG is expected to pick the immediate form (sqshl #1);
; GlobalISel builds the shift vector with movi and uses the register form.
define <8 x i8> @sqshli8b(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshl v0.8b, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8b, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: sqshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <8 x i8> %tmp3
}
; sqshli4h: sqshl with a splat-1 shift vector on a loaded <4 x i16>.
; SelectionDAG picks the immediate form; GlobalISel uses movi plus the
; register form.
define <4 x i16> @sqshli4h(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshl v0.4h, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4h, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: sqshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
ret <4 x i16> %tmp3
}
; llvm.aarch64.neon.sqshl on <2 x i32> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (sqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <2 x i32> @sqshli2s(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: sqshl v0.2s, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2s, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: sqshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
ret <2 x i32> %tmp3
}
; llvm.aarch64.neon.sqshl on <16 x i8> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (sqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <16 x i8> @sqshli16b(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sqshl v0.16b, v0.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.16b, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sqshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp3
}
; llvm.aarch64.neon.sqshl on <8 x i16> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (sqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <8 x i16> @sqshli8h(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sqshl v0.8h, v0.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8h, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sqshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
; llvm.aarch64.neon.sqshl on <4 x i32> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (sqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <4 x i32> @sqshli4s(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sqshl v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: sqshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; llvm.aarch64.neon.sqshl on <2 x i64> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (sqshl ..., #1); GlobalISel loads the
; splat from a constant pool (adrp + ldr) and uses the register form.
define <2 x i64> @sqshli2d(ptr %A) nounwind {
; CHECK-SD-LABEL: sqshli2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: sqshl v0.2d, v0.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshli2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI190_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI190_0]
; CHECK-GI-NEXT: sqshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
; llvm.aarch64.neon.uqshl on <8 x i8> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (uqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <8 x i8> @uqshli8b(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: uqshl v0.8b, v0.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8b, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: uqshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <8 x i8> %tmp3
}
; llvm.aarch64.neon.uqshl on <8 x i8> with a splat-of-8 shift amount, i.e. a
; shift equal to the i8 element width. Unlike uqshli8b, the expected output is
; shared by both selectors: the splat is materialized with movi and the
; register form of uqshl is used.
; NOTE(review): presumably 8 is outside the immediate form's encodable range
; for byte elements - confirm against the ISA reference.
define <8 x i8> @uqshli8b_1(ptr %A) nounwind {
; CHECK-LABEL: uqshli8b_1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.8b, #8
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: uqshl v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
ret <8 x i8> %tmp3
}
; llvm.aarch64.neon.uqshl on <4 x i16> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (uqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <4 x i16> @uqshli4h(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: uqshl v0.4h, v0.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4h, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: uqshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
ret <4 x i16> %tmp3
}
; llvm.aarch64.neon.uqshl on <2 x i32> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (uqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <2 x i32> @uqshli2s(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: uqshl v0.2s, v0.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2s, #1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: uqshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
ret <2 x i32> %tmp3
}
; llvm.aarch64.neon.uqshl on <16 x i8> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (uqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <16 x i8> @uqshli16b(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: uqshl v0.16b, v0.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.16b, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: uqshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp3
}
; llvm.aarch64.neon.uqshl on <8 x i16> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (uqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <8 x i16> @uqshli8h(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: uqshl v0.8h, v0.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.8h, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: uqshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
; llvm.aarch64.neon.uqshl on <4 x i32> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (uqshl ..., #1); GlobalISel
; materializes the splat with movi and uses the register form.
define <4 x i32> @uqshli4s(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: uqshl v0.4s, v0.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.4s, #1
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: uqshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
; llvm.aarch64.neon.uqshl on <2 x i64> with a constant splat-of-1 shift amount.
; SelectionDAG selects the immediate form (uqshl ..., #1); GlobalISel loads the
; splat from a constant pool (adrp + ldr) and uses the register form.
define <2 x i64> @uqshli2d(ptr %A) nounwind {
; CHECK-SD-LABEL: uqshli2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: uqshl v0.2d, v0.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshli2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI198_0
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI198_0]
; CHECK-GI-NEXT: uqshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
; llvm.aarch64.neon.urshl on <8 x i8> with a splat-of-(-1) shift amount, then an
; add of a second loaded vector. SelectionDAG folds the pair into ursra #1;
; GlobalISel keeps a register-form urshl (all-ones shift vector) plus an add.
define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: ursra v0.8b, v1.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%tmp4 = load <8 x i8>, ptr %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; llvm.aarch64.neon.urshl on <4 x i16> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into ursra #1;
; GlobalISel keeps a register-form urshl (all-ones shift vector) plus an add.
define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: ursra v0.4h, v1.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
%tmp4 = load <4 x i16>, ptr %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; llvm.aarch64.neon.urshl on <2 x i32> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into ursra #1;
; GlobalISel keeps a register-form urshl (all-ones shift vector) plus an add.
define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: ursra v0.2s, v1.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
%tmp4 = load <2 x i32>, ptr %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; llvm.aarch64.neon.urshl on <16 x i8> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into ursra #1;
; GlobalISel keeps a register-form urshl (all-ones shift vector) plus an add.
define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: ursra v0.16b, v1.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%tmp4 = load <16 x i8>, ptr %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; llvm.aarch64.neon.urshl on <8 x i16> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into ursra #1;
; GlobalISel keeps a register-form urshl (all-ones shift vector) plus an add.
define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: ursra v0.8h, v1.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
%tmp4 = load <8 x i16>, ptr %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; llvm.aarch64.neon.urshl on <4 x i32> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into ursra #1;
; GlobalISel keeps a register-form urshl (all-ones shift vector) plus an add.
define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: ursra v0.4s, v1.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
%tmp4 = load <4 x i32>, ptr %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; llvm.aarch64.neon.urshl on <2 x i64> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into ursra #1;
; GlobalISel keeps a register-form urshl (all-ones shift vector) plus an add.
define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: ursra v0.2d, v1.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
%tmp4 = load <2 x i64>, ptr %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; llvm.aarch64.neon.urshl on <1 x i64> with a shift amount of -1, then an add of
; a second loaded value. SelectionDAG folds the pair into the scalar ursra #1.
; GlobalISel builds -1 in a GPR, moves it to an FP register for urshl, and does
; the add in general-purpose registers.
define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra1d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: ursra d0, d1, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra1d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: ldr x8, [x1]
; CHECK-GI-NEXT: urshl d0, d0, d1
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: add x8, x9, x8
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
%tmp4 = load <1 x i64>, ptr %B
%tmp5 = add <1 x i64> %tmp3, %tmp4
ret <1 x i64> %tmp5
}
; Scalar i64 variant: llvm.aarch64.neon.urshl.i64 with a shift amount of -1,
; then an integer add. SelectionDAG folds the pair into ursra #1 and moves the
; result back to x0. GlobalISel builds -1 in a GPR, moves it to an FP register
; for urshl, and does the add in general-purpose registers.
define i64 @ursra_scalar(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: ursra_scalar:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ldr d1, [x1]
; CHECK-SD-NEXT: ursra d1, d0, #1
; CHECK-SD-NEXT: fmov x0, d1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ursra_scalar:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: ldr x8, [x1]
; CHECK-GI-NEXT: urshl d0, d0, d1
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: add x0, x9, x8
; CHECK-GI-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
%tmp4 = load i64, ptr %B
%tmp5 = add i64 %tmp3, %tmp4
ret i64 %tmp5
}
; llvm.aarch64.neon.srshl on <8 x i8> with a splat-of-(-1) shift amount, then an
; add of a second loaded vector. SelectionDAG folds the pair into srsra #1;
; GlobalISel keeps a register-form srshl (all-ones shift vector) plus an add.
define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: srsra v0.8b, v1.8b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%tmp4 = load <8 x i8>, ptr %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; llvm.aarch64.neon.srshl on <4 x i16> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into srsra #1;
; GlobalISel keeps a register-form srshl (all-ones shift vector) plus an add.
define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: srsra v0.4h, v1.4h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
%tmp4 = load <4 x i16>, ptr %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; llvm.aarch64.neon.srshl on <2 x i32> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into srsra #1;
; GlobalISel keeps a register-form srshl (all-ones shift vector) plus an add.
define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: srsra v0.2s, v1.2s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
%tmp4 = load <2 x i32>, ptr %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; llvm.aarch64.neon.srshl on <16 x i8> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into srsra #1;
; GlobalISel keeps a register-form srshl (all-ones shift vector) plus an add.
define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: srsra v0.16b, v1.16b, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%tmp4 = load <16 x i8>, ptr %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; llvm.aarch64.neon.srshl on <8 x i16> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into srsra #1;
; GlobalISel keeps a register-form srshl (all-ones shift vector) plus an add.
define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: srsra v0.8h, v1.8h, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
%tmp4 = load <8 x i16>, ptr %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; llvm.aarch64.neon.srshl on <4 x i32> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into srsra #1;
; GlobalISel keeps a register-form srshl (all-ones shift vector) plus an add.
define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: srsra v0.4s, v1.4s, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
%tmp4 = load <4 x i32>, ptr %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; llvm.aarch64.neon.srshl on <2 x i64> with a splat-of-(-1) shift amount, then
; an add of a second loaded vector. SelectionDAG folds the pair into srsra #1;
; GlobalISel keeps a register-form srshl (all-ones shift vector) plus an add.
define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q1, [x0]
; CHECK-SD-NEXT: ldr q0, [x1]
; CHECK-SD-NEXT: srsra v0.2d, v1.2d, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
%tmp4 = load <2 x i64>, ptr %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; llvm.aarch64.neon.srshl on <1 x i64> with a shift amount of -1, then an add of
; a second loaded value. SelectionDAG folds the pair into the scalar srsra #1.
; GlobalISel builds -1 in a GPR, moves it to an FP register for srshl, and does
; the add in general-purpose registers.
define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra1d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: srsra d0, d1, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra1d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: ldr x8, [x1]
; CHECK-GI-NEXT: srshl d0, d0, d1
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: add x8, x9, x8
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
%tmp4 = load <1 x i64>, ptr %B
%tmp5 = add <1 x i64> %tmp3, %tmp4
ret <1 x i64> %tmp5
}
; Scalar i64 variant: llvm.aarch64.neon.srshl.i64 with a shift amount of -1,
; then an integer add. SelectionDAG folds the pair into srsra #1 and moves the
; result back to x0. GlobalISel builds -1 in a GPR, moves it to an FP register
; for srshl, and does the add in general-purpose registers.
define i64 @srsra_scalar(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: srsra_scalar:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ldr d1, [x1]
; CHECK-SD-NEXT: srsra d1, d0, #1
; CHECK-SD-NEXT: fmov x0, d1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srsra_scalar:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: ldr x8, [x1]
; CHECK-GI-NEXT: srshl d0, d0, d1
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: add x0, x9, x8
; CHECK-GI-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
%tmp4 = load i64, ptr %B
%tmp5 = add i64 %tmp3, %tmp4
ret i64 %tmp5
}
; lshr by 1 on <8 x i8> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single usra #1.
define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: usra v0.8b, v1.8b, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp4 = load <8 x i8>, ptr %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; lshr by 1 on <4 x i16> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single usra #1.
define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: usra v0.4h, v1.4h, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
%tmp4 = load <4 x i16>, ptr %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; lshr by 1 on <2 x i32> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single usra #1.
define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: usra v0.2s, v1.2s, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
%tmp4 = load <2 x i32>, ptr %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; lshr by 1 on <16 x i8> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single usra #1.
define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: usra v0.16b, v1.16b, #1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp4 = load <16 x i8>, ptr %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; lshr by 1 on <8 x i16> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single usra #1.
define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: usra v0.8h, v1.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp4 = load <8 x i16>, ptr %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; lshr by 1 on <4 x i32> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single usra #1.
define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: usra v0.4s, v1.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp4 = load <4 x i32>, ptr %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; lshr by 1 on <2 x i64> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single usra #1.
define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: usra2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: usra v0.2d, v1.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp4 = load <2 x i64>, ptr %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; lshr by 1 on <1 x i64> followed by an add of a second loaded value.
; SelectionDAG selects the scalar usra #1 on d registers. GlobalISel does the
; work in general-purpose registers with a shifted-operand add (lsr #1) and
; then moves the result into d0.
define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: usra1d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: ldr d0, [x1]
; CHECK-SD-NEXT: usra d0, d1, #1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: usra1d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr x8, [x0]
; CHECK-GI-NEXT: ldr x9, [x1]
; CHECK-GI-NEXT: add x8, x9, x8, lsr #1
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp3 = lshr <1 x i64> %tmp1, <i64 1>
%tmp4 = load <1 x i64>, ptr %B
%tmp5 = add <1 x i64> %tmp3, %tmp4
ret <1 x i64> %tmp5
}
; ashr by 1 on <8 x i8> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single ssra #1.
define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ssra v0.8b, v1.8b, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp4 = load <8 x i8>, ptr %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; ashr by 1 on <4 x i16> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single ssra #1.
define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ssra v0.4h, v1.4h, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
%tmp4 = load <4 x i16>, ptr %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; ashr by 1 on <2 x i32> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single ssra #1.
define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ssra v0.2s, v1.2s, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
%tmp4 = load <2 x i32>, ptr %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; ashr by 1 on <16 x i8> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single ssra #1.
define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ssra v0.16b, v1.16b, #1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp4 = load <16 x i8>, ptr %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; ashr by 1 on <8 x i16> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single ssra #1.
define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ssra v0.8h, v1.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp4 = load <8 x i16>, ptr %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; ashr by 1 on <4 x i32> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single ssra #1.
define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ssra v0.4s, v1.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp4 = load <4 x i32>, ptr %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; ashr by 1 on <2 x i64> followed by an add of a second loaded vector.
; Both selectors are expected to fold the pair into a single ssra #1.
define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: ssra2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ssra v0.2d, v1.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp4 = load <2 x i64>, ptr %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; lshr by 1 on <8 x i8> combined with a second loaded vector using or (not add).
; The expected output keeps a separate ushr #1 and orr on both selectors; no
; shift-accumulate instruction is formed.
define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: ushr v0.8b, v0.8b, #1
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp4 = load <8 x i8>, ptr %B
%tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; lshr by 1 on <4 x i16> combined with a second loaded vector using or (not add).
; The expected output keeps a separate ushr #1 and orr on both selectors; no
; shift-accumulate instruction is formed.
define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: ushr v0.4h, v0.4h, #1
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp4 = load <4 x i16>, ptr %B
%tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; lshr by 1 on <2 x i32> combined with a second loaded vector using or (not add).
; The expected output keeps a separate ushr #1 and orr on both selectors; no
; shift-accumulate instruction is formed.
define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: ushr v0.2s, v0.2s, #1
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp4 = load <2 x i32>, ptr %B
%tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
%tmp5 = or <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; lshr by 1 on <16 x i8> combined with a second loaded vector using or (not add).
; The expected output keeps a separate ushr #1 and orr on both selectors; no
; shift-accumulate instruction is formed.
define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ushr v0.16b, v0.16b, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp4 = load <16 x i8>, ptr %B
%tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; lshr by 1 on <8 x i16> combined with a second loaded vector using or (not add).
; The expected output keeps a separate ushr #1 and orr on both selectors; no
; shift-accumulate instruction is formed.
define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ushr v0.8h, v0.8h, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp4 = load <8 x i16>, ptr %B
%tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; lshr by 1 on <4 x i32> combined with a second loaded vector using or (not add).
; The expected output keeps a separate ushr #1 and orr on both selectors; no
; shift-accumulate instruction is formed.
define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ushr v0.4s, v0.4s, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp4 = load <4 x i32>, ptr %B
%tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp5 = or <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; lshr by 1 on <2 x i64> combined with a second loaded vector using or (not add).
; The expected output keeps a separate ushr #1 and orr on both selectors; no
; shift-accumulate instruction is formed.
define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: shr_orr2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ushr v0.2d, v0.2d, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp4 = load <2 x i64>, ptr %B
%tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp5 = or <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; shl by 1 on <8 x i8> combined with a second loaded vector using or.
; SelectionDAG emits the shift as a self-add (add v0, v0, v0); GlobalISel emits
; shl #1. Both follow it with a separate orr.
define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ldr d1, [x1]
; CHECK-SD-NEXT: add v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: shl v0.8b, v0.8b, #1
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp4 = load <8 x i8>, ptr %B
%tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
; shl by 1 on <4 x i16> combined with a second loaded vector using or.
; SelectionDAG emits the shift as a self-add (add v0, v0, v0); GlobalISel emits
; shl #1. Both follow it with a separate orr.
define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr4h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ldr d1, [x1]
; CHECK-SD-NEXT: add v0.4h, v0.4h, v0.4h
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr4h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: shl v0.4h, v0.4h, #1
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp4 = load <4 x i16>, ptr %B
%tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
; shl by 1 on <2 x i32> combined with a second loaded vector using or.
; SelectionDAG emits the shift as a self-add (add v0, v0, v0); GlobalISel emits
; shl #1. Both follow it with a separate orr.
define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr2s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: ldr d1, [x1]
; CHECK-SD-NEXT: add v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr2s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: ldr d1, [x1]
; CHECK-GI-NEXT: shl v0.2s, v0.2s, #1
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp4 = load <2 x i32>, ptr %B
%tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
%tmp5 = or <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
; shl by 1 on <16 x i8> combined with a second loaded vector using or.
; SelectionDAG emits the shift as a self-add (add v0, v0, v0); GlobalISel emits
; shl #1. Both follow it with a separate orr.
define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x1]
; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #1
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp4 = load <16 x i8>, ptr %B
%tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
; 128-bit variant: shl of a loaded <8 x i16> by a splat of 1, then or with a
; second loaded vector. The SelectionDAG run expects the shift as
; `add v0.8h, v0.8h, v0.8h`; the GlobalISel run expects `shl ... #1`.
define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr8h:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x1]
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr8h:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #1
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp4 = load <8 x i16>, ptr %B
%tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
; 128-bit variant: shl of a loaded <4 x i32> by a splat of 1, then or with a
; second loaded vector. The SelectionDAG run expects the shift as
; `add v0.4s, v0.4s, v0.4s`; the GlobalISel run expects `shl ... #1`.
define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr4s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x1]
; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr4s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #1
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp4 = load <4 x i32>, ptr %B
%tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp5 = or <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
; 128-bit variant: shl of a loaded <2 x i64> by a splat of 1, then or with a
; second loaded vector. The SelectionDAG run expects the shift as
; `add v0.2d, v0.2d, v0.2d`; the GlobalISel run expects `shl ... #1`.
define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind {
; CHECK-SD-LABEL: shl_orr2d:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr q0, [x0]
; CHECK-SD-NEXT: ldr q1, [x1]
; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_orr2d:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: ldr q1, [x1]
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #1
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp4 = load <2 x i64>, ptr %B
%tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
%tmp5 = or <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
; zext from <8 x i8> to <8 x i16> followed by shl by 8 (the source element
; width). The SelectionDAG run expects a single `shll v0.8h, v0.8b, #8`; the
; GlobalISel run expects the two-instruction sequence `ushll #0` + `shl #8`.
define <8 x i16> @shll(<8 x i8> %in) {
; CHECK-SD-LABEL: shll:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shll v0.8h, v0.8b, #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shll:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
; CHECK-GI-NEXT: ret
%ext = zext <8 x i8> %in to <8 x i16>
%res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
ret <8 x i16> %res
}
; Extracts lanes 4-7 (the high half) of an <8 x i16>, zero-extends them to
; <4 x i32> and shifts left by 16 (the source element width). The SelectionDAG
; run expects a single `shll2 v0.4s, v0.8h, #16`; the GlobalISel run expects
; `ushll2 #0` + `shl #16`.
; The shuffle mask only references the first operand, so the second operand is
; an unused placeholder; poison is used for it rather than the deprecated undef.
define <4 x i32> @shll_high(<8 x i16> %in) {
; CHECK-SD-LABEL: shll_high:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shll2 v0.4s, v0.8h, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shll_high:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #16
; CHECK-GI-NEXT: ret
%extract = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext = zext <4 x i16> %extract to <4 x i32>
%res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
ret <4 x i32> %res
}
; Loads two <8 x i8> vectors and calls the vsli intrinsic with an immediate of
; 1; both runs expect `sli v0.8b, v1.8b, #1`.
define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sli v0.8b, v1.8b, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
ret <8 x i8> %tmp3
}
; Loads two <4 x i16> vectors and calls the vsli intrinsic with an immediate of
; 1; both runs expect `sli v0.4h, v1.4h, #1`.
define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sli v0.4h, v1.4h, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
ret <4 x i16> %tmp3
}
; Loads two <2 x i32> vectors and calls the vsli intrinsic with an immediate of
; 1; both runs expect `sli v0.2s, v1.2s, #1`.
define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sli v0.2s, v1.2s, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
ret <2 x i32> %tmp3
}
; Loads two <1 x i64> values and calls the vsli intrinsic with an immediate of
; 1; both runs expect the scalar form `sli d0, d1, #1`.
define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sli d0, d1, #1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp2 = load <1 x i64>, ptr %B
%tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
ret <1 x i64> %tmp3
}
; Ensure we can select scalar SLI with a zero shift (see issue #139879).
; The vsli intrinsic is called on <1 x i64> register arguments with an
; immediate of 0; both runs expect a single `sli d0, d1, #0`.
define <1 x i64> @sli1d_imm0(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: sli1d_imm0:
; CHECK: // %bb.0:
; CHECK-NEXT: sli d0, d1, #0
; CHECK-NEXT: ret
%r = call <1 x i64> @llvm.aarch64.neon.vsli(<1 x i64> %a, <1 x i64> %b, i32 0)
ret <1 x i64> %r
}
; Loads two <16 x i8> vectors and calls the vsli intrinsic with an immediate of
; 1; both runs expect `sli v0.16b, v1.16b, #1`.
define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sli v0.16b, v1.16b, #1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
ret <16 x i8> %tmp3
}
; Loads two <8 x i16> vectors and calls the vsli intrinsic with an immediate of
; 1; both runs expect `sli v0.8h, v1.8h, #1`.
define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sli v0.8h, v1.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
ret <8 x i16> %tmp3
}
; Loads two <4 x i32> vectors and calls the vsli intrinsic with an immediate of
; 1; both runs expect `sli v0.4s, v1.4s, #1`.
define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sli v0.4s, v1.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
ret <4 x i32> %tmp3
}
; Loads two <2 x i64> vectors and calls the vsli intrinsic with an immediate of
; 1; both runs expect `sli v0.2d, v1.2d, #1`.
define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sli2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sli v0.2d, v1.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
ret <2 x i64> %tmp3
}
; Declarations of the vsli intrinsic, one per vector type exercised by the
; sli8b .. sli2d tests. Each takes two vectors of the same type plus an i32
; immediate.
declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
; Loads two <8 x i8> vectors and calls the vsri intrinsic with an immediate of
; 1; both runs expect `sri v0.8b, v1.8b, #1`.
define <8 x i8> @sri8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri8b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sri v0.8b, v1.8b, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
ret <8 x i8> %tmp3
}
; Loads two <4 x i16> vectors and calls the vsri intrinsic with an immediate of
; 1; both runs expect `sri v0.4h, v1.4h, #1`.
define <4 x i16> @sri4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sri v0.4h, v1.4h, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
ret <4 x i16> %tmp3
}
; Loads two <2 x i32> vectors and calls the vsri intrinsic with an immediate of
; 1; both runs expect `sri v0.2s, v1.2s, #1`.
define <2 x i32> @sri2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sri v0.2s, v1.2s, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
ret <2 x i32> %tmp3
}
; Loads two <1 x i64> values and calls the vsri intrinsic with an immediate of
; 1; both runs expect the scalar form `sri d0, d1, #1`.
define <1 x i64> @sri1d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sri d0, d1, #1
; CHECK-NEXT: ret
%tmp1 = load <1 x i64>, ptr %A
%tmp2 = load <1 x i64>, ptr %B
%tmp3 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
ret <1 x i64> %tmp3
}
; Loads two <16 x i8> vectors and calls the vsri intrinsic with an immediate of
; 1; both runs expect `sri v0.16b, v1.16b, #1`.
define <16 x i8> @sri16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sri v0.16b, v1.16b, #1
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
ret <16 x i8> %tmp3
}
; Loads two <8 x i16> vectors and calls the vsri intrinsic with an immediate of
; 1; both runs expect `sri v0.8h, v1.8h, #1`.
define <8 x i16> @sri8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sri v0.8h, v1.8h, #1
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
ret <8 x i16> %tmp3
}
; Loads two <4 x i32> vectors and calls the vsri intrinsic with an immediate of
; 1; both runs expect `sri v0.4s, v1.4s, #1`.
define <4 x i32> @sri4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sri v0.4s, v1.4s, #1
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
ret <4 x i32> %tmp3
}
; Loads two <2 x i64> vectors and calls the vsri intrinsic with an immediate of
; 1; both runs expect `sri v0.2d, v1.2d, #1`.
define <2 x i64> @sri2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sri2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sri v0.2d, v1.2d, #1
; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
ret <2 x i64> %tmp3
}
; ashr of <1 x i64> by a variable (non-constant) amount. The SelectionDAG run
; expects the amount to be negated and fed to `sshl` on d registers; the
; GlobalISel run expects both operands moved to general-purpose registers,
; an `asr`, and the result moved back to d0.
define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-SD-LABEL: ashr_v1i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: neg d1, d1
; CHECK-SD-NEXT: sshl d0, d0, d1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ashr_v1i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: asr x8, x8, x9
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%c = ashr <1 x i64> %a, %b
ret <1 x i64> %c
}
; Feeds an addp result into the sqshl intrinsic with an all-zero shift vector
; and stores the result. The SelectionDAG run expects the shift to disappear
; (addp result stored directly); the GlobalISel run still expects a zero vector
; to be materialized and a register-form `sqshl` to be emitted.
define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: sqshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: sqshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
%vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
%vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
ret void
}
; Feeds an addp result into the uqshl intrinsic with an all-zero shift vector
; and stores the result. The SelectionDAG run expects the shift to disappear
; (addp result stored directly); the GlobalISel run still expects a zero vector
; to be materialized and a register-form `uqshl` to be emitted.
define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: uqshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: uqshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
%vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
%vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
ret void
}
; Feeds an addp result into the srshl intrinsic with an all-zero shift vector
; and stores the result. The SelectionDAG run expects the shift to disappear
; (addp result stored directly); the GlobalISel run still expects a zero vector
; to be materialized and a register-form `srshl` to be emitted.
define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: srshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: srshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: srshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
%vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
%vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
ret void
}
; Feeds an addp result into the urshl intrinsic with an all-zero shift vector
; and stores the result. The SelectionDAG run expects the shift to disappear
; (addp result stored directly); the GlobalISel run still expects a zero vector
; to be materialized and a register-form `urshl` to be emitted.
define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: urshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: urshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
%vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
%vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
ret void
}
; Feeds an addp result into the sqshlu intrinsic with an all-zero shift vector
; and stores the result. Unlike the sibling *_zero_shift_amount tests, both
; runs expect the shift to remain, as the immediate form `sqshlu ... #0`.
define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-LABEL: sqshlu_zero_shift_amount:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-NEXT: sqshlu v0.2d, v0.2d, #0
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
entry:
%vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
%vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
ret void
}
; Feeds an addp result into the sshl intrinsic with an all-zero shift vector
; and stores the result. The SelectionDAG run expects the shift to disappear
; (addp result stored directly); the GlobalISel run still expects a zero vector
; to be materialized and a register-form `sshl` to be emitted.
define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: sshl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sshl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
%vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
%vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
ret void
}
; Feeds an addp result into the ushl intrinsic with an all-zero shift vector
; and stores the result. The SelectionDAG run expects the shift to disappear
; (addp result stored directly); the GlobalISel run still expects a zero vector
; to be materialized and a register-form `ushl` to be emitted.
define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) {
; CHECK-SD-LABEL: ushl_zero_shift_amount:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: str q0, [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ushl_zero_shift_amount:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: ret
entry:
%vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
%vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
ret void
}
; rshrn intrinsic (shift of 13) on <4 x i32>, with the <4 x i16> result
; sign-extended back to <4 x i32>. Both runs expect `rshrn #13` followed by
; `sshll #0`.
define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) {
; CHECK-LABEL: sext_rshrn:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rshrn v0.4h, v0.4s, #13
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
entry:
%vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
%vmovl.i = sext <4 x i16> %vrshrn_n1 to <4 x i32>
ret <4 x i32> %vmovl.i
}
; rshrn intrinsic (shift of 13) on <4 x i32>, with the <4 x i16> result
; zero-extended back to <4 x i32>. Both runs expect `rshrn #13` followed by
; `ushll #0`.
define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) {
; CHECK-LABEL: zext_rshrn:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rshrn v0.4h, v0.4s, #13
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
entry:
%vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
%vmovl.i = zext <4 x i16> %vrshrn_n1 to <4 x i32>
ret <4 x i32> %vmovl.i
}
; Adds a splat of 3 to the input and passes the sum to the rshrn intrinsic
; (shift of 13). Note that, despite the function name, the IR contains an add
; and no multiply. Both runs expect `movi` + `add` + `rshrn #13`.
define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) {
; CHECK-LABEL: mul_rshrn:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v1.4s, #3
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: rshrn v0.4h, v0.4s, #13
; CHECK-NEXT: ret
entry:
%b = add <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
%vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 13)
ret <4 x i16> %vrshrn_n1
}
; Two chained sshl intrinsic calls with negative constant splats (-8, then -9)
; followed by an ashr by 7. The SelectionDAG run expects only two immediate
; `sshr` instructions (#8 and #9) and no instruction for the final ashr; the
; GlobalISel run expects the shift vectors to be materialized with `mvni`, two
; register-form `sshl` instructions, and a trailing `sshr #7`.
define <8 x i16> @signbits_vashr(<8 x i16> %a) {
; CHECK-SD-LABEL: signbits_vashr:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #8
; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: signbits_vashr:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mvni v1.8h, #7
; CHECK-GI-NEXT: mvni v2.8h, #8
; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v2.8h
; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #7
; CHECK-GI-NEXT: ret
%b = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
%c = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %b, <8 x i16> <i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9>)
%d = ashr <8 x i16> %c, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %d
}
; lshr of <2 x i64> by 16 followed by trunc to <2 x i8>. The SelectionDAG run
; expects a single narrowing shift `shrn v0.2s, v0.2d, #16`; the GlobalISel run
; expects the narrowing first (`xtn`) and then `ushr #16` on the .2s vector.
define <2 x i8> @lshr_trunc_v2i64_v2i8(<2 x i64> %a) {
; CHECK-SD-LABEL: lshr_trunc_v2i64_v2i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shrn v0.2s, v0.2d, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: lshr_trunc_v2i64_v2i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: ushr v0.2s, v0.2s, #16
; CHECK-GI-NEXT: ret
%b = lshr <2 x i64> %a, <i64 16, i64 16>
%c = trunc <2 x i64> %b to <2 x i8>
ret <2 x i8> %c
}
; lshr of <4 x i64> by 8 followed by trunc to <4 x i16>. The SelectionDAG run
; expects each 128-bit half to be narrowed with `xtn`, shifted with `ushr #8`,
; and the halves combined with `uzp1`. The GlobalISel run expects the shift
; amount to be loaded from a constant pool, negated, and applied with a
; register-form `ushl` before the final `xtn`.
; NOTE(review): the constant-pool label (.LCPI278_0) embeds a number that
; presumably tracks this function's position in the file - confirm, and
; regenerate the assertions if functions are added or removed earlier.
define <4 x i16> @lshr_trunc_v4i64_v4i16(<4 x i64> %a) {
; CHECK-SD-LABEL: lshr_trunc_v4i64_v4i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
; CHECK-SD-NEXT: ushr v1.2s, v1.2s, #8
; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #8
; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: lshr_trunc_v4i64_v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI278_0
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI278_0]
; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s
; CHECK-GI-NEXT: neg v1.4s, v2.4s
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: ret
%b = lshr <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
%c = trunc <4 x i64> %b to <4 x i16>
ret <4 x i16> %c
}
; ashr of <2 x i64> by 16 followed by trunc to <2 x i8>. The SelectionDAG run
; expects a single narrowing shift `shrn v0.2s, v0.2d, #16`; the GlobalISel run
; expects the narrowing first (`xtn`) and then `sshr #16` on the .2s vector.
define <2 x i8> @ashr_trunc_v2i64_v2i8(<2 x i64> %a) {
; CHECK-SD-LABEL: ashr_trunc_v2i64_v2i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shrn v0.2s, v0.2d, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ashr_trunc_v2i64_v2i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-GI-NEXT: ret
%b = ashr <2 x i64> %a, <i64 16, i64 16>
%c = trunc <2 x i64> %b to <2 x i8>
ret <2 x i8> %c
}
; ashr of <4 x i64> by 8 followed by trunc to <4 x i16>. The SelectionDAG run
; expects each 128-bit half to be narrowed with `xtn`, shifted with the logical
; `ushr #8` (not an arithmetic shift), and the halves combined with `uzp1`. The
; GlobalISel run expects the shift amount to be loaded from a constant pool,
; negated, and applied with a register-form `sshl` before the final `xtn`.
; NOTE(review): the constant-pool label (.LCPI280_0) embeds a number that
; presumably tracks this function's position in the file - confirm, and
; regenerate the assertions if functions are added or removed earlier.
define <4 x i16> @ashr_trunc_v4i64_v4i16(<4 x i64> %a) {
; CHECK-SD-LABEL: ashr_trunc_v4i64_v4i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
; CHECK-SD-NEXT: ushr v1.2s, v1.2s, #8
; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #8
; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ashr_trunc_v4i64_v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI280_0
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI280_0]
; CHECK-GI-NEXT: uzp1 v2.4s, v2.4s, v2.4s
; CHECK-GI-NEXT: neg v1.4s, v2.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: ret
%b = ashr <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
%c = trunc <4 x i64> %b to <4 x i16>
ret <4 x i16> %c
}
; shl of <2 x i64> by 16 followed by trunc to <2 x i8>. The two runs expect the
; operations in opposite order: the SelectionDAG run narrows first (`xtn`) and
; shifts the .2s vector, while the GlobalISel run shifts the .2d vector and
; narrows afterwards.
define <2 x i8> @shl_trunc_v2i64_v2i8(<2 x i64> %a) {
; CHECK-SD-LABEL: shl_trunc_v2i64_v2i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_trunc_v2i64_v2i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #16
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: ret
%b = shl <2 x i64> %a, <i64 16, i64 16>
%c = trunc <2 x i64> %b to <2 x i8>
ret <2 x i8> %c
}
; shl of <4 x i64> by 8 followed by trunc to <4 x i16>. The SelectionDAG run
; expects the input to be narrowed to .4h (`uzp1` + `xtn`) and then shifted
; with an immediate `shl #8`. The GlobalISel run expects the shift amount to be
; loaded from a constant pool, both value and amount narrowed to .4h, and a
; register-form `ushl`.
; NOTE(review): the constant-pool label (.LCPI282_0) embeds a number that
; presumably tracks this function's position in the file - confirm, and
; regenerate the assertions if functions are added or removed earlier.
define <4 x i16> @shl_trunc_v4i64_v4i16(<4 x i64> %a) {
; CHECK-SD-LABEL: shl_trunc_v4i64_v4i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: xtn v0.4h, v0.4s
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shl_trunc_v4i64_v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI282_0
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI282_0]
; CHECK-GI-NEXT: uzp1 v1.4s, v2.4s, v2.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: ret
%b = shl <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
%c = trunc <4 x i64> %b to <4 x i16>
ret <4 x i16> %c
}
; Declaration of the <2 x i64> addp intrinsic called by the
; *_zero_shift_amount tests to produce their shift input.
declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)