; Source blob: 4adbded1eb82b1649d900e4bb0d67d25b22b4554 (repository-viewer header, kept as a comment so the file parses as LLVM IR)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
; Test basic add/sub operations for v4i16
; Element-wise add of two <4 x i16> values loaded from memory; the expected
; output is two 64-bit loads, one padd.h and a 64-bit store.
define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_padd_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: padd.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %sum = add <4 x i16> %lhs, %rhs
  store <4 x i16> %sum, ptr %ret_ptr
  ret void
}
; Element-wise sub of two <4 x i16> values (%a - %b); expected to select a
; single psub.h with the operands in source order.
define void @test_psub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psub_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: psub.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %diff = sub <4 x i16> %lhs, %rhs
  store <4 x i16> %diff, ptr %ret_ptr
  ret void
}
; Test basic add/sub operations for v8i8
; Element-wise add of two <8 x i8> values; expected to select a single padd.b.
define void @test_padd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_padd_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: padd.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %sum = add <8 x i8> %lhs, %rhs
  store <8 x i8> %sum, ptr %ret_ptr
  ret void
}
; Element-wise sub of two <8 x i8> values (%a - %b); expected to select a
; single psub.b.
define void @test_psub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psub_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: psub.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %diff = sub <8 x i8> %lhs, %rhs
  store <8 x i8> %diff, ptr %ret_ptr
  ret void
}
; Test saturating add operations for v4i16
; llvm.sadd.sat on <4 x i16> (signed saturating add); expected to select
; psadd.h.
define void @test_psadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psadd_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: psadd.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %sat = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  store <4 x i16> %sat, ptr %ret_ptr
  ret void
}
; llvm.uadd.sat on <4 x i16> (unsigned saturating add); expected to select
; psaddu.h.
define void @test_psaddu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psaddu_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: psaddu.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %sat = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  store <4 x i16> %sat, ptr %ret_ptr
  ret void
}
; Test saturating sub operations for v4i16
; llvm.ssub.sat on <4 x i16> (signed saturating sub); expected to select
; pssub.h.
define void @test_pssub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pssub_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pssub.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %sat = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  store <4 x i16> %sat, ptr %ret_ptr
  ret void
}
; llvm.usub.sat on <4 x i16> (unsigned saturating sub); expected to select
; pssubu.h.
define void @test_pssubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pssubu_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pssubu.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %sat = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  store <4 x i16> %sat, ptr %ret_ptr
  ret void
}
; Test saturating add operations for v8i8
; llvm.sadd.sat on <8 x i8> (signed saturating add); expected to select
; psadd.b.
define void @test_psadd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psadd_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: psadd.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %sat = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  store <8 x i8> %sat, ptr %ret_ptr
  ret void
}
; llvm.uadd.sat on <8 x i8> (unsigned saturating add); expected to select
; psaddu.b.
define void @test_psaddu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psaddu_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: psaddu.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %sat = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  store <8 x i8> %sat, ptr %ret_ptr
  ret void
}
; Test saturating sub operations for v8i8
; llvm.ssub.sat on <8 x i8> (signed saturating sub); expected to select
; pssub.b.
define void @test_pssub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pssub_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pssub.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %sat = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  store <8 x i8> %sat, ptr %ret_ptr
  ret void
}
; llvm.usub.sat on <8 x i8> (unsigned saturating sub); expected to select
; pssubu.b.
define void @test_pssubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pssubu_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pssubu.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %sat = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  store <8 x i8> %sat, ptr %ret_ptr
  ret void
}
; Test averaging floor signed operations for v4i16
; avgfloors pattern: (a + b) arithmetic shift right 1
; Signed halving add written in widened form: sext both <4 x i16> inputs to
; i32, add nsw, ashr by 1, trunc back to i16. The whole chain is expected to
; collapse into one paadd.h.
define void @test_paadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_paadd_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: paadd.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %wide.lhs = sext <4 x i16> %lhs to <4 x i32>
  %wide.rhs = sext <4 x i16> %rhs to <4 x i32>
  %wide.sum = add nsw <4 x i32> %wide.lhs, %wide.rhs
  %halved = ashr <4 x i32> %wide.sum, splat(i32 1)
  %avg = trunc <4 x i32> %halved to <4 x i16>
  store <4 x i16> %avg, ptr %ret_ptr
  ret void
}
; Test averaging floor unsigned operations for v4i16
; avgflooru pattern: (a & b) + ((a ^ b) >> 1)
; Unsigned halving add written without widening, as
; (a & b) + ((a ^ b) lshr 1) on <4 x i16>; expected to select paaddu.h.
define void @test_paaddu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_paaddu_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: paaddu.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %common = and <4 x i16> %lhs, %rhs
  %differing = xor <4 x i16> %lhs, %rhs
  %half.diff = lshr <4 x i16> %differing, splat(i16 1)
  %avg = add <4 x i16> %common, %half.diff
  store <4 x i16> %avg, ptr %ret_ptr
  ret void
}
; Test averaging floor signed operations for v8i8
; Signed halving add on <8 x i8> in widened form (sext to i16, add nsw,
; ashr 1, trunc); expected to collapse into one paadd.b.
define void @test_paadd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_paadd_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: paadd.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %wide.lhs = sext <8 x i8> %lhs to <8 x i16>
  %wide.rhs = sext <8 x i8> %rhs to <8 x i16>
  %wide.sum = add nsw <8 x i16> %wide.lhs, %wide.rhs
  %halved = ashr <8 x i16> %wide.sum, splat(i16 1)
  %avg = trunc <8 x i16> %halved to <8 x i8>
  store <8 x i8> %avg, ptr %ret_ptr
  ret void
}
; Test averaging floor unsigned operations for v8i8
; Unsigned halving add on <8 x i8>, written as (a & b) + ((a ^ b) lshr 1);
; expected to select paaddu.b.
define void @test_paaddu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_paaddu_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: paaddu.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %common = and <8 x i8> %lhs, %rhs
  %differing = xor <8 x i8> %lhs, %rhs
  %half.diff = lshr <8 x i8> %differing, splat(i8 1)
  %avg = add <8 x i8> %common, %half.diff
  store <8 x i8> %avg, ptr %ret_ptr
  ret void
}
; Test absolute difference signed for v4i16
; abds pattern: sub(smax(a,b), smin(a,b))
; Signed absolute difference on <4 x i16>, spelled smax(a, b) - smin(a, b).
; Note the function is named "pdif" while the expected mnemonic is pabd.h.
define void @test_pdif_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pdif_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pabd.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %lo = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %hi = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %absdiff = sub <4 x i16> %hi, %lo
  store <4 x i16> %absdiff, ptr %ret_ptr
  ret void
}
; Test absolute difference unsigned for v4i16
; abdu pattern: sub(umax(a,b), umin(a,b))
; Unsigned absolute difference on <4 x i16>, spelled umax(a, b) - umin(a, b).
; Note the function is named "pdifu" while the expected mnemonic is pabdu.h.
define void @test_pdifu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pdifu_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pabdu.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %lo = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %hi = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %absdiff = sub <4 x i16> %hi, %lo
  store <4 x i16> %absdiff, ptr %ret_ptr
  ret void
}
; Test absolute difference signed for v8i8
; Signed absolute difference on <8 x i8>, spelled smax(a, b) - smin(a, b).
; Note the function is named "pdif" while the expected mnemonic is pabd.b.
define void @test_pdif_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pdif_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pabd.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %lo = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %hi = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %absdiff = sub <8 x i8> %hi, %lo
  store <8 x i8> %absdiff, ptr %ret_ptr
  ret void
}
; Test absolute difference unsigned for v8i8
; Unsigned absolute difference on <8 x i8>, spelled umax(a, b) - umin(a, b).
; Note the function is named "pdifu" while the expected mnemonic is pabdu.b.
define void @test_pdifu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pdifu_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pabdu.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %lo = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %hi = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %absdiff = sub <8 x i8> %hi, %lo
  store <8 x i8> %absdiff, ptr %ret_ptr
  ret void
}
; Test averaging floor subtraction signed for v4i16
; pasub pattern: (a - b) arithmetic shift right 1
; Signed halving subtract on <4 x i16> in widened form: sext to i32, sub,
; ashr by 1, trunc back. Expected to collapse into one pasub.h.
define void @test_pasub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pasub_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pasub.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %wide.lhs = sext <4 x i16> %lhs to <4 x i32>
  %wide.rhs = sext <4 x i16> %rhs to <4 x i32>
  %wide.diff = sub <4 x i32> %wide.lhs, %wide.rhs
  %halved = ashr <4 x i32> %wide.diff, splat(i32 1)
  %narrow = trunc <4 x i32> %halved to <4 x i16>
  store <4 x i16> %narrow, ptr %ret_ptr
  ret void
}
; Test averaging floor subtraction unsigned for v4i16
; pasubu pattern: (a - b) logical shift right 1
; Unsigned halving subtract on <4 x i16> in widened form: zext to i32, sub,
; lshr by 1, trunc back. Expected to collapse into one pasubu.h.
define void @test_pasubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pasubu_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pasubu.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %wide.lhs = zext <4 x i16> %lhs to <4 x i32>
  %wide.rhs = zext <4 x i16> %rhs to <4 x i32>
  %wide.diff = sub <4 x i32> %wide.lhs, %wide.rhs
  %halved = lshr <4 x i32> %wide.diff, splat(i32 1)
  %narrow = trunc <4 x i32> %halved to <4 x i16>
  store <4 x i16> %narrow, ptr %ret_ptr
  ret void
}
; Test averaging floor subtraction signed for v8i8
; Signed halving subtract on <8 x i8> in widened form (sext to i16, sub,
; ashr 1, trunc); expected to collapse into one pasub.b.
define void @test_pasub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pasub_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pasub.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %wide.lhs = sext <8 x i8> %lhs to <8 x i16>
  %wide.rhs = sext <8 x i8> %rhs to <8 x i16>
  %wide.diff = sub <8 x i16> %wide.lhs, %wide.rhs
  %halved = ashr <8 x i16> %wide.diff, splat(i16 1)
  %narrow = trunc <8 x i16> %halved to <8 x i8>
  store <8 x i8> %narrow, ptr %ret_ptr
  ret void
}
; Test averaging floor subtraction unsigned for v8i8
; Unsigned halving subtract on <8 x i8> in widened form (zext to i16, sub,
; lshr 1, trunc); expected to collapse into one pasubu.b.
define void @test_pasubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pasubu_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pasubu.b a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <8 x i8>, ptr %a_ptr
  %rhs = load <8 x i8>, ptr %b_ptr
  %wide.lhs = zext <8 x i8> %lhs to <8 x i16>
  %wide.rhs = zext <8 x i8> %rhs to <8 x i16>
  %wide.diff = sub <8 x i16> %wide.lhs, %wide.rhs
  %halved = lshr <8 x i16> %wide.diff, splat(i16 1)
  %narrow = trunc <8 x i16> %halved to <8 x i8>
  store <8 x i8> %narrow, ptr %ret_ptr
  ret void
}
; Test PLI (pack load immediate) for v4i16
; Stores a <4 x i16> splat of 100, formed as (splat 100) + zero; the expected
; output materialises it with a single pli.h.
define void @test_pli_h(ptr %ret_ptr) {
; CHECK-LABEL: test_pli_h:
; CHECK: # %bb.0:
; CHECK-NEXT: pli.h a1, 100
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %imm = add <4 x i16> splat(i16 100), zeroinitializer
  store <4 x i16> %imm, ptr %ret_ptr
  ret void
}
; Test PLI for v8i8 with unsigned immediate
; Stores an <8 x i8> splat of 64, formed as (splat 64) + zero; the expected
; output materialises it with a single pli.b.
define void @test_pli_b(ptr %ret_ptr) {
; CHECK-LABEL: test_pli_b:
; CHECK: # %bb.0:
; CHECK-NEXT: pli.b a1, 64
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %imm = add <8 x i8> splat(i8 64), zeroinitializer
  store <8 x i8> %imm, ptr %ret_ptr
  ret void
}
; Test PLI for v2i32 with signed immediate
; Stores a <2 x i32> splat of -256, formed as (splat -256) + zero; the
; expected output materialises it with a single pli.w.
define void @test_pli_w(ptr %ret_ptr) {
; CHECK-LABEL: test_pli_w:
; CHECK: # %bb.0:
; CHECK-NEXT: pli.w a1, -256
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %imm = add <2 x i32> splat(i32 -256), zeroinitializer
  store <2 x i32> %imm, ptr %ret_ptr
  ret void
}
; Extracts lane 0 of a <4 x i16> and stores it as a scalar i16; the expected
; output is a 64-bit load followed directly by sh, with no separate extract.
define void @test_extract_vector_16(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_extract_vector_16:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: sh a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <4 x i16>, ptr %a_ptr
  %lane0 = extractelement <4 x i16> %vec, i32 0
  store i16 %lane0, ptr %ret_ptr
  ret void
}
; Extracts lane 0 of an <8 x i8> and stores it as a scalar i8; the expected
; output is a 64-bit load followed directly by sb.
define void @test_extract_vector_8(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_extract_vector_8:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: sb a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <8 x i8>, ptr %a_ptr
  %lane0 = extractelement <8 x i8> %vec, i32 0
  store i8 %lane0, ptr %ret_ptr
  ret void
}
; Extracts lane 0 of a <2 x i32> and stores it as a scalar i32; the expected
; output is a 64-bit load followed directly by sw.
define void @test_extract_vector_32(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_extract_vector_32:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: sw a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %lane0 = extractelement <2 x i32> %vec, i32 0
  store i32 %lane0, ptr %ret_ptr
  ret void
}
; Extracts lane 1 of a <2 x i32>; the expected output narrows the vector load
; to a scalar lw from byte offset 4 rather than loading all 64 bits.
define void @test_extract_vector_32_elem1(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_extract_vector_32_elem1:
; CHECK: # %bb.0:
; CHECK-NEXT: lw a1, 4(a1)
; CHECK-NEXT: sw a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %lane1 = extractelement <2 x i32> %vec, i32 1
  store i32 %lane1, ptr %ret_ptr
  ret void
}
; Test basic add/sub operations for v2i32 (RV64 only)
; Element-wise add of two <2 x i32> values; expected to select a single
; padd.w.
define void @test_padd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_padd_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: padd.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sum = add <2 x i32> %lhs, %rhs
  store <2 x i32> %sum, ptr %ret_ptr
  ret void
}
; Element-wise sub of two <2 x i32> values (%a - %b); expected to select a
; single psub.w.
define void @test_psub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psub_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: psub.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %diff = sub <2 x i32> %lhs, %rhs
  store <2 x i32> %diff, ptr %ret_ptr
  ret void
}
; Test saturating add operations for v2i32 (RV64 only)
; llvm.sadd.sat on <2 x i32> (signed saturating add); expected to select
; psadd.w.
define void @test_psadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psadd_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: psadd.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sat = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  store <2 x i32> %sat, ptr %ret_ptr
  ret void
}
; llvm.uadd.sat on <2 x i32> (unsigned saturating add); expected to select
; psaddu.w.
define void @test_psaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psaddu_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: psaddu.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sat = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  store <2 x i32> %sat, ptr %ret_ptr
  ret void
}
; Test saturating sub operations for v2i32 (RV64 only)
; llvm.ssub.sat on <2 x i32> (signed saturating sub); expected to select
; pssub.w.
define void @test_pssub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pssub_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pssub.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sat = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  store <2 x i32> %sat, ptr %ret_ptr
  ret void
}
; llvm.usub.sat on <2 x i32> (unsigned saturating sub); expected to select
; pssubu.w.
define void @test_pssubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pssubu_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pssubu.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sat = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  store <2 x i32> %sat, ptr %ret_ptr
  ret void
}
; Test averaging floor signed operations for v2i32 (RV64 only)
; avgfloors pattern: (a + b) arithmetic shift right 1
; Signed halving add on <2 x i32> in widened form (sext to i64, add nsw,
; ashr 1, trunc); expected to collapse into one paadd.w.
define void @test_paadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_paadd_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: paadd.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %wide.lhs = sext <2 x i32> %lhs to <2 x i64>
  %wide.rhs = sext <2 x i32> %rhs to <2 x i64>
  %wide.sum = add nsw <2 x i64> %wide.lhs, %wide.rhs
  %halved = ashr <2 x i64> %wide.sum, splat(i64 1)
  %avg = trunc <2 x i64> %halved to <2 x i32>
  store <2 x i32> %avg, ptr %ret_ptr
  ret void
}
; Test averaging floor unsigned operations for v2i32 (RV64 only)
; avgflooru pattern: (a & b) + ((a ^ b) >> 1)
; Unsigned halving add on <2 x i32>, written as (a & b) + ((a ^ b) lshr 1);
; expected to select paaddu.w.
define void @test_paaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_paaddu_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: paaddu.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %common = and <2 x i32> %lhs, %rhs
  %differing = xor <2 x i32> %lhs, %rhs
  %half.diff = lshr <2 x i32> %differing, splat(i32 1)
  %avg = add <2 x i32> %common, %half.diff
  store <2 x i32> %avg, ptr %ret_ptr
  ret void
}
; Test averaging floor subtraction signed for v2i32 (RV64 only)
; pasub pattern: (a - b) arithmetic shift right 1
; Signed halving subtract on <2 x i32> in widened form (sext to i64, sub,
; ashr 1, trunc); expected to collapse into one pasub.w.
define void @test_pasub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pasub_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pasub.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %wide.lhs = sext <2 x i32> %lhs to <2 x i64>
  %wide.rhs = sext <2 x i32> %rhs to <2 x i64>
  %wide.diff = sub <2 x i64> %wide.lhs, %wide.rhs
  %halved = ashr <2 x i64> %wide.diff, splat(i64 1)
  %narrow = trunc <2 x i64> %halved to <2 x i32>
  store <2 x i32> %narrow, ptr %ret_ptr
  ret void
}
; Test averaging floor subtraction unsigned for v2i32 (RV64 only)
; pasubu pattern: (a - b) logical shift right 1
; Unsigned halving subtract on <2 x i32> in widened form (zext to i64, sub,
; lshr 1, trunc); expected to collapse into one pasubu.w.
define void @test_pasubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pasubu_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pasubu.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %wide.lhs = zext <2 x i32> %lhs to <2 x i64>
  %wide.rhs = zext <2 x i32> %rhs to <2 x i64>
  %wide.diff = sub <2 x i64> %wide.lhs, %wide.rhs
  %halved = lshr <2 x i64> %wide.diff, splat(i64 1)
  %narrow = trunc <2 x i64> %halved to <2 x i32>
  store <2 x i32> %narrow, ptr %ret_ptr
  ret void
}
; Test for splat
; Splat of a non-constant i32 into both lanes of a <2 x i32> (insertelement
; into lane 0, then a zero-mask shufflevector). The expected output forms the
; splat with padd.ws using the zero register as the first source.
; %a_ptr stays in the signature but is unused: the body previously loaded a
; <2 x i32> from it and never used the value, so that dead load was removed.
; The expected instructions never contained a load, so they are unchanged.
define void @test_non_const_splat_i32(ptr %ret_ptr, ptr %a_ptr, i32 %elt) {
; CHECK-LABEL: test_non_const_splat_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: padd.ws a1, zero, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %insert = insertelement <2 x i32> poison, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
  store <2 x i32> %splat, ptr %ret_ptr
  ret void
}
; Builds an <8 x i8> from eight scalar arguments via an insertelement chain.
; The expected output reads the last argument from the stack (lbu from sp),
; pairs bytes with ppaire.b, pairs halfwords with ppaire.h, and joins the two
; 32-bit halves with pack.
define void @test_build_vector_i8(ptr %ret_ptr, i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h) {
; CHECK-LABEL: test_build_vector_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: lbu t0, 0(sp)
; CHECK-NEXT: ppaire.b a5, a5, a6
; CHECK-NEXT: ppaire.b a3, a3, a4
; CHECK-NEXT: ppaire.b a1, a1, a2
; CHECK-NEXT: ppaire.b a2, a7, t0
; CHECK-NEXT: ppaire.h a2, a5, a2
; CHECK-NEXT: ppaire.h a1, a1, a3
; CHECK-NEXT: pack a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lane0 = insertelement <8 x i8> poison, i8 %a, i32 0
  %lane1 = insertelement <8 x i8> %lane0, i8 %b, i32 1
  %lane2 = insertelement <8 x i8> %lane1, i8 %c, i32 2
  %lane3 = insertelement <8 x i8> %lane2, i8 %d, i32 3
  %lane4 = insertelement <8 x i8> %lane3, i8 %e, i32 4
  %lane5 = insertelement <8 x i8> %lane4, i8 %f, i32 5
  %lane6 = insertelement <8 x i8> %lane5, i8 %g, i32 6
  %full = insertelement <8 x i8> %lane6, i8 %h, i32 7
  store <8 x i8> %full, ptr %ret_ptr
  ret void
}
; Builds a <4 x i16> from four scalar arguments via an insertelement chain;
; the expected output is two ppaire.h followed by pack.
define void @test_build_vector_i16(ptr %ret_ptr, i16 %a, i16 %b, i16 %c, i16 %d) {
; CHECK-LABEL: test_build_vector_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ppaire.h a3, a3, a4
; CHECK-NEXT: ppaire.h a1, a1, a2
; CHECK-NEXT: pack a1, a1, a3
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lane0 = insertelement <4 x i16> poison, i16 %a, i32 0
  %lane1 = insertelement <4 x i16> %lane0, i16 %b, i32 1
  %lane2 = insertelement <4 x i16> %lane1, i16 %c, i32 2
  %full = insertelement <4 x i16> %lane2, i16 %d, i32 3
  store <4 x i16> %full, ptr %ret_ptr
  ret void
}
; Builds a <2 x i32> from two scalar arguments; the expected output is a
; single pack of the two argument registers.
define void @test_build_vector_i32(ptr %ret_ptr, i32 %a, i32 %b) {
; CHECK-LABEL: test_build_vector_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pack a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lane0 = insertelement <2 x i32> poison, i32 %a, i32 0
  %full = insertelement <2 x i32> %lane0, i32 %b, i32 1
  store <2 x i32> %full, ptr %ret_ptr
  ret void
}
; Test logical shift left immediate for v4i16
; shl of <4 x i16> by a constant splat of 2; expected to select pslli.h with
; immediate 2.
define void @test_pslli_h(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_pslli_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: pslli.h a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <4 x i16>, ptr %a_ptr
  %shifted = shl <4 x i16> %vec, splat(i16 2)
  store <4 x i16> %shifted, ptr %ret_ptr
  ret void
}
; Test logical shift left immediate for v8i8
; shl of <8 x i8> by a constant splat of 2; expected to select pslli.b with
; immediate 2.
define void @test_pslli_b(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_pslli_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: pslli.b a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <8 x i8>, ptr %a_ptr
  %shifted = shl <8 x i8> %vec, splat(i8 2)
  store <8 x i8> %shifted, ptr %ret_ptr
  ret void
}
; Test logical shift left immediate for v2i32
; shl of <2 x i32> by a constant splat of 2; expected to select pslli.w with
; immediate 2.
define void @test_pslli_w(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_pslli_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: pslli.w a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %shifted = shl <2 x i32> %vec, splat(i32 2)
  store <2 x i32> %shifted, ptr %ret_ptr
  ret void
}
; Test logical shift right immediate
; lshr of <2 x i32> by a constant splat of 2; expected to select psrli.w with
; immediate 2.
define void @test_psrli_w(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_psrli_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psrli.w a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %shifted = lshr <2 x i32> %vec, splat(i32 2)
  store <2 x i32> %shifted, ptr %ret_ptr
  ret void
}
; lshr of <4 x i16> by a constant splat of 2; expected to select psrli.h with
; immediate 2.
define void @test_psrli_h(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_psrli_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psrli.h a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <4 x i16>, ptr %a_ptr
  %shifted = lshr <4 x i16> %vec, splat(i16 2)
  store <4 x i16> %shifted, ptr %ret_ptr
  ret void
}
; lshr of <8 x i8> by a constant splat of 2; expected to select psrli.b with
; immediate 2.
define void @test_psrli_b(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_psrli_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psrli.b a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <8 x i8>, ptr %a_ptr
  %shifted = lshr <8 x i8> %vec, splat(i8 2)
  store <8 x i8> %shifted, ptr %ret_ptr
  ret void
}
; Test arithmetic shift right immediate
; ashr of <2 x i32> by a constant splat of 2; expected to select psrai.w with
; immediate 2.
define void @test_psrai_w(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_psrai_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psrai.w a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %shifted = ashr <2 x i32> %vec, splat(i32 2)
  store <2 x i32> %shifted, ptr %ret_ptr
  ret void
}
; ashr of <4 x i16> by a constant splat of 2; expected to select psrai.h with
; immediate 2.
define void @test_psrai_h(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_psrai_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psrai.h a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <4 x i16>, ptr %a_ptr
  %shifted = ashr <4 x i16> %vec, splat(i16 2)
  store <4 x i16> %shifted, ptr %ret_ptr
  ret void
}
; ashr of <8 x i8> by a constant splat of 2; expected to select psrai.b with
; immediate 2.
define void @test_psrai_b(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_psrai_b:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psrai.b a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <8 x i8>, ptr %a_ptr
  %shifted = ashr <8 x i8> %vec, splat(i8 2)
  store <8 x i8> %shifted, ptr %ret_ptr
  ret void
}
; Test arithmetic saturation shift left immediate for v2i32
; llvm.sshl.sat on <2 x i32> with a constant splat shift amount of 2;
; expected to select psslai.w with immediate 2.
define void @test_psslai_w(ptr %ret_ptr, ptr %a_ptr) {
; CHECK-LABEL: test_psslai_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psslai.w a1, a1, 2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %sat = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %vec, <2 x i32> splat(i32 2))
  store <2 x i32> %sat, ptr %ret_ptr
  ret void
}
; Test logical shift left(scalar shamt)
; shl of <2 x i32> where the shift amount is a splat of the scalar %shamt
; (insertelement + zero-mask shufflevector); expected to select psll.ws taking
; the scalar register directly.
define void @test_psll_ws(ptr %ret_ptr, ptr %a_ptr, i32 %shamt) {
; CHECK-LABEL: test_psll_ws:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psll.ws a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %head = insertelement <2 x i32> poison, i32 %shamt, i32 0
  %amt = shufflevector <2 x i32> %head, <2 x i32> poison, <2 x i32> zeroinitializer
  %shifted = shl <2 x i32> %vec, %amt
  store <2 x i32> %shifted, ptr %ret_ptr
  ret void
}
; Test logical shift left(vector shamt)
; shl of <2 x i32> by a per-lane (non-splat) shift vector loaded from memory.
; No packed shift is expected here: the expected output shifts each 32-bit
; half separately with sllw and recombines the halves with pack.
define void @test_psll_ws_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) {
; CHECK-LABEL: test_psll_ws_vec_shamt:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: sllw a3, a1, a2
; CHECK-NEXT: srli a2, a2, 32
; CHECK-NEXT: srli a1, a1, 32
; CHECK-NEXT: sllw a1, a1, a2
; CHECK-NEXT: pack a1, a3, a1
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %amt = load <2 x i32>, ptr %shamt_ptr
  %shifted = shl <2 x i32> %vec, %amt
  store <2 x i32> %shifted, ptr %ret_ptr
  ret void
}
; Test logical shift right(scalar shamt)
; lshr of <2 x i32> where the shift amount is a splat of the scalar %shamt;
; expected to select psrl.ws taking the scalar register directly.
define void @test_psrl_ws(ptr %ret_ptr, ptr %a_ptr, i32 %shamt) {
; CHECK-LABEL: test_psrl_ws:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psrl.ws a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %head = insertelement <2 x i32> poison, i32 %shamt, i32 0
  %amt = shufflevector <2 x i32> %head, <2 x i32> poison, <2 x i32> zeroinitializer
  %shifted = lshr <2 x i32> %vec, %amt
  store <2 x i32> %shifted, ptr %ret_ptr
  ret void
}
; Test arithmetic shift right(scalar shamt)
; ashr of <2 x i32> where the shift amount is a splat of the scalar %shamt;
; expected to select psra.ws taking the scalar register directly.
define void @test_psra_ws(ptr %ret_ptr, ptr %a_ptr, i32 %shamt) {
; CHECK-LABEL: test_psra_ws:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: psra.ws a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %head = insertelement <2 x i32> poison, i32 %shamt, i32 0
  %amt = shufflevector <2 x i32> %head, <2 x i32> poison, <2 x i32> zeroinitializer
  %shifted = ashr <2 x i32> %vec, %amt
  store <2 x i32> %shifted, ptr %ret_ptr
  ret void
}
; Test logical shift right(vector shamt)
; lshr of <2 x i32> by a per-lane (non-splat) shift vector loaded from memory.
; No packed shift is expected here: the expected output shifts each 32-bit
; half separately with srlw and recombines the halves with pack.
define void @test_psrl_ws_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) {
; CHECK-LABEL: test_psrl_ws_vec_shamt:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: srlw a3, a1, a2
; CHECK-NEXT: srli a2, a2, 32
; CHECK-NEXT: srli a1, a1, 32
; CHECK-NEXT: srlw a1, a1, a2
; CHECK-NEXT: pack a1, a3, a1
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %amt = load <2 x i32>, ptr %shamt_ptr
  %shifted = lshr <2 x i32> %vec, %amt
  store <2 x i32> %shifted, ptr %ret_ptr
  ret void
}
; Test arithmetic shift right(vector shamt)
; ashr of <2 x i32> by a per-lane (non-splat) shift vector loaded from memory.
; No packed shift is expected here: the expected output shifts each 32-bit
; half separately with sraw and recombines the halves with pack.
define void @test_psra_ws_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) {
; CHECK-LABEL: test_psra_ws_vec_shamt:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: sraw a3, a1, a2
; CHECK-NEXT: srli a2, a2, 32
; CHECK-NEXT: srli a1, a1, 32
; CHECK-NEXT: sraw a1, a1, a2
; CHECK-NEXT: pack a1, a3, a1
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %vec = load <2 x i32>, ptr %a_ptr
  %amt = load <2 x i32>, ptr %shamt_ptr
  %shifted = ashr <2 x i32> %vec, %amt
  store <2 x i32> %shifted, ptr %ret_ptr
  ret void
}
; Test packed multiply high signed
; Signed multiply-high on <4 x i16>: sext both inputs to i32, multiply, shift
; right by 16, trunc to i16. The shift is lshr; after the trunc only product
; bits [31:16] remain, which lshr and ashr agree on. Expected to select
; pmulh.h.
define void @test_pmulh_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pmulh_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pmulh.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %wide.lhs = sext <4 x i16> %lhs to <4 x i32>
  %wide.rhs = sext <4 x i16> %rhs to <4 x i32>
  %product = mul <4 x i32> %wide.lhs, %wide.rhs
  %upper = lshr <4 x i32> %product, splat(i32 16)
  %high = trunc <4 x i32> %upper to <4 x i16>
  store <4 x i16> %high, ptr %ret_ptr
  ret void
}
; Signed multiply-high on <2 x i32>: sext both inputs to i64, multiply, lshr
; by 32, trunc to i32. Expected to select pmulh.w.
define void @test_pmulh_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pmulh_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pmulh.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %wide.lhs = sext <2 x i32> %lhs to <2 x i64>
  %wide.rhs = sext <2 x i32> %rhs to <2 x i64>
  %product = mul <2 x i64> %wide.lhs, %wide.rhs
  %upper = lshr <2 x i64> %product, splat(i64 32)
  %high = trunc <2 x i64> %upper to <2 x i32>
  store <2 x i32> %high, ptr %ret_ptr
  ret void
}
; Test packed multiply high unsigned
; Unsigned multiply-high on <4 x i16>: zext both inputs to i32, multiply,
; lshr by 16, trunc to i16. Expected to select pmulhu.h.
define void @test_pmulhu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pmulhu_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pmulhu.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %wide.lhs = zext <4 x i16> %lhs to <4 x i32>
  %wide.rhs = zext <4 x i16> %rhs to <4 x i32>
  %product = mul <4 x i32> %wide.lhs, %wide.rhs
  %upper = lshr <4 x i32> %product, splat(i32 16)
  %high = trunc <4 x i32> %upper to <4 x i16>
  store <4 x i16> %high, ptr %ret_ptr
  ret void
}
; Unsigned multiply-high on <2 x i32>: zext both inputs to i64, multiply,
; lshr by 32, trunc to i32. Expected to select pmulhu.w.
define void @test_pmulhu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pmulhu_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pmulhu.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %wide.lhs = zext <2 x i32> %lhs to <2 x i64>
  %wide.rhs = zext <2 x i32> %rhs to <2 x i64>
  %product = mul <2 x i64> %wide.lhs, %wide.rhs
  %upper = lshr <2 x i64> %product, splat(i64 32)
  %high = trunc <2 x i64> %upper to <2 x i32>
  store <2 x i32> %high, ptr %ret_ptr
  ret void
}
; Test packed multiply high signed-unsigned
; Mixed-sign multiply-high on <4 x i16>: %a is sign-extended and %b is
; zero-extended before the i32 multiply, lshr by 16 and trunc. Expected to
; select pmulhsu.h with %a as the first source.
define void @test_pmulhsu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pmulhsu_h:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pmulhsu.h a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %signed.lhs = sext <4 x i16> %lhs to <4 x i32>
  %unsigned.rhs = zext <4 x i16> %rhs to <4 x i32>
  %product = mul <4 x i32> %signed.lhs, %unsigned.rhs
  %upper = lshr <4 x i32> %product, splat(i32 16)
  %high = trunc <4 x i32> %upper to <4 x i16>
  store <4 x i16> %high, ptr %ret_ptr
  ret void
}
; Commuted mixed-sign multiply-high on <4 x i16>: here %a is zero-extended and
; %b is sign-extended, so the expected pmulhsu.h has its sources swapped
; (a2, a1), putting the sign-extended operand first.
define void @test_pmulhsu_h_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pmulhsu_h_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pmulhsu.h a1, a2, a1
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <4 x i16>, ptr %a_ptr
  %rhs = load <4 x i16>, ptr %b_ptr
  %unsigned.lhs = zext <4 x i16> %lhs to <4 x i32>
  %signed.rhs = sext <4 x i16> %rhs to <4 x i32>
  %product = mul <4 x i32> %unsigned.lhs, %signed.rhs
  %upper = lshr <4 x i32> %product, splat(i32 16)
  %high = trunc <4 x i32> %upper to <4 x i16>
  store <4 x i16> %high, ptr %ret_ptr
  ret void
}
; Mixed-sign multiply-high on <2 x i32>: %a is sign-extended and %b is
; zero-extended before the i64 multiply, lshr by 32 and trunc. Expected to
; select pmulhsu.w with %a as the first source.
define void @test_pmulhsu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pmulhsu_w:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pmulhsu.w a1, a1, a2
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %signed.lhs = sext <2 x i32> %lhs to <2 x i64>
  %unsigned.rhs = zext <2 x i32> %rhs to <2 x i64>
  %product = mul <2 x i64> %signed.lhs, %unsigned.rhs
  %upper = lshr <2 x i64> %product, splat(i64 32)
  %high = trunc <2 x i64> %upper to <2 x i32>
  store <2 x i32> %high, ptr %ret_ptr
  ret void
}
; Commuted mixed-sign multiply-high on <2 x i32>: here %a is zero-extended and
; %b is sign-extended, so the expected pmulhsu.w has its sources swapped
; (a2, a1), putting the sign-extended operand first.
define void @test_pmulhsu_w_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pmulhsu_w_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: ld a1, 0(a1)
; CHECK-NEXT: ld a2, 0(a2)
; CHECK-NEXT: pmulhsu.w a1, a2, a1
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %unsigned.lhs = zext <2 x i32> %lhs to <2 x i64>
  %signed.rhs = sext <2 x i32> %rhs to <2 x i64>
  %product = mul <2 x i64> %unsigned.lhs, %signed.rhs
  %upper = lshr <2 x i64> %product, splat(i64 32)
  %high = trunc <2 x i64> %upper to <2 x i32>
  store <2 x i32> %high, ptr %ret_ptr
  ret void
}