| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+m,+zbb \ |
| ; RUN: -verify-machineinstrs < %s | \ |
| ; RUN: FileCheck %s |
| |
| ; Test basic add/sub operations for v4i16 |
| ; Lane-wise add of two <4 x i16> values; expected to select a single padd.h. |
| define <4 x i16> @test_padd_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_padd_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = add <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; Lane-wise sub of two <4 x i16> values; expected to select a single psub.h. |
| define <4 x i16> @test_psub_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_psub_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psub.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = sub <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; Test basic add/sub operations for v8i8 |
| ; Lane-wise add of two <8 x i8> values; expected to select a single padd.b. |
| define <8 x i8> @test_padd_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_padd_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = add <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; Lane-wise sub of two <8 x i8> values; expected to select a single psub.b. |
| define <8 x i8> @test_psub_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_psub_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psub.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = sub <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; Test bitwise operations for v4i16 (use scalar instructions) |
| ; Bitwise AND of <4 x i16> operands; expected to lower to the plain scalar 'and'. |
| define <4 x i16> @test_and_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_and_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: and a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = and <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; Bitwise OR of <4 x i16> operands; expected to lower to the plain scalar 'or'. |
| define <4 x i16> @test_or_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_or_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: or a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = or <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; Bitwise XOR of <4 x i16> operands; expected to lower to the plain scalar 'xor'. |
| define <4 x i16> @test_xor_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_xor_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xor a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = xor <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; and(%a, not(%b)) on <4 x i16>, with the NOT written as xor against an |
| ; all-ones splat; expected to fold into a single andn. |
| define <4 x i16> @test_andn_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_andn_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andn a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %not = xor <4 x i16> %b, splat (i16 -1) |
| %res = and <4 x i16> %a, %not |
| ret <4 x i16> %res |
| } |
| |
| ; or(%a, not(%b)) on <4 x i16>, with the NOT written as xor against an |
| ; all-ones splat; expected to fold into a single orn. |
| define <4 x i16> @test_orn_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_orn_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: orn a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %not = xor <4 x i16> %b, splat (i16 -1) |
| %res = or <4 x i16> %a, %not |
| ret <4 x i16> %res |
| } |
| |
| ; xor(%a, not(%b)) on <4 x i16>; expected to fold into a single xnor. |
| ; Note the expected source operand order is a1, a0 (swapped vs. andn/orn). |
| define <4 x i16> @test_xnor_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_xnor_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xnor a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %not = xor <4 x i16> %b, splat (i16 -1) |
| %res = xor <4 x i16> %a, %not |
| ret <4 x i16> %res |
| } |
| |
| ; Test bitwise operations for v8i8 (use scalar instructions) |
| ; Bitwise AND of <8 x i8> operands; expected to lower to the plain scalar 'and'. |
| define <8 x i8> @test_and_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_and_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: and a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = and <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; Bitwise OR of <8 x i8> operands; expected to lower to the plain scalar 'or'. |
| define <8 x i8> @test_or_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_or_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: or a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = or <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; Bitwise XOR of <8 x i8> operands; expected to lower to the plain scalar 'xor'. |
| define <8 x i8> @test_xor_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_xor_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xor a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = xor <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; and(%a, not(%b)) on <8 x i8>, with the NOT written as xor against an |
| ; all-ones splat; expected to fold into a single andn. |
| define <8 x i8> @test_andn_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_andn_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andn a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %not = xor <8 x i8> %b, splat (i8 -1) |
| %res = and <8 x i8> %a, %not |
| ret <8 x i8> %res |
| } |
| |
| ; or(%a, not(%b)) on <8 x i8>, with the NOT written as xor against an |
| ; all-ones splat; expected to fold into a single orn. |
| define <8 x i8> @test_orn_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_orn_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: orn a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %not = xor <8 x i8> %b, splat (i8 -1) |
| %res = or <8 x i8> %a, %not |
| ret <8 x i8> %res |
| } |
| |
| ; xor(%a, not(%b)) on <8 x i8>; expected to fold into a single xnor. |
| ; Note the expected source operand order is a1, a0 (swapped vs. andn/orn). |
| define <8 x i8> @test_xnor_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_xnor_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xnor a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %not = xor <8 x i8> %b, splat (i8 -1) |
| %res = xor <8 x i8> %a, %not |
| ret <8 x i8> %res |
| } |
| |
| ; Test bitwise operations for v2i32 (use scalar instructions) |
| ; Bitwise AND of <2 x i32> operands; expected to lower to the plain scalar 'and'. |
| define <2 x i32> @test_and_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_and_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: and a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = and <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Bitwise OR of <2 x i32> operands; expected to lower to the plain scalar 'or'. |
| define <2 x i32> @test_or_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_or_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: or a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = or <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Bitwise XOR of <2 x i32> operands; expected to lower to the plain scalar 'xor'. |
| define <2 x i32> @test_xor_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_xor_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xor a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = xor <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; and(%a, not(%b)) on <2 x i32>, with the NOT written as xor against an |
| ; all-ones splat; expected to fold into a single andn. |
| define <2 x i32> @test_andn_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_andn_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andn a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %not = xor <2 x i32> %b, splat (i32 -1) |
| %res = and <2 x i32> %a, %not |
| ret <2 x i32> %res |
| } |
| |
| ; or(%a, not(%b)) on <2 x i32>, with the NOT written as xor against an |
| ; all-ones splat; expected to fold into a single orn. |
| define <2 x i32> @test_orn_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_orn_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: orn a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %not = xor <2 x i32> %b, splat (i32 -1) |
| %res = or <2 x i32> %a, %not |
| ret <2 x i32> %res |
| } |
| |
| ; xor(%a, not(%b)) on <2 x i32>; expected to fold into a single xnor. |
| ; Note the expected source operand order is a1, a0 (swapped vs. andn/orn). |
| define <2 x i32> @test_xnor_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_xnor_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xnor a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %not = xor <2 x i32> %b, splat (i32 -1) |
| %res = xor <2 x i32> %a, %not |
| ret <2 x i32> %res |
| } |
| |
| ; Bitwise NOT of <4 x i16>, written as xor with an all-ones splat; expected |
| ; to lower to the scalar 'not'. |
| define <4 x i16> @test_not_h(<4 x i16> %a) { |
| ; CHECK-LABEL: test_not_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %res = xor <4 x i16> %a, splat(i16 -1) |
| ret <4 x i16> %res |
| } |
| |
| ; Bitwise NOT of <8 x i8>, written as xor with an all-ones splat; expected |
| ; to lower to the scalar 'not'. |
| define <8 x i8> @test_not_b(<8 x i8> %a) { |
| ; CHECK-LABEL: test_not_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %res = xor <8 x i8> %a, splat(i8 -1) |
| ret <8 x i8> %res |
| } |
| |
| ; Bitwise NOT of <2 x i32>, written as xor with an all-ones splat; expected |
| ; to lower to the scalar 'not'. |
| define <2 x i32> @test_not_w(<2 x i32> %a) { |
| ; CHECK-LABEL: test_not_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %res = xor <2 x i32> %a, splat(i32 -1) |
| ret <2 x i32> %res |
| } |
| |
| ; Test saturating add operations for v4i16 |
| ; Signed saturating add (llvm.sadd.sat) on <4 x i16>; expected to select psadd.h. |
| define <4 x i16> @test_psadd_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_psadd_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psadd.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %res |
| } |
| |
| ; Unsigned saturating add (llvm.uadd.sat) on <4 x i16>; expected to select psaddu.h. |
| define <4 x i16> @test_psaddu_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_psaddu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psaddu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %res |
| } |
| |
| ; Test saturating sub operations for v4i16 |
| ; Signed saturating sub (llvm.ssub.sat) on <4 x i16>; expected to select pssub.h. |
| define <4 x i16> @test_pssub_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pssub_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pssub.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %res |
| } |
| |
| ; Unsigned saturating sub (llvm.usub.sat) on <4 x i16>; expected to select pssubu.h. |
| define <4 x i16> @test_pssubu_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pssubu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pssubu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %res |
| } |
| |
| ; Test saturating add operations for v8i8 |
| ; Signed saturating add (llvm.sadd.sat) on <8 x i8>; expected to select psadd.b. |
| define <8 x i8> @test_psadd_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_psadd_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psadd.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b) |
| ret <8 x i8> %res |
| } |
| |
| ; Unsigned saturating add (llvm.uadd.sat) on <8 x i8>; expected to select psaddu.b. |
| define <8 x i8> @test_psaddu_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_psaddu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psaddu.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b) |
| ret <8 x i8> %res |
| } |
| |
| ; Test saturating sub operations for v8i8 |
| ; Signed saturating sub (llvm.ssub.sat) on <8 x i8>; expected to select pssub.b. |
| define <8 x i8> @test_pssub_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_pssub_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pssub.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b) |
| ret <8 x i8> %res |
| } |
| |
| ; Unsigned saturating sub (llvm.usub.sat) on <8 x i8>; expected to select pssubu.b. |
| define <8 x i8> @test_pssubu_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_pssubu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pssubu.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b) |
| ret <8 x i8> %res |
| } |
| |
| ; Test averaging floor signed operations for v4i16 |
| ; avgfloors pattern: (a + b) arithmetic shift right 1 |
| ; Both i16 operands are sign-extended to i32 so the sum cannot wrap before |
| ; the shift; the sext/add/ashr/trunc chain is expected to collapse into paadd.h. |
| define <4 x i16> @test_paadd_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_paadd_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: paadd.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %ext.a = sext <4 x i16> %a to <4 x i32> |
| %ext.b = sext <4 x i16> %b to <4 x i32> |
| %add = add nsw <4 x i32> %ext.a, %ext.b |
| %shift = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1> |
| %res = trunc <4 x i32> %shift to <4 x i16> |
| ret <4 x i16> %res |
| } |
| |
| ; Test averaging floor unsigned operations for v4i16 |
| ; avgflooru pattern: (a & b) + ((a ^ b) >> 1) |
| ; Unsigned floor average computed entirely in i16 as |
| ; (a & b) + ((a ^ b) >> 1), with no widening; expected to select paaddu.h. |
| define <4 x i16> @test_paaddu_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_paaddu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: paaddu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %and = and <4 x i16> %a, %b |
| %xor = xor <4 x i16> %a, %b |
| %shift = lshr <4 x i16> %xor, <i16 1, i16 1, i16 1, i16 1> |
| %res = add <4 x i16> %and, %shift |
| ret <4 x i16> %res |
| } |
| |
| ; Test averaging floor signed operations for v8i8 |
| ; Both i8 operands are sign-extended to i16 so the sum cannot wrap before |
| ; the shift; the sext/add/ashr/trunc chain is expected to collapse into paadd.b. |
| define <8 x i8> @test_paadd_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_paadd_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: paadd.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %ext.a = sext <8 x i8> %a to <8 x i16> |
| %ext.b = sext <8 x i8> %b to <8 x i16> |
| %add = add nsw <8 x i16> %ext.a, %ext.b |
| %shift = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| %res = trunc <8 x i16> %shift to <8 x i8> |
| ret <8 x i8> %res |
| } |
| |
| ; Test averaging floor unsigned operations for v8i8 |
| ; Unsigned floor average computed entirely in i8 as |
| ; (a & b) + ((a ^ b) >> 1), with no widening; expected to select paaddu.b. |
| define <8 x i8> @test_paaddu_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_paaddu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: paaddu.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %and = and <8 x i8> %a, %b |
| %xor = xor <8 x i8> %a, %b |
| %shift = lshr <8 x i8> %xor, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> |
| %res = add <8 x i8> %and, %shift |
| ret <8 x i8> %res |
| } |
| |
| ; llvm.abs on <2 x i32> with the int-min-is-poison flag set to 0. Unlike the |
| ; h/b variants, the expected output is a two-instruction pneg.w + pmax.w pair. |
| define <2 x i32> @test_pabs_w(<2 x i32> %a) { |
| ; CHECK-LABEL: test_pabs_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pneg.w a1, a0 |
| ; CHECK-NEXT: pmax.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %a, i1 0) |
| ret <2 x i32> %res |
| } |
| |
| ; llvm.abs on <4 x i16> with the int-min-is-poison flag set to 0; expected |
| ; to select a single pabs.h. |
| define <4 x i16> @test_pabs_h(<4 x i16> %a) { |
| ; CHECK-LABEL: test_pabs_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pabs.h a0, a0 |
| ; CHECK-NEXT: ret |
| %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %a, i1 0) |
| ret <4 x i16> %res |
| } |
| |
| ; llvm.abs on <8 x i8> with the int-min-is-poison flag set to 0; expected |
| ; to select a single pabs.b. |
| define <8 x i8> @test_pabs_b(<8 x i8> %a) { |
| ; CHECK-LABEL: test_pabs_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pabs.b a0, a0 |
| ; CHECK-NEXT: ret |
| %res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %a, i1 0) |
| ret <8 x i8> %res |
| } |
| |
| ; Test absolute difference signed for v2i32 |
| ; abds pattern: sub(smax(a,b), smin(a,b)) |
| ; smax(a,b) - smin(a,b) on <2 x i32>. The expected output is the unfused |
| ; three-instruction pmin.w / pmax.w / psub.w sequence. |
| define <2 x i32> @test_pdif_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pdif_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmin.w a2, a0, a1 |
| ; CHECK-NEXT: pmax.w a0, a0, a1 |
| ; CHECK-NEXT: psub.w a0, a0, a2 |
| ; CHECK-NEXT: ret |
| %min = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) |
| %max = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b) |
| %res = sub <2 x i32> %max, %min |
| ret <2 x i32> %res |
| } |
| |
| ; Test absolute difference unsigned for v2i32 |
| ; abdu pattern: sub(umax(a,b), umin(a,b)) |
| ; umax(a,b) - umin(a,b) on <2 x i32>. The expected output is the unfused |
| ; three-instruction pminu.w / pmaxu.w / psub.w sequence. |
| define <2 x i32> @test_pdifu_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pdifu_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pminu.w a2, a0, a1 |
| ; CHECK-NEXT: pmaxu.w a0, a0, a1 |
| ; CHECK-NEXT: psub.w a0, a0, a2 |
| ; CHECK-NEXT: ret |
| %min = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b) |
| %max = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b) |
| %res = sub <2 x i32> %max, %min |
| ret <2 x i32> %res |
| } |
| |
| ; Test absolute difference signed for v4i16 |
| ; abds pattern: sub(smax(a,b), smin(a,b)) |
| ; smax(a,b) - smin(a,b) on <4 x i16>; expected to fold into a single pabd.h |
| ; (the function name says "pdif" but the expected mnemonic is pabd). |
| define <4 x i16> @test_pdif_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pdif_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pabd.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %min = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b) |
| %max = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b) |
| %res = sub <4 x i16> %max, %min |
| ret <4 x i16> %res |
| } |
| |
| ; Test absolute difference unsigned for v4i16 |
| ; abdu pattern: sub(umax(a,b), umin(a,b)) |
| ; umax(a,b) - umin(a,b) on <4 x i16>; expected to fold into a single pabdu.h |
| ; (the function name says "pdifu" but the expected mnemonic is pabdu). |
| define <4 x i16> @test_pdifu_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pdifu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pabdu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %min = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b) |
| %max = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b) |
| %res = sub <4 x i16> %max, %min |
| ret <4 x i16> %res |
| } |
| |
| ; Test absolute difference signed for v8i8 |
| ; smax(a,b) - smin(a,b) on <8 x i8>; expected to fold into a single pabd.b. |
| define <8 x i8> @test_pdif_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_pdif_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pabd.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %min = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b) |
| %max = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b) |
| %res = sub <8 x i8> %max, %min |
| ret <8 x i8> %res |
| } |
| |
| ; Test absolute difference unsigned for v8i8 |
| ; umax(a,b) - umin(a,b) on <8 x i8>; expected to fold into a single pabdu.b. |
| define <8 x i8> @test_pdifu_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_pdifu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pabdu.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %min = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b) |
| %max = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) |
| %res = sub <8 x i8> %max, %min |
| ret <8 x i8> %res |
| } |
| |
| ; Test averaging floor subtraction signed for v4i16 |
| ; pasub pattern: (a - b) arithmetic shift right 1 |
| ; Sign-extend both i16 operands to i32, subtract, arithmetic-shift right by 1 |
| ; and truncate back to i16; the chain is expected to collapse into pasub.h. |
| define <4 x i16> @test_pasub_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pasub_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pasub.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <4 x i16> %a to <4 x i32> |
| %b_ext = sext <4 x i16> %b to <4 x i32> |
| %sub = sub <4 x i32> %a_ext, %b_ext |
| %res = ashr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1> |
| %res_trunc = trunc <4 x i32> %res to <4 x i16> |
| ret <4 x i16> %res_trunc |
| } |
| |
| ; Test averaging floor subtraction unsigned for v4i16 |
| ; pasubu pattern: (a - b) logical shift right 1 |
| ; Zero-extend both i16 operands to i32, subtract, logical-shift right by 1 |
| ; and truncate back to i16; the chain is expected to collapse into pasubu.h. |
| define <4 x i16> @test_pasubu_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pasubu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pasubu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <4 x i16> %a to <4 x i32> |
| %b_ext = zext <4 x i16> %b to <4 x i32> |
| %sub = sub <4 x i32> %a_ext, %b_ext |
| %res = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1> |
| %res_trunc = trunc <4 x i32> %res to <4 x i16> |
| ret <4 x i16> %res_trunc |
| } |
| |
| ; Test averaging floor subtraction signed for v8i8 |
| ; Sign-extend both i8 operands to i16, subtract, arithmetic-shift right by 1 |
| ; and truncate back to i8; the chain is expected to collapse into pasub.b. |
| define <8 x i8> @test_pasub_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_pasub_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pasub.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <8 x i8> %a to <8 x i16> |
| %b_ext = sext <8 x i8> %b to <8 x i16> |
| %sub = sub <8 x i16> %a_ext, %b_ext |
| %res = ashr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| %res_trunc = trunc <8 x i16> %res to <8 x i8> |
| ret <8 x i8> %res_trunc |
| } |
| |
| ; Test averaging floor subtraction unsigned for v8i8 |
| ; Zero-extend both i8 operands to i16, subtract, logical-shift right by 1 |
| ; and truncate back to i8; the chain is expected to collapse into pasubu.b. |
| define <8 x i8> @test_pasubu_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_pasubu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pasubu.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <8 x i8> %a to <8 x i16> |
| %b_ext = zext <8 x i8> %b to <8 x i16> |
| %sub = sub <8 x i16> %a_ext, %b_ext |
| %res = lshr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| %res_trunc = trunc <8 x i16> %res to <8 x i8> |
| ret <8 x i8> %res_trunc |
| } |
| |
| ; Test PLI (pack load immediate) for v4i16 |
| ; Constant splat of i16 100; expected to be materialized with 'pli.h a0, 100'. |
| define <4 x i16> @test_pli_h() { |
| ; CHECK-LABEL: test_pli_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pli.h a0, 100 |
| ; CHECK-NEXT: ret |
| ret <4 x i16> splat (i16 100) |
| } |
| |
| ; Each i32 lane is 0x00640064, i.e. 100 (0x64) in both 16-bit halves, so the |
| ; <2 x i32> constant is expected to be materialized with the halfword pli.h. |
| define <2 x i32> @test_pli_h_v2i32() { |
| ; CHECK-LABEL: test_pli_h_v2i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pli.h a0, 100 |
| ; CHECK-NEXT: ret |
| ret <2 x i32> splat (i32 u0x640064) |
| } |
| |
| ; Test PLI for v8i8 with unsigned immediate |
| ; Constant splat of i8 64; expected to be materialized with 'pli.b a0, 64'. |
| define <8 x i8> @test_pli_b() { |
| ; CHECK-LABEL: test_pli_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pli.b a0, 64 |
| ; CHECK-NEXT: ret |
| ret <8 x i8> splat (i8 64) |
| } |
| |
| ; Each i16 lane is 0x4040, i.e. 64 (0x40) in both bytes, so the <4 x i16> |
| ; constant is expected to be materialized with the byte-wise pli.b. |
| define <4 x i16> @test_pli_b_v4i16() { |
| ; CHECK-LABEL: test_pli_b_v4i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pli.b a0, 64 |
| ; CHECK-NEXT: ret |
| ret <4 x i16> splat (i16 u0x4040) |
| } |
| |
| ; Each i32 lane is 0x40404040, i.e. 64 (0x40) in all four bytes, so the |
| ; <2 x i32> constant is expected to be materialized with the byte-wise pli.b. |
| define <2 x i32> @test_pli_b_v2i32() { |
| ; CHECK-LABEL: test_pli_b_v2i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pli.b a0, 64 |
| ; CHECK-NEXT: ret |
| ret <2 x i32> splat (i32 u0x40404040) |
| } |
| |
| ; Test PLI for v2i32 with signed immediate |
| ; Constant splat of the negative i32 value -256; expected to be materialized |
| ; with 'pli.w a0, -256'. |
| define <2 x i32> @test_pli_w() { |
| ; CHECK-LABEL: test_pli_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pli.w a0, -256 |
| ; CHECK-NEXT: ret |
| ret <2 x i32> splat (i32 -256) |
| } |
| |
| ; Splat of i16 0x1900, which equals 100 << 6; expected to be materialized |
| ; with 'plui.h a0, 100'. |
| define <4 x i16> @test_plui_h() { |
| ; CHECK-LABEL: test_plui_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: plui.h a0, 100 |
| ; CHECK-NEXT: ret |
| ret <4 x i16> splat (i16 u0x1900) |
| } |
| |
| ; Splat of i16 0x9900; read as a signed 16-bit value this is -26368, which |
| ; equals -412 << 6, matching the expected 'plui.h a0, -412'. |
| define <4 x i16> @test_plui_h_negative() { |
| ; CHECK-LABEL: test_plui_h_negative: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: plui.h a0, -412 |
| ; CHECK-NEXT: ret |
| ret <4 x i16> splat (i16 u0x9900) |
| } |
| |
| ; Each i32 lane is 0x19001900, i.e. 0x1900 (100 << 6) in both 16-bit halves, |
| ; so the <2 x i32> constant is expected to use the halfword 'plui.h a0, 100'. |
| define <2 x i32> @test_plui_h_v2i32() { |
| ; CHECK-LABEL: test_plui_h_v2i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: plui.h a0, 100 |
| ; CHECK-NEXT: ret |
| ret <2 x i32> splat (i32 u0x19001900) |
| } |
| |
| ; Each i32 lane is 0x99009900, i.e. 0x9900 (-412 << 6 as a signed i16) in |
| ; both 16-bit halves; expected to use the halfword 'plui.h a0, -412'. |
| define <2 x i32> @test_plui_h_negative_v2i32() { |
| ; CHECK-LABEL: test_plui_h_negative_v2i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: plui.h a0, -412 |
| ; CHECK-NEXT: ret |
| ret <2 x i32> splat (i32 u0x99009900) |
| } |
| |
| ; Splat of i32 0x12c00000, which equals 75 << 22; expected to be materialized |
| ; with 'plui.w a0, 75'. |
| define <2 x i32> @test_plui_w() { |
| ; CHECK-LABEL: test_plui_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: plui.w a0, 75 |
| ; CHECK-NEXT: ret |
| ret <2 x i32> splat (i32 u0x12c00000) |
| } |
| |
| ; Splat of i32 0xc9800000; read as a signed 32-bit value this equals |
| ; -218 << 22, matching the expected 'plui.w a0, -218'. |
| define <2 x i32> @test_plui_w_negative() { |
| ; CHECK-LABEL: test_plui_w_negative: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: plui.w a0, -218 |
| ; CHECK-NEXT: ret |
| ret <2 x i32> splat (i32 u0xc9800000) |
| } |
| |
| ; All-ones <8 x i8> constant; expected to be materialized with the scalar |
| ; 'li a0, -1' rather than a packed immediate load. |
| define <8 x i8> @test_allones_v8i8() { |
| ; CHECK-LABEL: test_allones_v8i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: li a0, -1 |
| ; CHECK-NEXT: ret |
| ret <8 x i8> splat (i8 -1) |
| } |
| |
| ; All-ones <4 x i16> constant; expected to be materialized with the scalar |
| ; 'li a0, -1' rather than a packed immediate load. |
| define <4 x i16> @test_allones_v4i16() { |
| ; CHECK-LABEL: test_allones_v4i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: li a0, -1 |
| ; CHECK-NEXT: ret |
| ret <4 x i16> splat (i16 -1) |
| } |
| |
| ; All-ones <2 x i32> constant; expected to be materialized with the scalar |
| ; 'li a0, -1' rather than a packed immediate load. |
| define <2 x i32> @test_allones_v2i32() { |
| ; CHECK-LABEL: test_allones_v2i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: li a0, -1 |
| ; CHECK-NEXT: ret |
| ret <2 x i32> splat (i32 -1) |
| } |
| |
| ; Extract element 0 of <4 x i16>; the expected output contains no |
| ; instructions besides 'ret'. |
| define i16 @test_extract_vector_16(<4 x i16> %a) { |
| ; CHECK-LABEL: test_extract_vector_16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| %extracted = extractelement <4 x i16> %a, i32 0 |
| ret i16 %extracted |
| } |
| |
| ; Extract element 0 of <8 x i8>; the expected output contains no |
| ; instructions besides 'ret'. |
| define i8 @test_extract_vector_8(<8 x i8> %a) { |
| ; CHECK-LABEL: test_extract_vector_8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| %extracted = extractelement <8 x i8> %a, i32 0 |
| ret i8 %extracted |
| } |
| |
| ; Extract element 0 of <2 x i32>; the expected output contains no |
| ; instructions besides 'ret'. |
| define i32 @test_extract_vector_32(<2 x i32> %a) { |
| ; CHECK-LABEL: test_extract_vector_32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| %extracted = extractelement <2 x i32> %a, i32 0 |
| ret i32 %extracted |
| } |
| |
| ; Extract element 1 of <2 x i32>; expected to be a single logical right |
| ; shift of a0 by 32. |
| define i32 @test_extract_vector_32_elem1(<2 x i32> %a) { |
| ; CHECK-LABEL: test_extract_vector_32_elem1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srli a0, a0, 32 |
| ; CHECK-NEXT: ret |
| %extracted = extractelement <2 x i32> %a, i32 1 |
| ret i32 %extracted |
| } |
| |
| ; Insert %val into element 0 of <4 x i16>. The expected output builds the |
| ; constant 0xffff in a2 (lui 16 gives 0x10000, addi -1) and passes it to mvm |
| ; together with the vector (a0) and the scalar (a1). |
| define <4 x i16> @test_insert_vector_16(<4 x i16> %a, i16 %val) { |
| ; CHECK-LABEL: test_insert_vector_16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lui a2, 16 |
| ; CHECK-NEXT: addi a2, a2, -1 |
| ; CHECK-NEXT: mvm a0, a1, a2 |
| ; CHECK-NEXT: ret |
| %res = insertelement <4 x i16> %a, i16 %val, i32 0 |
| ret <4 x i16> %res |
| } |
| |
| ; Insert %val into element 2 of <4 x i16>. The expected output shifts the |
| ; scalar left by 32 and builds the constant 0xffff << 32 in a2 |
| ; (lui 65535 gives 0xffff << 12, then slli by 20) before the mvm. |
| define <4 x i16> @test_insert_vector_16_elem2(<4 x i16> %a, i16 %val) { |
| ; CHECK-LABEL: test_insert_vector_16_elem2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: slli a1, a1, 32 |
| ; CHECK-NEXT: lui a2, 65535 |
| ; CHECK-NEXT: slli a2, a2, 20 |
| ; CHECK-NEXT: mvm a0, a1, a2 |
| ; CHECK-NEXT: ret |
| %res = insertelement <4 x i16> %a, i16 %val, i32 2 |
| ret <4 x i16> %res |
| } |
| |
| ; Insert %val into element 0 of <8 x i8>. The expected output loads the |
| ; constant 255 into a2 and passes it to mvm with the vector and the scalar. |
| define <8 x i8> @test_insert_vector_8(<8 x i8> %a, i8 %val) { |
| ; CHECK-LABEL: test_insert_vector_8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: li a2, 255 |
| ; CHECK-NEXT: mvm a0, a1, a2 |
| ; CHECK-NEXT: ret |
| %res = insertelement <8 x i8> %a, i8 %val, i32 0 |
| ret <8 x i8> %res |
| } |
| |
| ; Insert %val into element 3 of <8 x i8>. The expected output shifts both |
| ; the scalar and the constant 255 left by 24 before the mvm. |
| define <8 x i8> @test_insert_vector_8_elem3(<8 x i8> %a, i8 %val) { |
| ; CHECK-LABEL: test_insert_vector_8_elem3: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: slli a1, a1, 24 |
| ; CHECK-NEXT: li a2, 255 |
| ; CHECK-NEXT: slli a2, a2, 24 |
| ; CHECK-NEXT: mvm a0, a1, a2 |
| ; CHECK-NEXT: ret |
| %res = insertelement <8 x i8> %a, i8 %val, i32 3 |
| ret <8 x i8> %res |
| } |
| |
| ; Insert %val into element 0 of <2 x i32>. The expected output shifts the |
| ; old vector right by 32 (srli) and then issues 'pack a0, a1, a0'. |
| define <2 x i32> @test_insert_vector_32(<2 x i32> %a, i32 %val) { |
| ; CHECK-LABEL: test_insert_vector_32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srli a0, a0, 32 |
| ; CHECK-NEXT: pack a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %res = insertelement <2 x i32> %a, i32 %val, i32 0 |
| ret <2 x i32> %res |
| } |
| |
| ; Insert %val into element 1 of <2 x i32>; expected to need only a single |
| ; 'pack a0, a0, a1'. |
| define <2 x i32> @test_insert_vector_32_elem1(<2 x i32> %a, i32 %val) { |
| ; CHECK-LABEL: test_insert_vector_32_elem1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pack a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = insertelement <2 x i32> %a, i32 %val, i32 1 |
| ret <2 x i32> %res |
| } |
| |
| ; Test basic add/sub operations for v2i32 (RV64 only) |
| ; Lane-wise add of two <2 x i32> values; expected to select a single padd.w. |
| define <2 x i32> @test_padd_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_padd_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = add <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Lane-wise sub of two <2 x i32> values; expected to select a single psub.w. |
| define <2 x i32> @test_psub_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_psub_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psub.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = sub <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Test saturating add operations for v2i32 (RV64 only) |
| ; Signed saturating add (llvm.sadd.sat) on <2 x i32>; expected to select psadd.w. |
| define <2 x i32> @test_psadd_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_psadd_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psadd.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; Unsigned saturating add (llvm.uadd.sat) on <2 x i32>; expected to select psaddu.w. |
| define <2 x i32> @test_psaddu_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_psaddu_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psaddu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; Test saturating sub operations for v2i32 (RV64 only) |
| ; Signed saturating sub (llvm.ssub.sat) on <2 x i32>; expected to select pssub.w. |
| define <2 x i32> @test_pssub_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pssub_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pssub.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; Unsigned saturating sub (llvm.usub.sat) on <2 x i32>; expected to select pssubu.w. |
| define <2 x i32> @test_pssubu_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pssubu_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pssubu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; Test averaging floor signed operations for v2i32 (RV64 only) |
| ; avgfloors pattern: (a + b) arithmetic shift right 1 |
| ; Both i32 operands are sign-extended to i64 so the sum cannot wrap before |
| ; the shift; the sext/add/ashr/trunc chain is expected to collapse into paadd.w. |
| define <2 x i32> @test_paadd_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_paadd_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: paadd.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %ext.a = sext <2 x i32> %a to <2 x i64> |
| %ext.b = sext <2 x i32> %b to <2 x i64> |
| %add = add nsw <2 x i64> %ext.a, %ext.b |
| %shift = ashr <2 x i64> %add, <i64 1, i64 1> |
| %res = trunc <2 x i64> %shift to <2 x i32> |
| ret <2 x i32> %res |
| } |
| |
| ; Test averaging floor unsigned operations for v2i32 (RV64 only) |
| ; avgflooru pattern: (a & b) + ((a ^ b) >> 1) |
| ; Unsigned floor average computed entirely in i32 as |
| ; (a & b) + ((a ^ b) >> 1), with no widening; expected to select paaddu.w. |
| define <2 x i32> @test_paaddu_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_paaddu_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: paaddu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %and = and <2 x i32> %a, %b |
| %xor = xor <2 x i32> %a, %b |
| %shift = lshr <2 x i32> %xor, <i32 1, i32 1> |
| %res = add <2 x i32> %and, %shift |
| ret <2 x i32> %res |
| } |
| |
| ; Test averaging floor subtraction signed for v2i32 (RV64 only) |
| ; pasub pattern: (a - b) arithmetic shift right 1 |
| ; Sign-extend both i32 operands to i64, subtract, arithmetic-shift right by 1 |
| ; and truncate back to i32; the chain is expected to collapse into pasub.w. |
| define <2 x i32> @test_pasub_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pasub_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pasub.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <2 x i32> %a to <2 x i64> |
| %b_ext = sext <2 x i32> %b to <2 x i64> |
| %sub = sub <2 x i64> %a_ext, %b_ext |
| %res = ashr <2 x i64> %sub, <i64 1, i64 1> |
| %res_trunc = trunc <2 x i64> %res to <2 x i32> |
| ret <2 x i32> %res_trunc |
| } |
| |
| ; Test averaging floor subtraction unsigned for v2i32 (RV64 only) |
| ; pasubu pattern: (a - b) logical shift right 1 |
| ; Zero-extend both i32 operands to i64, subtract, logical-shift right by 1 |
| ; and truncate back to i32; the chain is expected to collapse into pasubu.w. |
| define <2 x i32> @test_pasubu_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pasubu_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pasubu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <2 x i32> %a to <2 x i64> |
| %b_ext = zext <2 x i32> %b to <2 x i64> |
| %sub = sub <2 x i64> %a_ext, %b_ext |
| %res = lshr <2 x i64> %sub, <i64 1, i64 1> |
| %res_trunc = trunc <2 x i64> %res to <2 x i32> |
| ret <2 x i32> %res_trunc |
| } |
| |
| ; Test splat of a non-constant i32 scalar into <2 x i32> |
| ; Broadcast a runtime i32 to both lanes using insertelement into lane 0 |
| ; followed by a shufflevector with an all-zero mask; expected to select pmv.ws. |
| define <2 x i32> @test_non_const_splat_i32(i32 %elt) { |
| ; CHECK-LABEL: test_non_const_splat_i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmv.ws a0, a0 |
| ; CHECK-NEXT: ret |
| %insert = insertelement <2 x i32> poison, i32 %elt, i32 0 |
| %splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| ret <2 x i32> %splat |
| } |
| |
| ; Test add(vec, splat(scalar)) pattern |
| ; add(splat(%b), %a) on <8 x i8>, with the splat as the left-hand operand; |
| ; expected to select 'padd.bs a0, a0, a1'. |
| define <8 x i8> @test_padd_bs_splat_lhs(<8 x i8> %a, i8 %b) { |
| ; CHECK-LABEL: test_padd_bs_splat_lhs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.bs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %insert = insertelement <8 x i8> poison, i8 %b, i32 0 |
| %splat = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer |
| %res = add <8 x i8> %splat, %a |
| ret <8 x i8> %res |
| } |
| |
| ; add(%a, splat(%b)) on <8 x i8>, with the splat as the right-hand operand; |
| ; expected to select 'padd.bs a0, a0, a1'. |
| define <8 x i8> @test_padd_bs_splat_rhs(<8 x i8> %a, i8 %b) { |
| ; CHECK-LABEL: test_padd_bs_splat_rhs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.bs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %insert = insertelement <8 x i8> poison, i8 %b, i32 0 |
| %splat = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer |
| %res = add <8 x i8> %a, %splat |
| ret <8 x i8> %res |
| } |
| |
| ; add(splat(%b), %a) on <4 x i16>, with the splat as the left-hand operand; |
| ; expected to select 'padd.hs a0, a0, a1'. |
| define <4 x i16> @test_padd_hs_splat_lhs(<4 x i16> %a, i16 %b) { |
| ; CHECK-LABEL: test_padd_hs_splat_lhs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.hs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %insert = insertelement <4 x i16> poison, i16 %b, i32 0 |
| %splat = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer |
| %res = add <4 x i16> %splat, %a |
| ret <4 x i16> %res |
| } |
| |
| ; add(%a, splat(%b)) on <4 x i16>, with the splat as the right-hand operand; |
| ; expected to select 'padd.hs a0, a0, a1'. |
| define <4 x i16> @test_padd_hs_splat_rhs(<4 x i16> %a, i16 %b) { |
| ; CHECK-LABEL: test_padd_hs_splat_rhs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.hs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %insert = insertelement <4 x i16> poison, i16 %b, i32 0 |
| %splat = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer |
| %res = add <4 x i16> %a, %splat |
| ret <4 x i16> %res |
| } |
| |
| ; add(splat(%b), %a) on <2 x i32>, with the splat as the left-hand operand; |
| ; expected to select 'padd.ws a0, a0, a1'. |
| define <2 x i32> @test_padd_ws_splat_lhs(<2 x i32> %a, i32 %b) { |
| ; CHECK-LABEL: test_padd_ws_splat_lhs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.ws a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %insert = insertelement <2 x i32> poison, i32 %b, i32 0 |
| %splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| %res = add <2 x i32> %splat, %a |
| ret <2 x i32> %res |
| } |
| |
| ; add(%a, splat(%b)) on <2 x i32>, with the splat as the right-hand operand; |
| ; expected to select 'padd.ws a0, a0, a1'. |
| define <2 x i32> @test_padd_ws_splat_rhs(<2 x i32> %a, i32 %b) { |
| ; CHECK-LABEL: test_padd_ws_splat_rhs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.ws a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %insert = insertelement <2 x i32> poison, i32 %b, i32 0 |
| %splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| %res = add <2 x i32> %a, %splat |
| ret <2 x i32> %res |
| } |
| |
| ; Build an <8 x i8> from eight scalar arguments with an insertelement chain. |
| ; The expected output is a three-level tree: four ppaire.b, two ppaire.h, |
| ; then one final pack. |
| define <8 x i8> @test_build_vector_i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h) { |
| ; CHECK-LABEL: test_build_vector_i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ppaire.b a6, a6, a7 |
| ; CHECK-NEXT: ppaire.b a4, a4, a5 |
| ; CHECK-NEXT: ppaire.b a2, a2, a3 |
| ; CHECK-NEXT: ppaire.b a0, a0, a1 |
| ; CHECK-NEXT: ppaire.h a1, a4, a6 |
| ; CHECK-NEXT: ppaire.h a0, a0, a2 |
| ; CHECK-NEXT: pack a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %v0 = insertelement <8 x i8> poison, i8 %a, i32 0 |
| %v1 = insertelement <8 x i8> %v0, i8 %b, i32 1 |
| %v2 = insertelement <8 x i8> %v1, i8 %c, i32 2 |
| %v3 = insertelement <8 x i8> %v2, i8 %d, i32 3 |
| %v4 = insertelement <8 x i8> %v3, i8 %e, i32 4 |
| %v5 = insertelement <8 x i8> %v4, i8 %f, i32 5 |
| %v6 = insertelement <8 x i8> %v5, i8 %g, i32 6 |
| %v7 = insertelement <8 x i8> %v6, i8 %h, i32 7 |
| ret <8 x i8> %v7 |
| } |
| |
| ; Build a <4 x i16> from four scalar arguments with an insertelement chain. |
| ; The expected output is two ppaire.h followed by one pack. |
| define <4 x i16> @test_build_vector_i16(i16 %a, i16 %b, i16 %c, i16 %d) { |
| ; CHECK-LABEL: test_build_vector_i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ppaire.h a2, a2, a3 |
| ; CHECK-NEXT: ppaire.h a0, a0, a1 |
| ; CHECK-NEXT: pack a0, a0, a2 |
| ; CHECK-NEXT: ret |
| %v0 = insertelement <4 x i16> poison, i16 %a, i32 0 |
| %v1 = insertelement <4 x i16> %v0, i16 %b, i32 1 |
| %v2 = insertelement <4 x i16> %v1, i16 %c, i32 2 |
| %v3 = insertelement <4 x i16> %v2, i16 %d, i32 3 |
| ret <4 x i16> %v3 |
| } |
| |
| ; Build a <2 x i32> from two scalar arguments; expected to need only a |
| ; single 'pack a0, a0, a1'. |
| define <2 x i32> @test_build_vector_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: test_build_vector_i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pack a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %v0 = insertelement <2 x i32> poison, i32 %a, i32 0 |
| %v1 = insertelement <2 x i32> %v0, i32 %b, i32 1 |
| ret <2 x i32> %v1 |
| } |
| |
| ; Test logical shift left immediate for v4i16 |
| ; shl of <4 x i16> by a constant splat of 2; expected to select pslli.h |
| ; with immediate 2. |
| define <4 x i16> @test_pslli_h(<4 x i16> %a) { |
| ; CHECK-LABEL: test_pslli_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pslli.h a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = shl <4 x i16> %a, splat(i16 2) |
| ret <4 x i16> %res |
| } |
| |
| ; Test logical shift left immediate for v8i8 |
| ; shl of every i8 lane by the constant splat 2; the assertions expect a |
| ; single pslli.b with immediate 2. |
| define <8 x i8> @test_pslli_b(<8 x i8> %a) { |
| ; CHECK-LABEL: test_pslli_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pslli.b a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = shl <8 x i8> %a, splat(i8 2) |
| ret <8 x i8> %res |
| } |
| |
| ; Test logical shift left immediate for v2i32 |
| ; shl of every i32 lane by the constant splat 2; the assertions expect a |
| ; single pslli.w with immediate 2. |
| define <2 x i32> @test_pslli_w(<2 x i32> %a) { |
| ; CHECK-LABEL: test_pslli_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pslli.w a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = shl <2 x i32> %a, splat(i32 2) |
| ret <2 x i32> %res |
| } |
| |
| ; Test logical shift right immediate |
| ; lshr of every i32 lane by the constant splat 2; the assertions expect a |
| ; single psrli.w with immediate 2. |
| define <2 x i32> @test_psrli_w(<2 x i32> %a) { |
| ; CHECK-LABEL: test_psrli_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrli.w a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = lshr <2 x i32> %a, splat(i32 2) |
| ret <2 x i32> %res |
| } |
| |
| ; lshr of every i16 lane by the constant splat 2; the assertions expect a |
| ; single psrli.h with immediate 2. |
| define <4 x i16> @test_psrli_h(<4 x i16> %a) { |
| ; CHECK-LABEL: test_psrli_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrli.h a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = lshr <4 x i16> %a, splat(i16 2) |
| ret <4 x i16> %res |
| } |
| |
| ; lshr of every i8 lane by the constant splat 2; the assertions expect a |
| ; single psrli.b with immediate 2. |
| define <8 x i8> @test_psrli_b(<8 x i8> %a) { |
| ; CHECK-LABEL: test_psrli_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrli.b a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = lshr <8 x i8> %a, splat(i8 2) |
| ret <8 x i8> %res |
| } |
| |
| ; Test arithmetic shift right immediate |
| ; ashr of every i32 lane by the constant splat 2; the assertions expect a |
| ; single psrai.w with immediate 2. |
| define <2 x i32> @test_psrai_w(<2 x i32> %a) { |
| ; CHECK-LABEL: test_psrai_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrai.w a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = ashr <2 x i32> %a, splat(i32 2) |
| ret <2 x i32> %res |
| } |
| |
| ; ashr of every i16 lane by the constant splat 2; the assertions expect a |
| ; single psrai.h with immediate 2. |
| define <4 x i16> @test_psrai_h(<4 x i16> %a) { |
| ; CHECK-LABEL: test_psrai_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrai.h a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = ashr <4 x i16> %a, splat(i16 2) |
| ret <4 x i16> %res |
| } |
| |
| ; ashr of every i8 lane by the constant splat 2; the assertions expect a |
| ; single psrai.b with immediate 2. |
| define <8 x i8> @test_psrai_b(<8 x i8> %a) { |
| ; CHECK-LABEL: test_psrai_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrai.b a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = ashr <8 x i8> %a, splat(i8 2) |
| ret <8 x i8> %res |
| } |
| |
| ; Test saturating shift left arithmetic with splat immediate shift amount for v2i32 |
| ; llvm.sshl.sat on <2 x i32> with a constant splat shift amount of 2; the |
| ; assertions expect a single psslai.w with immediate 2. |
| define <2 x i32> @test_psslai_w(<2 x i32> %a) { |
| ; CHECK-LABEL: test_psslai_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psslai.w a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %a, <2 x i32> splat(i32 2)) |
| ret <2 x i32> %res |
| } |
| |
| ; Test saturating shift left arithmetic with splat immediate shift amount for v4i16 |
| ; llvm.sshl.sat on <4 x i16> with a constant splat shift amount of 2; the |
| ; assertions expect a single psslai.h with immediate 2. |
| define <4 x i16> @test_psslai_h(<4 x i16> %a) { |
| ; CHECK-LABEL: test_psslai_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psslai.h a0, a0, 2 |
| ; CHECK-NEXT: ret |
| %res = call <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16> %a, <4 x i16> splat(i16 2)) |
| ret <4 x i16> %res |
| } |
| |
| ; Test saturating shift left arithmetic with splat immediate shift amount for v8i8 |
| ; llvm.sshl.sat on <8 x i8> with a constant splat shift amount of 2. Unlike |
| ; the v2i32/v4i16 variants, the assertions expect an expanded sequence: |
| ; pslli.b, psrai.b back by the same amount, pmseq.b against the input, and |
| ; merge with constants built by pli.b (-128 / 127) selected via pmsltz.b. |
| ; NOTE(review): presumably there is no byte-element psslai instruction - |
| ; confirm against the P extension spec. |
| define <8 x i8> @test_psslai_b(<8 x i8> %a) { |
| ; CHECK-LABEL: test_psslai_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltz.b a1, a0 |
| ; CHECK-NEXT: pli.b a2, -128 |
| ; CHECK-NEXT: pli.b a3, 127 |
| ; CHECK-NEXT: merge a1, a3, a2 |
| ; CHECK-NEXT: pslli.b a2, a0, 2 |
| ; CHECK-NEXT: psrai.b a3, a2, 2 |
| ; CHECK-NEXT: pmseq.b a0, a0, a3 |
| ; CHECK-NEXT: merge a0, a1, a2 |
| ; CHECK-NEXT: ret |
| %res = call <8 x i8> @llvm.sshl.sat.v8i8(<8 x i8> %a, <8 x i8> splat(i8 2)) |
| ret <8 x i8> %res |
| } |
| |
| ; Test saturating shift left arithmetic with splat shift amount for v4i16 |
| ; llvm.sshl.sat on <4 x i16> where the shift amount is a splat of the scalar |
| ; %shamt. The assertions expect an expanded sequence rather than a single |
| ; instruction: psll.hs, psra.hs back by the same amount, pmseq.h against the |
| ; input, and merge with constants selected via pmsltz.h. |
| define <4 x i16> @test_pssla_hs(<4 x i16> %a, i16 %shamt) { |
| ; CHECK-LABEL: test_pssla_hs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltz.h a2, a0 |
| ; CHECK-NEXT: lui a3, 8 |
| ; CHECK-NEXT: plui.h a4, -512 |
| ; CHECK-NEXT: addi a3, a3, -1 |
| ; CHECK-NEXT: pmv.hs a3, a3 |
| ; CHECK-NEXT: merge a2, a3, a4 |
| ; CHECK-NEXT: psll.hs a3, a0, a1 |
| ; CHECK-NEXT: psra.hs a1, a3, a1 |
| ; CHECK-NEXT: pmseq.h a0, a0, a1 |
| ; CHECK-NEXT: merge a0, a2, a3 |
| ; CHECK-NEXT: ret |
| ; Splat %shamt into all four lanes (insert into lane 0, then an all-zero |
| ; shuffle mask). |
| %insert = insertelement <4 x i16> poison, i16 %shamt, i32 0 |
| %b = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer |
| %res = call <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %res |
| } |
| |
| ; Test saturating shift left arithmetic with splat shift amount for v2i32 |
| ; llvm.sshl.sat on <2 x i32> where the shift amount is a splat of the scalar |
| ; %shamt. The assertions expect an expanded sequence rather than a single |
| ; instruction: psll.ws, psra.ws back by the same amount, pmseq.w against the |
| ; input, and merge with constants selected via pmsltz.w. |
| define <2 x i32> @test_pssla_ws(<2 x i32> %a, i32 %shamt) { |
| ; CHECK-LABEL: test_pssla_ws: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltz.w a2, a0 |
| ; CHECK-NEXT: lui a3, 524288 |
| ; CHECK-NEXT: plui.w a4, -512 |
| ; CHECK-NEXT: addiw a3, a3, -1 |
| ; CHECK-NEXT: pmv.ws a3, a3 |
| ; CHECK-NEXT: merge a2, a3, a4 |
| ; CHECK-NEXT: psll.ws a3, a0, a1 |
| ; CHECK-NEXT: psra.ws a1, a3, a1 |
| ; CHECK-NEXT: pmseq.w a0, a0, a1 |
| ; CHECK-NEXT: merge a0, a2, a3 |
| ; CHECK-NEXT: ret |
| ; Splat %shamt into both lanes (insert into lane 0, then an all-zero |
| ; shuffle mask). |
| %insert = insertelement <2 x i32> poison, i32 %shamt, i32 0 |
| %b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; Test saturating shift left arithmetic with non-splat shift amount for v4i16 |
| ; llvm.sshl.sat on <4 x i16> with a general (non-splat) vector shift amount |
| ; %b. The assertions expect the shifts to be done with scalar sll/sra (four |
| ; of each), the results re-packed with ppaire.h and pack, and the final |
| ; value chosen with pmseq.h and merge. |
| define <4 x i16> @test_pssla_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pssla_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srli a2, a1, 48 |
| ; CHECK-NEXT: srli a3, a0, 48 |
| ; CHECK-NEXT: srli a4, a1, 32 |
| ; CHECK-NEXT: srli a5, a0, 32 |
| ; CHECK-NEXT: sll a6, a0, a1 |
| ; CHECK-NEXT: srli a7, a1, 16 |
| ; CHECK-NEXT: srli t0, a0, 16 |
| ; CHECK-NEXT: pmsltz.h t1, a0 |
| ; CHECK-NEXT: sll a3, a3, a2 |
| ; CHECK-NEXT: sll a5, a5, a4 |
| ; CHECK-NEXT: sll t0, t0, a7 |
| ; CHECK-NEXT: sext.h t2, a6 |
| ; CHECK-NEXT: sra a1, t2, a1 |
| ; CHECK-NEXT: ppaire.h t2, a5, a3 |
| ; CHECK-NEXT: ppaire.h a6, a6, t0 |
| ; CHECK-NEXT: pack a6, a6, t2 |
| ; CHECK-NEXT: lui t2, 8 |
| ; CHECK-NEXT: sext.h a3, a3 |
| ; CHECK-NEXT: sra a2, a3, a2 |
| ; CHECK-NEXT: plui.h a3, -512 |
| ; CHECK-NEXT: addi t2, t2, -1 |
| ; CHECK-NEXT: sext.h a5, a5 |
| ; CHECK-NEXT: sext.h t0, t0 |
| ; CHECK-NEXT: pmv.hs t2, t2 |
| ; CHECK-NEXT: sra a4, a5, a4 |
| ; CHECK-NEXT: sra a5, t0, a7 |
| ; CHECK-NEXT: ppaire.h a2, a4, a2 |
| ; CHECK-NEXT: ppaire.h a1, a1, a5 |
| ; CHECK-NEXT: pack a1, a1, a2 |
| ; CHECK-NEXT: pmseq.h a0, a0, a1 |
| ; CHECK-NEXT: merge t1, t2, a3 |
| ; CHECK-NEXT: merge a0, t1, a6 |
| ; CHECK-NEXT: ret |
| %res = call <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %res |
| } |
| |
| ; Test saturating shift left arithmetic with non-splat shift amount for v2i32 |
| ; llvm.sshl.sat on <2 x i32> with a general (non-splat) vector shift amount |
| ; %b. The assertions expect the shifts to be done with scalar sllw/sraw (two |
| ; of each), the results re-packed with pack, and the final value chosen with |
| ; pmseq.w and merge. |
| define <2 x i32> @test_pssla_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pssla_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: sllw a2, a0, a1 |
| ; CHECK-NEXT: srli a3, a1, 32 |
| ; CHECK-NEXT: srli a4, a0, 32 |
| ; CHECK-NEXT: pmsltz.w a5, a0 |
| ; CHECK-NEXT: sllw a4, a4, a3 |
| ; CHECK-NEXT: sraw a1, a2, a1 |
| ; CHECK-NEXT: pack a2, a2, a4 |
| ; CHECK-NEXT: sraw a3, a4, a3 |
| ; CHECK-NEXT: lui a4, 524288 |
| ; CHECK-NEXT: pack a1, a1, a3 |
| ; CHECK-NEXT: plui.w a3, -512 |
| ; CHECK-NEXT: addiw a4, a4, -1 |
| ; CHECK-NEXT: pmv.ws a4, a4 |
| ; CHECK-NEXT: pmseq.w a0, a0, a1 |
| ; CHECK-NEXT: merge a5, a4, a3 |
| ; CHECK-NEXT: merge a0, a5, a2 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; Test logical shift left (scalar shamt) |
| ; shl of <4 x i16> by a splat of the scalar %shamt; the assertions expect a |
| ; single psll.hs taking the scalar register (a1) directly. |
| define <4 x i16> @test_psll_hs(<4 x i16> %a, i16 %shamt) { |
| ; CHECK-LABEL: test_psll_hs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psll.hs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask. |
| %insert = insertelement <4 x i16> poison, i16 %shamt, i32 0 |
| %b = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer |
| %res = shl <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; Same as the unmasked psll.hs pattern, but %shamt is first ANDed with 15. |
| ; The assertions expect only psll.hs, i.e. no separate masking instruction. |
| define <4 x i16> @test_psll_hs_mask(<4 x i16> %a, i16 %shamt) { |
| ; CHECK-LABEL: test_psll_hs_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psll.hs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %masked = and i16 %shamt, 15 |
| %insert = insertelement <4 x i16> poison, i16 %masked, i32 0 |
| %b = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer |
| %res = shl <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; shl of <8 x i8> by a splat of the scalar %shamt; the assertions expect a |
| ; single psll.bs taking the scalar register (a1) directly. |
| define <8 x i8> @test_psll_bs(<8 x i8> %a, i8 %shamt) { |
| ; CHECK-LABEL: test_psll_bs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psll.bs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask. |
| %insert = insertelement <8 x i8> poison, i8 %shamt, i32 0 |
| %b = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer |
| %res = shl <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; Same as the unmasked psll.bs pattern, but %shamt is first ANDed with 7. |
| ; The assertions expect only psll.bs, i.e. no separate masking instruction. |
| define <8 x i8> @test_psll_bs_mask(<8 x i8> %a, i8 %shamt) { |
| ; CHECK-LABEL: test_psll_bs_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psll.bs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %masked = and i8 %shamt, 7 |
| %insert = insertelement <8 x i8> poison, i8 %masked, i32 0 |
| %b = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer |
| %res = shl <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; shl of <2 x i32> by a splat of the scalar %shamt; the assertions expect a |
| ; single psll.ws taking the scalar register (a1) directly. |
| define <2 x i32> @test_psll_ws(<2 x i32> %a, i32 %shamt) { |
| ; CHECK-LABEL: test_psll_ws: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psll.ws a0, a0, a1 |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask. |
| %insert = insertelement <2 x i32> poison, i32 %shamt, i32 0 |
| %b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| %res = shl <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Same as the unmasked psll.ws pattern, but %shamt is first ANDed with 31. |
| ; The assertions expect only psll.ws, i.e. no separate masking instruction. |
| define <2 x i32> @test_psll_ws_mask(<2 x i32> %a, i32 %shamt) { |
| ; CHECK-LABEL: test_psll_ws_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psll.ws a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %masked = and i32 %shamt, 31 |
| %insert = insertelement <2 x i32> poison, i32 %masked, i32 0 |
| %b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| %res = shl <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Test logical shift left (vector shamt) |
| ; shl of <2 x i32> by a general vector shift amount %b (not a splat). The |
| ; assertions expect no packed shift: two scalar sllw (the second on both |
| ; operands shifted right by 32 with srli) recombined with pack. |
| define <2 x i32> @test_psll_ws_vec_shamt(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_psll_ws_vec_shamt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: sllw a2, a0, a1 |
| ; CHECK-NEXT: srli a1, a1, 32 |
| ; CHECK-NEXT: srli a0, a0, 32 |
| ; CHECK-NEXT: sllw a0, a0, a1 |
| ; CHECK-NEXT: pack a0, a2, a0 |
| ; CHECK-NEXT: ret |
| %res = shl <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Test logical shift right (scalar shamt) |
| ; lshr of <4 x i16> by a splat of the scalar %shamt; the assertions expect a |
| ; single psrl.hs taking the scalar register (a1) directly. |
| define <4 x i16> @test_psrl_hs(<4 x i16> %a, i16 %shamt) { |
| ; CHECK-LABEL: test_psrl_hs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrl.hs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask. |
| %insert = insertelement <4 x i16> poison, i16 %shamt, i32 0 |
| %b = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer |
| %res = lshr <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; Same as the unmasked psrl.hs pattern, but %shamt is first ANDed with 15. |
| ; The assertions expect only psrl.hs, i.e. no separate masking instruction. |
| define <4 x i16> @test_psrl_hs_mask(<4 x i16> %a, i16 %shamt) { |
| ; CHECK-LABEL: test_psrl_hs_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrl.hs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %masked = and i16 %shamt, 15 |
| %insert = insertelement <4 x i16> poison, i16 %masked, i32 0 |
| %b = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer |
| %res = lshr <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; lshr of <8 x i8> by a splat of the scalar %shamt; the assertions expect a |
| ; single psrl.bs taking the scalar register (a1) directly. |
| define <8 x i8> @test_psrl_bs(<8 x i8> %a, i8 %shamt) { |
| ; CHECK-LABEL: test_psrl_bs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrl.bs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask. |
| %insert = insertelement <8 x i8> poison, i8 %shamt, i32 0 |
| %b = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer |
| %res = lshr <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; Same as the unmasked psrl.bs pattern, but %shamt is first ANDed with 7. |
| ; The assertions expect only psrl.bs, i.e. no separate masking instruction. |
| define <8 x i8> @test_psrl_bs_mask(<8 x i8> %a, i8 %shamt) { |
| ; CHECK-LABEL: test_psrl_bs_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrl.bs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %masked = and i8 %shamt, 7 |
| %insert = insertelement <8 x i8> poison, i8 %masked, i32 0 |
| %b = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer |
| %res = lshr <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; lshr of <2 x i32> by a splat of the scalar %shamt; the assertions expect a |
| ; single psrl.ws taking the scalar register (a1) directly. |
| define <2 x i32> @test_psrl_ws(<2 x i32> %a, i32 %shamt) { |
| ; CHECK-LABEL: test_psrl_ws: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrl.ws a0, a0, a1 |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask. |
| %insert = insertelement <2 x i32> poison, i32 %shamt, i32 0 |
| %b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| %res = lshr <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Same as the unmasked psrl.ws pattern, but %shamt is first ANDed with 31. |
| ; The assertions expect only psrl.ws, i.e. no separate masking instruction. |
| define <2 x i32> @test_psrl_ws_mask(<2 x i32> %a, i32 %shamt) { |
| ; CHECK-LABEL: test_psrl_ws_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrl.ws a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %masked = and i32 %shamt, 31 |
| %insert = insertelement <2 x i32> poison, i32 %masked, i32 0 |
| %b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| %res = lshr <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Test arithmetic shift right (scalar shamt) |
| ; ashr of <4 x i16> by a splat of the scalar %shamt; the assertions expect a |
| ; single psra.hs taking the scalar register (a1) directly. |
| define <4 x i16> @test_psra_hs(<4 x i16> %a, i16 %shamt) { |
| ; CHECK-LABEL: test_psra_hs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psra.hs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask. |
| %insert = insertelement <4 x i16> poison, i16 %shamt, i32 0 |
| %b = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer |
| %res = ashr <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; Same as the unmasked psra.hs pattern, but %shamt is first ANDed with 15. |
| ; The assertions expect only psra.hs, i.e. no separate masking instruction. |
| define <4 x i16> @test_psra_hs_mask(<4 x i16> %a, i16 %shamt) { |
| ; CHECK-LABEL: test_psra_hs_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psra.hs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %masked = and i16 %shamt, 15 |
| %insert = insertelement <4 x i16> poison, i16 %masked, i32 0 |
| %b = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer |
| %res = ashr <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; ashr of <8 x i8> by a splat of the scalar %shamt; the assertions expect a |
| ; single psra.bs taking the scalar register (a1) directly. |
| define <8 x i8> @test_psra_bs(<8 x i8> %a, i8 %shamt) { |
| ; CHECK-LABEL: test_psra_bs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psra.bs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask. |
| %insert = insertelement <8 x i8> poison, i8 %shamt, i32 0 |
| %b = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer |
| %res = ashr <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; Same as the unmasked psra.bs pattern, but %shamt is first ANDed with 7. |
| ; The assertions expect only psra.bs, i.e. no separate masking instruction. |
| define <8 x i8> @test_psra_bs_mask(<8 x i8> %a, i8 %shamt) { |
| ; CHECK-LABEL: test_psra_bs_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psra.bs a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %masked = and i8 %shamt, 7 |
| %insert = insertelement <8 x i8> poison, i8 %masked, i32 0 |
| %b = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer |
| %res = ashr <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; ashr of <2 x i32> by a splat of the scalar %shamt; the assertions expect a |
| ; single psra.ws taking the scalar register (a1) directly. |
| define <2 x i32> @test_psra_ws(<2 x i32> %a, i32 %shamt) { |
| ; CHECK-LABEL: test_psra_ws: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psra.ws a0, a0, a1 |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask. |
| %insert = insertelement <2 x i32> poison, i32 %shamt, i32 0 |
| %b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| %res = ashr <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Same as the unmasked psra.ws pattern, but %shamt is first ANDed with 31. |
| ; The assertions expect only psra.ws, i.e. no separate masking instruction. |
| define <2 x i32> @test_psra_ws_mask(<2 x i32> %a, i32 %shamt) { |
| ; CHECK-LABEL: test_psra_ws_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psra.ws a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %masked = and i32 %shamt, 31 |
| %insert = insertelement <2 x i32> poison, i32 %masked, i32 0 |
| %b = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer |
| %res = ashr <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Test logical shift right (vector shamt) |
| ; lshr of <2 x i32> by a general vector shift amount %b (not a splat). The |
| ; assertions expect no packed shift: two scalar srlw (the second on both |
| ; operands shifted right by 32 with srli) recombined with pack. |
| define <2 x i32> @test_psrl_ws_vec_shamt(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_psrl_ws_vec_shamt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srlw a2, a0, a1 |
| ; CHECK-NEXT: srli a1, a1, 32 |
| ; CHECK-NEXT: srli a0, a0, 32 |
| ; CHECK-NEXT: srlw a0, a0, a1 |
| ; CHECK-NEXT: pack a0, a2, a0 |
| ; CHECK-NEXT: ret |
| %res = lshr <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Test arithmetic shift right (vector shamt) |
| ; ashr of <2 x i32> by a general vector shift amount %b (not a splat). The |
| ; assertions expect no packed shift: two scalar sraw (the second on both |
| ; operands shifted right by 32 with srli) recombined with pack. |
| define <2 x i32> @test_psra_ws_vec_shamt(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_psra_ws_vec_shamt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: sraw a2, a0, a1 |
| ; CHECK-NEXT: srli a1, a1, 32 |
| ; CHECK-NEXT: srli a0, a0, 32 |
| ; CHECK-NEXT: sraw a0, a0, a1 |
| ; CHECK-NEXT: pack a0, a2, a0 |
| ; CHECK-NEXT: ret |
| %res = ashr <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Test packed multiply high signed |
| ; Signed multiply-high idiom on <4 x i16>: sext both operands to i32, mul, |
| ; lshr by 16, trunc back to i16. The assertions expect a single pmulh.h. |
| define <4 x i16> @test_pmulh_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pmulh_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulh.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <4 x i16> %a to <4 x i32> |
| %b_ext = sext <4 x i16> %b to <4 x i32> |
| %mul = mul <4 x i32> %a_ext, %b_ext |
| %shift = lshr <4 x i32> %mul, <i32 16, i32 16, i32 16, i32 16> |
| %res = trunc <4 x i32> %shift to <4 x i16> |
| ret <4 x i16> %res |
| } |
| |
| ; Signed multiply-high idiom on <2 x i32>: sext both operands to i64, mul, |
| ; lshr by 32, trunc back to i32. The assertions expect a single pmulh.w. |
| define <2 x i32> @test_pmulh_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pmulh_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulh.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <2 x i32> %a to <2 x i64> |
| %b_ext = sext <2 x i32> %b to <2 x i64> |
| %mul = mul <2 x i64> %a_ext, %b_ext |
| %shift = lshr <2 x i64> %mul, <i64 32, i64 32> |
| %res = trunc <2 x i64> %shift to <2 x i32> |
| ret <2 x i32> %res |
| } |
| |
| ; Test packed multiply high unsigned |
| ; Unsigned multiply-high idiom on <4 x i16>: zext both operands to i32, mul, |
| ; lshr by 16, trunc back to i16. The assertions expect a single pmulhu.h. |
| define <4 x i16> @test_pmulhu_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pmulhu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <4 x i16> %a to <4 x i32> |
| %b_ext = zext <4 x i16> %b to <4 x i32> |
| %mul = mul <4 x i32> %a_ext, %b_ext |
| %shift = lshr <4 x i32> %mul, <i32 16, i32 16, i32 16, i32 16> |
| %res = trunc <4 x i32> %shift to <4 x i16> |
| ret <4 x i16> %res |
| } |
| |
| ; Unsigned multiply-high idiom on <2 x i32>: zext both operands to i64, mul, |
| ; lshr by 32, trunc back to i32. The assertions expect a single pmulhu.w. |
| define <2 x i32> @test_pmulhu_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pmulhu_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <2 x i32> %a to <2 x i64> |
| %b_ext = zext <2 x i32> %b to <2 x i64> |
| %mul = mul <2 x i64> %a_ext, %b_ext |
| %shift = lshr <2 x i64> %mul, <i64 32, i64 32> |
| %res = trunc <2 x i64> %shift to <2 x i32> |
| ret <2 x i32> %res |
| } |
| |
| ; Test packed multiply high signed-unsigned |
| ; Mixed-sign multiply-high idiom on <4 x i16>: %a is sign-extended and %b is |
| ; zero-extended to i32, then mul, lshr by 16, trunc. The assertions expect a |
| ; single pmulhsu.h with operands in the order a0, a1. |
| define <4 x i16> @test_pmulhsu_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pmulhsu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhsu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <4 x i16> %a to <4 x i32> |
| %b_ext = zext <4 x i16> %b to <4 x i32> |
| %mul = mul <4 x i32> %a_ext, %b_ext |
| %shift = lshr <4 x i32> %mul, <i32 16, i32 16, i32 16, i32 16> |
| %res = trunc <4 x i32> %shift to <4 x i16> |
| ret <4 x i16> %res |
| } |
| |
| ; Mixed-sign multiply-high on <4 x i16> with the extensions swapped: %a is |
| ; zero-extended and %b is sign-extended. The assertions expect a single |
| ; pmulhsu.h with the source operands in the order a1, a0. |
| define <4 x i16> @test_pmulhsu_h_commuted(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pmulhsu_h_commuted: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhsu.h a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <4 x i16> %a to <4 x i32> |
| %b_ext = sext <4 x i16> %b to <4 x i32> |
| %mul = mul <4 x i32> %a_ext, %b_ext |
| %shift = lshr <4 x i32> %mul, <i32 16, i32 16, i32 16, i32 16> |
| %res = trunc <4 x i32> %shift to <4 x i16> |
| ret <4 x i16> %res |
| } |
| |
| ; Mixed-sign multiply-high idiom on <2 x i32>: %a is sign-extended and %b is |
| ; zero-extended to i64, then mul, lshr by 32, trunc. The assertions expect a |
| ; single pmulhsu.w with operands in the order a0, a1. |
| define <2 x i32> @test_pmulhsu_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pmulhsu_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhsu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <2 x i32> %a to <2 x i64> |
| %b_ext = zext <2 x i32> %b to <2 x i64> |
| %mul = mul <2 x i64> %a_ext, %b_ext |
| %shift = lshr <2 x i64> %mul, <i64 32, i64 32> |
| %res = trunc <2 x i64> %shift to <2 x i32> |
| ret <2 x i32> %res |
| } |
| |
| ; Mixed-sign multiply-high on <2 x i32> with the extensions swapped: %a is |
| ; zero-extended and %b is sign-extended. The assertions expect a single |
| ; pmulhsu.w with the source operands in the order a1, a0. |
| define <2 x i32> @test_pmulhsu_w_commuted(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pmulhsu_w_commuted: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhsu.w a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <2 x i32> %a to <2 x i64> |
| %b_ext = sext <2 x i32> %b to <2 x i64> |
| %mul = mul <2 x i64> %a_ext, %b_ext |
| %shift = lshr <2 x i64> %mul, <i64 32, i64 32> |
| %res = trunc <2 x i64> %shift to <2 x i32> |
| ret <2 x i32> %res |
| } |
| |
| ; Test packed multiply high rounding signed |
| ; Rounding signed multiply-high idiom on <4 x i16>: sext both operands to |
| ; i32, mul, add 32768 (1 << 15), lshr by 16, trunc. The assertions expect a |
| ; single pmulhr.h. |
| define <4 x i16> @test_pmulhr_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pmulhr_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhr.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <4 x i16> %a to <4 x i32> |
| %b_ext = sext <4 x i16> %b to <4 x i32> |
| %mul = mul <4 x i32> %a_ext, %b_ext |
| %add = add <4 x i32> %mul, <i32 32768, i32 32768, i32 32768, i32 32768> |
| %shift = lshr <4 x i32> %add, <i32 16, i32 16, i32 16, i32 16> |
| %res = trunc <4 x i32> %shift to <4 x i16> |
| ret <4 x i16> %res |
| } |
| |
| ; Rounding signed multiply-high idiom on <2 x i32>: sext both operands to |
| ; i64, mul, add 2147483648 (1 << 31), lshr by 32, trunc. The assertions |
| ; expect a single pmulhr.w. |
| define <2 x i32> @test_pmulhr_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pmulhr_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhr.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <2 x i32> %a to <2 x i64> |
| %b_ext = sext <2 x i32> %b to <2 x i64> |
| %mul = mul <2 x i64> %a_ext, %b_ext |
| %add = add <2 x i64> %mul, <i64 2147483648, i64 2147483648> |
| %shift = lshr <2 x i64> %add, <i64 32, i64 32> |
| %res = trunc <2 x i64> %shift to <2 x i32> |
| ret <2 x i32> %res |
| } |
| |
| ; Test packed multiply high rounding unsigned |
| ; Rounding unsigned multiply-high idiom on <4 x i16>: zext both operands to |
| ; i32, mul, add 32768 (1 << 15), lshr by 16, trunc. The assertions expect a |
| ; single pmulhru.h. |
| define <4 x i16> @test_pmulhru_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pmulhru_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhru.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <4 x i16> %a to <4 x i32> |
| %b_ext = zext <4 x i16> %b to <4 x i32> |
| %mul = mul <4 x i32> %a_ext, %b_ext |
| %add = add <4 x i32> %mul, <i32 32768, i32 32768, i32 32768, i32 32768> |
| %shift = lshr <4 x i32> %add, <i32 16, i32 16, i32 16, i32 16> |
| %res = trunc <4 x i32> %shift to <4 x i16> |
| ret <4 x i16> %res |
| } |
| |
| ; Rounding unsigned multiply-high idiom on <2 x i32>: zext both operands to |
| ; i64, mul, add 2147483648 (1 << 31), lshr by 32, trunc. The assertions |
| ; expect a single pmulhru.w. |
| define <2 x i32> @test_pmulhru_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pmulhru_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhru.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <2 x i32> %a to <2 x i64> |
| %b_ext = zext <2 x i32> %b to <2 x i64> |
| %mul = mul <2 x i64> %a_ext, %b_ext |
| %add = add <2 x i64> %mul, <i64 2147483648, i64 2147483648> |
| %shift = lshr <2 x i64> %add, <i64 32, i64 32> |
| %res = trunc <2 x i64> %shift to <2 x i32> |
| ret <2 x i32> %res |
| } |
| |
| ; Test packed multiply high rounding signed-unsigned |
| ; Rounding mixed-sign multiply-high idiom on <4 x i16>: %a is sign-extended |
| ; and %b zero-extended to i32, then mul, add 32768 (1 << 15), lshr by 16, |
| ; trunc. The assertions expect a single pmulhrsu.h with operands a0, a1. |
| define <4 x i16> @test_pmulhrsu_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pmulhrsu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhrsu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <4 x i16> %a to <4 x i32> |
| %b_ext = zext <4 x i16> %b to <4 x i32> |
| %mul = mul <4 x i32> %a_ext, %b_ext |
| %add = add <4 x i32> %mul, <i32 32768, i32 32768, i32 32768, i32 32768> |
| %shift = lshr <4 x i32> %add, <i32 16, i32 16, i32 16, i32 16> |
| %res = trunc <4 x i32> %shift to <4 x i16> |
| ret <4 x i16> %res |
| } |
| |
| ; Rounding mixed-sign multiply-high on <4 x i16> with the extensions |
| ; swapped: %a is zero-extended and %b is sign-extended. The assertions |
| ; expect a single pmulhrsu.h with the source operands in the order a1, a0. |
| define <4 x i16> @test_pmulhrsu_h_commuted(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pmulhrsu_h_commuted: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhrsu.h a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <4 x i16> %a to <4 x i32> |
| %b_ext = sext <4 x i16> %b to <4 x i32> |
| %mul = mul <4 x i32> %a_ext, %b_ext |
| %add = add <4 x i32> %mul, <i32 32768, i32 32768, i32 32768, i32 32768> |
| %shift = lshr <4 x i32> %add, <i32 16, i32 16, i32 16, i32 16> |
| %res = trunc <4 x i32> %shift to <4 x i16> |
| ret <4 x i16> %res |
| } |
| |
| ; Rounding mixed-sign multiply-high idiom on <2 x i32>: %a is sign-extended |
| ; and %b zero-extended to i64, then mul, add 2147483648 (1 << 31), lshr by |
| ; 32, trunc. The assertions expect a single pmulhrsu.w with operands a0, a1. |
| define <2 x i32> @test_pmulhrsu_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pmulhrsu_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhrsu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %a_ext = sext <2 x i32> %a to <2 x i64> |
| %b_ext = zext <2 x i32> %b to <2 x i64> |
| %mul = mul <2 x i64> %a_ext, %b_ext |
| %add = add <2 x i64> %mul, <i64 2147483648, i64 2147483648> |
| %shift = lshr <2 x i64> %add, <i64 32, i64 32> |
| %res = trunc <2 x i64> %shift to <2 x i32> |
| ret <2 x i32> %res |
| } |
| |
| ; Rounding mixed-sign multiply-high on <2 x i32> with the extensions |
| ; swapped: %a is zero-extended and %b is sign-extended. The assertions |
| ; expect a single pmulhrsu.w with the source operands in the order a1, a0. |
| define <2 x i32> @test_pmulhrsu_w_commuted(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pmulhrsu_w_commuted: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmulhrsu.w a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %a_ext = zext <2 x i32> %a to <2 x i64> |
| %b_ext = sext <2 x i32> %b to <2 x i64> |
| %mul = mul <2 x i64> %a_ext, %b_ext |
| %add = add <2 x i64> %mul, <i64 2147483648, i64 2147483648> |
| %shift = lshr <2 x i64> %add, <i64 32, i64 32> |
| %res = trunc <2 x i64> %shift to <2 x i32> |
| ret <2 x i32> %res |
| } |
| |
| ; Test packed multiply low for v4i16 |
| ; Plain mul of <4 x i16>. The assertions expect three instructions rather |
| ; than one: pmul.w.h11 and pmul.w.h00 on the same inputs, combined with |
| ; ppaire.h. |
| define <4 x i16> @test_pmul_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pmul_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmul.w.h11 a2, a0, a1 |
| ; CHECK-NEXT: pmul.w.h00 a0, a0, a1 |
| ; CHECK-NEXT: ppaire.h a0, a0, a2 |
| ; CHECK-NEXT: ret |
| %res = mul <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; Test packed multiply low for v8i8 |
| ; Plain mul of <8 x i8>. The assertions expect three instructions rather |
| ; than one: pmul.h.b11 and pmul.h.b00 on the same inputs, combined with |
| ; ppaire.b. |
| define <8 x i8> @test_pmul_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_pmul_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmul.h.b11 a2, a0, a1 |
| ; CHECK-NEXT: pmul.h.b00 a0, a0, a1 |
| ; CHECK-NEXT: ppaire.b a0, a0, a2 |
| ; CHECK-NEXT: ret |
| %res = mul <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; Test packed multiply low for v2i32 |
| ; Plain mul of <2 x i32>. The assertions expect three instructions rather |
| ; than one: mul.w11 and mul.w00 on the same inputs, combined with pack. |
| define <2 x i32> @test_pmul_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pmul_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: mul.w11 a2, a0, a1 |
| ; CHECK-NEXT: mul.w00 a0, a0, a1 |
| ; CHECK-NEXT: pack a0, a0, a2 |
| ; CHECK-NEXT: ret |
| %res = mul <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Division and remainder tests |
| ; sdiv of <4 x i16>. The assertions expect a fully scalar expansion: operands |
| ; are prepared with slli/srai/sext.h, divided by four scalar divw, and the |
| ; quotients recombined with two ppaire.h and a pack. |
| define <4 x i16> @test_psdiv_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_psdiv_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srai a2, a1, 48 |
| ; CHECK-NEXT: srai a3, a0, 48 |
| ; CHECK-NEXT: slli a4, a1, 16 |
| ; CHECK-NEXT: sext.h a5, a1 |
| ; CHECK-NEXT: divw a2, a3, a2 |
| ; CHECK-NEXT: sext.h a3, a0 |
| ; CHECK-NEXT: divw a3, a3, a5 |
| ; CHECK-NEXT: slli a5, a0, 16 |
| ; CHECK-NEXT: srai a4, a4, 48 |
| ; CHECK-NEXT: srai a5, a5, 48 |
| ; CHECK-NEXT: divw a4, a5, a4 |
| ; CHECK-NEXT: slli a1, a1, 32 |
| ; CHECK-NEXT: slli a0, a0, 32 |
| ; CHECK-NEXT: srai a1, a1, 48 |
| ; CHECK-NEXT: srai a0, a0, 48 |
| ; CHECK-NEXT: divw a0, a0, a1 |
| ; CHECK-NEXT: ppaire.h a1, a4, a2 |
| ; CHECK-NEXT: ppaire.h a0, a3, a0 |
| ; CHECK-NEXT: pack a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = sdiv <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; sdiv of <8 x i8>. The assertions expect a fully scalar expansion: operands |
| ; are prepared with slli/srai/sext.b, divided by eight scalar divw, and the |
| ; quotients recombined with four ppaire.b, two ppaire.h and a pack. |
| define <8 x i8> @test_psdiv_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_psdiv_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srai a2, a1, 56 |
| ; CHECK-NEXT: srai a3, a0, 56 |
| ; CHECK-NEXT: slli a4, a1, 8 |
| ; CHECK-NEXT: slli a5, a0, 8 |
| ; CHECK-NEXT: slli a6, a1, 16 |
| ; CHECK-NEXT: slli a7, a0, 16 |
| ; CHECK-NEXT: slli t0, a1, 24 |
| ; CHECK-NEXT: sext.b t1, a1 |
| ; CHECK-NEXT: divw a2, a3, a2 |
| ; CHECK-NEXT: sext.b a3, a0 |
| ; CHECK-NEXT: divw a3, a3, t1 |
| ; CHECK-NEXT: slli t1, a0, 24 |
| ; CHECK-NEXT: srai a4, a4, 56 |
| ; CHECK-NEXT: srai a5, a5, 56 |
| ; CHECK-NEXT: divw a4, a5, a4 |
| ; CHECK-NEXT: slli a5, a1, 32 |
| ; CHECK-NEXT: srai a6, a6, 56 |
| ; CHECK-NEXT: srai a7, a7, 56 |
| ; CHECK-NEXT: divw a6, a7, a6 |
| ; CHECK-NEXT: slli a7, a0, 32 |
| ; CHECK-NEXT: srai t0, t0, 56 |
| ; CHECK-NEXT: srai t1, t1, 56 |
| ; CHECK-NEXT: divw t0, t1, t0 |
| ; CHECK-NEXT: slli t1, a1, 40 |
| ; CHECK-NEXT: srai a5, a5, 56 |
| ; CHECK-NEXT: srai a7, a7, 56 |
| ; CHECK-NEXT: divw a5, a7, a5 |
| ; CHECK-NEXT: slli a7, a0, 40 |
| ; CHECK-NEXT: srai t1, t1, 56 |
| ; CHECK-NEXT: srai a7, a7, 56 |
| ; CHECK-NEXT: divw a7, a7, t1 |
| ; CHECK-NEXT: slli a1, a1, 48 |
| ; CHECK-NEXT: slli a0, a0, 48 |
| ; CHECK-NEXT: srai a1, a1, 56 |
| ; CHECK-NEXT: srai a0, a0, 56 |
| ; CHECK-NEXT: divw a0, a0, a1 |
| ; CHECK-NEXT: ppaire.b a1, a4, a2 |
| ; CHECK-NEXT: ppaire.b a2, t0, a6 |
| ; CHECK-NEXT: ppaire.b a4, a7, a5 |
| ; CHECK-NEXT: ppaire.b a0, a3, a0 |
| ; CHECK-NEXT: ppaire.h a1, a2, a1 |
| ; CHECK-NEXT: ppaire.h a0, a0, a4 |
| ; CHECK-NEXT: pack a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = sdiv <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; sdiv of <2 x i32>. The assertions expect a scalar expansion: two divw (the |
| ; second on both operands shifted right by 32 with srli) recombined with |
| ; pack. |
| define <2 x i32> @test_psdiv_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_psdiv_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: divw a2, a0, a1 |
| ; CHECK-NEXT: srli a1, a1, 32 |
| ; CHECK-NEXT: srli a0, a0, 32 |
| ; CHECK-NEXT: divw a0, a0, a1 |
| ; CHECK-NEXT: pack a0, a2, a0 |
| ; CHECK-NEXT: ret |
| %res = sdiv <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; udiv of <4 x i16>. The assertions expect a fully scalar expansion: operands |
| ; are prepared with srliw/srli/slli/zext.h, divided by four scalar divuw, |
| ; and the quotients recombined with two ppaire.h and a pack. |
| define <4 x i16> @test_pudiv_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_pudiv_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srliw a2, a1, 16 |
| ; CHECK-NEXT: srliw a3, a0, 16 |
| ; CHECK-NEXT: zext.h a4, a1 |
| ; CHECK-NEXT: zext.h a5, a0 |
| ; CHECK-NEXT: divuw a2, a3, a2 |
| ; CHECK-NEXT: srli a3, a1, 48 |
| ; CHECK-NEXT: divuw a4, a5, a4 |
| ; CHECK-NEXT: srli a5, a0, 48 |
| ; CHECK-NEXT: divuw a3, a5, a3 |
| ; CHECK-NEXT: slli a1, a1, 16 |
| ; CHECK-NEXT: slli a0, a0, 16 |
| ; CHECK-NEXT: srli a1, a1, 48 |
| ; CHECK-NEXT: srli a0, a0, 48 |
| ; CHECK-NEXT: divuw a0, a0, a1 |
| ; CHECK-NEXT: ppaire.h a1, a4, a2 |
| ; CHECK-NEXT: ppaire.h a0, a0, a3 |
| ; CHECK-NEXT: pack a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %res = udiv <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; udiv of <8 x i8>. The assertions expect a fully scalar expansion: operands |
| ; are prepared with srli/srliw/slli/zext.b, divided by eight scalar divuw, |
| ; and the quotients recombined with four ppaire.b, two ppaire.h and a pack. |
| define <8 x i8> @test_pudiv_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_pudiv_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srli a2, a1, 56 |
| ; CHECK-NEXT: srli a3, a0, 56 |
| ; CHECK-NEXT: slli a4, a1, 8 |
| ; CHECK-NEXT: slli a5, a0, 8 |
| ; CHECK-NEXT: slli a6, a1, 16 |
| ; CHECK-NEXT: slli a7, a0, 16 |
| ; CHECK-NEXT: srliw t0, a1, 24 |
| ; CHECK-NEXT: srliw t1, a0, 24 |
| ; CHECK-NEXT: divuw a2, a3, a2 |
| ; CHECK-NEXT: zext.b a3, a1 |
| ; CHECK-NEXT: divuw t0, t1, t0 |
| ; CHECK-NEXT: zext.b t1, a0 |
| ; CHECK-NEXT: divuw a3, t1, a3 |
| ; CHECK-NEXT: slli t1, a1, 24 |
| ; CHECK-NEXT: srli a4, a4, 56 |
| ; CHECK-NEXT: srli a5, a5, 56 |
| ; CHECK-NEXT: divuw a4, a5, a4 |
| ; CHECK-NEXT: slli a5, a0, 24 |
| ; CHECK-NEXT: srli a6, a6, 56 |
| ; CHECK-NEXT: srli a7, a7, 56 |
| ; CHECK-NEXT: divuw a6, a7, a6 |
| ; CHECK-NEXT: slli a7, a1, 40 |
| ; CHECK-NEXT: srli t1, t1, 56 |
| ; CHECK-NEXT: srli a5, a5, 56 |
| ; CHECK-NEXT: divuw a5, a5, t1 |
| ; CHECK-NEXT: slli t1, a0, 40 |
| ; CHECK-NEXT: srli a7, a7, 56 |
| ; CHECK-NEXT: srli t1, t1, 56 |
| ; CHECK-NEXT: divuw a7, t1, a7 |
| ; CHECK-NEXT: slli a1, a1, 48 |
| ; CHECK-NEXT: slli a0, a0, 48 |
| ; CHECK-NEXT: srli a1, a1, 56 |
| ; CHECK-NEXT: srli a0, a0, 56 |
| ; CHECK-NEXT: divuw a0, a0, a1 |
| ; CHECK-NEXT: ppaire.b a1, a4, a2 |
| ; CHECK-NEXT: ppaire.b a2, a5, a6 |
| ; CHECK-NEXT: ppaire.b a4, a7, t0 |
| ; CHECK-NEXT: ppaire.b a0, a3, a0 |
| ; CHECK-NEXT: ppaire.h a1, a2, a1 |
| ; CHECK-NEXT: ppaire.h a0, a0, a4 |
| ; CHECK-NEXT: pack a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = udiv <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; udiv of <2 x i32>. The assertions expect a scalar expansion: two divuw |
| ; (the second on both operands shifted right by 32 with srli) recombined |
| ; with pack. |
| define <2 x i32> @test_pudiv_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_pudiv_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: divuw a2, a0, a1 |
| ; CHECK-NEXT: srli a1, a1, 32 |
| ; CHECK-NEXT: srli a0, a0, 32 |
| ; CHECK-NEXT: divuw a0, a0, a1 |
| ; CHECK-NEXT: pack a0, a2, a0 |
| ; CHECK-NEXT: ret |
| %res = udiv <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; srem of <4 x i16>. The assertions expect a fully scalar expansion: operands |
| ; are prepared with slli/srai/sext.h, reduced by four scalar remw, and the |
| ; remainders recombined with two ppaire.h and a pack. |
| define <4 x i16> @test_psrem_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_psrem_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srai a2, a1, 48 |
| ; CHECK-NEXT: srai a3, a0, 48 |
| ; CHECK-NEXT: slli a4, a1, 16 |
| ; CHECK-NEXT: sext.h a5, a1 |
| ; CHECK-NEXT: remw a2, a3, a2 |
| ; CHECK-NEXT: sext.h a3, a0 |
| ; CHECK-NEXT: remw a3, a3, a5 |
| ; CHECK-NEXT: slli a5, a0, 16 |
| ; CHECK-NEXT: srai a4, a4, 48 |
| ; CHECK-NEXT: srai a5, a5, 48 |
| ; CHECK-NEXT: remw a4, a5, a4 |
| ; CHECK-NEXT: slli a1, a1, 32 |
| ; CHECK-NEXT: slli a0, a0, 32 |
| ; CHECK-NEXT: srai a1, a1, 48 |
| ; CHECK-NEXT: srai a0, a0, 48 |
| ; CHECK-NEXT: remw a0, a0, a1 |
| ; CHECK-NEXT: ppaire.h a1, a4, a2 |
| ; CHECK-NEXT: ppaire.h a0, a3, a0 |
| ; CHECK-NEXT: pack a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = srem <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; srem on <8 x i8>. The expected code is scalarized: each byte lane is |
| ; sign-extended (sext.b, srai by 56, or slli followed by srai by 56), eight |
| ; remw instructions compute the remainders, and the results are rejoined |
| ; with four ppaire.b, two ppaire.h and a pack. |
| define <8 x i8> @test_psrem_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_psrem_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srai a2, a1, 56 |
| ; CHECK-NEXT: srai a3, a0, 56 |
| ; CHECK-NEXT: slli a4, a1, 8 |
| ; CHECK-NEXT: slli a5, a0, 8 |
| ; CHECK-NEXT: slli a6, a1, 16 |
| ; CHECK-NEXT: slli a7, a0, 16 |
| ; CHECK-NEXT: slli t0, a1, 24 |
| ; CHECK-NEXT: sext.b t1, a1 |
| ; CHECK-NEXT: remw a2, a3, a2 |
| ; CHECK-NEXT: sext.b a3, a0 |
| ; CHECK-NEXT: remw a3, a3, t1 |
| ; CHECK-NEXT: slli t1, a0, 24 |
| ; CHECK-NEXT: srai a4, a4, 56 |
| ; CHECK-NEXT: srai a5, a5, 56 |
| ; CHECK-NEXT: remw a4, a5, a4 |
| ; CHECK-NEXT: slli a5, a1, 32 |
| ; CHECK-NEXT: srai a6, a6, 56 |
| ; CHECK-NEXT: srai a7, a7, 56 |
| ; CHECK-NEXT: remw a6, a7, a6 |
| ; CHECK-NEXT: slli a7, a0, 32 |
| ; CHECK-NEXT: srai t0, t0, 56 |
| ; CHECK-NEXT: srai t1, t1, 56 |
| ; CHECK-NEXT: remw t0, t1, t0 |
| ; CHECK-NEXT: slli t1, a1, 40 |
| ; CHECK-NEXT: srai a5, a5, 56 |
| ; CHECK-NEXT: srai a7, a7, 56 |
| ; CHECK-NEXT: remw a5, a7, a5 |
| ; CHECK-NEXT: slli a7, a0, 40 |
| ; CHECK-NEXT: srai t1, t1, 56 |
| ; CHECK-NEXT: srai a7, a7, 56 |
| ; CHECK-NEXT: remw a7, a7, t1 |
| ; CHECK-NEXT: slli a1, a1, 48 |
| ; CHECK-NEXT: slli a0, a0, 48 |
| ; CHECK-NEXT: srai a1, a1, 56 |
| ; CHECK-NEXT: srai a0, a0, 56 |
| ; CHECK-NEXT: remw a0, a0, a1 |
| ; CHECK-NEXT: ppaire.b a1, a4, a2 |
| ; CHECK-NEXT: ppaire.b a2, t0, a6 |
| ; CHECK-NEXT: ppaire.b a4, a7, a5 |
| ; CHECK-NEXT: ppaire.b a0, a3, a0 |
| ; CHECK-NEXT: ppaire.h a1, a2, a1 |
| ; CHECK-NEXT: ppaire.h a0, a0, a4 |
| ; CHECK-NEXT: pack a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = srem <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; srem on <2 x i32>. The expected code is scalarized: one remw on the |
| ; unshifted operands, one remw after an arithmetic shift right by 32 of both |
| ; operands, and a final pack of the two remainders. |
| define <2 x i32> @test_psrem_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_psrem_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: remw a2, a0, a1 |
| ; CHECK-NEXT: srai a1, a1, 32 |
| ; CHECK-NEXT: srai a0, a0, 32 |
| ; CHECK-NEXT: remw a0, a0, a1 |
| ; CHECK-NEXT: pack a0, a2, a0 |
| ; CHECK-NEXT: ret |
| %res = srem <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; urem on <4 x i16>. The expected code is scalarized: each 16-bit lane is |
| ; zero-extended (zext.h, srliw by 16, srli by 48, or slli followed by srli |
| ; by 48), four remuw instructions compute the remainders, and the results |
| ; are rejoined with two ppaire.h and a pack. |
| define <4 x i16> @test_purem_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_purem_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srliw a2, a1, 16 |
| ; CHECK-NEXT: srliw a3, a0, 16 |
| ; CHECK-NEXT: zext.h a4, a1 |
| ; CHECK-NEXT: zext.h a5, a0 |
| ; CHECK-NEXT: remuw a2, a3, a2 |
| ; CHECK-NEXT: srli a3, a1, 48 |
| ; CHECK-NEXT: remuw a4, a5, a4 |
| ; CHECK-NEXT: srli a5, a0, 48 |
| ; CHECK-NEXT: remuw a3, a5, a3 |
| ; CHECK-NEXT: slli a1, a1, 16 |
| ; CHECK-NEXT: slli a0, a0, 16 |
| ; CHECK-NEXT: srli a1, a1, 48 |
| ; CHECK-NEXT: srli a0, a0, 48 |
| ; CHECK-NEXT: remuw a0, a0, a1 |
| ; CHECK-NEXT: ppaire.h a1, a4, a2 |
| ; CHECK-NEXT: ppaire.h a0, a0, a3 |
| ; CHECK-NEXT: pack a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %res = urem <4 x i16> %a, %b |
| ret <4 x i16> %res |
| } |
| |
| ; urem on <8 x i8>. The expected code is scalarized: each byte lane is |
| ; zero-extended (zext.b, srliw by 24, srli by 56, or slli followed by srli |
| ; by 56), eight remuw instructions compute the remainders, and the results |
| ; are rejoined with four ppaire.b, two ppaire.h and a pack. |
| define <8 x i8> @test_purem_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_purem_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: srli a2, a1, 56 |
| ; CHECK-NEXT: srli a3, a0, 56 |
| ; CHECK-NEXT: slli a4, a1, 8 |
| ; CHECK-NEXT: slli a5, a0, 8 |
| ; CHECK-NEXT: slli a6, a1, 16 |
| ; CHECK-NEXT: slli a7, a0, 16 |
| ; CHECK-NEXT: srliw t0, a1, 24 |
| ; CHECK-NEXT: srliw t1, a0, 24 |
| ; CHECK-NEXT: remuw a2, a3, a2 |
| ; CHECK-NEXT: zext.b a3, a1 |
| ; CHECK-NEXT: remuw t0, t1, t0 |
| ; CHECK-NEXT: zext.b t1, a0 |
| ; CHECK-NEXT: remuw a3, t1, a3 |
| ; CHECK-NEXT: slli t1, a1, 24 |
| ; CHECK-NEXT: srli a4, a4, 56 |
| ; CHECK-NEXT: srli a5, a5, 56 |
| ; CHECK-NEXT: remuw a4, a5, a4 |
| ; CHECK-NEXT: slli a5, a0, 24 |
| ; CHECK-NEXT: srli a6, a6, 56 |
| ; CHECK-NEXT: srli a7, a7, 56 |
| ; CHECK-NEXT: remuw a6, a7, a6 |
| ; CHECK-NEXT: slli a7, a1, 40 |
| ; CHECK-NEXT: srli t1, t1, 56 |
| ; CHECK-NEXT: srli a5, a5, 56 |
| ; CHECK-NEXT: remuw a5, a5, t1 |
| ; CHECK-NEXT: slli t1, a0, 40 |
| ; CHECK-NEXT: srli a7, a7, 56 |
| ; CHECK-NEXT: srli t1, t1, 56 |
| ; CHECK-NEXT: remuw a7, t1, a7 |
| ; CHECK-NEXT: slli a1, a1, 48 |
| ; CHECK-NEXT: slli a0, a0, 48 |
| ; CHECK-NEXT: srli a1, a1, 56 |
| ; CHECK-NEXT: srli a0, a0, 56 |
| ; CHECK-NEXT: remuw a0, a0, a1 |
| ; CHECK-NEXT: ppaire.b a1, a4, a2 |
| ; CHECK-NEXT: ppaire.b a2, a5, a6 |
| ; CHECK-NEXT: ppaire.b a4, a7, t0 |
| ; CHECK-NEXT: ppaire.b a0, a3, a0 |
| ; CHECK-NEXT: ppaire.h a1, a2, a1 |
| ; CHECK-NEXT: ppaire.h a0, a0, a4 |
| ; CHECK-NEXT: pack a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = urem <8 x i8> %a, %b |
| ret <8 x i8> %res |
| } |
| |
| ; urem on <2 x i32>. The expected code is scalarized: one remuw on the |
| ; unshifted operands, one remuw after shifting both operands right by 32, |
| ; and a final pack of the two remainders. |
| define <2 x i32> @test_purem_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_purem_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: remuw a2, a0, a1 |
| ; CHECK-NEXT: srli a1, a1, 32 |
| ; CHECK-NEXT: srli a0, a0, 32 |
| ; CHECK-NEXT: remuw a0, a0, a1 |
| ; CHECK-NEXT: pack a0, a2, a0 |
| ; CHECK-NEXT: ret |
| %res = urem <2 x i32> %a, %b |
| ret <2 x i32> %res |
| } |
| |
| ; Comparison operations for v4i16 |
| ; icmp eq + sext on <4 x i16>: expected to select a single pmseq.h. |
| define <4 x i16> @test_eq_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_eq_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmseq.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %pred = icmp eq <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; icmp ne + sext on <4 x i16>: expected as pmseq.h followed by not. |
| define <4 x i16> @test_ne_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_ne_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmseq.h a0, a0, a1 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp ne <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; icmp slt + sext on <4 x i16>: expected to select a single pmslt.h. |
| define <4 x i16> @test_slt_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_slt_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %pred = icmp slt <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; icmp sle + sext on <4 x i16>: expected as pmslt.h with swapped operands |
| ; followed by not, i.e. a <= b computed as !(b < a). |
| define <4 x i16> @test_sle_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_sle_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.h a0, a1, a0 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp sle <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; icmp sgt + sext on <4 x i16>: expected as pmslt.h with swapped operands. |
| define <4 x i16> @test_sgt_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_sgt_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.h a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp sgt <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; icmp sge + sext on <4 x i16>: expected as pmslt.h followed by not, |
| ; i.e. a >= b computed as !(a < b). |
| define <4 x i16> @test_sge_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_sge_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.h a0, a0, a1 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp sge <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; icmp ult + sext on <4 x i16>: expected to select a single pmsltu.h. |
| define <4 x i16> @test_ult_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_ult_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %pred = icmp ult <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; icmp ule + sext on <4 x i16>: expected as pmsltu.h with swapped operands |
| ; followed by not, i.e. a <= b computed as !(b < a). |
| define <4 x i16> @test_ule_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_ule_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.h a0, a1, a0 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp ule <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; icmp ugt + sext on <4 x i16>: expected as pmsltu.h with swapped operands. |
| define <4 x i16> @test_ugt_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_ugt_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.h a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp ugt <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; icmp uge + sext on <4 x i16>: expected as pmsltu.h followed by not, |
| ; i.e. a >= b computed as !(a < b). |
| define <4 x i16> @test_uge_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_uge_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.h a0, a0, a1 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp uge <4 x i16> %a, %b |
| %mask = sext <4 x i1> %pred to <4 x i16> |
| ret <4 x i16> %mask |
| } |
| |
| ; Comparison operations for v8i8 |
| ; icmp eq + sext on <8 x i8>: expected to select a single pmseq.b. |
| define <8 x i8> @test_eq_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_eq_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmseq.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %pred = icmp eq <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; icmp ne + sext on <8 x i8>: expected as pmseq.b followed by not. |
| define <8 x i8> @test_ne_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_ne_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmseq.b a0, a0, a1 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp ne <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; icmp slt + sext on <8 x i8>: expected to select a single pmslt.b. |
| define <8 x i8> @test_slt_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_slt_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %pred = icmp slt <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; icmp sle + sext on <8 x i8>: expected as pmslt.b with swapped operands |
| ; followed by not, i.e. a <= b computed as !(b < a). |
| define <8 x i8> @test_sle_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_sle_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.b a0, a1, a0 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp sle <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; icmp sgt + sext on <8 x i8>: expected as pmslt.b with swapped operands. |
| define <8 x i8> @test_sgt_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_sgt_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.b a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp sgt <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; icmp sge + sext on <8 x i8>: expected as pmslt.b followed by not, |
| ; i.e. a >= b computed as !(a < b). |
| define <8 x i8> @test_sge_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_sge_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.b a0, a0, a1 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp sge <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; icmp ult + sext on <8 x i8>: expected to select a single pmsltu.b. |
| define <8 x i8> @test_ult_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_ult_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %pred = icmp ult <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; icmp ule + sext on <8 x i8>: expected as pmsltu.b with swapped operands |
| ; followed by not, i.e. a <= b computed as !(b < a). |
| define <8 x i8> @test_ule_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_ule_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.b a0, a1, a0 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp ule <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; icmp ugt + sext on <8 x i8>: expected as pmsltu.b with swapped operands. |
| define <8 x i8> @test_ugt_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_ugt_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.b a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp ugt <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; icmp uge + sext on <8 x i8>: expected as pmsltu.b followed by not, |
| ; i.e. a >= b computed as !(a < b). |
| define <8 x i8> @test_uge_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_uge_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.b a0, a0, a1 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp uge <8 x i8> %a, %b |
| %mask = sext <8 x i1> %pred to <8 x i8> |
| ret <8 x i8> %mask |
| } |
| |
| ; Comparison operations for v2i32 |
| ; icmp eq + sext on <2 x i32>: expected to select a single pmseq.w. |
| define <2 x i32> @test_eq_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_eq_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmseq.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %pred = icmp eq <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; icmp ne + sext on <2 x i32>: expected as pmseq.w followed by not. |
| define <2 x i32> @test_ne_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_ne_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmseq.w a0, a0, a1 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp ne <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; icmp slt + sext on <2 x i32>: expected to select a single pmslt.w. |
| define <2 x i32> @test_slt_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_slt_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %pred = icmp slt <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; icmp sle + sext on <2 x i32>: expected as pmslt.w with swapped operands |
| ; followed by not, i.e. a <= b computed as !(b < a). |
| define <2 x i32> @test_sle_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_sle_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.w a0, a1, a0 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp sle <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; icmp sgt + sext on <2 x i32>: expected as pmslt.w with swapped operands. |
| define <2 x i32> @test_sgt_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_sgt_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.w a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp sgt <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; icmp sge + sext on <2 x i32>: expected as pmslt.w followed by not, |
| ; i.e. a >= b computed as !(a < b). |
| define <2 x i32> @test_sge_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_sge_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.w a0, a0, a1 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp sge <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; icmp ult + sext on <2 x i32>: expected to select a single pmsltu.w. |
| define <2 x i32> @test_ult_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_ult_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %pred = icmp ult <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; icmp ule + sext on <2 x i32>: expected as pmsltu.w with swapped operands |
| ; followed by not, i.e. a <= b computed as !(b < a). |
| define <2 x i32> @test_ule_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_ule_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.w a0, a1, a0 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp ule <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; icmp ugt + sext on <2 x i32>: expected as pmsltu.w with swapped operands. |
| define <2 x i32> @test_ugt_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_ugt_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.w a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp ugt <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; icmp uge + sext on <2 x i32>: expected as pmsltu.w followed by not, |
| ; i.e. a >= b computed as !(a < b). |
| define <2 x i32> @test_uge_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_uge_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.w a0, a0, a1 |
| ; CHECK-NEXT: not a0, a0 |
| ; CHECK-NEXT: ret |
| %pred = icmp uge <2 x i32> %a, %b |
| %mask = sext <2 x i1> %pred to <2 x i32> |
| ret <2 x i32> %mask |
| } |
| |
| ; Test 8/16/32-bit [s|u]min/[s|u]max |
| ; llvm.smin on <4 x i16>: expected to select a single pmin.h. |
| ; The intrinsic suffix is v4i16 so that it matches the <4 x i16> operands. |
| define <4 x i16> @test_smin_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_smin_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmin.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %min = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %min |
| } |
| |
| ; llvm.umin on <4 x i16>: expected to select a single pminu.h. |
| ; The intrinsic suffix is v4i16 so that it matches the <4 x i16> operands. |
| define <4 x i16> @test_umin_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_umin_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pminu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %min = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %min |
| } |
| |
| ; llvm.smin on <8 x i8>: expected to select a single pmin.b. |
| ; The intrinsic suffix is v8i8 so that it matches the <8 x i8> operands. |
| define <8 x i8> @test_smin_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_smin_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmin.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %min = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b) |
| ret <8 x i8> %min |
| } |
| |
| ; llvm.umin on <8 x i8>: expected to select a single pminu.b. |
| ; The intrinsic suffix is v8i8 so that it matches the <8 x i8> operands. |
| define <8 x i8> @test_umin_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_umin_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pminu.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %min = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b) |
| ret <8 x i8> %min |
| } |
| |
| ; llvm.smin on <2 x i32>: expected to select a single pmin.w. |
| define <2 x i32> @test_smin_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_smin_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmin.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; llvm.umin on <2 x i32>: expected to select a single pminu.w. |
| define <2 x i32> @test_umin_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_umin_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pminu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; llvm.smax on <4 x i16>: expected to select a single pmax.h. |
| ; The intrinsic suffix is v4i16 so that it matches the <4 x i16> operands. |
| define <4 x i16> @test_smax_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_smax_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmax.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %max = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %max |
| } |
| |
| ; llvm.umax on <4 x i16>: expected to select a single pmaxu.h. |
| ; The intrinsic suffix is v4i16 so that it matches the <4 x i16> operands. |
| define <4 x i16> @test_umax_h(<4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_umax_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmaxu.h a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %max = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b) |
| ret <4 x i16> %max |
| } |
| |
| ; llvm.smax on <8 x i8>: expected to select a single pmax.b. |
| ; The intrinsic suffix is v8i8 so that it matches the <8 x i8> operands. |
| define <8 x i8> @test_smax_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_smax_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmax.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %max = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b) |
| ret <8 x i8> %max |
| } |
| |
| ; llvm.umax on <8 x i8>: expected to select a single pmaxu.b. |
| ; The intrinsic suffix is v8i8 so that it matches the <8 x i8> operands. |
| define <8 x i8> @test_umax_b(<8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_umax_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmaxu.b a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %max = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) |
| ret <8 x i8> %max |
| } |
| |
| ; llvm.smax on <2 x i32>: expected to select a single pmax.w. |
| define <2 x i32> @test_smax_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_smax_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmax.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; llvm.umax on <2 x i32>: expected to select a single pmaxu.w. |
| define <2 x i32> @test_umax_w(<2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_umax_w: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmaxu.w a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b) |
| ret <2 x i32> %res |
| } |
| |
| ; Test select operations |
| ; Scalar i1 condition choosing between two whole <4 x i16> values; the |
| ; expected code tests bit 0 of the condition and uses a branch plus mv. |
| define <4 x i16> @test_select_v4i16(i1 %cond, <4 x i16> %a, <4 x i16> %b) { |
| ; CHECK-LABEL: test_select_v4i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andi a3, a0, 1 |
| ; CHECK-NEXT: mv a0, a1 |
| ; CHECK-NEXT: bnez a3, .LBB205_2 |
| ; CHECK-NEXT: # %bb.1: |
| ; CHECK-NEXT: mv a0, a2 |
| ; CHECK-NEXT: .LBB205_2: |
| ; CHECK-NEXT: ret |
| %chosen = select i1 %cond, <4 x i16> %a, <4 x i16> %b |
| ret <4 x i16> %chosen |
| } |
| |
| ; Scalar i1 condition choosing between two whole <8 x i8> values; the |
| ; expected code tests bit 0 of the condition and uses a branch plus mv. |
| define <8 x i8> @test_select_v8i8(i1 %cond, <8 x i8> %a, <8 x i8> %b) { |
| ; CHECK-LABEL: test_select_v8i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andi a3, a0, 1 |
| ; CHECK-NEXT: mv a0, a1 |
| ; CHECK-NEXT: bnez a3, .LBB206_2 |
| ; CHECK-NEXT: # %bb.1: |
| ; CHECK-NEXT: mv a0, a2 |
| ; CHECK-NEXT: .LBB206_2: |
| ; CHECK-NEXT: ret |
| %chosen = select i1 %cond, <8 x i8> %a, <8 x i8> %b |
| ret <8 x i8> %chosen |
| } |
| |
| ; Scalar i1 condition choosing between two whole <2 x i32> values; the |
| ; expected code tests bit 0 of the condition and uses a branch plus mv. |
| define <2 x i32> @test_select_v2i32(i1 %cond, <2 x i32> %a, <2 x i32> %b) { |
| ; CHECK-LABEL: test_select_v2i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andi a3, a0, 1 |
| ; CHECK-NEXT: mv a0, a1 |
| ; CHECK-NEXT: bnez a3, .LBB207_2 |
| ; CHECK-NEXT: # %bb.1: |
| ; CHECK-NEXT: mv a0, a2 |
| ; CHECK-NEXT: .LBB207_2: |
| ; CHECK-NEXT: ret |
| %chosen = select i1 %cond, <2 x i32> %a, <2 x i32> %b |
| ret <2 x i32> %chosen |
| } |
| |
| ; Test vselect operations |
| ; Per-lane select on <4 x i16>: lanes where %a == %b take %c, the rest take |
| ; %b. The expected code builds the lane mask with pmseq.h and uses merge. |
| define <4 x i16> @test_vselect_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) { |
| ; CHECK-LABEL: test_vselect_v4i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmseq.h a0, a0, a1 |
| ; CHECK-NEXT: merge a0, a1, a2 |
| ; CHECK-NEXT: ret |
| %lanes = icmp eq <4 x i16> %a, %b |
| %blend = select <4 x i1> %lanes, <4 x i16> %c, <4 x i16> %b |
| ret <4 x i16> %blend |
| } |
| |
| ; Per-lane select on <8 x i8>: lanes where %a <u %b take %c, the rest take |
| ; %b. The expected code builds the lane mask with pmsltu.b and uses merge. |
| define <8 x i8> @test_vselect_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { |
| ; CHECK-LABEL: test_vselect_v8i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmsltu.b a0, a0, a1 |
| ; CHECK-NEXT: merge a0, a1, a2 |
| ; CHECK-NEXT: ret |
| %lanes = icmp ult <8 x i8> %a, %b |
| %blend = select <8 x i1> %lanes, <8 x i8> %c, <8 x i8> %b |
| ret <8 x i8> %blend |
| } |
| |
| ; Per-lane select on <2 x i32>: lanes where %a >s %b take %c, the rest take |
| ; %b. The expected code builds the lane mask with pmslt.w (operands |
| ; swapped) and uses merge. |
| define <2 x i32> @test_vselect_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) { |
| ; CHECK-LABEL: test_vselect_v2i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pmslt.w a0, a1, a0 |
| ; CHECK-NEXT: merge a0, a1, a2 |
| ; CHECK-NEXT: ret |
| %lanes = icmp sgt <2 x i32> %a, %b |
| %blend = select <2 x i1> %lanes, <2 x i32> %c, <2 x i32> %b |
| ret <2 x i32> %blend |
| } |
| |
| ; llvm.bswap on <4 x i16>: expected as packed halfword shifts right and left |
| ; by 8 combined with or. |
| define <4 x i16> @test_bswap_v4i16(<4 x i16> %a) { |
| ; CHECK-LABEL: test_bswap_v4i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrli.h a1, a0, 8 |
| ; CHECK-NEXT: pslli.h a0, a0, 8 |
| ; CHECK-NEXT: or a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %swapped = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a) |
| ret <4 x i16> %swapped |
| } |
| |
| ; llvm.bswap on <2 x i32>. The expected code assembles each reversed word |
| ; from four pieces: packed word shifts right by 8 and 24 and left by 8 and |
| ; 24, with the two middle bytes masked by the constant built from |
| ; lui 16 / addi -256 (0xFF00) and passed through pmv.ws. |
| define <2 x i32> @test_bswap_v2i32(<2 x i32> %a) { |
| ; CHECK-LABEL: test_bswap_v2i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrli.w a1, a0, 8 |
| ; CHECK-NEXT: lui a2, 16 |
| ; CHECK-NEXT: psrli.w a3, a0, 24 |
| ; CHECK-NEXT: addi a2, a2, -256 |
| ; CHECK-NEXT: pmv.ws a2, a2 |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: and a2, a0, a2 |
| ; CHECK-NEXT: or a1, a1, a3 |
| ; CHECK-NEXT: pslli.w a2, a2, 8 |
| ; CHECK-NEXT: pslli.w a0, a0, 24 |
| ; CHECK-NEXT: or a0, a0, a2 |
| ; CHECK-NEXT: or a0, a0, a1 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a) |
| ret <2 x i32> %res |
| } |
| |
| ; llvm.bitreverse on <8 x i8>. The expected code is three mask-and-shift |
| ; stages on packed bytes: shift by 4 with mask 15 (0x0F), shift by 2 with |
| ; mask 51 (0x33), and shift by 1 with mask 85 (0x55), each mask loaded with |
| ; pli.b and each stage combined with or. |
| define <8 x i8> @test_bitreverse_v8i8(<8 x i8> %a) { |
| ; CHECK-LABEL: test_bitreverse_v8i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrli.b a1, a0, 4 |
| ; CHECK-NEXT: pli.b a2, 15 |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: and a0, a0, a2 |
| ; CHECK-NEXT: pli.b a2, 51 |
| ; CHECK-NEXT: pslli.b a0, a0, 4 |
| ; CHECK-NEXT: or a0, a1, a0 |
| ; CHECK-NEXT: psrli.b a1, a0, 2 |
| ; CHECK-NEXT: and a0, a0, a2 |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: pli.b a2, 85 |
| ; CHECK-NEXT: pslli.b a0, a0, 2 |
| ; CHECK-NEXT: or a0, a1, a0 |
| ; CHECK-NEXT: psrli.b a1, a0, 1 |
| ; CHECK-NEXT: and a0, a0, a2 |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: pslli.b a0, a0, 1 |
| ; CHECK-NEXT: or a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a) |
| ret <8 x i8> %res |
| } |
| |
| ; llvm.bitreverse on <4 x i16>. The expected code first swaps the two bytes |
| ; of every halfword (psrli.h / pslli.h by 8 and or), then runs three |
| ; mask-and-shift stages with packed halfword shifts by 4, 2 and 1 using the |
| ; pli.b masks 15 (0x0F), 51 (0x33) and 85 (0x55). |
| define <4 x i16> @test_bitreverse_v4i16(<4 x i16> %a) { |
| ; CHECK-LABEL: test_bitreverse_v4i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrli.h a1, a0, 8 |
| ; CHECK-NEXT: pslli.h a0, a0, 8 |
| ; CHECK-NEXT: pli.b a2, 15 |
| ; CHECK-NEXT: or a0, a0, a1 |
| ; CHECK-NEXT: psrli.h a1, a0, 4 |
| ; CHECK-NEXT: and a0, a0, a2 |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: pli.b a2, 51 |
| ; CHECK-NEXT: pslli.h a0, a0, 4 |
| ; CHECK-NEXT: or a0, a1, a0 |
| ; CHECK-NEXT: psrli.h a1, a0, 2 |
| ; CHECK-NEXT: and a0, a0, a2 |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: pli.b a2, 85 |
| ; CHECK-NEXT: pslli.h a0, a0, 2 |
| ; CHECK-NEXT: or a0, a1, a0 |
| ; CHECK-NEXT: psrli.h a1, a0, 1 |
| ; CHECK-NEXT: and a0, a0, a2 |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: pslli.h a0, a0, 1 |
| ; CHECK-NEXT: or a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a) |
| ret <4 x i16> %res |
| } |
| |
| ; llvm.bitreverse on <2 x i32>. The expected code first reverses the bytes |
| ; of every word (packed word shifts by 8 and 24 with the lui 16 / addi -256 |
| ; mask passed through pmv.ws), then runs three mask-and-shift stages with |
| ; packed word shifts by 4, 2 and 1 using the pli.b masks 15 (0x0F), |
| ; 51 (0x33) and 85 (0x55). |
| define <2 x i32> @test_bitreverse_v2i32(<2 x i32> %a) { |
| ; CHECK-LABEL: test_bitreverse_v2i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: psrli.w a1, a0, 8 |
| ; CHECK-NEXT: lui a2, 16 |
| ; CHECK-NEXT: psrli.w a3, a0, 24 |
| ; CHECK-NEXT: addi a2, a2, -256 |
| ; CHECK-NEXT: pmv.ws a2, a2 |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: and a2, a0, a2 |
| ; CHECK-NEXT: pslli.w a0, a0, 24 |
| ; CHECK-NEXT: or a1, a1, a3 |
| ; CHECK-NEXT: pli.b a3, 15 |
| ; CHECK-NEXT: pslli.w a2, a2, 8 |
| ; CHECK-NEXT: or a0, a0, a2 |
| ; CHECK-NEXT: pli.b a2, 51 |
| ; CHECK-NEXT: or a0, a0, a1 |
| ; CHECK-NEXT: psrli.w a1, a0, 4 |
| ; CHECK-NEXT: and a0, a0, a3 |
| ; CHECK-NEXT: and a1, a1, a3 |
| ; CHECK-NEXT: pli.b a3, 85 |
| ; CHECK-NEXT: pslli.w a0, a0, 4 |
| ; CHECK-NEXT: or a0, a1, a0 |
| ; CHECK-NEXT: psrli.w a1, a0, 2 |
| ; CHECK-NEXT: and a0, a0, a2 |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: pslli.w a0, a0, 2 |
| ; CHECK-NEXT: or a0, a1, a0 |
| ; CHECK-NEXT: psrli.w a1, a0, 1 |
| ; CHECK-NEXT: and a0, a0, a3 |
| ; CHECK-NEXT: and a1, a1, a3 |
| ; CHECK-NEXT: pslli.w a0, a0, 1 |
| ; CHECK-NEXT: or a0, a1, a0 |
| ; CHECK-NEXT: ret |
| %res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a) |
| ret <2 x i32> %res |
| } |