| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=riscv32 -mattr=+experimental-p,+m,+zbb \ |
| ; RUN: -riscv-enable-p-ext-simd-codegen -verify-machineinstrs < %s | \ |
| ; RUN: FileCheck --check-prefixes=CHECK,CHECK-RV32 %s |
| ; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+m,+zbb \ |
| ; RUN: -riscv-enable-p-ext-simd-codegen -verify-machineinstrs < %s | \ |
| ; RUN: FileCheck --check-prefixes=CHECK,CHECK-RV64 %s |
| |
| ; Test basic add/sub operations for v2i16 |
| ; add <2 x i16>: both RV32 and RV64 are expected to select padd.h on the two loaded words. |
| define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_padd_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: padd.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = add <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; sub <2 x i16>: both RV32 and RV64 are expected to select psub.h on the two loaded words. |
| define void @test_psub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_psub_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: psub.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = sub <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test basic add/sub operations for v4i8 |
| ; add <4 x i8>: both RV32 and RV64 are expected to select padd.b on the two loaded words. |
| define void @test_padd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_padd_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: padd.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = add <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; sub <4 x i8>: both RV32 and RV64 are expected to select psub.b on the two loaded words. |
| define void @test_psub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_psub_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: psub.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = sub <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test bitwise operations for v2i16 (use scalar instructions) |
| ; and <2 x i16>: expected to use the plain scalar and on the loaded 32-bit words. |
| define void @test_and_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_and_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = and <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; or <2 x i16>: expected to use the plain scalar or on the loaded 32-bit words. |
| define void @test_or_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_or_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: or a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = or <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; xor <2 x i16>: expected to use the plain scalar xor on the loaded 32-bit words. |
| define void @test_xor_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_xor_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: xor a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = xor <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; a & ~b on <2 x i16> (the not is written as xor with splat -1): expected to fold into one scalar andn. |
| define void @test_andn_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_andn_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: andn a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %not = xor <2 x i16> %b, splat (i16 -1) |
| %res = and <2 x i16> %a, %not |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; a | ~b on <2 x i16> (the not is written as xor with splat -1): expected to fold into one scalar orn. |
| define void @test_orn_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_orn_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: orn a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %not = xor <2 x i16> %b, splat (i16 -1) |
| %res = or <2 x i16> %a, %not |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; FIXME: A bitcast is getting in the way on RV64. |
| ; a ^ ~b on <2 x i16>: RV32 is expected to select a single xnor, while RV64 is |
| ; currently expected to emit xor followed by not (two instructions). |
| define void @test_xnor_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_xnor_h: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: xnor a1, a2, a1 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_xnor_h: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: xor a1, a2, a1 |
| ; CHECK-RV64-NEXT: not a1, a1 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %not = xor <2 x i16> %b, splat (i16 -1) |
| %res = xor <2 x i16> %a, %not |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test bitwise operations for v4i8 (use scalar instructions) |
| ; and <4 x i8>: expected to use the plain scalar and on the loaded 32-bit words. |
| define void @test_and_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_and_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: and a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = and <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; or <4 x i8>: expected to use the plain scalar or on the loaded 32-bit words. |
| define void @test_or_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_or_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: or a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = or <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; xor <4 x i8>: expected to use the plain scalar xor on the loaded 32-bit words. |
| define void @test_xor_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_xor_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: xor a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = xor <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; a & ~b on <4 x i8> (the not is written as xor with splat -1): expected to fold into one scalar andn. |
| define void @test_andn_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_andn_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: andn a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %not = xor <4 x i8> %b, splat (i8 -1) |
| %res = and <4 x i8> %a, %not |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; a | ~b on <4 x i8> (the not is written as xor with splat -1): expected to fold into one scalar orn. |
| define void @test_orn_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_orn_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: orn a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %not = xor <4 x i8> %b, splat (i8 -1) |
| %res = or <4 x i8> %a, %not |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; FIXME: A bitcast is getting in the way on RV64. |
| ; a ^ ~b on <4 x i8>: RV32 is expected to select a single xnor, while RV64 is |
| ; currently expected to emit xor followed by not (two instructions). |
| define void @test_xnor_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_xnor_b: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: xnor a1, a2, a1 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_xnor_b: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: xor a1, a2, a1 |
| ; CHECK-RV64-NEXT: not a1, a1 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %not = xor <4 x i8> %b, splat (i8 -1) |
| %res = xor <4 x i8> %a, %not |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Bitwise not of <2 x i16> (xor with splat -1): expected to become a scalar not of the loaded word. |
| define void @test_not_h(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_not_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %res = xor <2 x i16> %a, splat(i16 -1) |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Bitwise not of <4 x i8> (xor with splat -1): expected to become a scalar not of the loaded word. |
| define void @test_not_b(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_not_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %res = xor <4 x i8> %a, splat(i8 -1) |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test saturating add operations for v2i16 |
| ; llvm.sadd.sat on <2 x i16> (signed saturating add): expected to select psadd.h. |
| define void @test_psadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_psadd_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: psadd.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %a, <2 x i16> %b) |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; llvm.uadd.sat on <2 x i16> (unsigned saturating add): expected to select psaddu.h. |
| define void @test_psaddu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_psaddu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: psaddu.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %a, <2 x i16> %b) |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test saturating sub operations for v2i16 |
| ; llvm.ssub.sat on <2 x i16> (signed saturating sub): expected to select pssub.h. |
| define void @test_pssub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pssub_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pssub.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %a, <2 x i16> %b) |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; llvm.usub.sat on <2 x i16> (unsigned saturating sub): expected to select pssubu.h. |
| define void @test_pssubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pssubu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pssubu.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %a, <2 x i16> %b) |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test saturating add operations for v4i8 |
| ; llvm.sadd.sat on <4 x i8> (signed saturating add): expected to select psadd.b. |
| define void @test_psadd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_psadd_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: psadd.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %a, <4 x i8> %b) |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; llvm.uadd.sat on <4 x i8> (unsigned saturating add): expected to select psaddu.b. |
| define void @test_psaddu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_psaddu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: psaddu.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %a, <4 x i8> %b) |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test saturating sub operations for v4i8 |
| ; llvm.ssub.sat on <4 x i8> (signed saturating sub): expected to select pssub.b. |
| define void @test_pssub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pssub_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pssub.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %a, <4 x i8> %b) |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; llvm.usub.sat on <4 x i8> (unsigned saturating sub): expected to select pssubu.b. |
| define void @test_pssubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pssubu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pssubu.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %a, <4 x i8> %b) |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test averaging floor signed operations for v2i16 |
| ; Pattern: sext both <2 x i16> inputs to i32, add nsw, ashr by 1, trunc back. |
| ; The whole sequence is expected to select a single paadd.h. |
| define void @test_paadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_paadd_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: paadd.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %ext.a = sext <2 x i16> %a to <2 x i32> |
| %ext.b = sext <2 x i16> %b to <2 x i32> |
| %add = add nsw <2 x i32> %ext.a, %ext.b |
| %shift = ashr <2 x i32> %add, <i32 1, i32 1> |
| %res = trunc <2 x i32> %shift to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test averaging floor unsigned operations for v2i16 |
| ; Pattern: (a & b) + ((a ^ b) >> 1) with a logical shift, kept in <2 x i16> (no widening). |
| ; The whole sequence is expected to select a single paaddu.h. |
| define void @test_paaddu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_paaddu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: paaddu.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %and = and <2 x i16> %a, %b |
| %xor = xor <2 x i16> %a, %b |
| %shift = lshr <2 x i16> %xor, <i16 1, i16 1> |
| %res = add <2 x i16> %and, %shift |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test averaging floor signed operations for v4i8 |
| ; Pattern: sext both <4 x i8> inputs to i16, add nsw, ashr by 1, trunc back. |
| ; The whole sequence is expected to select a single paadd.b. |
| define void @test_paadd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_paadd_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: paadd.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %ext.a = sext <4 x i8> %a to <4 x i16> |
| %ext.b = sext <4 x i8> %b to <4 x i16> |
| %add = add nsw <4 x i16> %ext.a, %ext.b |
| %shift = ashr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1> |
| %res = trunc <4 x i16> %shift to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test averaging floor unsigned operations for v4i8 |
| ; Pattern: (a & b) + ((a ^ b) >> 1) with a logical shift, kept in <4 x i8> (no widening). |
| ; The whole sequence is expected to select a single paaddu.b. |
| define void @test_paaddu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_paaddu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: paaddu.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %and = and <4 x i8> %a, %b |
| %xor = xor <4 x i8> %a, %b |
| %shift = lshr <4 x i8> %xor, <i8 1, i8 1, i8 1, i8 1> |
| %res = add <4 x i8> %and, %shift |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test absolute difference signed for v2i16 |
| ; Pattern: smax(a, b) - smin(a, b) on <2 x i16>; expected to select a single pabd.h. |
| ; Note: the function name says "pdif" but the mnemonic in the expected output is pabd. |
| define void @test_pdif_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pdif_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pabd.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %a, <2 x i16> %b) |
| %max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> %b) |
| %res = sub <2 x i16> %max, %min |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test absolute difference unsigned for v2i16 |
| ; Pattern: umax(a, b) - umin(a, b) on <2 x i16>; expected to select a single pabdu.h. |
| ; Note: the function name says "pdifu" but the mnemonic in the expected output is pabdu. |
| define void @test_pdifu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pdifu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pabdu.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %min = call <2 x i16> @llvm.umin.v2i16(<2 x i16> %a, <2 x i16> %b) |
| %max = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %a, <2 x i16> %b) |
| %res = sub <2 x i16> %max, %min |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test absolute difference signed for v4i8 |
| ; Pattern: smax(a, b) - smin(a, b) on <4 x i8>; expected to select a single pabd.b. |
| ; Note: the function name says "pdif" but the mnemonic in the expected output is pabd. |
| define void @test_pdif_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pdif_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pabd.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %min = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %a, <4 x i8> %b) |
| %max = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %a, <4 x i8> %b) |
| %res = sub <4 x i8> %max, %min |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test absolute difference unsigned for v4i8 |
| ; Pattern: umax(a, b) - umin(a, b) on <4 x i8>; expected to select a single pabdu.b. |
| ; Note: the function name says "pdifu" but the mnemonic in the expected output is pabdu. |
| define void @test_pdifu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pdifu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pabdu.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %min = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %a, <4 x i8> %b) |
| %max = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %a, <4 x i8> %b) |
| %res = sub <4 x i8> %max, %min |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test averaging floor subtraction signed for v2i16 |
| ; pasub pattern: (a - b) arithmetic shift right 1 |
| ; Written as: sext both <2 x i16> inputs to i32, sub, ashr by 1, trunc back. |
| ; The whole sequence is expected to select a single pasub.h. |
| define void @test_pasub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pasub_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pasub.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = sext <2 x i16> %a to <2 x i32> |
| %b_ext = sext <2 x i16> %b to <2 x i32> |
| %sub = sub <2 x i32> %a_ext, %b_ext |
| %res = ashr <2 x i32> %sub, <i32 1, i32 1> |
| %res_trunc = trunc <2 x i32> %res to <2 x i16> |
| store <2 x i16> %res_trunc, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test averaging floor subtraction unsigned for v2i16 |
| ; pasubu pattern: (a - b) logical shift right 1 |
| ; Written as: zext both <2 x i16> inputs to i32, sub, lshr by 1, trunc back. |
| ; The whole sequence is expected to select a single pasubu.h. |
| define void @test_pasubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pasubu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pasubu.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = zext <2 x i16> %a to <2 x i32> |
| %b_ext = zext <2 x i16> %b to <2 x i32> |
| %sub = sub <2 x i32> %a_ext, %b_ext |
| %res = lshr <2 x i32> %sub, <i32 1, i32 1> |
| %res_trunc = trunc <2 x i32> %res to <2 x i16> |
| store <2 x i16> %res_trunc, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test averaging floor subtraction signed for v4i8 |
| ; Written as: sext both <4 x i8> inputs to i16, sub, ashr by 1, trunc back. |
| ; The whole sequence is expected to select a single pasub.b. |
| define void @test_pasub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pasub_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pasub.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %a_ext = sext <4 x i8> %a to <4 x i16> |
| %b_ext = sext <4 x i8> %b to <4 x i16> |
| %sub = sub <4 x i16> %a_ext, %b_ext |
| %res = ashr <4 x i16> %sub, <i16 1, i16 1, i16 1, i16 1> |
| %res_trunc = trunc <4 x i16> %res to <4 x i8> |
| store <4 x i8> %res_trunc, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test averaging floor subtraction unsigned for v4i8 |
| ; Written as: zext both <4 x i8> inputs to i16, sub, lshr by 1, trunc back. |
| ; The whole sequence is expected to select a single pasubu.b. |
| define void @test_pasubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pasubu_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pasubu.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %a_ext = zext <4 x i8> %a to <4 x i16> |
| %b_ext = zext <4 x i8> %b to <4 x i16> |
| %sub = sub <4 x i16> %a_ext, %b_ext |
| %res = lshr <4 x i16> %sub, <i16 1, i16 1, i16 1, i16 1> |
| %res_trunc = trunc <4 x i16> %res to <4 x i8> |
| store <4 x i8> %res_trunc, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test PLI (pack load immediate) for v2i16 |
| ; The add of a constant 42 splat and zero is itself a constant; the expected output |
| ; materializes it with pli.h 42 and contains no add. |
| define void @test_pli_h(ptr %ret_ptr) { |
| ; CHECK-LABEL: test_pli_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pli.h a1, 42 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %res = add <2 x i16> <i16 42, i16 42>, <i16 0, i16 0> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Same as test_pli_h but with a negative element value: a <2 x i16> splat of -5 |
| ; is expected to be materialized with pli.h -5. |
| define void @test_pli_h_negative(ptr %ret_ptr) { |
| ; CHECK-LABEL: test_pli_h_negative: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pli.h a1, -5 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %res = add <2 x i16> <i16 -5, i16 -5>, <i16 0, i16 0> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test PLI for v4i8 with a non-negative immediate |
| ; A <4 x i8> splat of 32 is expected to be materialized with pli.b 32 on both targets. |
| define void @test_pli_b(ptr %ret_ptr) { |
| ; CHECK-LABEL: test_pli_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pli.b a1, 32 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %res = add <4 x i8> <i8 32, i8 32, i8 32, i8 32>, <i8 0, i8 0, i8 0, i8 0> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; A <4 x i8> splat of -2: RV32 is expected to use pli.b -2, while RV64 is expected |
| ; to use pli.h -258 instead (-258 is 0xFEFE as an i16, i.e. two 0xFE bytes per halfword). |
| define void @test_pli_b_negative(ptr %ret_ptr) { |
| ; CHECK-RV32-LABEL: test_pli_b_negative: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: pli.b a1, -2 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_pli_b_negative: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: pli.h a1, -258 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %res = add <4 x i8> <i8 -2, i8 -2, i8 -2, i8 -2>, <i8 0, i8 0, i8 0, i8 0> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Extract element 0 of <2 x i16>: expected as a word load followed by a halfword |
| ; store of the same register, with no shift in between. |
| define void @test_extract_vector_16(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_extract_vector_16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: sh a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %extracted = extractelement <2 x i16> %a, i32 0 |
| store i16 %extracted, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Extract element 1 of <2 x i16>: RV32 is expected to load the halfword directly |
| ; from offset 2 (lhu), while RV64 is expected to load the word and shift right by 16. |
| define void @test_extract_vector_16_elem1(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-RV32-LABEL: test_extract_vector_16_elem1: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lhu a1, 2(a1) |
| ; CHECK-RV32-NEXT: sh a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_extract_vector_16_elem1: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srli a1, a1, 16 |
| ; CHECK-RV64-NEXT: sh a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %extracted = extractelement <2 x i16> %a, i32 1 |
| store i16 %extracted, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Extract element 0 of <4 x i8>: expected as a word load followed by a byte store |
| ; of the same register, with no shift in between. |
| define void @test_extract_vector_8(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_extract_vector_8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: sb a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %extracted = extractelement <4 x i8> %a, i32 0 |
| store i8 %extracted, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Extract element 1 of <4 x i8>: both targets are expected to load the word, |
| ; shift right by 8, and store the low byte. |
| define void @test_extract_vector_8_elem1(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_extract_vector_8_elem1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: srli a1, a1, 8 |
| ; CHECK-NEXT: sb a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %extracted = extractelement <4 x i8> %a, i32 1 |
| store i8 %extracted, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test for splat |
| ; Splat of a non-constant i8 into <4 x i8> (insertelement + zero-mask shufflevector); |
| ; expected to select padd.bs with zero as the first source operand. |
| ; %a_ptr is deliberately left unused: it keeps %elt as the third argument, which is |
| ; the register (a2) the expected output reads. |
| define void @test_non_const_splat_i8(ptr %ret_ptr, ptr %a_ptr, i8 %elt) { |
| ; CHECK-LABEL: test_non_const_splat_i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.bs a1, zero, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %insert = insertelement <4 x i8> poison, i8 %elt, i32 0 |
| %splat = shufflevector <4 x i8> %insert, <4 x i8> poison, <4 x i32> zeroinitializer |
| store <4 x i8> %splat, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Splat of a non-constant i16 into <2 x i16> (insertelement + zero-mask shufflevector); |
| ; expected to select padd.hs with zero as the first source operand. |
| ; %a_ptr is deliberately left unused: it keeps %elt as the third argument, which is |
| ; the register (a2) the expected output reads. |
| define void @test_non_const_splat_i16(ptr %ret_ptr, ptr %a_ptr, i16 %elt) { |
| ; CHECK-LABEL: test_non_const_splat_i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: padd.hs a1, zero, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %insert = insertelement <2 x i16> poison, i16 %elt, i32 0 |
| %splat = shufflevector <2 x i16> %insert, <2 x i16> poison, <2 x i32> zeroinitializer |
| store <2 x i16> %splat, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Builds a <4 x i8> from four scalar arguments via insertelement. |
| ; Note the parameter order is (a, c, b, d) while lanes 0..3 are filled with a, b, c, d. |
| ; RV32 is expected to use ppaire.db followed by pack; RV64 is expected to use two |
| ; ppaire.b followed by ppaire.h. |
| define void @test_build_vector_i8(i8 %a, i8 %c, i8 %b, i8 %d, ptr %ret_ptr) { |
| ; CHECK-RV32-LABEL: test_build_vector_i8: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: ppaire.db a0, a0, a2 |
| ; CHECK-RV32-NEXT: pack a0, a0, a1 |
| ; CHECK-RV32-NEXT: sw a0, 0(a4) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_build_vector_i8: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: ppaire.b a1, a1, a3 |
| ; CHECK-RV64-NEXT: ppaire.b a0, a0, a2 |
| ; CHECK-RV64-NEXT: ppaire.h a0, a0, a1 |
| ; CHECK-RV64-NEXT: sw a0, 0(a4) |
| ; CHECK-RV64-NEXT: ret |
| %v0 = insertelement <4 x i8> poison, i8 %a, i32 0 |
| %v1 = insertelement <4 x i8> %v0, i8 %b, i32 1 |
| %v2 = insertelement <4 x i8> %v1, i8 %c, i32 2 |
| %v3 = insertelement <4 x i8> %v2, i8 %d, i32 3 |
| store <4 x i8> %v3, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Builds a <2 x i16> from two scalar arguments via insertelement. |
| ; RV32 is expected to combine them with pack; RV64 is expected to use ppaire.h. |
| define void @test_build_vector_i16(ptr %ret_ptr, i16 %a, i16 %b) { |
| ; CHECK-RV32-LABEL: test_build_vector_i16: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: pack a1, a1, a2 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_build_vector_i16: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %v0 = insertelement <2 x i16> poison, i16 %a, i32 0 |
| %v1 = insertelement <2 x i16> %v0, i16 %b, i32 1 |
| store <2 x i16> %v1, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test logical shift left immediate for v2i16 |
| ; shl <2 x i16> by a constant splat of 2: expected to select pslli.h with immediate 2. |
| define void @test_pslli_h(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_pslli_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: pslli.h a1, a1, 2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %res = shl <2 x i16> %a, splat(i16 2) |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test logical shift left immediate for v4i8 |
| ; shl <4 x i8> by a constant splat of 2: expected to select pslli.b with immediate 2. |
| define void @test_pslli_b(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_pslli_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: pslli.b a1, a1, 2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %res = shl <4 x i8> %a, splat(i8 2) |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test arithmetic saturation shift left immediate for v2i16 |
| ; llvm.sshl.sat on <2 x i16> with a constant splat shift of 2: expected to select |
| ; psslai.h with immediate 2. |
| define void @test_psslai_h(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_psslai_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psslai.h a1, a1, 2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %res = call <2 x i16> @llvm.sshl.sat.v2i16(<2 x i16> %a, <2 x i16> splat(i16 2)) |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test logical shift right immediate |
| ; lshr <2 x i16> by a constant splat of 2: expected to select psrli.h with immediate 2. |
| define void @test_psrli_h(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_psrli_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psrli.h a1, a1, 2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %res = lshr <2 x i16> %a, splat(i16 2) |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; lshr <4 x i8> by a constant splat of 2: expected to select psrli.b with immediate 2. |
| define void @test_psrli_b(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_psrli_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psrli.b a1, a1, 2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %res = lshr <4 x i8> %a, splat(i8 2) |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test arithmetic shift right immediate |
| ; ashr <2 x i16> by a constant splat of 2: expected to select psrai.h with immediate 2. |
| define void @test_psrai_h(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_psrai_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psrai.h a1, a1, 2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %res = ashr <2 x i16> %a, splat(i16 2) |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; ashr <4 x i8> by a constant splat of 2: expected to select psrai.b with immediate 2. |
| define void @test_psrai_b(ptr %ret_ptr, ptr %a_ptr) { |
| ; CHECK-LABEL: test_psrai_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psrai.b a1, a1, 2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %res = ashr <4 x i8> %a, splat(i8 2) |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test logical shift left (scalar shamt) |
| ; shl <2 x i16> by a splat of the scalar %shamt: expected to select psll.hs, |
| ; which takes the scalar register directly (no separate splat instruction). |
| define void @test_psll_hs(ptr %ret_ptr, ptr %a_ptr, i16 %shamt) { |
| ; CHECK-LABEL: test_psll_hs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psll.hs a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %insert = insertelement <2 x i16> poison, i16 %shamt, i32 0 |
| %b = shufflevector <2 x i16> %insert, <2 x i16> poison, <2 x i32> zeroinitializer |
| %res = shl <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; shl <4 x i8> by a splat of the scalar %shamt: expected to select psll.bs, |
| ; which takes the scalar register directly (no separate splat instruction). |
| define void @test_psll_bs(ptr %ret_ptr, ptr %a_ptr, i8 %shamt) { |
| ; CHECK-LABEL: test_psll_bs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psll.bs a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %insert = insertelement <4 x i8> poison, i8 %shamt, i32 0 |
| %b = shufflevector <4 x i8> %insert, <4 x i8> poison, <4 x i32> zeroinitializer |
| %res = shl <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test logical shift left (vector shamt) |
| ; shl <2 x i16> where the shift amounts are a full vector loaded from memory (not a splat). |
| ; No packed shift is expected: each lane is shifted with scalar sll (the upper lane |
| ; after srli by 16) and the results are recombined with pack on RV32 or ppaire.h on RV64. |
| define void @test_psll_hs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { |
| ; CHECK-RV32-LABEL: test_psll_hs_vec_shamt: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: sll a3, a1, a2 |
| ; CHECK-RV32-NEXT: srli a2, a2, 16 |
| ; CHECK-RV32-NEXT: srli a1, a1, 16 |
| ; CHECK-RV32-NEXT: sll a1, a1, a2 |
| ; CHECK-RV32-NEXT: pack a1, a3, a1 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psll_hs_vec_shamt: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: sll a3, a1, a2 |
| ; CHECK-RV64-NEXT: srli a2, a2, 16 |
| ; CHECK-RV64-NEXT: srli a1, a1, 16 |
| ; CHECK-RV64-NEXT: sll a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a3, a1 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %shamt_ptr |
| %res = shl <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; v4i8 shl with a per-lane shift amount loaded from %shamt_ptr. |
| ; The checks show four scalar sll instructions, one per byte lane, with the |
| ; results recombined by ppaire.db + pack on RV32 and by ppaire.b + ppaire.h |
| ; on RV64. |
| define void @test_psll_bs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { |
| ; CHECK-RV32-LABEL: test_psll_bs_vec_shamt: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srli a3, a2, 24 |
| ; CHECK-RV32-NEXT: srli a4, a1, 24 |
| ; CHECK-RV32-NEXT: srli a5, a2, 8 |
| ; CHECK-RV32-NEXT: srli a6, a1, 8 |
| ; CHECK-RV32-NEXT: sll a7, a4, a3 |
| ; CHECK-RV32-NEXT: sll a6, a6, a5 |
| ; CHECK-RV32-NEXT: sll a4, a1, a2 |
| ; CHECK-RV32-NEXT: srli a2, a2, 16 |
| ; CHECK-RV32-NEXT: srli a1, a1, 16 |
| ; CHECK-RV32-NEXT: sll a5, a1, a2 |
| ; CHECK-RV32-NEXT: ppaire.db a2, a4, a6 |
| ; CHECK-RV32-NEXT: pack a1, a2, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psll_bs_vec_shamt: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srli a3, a2, 24 |
| ; CHECK-RV64-NEXT: srli a4, a1, 24 |
| ; CHECK-RV64-NEXT: srli a5, a2, 16 |
| ; CHECK-RV64-NEXT: sll a3, a4, a3 |
| ; CHECK-RV64-NEXT: srli a4, a1, 16 |
| ; CHECK-RV64-NEXT: sll a4, a4, a5 |
| ; CHECK-RV64-NEXT: sll a5, a1, a2 |
| ; CHECK-RV64-NEXT: srli a2, a2, 8 |
| ; CHECK-RV64-NEXT: srli a1, a1, 8 |
| ; CHECK-RV64-NEXT: sll a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.b a2, a4, a3 |
| ; CHECK-RV64-NEXT: ppaire.b a1, a5, a1 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %shamt_ptr |
| %res = shl <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test logical shift right (scalar shamt) |
| ; v2i16 lshr whose shift amount is the scalar %shamt splatted to both lanes |
| ; (insertelement + zero-mask shufflevector). The checks expect one psrl.hs |
| ; that takes the amount directly from a2 on both RV32 and RV64. |
| define void @test_psrl_hs(ptr %ret_ptr, ptr %a_ptr, i16 %shamt) { |
| ; CHECK-LABEL: test_psrl_hs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psrl.hs a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %insert = insertelement <2 x i16> poison, i16 %shamt, i32 0 |
| %b = shufflevector <2 x i16> %insert, <2 x i16> poison, <2 x i32> zeroinitializer |
| %res = lshr <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; v4i8 lshr whose shift amount is the scalar %shamt splatted to all four |
| ; lanes (insertelement + zero-mask shufflevector). The checks expect one |
| ; psrl.bs that takes the amount directly from a2 on both RV32 and RV64. |
| define void @test_psrl_bs(ptr %ret_ptr, ptr %a_ptr, i8 %shamt) { |
| ; CHECK-LABEL: test_psrl_bs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psrl.bs a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %insert = insertelement <4 x i8> poison, i8 %shamt, i32 0 |
| %b = shufflevector <4 x i8> %insert, <4 x i8> poison, <4 x i32> zeroinitializer |
| %res = lshr <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test arithmetic shift right (scalar shamt) |
| ; v2i16 ashr whose shift amount is the scalar %shamt splatted to both lanes |
| ; (insertelement + zero-mask shufflevector). The checks expect one psra.hs |
| ; that takes the amount directly from a2 on both RV32 and RV64. |
| define void @test_psra_hs(ptr %ret_ptr, ptr %a_ptr, i16 %shamt) { |
| ; CHECK-LABEL: test_psra_hs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psra.hs a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %insert = insertelement <2 x i16> poison, i16 %shamt, i32 0 |
| %b = shufflevector <2 x i16> %insert, <2 x i16> poison, <2 x i32> zeroinitializer |
| %res = ashr <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; v4i8 ashr whose shift amount is the scalar %shamt splatted to all four |
| ; lanes (insertelement + zero-mask shufflevector). The checks expect one |
| ; psra.bs that takes the amount directly from a2 on both RV32 and RV64. |
| define void @test_psra_bs(ptr %ret_ptr, ptr %a_ptr, i8 %shamt) { |
| ; CHECK-LABEL: test_psra_bs: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: psra.bs a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %insert = insertelement <4 x i8> poison, i8 %shamt, i32 0 |
| %b = shufflevector <4 x i8> %insert, <4 x i8> poison, <4 x i32> zeroinitializer |
| %res = ashr <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test logical shift right (vector shamt) |
| ; v2i16 lshr with a per-lane shift amount loaded from %shamt_ptr. |
| ; The checks show no packed shift for this case: each halfword is isolated |
| ; and zero-extended (srli/srliw for the high lane, zext.h for the low lane), |
| ; shifted with a scalar srl, and recombined (pack on RV32, ppaire.h on RV64). |
| define void @test_psrl_hs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { |
| ; CHECK-RV32-LABEL: test_psrl_hs_vec_shamt: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srli a3, a2, 16 |
| ; CHECK-RV32-NEXT: srli a4, a1, 16 |
| ; CHECK-RV32-NEXT: zext.h a1, a1 |
| ; CHECK-RV32-NEXT: srl a3, a4, a3 |
| ; CHECK-RV32-NEXT: srl a1, a1, a2 |
| ; CHECK-RV32-NEXT: pack a1, a1, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psrl_hs_vec_shamt: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srli a3, a2, 16 |
| ; CHECK-RV64-NEXT: srliw a4, a1, 16 |
| ; CHECK-RV64-NEXT: zext.h a1, a1 |
| ; CHECK-RV64-NEXT: srl a3, a4, a3 |
| ; CHECK-RV64-NEXT: srl a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a3 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %shamt_ptr |
| %res = lshr <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; v4i8 lshr with a per-lane shift amount loaded from %shamt_ptr. |
| ; The checks show each byte lane being isolated and zero-extended |
| ; (srli/srliw, slli+srli or zext.b), shifted with a scalar srl, and then |
| ; recombined by ppaire.db + pack on RV32 and by ppaire.b + ppaire.h on RV64. |
| define void @test_psrl_bs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { |
| ; CHECK-RV32-LABEL: test_psrl_bs_vec_shamt: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srli a3, a2, 24 |
| ; CHECK-RV32-NEXT: srli a4, a1, 24 |
| ; CHECK-RV32-NEXT: srli a5, a2, 8 |
| ; CHECK-RV32-NEXT: slli a6, a1, 16 |
| ; CHECK-RV32-NEXT: srl a7, a4, a3 |
| ; CHECK-RV32-NEXT: srli a3, a6, 24 |
| ; CHECK-RV32-NEXT: srl a6, a3, a5 |
| ; CHECK-RV32-NEXT: zext.b a3, a1 |
| ; CHECK-RV32-NEXT: srli a4, a2, 16 |
| ; CHECK-RV32-NEXT: slli a1, a1, 8 |
| ; CHECK-RV32-NEXT: srl a2, a3, a2 |
| ; CHECK-RV32-NEXT: srli a1, a1, 24 |
| ; CHECK-RV32-NEXT: srl a3, a1, a4 |
| ; CHECK-RV32-NEXT: ppaire.db a2, a2, a6 |
| ; CHECK-RV32-NEXT: pack a1, a2, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psrl_bs_vec_shamt: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srli a3, a2, 24 |
| ; CHECK-RV64-NEXT: srliw a4, a1, 24 |
| ; CHECK-RV64-NEXT: srli a5, a2, 16 |
| ; CHECK-RV64-NEXT: srl a3, a4, a3 |
| ; CHECK-RV64-NEXT: slli a4, a1, 40 |
| ; CHECK-RV64-NEXT: srli a4, a4, 56 |
| ; CHECK-RV64-NEXT: srl a4, a4, a5 |
| ; CHECK-RV64-NEXT: zext.b a5, a1 |
| ; CHECK-RV64-NEXT: srl a5, a5, a2 |
| ; CHECK-RV64-NEXT: srli a2, a2, 8 |
| ; CHECK-RV64-NEXT: slli a1, a1, 48 |
| ; CHECK-RV64-NEXT: srli a1, a1, 56 |
| ; CHECK-RV64-NEXT: srl a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.b a2, a4, a3 |
| ; CHECK-RV64-NEXT: ppaire.b a1, a5, a1 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %shamt_ptr |
| %res = lshr <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test arithmetic shift right (vector shamt) |
| ; v2i16 ashr with a per-lane shift amount loaded from %shamt_ptr. |
| ; The checks show no packed shift for this case: each halfword is isolated |
| ; and sign-extended (srai for the high lane, sext.h for the low lane), |
| ; shifted with a scalar sra, and recombined (pack on RV32, ppaire.h on RV64). |
| define void @test_psra_hs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { |
| ; CHECK-RV32-LABEL: test_psra_hs_vec_shamt: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srli a3, a2, 16 |
| ; CHECK-RV32-NEXT: srai a4, a1, 16 |
| ; CHECK-RV32-NEXT: sext.h a1, a1 |
| ; CHECK-RV32-NEXT: sra a3, a4, a3 |
| ; CHECK-RV32-NEXT: sra a1, a1, a2 |
| ; CHECK-RV32-NEXT: pack a1, a1, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psra_hs_vec_shamt: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srli a3, a2, 16 |
| ; CHECK-RV64-NEXT: srai a4, a1, 16 |
| ; CHECK-RV64-NEXT: sext.h a1, a1 |
| ; CHECK-RV64-NEXT: sra a3, a4, a3 |
| ; CHECK-RV64-NEXT: sra a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a3 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %shamt_ptr |
| %res = ashr <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; v4i8 ashr with a per-lane shift amount loaded from %shamt_ptr. |
| ; The checks show each byte lane being isolated and sign-extended (srai for |
| ; the top lane, srli + sext.b or sext.b for the others), shifted with a |
| ; scalar sra, and then recombined by ppaire.db + pack on RV32 and by |
| ; ppaire.b + ppaire.h on RV64. |
| define void @test_psra_bs_vec_shamt(ptr %ret_ptr, ptr %a_ptr, ptr %shamt_ptr) { |
| ; CHECK-RV32-LABEL: test_psra_bs_vec_shamt: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srli a3, a2, 24 |
| ; CHECK-RV32-NEXT: srai a4, a1, 24 |
| ; CHECK-RV32-NEXT: srli a5, a2, 8 |
| ; CHECK-RV32-NEXT: srli a6, a1, 8 |
| ; CHECK-RV32-NEXT: sra a7, a4, a3 |
| ; CHECK-RV32-NEXT: sext.b a3, a6 |
| ; CHECK-RV32-NEXT: sra a6, a3, a5 |
| ; CHECK-RV32-NEXT: sext.b a3, a1 |
| ; CHECK-RV32-NEXT: srli a4, a2, 16 |
| ; CHECK-RV32-NEXT: srli a1, a1, 16 |
| ; CHECK-RV32-NEXT: sra a2, a3, a2 |
| ; CHECK-RV32-NEXT: sext.b a1, a1 |
| ; CHECK-RV32-NEXT: sra a3, a1, a4 |
| ; CHECK-RV32-NEXT: ppaire.db a2, a2, a6 |
| ; CHECK-RV32-NEXT: pack a1, a2, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psra_bs_vec_shamt: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srli a3, a2, 24 |
| ; CHECK-RV64-NEXT: srai a4, a1, 24 |
| ; CHECK-RV64-NEXT: srli a5, a2, 16 |
| ; CHECK-RV64-NEXT: sra a3, a4, a3 |
| ; CHECK-RV64-NEXT: srli a4, a1, 16 |
| ; CHECK-RV64-NEXT: sext.b a4, a4 |
| ; CHECK-RV64-NEXT: sra a4, a4, a5 |
| ; CHECK-RV64-NEXT: sext.b a5, a1 |
| ; CHECK-RV64-NEXT: sra a5, a5, a2 |
| ; CHECK-RV64-NEXT: srli a2, a2, 8 |
| ; CHECK-RV64-NEXT: srli a1, a1, 8 |
| ; CHECK-RV64-NEXT: sext.b a1, a1 |
| ; CHECK-RV64-NEXT: sra a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.b a2, a4, a3 |
| ; CHECK-RV64-NEXT: ppaire.b a1, a5, a1 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %shamt_ptr |
| %res = ashr <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test packed multiply high signed for v2i16 |
| ; Pattern: sext both operands to <2 x i32>, multiply, shift right by 16 and |
| ; truncate back to <2 x i16>. Only bits 16..31 of each product survive the |
| ; trunc, so the lshr below yields the high half of the signed product. |
| ; The checks expect a single pmulh.h on both RV32 and RV64. |
| define void @test_pmulh_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pmulh_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmulh.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = sext <2 x i16> %a to <2 x i32> |
| %b_ext = sext <2 x i16> %b to <2 x i32> |
| %mul = mul <2 x i32> %a_ext, %b_ext |
| %shift = lshr <2 x i32> %mul, <i32 16, i32 16> |
| %res = trunc <2 x i32> %shift to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test packed multiply high unsigned for v2i16 |
| ; Pattern: zext both operands to <2 x i32>, multiply, shift right by 16 and |
| ; truncate back to <2 x i16>, i.e. the high half of the unsigned product. |
| ; The checks expect a single pmulhu.h on both RV32 and RV64. |
| define void @test_pmulhu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pmulhu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmulhu.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = zext <2 x i16> %a to <2 x i32> |
| %b_ext = zext <2 x i16> %b to <2 x i32> |
| %mul = mul <2 x i32> %a_ext, %b_ext |
| %shift = lshr <2 x i32> %mul, <i32 16, i32 16> |
| %res = trunc <2 x i32> %shift to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test packed multiply high signed-unsigned for v2i16 |
| ; Pattern: %a is sign-extended and %b is zero-extended to <2 x i32> before |
| ; the multiply, shift right by 16 and trunc. The checks expect a single |
| ; pmulhsu.h with the sign-extended value (a1) as the first source operand. |
| define void @test_pmulhsu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pmulhsu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmulhsu.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = sext <2 x i16> %a to <2 x i32> |
| %b_ext = zext <2 x i16> %b to <2 x i32> |
| %mul = mul <2 x i32> %a_ext, %b_ext |
| %shift = lshr <2 x i32> %mul, <i32 16, i32 16> |
| %res = trunc <2 x i32> %shift to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Commuted signed-unsigned multiply high for v2i16: here %a is zero-extended |
| ; and %b is sign-extended. The checks expect pmulhsu.h to still be selected, |
| ; with its source operands swapped (a2, a1) so that the sign-extended value |
| ; remains the first source operand. |
| define void @test_pmulhsu_h_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pmulhsu_h_commuted: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmulhsu.h a1, a2, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = zext <2 x i16> %a to <2 x i32> |
| %b_ext = sext <2 x i16> %b to <2 x i32> |
| %mul = mul <2 x i32> %a_ext, %b_ext |
| %shift = lshr <2 x i32> %mul, <i32 16, i32 16> |
| %res = trunc <2 x i32> %shift to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test packed multiply high rounding signed for v2i16 |
| ; Pattern: sext both operands to <2 x i32>, multiply, add 32768 (1 << 15, |
| ; half of the discarded low 16 bits) as the rounding term, shift right by 16 |
| ; and truncate. The checks expect a single pmulhr.h on both RV32 and RV64. |
| define void @test_pmulhr_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pmulhr_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmulhr.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = sext <2 x i16> %a to <2 x i32> |
| %b_ext = sext <2 x i16> %b to <2 x i32> |
| %mul = mul <2 x i32> %a_ext, %b_ext |
| %add = add <2 x i32> %mul, <i32 32768, i32 32768> |
| %shift = lshr <2 x i32> %add, <i32 16, i32 16> |
| %res = trunc <2 x i32> %shift to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test packed multiply high rounding unsigned for v2i16 |
| ; Pattern: zext both operands to <2 x i32>, multiply, add 32768 (1 << 15, |
| ; half of the discarded low 16 bits) as the rounding term, shift right by 16 |
| ; and truncate. The checks expect a single pmulhru.h on both RV32 and RV64. |
| define void @test_pmulhru_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pmulhru_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmulhru.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = zext <2 x i16> %a to <2 x i32> |
| %b_ext = zext <2 x i16> %b to <2 x i32> |
| %mul = mul <2 x i32> %a_ext, %b_ext |
| %add = add <2 x i32> %mul, <i32 32768, i32 32768> |
| %shift = lshr <2 x i32> %add, <i32 16, i32 16> |
| %res = trunc <2 x i32> %shift to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test packed multiply high rounding signed-unsigned for v2i16 |
| ; Pattern: %a is sign-extended and %b is zero-extended to <2 x i32>; the |
| ; product gets 32768 (1 << 15) added as the rounding term before the shift |
| ; right by 16 and trunc. The checks expect a single pmulhrsu.h with the |
| ; sign-extended value (a1) as the first source operand. |
| define void @test_pmulhrsu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pmulhrsu_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmulhrsu.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = sext <2 x i16> %a to <2 x i32> |
| %b_ext = zext <2 x i16> %b to <2 x i32> |
| %mul = mul <2 x i32> %a_ext, %b_ext |
| %add = add <2 x i32> %mul, <i32 32768, i32 32768> |
| %shift = lshr <2 x i32> %add, <i32 16, i32 16> |
| %res = trunc <2 x i32> %shift to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Commuted rounding signed-unsigned multiply high for v2i16: here %a is |
| ; zero-extended and %b is sign-extended. The checks expect pmulhrsu.h to |
| ; still be selected, with its source operands swapped (a2, a1) so that the |
| ; sign-extended value remains the first source operand. |
| define void @test_pmulhrsu_h_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_pmulhrsu_h_commuted: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmulhrsu.h a1, a2, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %a_ext = zext <2 x i16> %a to <2 x i32> |
| %b_ext = sext <2 x i16> %b to <2 x i32> |
| %mul = mul <2 x i32> %a_ext, %b_ext |
| %add = add <2 x i32> %mul, <i32 32768, i32 32768> |
| %shift = lshr <2 x i32> %add, <i32 16, i32 16> |
| %res = trunc <2 x i32> %shift to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test packed multiply low for v2i16 |
| ; Plain mul <2 x i16>. The expected code differs per target: the RV32 checks |
| ; show pwmul.h followed by pnsrli.h with a shift of 0, while the RV64 checks |
| ; show pmul.w.h11 and pmul.w.h00 whose results are combined with ppaire.h. |
| define void @test_pmul_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_pmul_h: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: pwmul.h a2, a1, a2 |
| ; CHECK-RV32-NEXT: pnsrli.h a1, a2, 0 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_pmul_h: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: pmul.w.h11 a3, a1, a2 |
| ; CHECK-RV64-NEXT: pmul.w.h00 a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a3 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = mul <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Test packed multiply low for v4i8 |
| ; Plain mul <4 x i8>. The expected code differs per target: the RV32 checks |
| ; show pwmul.b followed by pnsrli.b with a shift of 0, while the RV64 checks |
| ; show pmul.h.b11 and pmul.h.b00 whose results are combined with ppaire.b. |
| define void @test_pmul_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_pmul_b: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: pwmul.b a2, a1, a2 |
| ; CHECK-RV32-NEXT: pnsrli.b a1, a2, 0 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_pmul_b: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: pmul.h.b11 a3, a1, a2 |
| ; CHECK-RV64-NEXT: pmul.h.b00 a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.b a1, a1, a3 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = mul <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Division and remainder tests |
| ; sdiv <2 x i16>. The checks show no packed divide: each halfword of both |
| ; operands is sign-extended (srai for the high lane, sext.h for the low |
| ; lane), divided with scalar div (divw on RV64), and the quotients are |
| ; recombined (pack on RV32, ppaire.h on RV64). |
| define void @test_psdiv_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_psdiv_h: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srai a3, a2, 16 |
| ; CHECK-RV32-NEXT: srai a4, a1, 16 |
| ; CHECK-RV32-NEXT: sext.h a2, a2 |
| ; CHECK-RV32-NEXT: sext.h a1, a1 |
| ; CHECK-RV32-NEXT: div a3, a4, a3 |
| ; CHECK-RV32-NEXT: div a1, a1, a2 |
| ; CHECK-RV32-NEXT: pack a1, a1, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psdiv_h: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srai a3, a2, 16 |
| ; CHECK-RV64-NEXT: srai a4, a1, 16 |
| ; CHECK-RV64-NEXT: sext.h a2, a2 |
| ; CHECK-RV64-NEXT: sext.h a1, a1 |
| ; CHECK-RV64-NEXT: divw a3, a4, a3 |
| ; CHECK-RV64-NEXT: divw a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a3 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = sdiv <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; sdiv <4 x i8>. The checks show no packed divide: each byte lane of both |
| ; operands is sign-extended (srai or srli + sext.b), divided with scalar div |
| ; (divw on RV64), and the four quotients are recombined by ppaire.db + pack |
| ; on RV32 and by ppaire.b + ppaire.h on RV64. |
| define void @test_psdiv_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_psdiv_b: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srai a3, a2, 24 |
| ; CHECK-RV32-NEXT: srai a4, a1, 24 |
| ; CHECK-RV32-NEXT: srli a5, a2, 8 |
| ; CHECK-RV32-NEXT: srli a6, a1, 8 |
| ; CHECK-RV32-NEXT: div a7, a4, a3 |
| ; CHECK-RV32-NEXT: sext.b a3, a5 |
| ; CHECK-RV32-NEXT: sext.b a4, a6 |
| ; CHECK-RV32-NEXT: div a6, a4, a3 |
| ; CHECK-RV32-NEXT: sext.b a3, a2 |
| ; CHECK-RV32-NEXT: sext.b a4, a1 |
| ; CHECK-RV32-NEXT: srli a2, a2, 16 |
| ; CHECK-RV32-NEXT: srli a1, a1, 16 |
| ; CHECK-RV32-NEXT: div a4, a4, a3 |
| ; CHECK-RV32-NEXT: sext.b a2, a2 |
| ; CHECK-RV32-NEXT: sext.b a1, a1 |
| ; CHECK-RV32-NEXT: div a5, a1, a2 |
| ; CHECK-RV32-NEXT: ppaire.db a2, a4, a6 |
| ; CHECK-RV32-NEXT: pack a1, a2, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psdiv_b: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srai a3, a2, 24 |
| ; CHECK-RV64-NEXT: srai a4, a1, 24 |
| ; CHECK-RV64-NEXT: srli a5, a2, 16 |
| ; CHECK-RV64-NEXT: sext.b a6, a2 |
| ; CHECK-RV64-NEXT: divw a3, a4, a3 |
| ; CHECK-RV64-NEXT: sext.b a4, a1 |
| ; CHECK-RV64-NEXT: divw a4, a4, a6 |
| ; CHECK-RV64-NEXT: srli a6, a1, 16 |
| ; CHECK-RV64-NEXT: sext.b a5, a5 |
| ; CHECK-RV64-NEXT: sext.b a6, a6 |
| ; CHECK-RV64-NEXT: divw a5, a6, a5 |
| ; CHECK-RV64-NEXT: srli a2, a2, 8 |
| ; CHECK-RV64-NEXT: srli a1, a1, 8 |
| ; CHECK-RV64-NEXT: sext.b a2, a2 |
| ; CHECK-RV64-NEXT: sext.b a1, a1 |
| ; CHECK-RV64-NEXT: divw a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.b a2, a5, a3 |
| ; CHECK-RV64-NEXT: ppaire.b a1, a4, a1 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = sdiv <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; udiv <2 x i16>. The checks show no packed divide: each halfword of both |
| ; operands is zero-extended (srli/srliw for the high lane, zext.h for the |
| ; low lane), divided with scalar divu (divuw on RV64), and the quotients are |
| ; recombined (pack on RV32, ppaire.h on RV64). |
| define void @test_pudiv_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_pudiv_h: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srli a3, a2, 16 |
| ; CHECK-RV32-NEXT: srli a4, a1, 16 |
| ; CHECK-RV32-NEXT: zext.h a2, a2 |
| ; CHECK-RV32-NEXT: zext.h a1, a1 |
| ; CHECK-RV32-NEXT: divu a3, a4, a3 |
| ; CHECK-RV32-NEXT: divu a1, a1, a2 |
| ; CHECK-RV32-NEXT: pack a1, a1, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_pudiv_h: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srliw a3, a2, 16 |
| ; CHECK-RV64-NEXT: srliw a4, a1, 16 |
| ; CHECK-RV64-NEXT: zext.h a2, a2 |
| ; CHECK-RV64-NEXT: zext.h a1, a1 |
| ; CHECK-RV64-NEXT: divuw a3, a4, a3 |
| ; CHECK-RV64-NEXT: divuw a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a3 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = udiv <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; udiv <4 x i8>. The checks show no packed divide: each byte lane of both |
| ; operands is zero-extended (srli/srliw, slli + srli or zext.b), divided |
| ; with scalar divu (divuw on RV64), and the four quotients are recombined by |
| ; ppaire.db + pack on RV32 and by ppaire.b + ppaire.h on RV64. |
| define void @test_pudiv_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_pudiv_b: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srli a3, a2, 24 |
| ; CHECK-RV32-NEXT: srli a4, a1, 24 |
| ; CHECK-RV32-NEXT: slli a5, a2, 16 |
| ; CHECK-RV32-NEXT: slli a6, a1, 16 |
| ; CHECK-RV32-NEXT: divu a7, a4, a3 |
| ; CHECK-RV32-NEXT: srli a5, a5, 24 |
| ; CHECK-RV32-NEXT: srli a3, a6, 24 |
| ; CHECK-RV32-NEXT: divu a6, a3, a5 |
| ; CHECK-RV32-NEXT: zext.b a3, a2 |
| ; CHECK-RV32-NEXT: zext.b a4, a1 |
| ; CHECK-RV32-NEXT: slli a2, a2, 8 |
| ; CHECK-RV32-NEXT: slli a1, a1, 8 |
| ; CHECK-RV32-NEXT: divu a4, a4, a3 |
| ; CHECK-RV32-NEXT: srli a2, a2, 24 |
| ; CHECK-RV32-NEXT: srli a1, a1, 24 |
| ; CHECK-RV32-NEXT: divu a5, a1, a2 |
| ; CHECK-RV32-NEXT: ppaire.db a2, a4, a6 |
| ; CHECK-RV32-NEXT: pack a1, a2, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_pudiv_b: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srliw a3, a2, 24 |
| ; CHECK-RV64-NEXT: srliw a4, a1, 24 |
| ; CHECK-RV64-NEXT: slli a5, a2, 40 |
| ; CHECK-RV64-NEXT: zext.b a6, a2 |
| ; CHECK-RV64-NEXT: divuw a3, a4, a3 |
| ; CHECK-RV64-NEXT: zext.b a4, a1 |
| ; CHECK-RV64-NEXT: divuw a4, a4, a6 |
| ; CHECK-RV64-NEXT: slli a6, a1, 40 |
| ; CHECK-RV64-NEXT: srli a5, a5, 56 |
| ; CHECK-RV64-NEXT: srli a6, a6, 56 |
| ; CHECK-RV64-NEXT: divuw a5, a6, a5 |
| ; CHECK-RV64-NEXT: slli a2, a2, 48 |
| ; CHECK-RV64-NEXT: slli a1, a1, 48 |
| ; CHECK-RV64-NEXT: srli a2, a2, 56 |
| ; CHECK-RV64-NEXT: srli a1, a1, 56 |
| ; CHECK-RV64-NEXT: divuw a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.b a2, a5, a3 |
| ; CHECK-RV64-NEXT: ppaire.b a1, a4, a1 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = udiv <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; srem <2 x i16>. The checks show no packed remainder: each halfword of both |
| ; operands is sign-extended (srai for the high lane, sext.h for the low |
| ; lane), reduced with scalar rem (remw on RV64), and the results are |
| ; recombined (pack on RV32, ppaire.h on RV64). |
| define void @test_psrem_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_psrem_h: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srai a3, a2, 16 |
| ; CHECK-RV32-NEXT: srai a4, a1, 16 |
| ; CHECK-RV32-NEXT: sext.h a2, a2 |
| ; CHECK-RV32-NEXT: sext.h a1, a1 |
| ; CHECK-RV32-NEXT: rem a3, a4, a3 |
| ; CHECK-RV32-NEXT: rem a1, a1, a2 |
| ; CHECK-RV32-NEXT: pack a1, a1, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psrem_h: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srai a3, a2, 16 |
| ; CHECK-RV64-NEXT: srai a4, a1, 16 |
| ; CHECK-RV64-NEXT: sext.h a2, a2 |
| ; CHECK-RV64-NEXT: sext.h a1, a1 |
| ; CHECK-RV64-NEXT: remw a3, a4, a3 |
| ; CHECK-RV64-NEXT: remw a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a3 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = srem <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; srem <4 x i8>. The checks show no packed remainder: each byte lane of both |
| ; operands is sign-extended (srai or srli + sext.b), reduced with scalar rem |
| ; (remw on RV64), and the four results are recombined by ppaire.db + pack on |
| ; RV32 and by ppaire.b + ppaire.h on RV64. |
| define void @test_psrem_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_psrem_b: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srai a3, a2, 24 |
| ; CHECK-RV32-NEXT: srai a4, a1, 24 |
| ; CHECK-RV32-NEXT: srli a5, a2, 8 |
| ; CHECK-RV32-NEXT: srli a6, a1, 8 |
| ; CHECK-RV32-NEXT: rem a7, a4, a3 |
| ; CHECK-RV32-NEXT: sext.b a3, a5 |
| ; CHECK-RV32-NEXT: sext.b a4, a6 |
| ; CHECK-RV32-NEXT: rem a6, a4, a3 |
| ; CHECK-RV32-NEXT: sext.b a3, a2 |
| ; CHECK-RV32-NEXT: sext.b a4, a1 |
| ; CHECK-RV32-NEXT: srli a2, a2, 16 |
| ; CHECK-RV32-NEXT: srli a1, a1, 16 |
| ; CHECK-RV32-NEXT: rem a4, a4, a3 |
| ; CHECK-RV32-NEXT: sext.b a2, a2 |
| ; CHECK-RV32-NEXT: sext.b a1, a1 |
| ; CHECK-RV32-NEXT: rem a5, a1, a2 |
| ; CHECK-RV32-NEXT: ppaire.db a2, a4, a6 |
| ; CHECK-RV32-NEXT: pack a1, a2, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_psrem_b: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srai a3, a2, 24 |
| ; CHECK-RV64-NEXT: srai a4, a1, 24 |
| ; CHECK-RV64-NEXT: srli a5, a2, 16 |
| ; CHECK-RV64-NEXT: sext.b a6, a2 |
| ; CHECK-RV64-NEXT: remw a3, a4, a3 |
| ; CHECK-RV64-NEXT: sext.b a4, a1 |
| ; CHECK-RV64-NEXT: remw a4, a4, a6 |
| ; CHECK-RV64-NEXT: srli a6, a1, 16 |
| ; CHECK-RV64-NEXT: sext.b a5, a5 |
| ; CHECK-RV64-NEXT: sext.b a6, a6 |
| ; CHECK-RV64-NEXT: remw a5, a6, a5 |
| ; CHECK-RV64-NEXT: srli a2, a2, 8 |
| ; CHECK-RV64-NEXT: srli a1, a1, 8 |
| ; CHECK-RV64-NEXT: sext.b a2, a2 |
| ; CHECK-RV64-NEXT: sext.b a1, a1 |
| ; CHECK-RV64-NEXT: remw a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.b a2, a5, a3 |
| ; CHECK-RV64-NEXT: ppaire.b a1, a4, a1 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = srem <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; urem <2 x i16>. The checks show no packed remainder: each halfword of both |
| ; operands is zero-extended (srli/srliw for the high lane, zext.h for the |
| ; low lane), reduced with scalar remu (remuw on RV64), and the results are |
| ; recombined (pack on RV32, ppaire.h on RV64). |
| define void @test_purem_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_purem_h: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srli a3, a2, 16 |
| ; CHECK-RV32-NEXT: srli a4, a1, 16 |
| ; CHECK-RV32-NEXT: zext.h a2, a2 |
| ; CHECK-RV32-NEXT: zext.h a1, a1 |
| ; CHECK-RV32-NEXT: remu a3, a4, a3 |
| ; CHECK-RV32-NEXT: remu a1, a1, a2 |
| ; CHECK-RV32-NEXT: pack a1, a1, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_purem_h: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srliw a3, a2, 16 |
| ; CHECK-RV64-NEXT: srliw a4, a1, 16 |
| ; CHECK-RV64-NEXT: zext.h a2, a2 |
| ; CHECK-RV64-NEXT: zext.h a1, a1 |
| ; CHECK-RV64-NEXT: remuw a3, a4, a3 |
| ; CHECK-RV64-NEXT: remuw a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a3 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %res = urem <2 x i16> %a, %b |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; urem <4 x i8>. The checks show no packed remainder: each byte lane of both |
| ; operands is zero-extended (srli/srliw, slli + srli or zext.b), reduced |
| ; with scalar remu (remuw on RV64), and the four results are recombined by |
| ; ppaire.db + pack on RV32 and by ppaire.b + ppaire.h on RV64. |
| define void @test_purem_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-RV32-LABEL: test_purem_b: |
| ; CHECK-RV32: # %bb.0: |
| ; CHECK-RV32-NEXT: lw a2, 0(a2) |
| ; CHECK-RV32-NEXT: lw a1, 0(a1) |
| ; CHECK-RV32-NEXT: srli a3, a2, 24 |
| ; CHECK-RV32-NEXT: srli a4, a1, 24 |
| ; CHECK-RV32-NEXT: slli a5, a2, 16 |
| ; CHECK-RV32-NEXT: slli a6, a1, 16 |
| ; CHECK-RV32-NEXT: remu a7, a4, a3 |
| ; CHECK-RV32-NEXT: srli a5, a5, 24 |
| ; CHECK-RV32-NEXT: srli a3, a6, 24 |
| ; CHECK-RV32-NEXT: remu a6, a3, a5 |
| ; CHECK-RV32-NEXT: zext.b a3, a2 |
| ; CHECK-RV32-NEXT: zext.b a4, a1 |
| ; CHECK-RV32-NEXT: slli a2, a2, 8 |
| ; CHECK-RV32-NEXT: slli a1, a1, 8 |
| ; CHECK-RV32-NEXT: remu a4, a4, a3 |
| ; CHECK-RV32-NEXT: srli a2, a2, 24 |
| ; CHECK-RV32-NEXT: srli a1, a1, 24 |
| ; CHECK-RV32-NEXT: remu a5, a1, a2 |
| ; CHECK-RV32-NEXT: ppaire.db a2, a4, a6 |
| ; CHECK-RV32-NEXT: pack a1, a2, a3 |
| ; CHECK-RV32-NEXT: sw a1, 0(a0) |
| ; CHECK-RV32-NEXT: ret |
| ; |
| ; CHECK-RV64-LABEL: test_purem_b: |
| ; CHECK-RV64: # %bb.0: |
| ; CHECK-RV64-NEXT: lw a2, 0(a2) |
| ; CHECK-RV64-NEXT: lw a1, 0(a1) |
| ; CHECK-RV64-NEXT: srliw a3, a2, 24 |
| ; CHECK-RV64-NEXT: srliw a4, a1, 24 |
| ; CHECK-RV64-NEXT: slli a5, a2, 40 |
| ; CHECK-RV64-NEXT: zext.b a6, a2 |
| ; CHECK-RV64-NEXT: remuw a3, a4, a3 |
| ; CHECK-RV64-NEXT: zext.b a4, a1 |
| ; CHECK-RV64-NEXT: remuw a4, a4, a6 |
| ; CHECK-RV64-NEXT: slli a6, a1, 40 |
| ; CHECK-RV64-NEXT: srli a5, a5, 56 |
| ; CHECK-RV64-NEXT: srli a6, a6, 56 |
| ; CHECK-RV64-NEXT: remuw a5, a6, a5 |
| ; CHECK-RV64-NEXT: slli a2, a2, 48 |
| ; CHECK-RV64-NEXT: slli a1, a1, 48 |
| ; CHECK-RV64-NEXT: srli a2, a2, 56 |
| ; CHECK-RV64-NEXT: srli a1, a1, 56 |
| ; CHECK-RV64-NEXT: remuw a1, a1, a2 |
| ; CHECK-RV64-NEXT: ppaire.b a2, a5, a3 |
| ; CHECK-RV64-NEXT: ppaire.b a1, a4, a1 |
| ; CHECK-RV64-NEXT: ppaire.h a1, a1, a2 |
| ; CHECK-RV64-NEXT: sw a1, 0(a0) |
| ; CHECK-RV64-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %res = urem <4 x i8> %a, %b |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Comparison operations for v2i16 |
| ; icmp eq on <2 x i16> with the <2 x i1> result sign-extended back to |
| ; <2 x i16>, so each lane is all-ones or zero. The checks expect a single |
| ; pmseq.h on both RV32 and RV64. |
| define void @test_eq_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_eq_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmseq.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp eq <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; icmp ne on <2 x i16> with the result sign-extended to a <2 x i16> lane |
| ; mask. The checks expect the equality mask from pmseq.h to be inverted with |
| ; a scalar not. |
| define void @test_ne_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_ne_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmseq.h a1, a1, a2 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp ne <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise signed `icmp slt` on <2 x i16>, sign-extended to a <2 x i16> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: one pmslt.h with the loaded |
| ; values in source order (a1, a2). |
| define void @test_slt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_slt_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmslt.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp slt <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise signed `icmp sle` on <2 x i16>, sign-extended to a <2 x i16> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: pmslt.h with the source operands |
| ; swapped (a2, a1), followed by `not` of the result. |
| define void @test_sle_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_sle_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmslt.h a1, a2, a1 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp sle <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise signed `icmp sgt` on <2 x i16>, sign-extended to a <2 x i16> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: one pmslt.h with the source |
| ; operands swapped (a2, a1) and no separate inversion. |
| define void @test_sgt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_sgt_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmslt.h a1, a2, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp sgt <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise signed `icmp sge` on <2 x i16>, sign-extended to a <2 x i16> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: pmslt.h with the loaded values |
| ; in source order (a1, a2), followed by `not` of the result. |
| define void @test_sge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_sge_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmslt.h a1, a1, a2 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp sge <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise unsigned `icmp ult` on <2 x i16>, sign-extended to a <2 x i16> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: one pmsltu.h with the loaded |
| ; values in source order (a1, a2). |
| define void @test_ult_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_ult_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmsltu.h a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp ult <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise unsigned `icmp ule` on <2 x i16>, sign-extended to a <2 x i16> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: pmsltu.h with the source operands |
| ; swapped (a2, a1), followed by `not` of the result. |
| define void @test_ule_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_ule_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmsltu.h a1, a2, a1 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp ule <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise unsigned `icmp ugt` on <2 x i16>, sign-extended to a <2 x i16> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: one pmsltu.h with the source |
| ; operands swapped (a2, a1) and no separate inversion. |
| define void @test_ugt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_ugt_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmsltu.h a1, a2, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp ugt <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise unsigned `icmp uge` on <2 x i16>, sign-extended to a <2 x i16> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: pmsltu.h with the loaded values |
| ; in source order (a1, a2), followed by `not` of the result. |
| define void @test_uge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_uge_h: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmsltu.h a1, a1, a2 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <2 x i16>, ptr %a_ptr |
| %b = load <2 x i16>, ptr %b_ptr |
| %cmp = icmp uge <2 x i16> %a, %b |
| %res = sext <2 x i1> %cmp to <2 x i16> |
| store <2 x i16> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Comparison operations for v4i8 |
| ; Lane-wise `icmp eq` on <4 x i8>; the <4 x i1> result is sign-extended to |
| ; <4 x i8> (all-ones or zero per lane) and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: one pmseq.b between the two |
| ; loads and the store. |
| define void @test_eq_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_eq_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmseq.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp eq <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise `icmp ne` on <4 x i8>, sign-extended to a <4 x i8> mask and |
| ; stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: pmseq.b followed by `not` of the |
| ; result, i.e. the equality mask is inverted. |
| define void @test_ne_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_ne_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmseq.b a1, a1, a2 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp ne <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise signed `icmp slt` on <4 x i8>, sign-extended to a <4 x i8> mask |
| ; and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: one pmslt.b with the loaded |
| ; values in source order (a1, a2). |
| define void @test_slt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_slt_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmslt.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp slt <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise signed `icmp sle` on <4 x i8>, sign-extended to a <4 x i8> mask |
| ; and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: pmslt.b with the source operands |
| ; swapped (a2, a1), followed by `not` of the result. |
| define void @test_sle_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_sle_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmslt.b a1, a2, a1 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp sle <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise signed `icmp sgt` on <4 x i8>, sign-extended to a <4 x i8> mask |
| ; and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: one pmslt.b with the source |
| ; operands swapped (a2, a1) and no separate inversion. |
| define void @test_sgt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_sgt_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmslt.b a1, a2, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp sgt <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise signed `icmp sge` on <4 x i8>, sign-extended to a <4 x i8> mask |
| ; and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: pmslt.b with the loaded values |
| ; in source order (a1, a2), followed by `not` of the result. |
| define void @test_sge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_sge_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmslt.b a1, a1, a2 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp sge <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise unsigned `icmp ult` on <4 x i8>, sign-extended to a <4 x i8> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: one pmsltu.b with the loaded |
| ; values in source order (a1, a2). |
| define void @test_ult_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_ult_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmsltu.b a1, a1, a2 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp ult <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise unsigned `icmp ule` on <4 x i8>, sign-extended to a <4 x i8> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: pmsltu.b with the source operands |
| ; swapped (a2, a1), followed by `not` of the result. |
| define void @test_ule_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_ule_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmsltu.b a1, a2, a1 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp ule <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise unsigned `icmp ugt` on <4 x i8>, sign-extended to a <4 x i8> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: one pmsltu.b with the source |
| ; operands swapped (a2, a1) and no separate inversion. |
| define void @test_ugt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_ugt_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmsltu.b a1, a2, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp ugt <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |
| |
| ; Lane-wise unsigned `icmp uge` on <4 x i8>, sign-extended to a <4 x i8> |
| ; mask and stored through %ret_ptr. |
| ; Expected on both the RV32 and RV64 runs: pmsltu.b with the loaded values |
| ; in source order (a1, a2), followed by `not` of the result. |
| define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { |
| ; CHECK-LABEL: test_uge_b: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: lw a1, 0(a1) |
| ; CHECK-NEXT: lw a2, 0(a2) |
| ; CHECK-NEXT: pmsltu.b a1, a1, a2 |
| ; CHECK-NEXT: not a1, a1 |
| ; CHECK-NEXT: sw a1, 0(a0) |
| ; CHECK-NEXT: ret |
| %a = load <4 x i8>, ptr %a_ptr |
| %b = load <4 x i8>, ptr %b_ptr |
| %cmp = icmp uge <4 x i8> %a, %b |
| %res = sext <4 x i1> %cmp to <4 x i8> |
| store <4 x i8> %res, ptr %ret_ptr |
| ret void |
| } |