| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx --verify-machineinstrs < %s \ |
| ; RUN: | FileCheck %s --check-prefix=LA32 |
| ; RUN: llc --mtriple=loongarch64 -mattr=+lsx --verify-machineinstrs < %s \ |
| ; RUN: | FileCheck %s --check-prefix=LA64 |
| |
| declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) |
| |
| ; Bit-reverse each of the 16 i8 lanes. |
| ; LA32 stays entirely in the vector unit, using the classic SWAR byte |
| ; bit-swap: swap nibbles (slli/srli by 4), then 2-bit pairs (mask 0x33, |
| ; shift by 2), then single bits (mask 0x55, shift by 1). |
| ; LA64 round-trips each 64-bit half through a GPR and uses scalar |
| ; bitrev.8b, which reverses the bits within each byte independently, so |
| ; no lane shuffle is needed afterwards; the final vori.b with imm 0 is |
| ; just a vector register copy ($vr1 -> $vr0). |
| define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind { |
| ; LA32-LABEL: test_bitreverse_v16i8: |
| ; LA32: # %bb.0: |
| ; LA32-NEXT: vslli.b $vr1, $vr0, 4 |
| ; LA32-NEXT: vsrli.b $vr0, $vr0, 4 |
| ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 |
| ; LA32-NEXT: vandi.b $vr1, $vr0, 51 |
| ; LA32-NEXT: vslli.b $vr1, $vr1, 2 |
| ; LA32-NEXT: vsrli.b $vr0, $vr0, 2 |
| ; LA32-NEXT: vandi.b $vr0, $vr0, 51 |
| ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 |
| ; LA32-NEXT: vandi.b $vr1, $vr0, 85 |
| ; LA32-NEXT: vslli.b $vr1, $vr1, 1 |
| ; LA32-NEXT: vsrli.b $vr0, $vr0, 1 |
| ; LA32-NEXT: vandi.b $vr0, $vr0, 85 |
| ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 |
| ; LA32-NEXT: ret |
| ; |
| ; LA64-LABEL: test_bitreverse_v16i8: |
| ; LA64: # %bb.0: |
| ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 |
| ; LA64-NEXT: bitrev.8b $a0, $a0 |
| ; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 |
| ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1 |
| ; LA64-NEXT: bitrev.8b $a0, $a0 |
| ; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 |
| ; LA64-NEXT: vori.b $vr0, $vr1, 0 |
| ; LA64-NEXT: ret |
| %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) |
| ret <16 x i8> %b |
| } |
| |
| declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) |
| |
| ; Bit-reverse each of the 8 i16 lanes. |
| ; Both targets reverse whole scalar chunks (bitrev.w on LA32, bitrev.d on |
| ; LA64), which reverses the bits of every i16 lane in the chunk but also |
| ; reverses the order of the lanes inside it. The fix-up differs: |
| ;  - LA32 additionally inserts the reversed 32-bit words in swapped pair |
| ;    order (element 1 -> slot 0, 0 -> 1, 3 -> 2, 2 -> 3); |
| ;  - both end with vshuf4i.h imm 27 (0b00011011, i.e. select halfwords |
| ;    3,2,1,0 within each group of four) to restore the lane order. |
| define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind { |
| ; LA32-LABEL: test_bitreverse_v8i16: |
| ; LA32: # %bb.0: |
| ; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1 |
| ; LA32-NEXT: bitrev.w $a0, $a0 |
| ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0 |
| ; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0 |
| ; LA32-NEXT: bitrev.w $a0, $a0 |
| ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1 |
| ; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3 |
| ; LA32-NEXT: bitrev.w $a0, $a0 |
| ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2 |
| ; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2 |
| ; LA32-NEXT: bitrev.w $a0, $a0 |
| ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3 |
| ; LA32-NEXT: vshuf4i.h $vr0, $vr1, 27 |
| ; LA32-NEXT: ret |
| ; |
| ; LA64-LABEL: test_bitreverse_v8i16: |
| ; LA64: # %bb.0: |
| ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 |
| ; LA64-NEXT: bitrev.d $a0, $a0 |
| ; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 |
| ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1 |
| ; LA64-NEXT: bitrev.d $a0, $a0 |
| ; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 |
| ; LA64-NEXT: vshuf4i.h $vr0, $vr1, 27 |
| ; LA64-NEXT: ret |
| %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) |
| ret <8 x i16> %b |
| } |
| |
| declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) |
| |
| ; Bit-reverse each of the 4 i32 lanes. |
| ; LA32: a 32-bit chunk is exactly one lane, so bitrev.w per element with |
| ; same-slot reinsertion needs no shuffle; the trailing vori.b imm 0 is |
| ; just a register copy ($vr1 -> $vr0). |
| ; LA64: bitrev.d reverses a 64-bit chunk, which also swaps the two i32 |
| ; lanes inside it; vshuf4i.w imm 177 (0b10110001, select words 1,0,3,2) |
| ; swaps adjacent word pairs back into place. |
| define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind { |
| ; LA32-LABEL: test_bitreverse_v4i32: |
| ; LA32: # %bb.0: |
| ; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0 |
| ; LA32-NEXT: bitrev.w $a0, $a0 |
| ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0 |
| ; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1 |
| ; LA32-NEXT: bitrev.w $a0, $a0 |
| ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1 |
| ; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2 |
| ; LA32-NEXT: bitrev.w $a0, $a0 |
| ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2 |
| ; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3 |
| ; LA32-NEXT: bitrev.w $a0, $a0 |
| ; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3 |
| ; LA32-NEXT: vori.b $vr0, $vr1, 0 |
| ; LA32-NEXT: ret |
| ; |
| ; LA64-LABEL: test_bitreverse_v4i32: |
| ; LA64: # %bb.0: |
| ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 |
| ; LA64-NEXT: bitrev.d $a0, $a0 |
| ; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 |
| ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1 |
| ; LA64-NEXT: bitrev.d $a0, $a0 |
| ; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 |
| ; LA64-NEXT: vshuf4i.w $vr0, $vr1, 177 |
| ; LA64-NEXT: ret |
| %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) |
| ret <4 x i32> %b |
| } |
| |
| declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) |
| |
| ; Bit-reverse each of the 2 i64 lanes. |
| ; LA32 has no 64-bit scalar bitrev, so it stays in the vector unit: first |
| ; a vshuf.b with a constant-pool mask (.LCPI3_0) to reorder bytes — |
| ; presumably reversing byte order within each i64 lane (mask bytes not |
| ; visible here; confirm against the emitted constant pool) — then the |
| ; same SWAR per-byte bit-swap sequence as the v16i8 case (nibbles, 2-bit |
| ; pairs with mask 0x33, single bits with mask 0x55). |
| ; LA64: bitrev.d reverses exactly one lane per 64-bit chunk, so same-slot |
| ; reinsertion suffices; the final vori.b imm 0 is just a register copy. |
| define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind { |
| ; LA32-LABEL: test_bitreverse_v2i64: |
| ; LA32: # %bb.0: |
| ; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) |
| ; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI3_0) |
| ; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1 |
| ; LA32-NEXT: vslli.b $vr1, $vr0, 4 |
| ; LA32-NEXT: vsrli.b $vr0, $vr0, 4 |
| ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 |
| ; LA32-NEXT: vandi.b $vr1, $vr0, 51 |
| ; LA32-NEXT: vslli.b $vr1, $vr1, 2 |
| ; LA32-NEXT: vsrli.b $vr0, $vr0, 2 |
| ; LA32-NEXT: vandi.b $vr0, $vr0, 51 |
| ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 |
| ; LA32-NEXT: vandi.b $vr1, $vr0, 85 |
| ; LA32-NEXT: vslli.b $vr1, $vr1, 1 |
| ; LA32-NEXT: vsrli.b $vr0, $vr0, 1 |
| ; LA32-NEXT: vandi.b $vr0, $vr0, 85 |
| ; LA32-NEXT: vor.v $vr0, $vr0, $vr1 |
| ; LA32-NEXT: ret |
| ; |
| ; LA64-LABEL: test_bitreverse_v2i64: |
| ; LA64: # %bb.0: |
| ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 |
| ; LA64-NEXT: bitrev.d $a0, $a0 |
| ; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 |
| ; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1 |
| ; LA64-NEXT: bitrev.d $a0, $a0 |
| ; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 |
| ; LA64-NEXT: vori.b $vr0, $vr1, 0 |
| ; LA64-NEXT: ret |
| %b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) |
| ret <2 x i64> %b |
| } |