| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=thumbv7s-none-eabi %s -o - | FileCheck %s |
| |
| ; NEON two-register table lookup intrinsic called by the tests below. In the |
| ; expected assembly the first two operands become the {dN, dN+1} table list of |
| ; vtbl.8 and the third operand becomes the index register. |
| declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %shuffle.i.i307, <8 x i8> %shuffle.i27.i308, <8 x i8> %vtbl2.i25.i) |
| |
| ; Check that we get the motivating example right. |
| ; In the IR, the bitcasts from i64 force the values to go through the GPRs, |
| ; whereas they are defined in vector registers (by the load) and used in vector |
| ; registers (by the table lookup). The expected assembly feeds the loaded |
| ; d16/d17 pair straight into vtbl.8 with no vmov to or from core registers. |
| define void @motivatingExample(<2 x i64>* %addr, <8 x i8>* %addr2) { |
| ; CHECK-LABEL: motivatingExample: |
| ; CHECK: @ %bb.0: |
| ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] |
| ; CHECK-NEXT: vldr d18, [r1] |
| ; CHECK-NEXT: vtbl.8 d16, {d16, d17}, d18 |
| ; CHECK-NEXT: vstr d16, [r1] |
| ; CHECK-NEXT: bx lr |
| ; Load the 128-bit table from %addr and the 64-bit index vector from %addr2. |
| %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr |
| %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 |
| ; Split the table into its two i64 lanes using constant indices. |
| %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0 |
| %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 |
| ; Reinterpret each i64 lane as <8 x i8>, the operand type the intrinsic takes. |
| %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> |
| %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> |
| ; Perform the lookup and write the result back over the index vector at %addr2. |
| %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) |
| store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 |
| ret void |
| } |
| |
| ; Check that we do not perform the transformation when the extractelement index |
| ; is dynamic. The first table operand is extracted with the runtime value %index, |
| ; so the expected assembly stores the vector to a 16-byte-aligned stack slot, |
| ; reloads the selected element with two 32-bit ldr instructions and rebuilds d16 |
| ; from core registers with vmov d16, r2, r0. |
| define void @dynamicIndex(<2 x i64>* %addr, <8 x i8>* %addr2, i32 %index) { |
| ; CHECK-LABEL: dynamicIndex: |
| ; CHECK: @ %bb.0: |
| ; CHECK-NEXT: .save {r4, r6, r7, lr} |
| ; CHECK-NEXT: push {r4, r6, r7, lr} |
| ; CHECK-NEXT: .setfp r7, sp, #8 |
| ; CHECK-NEXT: add r7, sp, #8 |
| ; CHECK-NEXT: .pad #16 |
| ; CHECK-NEXT: sub sp, #16 |
| ; CHECK-NEXT: mov r4, sp |
| ; CHECK-NEXT: bfc r4, #0, #4 |
| ; CHECK-NEXT: mov sp, r4 |
| ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] |
| ; CHECK-NEXT: adds r0, r2, r2 |
| ; CHECK-NEXT: and r2, r0, #3 |
| ; CHECK-NEXT: adds r0, #1 |
| ; CHECK-NEXT: mov r12, sp |
| ; CHECK-NEXT: and r0, r0, #3 |
| ; CHECK-NEXT: lsls r2, r2, #2 |
| ; CHECK-NEXT: mov r3, r12 |
| ; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128], r2 |
| ; CHECK-NEXT: orr.w r0, r12, r0, lsl #2 |
| ; CHECK-NEXT: sub.w r4, r7, #8 |
| ; CHECK-NEXT: ldr r2, [r3] |
| ; CHECK-NEXT: ldr r0, [r0] |
| ; CHECK-NEXT: vldr d18, [r1] |
| ; CHECK-NEXT: vmov d16, r2, r0 |
| ; CHECK-NEXT: vtbl.8 d16, {d16, d17}, d18 |
| ; CHECK-NEXT: vstr d16, [r1] |
| ; CHECK-NEXT: mov sp, r4 |
| ; CHECK-NEXT: pop {r4, r6, r7, pc} |
| ; Load the 128-bit table from %addr and the 64-bit index vector from %addr2. |
| %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr |
| %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 |
| ; The first extract uses the runtime %index; the second still uses constant 1. |
| %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 %index |
| %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 |
| ; Reinterpret each i64 value as <8 x i8>, the operand type the intrinsic takes. |
| %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> |
| %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> |
| ; Perform the lookup and write the result back over the index vector at %addr2. |
| %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) |
| store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 |
| ret void |
| } |
| |
| ; Check that we do not perform the transformation when there are several uses |
| ; of the result of the bitcast. |
| ; NOTE(review): in this IR the value with two uses is the i64 extracted at index 0 |
| ; (the operand of the first bitcast): it feeds the table lookup and is also the |
| ; function's return value. The expected assembly therefore contains |
| ; vmov r0, r2, d16 to copy it into core registers for the i64 return. |
| define i64 @severalUses(<2 x i64>* %addr, <8 x i8>* %addr2) { |
| ; CHECK-LABEL: severalUses: |
| ; CHECK: @ %bb.0: |
| ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] |
| ; CHECK-NEXT: vmov r0, r2, d16 |
| ; CHECK-NEXT: vldr d18, [r1] |
| ; CHECK-NEXT: vtbl.8 d16, {d16, d17}, d18 |
| ; CHECK-NEXT: vstr d16, [r1] |
| ; CHECK-NEXT: mov r1, r2 |
| ; CHECK-NEXT: bx lr |
| ; Load the 128-bit table from %addr and the 64-bit index vector from %addr2. |
| %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr |
| %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 |
| ; Split the table into its two i64 lanes using constant indices. |
| %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0 |
| %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 |
| ; Reinterpret each i64 lane as <8 x i8>, the operand type the intrinsic takes. |
| %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> |
| %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> |
| ; Perform the lookup and write the result back over the index vector at %addr2. |
| %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) |
| store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 |
| ; Second use of the lane-0 extract: it is returned as a scalar i64. |
| ret i64 %shuffle.i.extract.i310 |
| } |