| //===----------------------Hexagon builtin routine ------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| // Functions that implement common sequences in function prologues and epilogues |
| // used to save code size |
| |
| // FUNCTION_BEGIN name: switch to .text and open the symbol given by name as a |
| // global symbol of type function. .falign is issued just before the label. |
| .macro FUNCTION_BEGIN name |
| .text |
| .type \name, @function |
| .globl \name |
| .falign |
| \name: |
| .endm |
| |
| // FUNCTION_END name: set the symbol size to the distance from its label to |
| // the current location. |
| .macro FUNCTION_END name |
| .size \name, . - \name |
| .endm |
| |
| // FALLTHROUGH_TAIL_CALL name0 name1: close name0 and open name1 at the same |
| // place. Nothing is emitted to leave name0, so execution entering at name0 |
| // continues into name1. Unlike FUNCTION_BEGIN, the current section is left |
| // untouched (no .text). |
| .macro FALLTHROUGH_TAIL_CALL name0 name1 |
| FUNCTION_END \name0 |
| .type \name1, @function |
| .globl \name1 |
| .falign |
| \name1: |
| .endm |
| |
| |
| |
| |
| // Save r25:24 at fp+#-8 and r27:26 at fp+#-16. |
| |
| |
| |
| |
| // The compiler knows that the __save_* functions clobber LR. No other |
| // registers should be used without informing the compiler. |
| |
| // Since we can only issue one store per packet, we don't hurt performance by |
| // simply jumping to the right point in this sequence of stores. |
| |
| // Entry __save_r24_through_r27: store r27:26 at r30-16 (r30 is fp), then |
| // continue into __save_r24_through_r25 below. |
| FUNCTION_BEGIN __save_r24_through_r27 |
| memd(r30+#-16) = r27:26 |
| // Entry __save_r24_through_r25: store r25:24 at r30-8 and return through r31 |
| // (lr) in the same packet. |
| FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25 |
| { |
| memd(r30+#-8) = r25:24 |
| jumpr r31 |
| } |
| FUNCTION_END __save_r24_through_r25 |
| |
| |
| |
| |
| // For each of the *_before_tailcall functions, jumpr lr is executed in parallel |
| // with deallocframe. That way, the return gets the old value of lr, which is |
| // where these functions need to return, and at the same time, lr gets the value |
| // it needs going into the tail call. |
| |
| // Entry for the r24-r27 variant: reload r27:26 from r30-16 (r30 is fp), then |
| // continue into the r24-r25 variant below. |
| FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall |
| r27:26 = memd(r30+#-16) |
| // Entry for the r24-r25 variant: reload r25:24 from r30-8; deallocframe and |
| // the jump through r31 (lr) share that packet. |
| FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall |
| { |
| r25:24 = memd(r30+#-8) |
| deallocframe |
| jumpr r31 |
| } |
| FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall |
| |
| |
| |
| |
| // Here we use the extra load bandwidth to restore LR early, allowing the return |
| // to occur in parallel with the deallocframe. |
| |
| FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe |
| // Packet 1: load r31 (lr) from r30+4 (r30 is fp) alongside the r27:26 reload |
| // from r30-16. |
| { |
| r31 = memw(r30+#4) |
| r27:26 = memd(r30+#-16) |
| } |
| // Packet 2: reload r25:24 from r30-8, run deallocframe and jump through r31, |
| // all in one packet. |
| { |
| r25:24 = memd(r30+#-8) |
| deallocframe |
| jumpr r31 |
| } |
| FUNCTION_END __restore_r24_through_r27_and_deallocframe |
| |
| |
| |
| |
| // Here the load bandwidth is maximized. |
| |
| FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe |
| // Reload r25:24 from r30-8 (r30 is fp) in the same packet as deallocframe. |
| { |
| r25:24 = memd(r30+#-8) |
| deallocframe |
| } |
| // The jump through r31 (lr) sits outside the braces, after the packet above. |
| jumpr r31 |
| FUNCTION_END __restore_r24_through_r25_and_deallocframe |