| /* Copyright (C) 2006 Free Software Foundation, Inc. |
| |
| This file is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 2 of the License, or (at your option) |
| any later version. |
| |
| This file is distributed in the hope that it will be useful, but WITHOUT |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this file; see the file COPYING. If not, write to the Free |
| Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301, USA. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "rtl.h" |
| #include "regs.h" |
| #include "hard-reg-set.h" |
| #include "real.h" |
| #include "insn-config.h" |
| #include "conditions.h" |
| #include "insn-attr.h" |
| #include "flags.h" |
| #include "recog.h" |
| #include "obstack.h" |
| #include "tree.h" |
| #include "expr.h" |
| #include "optabs.h" |
| #include "except.h" |
| #include "function.h" |
| #include "output.h" |
| #include "basic-block.h" |
| #include "integrate.h" |
| #include "toplev.h" |
| #include "ggc.h" |
| #include "hashtab.h" |
| #include "tm_p.h" |
| #include "target.h" |
| #include "target-def.h" |
| #include "langhooks.h" |
| #include "reload.h" |
| #include "cfglayout.h" |
| #include "sched-int.h" |
| #include "params.h" |
| #include "assert.h" |
| #include "c-tree.h" |
| #include "c-common.h" |
| #include "machmode.h" |
| #include "tree-gimple.h" |
| #include "tm-constrs.h" |
| #include "spu-builtins.h" |
| |
| /* LLVM LOCAL begin */ |
| #ifdef ENABLE_LLVM |
| #undef INSN_SCHEDULING |
| #endif |
| /* LLVM LOCAL end */ |
| |
| /* Builtin types, data and prototypes. */ |
| struct spu_builtin_range |
| { |
| int low, high; |
| }; |
| |
| static struct spu_builtin_range spu_builtin_range[] = { |
| {-0x40ll, 0x7fll}, /* SPU_BTI_7 */ |
| {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */ |
| {0ll, 0x7fll}, /* SPU_BTI_U7 */ |
| {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */ |
| {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */ |
| {0ll, 0x3fffll}, /* SPU_BTI_U14 */ |
| {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */ |
| {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */ |
| {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */ |
| {0ll, 0xffffll}, /* SPU_BTI_U16 */ |
| {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */ |
| {0ll, 0x3ffffll}, /* SPU_BTI_U18 */ |
| }; |
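| |
| /* A note on the table: SPU_BTI_S10, for example, is the signed 10-bit |
| immediate range -0x200..0x1ff (-512..511). The _2 and _4 suffixed |
| entries are wider because, presumably, those immediate fields are |
| scaled by 4 or 16 in the encoding; e.g. SPU_BTI_S10_4 spans |
| -0x2000..0x1fff. */ |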
| |
| |
| /* Records which hard registers have ever been allocated. */ |
| char regs_ever_allocated[FIRST_PSEUDO_REGISTER]; |
| |
| /* Prototypes and external defs. */ |
| static void spu_init_builtins (void); |
| static bool spu_scalar_mode_supported_p (enum machine_mode mode); |
| static bool spu_vector_mode_supported_p (enum machine_mode mode); |
| static rtx adjust_operand (rtx op, HOST_WIDE_INT * start); |
| static rtx get_pic_reg (void); |
| static int need_to_save_reg (int regno, int saving); |
| static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset); |
| static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset); |
| static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, |
| rtx scratch); |
| /* LLVM LOCAL begin */ |
| #ifdef INSN_SCHEDULING |
| static void emit_nop_for_insn (rtx insn); |
| static bool insn_clobbers_hbr (rtx insn); |
| static void spu_emit_branch_hint (rtx before, rtx branch, rtx target, |
| int distance); |
| static rtx get_branch_target (rtx branch); |
| #endif |
| /* LLVM LOCAL end */ |
| static void insert_branch_hints (void); |
| static void insert_nops (void); |
| static void spu_machine_dependent_reorg (void); |
| static int spu_sched_issue_rate (void); |
| static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn, |
| int can_issue_more); |
| static int get_pipe (rtx insn); |
| static int spu_sched_adjust_priority (rtx insn, int pri); |
| static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost); |
| static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args, |
| int flags, |
| bool *no_add_attrs); |
| static tree spu_handle_vector_attribute (tree * node, tree name, tree args, |
| int flags, |
| bool *no_add_attrs); |
| static int spu_naked_function_p (tree func); |
| static bool spu_pass_by_reference (int *cum, enum machine_mode mode, |
| tree type, bool named); |
| static tree spu_build_builtin_va_list (void); |
| static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p, |
| tree * post_p); |
| static int regno_aligned_for_load (int regno); |
| static int store_with_one_insn_p (rtx mem); |
| static int reg_align (rtx reg); |
| static int mem_is_padded_component_ref (rtx x); |
| static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p); |
| static void spu_asm_globalize_label (FILE * file, const char *name); |
| static bool spu_rtx_costs (rtx x, int code, int outer_code, int *total); |
| static bool spu_function_ok_for_sibcall (tree decl, tree exp); |
| static void spu_init_libfuncs (void); |
| static bool spu_return_in_memory (tree type, tree fntype); |
| static void fix_range (const char *); |
| static void spu_encode_section_info (tree, rtx, int); |
| /* LLVM LOCAL begin */ |
| #ifdef INSN_SCHEDULING |
| static tree spu_builtin_mul_widen_even (tree); |
| static tree spu_builtin_mul_widen_odd (tree); |
| #endif |
| /* LLVM LOCAL end */ |
| static tree spu_builtin_mask_for_load (void); |
| |
| extern const char *reg_names[]; |
| rtx spu_compare_op0, spu_compare_op1; |
| |
| enum spu_immediate { |
| SPU_NONE, |
| SPU_IL, |
| SPU_ILA, |
| SPU_ILH, |
| SPU_ILHU, |
| SPU_ORI, |
| SPU_ORHI, |
| SPU_ORBI, |
| SPU_IOHL |
| }; |
| enum immediate_class |
| { |
| IC_POOL, /* constant pool */ |
| IC_IL1, /* one il* instruction */ |
| IC_IL2, /* both ilhu and iohl instructions */ |
| IC_IL1s, /* one il* instruction */ |
| IC_IL2s, /* both ilhu and iohl instructions */ |
| IC_FSMBI, /* the fsmbi instruction */ |
| IC_CPAT, /* one of the c*d instructions */ |
| }; |
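| |
| /* For illustration: a small SImode constant such as 5 fits a single il |
| and is IC_IL1, while something like 0x12345678 needs the two-insn |
| ilhu/iohl sequence and is IC_IL2. The s-suffixed classes are the |
| symbolic (reloc) counterparts handled in print_operand below. */ |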
| |
| static enum spu_immediate which_immediate_load (HOST_WIDE_INT val); |
| static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val); |
| static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart); |
| static enum immediate_class classify_immediate (rtx op, |
| enum machine_mode mode); |
| |
| /* Built in types. */ |
| tree spu_builtin_types[SPU_BTI_MAX]; |
| |
| /* TARGET overrides. */ |
| |
| #undef TARGET_INIT_BUILTINS |
| #define TARGET_INIT_BUILTINS spu_init_builtins |
| |
| #undef TARGET_EXPAND_BUILTIN |
| #define TARGET_EXPAND_BUILTIN spu_expand_builtin |
| |
| #undef TARGET_EH_RETURN_FILTER_MODE |
| #define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode |
| |
| /* The .8byte directive doesn't seem to work well for a 32 bit |
| architecture. */ |
| #undef TARGET_ASM_UNALIGNED_DI_OP |
| #define TARGET_ASM_UNALIGNED_DI_OP NULL |
| |
| #undef TARGET_RTX_COSTS |
| #define TARGET_RTX_COSTS spu_rtx_costs |
| |
| #undef TARGET_ADDRESS_COST |
| #define TARGET_ADDRESS_COST hook_int_rtx_0 |
| |
| #undef TARGET_SCHED_ISSUE_RATE |
| #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate |
| |
| #undef TARGET_SCHED_VARIABLE_ISSUE |
| #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue |
| |
| #undef TARGET_SCHED_ADJUST_PRIORITY |
| #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority |
| |
| #undef TARGET_SCHED_ADJUST_COST |
| #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost |
| |
| const struct attribute_spec spu_attribute_table[]; |
| #undef TARGET_ATTRIBUTE_TABLE |
| #define TARGET_ATTRIBUTE_TABLE spu_attribute_table |
| |
| #undef TARGET_ASM_INTEGER |
| #define TARGET_ASM_INTEGER spu_assemble_integer |
| |
| #undef TARGET_SCALAR_MODE_SUPPORTED_P |
| #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p |
| |
| #undef TARGET_VECTOR_MODE_SUPPORTED_P |
| #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p |
| |
| #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
| #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall |
| |
| #undef TARGET_ASM_GLOBALIZE_LABEL |
| #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label |
| |
| #undef TARGET_PASS_BY_REFERENCE |
| #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference |
| |
| #undef TARGET_MUST_PASS_IN_STACK |
| #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size |
| |
| #undef TARGET_BUILD_BUILTIN_VA_LIST |
| #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list |
| |
| #undef TARGET_SETUP_INCOMING_VARARGS |
| #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs |
| |
| #undef TARGET_MACHINE_DEPENDENT_REORG |
| #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg |
| |
| #undef TARGET_GIMPLIFY_VA_ARG_EXPR |
| #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr |
| |
| #undef TARGET_DEFAULT_TARGET_FLAGS |
| #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT) |
| |
| #undef TARGET_INIT_LIBFUNCS |
| #define TARGET_INIT_LIBFUNCS spu_init_libfuncs |
| |
| #undef TARGET_RETURN_IN_MEMORY |
| #define TARGET_RETURN_IN_MEMORY spu_return_in_memory |
| |
| #undef TARGET_ENCODE_SECTION_INFO |
| #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info |
| |
| #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN |
| #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even |
| |
| #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD |
| #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd |
| |
| #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD |
| #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load |
| |
| struct gcc_target targetm = TARGET_INITIALIZER; |
| |
| /* Sometimes certain combinations of command options do not make sense |
| on a particular target machine. You can define a macro |
| OVERRIDE_OPTIONS to take account of this. This macro, if defined, is |
| executed once just after all the command options have been parsed. */ |
| void |
| spu_override_options (void) |
| { |
| /* Override some of the default param values. With so many registers |
| larger values are better for these params. */ |
| if (MAX_UNROLLED_INSNS == 100) |
| MAX_UNROLLED_INSNS = 250; |
| if (MAX_PENDING_LIST_LENGTH == 32) |
| MAX_PENDING_LIST_LENGTH = 128; |
| |
| flag_omit_frame_pointer = 1; |
| |
| if (align_functions < 8) |
| align_functions = 8; |
| |
| if (spu_fixed_range_string) |
| fix_range (spu_fixed_range_string); |
| } |
| |
| /* Handle an attribute requiring a FUNCTION_DECL; arguments as in |
| struct attribute_spec.handler. */ |
| |
| /* Table of machine attributes. */ |
| const struct attribute_spec spu_attribute_table[] = |
| { |
| /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ |
| { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute }, |
| { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute }, |
| { NULL, 0, 0, false, false, false, NULL } |
| }; |
| |
| /* True if MODE is valid for the target. By "valid", we mean able to |
| be manipulated in non-trivial ways. In particular, this means all |
| the arithmetic is supported. */ |
| static bool |
| spu_scalar_mode_supported_p (enum machine_mode mode) |
| { |
| switch (mode) |
| { |
| case QImode: |
| case HImode: |
| case SImode: |
| case SFmode: |
| case DImode: |
| case TImode: |
| case DFmode: |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* Similarly for vector modes. "Supported" here is less strict: at |
| least some operations are supported, but the optabs or builtins must |
| be checked for further details. */ |
| static bool |
| spu_vector_mode_supported_p (enum machine_mode mode) |
| { |
| switch (mode) |
| { |
| case V16QImode: |
| case V8HImode: |
| case V4SImode: |
| case V2DImode: |
| case V4SFmode: |
| case V2DFmode: |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the |
| least significant bytes of the outer mode. This function returns |
| TRUE for the SUBREGs where this is correct. */ |
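| /* For example, (subreg:SI (reg:QI)) is accepted because both modes fit |
| in one 4-byte slot, while (subreg:SI (reg:DI)) is rejected: the sizes |
| differ and are not both <= 4 bytes. */ |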
| int |
| valid_subreg (rtx op) |
| { |
| enum machine_mode om = GET_MODE (op); |
| enum machine_mode im = GET_MODE (SUBREG_REG (op)); |
| return om != VOIDmode && im != VOIDmode |
| && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om) |
| || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)); |
| } |
| |
| /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off |
| and adjust the start offset. */ |
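| /* E.g., a QImode operand with *START == 2 is widened to SImode and |
| *START becomes 26, since the field ends up 32 - 8 == 24 bits further |
| from the most significant bit of the wider register. */ |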
| static rtx |
| adjust_operand (rtx op, HOST_WIDE_INT * start) |
| { |
| enum machine_mode mode; |
| int op_size; |
| /* Strip any SUBREG */ |
| if (GET_CODE (op) == SUBREG) |
| { |
| if (start) |
| *start -= |
| GET_MODE_BITSIZE (GET_MODE (op)) - |
| GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op))); |
| op = SUBREG_REG (op); |
| } |
| /* If it is smaller than SI, widen it to SImode (the SUBREG is added below). */ |
| op_size = GET_MODE_BITSIZE (GET_MODE (op)); |
| if (op_size < 32) |
| { |
| if (start) |
| *start += 32 - op_size; |
| op_size = 32; |
| } |
| /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */ |
| mode = mode_for_size (op_size, MODE_INT, 0); |
| if (mode != GET_MODE (op)) |
| op = gen_rtx_SUBREG (mode, op, 0); |
| return op; |
| } |
| |
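| /* Expand extv/extzv: extract the WIDTH-bit field (ops[2]) starting at |
| bit START (ops[3], counted from the MSB) of ops[1] into ops[0]. The |
| field is isolated by a left shift of START bits followed by a right |
| shift of (size - WIDTH) bits, logical when UNSIGNEDP and arithmetic |
| otherwise, which also performs the extension. */ |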
| void |
| spu_expand_extv (rtx ops[], int unsignedp) |
| { |
| HOST_WIDE_INT width = INTVAL (ops[2]); |
| HOST_WIDE_INT start = INTVAL (ops[3]); |
| HOST_WIDE_INT src_size, dst_size; |
| enum machine_mode src_mode, dst_mode; |
| rtx dst = ops[0], src = ops[1]; |
| rtx s; |
| |
| dst = adjust_operand (ops[0], 0); |
| dst_mode = GET_MODE (dst); |
| dst_size = GET_MODE_BITSIZE (GET_MODE (dst)); |
| |
| src = adjust_operand (src, &start); |
| src_mode = GET_MODE (src); |
| src_size = GET_MODE_BITSIZE (GET_MODE (src)); |
| |
| if (start > 0) |
| { |
| s = gen_reg_rtx (src_mode); |
| switch (src_mode) |
| { |
| case SImode: |
| emit_insn (gen_ashlsi3 (s, src, GEN_INT (start))); |
| break; |
| case DImode: |
| emit_insn (gen_ashldi3 (s, src, GEN_INT (start))); |
| break; |
| case TImode: |
| emit_insn (gen_ashlti3 (s, src, GEN_INT (start))); |
| break; |
| default: |
| abort (); |
| } |
| src = s; |
| } |
| |
| if (width < src_size) |
| { |
| rtx pat; |
| int icode; |
| switch (src_mode) |
| { |
| case SImode: |
| icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3; |
| break; |
| case DImode: |
| icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3; |
| break; |
| case TImode: |
| icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3; |
| break; |
| default: |
| abort (); |
| } |
| s = gen_reg_rtx (src_mode); |
| pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width)); |
| emit_insn (pat); |
| src = s; |
| } |
| |
| convert_move (dst, src, unsignedp); |
| } |
| |
| void |
| spu_expand_insv (rtx ops[]) |
| { |
| HOST_WIDE_INT width = INTVAL (ops[1]); |
| HOST_WIDE_INT start = INTVAL (ops[2]); |
| HOST_WIDE_INT maskbits; |
| enum machine_mode dst_mode, src_mode; |
| rtx dst = ops[0], src = ops[3]; |
| int dst_size, src_size; |
| rtx mask; |
| rtx shift_reg; |
| int shift; |
| |
| |
| if (GET_CODE (ops[0]) == MEM) |
| dst = gen_reg_rtx (TImode); |
| else |
| dst = adjust_operand (dst, &start); |
| dst_mode = GET_MODE (dst); |
| dst_size = GET_MODE_BITSIZE (GET_MODE (dst)); |
| |
| if (CONSTANT_P (src)) |
| { |
| enum machine_mode m = |
| (width <= 32 ? SImode : width <= 64 ? DImode : TImode); |
| src = force_reg (m, convert_to_mode (m, src, 0)); |
| } |
| src = adjust_operand (src, 0); |
| src_mode = GET_MODE (src); |
| src_size = GET_MODE_BITSIZE (GET_MODE (src)); |
| |
| mask = gen_reg_rtx (dst_mode); |
| shift_reg = gen_reg_rtx (dst_mode); |
| shift = dst_size - start - width; |
| |
| /* It's not safe to use subreg here because the compiler assumes |
| that the SUBREG_REG is right justified in the SUBREG. */ |
| convert_move (shift_reg, src, 1); |
| |
| if (shift > 0) |
| { |
| switch (dst_mode) |
| { |
| case SImode: |
| emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift))); |
| break; |
| case DImode: |
| emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift))); |
| break; |
| case TImode: |
| emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift))); |
| break; |
| default: |
| abort (); |
| } |
| } |
| else if (shift < 0) |
| abort (); |
| |
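| /* Build a mask of WIDTH one-bits starting at bit START (counted from |
| the MSB). E.g., for dst_size 32, start 8, width 8, the sum |
| (-1ll << 16) + (1ll << 24) leaves 0x00ff0000 in the low 32 bits. */ |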
| switch (dst_size) |
| { |
| case 32: |
| maskbits = (-1ll << (32 - width - start)); |
| if (start) |
| maskbits += (1ll << (32 - start)); |
| emit_move_insn (mask, GEN_INT (maskbits)); |
| break; |
| case 64: |
| maskbits = (-1ll << (64 - width - start)); |
| if (start) |
| maskbits += (1ll << (64 - start)); |
| emit_move_insn (mask, GEN_INT (maskbits)); |
| break; |
| case 128: |
| { |
| unsigned char arr[16]; |
| int i = start / 8; |
| memset (arr, 0, sizeof (arr)); |
| arr[i] = 0xff >> (start & 7); |
| for (i++; i <= (start + width - 1) / 8; i++) |
| arr[i] = 0xff; |
| arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7)); |
| emit_move_insn (mask, array_to_constant (TImode, arr)); |
| } |
| break; |
| default: |
| abort (); |
| } |
| if (GET_CODE (ops[0]) == MEM) |
| { |
| rtx aligned = gen_reg_rtx (SImode); |
| rtx low = gen_reg_rtx (SImode); |
| rtx addr = gen_reg_rtx (SImode); |
| rtx rotl = gen_reg_rtx (SImode); |
| rtx mask0 = gen_reg_rtx (TImode); |
| rtx mem; |
| |
| emit_move_insn (addr, XEXP (ops[0], 0)); |
| emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16))); |
| emit_insn (gen_andsi3 (low, addr, GEN_INT (15))); |
| emit_insn (gen_negsi2 (rotl, low)); |
| emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl)); |
| emit_insn (gen_rotqmby_ti (mask0, mask, rotl)); |
| mem = change_address (ops[0], TImode, aligned); |
| set_mem_alias_set (mem, 0); |
| emit_move_insn (dst, mem); |
| emit_insn (gen_selb (dst, dst, shift_reg, mask0)); |
| emit_move_insn (mem, dst); |
| if (start + width > MEM_ALIGN (ops[0])) |
| { |
| rtx shl = gen_reg_rtx (SImode); |
| rtx mask1 = gen_reg_rtx (TImode); |
| rtx dst1 = gen_reg_rtx (TImode); |
| rtx mem1; |
| emit_insn (gen_subsi3 (shl, GEN_INT (16), low)); |
| emit_insn (gen_shlqby_ti (mask1, mask, shl)); |
| mem1 = adjust_address (mem, TImode, 16); |
| set_mem_alias_set (mem1, 0); |
| emit_move_insn (dst1, mem1); |
| emit_insn (gen_selb (dst1, dst1, shift_reg, mask1)); |
| emit_move_insn (mem1, dst1); |
| } |
| } |
| else |
| emit_insn (gen_selb (dst, dst, shift_reg, mask)); |
| } |
| |
| |
| int |
| spu_expand_block_move (rtx ops[]) |
| { |
| HOST_WIDE_INT bytes, align, offset; |
| rtx src, dst, sreg, dreg, target; |
| int i; |
| if (GET_CODE (ops[2]) != CONST_INT |
| || GET_CODE (ops[3]) != CONST_INT |
| || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8)) |
| return 0; |
| |
| bytes = INTVAL (ops[2]); |
| align = INTVAL (ops[3]); |
| |
| if (bytes <= 0) |
| return 1; |
| |
| dst = ops[0]; |
| src = ops[1]; |
| |
| if (align == 16) |
| { |
| for (offset = 0; offset + 16 <= bytes; offset += 16) |
| { |
| dst = adjust_address (ops[0], V16QImode, offset); |
| src = adjust_address (ops[1], V16QImode, offset); |
| emit_move_insn (dst, src); |
| } |
| if (offset < bytes) |
| { |
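| /* Copy the 1..15 byte tail by loading both quadwords and merging |
| them with selb: mask bytes of 0xff take the source byte and 0x00 |
| bytes keep the destination. For a 20-byte copy, arr[0..3] are |
| 0xff. */ |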
| rtx mask; |
| unsigned char arr[16] = { 0 }; |
| for (i = 0; i < bytes - offset; i++) |
| arr[i] = 0xff; |
| dst = adjust_address (ops[0], V16QImode, offset); |
| src = adjust_address (ops[1], V16QImode, offset); |
| mask = gen_reg_rtx (V16QImode); |
| sreg = gen_reg_rtx (V16QImode); |
| dreg = gen_reg_rtx (V16QImode); |
| target = gen_reg_rtx (V16QImode); |
| emit_move_insn (mask, array_to_constant (V16QImode, arr)); |
| emit_move_insn (dreg, dst); |
| emit_move_insn (sreg, src); |
| emit_insn (gen_selb (target, dreg, sreg, mask)); |
| emit_move_insn (dst, target); |
| } |
| return 1; |
| } |
| return 0; |
| } |
| |
| enum spu_comp_code |
| { SPU_EQ, SPU_GT, SPU_GTU }; |
| |
| |
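| /* Indexed by [mode][code]: rows correspond to QI, HI, SI, DI, TI, SF, |
| DF and vector modes (the index computed in spu_emit_branch_or_set), |
| columns to SPU_EQ, SPU_GT and SPU_GTU. */ |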
| int spu_comp_icode[8][3] = { |
| {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi}, |
| {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi}, |
| {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si}, |
| {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di}, |
| {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti}, |
| {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0}, |
| {0, 0, 0}, |
| {CODE_FOR_ceq_vec, 0, 0}, |
| }; |
| |
| /* Generate a compare for CODE, emitting either a branch or a set of |
| the result. GCC could figure this out too if we didn't provide all |
| variations of compares, but since GCC always wants to use WORD_MODE, |
| we can generate better code in most cases if we do it |
| ourselves. */ |
| void |
| spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[]) |
| { |
| int reverse_compare = 0; |
| int reverse_test = 0; |
| rtx compare_result; |
| rtx comp_rtx; |
| rtx target = operands[0]; |
| enum machine_mode comp_mode; |
| enum machine_mode op_mode; |
| enum spu_comp_code scode; |
| int index; |
| |
| /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1), |
| and so on, to keep the constant in operand 1. */ |
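| /* E.g., (x >= 5) becomes (x > 4), and unsigned (x < 5) becomes |
| unsigned (x <= 4). */ |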
| if (GET_CODE (spu_compare_op1) == CONST_INT) |
| { |
| HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1; |
| if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val) |
| switch (code) |
| { |
| case GE: |
| spu_compare_op1 = GEN_INT (val); |
| code = GT; |
| break; |
| case LT: |
| spu_compare_op1 = GEN_INT (val); |
| code = LE; |
| break; |
| case GEU: |
| spu_compare_op1 = GEN_INT (val); |
| code = GTU; |
| break; |
| case LTU: |
| spu_compare_op1 = GEN_INT (val); |
| code = LEU; |
| break; |
| default: |
| break; |
| } |
| } |
| |
| switch (code) |
| { |
| case GE: |
| reverse_compare = 1; |
| reverse_test = 1; |
| scode = SPU_GT; |
| break; |
| case LE: |
| reverse_compare = 0; |
| reverse_test = 1; |
| scode = SPU_GT; |
| break; |
| case LT: |
| reverse_compare = 1; |
| reverse_test = 0; |
| scode = SPU_GT; |
| break; |
| case GEU: |
| reverse_compare = 1; |
| reverse_test = 1; |
| scode = SPU_GTU; |
| break; |
| case LEU: |
| reverse_compare = 0; |
| reverse_test = 1; |
| scode = SPU_GTU; |
| break; |
| case LTU: |
| reverse_compare = 1; |
| reverse_test = 0; |
| scode = SPU_GTU; |
| break; |
| case NE: |
| reverse_compare = 0; |
| reverse_test = 1; |
| scode = SPU_EQ; |
| break; |
| |
| case EQ: |
| scode = SPU_EQ; |
| break; |
| case GT: |
| scode = SPU_GT; |
| break; |
| case GTU: |
| scode = SPU_GTU; |
| break; |
| default: |
| scode = SPU_EQ; |
| break; |
| } |
| |
| comp_mode = SImode; |
| op_mode = GET_MODE (spu_compare_op0); |
| |
| switch (op_mode) |
| { |
| case QImode: |
| index = 0; |
| comp_mode = QImode; |
| break; |
| case HImode: |
| index = 1; |
| comp_mode = HImode; |
| break; |
| case SImode: |
| index = 2; |
| break; |
| case DImode: |
| index = 3; |
| break; |
| case TImode: |
| index = 4; |
| break; |
| case SFmode: |
| index = 5; |
| break; |
| case DFmode: |
| index = 6; |
| break; |
| case V16QImode: |
| case V8HImode: |
| case V4SImode: |
| case V2DImode: |
| case V4SFmode: |
| case V2DFmode: |
| index = 7; |
| break; |
| default: |
| abort (); |
| } |
| |
| if (GET_MODE (spu_compare_op1) == DFmode) |
| { |
| rtx reg = gen_reg_rtx (DFmode); |
| if (!flag_unsafe_math_optimizations |
| || (scode != SPU_GT && scode != SPU_EQ)) |
| abort (); |
| if (reverse_compare) |
| emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0)); |
| else |
| emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1)); |
| reverse_compare = 0; |
| spu_compare_op0 = reg; |
| spu_compare_op1 = CONST0_RTX (DFmode); |
| } |
| |
| if (is_set == 0 && spu_compare_op1 == const0_rtx |
| && (GET_MODE (spu_compare_op0) == SImode |
| || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ) |
| { |
| /* Don't need to set a register with the result when we are |
| comparing against zero and branching. */ |
| reverse_test = !reverse_test; |
| compare_result = spu_compare_op0; |
| } |
| else |
| { |
| compare_result = gen_reg_rtx (comp_mode); |
| |
| if (reverse_compare) |
| { |
| rtx t = spu_compare_op1; |
| spu_compare_op1 = spu_compare_op0; |
| spu_compare_op0 = t; |
| } |
| |
| if (spu_comp_icode[index][scode] == 0) |
| abort (); |
| |
| if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate) |
| (spu_compare_op0, op_mode)) |
| spu_compare_op0 = force_reg (op_mode, spu_compare_op0); |
| if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate) |
| (spu_compare_op1, op_mode)) |
| spu_compare_op1 = force_reg (op_mode, spu_compare_op1); |
| comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result, |
| spu_compare_op0, |
| spu_compare_op1); |
| if (comp_rtx == 0) |
| abort (); |
| emit_insn (comp_rtx); |
| |
| } |
| |
| if (is_set == 0) |
| { |
| rtx bcomp; |
| rtx loc_ref; |
| |
| /* We don't have branch on QI compare insns, so we convert the |
| QI compare result to a HI result. */ |
| if (comp_mode == QImode) |
| { |
| rtx old_res = compare_result; |
| compare_result = gen_reg_rtx (HImode); |
| comp_mode = HImode; |
| emit_insn (gen_extendqihi2 (compare_result, old_res)); |
| } |
| |
| if (reverse_test) |
| bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx); |
| else |
| bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx); |
| |
| loc_ref = gen_rtx_LABEL_REF (VOIDmode, target); |
| emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, |
| gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, |
| loc_ref, pc_rtx))); |
| } |
| else if (is_set == 2) |
| { |
| int compare_size = GET_MODE_BITSIZE (comp_mode); |
| int target_size = GET_MODE_BITSIZE (GET_MODE (target)); |
| enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0); |
| rtx select_mask; |
| rtx op_t = operands[2]; |
| rtx op_f = operands[3]; |
| |
| /* The result of the comparison can be SI, HI or QI mode. Create a |
| mask based on that result. */ |
| if (target_size > compare_size) |
| { |
| select_mask = gen_reg_rtx (mode); |
| emit_insn (gen_extend_compare (select_mask, compare_result)); |
| } |
| else if (target_size < compare_size) |
| select_mask = |
| gen_rtx_SUBREG (mode, compare_result, |
| (compare_size - target_size) / BITS_PER_UNIT); |
| else if (comp_mode != mode) |
| select_mask = gen_rtx_SUBREG (mode, compare_result, 0); |
| else |
| select_mask = compare_result; |
| |
| if (GET_MODE (target) != GET_MODE (op_t) |
| || GET_MODE (target) != GET_MODE (op_f)) |
| abort (); |
| |
| if (reverse_test) |
| emit_insn (gen_selb (target, op_t, op_f, select_mask)); |
| else |
| emit_insn (gen_selb (target, op_f, op_t, select_mask)); |
| } |
| else |
| { |
| if (reverse_test) |
| emit_insn (gen_rtx_SET (VOIDmode, compare_result, |
| gen_rtx_NOT (comp_mode, compare_result))); |
| if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode) |
| emit_insn (gen_extendhisi2 (target, compare_result)); |
| else if (GET_MODE (target) == SImode |
| && GET_MODE (compare_result) == QImode) |
| emit_insn (gen_extend_compare (target, compare_result)); |
| else |
| emit_move_insn (target, compare_result); |
| } |
| } |
| |
| HOST_WIDE_INT |
| const_double_to_hwint (rtx x) |
| { |
| HOST_WIDE_INT val; |
| REAL_VALUE_TYPE rv; |
| if (GET_MODE (x) == SFmode) |
| { |
| REAL_VALUE_FROM_CONST_DOUBLE (rv, x); |
| REAL_VALUE_TO_TARGET_SINGLE (rv, val); |
| } |
| else if (GET_MODE (x) == DFmode) |
| { |
| long l[2]; |
| REAL_VALUE_FROM_CONST_DOUBLE (rv, x); |
| REAL_VALUE_TO_TARGET_DOUBLE (rv, l); |
| val = l[0]; |
| val = (val << 32) | (l[1] & 0xffffffff); |
| } |
| else |
| abort (); |
| return val; |
| } |
| |
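| /* Inverse of const_double_to_hwint: build an SFmode or DFmode |
| CONST_DOUBLE from the target bit pattern V. For SFmode only the low |
| 32 bits of V are used. */ |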
| rtx |
| hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v) |
| { |
| long tv[2]; |
| REAL_VALUE_TYPE rv; |
| gcc_assert (mode == SFmode || mode == DFmode); |
| |
| if (mode == SFmode) |
| tv[0] = (v << 32) >> 32; |
| else if (mode == DFmode) |
| { |
| tv[1] = (v << 32) >> 32; |
| tv[0] = v >> 32; |
| } |
| real_from_target (&rv, tv, mode); |
| return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode); |
| } |
| |
| void |
| print_operand_address (FILE * file, register rtx addr) |
| { |
| rtx reg; |
| rtx offset; |
| |
| if (GET_CODE (addr) == AND |
| && GET_CODE (XEXP (addr, 1)) == CONST_INT |
| && INTVAL (XEXP (addr, 1)) == -16) |
| addr = XEXP (addr, 0); |
| |
| switch (GET_CODE (addr)) |
| { |
| case REG: |
| fprintf (file, "0(%s)", reg_names[REGNO (addr)]); |
| break; |
| |
| case PLUS: |
| reg = XEXP (addr, 0); |
| offset = XEXP (addr, 1); |
| if (GET_CODE (offset) == REG) |
| { |
| fprintf (file, "%s,%s", reg_names[REGNO (reg)], |
| reg_names[REGNO (offset)]); |
| } |
| else if (GET_CODE (offset) == CONST_INT) |
| { |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)", |
| INTVAL (offset), reg_names[REGNO (reg)]); |
| } |
| else |
| abort (); |
| break; |
| |
| case CONST: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| case CONST_INT: |
| output_addr_const (file, addr); |
| break; |
| |
| default: |
| debug_rtx (addr); |
| abort (); |
| } |
| } |
| |
| void |
| print_operand (FILE * file, rtx x, int code) |
| { |
| enum machine_mode mode = GET_MODE (x); |
| HOST_WIDE_INT val; |
| unsigned char arr[16]; |
| int xcode = GET_CODE (x); |
| int i, info; |
| if (GET_MODE (x) == VOIDmode) |
| switch (code) |
| { |
| case 'L': /* 128 bits, signed */ |
| case 'm': /* 128 bits, signed */ |
| case 'T': /* 128 bits, signed */ |
| case 't': /* 128 bits, signed */ |
| mode = TImode; |
| break; |
| case 'K': /* 64 bits, signed */ |
| case 'k': /* 64 bits, signed */ |
| case 'D': /* 64 bits, signed */ |
| case 'd': /* 64 bits, signed */ |
| mode = DImode; |
| break; |
| case 'J': /* 32 bits, signed */ |
| case 'j': /* 32 bits, signed */ |
| case 's': /* 32 bits, signed */ |
| case 'S': /* 32 bits, signed */ |
| mode = SImode; |
| break; |
| } |
| switch (code) |
| { |
| |
| case 'j': /* 32 bits, signed */ |
| case 'k': /* 64 bits, signed */ |
| case 'm': /* 128 bits, signed */ |
| if (xcode == CONST_INT |
| || xcode == CONST_DOUBLE || xcode == CONST_VECTOR) |
| { |
| gcc_assert (logical_immediate_p (x, mode)); |
| constant_to_array (mode, x, arr); |
| val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; |
| val = trunc_int_for_mode (val, SImode); |
| switch (which_logical_immediate (val)) |
| { |
| case SPU_ORI: |
| break; |
| case SPU_ORHI: |
| fprintf (file, "h"); |
| break; |
| case SPU_ORBI: |
| fprintf (file, "b"); |
| break; |
| default: |
| gcc_unreachable(); |
| } |
| } |
| else |
| gcc_unreachable(); |
| return; |
| |
| case 'J': /* 32 bits, signed */ |
| case 'K': /* 64 bits, signed */ |
| case 'L': /* 128 bits, signed */ |
| if (xcode == CONST_INT |
| || xcode == CONST_DOUBLE || xcode == CONST_VECTOR) |
| { |
| gcc_assert (logical_immediate_p (x, mode) |
| || iohl_immediate_p (x, mode)); |
| constant_to_array (mode, x, arr); |
| val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; |
| val = trunc_int_for_mode (val, SImode); |
| switch (which_logical_immediate (val)) |
| { |
| case SPU_ORI: |
| case SPU_IOHL: |
| break; |
| case SPU_ORHI: |
| val = trunc_int_for_mode (val, HImode); |
| break; |
| case SPU_ORBI: |
| val = trunc_int_for_mode (val, QImode); |
| break; |
| default: |
| gcc_unreachable(); |
| } |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, val); |
| } |
| else |
| gcc_unreachable(); |
| return; |
| |
| case 't': /* 128 bits, signed */ |
| case 'd': /* 64 bits, signed */ |
| case 's': /* 32 bits, signed */ |
| if (CONSTANT_P (x)) |
| { |
| enum immediate_class c = classify_immediate (x, mode); |
| switch (c) |
| { |
| case IC_IL1: |
| constant_to_array (mode, x, arr); |
| val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; |
| val = trunc_int_for_mode (val, SImode); |
| switch (which_immediate_load (val)) |
| { |
| case SPU_IL: |
| break; |
| case SPU_ILA: |
| fprintf (file, "a"); |
| break; |
| case SPU_ILH: |
| fprintf (file, "h"); |
| break; |
| case SPU_ILHU: |
| fprintf (file, "hu"); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| break; |
| case IC_CPAT: |
| constant_to_array (mode, x, arr); |
| cpat_info (arr, GET_MODE_SIZE (mode), &info, 0); |
| if (info == 1) |
| fprintf (file, "b"); |
| else if (info == 2) |
| fprintf (file, "h"); |
| else if (info == 4) |
| fprintf (file, "w"); |
| else if (info == 8) |
| fprintf (file, "d"); |
| break; |
| case IC_IL1s: |
| if (xcode == CONST_VECTOR) |
| { |
| x = CONST_VECTOR_ELT (x, 0); |
| xcode = GET_CODE (x); |
| } |
| if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST) |
| fprintf (file, "a"); |
| else if (xcode == HIGH) |
| fprintf (file, "hu"); |
| break; |
| case IC_FSMBI: |
| case IC_IL2: |
| case IC_IL2s: |
| case IC_POOL: |
| abort (); |
| } |
| } |
| else |
| gcc_unreachable (); |
| return; |
| |
| case 'T': /* 128 bits, signed */ |
| case 'D': /* 64 bits, signed */ |
| case 'S': /* 32 bits, signed */ |
| if (CONSTANT_P (x)) |
| { |
| enum immediate_class c = classify_immediate (x, mode); |
| switch (c) |
| { |
| case IC_IL1: |
| constant_to_array (mode, x, arr); |
| val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; |
| val = trunc_int_for_mode (val, SImode); |
| switch (which_immediate_load (val)) |
| { |
| case SPU_IL: |
| case SPU_ILA: |
| break; |
| case SPU_ILH: |
| case SPU_ILHU: |
| val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, val); |
| break; |
| case IC_FSMBI: |
| constant_to_array (mode, x, arr); |
| val = 0; |
| for (i = 0; i < 16; i++) |
| { |
| val <<= 1; |
| val |= arr[i] & 1; |
| } |
| print_operand (file, GEN_INT (val), 0); |
| break; |
| case IC_CPAT: |
| constant_to_array (mode, x, arr); |
| cpat_info (arr, GET_MODE_SIZE (mode), 0, &info); |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info); |
| break; |
| case IC_IL1s: |
| if (xcode == CONST_VECTOR) |
| { |
| x = CONST_VECTOR_ELT (x, 0); |
| xcode = GET_CODE (x); |
| } |
| if (xcode == HIGH) |
| { |
| output_addr_const (file, XEXP (x, 0)); |
| fprintf (file, "@h"); |
| } |
| else |
| output_addr_const (file, x); |
| break; |
| case IC_IL2: |
| case IC_IL2s: |
| case IC_POOL: |
| abort (); |
| } |
| } |
| else |
| gcc_unreachable (); |
| return; |
| |
| case 'C': |
| if (xcode == CONST_INT) |
| { |
| /* Only the 4 least significant bits are relevant for the |
| generate-controls (c*d) instructions. */ |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15); |
| return; |
| } |
| break; |
| |
| case 'M': /* print code for c*d */ |
| if (GET_CODE (x) == CONST_INT) |
| switch (INTVAL (x)) |
| { |
| case 1: |
| fprintf (file, "b"); |
| break; |
| case 2: |
| fprintf (file, "h"); |
| break; |
| case 4: |
| fprintf (file, "w"); |
| break; |
| case 8: |
| fprintf (file, "d"); |
| break; |
| default: |
| gcc_unreachable(); |
| } |
| else |
| gcc_unreachable(); |
| return; |
| |
| case 'N': /* Negate the operand */ |
| if (xcode == CONST_INT) |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x)); |
| else if (xcode == CONST_VECTOR) |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, |
| -INTVAL (CONST_VECTOR_ELT (x, 0))); |
| return; |
| |
| case 'I': /* enable/disable interrupts */ |
| if (xcode == CONST_INT) |
| fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e"); |
| return; |
| |
| case 'b': /* branch modifiers */ |
| if (xcode == REG) |
| fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : ""); |
| else if (COMPARISON_P (x)) |
| fprintf (file, "%s", xcode == NE ? "n" : ""); |
| return; |
| |
| case 'i': /* indirect call */ |
| if (xcode == MEM) |
| { |
| if (GET_CODE (XEXP (x, 0)) == REG) |
| /* Used in indirect function calls. */ |
| fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]); |
| else |
| output_address (XEXP (x, 0)); |
| } |
| return; |
| |
| case 'p': /* load/store */ |
| if (xcode == MEM) |
| { |
| x = XEXP (x, 0); |
| xcode = GET_CODE (x); |
| } |
| if (xcode == AND) |
| { |
| x = XEXP (x, 0); |
| xcode = GET_CODE (x); |
| } |
| if (xcode == REG) |
| fprintf (file, "d"); |
| else if (xcode == CONST_INT) |
| fprintf (file, "a"); |
| else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF) |
| fprintf (file, "r"); |
| else if (xcode == PLUS || xcode == LO_SUM) |
| { |
| if (GET_CODE (XEXP (x, 1)) == REG) |
| fprintf (file, "x"); |
| else |
| fprintf (file, "d"); |
| } |
| return; |
| |
| case 0: |
| if (xcode == REG) |
| fprintf (file, "%s", reg_names[REGNO (x)]); |
| else if (xcode == MEM) |
| output_address (XEXP (x, 0)); |
| else if (xcode == CONST_VECTOR) |
| print_operand (file, CONST_VECTOR_ELT (x, 0), 0); |
| else |
| output_addr_const (file, x); |
| return; |
| |
| default: |
| output_operand_lossage ("invalid %%xn code"); |
| } |
| gcc_unreachable (); |
| } |
| |
| extern char call_used_regs[]; |
| extern char regs_ever_live[]; |
| |
| /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a |
| caller saved register. For leaf functions it is more efficient to |
| use a volatile register because we won't need to save and restore the |
| pic register. This routine is only valid after register allocation |
| is completed, so we can pick an unused register. */ |
| static rtx |
| get_pic_reg (void) |
| { |
| rtx pic_reg = pic_offset_table_rtx; |
| if (!reload_completed && !reload_in_progress) |
| abort (); |
| return pic_reg; |
| } |
| |
| /* Split constant addresses to handle cases that are too large. Also, add in |
| the pic register when in PIC mode. */ |
| int |
| spu_split_immediate (rtx * ops) |
| { |
| enum machine_mode mode = GET_MODE (ops[0]); |
| enum immediate_class c = classify_immediate (ops[1], mode); |
| |
| switch (c) |
| { |
| case IC_IL2: |
| { |
| unsigned char arrhi[16]; |
| unsigned char arrlo[16]; |
| rtx to, hi, lo; |
| int i; |
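| /* Split each 32-bit word: ARRHI keeps the high halfwords (loaded |
| first) and ARRLO the low halfwords (IORed in afterwards); per the |
| IC_IL2 class this becomes an ilhu/iohl pair. */ |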
| constant_to_array (mode, ops[1], arrhi); |
| to = no_new_pseudos ? ops[0] : gen_reg_rtx (mode); |
| for (i = 0; i < 16; i += 4) |
| { |
| arrlo[i + 2] = arrhi[i + 2]; |
| arrlo[i + 3] = arrhi[i + 3]; |
| arrlo[i + 0] = arrlo[i + 1] = 0; |
| arrhi[i + 2] = arrhi[i + 3] = 0; |
| } |
| hi = array_to_constant (mode, arrhi); |
| lo = array_to_constant (mode, arrlo); |
| emit_move_insn (to, hi); |
| emit_insn (gen_rtx_SET |
| (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo))); |
| return 1; |
| } |
| case IC_POOL: |
| if (reload_in_progress || reload_completed) |
| { |
| rtx mem = force_const_mem (mode, ops[1]); |
| if (TARGET_LARGE_MEM) |
| { |
| rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0])); |
| emit_move_insn (addr, XEXP (mem, 0)); |
| mem = replace_equiv_address (mem, addr); |
| } |
| emit_move_insn (ops[0], mem); |
| return 1; |
| } |
| break; |
| case IC_IL1s: |
| case IC_IL2s: |
| if (reload_completed && GET_CODE (ops[1]) != HIGH) |
| { |
| if (c == IC_IL2s) |
| { |
| emit_insn (gen_high (ops[0], ops[1])); |
| emit_insn (gen_low (ops[0], ops[0], ops[1])); |
| } |
| else if (flag_pic) |
| emit_insn (gen_pic (ops[0], ops[1])); |
| if (flag_pic) |
| { |
| rtx pic_reg = get_pic_reg (); |
| emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg)); |
| current_function_uses_pic_offset_table = 1; |
| } |
| return flag_pic || c == IC_IL2s; |
| } |
| break; |
| case IC_IL1: |
| case IC_FSMBI: |
| case IC_CPAT: |
| break; |
| } |
| return 0; |
| } |
| |
| /* SAVING is TRUE when we are generating the actual load and store |
| instructions for REGNO. When determining the size of the stack |
| needed for saving registers we must allocate enough space for the |
| worst case, because we don't always have the information early enough |
| to avoid allocating it. But we can at least eliminate the actual loads |
| and stores during the prologue/epilogue. */ |
| static int |
| need_to_save_reg (int regno, int saving) |
| { |
| if (regs_ever_live[regno] && !call_used_regs[regno]) |
| return 1; |
| if (flag_pic |
| && regno == PIC_OFFSET_TABLE_REGNUM |
| && (!saving || current_function_uses_pic_offset_table) |
| && (!saving |
| || !current_function_is_leaf || regs_ever_live[LAST_ARG_REGNUM])) |
| return 1; |
| return 0; |
| } |
| |
| /* This function is only correct starting with local register |
| allocation */ |
| int |
| spu_saved_regs_size (void) |
| { |
| int reg_save_size = 0; |
| int regno; |
| |
| for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno) |
| if (need_to_save_reg (regno, 0)) |
| reg_save_size += 0x10; |
| return reg_save_size; |
| } |
| |
| static rtx |
| frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset) |
| { |
| rtx reg = gen_rtx_REG (V4SImode, regno); |
| rtx mem = |
| gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset))); |
| return emit_insn (gen_movv4si (mem, reg)); |
| } |
| |
| static rtx |
| frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset) |
| { |
| rtx reg = gen_rtx_REG (V4SImode, regno); |
| rtx mem = |
| gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset))); |
| return emit_insn (gen_movv4si (reg, mem)); |
| } |
| |
| /* This happens after reload, so we need to expand it. */ |
| static rtx |
| frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch) |
| { |
| rtx insn; |
| if (satisfies_constraint_K (GEN_INT (imm))) |
| { |
| insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm))); |
| } |
| else |
| { |
| insn = emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode))); |
| REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, |
| REG_NOTES (insn)); |
| insn = emit_insn (gen_addsi3 (dst, src, scratch)); |
| if (REGNO (src) == REGNO (scratch)) |
| abort (); |
| } |
| if (REGNO (dst) == REGNO (scratch)) |
| REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, |
| REG_NOTES (insn)); |
| return insn; |
| } |
| |
| /* Return nonzero if this function is known to have a null epilogue. */ |
| |
| int |
| direct_return (void) |
| { |
| if (reload_completed) |
| { |
| if (cfun->static_chain_decl == 0 |
| && (spu_saved_regs_size () |
| + get_frame_size () |
| + current_function_outgoing_args_size |
| + current_function_pretend_args_size == 0) |
| && current_function_is_leaf) |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* |
| The stack frame looks like this: |
| +-------------+ |
| | incoming | |
| AP | args | |
| +-------------+ |
| | $lr save | |
| +-------------+ |
| prev SP | back chain | |
| +-------------+ |
| | var args | |
| | reg save | current_function_pretend_args_size bytes |
| +-------------+ |
| | ... | |
| | saved regs | spu_saved_regs_size() bytes |
| +-------------+ |
| | ... | |
| FP | vars | get_frame_size() bytes |
| +-------------+ |
| | ... | |
| | outgoing | |
| | args | current_function_outgoing_args_size bytes |
| +-------------+ |
| | $lr of next | |
| | frame | |
| +-------------+ |
| SP | back chain | |
| +-------------+ |
| |
| */ |
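| /* Each register save slot and the back chain occupy one 16-byte |
| quadword; see frame_emit_store and frame_emit_load above. */ |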
| void |
| spu_expand_prologue (void) |
| { |
| HOST_WIDE_INT size = get_frame_size (), offset, regno; |
| HOST_WIDE_INT total_size; |
| HOST_WIDE_INT saved_regs_size; |
| rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); |
| rtx scratch_reg_0, scratch_reg_1; |
| rtx insn, real; |
| |
| /* A NOTE_INSN_DELETED is supposed to be at the start and end of |
| the "toplevel" insn chain. */ |
| emit_note (NOTE_INSN_DELETED); |
| |
| if (flag_pic && optimize == 0) |
| current_function_uses_pic_offset_table = 1; |
| |
| if (spu_naked_function_p (current_function_decl)) |
| return; |
| |
| scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1); |
| scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2); |
| |
| saved_regs_size = spu_saved_regs_size (); |
| total_size = size + saved_regs_size |
| + current_function_outgoing_args_size |
| + current_function_pretend_args_size; |
| |
| if (!current_function_is_leaf |
| || current_function_calls_alloca || total_size > 0) |
| total_size += STACK_POINTER_OFFSET; |
| |
| /* Save this first because code after this might use the link |
| register as a scratch register. */ |
| if (!current_function_is_leaf) |
| { |
| insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
| if (total_size > 0) |
| { |
| offset = -current_function_pretend_args_size; |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) |
| if (need_to_save_reg (regno, 1)) |
| { |
| offset -= 16; |
| insn = frame_emit_store (regno, sp_reg, offset); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| |
| if (flag_pic && current_function_uses_pic_offset_table) |
| { |
| rtx pic_reg = get_pic_reg (); |
| insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0)); |
| REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, |
| REG_NOTES (insn)); |
| insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0)); |
| REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, |
| REG_NOTES (insn)); |
| } |
| |
| if (total_size > 0) |
| { |
| if (flag_stack_check) |
| { |
| /* We compare against total_size-1 because |
| ($sp >= total_size) <=> ($sp > total_size-1) */ |
| rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0)); |
| rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM); |
| rtx size_v4si = spu_const (V4SImode, total_size - 1); |
| if (!satisfies_constraint_K (GEN_INT (total_size - 1))) |
| { |
| emit_move_insn (scratch_v4si, size_v4si); |
| size_v4si = scratch_v4si; |
| } |
| emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si)); |
| emit_insn (gen_vec_extractv4si |
| (scratch_reg_0, scratch_v4si, GEN_INT (1))); |
| emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0))); |
| } |
| |
| /* Adjust the stack pointer, and make sure scratch_reg_0 contains |
| the value of the previous $sp because we save it as the back |
| chain. */ |
| if (total_size <= 2000) |
| { |
| /* In this case we save the back chain first. */ |
| insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size); |
| insn = |
| frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0); |
| } |
| else if (satisfies_constraint_K (GEN_INT (-total_size))) |
| { |
| insn = emit_move_insn (scratch_reg_0, sp_reg); |
| insn = |
| emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size))); |
| } |
| else |
| { |
| insn = emit_move_insn (scratch_reg_0, sp_reg); |
| insn = |
| frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1); |
| } |
| RTX_FRAME_RELATED_P (insn) = 1; |
| real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)); |
| REG_NOTES (insn) = |
| gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn)); |
| |
| if (total_size > 2000) |
| { |
| /* Save the back chain ptr */ |
| insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0); |
| } |
| |
| if (frame_pointer_needed) |
| { |
| rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM); |
| HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET |
| + current_function_outgoing_args_size; |
| /* Set the new frame_pointer */ |
| insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset)); |
| REG_NOTES (insn) = |
| gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, |
| real, REG_NOTES (insn)); |
| } |
| } |
| |
| emit_note (NOTE_INSN_DELETED); |
| } |
| |
| void |
| spu_expand_epilogue (bool sibcall_p) |
| { |
| int size = get_frame_size (), offset, regno; |
| HOST_WIDE_INT saved_regs_size, total_size; |
| rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); |
| rtx jump, scratch_reg_0; |
| |
| /* A NOTE_INSN_DELETED is supposed to be at the start and end of |
| the "toplevel" insn chain. */ |
| emit_note (NOTE_INSN_DELETED); |
| |
| if (spu_naked_function_p (current_function_decl)) |
| return; |
| |
| scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1); |
| |
| saved_regs_size = spu_saved_regs_size (); |
| total_size = size + saved_regs_size |
| + current_function_outgoing_args_size |
| + current_function_pretend_args_size; |
| |
| if (!current_function_is_leaf |
| || current_function_calls_alloca || total_size > 0) |
| total_size += STACK_POINTER_OFFSET; |
| |
| if (total_size > 0) |
| { |
| if (current_function_calls_alloca) |
| /* Load it from the back chain because our save_stack_block and |
| restore_stack_block do nothing. */ |
| frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0); |
| else |
| frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0); |
| |
| |
| if (saved_regs_size > 0) |
| { |
| offset = -current_function_pretend_args_size; |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) |
| if (need_to_save_reg (regno, 1)) |
| { |
| offset -= 0x10; |
| frame_emit_load (regno, sp_reg, offset); |
| } |
| } |
| } |
| |
| if (!current_function_is_leaf) |
| frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16); |
| |
| if (!sibcall_p) |
| { |
| emit_insn (gen_rtx_USE |
| (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM))); |
| jump = emit_jump_insn (gen__return ()); |
| emit_barrier_after (jump); |
| } |
| |
| emit_note (NOTE_INSN_DELETED); |
| } |
| |
| rtx |
| spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) |
| { |
| if (count != 0) |
| return 0; |
| /* This is inefficient because it ends up copying to a save-register |
| which then gets saved even though $lr has already been saved. But |
| it does generate better code for leaf functions and we don't need |
| to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only |
| used for __builtin_return_address anyway, so maybe we don't care if |
| it's inefficient. */ |
| return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM); |
| } |
| |
| |
| /* Given VAL, generate a constant appropriate for MODE. |
| If MODE is a vector mode, every element will be VAL. |
| For TImode, VAL will be zero extended to 128 bits. */ |
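| /* E.g., spu_const (V4SImode, 7) builds (const_vector:V4SI [7 7 7 7]), |
| and spu_const (SFmode, 0x3f800000) builds the CONST_DOUBLE for |
| 1.0f. */ |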
| rtx |
| spu_const (enum machine_mode mode, HOST_WIDE_INT val) |
| { |
| rtx inner; |
| rtvec v; |
| int units, i; |
| |
| gcc_assert (GET_MODE_CLASS (mode) == MODE_INT |
| || GET_MODE_CLASS (mode) == MODE_FLOAT |
| || GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
| || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT); |
| |
| if (GET_MODE_CLASS (mode) == MODE_INT) |
| return immed_double_const (val, 0, mode); |
| |
| /* val is the bit representation of the float */ |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| return hwint_to_const_double (mode, val); |
| |
| if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
| inner = immed_double_const (val, 0, GET_MODE_INNER (mode)); |
| else |
| inner = hwint_to_const_double (GET_MODE_INNER (mode), val); |
| |
| units = GET_MODE_NUNITS (mode); |
| |
| v = rtvec_alloc (units); |
| |
| for (i = 0; i < units; ++i) |
| RTVEC_ELT (v, i) = inner; |
| |
| return gen_rtx_CONST_VECTOR (mode, v); |
| } |
| |
| /* branch hint stuff */ |
| |
| /* The hardware requires 8 insns between a hint and the branch it |
| affects. This variable describes how many rtl instructions the |
| compiler needs to see before inserting a hint. (FIXME: We should |
| accept fewer and insert nops to enforce it, because hinting is always |
| profitable for performance, but we do need to be careful of code |
| size.) */ |
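| /* The distance is in bytes: 8 instructions of 4 bytes each, measured |
| with the byte addresses computed by shorten_branches. */ |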
| int spu_hint_dist = (8 * 4); |
| |
| /* An array of these is used to propagate hints to predecessor blocks. */ |
| struct spu_bb_info |
| { |
| rtx prop_jump; /* propagated from another block */ |
| basic_block bb; /* the original block. */ |
| }; |
| |
| /* LLVM LOCAL begin */ |
| #ifdef INSN_SCHEDULING |
| /* The special $hbr register is used to prevent the insn scheduler from |
| moving hbr insns across instructions which invalidate them. It |
| should only be used in a clobber, and this function searches for |
| insns which clobber it. */ |
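| /* A typical match is a PARALLEL containing (clobber (reg HBR_REGNUM)), |
| e.g. from inline asm that clobbers $hbr. */ |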
| static bool |
| insn_clobbers_hbr (rtx insn) |
| { |
| if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL) |
| { |
| rtx parallel = PATTERN (insn); |
| rtx clobber; |
| int j; |
| for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--) |
| { |
| clobber = XVECEXP (parallel, 0, j); |
| if (GET_CODE (clobber) == CLOBBER |
| && GET_CODE (XEXP (clobber, 0)) == REG |
| && REGNO (XEXP (clobber, 0)) == HBR_REGNUM) |
| return 1; |
| } |
| } |
| return 0; |
| } |
| |
| static void |
| spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance) |
| { |
| rtx branch_label; |
| rtx hint, insn, prev, next; |
| |
| if (before == 0 || branch == 0 || target == 0) |
| return; |
| |
| if (distance > 600) |
| return; |
| |
| |
| branch_label = gen_label_rtx (); |
| LABEL_NUSES (branch_label)++; |
| LABEL_PRESERVE_P (branch_label) = 1; |
| insn = emit_label_before (branch_label, branch); |
| branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label); |
| |
| /* If the previous insn is pipe0, make the hbr dual issue with it. If |
| the current insn is pipe0, dual issue with it. */ |
| prev = prev_active_insn (before); |
| if (prev && get_pipe (prev) == 0) |
| hint = emit_insn_before (gen_hbr (branch_label, target), before); |
| else if (get_pipe (before) == 0 && distance > spu_hint_dist) |
| { |
| next = next_active_insn (before); |
| hint = emit_insn_after (gen_hbr (branch_label, target), before); |
| if (next) |
| PUT_MODE (next, TImode); |
| } |
| else |
| { |
| hint = emit_insn_before (gen_hbr (branch_label, target), before); |
| PUT_MODE (hint, TImode); |
| } |
| recog_memoized (hint); |
| } |
| |
| /* Return 0 if we don't want a hint for this branch; otherwise return |
| the rtx for the branch target. */ |
| static rtx |
| get_branch_target (rtx branch) |
| { |
| if (GET_CODE (branch) == JUMP_INSN) |
| { |
| rtx set, src; |
| |
| /* Return statements */ |
| if (GET_CODE (PATTERN (branch)) == RETURN) |
| return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM); |
| |
| /* jump table */ |
| if (GET_CODE (PATTERN (branch)) == ADDR_VEC |
| || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC) |
| return 0; |
| |
| set = single_set (branch); |
| src = SET_SRC (set); |
| if (GET_CODE (SET_DEST (set)) != PC) |
| abort (); |
| |
| if (GET_CODE (src) == IF_THEN_ELSE) |
| { |
| rtx lab = 0; |
| rtx note = find_reg_note (branch, REG_BR_PROB, 0); |
| if (note) |
| { |
| /* If the more probable case is not a fall through, then |
| try a branch hint. */ |
| HOST_WIDE_INT prob = INTVAL (XEXP (note, 0)); |
| if (prob > (REG_BR_PROB_BASE * 6 / 10) |
| && GET_CODE (XEXP (src, 1)) != PC) |
| lab = XEXP (src, 1); |
| else if (prob < (REG_BR_PROB_BASE * 4 / 10) |
| && GET_CODE (XEXP (src, 2)) != PC) |
| lab = XEXP (src, 2); |
| } |
| if (lab) |
| { |
| if (GET_CODE (lab) == RETURN) |
| return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM); |
| return lab; |
| } |
| return 0; |
| } |
| |
| return src; |
| } |
| else if (GET_CODE (branch) == CALL_INSN) |
| { |
| rtx call; |
| /* All of our call patterns are in a PARALLEL and the CALL is |
| the first pattern in the PARALLEL. */ |
| if (GET_CODE (PATTERN (branch)) != PARALLEL) |
| abort (); |
| call = XVECEXP (PATTERN (branch), 0, 0); |
| if (GET_CODE (call) == SET) |
| call = SET_SRC (call); |
| if (GET_CODE (call) != CALL) |
| abort (); |
| return XEXP (XEXP (call, 0), 0); |
| } |
| return 0; |
| } |
| #endif |
| /* LLVM LOCAL end */ |
| |
| static void |
| insert_branch_hints (void) |
| { |
| /* LLVM LOCAL begin */ |
| #ifdef INSN_SCHEDULING |
| struct spu_bb_info *spu_bb_info; |
| rtx branch, insn, next; |
| rtx branch_target = 0; |
| int branch_addr = 0, insn_addr, head_addr; |
| basic_block bb; |
| unsigned int j; |
| |
| spu_bb_info = |
| (struct spu_bb_info *) xcalloc (last_basic_block + 1, |
| sizeof (struct spu_bb_info)); |
| |
| /* We need exact insn addresses and lengths. */ |
| shorten_branches (get_insns ()); |
| |
| FOR_EACH_BB_REVERSE (bb) |
| { |
| head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb))); |
| branch = 0; |
| if (spu_bb_info[bb->index].prop_jump) |
| { |
| branch = spu_bb_info[bb->index].prop_jump; |
| branch_target = get_branch_target (branch); |
| branch_addr = INSN_ADDRESSES (INSN_UID (branch)); |
| } |
| /* Search from end of a block to beginning. In this loop, find |
| jumps which need a branch hint and emit one only when: |
| - it's an indirect branch and we're at the insn which sets |
| the register |
| - we're at an insn that will invalidate the hint. e.g., a |
| call, another hint insn, inline asm that clobbers $hbr, and |
| some inlined operations (divmodsi4). Don't consider jumps |
| because they are only at the end of a block and are |
| considered when we are deciding whether to propagate |
| - we're getting too far away from the branch. The hbr insns |
| only have a signed 10 bit offset |
| We go back as far as possible so the branch will be considered |
| for propagation when we get to the beginning of the block. */ |
| next = 0; |
| for (insn = BB_END (bb); insn; insn = PREV_INSN (insn)) |
| { |
| if (INSN_P (insn)) |
| { |
| insn_addr = INSN_ADDRESSES (INSN_UID (insn)); |
| if (branch && next |
| && ((GET_CODE (branch_target) == REG |
| && set_of (branch_target, insn) != NULL_RTX) |
| || insn_clobbers_hbr (insn) |
| || branch_addr - insn_addr > 600)) |
| { |
| int next_addr = INSN_ADDRESSES (INSN_UID (next)); |
| if (insn != BB_END (bb) |
| && branch_addr - next_addr >= spu_hint_dist) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| "hint for %i in block %i before %i\n", |
| INSN_UID (branch), bb->index, INSN_UID (next)); |
| spu_emit_branch_hint (next, branch, branch_target, |
| branch_addr - next_addr); |
| } |
| branch = 0; |
| } |
| |
| /* JUMP_P will only be true at the end of a block. When |
| branch is already set it means we've previously decided |
| to propagate a hint for that branch into this block. */ |
| if (CALL_P (insn) || (JUMP_P (insn) && !branch)) |
| { |
| branch = 0; |
| if ((branch_target = get_branch_target (insn))) |
| { |
| branch = insn; |
| branch_addr = insn_addr; |
| } |
| } |
| |
| /* When a branch hint is emitted it will be inserted |
| before "next". Make sure next is the beginning of a |
| cycle to minimize impact on the scheduled insns. */ |
| if (GET_MODE (insn) == TImode) |
| next = insn; |
| } |
| if (insn == BB_HEAD (bb)) |
| break; |
| } |
| |
| if (branch) |
| { |
| /* If we haven't emitted a hint for this branch yet, it might |
| be profitable to emit it in one of the predecessor blocks, |
| especially for loops. */ |
| rtx bbend; |
| basic_block prev = 0, prop = 0, prev2 = 0; |
| int loop_exit = 0, simple_loop = 0; |
| int next_addr = 0; |
| if (next) |
| next_addr = INSN_ADDRESSES (INSN_UID (next)); |
| |
| for (j = 0; j < EDGE_COUNT (bb->preds); j++) |
| if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU) |
| prev = EDGE_PRED (bb, j)->src; |
| else |
| prev2 = EDGE_PRED (bb, j)->src; |
| |
| for (j = 0; j < EDGE_COUNT (bb->succs); j++) |
| if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT) |
| loop_exit = 1; |
| else if (EDGE_SUCC (bb, j)->dest == bb) |
| simple_loop = 1; |
| |
| /* If this branch is a loop exit then propagate to previous |
| fallthru block. This catches the cases when it is a simple |
| loop or when there is an initial branch into the loop. */ |
| if (prev && loop_exit && prev->loop_depth <= bb->loop_depth) |
| prop = prev; |
| |
| /* If there is only one adjacent predecessor, don't propagate |
| outside this loop. This loop_depth test isn't perfect, but |
| I'm not sure the loop_father member is valid at this point. */ |
| else if (prev && single_pred_p (bb) |
| && prev->loop_depth == bb->loop_depth) |
| prop = prev; |
| |
| /* If this is the JOIN block of a simple IF-THEN then |
| propagate the hint to the HEADER block. */ |
| else if (prev && prev2 |
| && EDGE_COUNT (bb->preds) == 2 |
| && EDGE_COUNT (prev->preds) == 1 |
| && EDGE_PRED (prev, 0)->src == prev2 |
| && prev2->loop_depth == bb->loop_depth |
| && GET_CODE (branch_target) != REG) |
| prop = prev; |
| |
| /* Don't propagate when: |
| - this is a simple loop and the hint would be too far |
| - this is not a simple loop and there are 16 insns in |
| this block already |
| - the predecessor block ends in a branch that will be |
| hinted |
| - the predecessor block ends in an insn that invalidates |
| the hint */ |
| if (prop |
| && prop->index >= 0 |
| && (bbend = BB_END (prop)) |
| && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) < |
| (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0 |
| && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend))) |
| { |
| if (dump_file) |
| fprintf (dump_file, "propagate from %i to %i (loop depth %i) " |
| "for %i (loop_exit %i simple_loop %i dist %i)\n", |
| bb->index, prop->index, bb->loop_depth, |
| INSN_UID (branch), loop_exit, simple_loop, |
| branch_addr - INSN_ADDRESSES (INSN_UID (bbend))); |
| |
| spu_bb_info[prop->index].prop_jump = branch; |
| spu_bb_info[prop->index].bb = bb; |
| } |
| else if (next && branch_addr - next_addr >= spu_hint_dist) |
| { |
| if (dump_file) |
| fprintf (dump_file, "hint for %i in block %i before %i\n", |
| INSN_UID (branch), bb->index, INSN_UID (next)); |
| spu_emit_branch_hint (next, branch, branch_target, |
| branch_addr - next_addr); |
| } |
| branch = 0; |
| } |
| } |
| free (spu_bb_info); |
| #endif |
| /* LLVM LOCAL end */ |
| } |
| |
| /* LLVM LOCAL begin */ |
| #ifdef INSN_SCHEDULING |
| /* Emit a nop for INSN such that the two will dual issue. This assumes |
| INSN is 8-byte aligned. When INSN is inline asm we emit an lnop. |
| We check for TImode to handle a MULTI1 insn which has dual issued its |
| first instruction. get_pipe returns -1 for MULTI0, inline asm, or |
| ADDR_VEC insns. */ |
| static void |
| emit_nop_for_insn (rtx insn) |
| { |
| int p; |
| rtx new_insn; |
| p = get_pipe (insn); |
| if (p == 1 && GET_MODE (insn) == TImode) |
| { |
| new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn); |
| PUT_MODE (new_insn, TImode); |
| PUT_MODE (insn, VOIDmode); |
| } |
| else |
| new_insn = emit_insn_after (gen_lnop (), insn); |
| } |
| #endif |
| /* LLVM LOCAL end */ |
| |
| /* Insert nops in basic blocks to meet dual issue alignment |
| requirements. */ |
| static void |
| insert_nops (void) |
| { |
| /* LLVM LOCAL begin */ |
| #ifdef INSN_SCHEDULING |
| rtx insn, next_insn, prev_insn; |
| int length; |
| int addr; |
| |
| /* This sets up INSN_ADDRESSES. */ |
| shorten_branches (get_insns ()); |
| |
| /* Keep track of length added by nops. */ |
| length = 0; |
| |
| prev_insn = 0; |
| for (insn = get_insns (); insn; insn = next_insn) |
| { |
| next_insn = next_active_insn (insn); |
| addr = INSN_ADDRESSES (INSN_UID (insn)); |
| if (GET_MODE (insn) == TImode |
| && next_insn |
| && GET_MODE (next_insn) != TImode |
| && ((addr + length) & 7) != 0) |
| { |
| /* prev_insn will always be set because the first insn is |
| always 8-byte aligned. */ |
| emit_nop_for_insn (prev_insn); |
| length += 4; |
| } |
| prev_insn = insn; |
| } |
| #endif |
| /* LLVM LOCAL end */ |
| } |
| |
| static void |
| spu_machine_dependent_reorg (void) |
| { |
| if (optimize > 0) |
| { |
| if (TARGET_BRANCH_HINTS) |
| insert_branch_hints (); |
| insert_nops (); |
| } |
| } |
| |
| |
| /* Insn scheduling routines, primarily for dual issue. */ |
| static int |
| spu_sched_issue_rate (void) |
| { |
| return 2; |
| } |
| |
| static int |
| spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED, |
| int verbose ATTRIBUTE_UNUSED, rtx insn, |
| int can_issue_more) |
| { |
| if (GET_CODE (PATTERN (insn)) != USE |
| && GET_CODE (PATTERN (insn)) != CLOBBER |
| && get_pipe (insn) != -2) |
| can_issue_more--; |
| return can_issue_more; |
| } |
| |
| static int |
| get_pipe (rtx insn) |
| { |
| enum attr_type t; |
| /* Handle inline asm */ |
| if (INSN_CODE (insn) == -1) |
| return -1; |
| t = get_attr_type (insn); |
| switch (t) |
| { |
| case TYPE_CONVERT: |
| return -2; |
| case TYPE_MULTI0: |
| return -1; |
| |
| case TYPE_FX2: |
| case TYPE_FX3: |
| case TYPE_SPR: |
| case TYPE_NOP: |
| case TYPE_FXB: |
| case TYPE_FPD: |
| case TYPE_FP6: |
| case TYPE_FP7: |
| case TYPE_IPREFETCH: |
| return 0; |
| |
| case TYPE_LNOP: |
| case TYPE_SHUF: |
| case TYPE_LOAD: |
| case TYPE_STORE: |
| case TYPE_BR: |
| case TYPE_MULTI1: |
| case TYPE_HBR: |
| return 1; |
| default: |
| abort (); |
| } |
| } |
| |
| static int |
| spu_sched_adjust_priority (rtx insn, int pri) |
| { |
| int p = get_pipe (insn); |
| /* Schedule UNSPEC_CONVERTs early so they have less effect on |
| scheduling. */ |
| if (GET_CODE (PATTERN (insn)) == USE |
| || GET_CODE (PATTERN (insn)) == CLOBBER |
| || p == -2) |
| return pri + 100; |
| /* Schedule pipe0 insns early for greedier dual issue. */ |
| if (p != 1) |
| return pri + 50; |
| return pri; |
| } |
| |
| /* INSN is dependent on DEP_INSN. */ |
| static int |
| spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, |
| rtx dep_insn ATTRIBUTE_UNUSED, int cost) |
| { |
| /* LLVM LOCAL begin */ |
| #ifdef INSN_SCHEDULING |
| if (GET_CODE (insn) == CALL_INSN) |
| return cost - 2; |
| /* The dfa scheduler sets cost to 0 for all anti-dependencies and the |
| scheduler makes every insn in a block anti-dependent on the final |
| jump_insn. We adjust here so higher cost insns will get scheduled |
| earlier. */ |
| if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI) |
| return INSN_COST (dep_insn) - 3; |
| return cost; |
| #else |
| /* If INSN_SCHEDULING is not defined, this function is merely a stub, so |
| return something reasonable to make the compiler happy. */ |
| return cost; |
| #endif |
| /* LLVM LOCAL end */ |
| } |
| |
| /* Create a CONST_DOUBLE from a string. */ |
| struct rtx_def * |
| spu_float_const (const char *string, enum machine_mode mode) |
| { |
| REAL_VALUE_TYPE value; |
| value = REAL_VALUE_ATOF (string, mode); |
| return CONST_DOUBLE_FROM_REAL_VALUE (value, mode); |
| } |
| |
| /* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the |
| CONST_INT fits constraint 'K', i.e., is small. */ |
| int |
| legitimate_const (rtx x, int aligned) |
| { |
| /* We can never know if the resulting address fits in 18 bits and can be |
| loaded with ila. Instead we should use the HI and LO relocations to |
| load a 32 bit address. */ |
| rtx sym, cst; |
| |
| gcc_assert (GET_CODE (x) == CONST); |
| |
| if (GET_CODE (XEXP (x, 0)) != PLUS) |
| return 0; |
| sym = XEXP (XEXP (x, 0), 0); |
| cst = XEXP (XEXP (x, 0), 1); |
| if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT) |
| return 0; |
| if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym))) |
| return 0; |
| return satisfies_constraint_K (cst); |
| } |
| |
| int |
| spu_constant_address_p (rtx x) |
| { |
| return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF |
| || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST |
| || GET_CODE (x) == HIGH); |
| } |
| |
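| /* Illustrative examples of the classification below (values assumed): |
| 0x1234 -> SPU_IL (fits the signed 16-bit il range), |
| 0x23456 -> SPU_ILA (fits the unsigned 18-bit ila range), |
| 0x12341234 -> SPU_ILH (both halfwords identical), |
| 0x56780000 -> SPU_ILHU (low halfword is zero). */ |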
| static enum spu_immediate |
| which_immediate_load (HOST_WIDE_INT val) |
| { |
| gcc_assert (val == trunc_int_for_mode (val, SImode)); |
| |
| if (val >= -0x8000 && val <= 0x7fff) |
| return SPU_IL; |
| if (val >= 0 && val <= 0x3ffff) |
| return SPU_ILA; |
| if ((val & 0xffff) == ((val >> 16) & 0xffff)) |
| return SPU_ILH; |
| if ((val & 0xffff) == 0) |
| return SPU_ILHU; |
| |
| return SPU_NONE; |
| } |
| |
| /* Return true when OP can be loaded by one of the il instructions, or |
| when flow2 is not completed and OP can be loaded using ilhu and iohl. */ |
| int |
| immediate_load_p (rtx op, enum machine_mode mode) |
| { |
| if (CONSTANT_P (op)) |
| { |
| enum immediate_class c = classify_immediate (op, mode); |
| return c == IC_IL1 || (!flow2_completed && c == IC_IL2); |
| } |
| return 0; |
| } |
| |
| /* Return true if the first SIZE bytes of arr are a constant that can |
| be generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and |
| PSTART are set to the run length and start offset of the pattern. */ |
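| /* For example (illustrative), the cwd pattern for a 4-byte word at |
| offset 4 is { 16,17,18,19, 0,1,2,3, 24,25,...,31 }: identity bytes |
| (i + 16) everywhere except a run of 0..3 at the insertion point. |
| See gen_cpat_const below for how such patterns are built. */ |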
| static int |
| cpat_info (unsigned char *arr, int size, int *prun, int *pstart) |
| { |
| int cpat, run, i, start; |
| cpat = 1; |
| run = 0; |
| start = -1; |
| for (i = 0; i < size && cpat; i++) |
| if (arr[i] != i+16) |
| { |
| if (!run) |
| { |
| start = i; |
| if (arr[i] == 3) |
| run = 1; |
| else if (arr[i] == 2 && arr[i+1] == 3) |
| run = 2; |
| else if (arr[i] == 0) |
| { |
| while (i+run < 16 && arr[i+run] == run) |
| run++; |
| if (run != 4 && run != 8) |
| cpat = 0; |
| } |
| else |
| cpat = 0; |
| if ((i & (run-1)) != 0) |
| cpat = 0; |
| i += run; |
| } |
| else |
| cpat = 0; |
| } |
| if (cpat && (run || size < 16)) |
| { |
| if (run == 0) |
| run = 1; |
| if (prun) |
| *prun = run; |
| if (pstart) |
| *pstart = start == -1 ? 16-run : start; |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* OP is a CONSTANT_P. Determine what instructions can be used to load |
| it into a register. MODE is only valid when OP is a CONST_INT. */ |
| static enum immediate_class |
| classify_immediate (rtx op, enum machine_mode mode) |
| { |
| HOST_WIDE_INT val; |
| unsigned char arr[16]; |
| int i, j, repeated, fsmbi; |
| |
| gcc_assert (CONSTANT_P (op)); |
| |
| if (GET_MODE (op) != VOIDmode) |
| mode = GET_MODE (op); |
| |
| /* A V4SI const_vector with all identical symbols is ok. */ |
| if (mode == V4SImode |
| && GET_CODE (op) == CONST_VECTOR |
| && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT |
| && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE |
| && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1) |
| && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2) |
| && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3)) |
| op = CONST_VECTOR_ELT (op, 0); |
| |
| switch (GET_CODE (op)) |
| { |
| case SYMBOL_REF: |
| case LABEL_REF: |
| return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s; |
| |
| case CONST: |
| return TARGET_LARGE_MEM |
| || !legitimate_const (op, 0) ? IC_IL2s : IC_IL1s; |
| |
| case HIGH: |
| return IC_IL1s; |
| |
| case CONST_VECTOR: |
| for (i = 0; i < GET_MODE_NUNITS (mode); i++) |
| if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT |
| && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE) |
| return IC_POOL; |
| /* Fall through. */ |
| |
| case CONST_INT: |
| case CONST_DOUBLE: |
| constant_to_array (mode, op, arr); |
| |
| /* Check that each 4-byte slot is identical. */ |
| repeated = 1; |
| for (i = 4; i < 16; i += 4) |
| for (j = 0; j < 4; j++) |
| if (arr[j] != arr[i + j]) |
| repeated = 0; |
| |
| if (repeated) |
| { |
| val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; |
| val = trunc_int_for_mode (val, SImode); |
| |
| if (which_immediate_load (val) != SPU_NONE) |
| return IC_IL1; |
| } |
| |
| /* Any mode of 2 bytes or smaller can be loaded with an il |
| instruction. */ |
| gcc_assert (GET_MODE_SIZE (mode) > 2); |
| |
| fsmbi = 1; |
| for (i = 0; i < 16 && fsmbi; i++) |
| if (arr[i] != 0 && arr[i] != 0xff) |
| fsmbi = 0; |
| if (fsmbi) |
| return IC_FSMBI; |
| |
| if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0)) |
| return IC_CPAT; |
| |
| if (repeated) |
| return IC_IL2; |
| |
| return IC_POOL; |
| default: |
| break; |
| } |
| gcc_unreachable (); |
| } |
| |
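| /* Illustrative examples (values assumed): 0x1ff -> SPU_ORI, |
| 0x1234 -> SPU_IOHL, 0x00030003 -> SPU_ORHI (halfwords repeat), |
| 0x03030303 -> SPU_ORBI (bytes repeat). */ |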
| static enum spu_immediate |
| which_logical_immediate (HOST_WIDE_INT val) |
| { |
| gcc_assert (val == trunc_int_for_mode (val, SImode)); |
| |
| if (val >= -0x200 && val <= 0x1ff) |
| return SPU_ORI; |
| if (val >= 0 && val <= 0xffff) |
| return SPU_IOHL; |
| if ((val & 0xffff) == ((val >> 16) & 0xffff)) |
| { |
| val = trunc_int_for_mode (val, HImode); |
| if (val >= -0x200 && val <= 0x1ff) |
| return SPU_ORHI; |
| if ((val & 0xff) == ((val >> 8) & 0xff)) |
| { |
| val = trunc_int_for_mode (val, QImode); |
| if (val >= -0x200 && val <= 0x1ff) |
| return SPU_ORBI; |
| } |
| } |
| return SPU_NONE; |
| } |
| |
| int |
| logical_immediate_p (rtx op, enum machine_mode mode) |
| { |
| HOST_WIDE_INT val; |
| unsigned char arr[16]; |
| int i, j; |
| |
| gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE |
| || GET_CODE (op) == CONST_VECTOR); |
| |
| if (GET_MODE (op) != VOIDmode) |
| mode = GET_MODE (op); |
| |
| constant_to_array (mode, op, arr); |
| |
| /* Check that bytes are repeated. */ |
| for (i = 4; i < 16; i += 4) |
| for (j = 0; j < 4; j++) |
| if (arr[j] != arr[i + j]) |
| return 0; |
| |
| val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; |
| val = trunc_int_for_mode (val, SImode); |
| |
| i = which_logical_immediate (val); |
| return i != SPU_NONE && i != SPU_IOHL; |
| } |
| |
| int |
| iohl_immediate_p (rtx op, enum machine_mode mode) |
| { |
| HOST_WIDE_INT val; |
| unsigned char arr[16]; |
| int i, j; |
| |
| gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE |
| || GET_CODE (op) == CONST_VECTOR); |
| |
| if (GET_MODE (op) != VOIDmode) |
| mode = GET_MODE (op); |
| |
| constant_to_array (mode, op, arr); |
| |
| /* Check that bytes are repeated. */ |
| for (i = 4; i < 16; i += 4) |
| for (j = 0; j < 4; j++) |
| if (arr[j] != arr[i + j]) |
| return 0; |
| |
| val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; |
| val = trunc_int_for_mode (val, SImode); |
| |
| return val >= 0 && val <= 0xffff; |
| } |
| |
| int |
| arith_immediate_p (rtx op, enum machine_mode mode, |
| HOST_WIDE_INT low, HOST_WIDE_INT high) |
| { |
| HOST_WIDE_INT val; |
| unsigned char arr[16]; |
| int bytes, i, j; |
| |
| gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE |
| || GET_CODE (op) == CONST_VECTOR); |
| |
| if (GET_MODE (op) != VOIDmode) |
| mode = GET_MODE (op); |
| |
| constant_to_array (mode, op, arr); |
| |
| if (VECTOR_MODE_P (mode)) |
| mode = GET_MODE_INNER (mode); |
| |
| bytes = GET_MODE_SIZE (mode); |
| mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0); |
| |
| /* Check that bytes are repeated. */ |
| for (i = bytes; i < 16; i += bytes) |
| for (j = 0; j < bytes; j++) |
| if (arr[j] != arr[i + j]) |
| return 0; |
| |
| val = arr[0]; |
| for (j = 1; j < bytes; j++) |
| val = (val << 8) | arr[j]; |
| |
| val = trunc_int_for_mode (val, mode); |
| |
| return val >= low && val <= high; |
| } |
| |
| /* We accept: |
| - any 32 bit constant (SImode, SFmode) |
| - any constant that can be generated with fsmbi (any mode) |
| - a 64 bit constant where the high and low bits are identical |
| (DImode, DFmode) |
| - a 128 bit constant where the four 32 bit words match. */ |
| int |
| spu_legitimate_constant_p (rtx x) |
| { |
| int i; |
| /* V4SI with all identical symbols is valid. */ |
| if (GET_MODE (x) == V4SImode |
| && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF |
| || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF |
| || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST |
| || GET_CODE (CONST_VECTOR_ELT (x, 0)) == HIGH)) |
| return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1) |
| && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2) |
| && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3); |
| |
| if (VECTOR_MODE_P (GET_MODE (x))) |
| for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++) |
| if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT |
| && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE) |
| return 0; |
| return 1; |
| } |
| |
| /* Valid addresses are: |
| - symbol_ref, label_ref, const |
| - reg |
| - reg + const, where either reg or const is 16 byte aligned |
| - reg + reg, alignment doesn't matter |
| The alignment matters in the reg+const case because lqd and stqd |
| ignore the 4 least significant bits of the const. (TODO: It might be |
| preferable to allow any alignment and fix it up when splitting.) */ |
| int |
| spu_legitimate_address (enum machine_mode mode, |
| rtx x, int reg_ok_strict) |
| { |
| if (mode == TImode && GET_CODE (x) == AND |
| && GET_CODE (XEXP (x, 1)) == CONST_INT |
| && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16) |
| x = XEXP (x, 0); |
| switch (GET_CODE (x)) |
| { |
| case SYMBOL_REF: |
| case LABEL_REF: |
| return !TARGET_LARGE_MEM; |
| |
| case CONST: |
| return !TARGET_LARGE_MEM && legitimate_const (x, 0); |
| |
| case CONST_INT: |
| return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff; |
| |
| case SUBREG: |
| x = XEXP (x, 0); |
| gcc_assert (GET_CODE (x) == REG); |
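| /* Fall through. */ |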
| |
| case REG: |
| return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict); |
| |
| case PLUS: |
| case LO_SUM: |
| { |
| rtx op0 = XEXP (x, 0); |
| rtx op1 = XEXP (x, 1); |
| if (GET_CODE (op0) == SUBREG) |
| op0 = XEXP (op0, 0); |
| if (GET_CODE (op1) == SUBREG) |
| op1 = XEXP (op1, 0); |
| /* We can't just accept any aligned register because CSE can |
| change it to a register that is not marked aligned and then |
| recog will fail. So we only accept frame registers because |
| they will only be changed to other frame registers. */ |
| if (GET_CODE (op0) == REG |
| && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) |
| && GET_CODE (op1) == CONST_INT |
| && INTVAL (op1) >= -0x2000 |
| && INTVAL (op1) <= 0x1fff |
| && (REGNO_PTR_FRAME_P (REGNO (op0)) || (INTVAL (op1) & 15) == 0)) |
| return 1; |
| if (GET_CODE (op0) == REG |
| && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) |
| && GET_CODE (op1) == REG |
| && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict)) |
| return 1; |
| } |
| break; |
| |
| default: |
| break; |
| } |
| return 0; |
| } |
| |
| /* When the address is reg + const_int, force the const_int into a |
| register. */ |
| rtx |
| spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, |
| enum machine_mode mode) |
| { |
| rtx op0, op1; |
| /* Make sure both operands are registers. */ |
| if (GET_CODE (x) == PLUS) |
| { |
| op0 = XEXP (x, 0); |
| op1 = XEXP (x, 1); |
| if (ALIGNED_SYMBOL_REF_P (op0)) |
| { |
| op0 = force_reg (Pmode, op0); |
| mark_reg_pointer (op0, 128); |
| } |
| else if (GET_CODE (op0) != REG) |
| op0 = force_reg (Pmode, op0); |
| if (ALIGNED_SYMBOL_REF_P (op1)) |
| { |
| op1 = force_reg (Pmode, op1); |
| mark_reg_pointer (op1, 128); |
| } |
| else if (GET_CODE (op1) != REG) |
| op1 = force_reg (Pmode, op1); |
| x = gen_rtx_PLUS (Pmode, op0, op1); |
| if (spu_legitimate_address (mode, x, 0)) |
| return x; |
| } |
| return NULL_RTX; |
| } |
| |
| /* Handle an attribute requiring a FUNCTION_DECL; arguments as in |
| struct attribute_spec.handler. */ |
| static tree |
| spu_handle_fndecl_attribute (tree * node, |
| tree name, |
| tree args ATTRIBUTE_UNUSED, |
| int flags ATTRIBUTE_UNUSED, bool * no_add_attrs) |
| { |
| if (TREE_CODE (*node) != FUNCTION_DECL) |
| { |
| warning (0, "`%s' attribute only applies to functions", |
| IDENTIFIER_POINTER (name)); |
| *no_add_attrs = true; |
| } |
| |
| return NULL_TREE; |
| } |
| |
| /* Handle the "vector" attribute. */ |
| static tree |
| spu_handle_vector_attribute (tree * node, tree name, |
| tree args ATTRIBUTE_UNUSED, |
| int flags ATTRIBUTE_UNUSED, bool * no_add_attrs) |
| { |
| tree type = *node, result = NULL_TREE; |
| enum machine_mode mode; |
| int unsigned_p; |
| |
| while (POINTER_TYPE_P (type) |
| || TREE_CODE (type) == FUNCTION_TYPE |
| || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE) |
| type = TREE_TYPE (type); |
| |
| mode = TYPE_MODE (type); |
| |
| unsigned_p = TYPE_UNSIGNED (type); |
| switch (mode) |
| { |
| case DImode: |
| result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); |
| break; |
| case SImode: |
| result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); |
| break; |
| case HImode: |
| result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node); |
| break; |
| case QImode: |
| result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); |
| break; |
| case SFmode: |
| result = V4SF_type_node; |
| break; |
| case DFmode: |
| result = V2DF_type_node; |
| break; |
| default: |
| break; |
| } |
| |
| /* Propagate qualifiers attached to the element type |
| onto the vector type. */ |
| if (result && result != type && TYPE_QUALS (type)) |
| result = build_qualified_type (result, TYPE_QUALS (type)); |
| |
| *no_add_attrs = true; /* No need to hang on to the attribute. */ |
| |
| if (!result) |
| warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name)); |
| else |
| *node = reconstruct_complex_type (*node, result); |
| |
| return NULL_TREE; |
| } |
| |
| /* Return non-zero if FUNC is a naked function. */ |
| static int |
| spu_naked_function_p (tree func) |
| { |
| tree a; |
| |
| if (TREE_CODE (func) != FUNCTION_DECL) |
| abort (); |
| |
| a = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); |
| return a != NULL_TREE; |
| } |
| |
| int |
| spu_initial_elimination_offset (int from, int to) |
| { |
| int saved_regs_size = spu_saved_regs_size (); |
| int sp_offset = 0; |
| if (!current_function_is_leaf || current_function_outgoing_args_size |
| || get_frame_size () || saved_regs_size) |
| sp_offset = STACK_POINTER_OFFSET; |
| if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) |
| return (sp_offset + current_function_outgoing_args_size); |
| else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
| return 0; |
| else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) |
| return sp_offset + current_function_outgoing_args_size |
| + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET; |
| else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
| return get_frame_size () + saved_regs_size + sp_offset; |
| return 0; |
| } |
| |
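| /* Return the register (or PARALLEL) in which TYPE is returned. As an |
| illustration (struct layout assumed): a 3-byte struct is returned |
| left justified in a single SImode piece of the first return |
| register, because byte_size is rounded up to 4 below. */ |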
| rtx |
| spu_function_value (tree type, tree func ATTRIBUTE_UNUSED) |
| { |
| enum machine_mode mode = TYPE_MODE (type); |
| int byte_size = ((mode == BLKmode) |
| ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); |
| |
| /* Make sure small structs are left justified in a register. */ |
| if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) |
| && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0) |
| { |
| enum machine_mode smode; |
| rtvec v; |
| int i; |
| int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| int n = byte_size / UNITS_PER_WORD; |
| v = rtvec_alloc (nregs); |
| for (i = 0; i < n; i++) |
| { |
| RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (TImode, |
| FIRST_RETURN_REGNUM |
| + i), |
| GEN_INT (UNITS_PER_WORD * i)); |
| byte_size -= UNITS_PER_WORD; |
| } |
| |
| if (n < nregs) |
| { |
| if (byte_size < 4) |
| byte_size = 4; |
| smode = |
| smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT); |
| RTVEC_ELT (v, n) = |
| gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n), |
| GEN_INT (UNITS_PER_WORD * n)); |
| } |
| return gen_rtx_PARALLEL (mode, v); |
| } |
| return gen_rtx_REG (mode, FIRST_RETURN_REGNUM); |
| } |
| |
| rtx |
| spu_function_arg (CUMULATIVE_ARGS cum, |
| enum machine_mode mode, |
| tree type, int named ATTRIBUTE_UNUSED) |
| { |
| int byte_size; |
| |
| if (cum >= MAX_REGISTER_ARGS) |
| return 0; |
| |
| byte_size = ((mode == BLKmode) |
| ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); |
| |
| /* The ABI does not allow parameters to be passed partially in |
| registers and partially on the stack. */ |
| if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS) |
| return 0; |
| |
| /* Make sure small structs are left justified in a register. */ |
| if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) |
| && byte_size < UNITS_PER_WORD && byte_size > 0) |
| { |
| enum machine_mode smode; |
| rtx gr_reg; |
| if (byte_size < 4) |
| byte_size = 4; |
| smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT); |
| gr_reg = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum), |
| const0_rtx); |
| return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg)); |
| } |
| else |
| return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum); |
| } |
| |
| /* Variable sized types are passed by reference. */ |
| static bool |
| spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED, |
| enum machine_mode mode ATTRIBUTE_UNUSED, |
| tree type, bool named ATTRIBUTE_UNUSED) |
| { |
| return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; |
| } |
| |
| |
| /* Var args. */ |
| |
| /* Create and return the va_list datatype. |
| |
| On SPU, va_list is an array type equivalent to |
| |
| typedef struct __va_list_tag |
| { |
| void *__args __attribute__((__aligned(16))); |
| void *__skip __attribute__((__aligned(16))); |
| |
| } va_list[1]; |
| |
| where __args points to the arg that will be returned by the next |
| va_arg(), and __skip points to the previous stack frame such that |
| when __args == __skip we should advance __args by 32 bytes. */ |
| static tree |
| spu_build_builtin_va_list (void) |
| { |
| tree f_args, f_skip, record, type_decl; |
| bool owp; |
| |
| record = (*lang_hooks.types.make_type) (RECORD_TYPE); |
| |
| type_decl = |
| build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); |
| |
| f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node); |
| f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node); |
| |
| DECL_FIELD_CONTEXT (f_args) = record; |
| DECL_ALIGN (f_args) = 128; |
| DECL_USER_ALIGN (f_args) = 1; |
| |
| DECL_FIELD_CONTEXT (f_skip) = record; |
| DECL_ALIGN (f_skip) = 128; |
| DECL_USER_ALIGN (f_skip) = 1; |
| |
| TREE_CHAIN (record) = type_decl; |
| TYPE_NAME (record) = type_decl; |
| TYPE_FIELDS (record) = f_args; |
| TREE_CHAIN (f_args) = f_skip; |
| |
| /* We know this record gets padded and the padding is intentional. It |
| is an internal type, so hide the warnings from the user. */ |
| owp = warn_padded; |
| warn_padded = false; |
| |
| layout_type (record); |
| |
| warn_padded = owp; |
| |
| /* The correct type is an array type of one element. */ |
| return build_array_type (record, build_index_type (size_zero_node)); |
| } |
| |
| /* Implement va_start by filling the va_list structure VALIST. |
| NEXTARG points to the first anonymous stack argument. |
| |
| The following global variables are used to initialize |
| the va_list structure: |
| |
| current_function_args_info: |
| the CUMULATIVE_ARGS for this function |
| |
| current_function_arg_offset_rtx: |
| holds the offset of the first anonymous stack argument |
| (relative to the virtual arg pointer). */ |
| |
| void |
| spu_va_start (tree valist, rtx nextarg) |
| { |
| tree f_args, f_skip; |
| tree args, skip, t; |
| |
| f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); |
| f_skip = TREE_CHAIN (f_args); |
| |
| valist = build_va_arg_indirect_ref (valist); |
| args = |
| build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE); |
| skip = |
| build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE); |
| |
| /* Find the __args area. */ |
| t = make_tree (TREE_TYPE (args), nextarg); |
| if (current_function_pretend_args_size > 0) |
| t = build2 (PLUS_EXPR, TREE_TYPE (args), t, |
| build_int_cst (integer_type_node, -STACK_POINTER_OFFSET)); |
| t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| |
| /* Find the __skip area. */ |
| t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx); |
| t = build2 (PLUS_EXPR, TREE_TYPE (skip), t, |
| build_int_cst (integer_type_node, |
| (current_function_pretend_args_size |
| - STACK_POINTER_OFFSET))); |
| t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| } |
| |
| /* Gimplify va_arg by updating the va_list structure |
| VALIST as required to retrieve an argument of type |
| TYPE, and returning that argument. |
| |
| ret = va_arg(VALIST, TYPE); |
| |
| generates code equivalent to: |
| |
| paddedsize = (sizeof(TYPE) + 15) & -16; |
| if (VALIST.__args + paddedsize > VALIST.__skip |
| && VALIST.__args <= VALIST.__skip) |
| addr = VALIST.__skip + 32; |
| else |
| addr = VALIST.__args; |
| VALIST.__args = addr + paddedsize; |
| ret = *(TYPE *)addr; |
| */ |
| static tree |
| spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p, |
| tree * post_p ATTRIBUTE_UNUSED) |
| { |
| tree f_args, f_skip; |
| tree args, skip; |
| HOST_WIDE_INT size, rsize; |
| tree paddedsize, addr, tmp; |
| bool pass_by_reference_p; |
| |
| f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); |
| f_skip = TREE_CHAIN (f_args); |
| |
| valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist); |
| args = |
| build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE); |
| skip = |
| build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE); |
| |
| addr = create_tmp_var (ptr_type_node, "va_arg"); |
| DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set (); |
| |
| /* If an object is dynamically sized, a pointer to it is passed |
| instead of the object itself. */ |
| pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type, |
| false); |
| if (pass_by_reference_p) |
| type = build_pointer_type (type); |
| size = int_size_in_bytes (type); |
| rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD; |
| |
| /* Build the conditional expression to calculate addr. The expression |
| will be gimplified later. */ |
| paddedsize = fold_convert (ptr_type_node, size_int (rsize)); |
| tmp = build2 (PLUS_EXPR, ptr_type_node, args, paddedsize); |
| tmp = build2 (TRUTH_AND_EXPR, boolean_type_node, |
| build2 (GT_EXPR, boolean_type_node, tmp, skip), |
| build2 (LE_EXPR, boolean_type_node, args, skip)); |
| |
| tmp = build3 (COND_EXPR, ptr_type_node, tmp, |
| build2 (PLUS_EXPR, ptr_type_node, skip, |
| fold_convert (ptr_type_node, size_int (32))), args); |
| |
| tmp = build2 (MODIFY_EXPR, ptr_type_node, addr, tmp); |
| gimplify_and_add (tmp, pre_p); |
| |
| /* Update VALIST.__args. */ |
| tmp = build2 (PLUS_EXPR, ptr_type_node, addr, paddedsize); |
| tmp = build2 (MODIFY_EXPR, TREE_TYPE (args), args, tmp); |
| gimplify_and_add (tmp, pre_p); |
| |
| addr = fold_convert (build_pointer_type (type), addr); |
| |
| if (pass_by_reference_p) |
| addr = build_va_arg_indirect_ref (addr); |
| |
| return build_va_arg_indirect_ref (addr); |
| } |
| |
| /* Save parameter registers starting with the register that corresponds |
| to the first unnamed parameter. If the first unnamed parameter is |
| in the stack then save no registers. Set pretend_args_size to the |
| amount of space needed to save the registers. */ |
| void |
| spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode, |
| tree type, int *pretend_size, int no_rtl) |
| { |
| if (!no_rtl) |
| { |
| rtx tmp; |
| int regno; |
| int offset; |
| int ncum = *cum; |
| |
| /* cum currently points to the last named argument; we want to |
| start at the next argument. */ |
| FUNCTION_ARG_ADVANCE (ncum, mode, type, 1); |
| |
| offset = -STACK_POINTER_OFFSET; |
| for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++) |
| { |
| tmp = gen_frame_mem (V4SImode, |
| plus_constant (virtual_incoming_args_rtx, |
| offset)); |
| emit_move_insn (tmp, |
| gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno)); |
| offset += 16; |
| } |
| *pretend_size = offset + STACK_POINTER_OFFSET; |
| } |
| } |
| |
| void |
| spu_conditional_register_usage (void) |
| { |
| if (flag_pic) |
| { |
| fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; |
| call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; |
| } |
| global_regs[INTR_REGNUM] = 1; |
| } |
| |
| /* This is called to decide when we can simplify a load instruction. We |
| must only return true for registers which we know will always be |
| aligned, taking into account that CSE might replace this reg with |
| another one that has not been marked aligned. So this is really only |
| true for frame, stack and virtual registers, which we know are always |
| aligned and should not be adversely affected by CSE. */ |
| static int |
| regno_aligned_for_load (int regno) |
| { |
| return regno == FRAME_POINTER_REGNUM |
| || regno == HARD_FRAME_POINTER_REGNUM |
| || regno == STACK_POINTER_REGNUM |
| || (regno >= FIRST_VIRTUAL_REGISTER && regno <= LAST_VIRTUAL_REGISTER); |
| } |
| |
| /* Return TRUE when mem is known to be 16-byte aligned. */ |
| int |
| aligned_mem_p (rtx mem) |
| { |
| if (MEM_ALIGN (mem) >= 128) |
| return 1; |
| if (GET_MODE_SIZE (GET_MODE (mem)) >= 16) |
| return 1; |
| if (GET_CODE (XEXP (mem, 0)) == PLUS) |
| { |
| rtx p0 = XEXP (XEXP (mem, 0), 0); |
| rtx p1 = XEXP (XEXP (mem, 0), 1); |
| if (regno_aligned_for_load (REGNO (p0))) |
| { |
| if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1))) |
| return 1; |
| if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0) |
| return 1; |
| } |
| } |
| else if (GET_CODE (XEXP (mem, 0)) == REG) |
| { |
| if (regno_aligned_for_load (REGNO (XEXP (mem, 0)))) |
| return 1; |
| } |
| else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0))) |
| return 1; |
| else if (GET_CODE (XEXP (mem, 0)) == CONST) |
| { |
| rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0); |
| rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1); |
| if (GET_CODE (p0) == SYMBOL_REF |
| && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0) |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF |
| into its SYMBOL_REF_FLAGS. */ |
| static void |
| spu_encode_section_info (tree decl, rtx rtl, int first) |
| { |
| default_encode_section_info (decl, rtl, first); |
| |
| /* If a variable has a forced alignment to < 16 bytes, mark it with |
| SYMBOL_FLAG_ALIGN1. */ |
| if (TREE_CODE (decl) == VAR_DECL |
| && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128) |
| SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1; |
| } |
| |
| /* Return TRUE if we are certain the mem refers to a complete object |
| which is both 16-byte aligned and padded to a 16-byte boundary. This |
| would make it safe to store with a single instruction. |
| We guarantee the alignment and padding for static objects by aligning |
| all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.) |
| FIXME: We currently cannot guarantee this for objects on the stack |
| because assign_parm_setup_stack calls assign_stack_local with the |
| alignment of the parameter mode and in that case the alignment never |
| gets adjusted by LOCAL_ALIGNMENT. */ |
| static int |
| store_with_one_insn_p (rtx mem) |
| { |
| rtx addr = XEXP (mem, 0); |
| if (GET_MODE (mem) == BLKmode) |
| return 0; |
| /* Only static objects. */ |
| if (GET_CODE (addr) == SYMBOL_REF) |
| { |
| /* We use the associated declaration to make sure the access is |
| referring to the whole object. |
| We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure |
| if it is necessary. Will there be cases where one exists, and |
| the other does not? Will there be cases where both exist, but |
| have different types? */ |
| tree decl = MEM_EXPR (mem); |
| if (decl |
| && TREE_CODE (decl) == VAR_DECL |
| && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl))) |
| return 1; |
| decl = SYMBOL_REF_DECL (addr); |
| if (decl |
| && TREE_CODE (decl) == VAR_DECL |
| && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl))) |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| spu_expand_mov (rtx * ops, enum machine_mode mode) |
| { |
| if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0])) |
| abort (); |
| |
| if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1])) |
| { |
| rtx from = SUBREG_REG (ops[1]); |
| enum machine_mode imode = GET_MODE (from); |
| |
| gcc_assert (GET_MODE_CLASS (mode) == MODE_INT |
| && GET_MODE_CLASS (imode) == MODE_INT |
| && subreg_lowpart_p (ops[1])); |
| |
| if (GET_MODE_SIZE (imode) < 4) |
| { |
| from = gen_rtx_SUBREG (SImode, from, 0); |
| imode = SImode; |
| } |
| |
| if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode)) |
| { |
| enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code; |
| emit_insn (GEN_FCN (icode) (ops[0], from)); |
| } |
| else |
| emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1)); |
| return 1; |
| } |
| |
| /* At least one of the operands needs to be a register. */ |
| if ((reload_in_progress | reload_completed) == 0 |
| && !register_operand (ops[0], mode) && !register_operand (ops[1], mode)) |
| { |
| rtx temp = force_reg (mode, ops[1]); |
| emit_move_insn (ops[0], temp); |
| return 1; |
| } |
| if (reload_in_progress || reload_completed) |
| { |
| if (CONSTANT_P (ops[1])) |
| return spu_split_immediate (ops); |
| return 0; |
| } |
| else |
| { |
| if (GET_CODE (ops[0]) == MEM) |
| { |
| if (!spu_valid_move (ops)) |
| { |
| emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode), |
| gen_reg_rtx (TImode))); |
| return 1; |
| } |
| } |
| else if (GET_CODE (ops[1]) == MEM) |
| { |
| if (!spu_valid_move (ops)) |
| { |
| emit_insn (gen_load |
| (ops[0], ops[1], gen_reg_rtx (TImode), |
| gen_reg_rtx (SImode))); |
| return 1; |
| } |
| } |
| /* Catch the SImode immediates greater than 0x7fffffff, and sign |
| extend them. */ |
| if (GET_CODE (ops[1]) == CONST_INT) |
| { |
| HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode); |
| if (val != INTVAL (ops[1])) |
| { |
| emit_move_insn (ops[0], GEN_INT (val)); |
| return 1; |
| } |
| } |
| } |
| return 0; |
| } |
| |
| static int |
| reg_align (rtx reg) |
| { |
| /* For now, only frame registers are known to be aligned at all times. |
| We can't trust REGNO_POINTER_ALIGN because optimization will move |
| registers around, potentially changing an "aligned" register in an |
| address to an unaligned register, which would result in an invalid |
| address. */ |
| int regno = REGNO (reg); |
| return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1; |
| } |
| |
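| /* Illustrative expansion (register numbers assumed): an SImode load |
| from an unaligned register address $3 becomes roughly |
| lqd $2, 0($3) # load the containing quadword |
| rotqby $2, $2, $3 # rotate the wanted bytes to the preferred slot |
| followed by a convert from TImode back to the scalar mode. */ |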
| void |
| spu_split_load (rtx * ops) |
| { |
| enum machine_mode mode = GET_MODE (ops[0]); |
| rtx addr, load, rot, mem, p0, p1; |
| int rot_amt; |
| |
| addr = XEXP (ops[1], 0); |
| |
| rot = 0; |
| rot_amt = 0; |
| if (GET_CODE (addr) == PLUS) |
| { |
| /* 8 cases: |
| aligned reg + aligned reg => lqx |
| aligned reg + unaligned reg => lqx, rotqby |
| aligned reg + aligned const => lqd |
| aligned reg + unaligned const => lqd, rotqbyi |
| unaligned reg + aligned reg => lqx, rotqby |
| unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch) |
| unaligned reg + aligned const => lqd, rotqby |
| unaligned reg + unaligned const -> not allowed by legitimate address |
| */ |
| p0 = XEXP (addr, 0); |
| p1 = XEXP (addr, 1); |
| if (reg_align (p0) < 128) |
| { |
| if (GET_CODE (p1) == REG && reg_align (p1) < 128) |
| { |
| emit_insn (gen_addsi3 (ops[3], p0, p1)); |
| rot = ops[3]; |
| } |
| else |
| rot = p0; |
| } |
| else |
| { |
| if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) |
| { |
| rot_amt = INTVAL (p1) & 15; |
| p1 = GEN_INT (INTVAL (p1) & -16); |
| addr = gen_rtx_PLUS (SImode, p0, p1); |
| } |
| else if (GET_CODE (p1) == REG && reg_align (p1) < 128) |
| rot = p1; |
| } |
| } |
| else if (GET_CODE (addr) == REG) |
| { |
| if (reg_align (addr) < 128) |
| rot = addr; |
| } |
| else if (GET_CODE (addr) == CONST) |
| { |
| if (GET_CODE (XEXP (addr, 0)) == PLUS |
| && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) |
| && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) |
| { |
| rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1)); |
| if (rot_amt & -16) |
| addr = gen_rtx_CONST (Pmode, |
| gen_rtx_PLUS (Pmode, |
| XEXP (XEXP (addr, 0), 0), |
| GEN_INT (rot_amt & -16))); |
| else |
| addr = XEXP (XEXP (addr, 0), 0); |
| } |
| else |
| rot = addr; |
| } |
| else if (GET_CODE (addr) == CONST_INT) |
| { |
| rot_amt = INTVAL (addr); |
| addr = GEN_INT (rot_amt & -16); |
| } |
| else if (!ALIGNED_SYMBOL_REF_P (addr)) |
| rot = addr; |
| |
| if (GET_MODE_SIZE (mode) < 4) |
| rot_amt += GET_MODE_SIZE (mode) - 4; |
| |
| rot_amt &= 15; |
| |
| if (rot && rot_amt) |
| { |
| emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt))); |
| rot = ops[3]; |
| rot_amt = 0; |
| } |
| |
| load = ops[2]; |
| |
| addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16)); |
| mem = change_address (ops[1], TImode, addr); |
| |
| emit_insn (gen_movti (load, mem)); |
| |
| if (rot) |
| emit_insn (gen_rotqby_ti (load, load, rot)); |
| else if (rot_amt) |
| emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8))); |
| |
| if (reload_completed) |
| emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load))); |
| else |
| emit_insn (gen_spu_convert (ops[0], load)); |
| } |
| |
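| /* Illustrative expansion (register numbers assumed): an SImode store |
| of $3 to an unaligned address in $4 becomes roughly |
| lqd $2, 0($4) # load the containing quadword |
| cwd $5, 0($4) # build the insertion control for a word |
| shufb $2, $3, $2, $5 # merge the new word into the quadword |
| stqd $2, 0($4) # store the quadword back |
| matching the lqx/c?x/shuf/stqx cases in the comment below. */ |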
| void |
| spu_split_store (rtx * ops) |
| { |
| enum machine_mode mode = GET_MODE (ops[0]); |
| rtx pat = ops[2]; |
| rtx reg = ops[3]; |
| rtx addr, p0, p1, p1_lo, smem; |
| int aform; |
| int scalar; |
| |
| addr = XEXP (ops[0], 0); |
| |
| if (GET_CODE (addr) == PLUS) |
| { |
| /* 8 cases: |
| aligned reg + aligned reg => lqx, c?x, shuf, stqx |
| aligned reg + unaligned reg => lqx, c?x, shuf, stqx |
| aligned reg + aligned const => lqd, c?d, shuf, stqx |
| aligned reg + unaligned const => lqd, c?d, shuf, stqx |
| unaligned reg + aligned reg => lqx, c?x, shuf, stqx |
| unaligned reg + unaligned reg => lqx, c?x, shuf, stqx |
| unaligned reg + aligned const => lqd, c?d, shuf, stqx |
| unaligned reg + unaligned const -> not allowed by legitimate address |
| */ |
| aform = 0; |
| p0 = XEXP (addr, 0); |
| p1 = p1_lo = XEXP (addr, 1); |
| if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT) |
| { |
| p1_lo = GEN_INT (INTVAL (p1) & 15); |
| p1 = GEN_INT (INTVAL (p1) & -16); |
| addr = gen_rtx_PLUS (SImode, p0, p1); |
| } |
| } |
| else if (GET_CODE (addr) == REG) |
| { |
| aform = 0; |
| p0 = addr; |
| p1 = p1_lo = const0_rtx; |
| } |
| else |
| { |
| aform = 1; |
| p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); |
| p1 = 0; /* aform doesn't use p1 */ |
| p1_lo = addr; |
| if (ALIGNED_SYMBOL_REF_P (addr)) |
| p1_lo = const0_rtx; |
| else if (GET_CODE (addr) == CONST) |
| { |
| if (GET_CODE (XEXP (addr, 0)) == PLUS |
| && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) |
| && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) |
| { |
| HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1)); |
| if ((v & -16) != 0) |
| addr = gen_rtx_CONST (Pmode, |
| gen_rtx_PLUS (Pmode, |
| XEXP (XEXP (addr, 0), 0), |
| GEN_INT (v & -16))); |
| else |
| addr = XEXP (XEXP (addr, 0), 0); |
| p1_lo = GEN_INT (v & 15); |
| } |
| } |
| else if (GET_CODE (addr) == CONST_INT) |
| { |
| p1_lo = GEN_INT (INTVAL (addr) & 15); |
| addr = GEN_INT (INTVAL (addr) & -16); |
| } |
| } |
| |
| addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16)); |
| |
| scalar = store_with_one_insn_p (ops[0]); |
| if (!scalar) |
| { |
| /* We could copy the flags from the ops[0] MEM to lmem here. We |
| don't, because we want this load to be optimized away if |
| possible, and copying the flags will prevent that in certain |
| cases, e.g., consider the volatile flag. */ |
| |
| rtx lmem = change_address (ops[0], TImode, copy_rtx (addr)); |
| set_mem_alias_set (lmem, 0); |
| emit_insn (gen_movti (reg, lmem)); |
| |
| if (!p0 || reg_align (p0) >= 128) |
| p0 = stack_pointer_rtx; |
| if (!p1_lo) |
| p1_lo = const0_rtx; |
| |
| emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode)))); |
| emit_insn (gen_shufb (reg, ops[1], reg, pat)); |
| } |
| else if (reload_completed) |
| { |
| if (GET_CODE (ops[1]) == REG) |
| emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1]))); |
| else if (GET_CODE (ops[1]) == SUBREG) |
| emit_move_insn (reg, |
| gen_rtx_REG (GET_MODE (reg), |
| REGNO (SUBREG_REG (ops[1])))); |
| else |
| abort (); |
| } |
| else |
| { |
| if (GET_CODE (ops[1]) == REG) |
| emit_insn (gen_spu_convert (reg, ops[1])); |
| else if (GET_CODE (ops[1]) == SUBREG) |
| emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1]))); |
| else |
| abort (); |
| } |
| |
| if (GET_MODE_SIZE (mode) < 4 && scalar) |
| emit_insn (gen_shlqby_ti |
| (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode)))); |
| |
| smem = change_address (ops[0], TImode, addr); |
| /* We can't use the previous alias set because the memory has changed |
| size and can potentially overlap objects of other types. */ |
| set_mem_alias_set (smem, 0); |
| |
| emit_insn (gen_movti (smem, reg)); |
| } |
| |
| /* Return TRUE if X is MEM which is a struct member reference |
| and the member can safely be loaded and stored with a single |
| instruction because it is padded. */ |
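| /* For example (illustrative): given |
| struct s { int a __attribute__ ((aligned (16))); }; |
| a load of s.a qualifies because the field is 16-byte aligned and, |
| being the last field, is padded out to the structure's alignment. */ |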
| static int |
| mem_is_padded_component_ref (rtx x) |
| { |
| tree t = MEM_EXPR (x); |
| tree r; |
| if (!t || TREE_CODE (t) != COMPONENT_REF) |
| return 0; |
| t = TREE_OPERAND (t, 1); |
| if (!t || TREE_CODE (t) != FIELD_DECL |
| || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t))) |
| return 0; |
| /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */ |
| r = DECL_FIELD_CONTEXT (t); |
| if (!r || TREE_CODE (r) != RECORD_TYPE) |
| return 0; |
| /* Make sure they are the same mode. */ |
| if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t))) |
| return 0; |
| /* If there are no following fields then the field alignment assures |
| the structure is padded to the alignment which means this field is |
| padded too. */ |
| if (TREE_CHAIN (t) == 0) |
| return 1; |
| /* If the following field is also aligned then this field will be |
| padded. */ |
| t = TREE_CHAIN (t); |
| if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128) |
| return 1; |
| return 0; |
| } |
| |
| /* Parse the -mfixed-range= option string. */ |
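| /* For example (illustrative): -mfixed-range=40-50,80-80 marks |
| registers 40 through 50 and register 80 as fixed and call-used. */ |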
| static void |
| fix_range (const char *const_str) |
| { |
| int i, first, last; |
| char *str, *dash, *comma; |
| |
| /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and |
| REG2 are either register names or register numbers. The effect |
| of this option is to mark the registers in the range from REG1 to |
| REG2 as ``fixed'' so they won't be used by the compiler. */ |
| |
| i = strlen (const_str); |
| str = (char *) alloca (i + 1); |
| memcpy (str, const_str, i + 1); |
| |
| while (1) |
| { |
| dash = strchr (str, '-'); |
| if (!dash) |
| { |
| warning (0, "value of -mfixed-range must have form REG1-REG2"); |
| return; |
| } |
| *dash = '\0'; |
| comma = strchr (dash + 1, ','); |
| if (comma) |
| *comma = '\0'; |
| |
| first = decode_reg_name (str); |
| if (first < 0) |
| { |
| warning (0, "unknown register name: %s", str); |
| return; |
| } |
| |
| last = decode_reg_name (dash + 1); |
| if (last < 0) |
| { |
| warning (0, "unknown register name: %s", dash + 1); |
| return; |
| } |
| |
| *dash = '-'; |
| |
| if (first > last) |
| { |
| warning (0, "%s-%s is an empty range", str, dash + 1); |
| return; |
| } |
| |
| for (i = first; i <= last; ++i) |
| fixed_regs[i] = call_used_regs[i] = 1; |
| |
| if (!comma) |
| break; |
| |
| *comma = ','; |
| str = comma + 1; |
| } |
| } |
| |
| int |
| spu_valid_move (rtx * ops) |
| { |
| enum machine_mode mode = GET_MODE (ops[0]); |
| if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode)) |
| return 0; |
| |
| /* init_expr_once tries to recog against load and store insns to set |
| the direct_load[] and direct_store[] arrays. We always want to |
| consider those loads and stores valid. init_expr_once is called in |
| the context of a dummy function which does not have a decl. */ |
| if (cfun->decl == 0) |
| return 1; |
| |
| /* Don't allow loads/stores which would require more than 1 insn. |
| During and after reload we assume loads and stores only take 1 |
| insn. */ |
| if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed) |
| { |
| if (GET_CODE (ops[0]) == MEM |
| && (GET_MODE_SIZE (mode) < 4 |
| || !(store_with_one_insn_p (ops[0]) |
| || mem_is_padded_component_ref (ops[0])))) |
| return 0; |
| if (GET_CODE (ops[1]) == MEM |
| && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1]))) |
| return 0; |
| } |
| return 1; |
| } |
| |
| /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that |
| can be generated using the fsmbi instruction. */ |
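| /* e.g., (illustrative) the SImode constant 0xffff0000 qualifies: every |
| byte of the splatted value is either 0x00 or 0xff. */ |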
| int |
| fsmbi_const_p (rtx x) |
| { |
| if (CONSTANT_P (x)) |
| { |
| /* We can always choose DImode for CONST_INT because the sign-extended |
| high bits of an SImode value are all 0s or all 1s, i.e., still |
| valid for fsmbi. */ |
| enum immediate_class c = classify_immediate (x, DImode); |
| return c == IC_FSMBI; |
| } |
| return 0; |
| } |
| |
| /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that |
| can be generated using the cbd, chd, cwd or cdd instruction. */ |
| int |
| cpat_const_p (rtx x, enum machine_mode mode) |
| { |
| if (CONSTANT_P (x)) |
| { |
| enum immediate_class c = classify_immediate (x, mode); |
| return c == IC_CPAT; |
| } |
| return 0; |
| } |
| |
| rtx |
| gen_cpat_const (rtx * ops) |
| { |
| unsigned char dst[16]; |
| int i, offset, shift, isize; |
| if (GET_CODE (ops[3]) != CONST_INT |
| || GET_CODE (ops[2]) != CONST_INT |
| || (GET_CODE (ops[1]) != CONST_INT |
| && GET_CODE (ops[1]) != REG)) |
| return 0; |
| if (GET_CODE (ops[1]) == REG |
| && (!REG_POINTER (ops[1]) |
| || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128)) |
| return 0; |
| |
| for (i = 0; i < 16; i++) |
| dst[i] = i + 16; |
| isize = INTVAL (ops[3]); |
| if (isize == 1) |
| shift = 3; |
| else if (isize == 2) |
| shift = 2; |
| else |
| shift = 0; |
| offset = (INTVAL (ops[2]) + |
| (GET_CODE (ops[1]) == |
| CONST_INT ? INTVAL (ops[1]) : 0)) & 15; |
| for (i = 0; i < isize; i++) |
| dst[offset + i] = i + shift; |
| return array_to_constant (TImode, dst); |
| } |
| |
| /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte |
| array. Use MODE for CONST_INT's. When the constant's mode is smaller |
| than 16 bytes, the value is repeated across the rest of the array. */ |
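| /* For example (illustrative): for SImode 0x01020304, arr becomes |
| 01 02 03 04 repeated four times (most significant byte first). */ |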
| void |
| constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16]) |
| { |
| HOST_WIDE_INT val; |
| int i, j, first; |
| |
| memset (arr, 0, 16); |
| mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode; |
| if (GET_CODE (x) == CONST_INT |
| || (GET_CODE (x) == CONST_DOUBLE |
| && (mode == SFmode || mode == DFmode))) |
| { |
| gcc_assert (mode != VOIDmode && mode != BLKmode); |
| |
| if (GET_CODE (x) == CONST_DOUBLE) |
| val = const_double_to_hwint (x); |
| else |
| val = INTVAL (x); |
| first = GET_MODE_SIZE (mode) - 1; |
| for (i = first; i >= 0; i--) |
| { |
| arr[i] = val & 0xff; |
| val >>= 8; |
| } |
| /* Splat the constant across the whole array. */ |
| for (j = 0, i = first + 1; i < 16; i++) |
| { |
| arr[i] = arr[j]; |
| j = (j == first) ? 0 : j + 1; |
| } |
| } |
| else if (GET_CODE (x) == CONST_DOUBLE) |
| { |
| val = CONST_DOUBLE_LOW (x); |
| for (i = 15; i >= 8; i--) |
| { |
| arr[i] = val & 0xff; |
| val >>= 8; |
| } |
| val = CONST_DOUBLE_HIGH (x); |
| for (i = 7; i >= 0; i--) |
| { |
| arr[i] = val & 0xff; |
| val >>= 8; |
| } |
| } |
| else if (GET_CODE (x) == CONST_VECTOR) |
| { |
| int units; |
| rtx elt; |
| mode = GET_MODE_INNER (mode); |
| units = CONST_VECTOR_NUNITS (x); |
| for (i = 0; i < units; i++) |
| { |
| elt = CONST_VECTOR_ELT (x, i); |
| if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE) |
| { |
| if (GET_CODE (elt) == CONST_DOUBLE) |
| val = const_double_to_hwint (elt); |
| else |
| val = INTVAL (elt); |
| first = GET_MODE_SIZE (mode) - 1; |
| if (first + i * GET_MODE_SIZE (mode) > 16) |
| abort (); |
| for (j = first; j >= 0; j--) |
| { |
| arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff; |
| val >>= 8; |
| } |
| } |
| } |
| } |
| else |
| gcc_unreachable(); |
| } |
| |
| /* Convert a 16 byte array to a constant of mode MODE. When MODE is |
| smaller than 16 bytes, use the bytes that would represent that value |
| in a register, e.g., for QImode return the value of arr[3]. */ |
| rtx |
| array_to_constant (enum machine_mode mode, unsigned char arr[16]) |
| { |
| enum machine_mode inner_mode; |
| rtvec v; |
| int units, size, i, j, k; |
| HOST_WIDE_INT val; |
| |
| if (GET_MODE_CLASS (mode) == MODE_INT |
| && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) |
| { |
| j = GET_MODE_SIZE (mode); |
| i = j < 4 ? 4 - j : 0; |
| for (val = 0; i < j; i++) |
| val = (val << 8) | arr[i]; |
| val = trunc_int_for_mode (val, mode); |
| return GEN_INT (val); |
| } |
| |
| if (mode == TImode) |
| { |
| HOST_WIDE_INT high; |
| for (i = high = 0; i < 8; i++) |
| high = (high << 8) | arr[i]; |
| for (i = 8, val = 0; i < 16; i++) |
| val = (val << 8) | arr[i]; |
| return immed_double_const (val, high, TImode); |
| } |
| if (mode == SFmode) |
| { |
| val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; |
| val = trunc_int_for_mode (val, SImode); |
| return hwint_to_const_double (SFmode, val); |
| } |
| if (mode == DFmode) |
| { |
| val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; |
| val <<= 32; |
| val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7]; |
| return hwint_to_const_double (DFmode, val); |
| } |
| |
| if (!VECTOR_MODE_P (mode)) |
| abort (); |
| |
| units = GET_MODE_NUNITS (mode); |
| size = GET_MODE_UNIT_SIZE (mode); |
| inner_mode = GET_MODE_INNER (mode); |
| v = rtvec_alloc (units); |
| |
| for (k = i = 0; i < units; ++i) |
| { |
| val = 0; |
| for (j = 0; j < size; j++, k++) |
| val = (val << 8) | arr[k]; |
| |
| if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT) |
| RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val); |
| else |
| RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode)); |
| } |
| if (k > 16) |
| abort (); |
| |
| return gen_rtx_CONST_VECTOR (mode, v); |
| } |
| |
| static void |
| reloc_diagnostic (rtx x) |
| { |
| tree loc_decl, decl = 0; |
| const char *msg; |
| if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC)) |
| return; |
| |
| if (GET_CODE (x) == SYMBOL_REF) |
| decl = SYMBOL_REF_DECL (x); |
| else if (GET_CODE (x) == CONST |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) |
| decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0)); |
| |
| /* SYMBOL_REF_DECL is not necessarily a DECL. */ |
| if (decl && !DECL_P (decl)) |
| decl = 0; |
| |
| /* We use last_assemble_variable_decl to get line information. It's |
| not always going to be right and might not even be close, but will |
| be right for the more common cases. */ |
| if (!last_assemble_variable_decl) |
| loc_decl = decl; |
| else |
| loc_decl = last_assemble_variable_decl; |
| |
| /* The decl could be a string constant. */ |
| if (decl && DECL_P (decl)) |
| msg = "%Jcreating run-time relocation for %qD"; |
| else |
| msg = "creating run-time relocation"; |
| |
| if (TARGET_WARN_RELOC) |
| warning (0, msg, loc_decl, decl); |
| else |
| error (msg, loc_decl, decl); |
| } |
| |
| /* Hook into assemble_integer so we can generate an error for run-time |
| relocations. The SPU ABI disallows them. */ |
| static bool |
| spu_assemble_integer (rtx x, unsigned int size, int aligned_p) |
| { |
| /* By default run-time relocations aren't supported, but we allow them |
| in case users support them in their own run-time loader. We provide |
| a warning for those users that don't. */ |
| if ((GET_CODE (x) == SYMBOL_REF) |
| || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST) |
| reloc_diagnostic (x); |
| |
| return default_assemble_integer (x, size, aligned_p); |
| } |
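| /* Sketch of user code that reaches reloc_diagnostic when compiled |
| with -fpic (hypothetical example): |
| |
| extern int x; |
| int *p = &x; // static initializer needs a run-time relocation |
| |
| Depending on TARGET_WARN_RELOC / TARGET_ERROR_RELOC this is |
| reported as a warning or as an error. */ |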
| |
| static void |
| spu_asm_globalize_label (FILE * file, const char *name) |
| { |
| fputs ("\t.global\t", file); |
| assemble_name (file, name); |
| fputs ("\n", file); |
| } |
| |
| static bool |
| spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total) |
| { |
| enum machine_mode mode = GET_MODE (x); |
| int cost = COSTS_N_INSNS (2); |
| |
| /* Folding to a CONST_VECTOR will use extra space but there might |
| be only a small savings in cycles. We'd like to use a CONST_VECTOR |
| only if it allows us to fold away multiple insns. Changing the cost |
| of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though |
| because this cost will only be compared against a single insn. |
| if (code == CONST_VECTOR) |
| return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6); |
| */ |
| |
| /* Use defaults for float operations. Not accurate but good enough. */ |
| if (mode == DFmode) |
| { |
| *total = COSTS_N_INSNS (13); |
| return true; |
| } |
| if (mode == SFmode) |
| { |
| *total = COSTS_N_INSNS (6); |
| return true; |
| } |
| switch (code) |
| { |
| case CONST_INT: |
| if (satisfies_constraint_K (x)) |
| *total = 0; |
| else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll) |
| *total = COSTS_N_INSNS (1); |
| else |
| *total = COSTS_N_INSNS (3); |
| return true; |
| |
| case CONST: |
| *total = COSTS_N_INSNS (3); |
| return true; |
| |
| case LABEL_REF: |
| case SYMBOL_REF: |
| *total = COSTS_N_INSNS (0); |
| return true; |
| |
| case CONST_DOUBLE: |
| *total = COSTS_N_INSNS (5); |
| return true; |
| |
| case FLOAT_EXTEND: |
| case FLOAT_TRUNCATE: |
| case FLOAT: |
| case UNSIGNED_FLOAT: |
| case FIX: |
| case UNSIGNED_FIX: |
| *total = COSTS_N_INSNS (7); |
| return true; |
| |
| case PLUS: |
| if (mode == TImode) |
| { |
| *total = COSTS_N_INSNS (9); |
| return true; |
| } |
| break; |
| |
| case MULT: |
| cost = (GET_CODE (XEXP (x, 0)) == REG |
| ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7)); |
| if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG) |
| { |
| if (GET_CODE (XEXP (x, 1)) == CONST_INT) |
| { |
| HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); |
| cost = COSTS_N_INSNS (14); |
| if ((val & 0xffff) == 0) |
| cost = COSTS_N_INSNS (9); |
| else if (val > 0 && val < 0x10000) |
| cost = COSTS_N_INSNS (11); |
| } |
| } |
| *total = cost; |
| return true; |
| case DIV: |
| case UDIV: |
| case MOD: |
| case UMOD: |
| *total = COSTS_N_INSNS (20); |
| return true; |
| case ROTATE: |
| case ROTATERT: |
| case ASHIFT: |
| case ASHIFTRT: |
| case LSHIFTRT: |
| *total = COSTS_N_INSNS (4); |
| return true; |
| case UNSPEC: |
| if (XINT (x, 1) == UNSPEC_CONVERT) |
| *total = COSTS_N_INSNS (0); |
| else |
| *total = COSTS_N_INSNS (4); |
| return true; |
| } |
| /* Scale cost by mode size, except when initializing (cfun->decl == 0). */ |
| if (GET_MODE_CLASS (mode) == MODE_INT |
| && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl) |
| cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)) |
| * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); |
| *total = cost; |
| return true; |
| } |
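| /* Worked example of the mode-size scaling above: a DImode operation |
| with base cost COSTS_N_INSNS (2) is scaled by (8/4) * (8/4) = 4, |
| and a TImode one by (16/4) * (16/4) = 16, roughly reflecting the |
| extra insns needed to synthesize wide integer ops from 32-bit |
| ones. */ |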
| |
| enum machine_mode |
| spu_eh_return_filter_mode (void) |
| { |
| /* We would like this to be SImode, but sjlj exceptions seem to work |
| only with word_mode. */ |
| return TImode; |
| } |
| |
| /* Decide whether we can make a sibling call to a function. DECL is the |
| declaration of the function being targeted by the call and EXP is the |
| CALL_EXPR representing the call. */ |
| static bool |
| spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) |
| { |
| return decl && !TARGET_LARGE_MEM; |
| } |
| |
| /* We need to correctly update the back chain pointer and the Available |
| Stack Size (which is in the second slot of the sp register). */ |
| void |
| spu_allocate_stack (rtx op0, rtx op1) |
| { |
| HOST_WIDE_INT v; |
| rtx chain = gen_reg_rtx (V4SImode); |
| rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx); |
| rtx sp = gen_reg_rtx (V4SImode); |
| rtx splatted = gen_reg_rtx (V4SImode); |
| rtx pat = gen_reg_rtx (TImode); |
| |
| /* Copy the back chain so we can save it back again. */ |
| emit_move_insn (chain, stack_bot); |
| |
| op1 = force_reg (SImode, op1); |
| |
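| /* The TImode pattern below reads, as bytes, { 0,1,2,3, 0,1,2,3, ... }; |
| as a shufb control it replicates the preferred (first) word of op1 |
| into all four slots, so the frame size can be subtracted from every |
| word of the V4SImode stack pointer at once. */ |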
| v = 0x1020300010203ll; |
| emit_move_insn (pat, immed_double_const (v, v, TImode)); |
| emit_insn (gen_shufb (splatted, op1, op1, pat)); |
| |
| emit_insn (gen_spu_convert (sp, stack_pointer_rtx)); |
| emit_insn (gen_subv4si3 (sp, sp, splatted)); |
| |
| if (flag_stack_check) |
| { |
| rtx avail = gen_reg_rtx (SImode); |
| rtx result = gen_reg_rtx (SImode); |
| emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1))); |
| emit_insn (gen_cgt_si (result, avail, GEN_INT (-1))); |
| emit_insn (gen_spu_heq (result, GEN_INT (0))); |
| } |
| |
| emit_insn (gen_spu_convert (stack_pointer_rtx, sp)); |
| |
| emit_move_insn (stack_bot, chain); |
| |
| emit_move_insn (op0, virtual_stack_dynamic_rtx); |
| } |
| |
| void |
| spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1) |
| { |
| static unsigned char arr[16] = |
| { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; |
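| /* As a shufb control, arr selects bytes 0-3 of the first source into |
| every word, i.e. it splats the preferred word (the same trick as in |
| spu_allocate_stack above). */ |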
| rtx temp = gen_reg_rtx (SImode); |
| rtx temp2 = gen_reg_rtx (SImode); |
| rtx temp3 = gen_reg_rtx (V4SImode); |
| rtx temp4 = gen_reg_rtx (V4SImode); |
| rtx pat = gen_reg_rtx (TImode); |
| rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM); |
| |
| /* Restore the backchain from the first word, sp from the second. */ |
| emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0)); |
| emit_move_insn (temp, adjust_address_nv (op1, SImode, 4)); |
| |
| emit_move_insn (pat, array_to_constant (TImode, arr)); |
| |
| /* Compute the Available Stack Size for sp. */ |
| emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx)); |
| emit_insn (gen_shufb (temp3, temp, temp, pat)); |
| |
| /* Compute the Available Stack Size for the back chain. */ |
| emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx)); |
| emit_insn (gen_shufb (temp4, temp2, temp2, pat)); |
| emit_insn (gen_addv4si3 (temp4, sp, temp4)); |
| |
| emit_insn (gen_addv4si3 (sp, sp, temp3)); |
| emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4); |
| } |
| |
| static void |
| spu_init_libfuncs (void) |
| { |
| set_optab_libfunc (smul_optab, DImode, "__muldi3"); |
| set_optab_libfunc (sdiv_optab, DImode, "__divdi3"); |
| set_optab_libfunc (smod_optab, DImode, "__moddi3"); |
| set_optab_libfunc (udiv_optab, DImode, "__udivdi3"); |
| set_optab_libfunc (umod_optab, DImode, "__umoddi3"); |
| set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4"); |
| set_optab_libfunc (ffs_optab, DImode, "__ffsdi2"); |
| set_optab_libfunc (clz_optab, DImode, "__clzdi2"); |
| set_optab_libfunc (ctz_optab, DImode, "__ctzdi2"); |
| set_optab_libfunc (popcount_optab, DImode, "__popcountdi2"); |
| set_optab_libfunc (parity_optab, DImode, "__paritydi2"); |
| |
| set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf"); |
| set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf"); |
| } |
| |
| /* Make a subreg, stripping any existing subreg. We could possibly just |
| call simplify_subreg, but in this case we know what we want. */ |
| rtx |
| spu_gen_subreg (enum machine_mode mode, rtx x) |
| { |
| if (GET_CODE (x) == SUBREG) |
| x = SUBREG_REG (x); |
| if (GET_MODE (x) == mode) |
| return x; |
| return gen_rtx_SUBREG (mode, x, 0); |
| } |
| |
| static bool |
| spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED) |
| { |
| return (TYPE_MODE (type) == BLKmode |
| && ((type) == 0 |
| || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST |
| || int_size_in_bytes (type) > |
| (MAX_REGISTER_RETURN * UNITS_PER_WORD))); |
| } |
| |
| /* Create the built-in types and functions. */ |
| |
| struct spu_builtin_description spu_builtins[] = { |
| #define DEF_BUILTIN(fcode, icode, name, type, params) \ |
| {fcode, icode, name, type, params, NULL_TREE}, |
| #include "spu-builtins.def" |
| #undef DEF_BUILTIN |
| }; |
| |
| static void |
| spu_init_builtins (void) |
| { |
| struct spu_builtin_description *d; |
| unsigned int i; |
| |
| V16QI_type_node = build_vector_type (intQI_type_node, 16); |
| V8HI_type_node = build_vector_type (intHI_type_node, 8); |
| V4SI_type_node = build_vector_type (intSI_type_node, 4); |
| V2DI_type_node = build_vector_type (intDI_type_node, 2); |
| V4SF_type_node = build_vector_type (float_type_node, 4); |
| V2DF_type_node = build_vector_type (double_type_node, 2); |
| |
| unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16); |
| unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8); |
| unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4); |
| unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2); |
| |
| spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node; |
| |
| spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE]; |
| |
| spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE]; |
| spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE]; |
| spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE]; |
| spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE]; |
| spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE]; |
| spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE]; |
| spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE]; |
| |
| spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE]; |
| spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE]; |
| |
| spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE]; |
| |
| spu_builtin_types[SPU_BTI_PTR] = |
| build_pointer_type (build_qualified_type |
| (void_type_node, |
| TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE)); |
| |
| /* For each builtin we build a new prototype. The tree code will make |
| sure nodes are shared. */ |
| for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++) |
| { |
| tree p; |
| char name[64]; /* builtin_function will make a copy. */ |
| int parm; |
| |
| if (d->name == 0) |
| continue; |
| |
| /* Find the last parm. */ |
| for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++) |
| { |
| } |
| |
| p = void_list_node; |
| while (parm > 1) |
| p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p); |
| |
| p = build_function_type (spu_builtin_types[d->parm[0]], p); |
| |
| sprintf (name, "__builtin_%s", d->name); |
| d->fndecl = |
| builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD, |
| NULL, NULL_TREE); |
| if (d->fcode == SPU_MASK_FOR_LOAD) |
| TREE_READONLY (d->fndecl) = 1; |
| } |
| } |
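| /* For illustration (hypothetical table entry): a row with name "foo", |
| return type SPU_BTI_UINTSI and a single SPU_BTI_INTSI parameter |
| yields a declaration equivalent to |
| |
| unsigned int __builtin_foo (int); |
| |
| with the parameter list built back-to-front onto void_list_node. */ |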
| |
| int |
| spu_safe_dma (HOST_WIDE_INT channel) |
| { |
| return (channel >= 21 && channel <= 27); |
| } |
| |
| void |
| spu_builtin_splats (rtx ops[]) |
| { |
| enum machine_mode mode = GET_MODE (ops[0]); |
| if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE) |
| { |
| unsigned char arr[16]; |
| constant_to_array (GET_MODE_INNER (mode), ops[1], arr); |
| emit_move_insn (ops[0], array_to_constant (mode, arr)); |
| } |
| else if (GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1])) |
| { |
| rtvec v = rtvec_alloc (4); |
| RTVEC_ELT (v, 0) = ops[1]; |
| RTVEC_ELT (v, 1) = ops[1]; |
| RTVEC_ELT (v, 2) = ops[1]; |
| RTVEC_ELT (v, 3) = ops[1]; |
| emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v)); |
| } |
| else |
| { |
| rtx reg = gen_reg_rtx (TImode); |
| rtx shuf; |
| if (GET_CODE (ops[1]) != REG |
| && GET_CODE (ops[1]) != SUBREG) |
| ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]); |
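| /* Each pattern below is a shufb control replicating the scalar's |
| preferred slot: bytes 0-7 for doublewords, bytes 0-3 for words, |
| bytes 2-3 for halfwords and byte 3 for bytes (control values |
| 0x10-0x17 select from the second source, which is the same |
| register here). */ |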
| switch (mode) |
| { |
| case V2DImode: |
| case V2DFmode: |
| shuf = |
| immed_double_const (0x0001020304050607ll, 0x1011121314151617ll, |
| TImode); |
| break; |
| case V4SImode: |
| case V4SFmode: |
| shuf = |
| immed_double_const (0x0001020300010203ll, 0x0001020300010203ll, |
| TImode); |
| break; |
| case V8HImode: |
| shuf = |
| immed_double_const (0x0203020302030203ll, 0x0203020302030203ll, |
| TImode); |
| break; |
| case V16QImode: |
| shuf = |
| immed_double_const (0x0303030303030303ll, 0x0303030303030303ll, |
| TImode); |
| break; |
| default: |
| abort (); |
| } |
| emit_move_insn (reg, shuf); |
| emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg)); |
| } |
| } |
| |
| void |
| spu_builtin_extract (rtx ops[]) |
| { |
| enum machine_mode mode; |
| rtx rot, from, tmp; |
| |
| mode = GET_MODE (ops[1]); |
| |
| if (GET_CODE (ops[2]) == CONST_INT) |
| { |
| switch (mode) |
| { |
| case V16QImode: |
| emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2])); |
| break; |
| case V8HImode: |
| emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2])); |
| break; |
| case V4SFmode: |
| emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2])); |
| break; |
| case V4SImode: |
| emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2])); |
| break; |
| case V2DImode: |
| emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2])); |
| break; |
| case V2DFmode: |
| emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2])); |
| break; |
| default: |
| abort (); |
| } |
| return; |
| } |
| |
| from = spu_gen_subreg (TImode, ops[1]); |
| rot = gen_reg_rtx (TImode); |
| tmp = gen_reg_rtx (SImode); |
| |
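| /* Compute the byte rotation that brings element ops[2] into the |
| scalar's preferred slot: byte i sits at offset i, so rotate left |
| by i - 3 to land it in byte 3; halfword i needs 2*i - 2, word i |
| needs 4*i, and doubleword i needs 8*i. */ |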
| switch (mode) |
| { |
| case V16QImode: |
| emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3))); |
| break; |
| case V8HImode: |
| emit_insn (gen_addsi3 (tmp, ops[2], ops[2])); |
| emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2))); |
| break; |
| case V4SFmode: |
| case V4SImode: |
| emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2))); |
| break; |
| case V2DImode: |
| case V2DFmode: |
| emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3))); |
| break; |
| default: |
| abort (); |
| } |
| emit_insn (gen_rotqby_ti (rot, from, tmp)); |
| |
| emit_insn (gen_spu_convert (ops[0], rot)); |
| } |
| |
| void |
| spu_builtin_insert (rtx ops[]) |
| { |
| enum machine_mode mode = GET_MODE (ops[0]); |
| enum machine_mode imode = GET_MODE_INNER (mode); |
| rtx mask = gen_reg_rtx (TImode); |
| rtx offset; |
| |
| if (GET_CODE (ops[3]) == CONST_INT) |
| offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode)); |
| else |
| { |
| offset = gen_reg_rtx (SImode); |
| emit_insn (gen_mulsi3 |
| (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode)))); |
| } |
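| /* gen_cpat (the generate-controls cbd/chd/cwd/cdd family) builds a |
| shufb mask that passes the vector in ops[2] through unchanged |
| except for GET_MODE_SIZE (imode) bytes at the element's offset, |
| which are taken from the scalar in ops[1]; the shufb below then |
| performs the actual insertion. */ |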
| emit_insn (gen_cpat |
| (mask, stack_pointer_rtx, offset, |
| GEN_INT (GET_MODE_SIZE (imode)))); |
| emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask)); |
| } |
| |
| void |
| spu_builtin_promote (rtx ops[]) |
| { |
| enum machine_mode mode, imode; |
| rtx rot, from, offset; |
| HOST_WIDE_INT pos; |
| |
| mode = GET_MODE (ops[0]); |
| imode = GET_MODE_INNER (mode); |
| |
| from = gen_reg_rtx (TImode); |
| rot = spu_gen_subreg (TImode, ops[0]); |
| |
| emit_insn (gen_spu_convert (from, ops[1])); |
| |
| if (GET_CODE (ops[2]) == CONST_INT) |
| { |
| pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]); |
| if (GET_MODE_SIZE (imode) < 4) |
| pos += 4 - GET_MODE_SIZE (imode); |
| offset = GEN_INT (pos & 15); |
| } |
| else |
| { |
| offset = gen_reg_rtx (SImode); |
| switch (mode) |
| { |
| case V16QImode: |
| emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2])); |
| break; |
| case V8HImode: |
| emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2])); |
| emit_insn (gen_addsi3 (offset, offset, offset)); |
| break; |
| case V4SFmode: |
| case V4SImode: |
| emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2])); |
| emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2))); |
| break; |
| case V2DImode: |
| case V2DFmode: |
| emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3))); |
| break; |
| default: |
| abort (); |
| } |
| } |
| emit_insn (gen_rotqby_ti (rot, from, offset)); |
| } |
| |
| void |
| spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) |
| { |
| rtx shuf = gen_reg_rtx (V4SImode); |
| rtx insn = gen_reg_rtx (V4SImode); |
| rtx shufc; |
| rtx insnc; |
| rtx mem; |
| |
| fnaddr = force_reg (SImode, fnaddr); |
| cxt = force_reg (SImode, cxt); |
| |
| if (TARGET_LARGE_MEM) |
| { |
| rtx rotl = gen_reg_rtx (V4SImode); |
| rtx mask = gen_reg_rtx (V4SImode); |
| rtx bi = gen_reg_rtx (SImode); |
| unsigned char shufa[16] = { |
| 2, 3, 0, 1, 18, 19, 16, 17, |
| 0, 1, 2, 3, 16, 17, 18, 19 |
| }; |
| unsigned char insna[16] = { |
| 0x41, 0, 0, 79, |
| 0x41, 0, 0, STATIC_CHAIN_REGNUM, |
| 0x60, 0x80, 0, 79, |
| 0x60, 0x80, 0, STATIC_CHAIN_REGNUM |
| }; |
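| /* insna holds four instruction-word templates with opcode and |
| register fields filled in and immediate fields still zero; the |
| shufb/rotl/selb sequence below merges the shuffled address and |
| static-chain bits into those immediate fields. */ |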
| |
| shufc = force_reg (TImode, array_to_constant (TImode, shufa)); |
| insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna)); |
| |
| emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc)); |
| emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7))); |
| emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7))); |
| emit_insn (gen_selb (insn, insnc, rotl, mask)); |
| |
| mem = memory_address (Pmode, tramp); |
| emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn); |
| |
| emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7))); |
| mem = memory_address (Pmode, plus_constant (tramp, 16)); |
| emit_move_insn (gen_rtx_MEM (Pmode, mem), bi); |
| } |
| else |
| { |
| rtx scxt = gen_reg_rtx (SImode); |
| rtx sfnaddr = gen_reg_rtx (SImode); |
| unsigned char insna[16] = { |
| 0x42, 0, 0, STATIC_CHAIN_REGNUM, |
| 0x30, 0, 0, 0, |
| 0, 0, 0, 0, |
| 0, 0, 0, 0 |
| }; |
| |
| shufc = gen_reg_rtx (TImode); |
| insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna)); |
| |
| /* By OR'ing all of cxt with the ila opcode we are assuming that cxt |
| fits in 18 bits and that its last 4 bits are zeros. This will be |
| true if the stack pointer is initialized to 0x3fff0 at program |
| start; otherwise the ila instruction will be garbage. */ |
| |
| emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7))); |
| emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5))); |
| emit_insn (gen_cpat |
| (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4))); |
| emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc)); |
| emit_insn (gen_iorv4si3 (insn, insnc, shuf)); |
| |
| mem = memory_address (Pmode, tramp); |
| emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn); |
| |
| } |
| emit_insn (gen_sync ()); |
| } |
| |
| void |
| spu_expand_sign_extend (rtx ops[]) |
| { |
| unsigned char arr[16]; |
| rtx pat = gen_reg_rtx (TImode); |
| rtx sign, c; |
| int i, last; |
| last = GET_MODE (ops[0]) == DImode ? 7 : 15; |
| if (GET_MODE (ops[1]) == QImode) |
| { |
| sign = gen_reg_rtx (HImode); |
| emit_insn (gen_extendqihi2 (sign, ops[1])); |
| for (i = 0; i < 16; i++) |
| arr[i] = 0x12; |
| arr[last] = 0x13; |
| } |
| else |
| { |
| for (i = 0; i < 16; i++) |
| arr[i] = 0x10; |
| switch (GET_MODE (ops[1])) |
| { |
| case HImode: |
| sign = gen_reg_rtx (SImode); |
| emit_insn (gen_extendhisi2 (sign, ops[1])); |
| arr[last] = 0x03; |
| arr[last - 1] = 0x02; |
| break; |
| case SImode: |
| sign = gen_reg_rtx (SImode); |
| emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31))); |
| for (i = 0; i < 4; i++) |
| arr[last - i] = 3 - i; |
| break; |
| case DImode: |
| sign = gen_reg_rtx (SImode); |
| c = gen_reg_rtx (SImode); |
| emit_insn (gen_spu_convert (c, ops[1])); |
| emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31))); |
| for (i = 0; i < 8; i++) |
| arr[last - i] = 7 - i; |
| break; |
| default: |
| abort (); |
| } |
| } |
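| /* Illustrative pattern for the SImode -> DImode case: arr ends up |
| as { 0x10,0x10,0x10,0x10, 0,1,2,3, ... }, so the shufb below |
| places four copies of the sign word's top byte ahead of the four |
| value bytes, forming the sign-extended doubleword in bytes 0-7. */ |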
| emit_move_insn (pat, array_to_constant (TImode, arr)); |
| emit_insn (gen_shufb (ops[0], ops[1], sign, pat)); |
| } |
| |
| /* Expand vector initialization. If there are any constant parts, |
| load the constant parts first, then load any non-constant parts. */ |
| void |
| spu_expand_vector_init (rtx target, rtx vals) |
| { |
| enum machine_mode mode = GET_MODE (target); |
| int n_elts = GET_MODE_NUNITS (mode); |
| int n_var = 0; |
| bool all_same = true; |
| rtx first, x = NULL_RTX, first_constant = NULL_RTX; |
| int i; |
| |
| first = XVECEXP (vals, 0, 0); |
| for (i = 0; i < n_elts; ++i) |
| { |
| x = XVECEXP (vals, 0, i); |
| if (!CONSTANT_P (x)) |
| ++n_var; |
| else |
| { |
| if (first_constant == NULL_RTX) |
| first_constant = x; |
| } |
| if (i > 0 && !rtx_equal_p (x, first)) |
| all_same = false; |
| } |
| |
| /* If all elements are the same, use splats to repeat them. */ |
| if (all_same) |
| { |
| if (!CONSTANT_P (first) |
| && !register_operand (first, GET_MODE (x))) |
| first = force_reg (GET_MODE (first), first); |
| emit_insn (gen_spu_splats (target, first)); |
| return; |
| } |
| |
| /* Load the constant parts. */ |
| if (n_var != n_elts) |
| { |
| if (n_var == 0) |
| { |
| emit_move_insn (target, |
| gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); |
| } |
| else |
| { |
| rtx constant_parts_rtx = copy_rtx (vals); |
| |
| gcc_assert (first_constant != NULL_RTX); |
| /* Fill empty slots with the first constant; this increases |
| our chance of using splats in the recursive call below. */ |
| for (i = 0; i < n_elts; ++i) |
| if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i))) |
| XVECEXP (constant_parts_rtx, 0, i) = first_constant; |
| |
| spu_expand_vector_init (target, constant_parts_rtx); |
| } |
| } |
| |
| /* Load the variable parts. */ |
| if (n_var != 0) |
| { |
| rtx insert_operands[4]; |
| |
| insert_operands[0] = target; |
| insert_operands[2] = target; |
| for (i = 0; i < n_elts; ++i) |
| { |
| x = XVECEXP (vals, 0, i); |
| if (!CONSTANT_P (x)) |
| { |
| if (!register_operand (x, GET_MODE (x))) |
| x = force_reg (GET_MODE (x), x); |
| insert_operands[1] = x; |
| insert_operands[3] = GEN_INT (i); |
| spu_builtin_insert (insert_operands); |
| } |
| } |
| } |
| } |
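| /* Walk-through of the above (illustrative): for { x, 1, 2, 3 } with |
| non-constant x, the recursive call first loads the constant vector |
| { 1, 1, 2, 3 } (slot 0 padded with the first constant to keep splat |
| opportunities), then spu_builtin_insert replaces element 0 with x. */ |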
| |
| static rtx |
| spu_force_reg (enum machine_mode mode, rtx op) |
| { |
| rtx x, r; |
| if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode) |
| { |
| if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT) |
| || GET_MODE (op) == BLKmode) |
| return force_reg (mode, convert_to_mode (mode, op, 0)); |
| abort (); |
| } |
| |
| r = force_reg (GET_MODE (op), op); |
| if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode)) |
| { |
| x = simplify_gen_subreg (mode, r, GET_MODE (op), 0); |
| if (x) |
| return x; |
| } |
| |
| x = gen_reg_rtx (mode); |
| emit_insn (gen_spu_convert (x, r)); |
| return x; |
| } |
| |
| static void |
| spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p) |
| { |
| HOST_WIDE_INT v = 0; |
| int lsbits; |
| /* Check the range of immediate operands. */ |
| if (p >= SPU_BTI_7 && p <= SPU_BTI_U18) |
| { |
| int range = p - SPU_BTI_7; |
| if (!CONSTANT_P (op) |
| || (GET_CODE (op) == CONST_INT |
| && (INTVAL (op) < spu_builtin_range[range].low |
| || INTVAL (op) > spu_builtin_range[range].high))) |
| error ("%s expects an integer literal in the range [%d, %d].", |
| d->name, |
| spu_builtin_range[range].low, spu_builtin_range[range].high); |
| |
| if (GET_CODE (op) == CONST |
| && (GET_CODE (XEXP (op, 0)) == PLUS |
| || GET_CODE (XEXP (op, 0)) == MINUS)) |
| { |
| v = INTVAL (XEXP (XEXP (op, 0), 1)); |
| op = XEXP (XEXP (op, 0), 0); |
| } |
| else if (GET_CODE (op) == CONST_INT) |
| v = INTVAL (op); |
| |
| switch (p) |
| { |
| case SPU_BTI_S10_4: |
| lsbits = 4; |
| break; |
| case SPU_BTI_U16_2: |
| /* This is used only in lqa and stqa. Even though the insns |
| encode 16 bits of the address (all but the 2 least |
| significant), only 14 bits are used because it is masked to |
| be 16 byte aligned. */ |
| lsbits = 4; |
| break; |
| case SPU_BTI_S16_2: |
| /* This is used for lqr and stqr. */ |
| lsbits = 2; |
| break; |
| default: |
| lsbits = 0; |
| } |
| |
| if (GET_CODE (op) == LABEL_REF |
| || (GET_CODE (op) == SYMBOL_REF |
| && SYMBOL_REF_FUNCTION_P (op)) |
| || (INTVAL (op) & ((1 << lsbits) - 1)) != 0) |
| warning (0, "%d least significant bits of %s are ignored.", lsbits, |
| d->name); |
| } |
| } |
| |
| |
| static void |
| expand_builtin_args (struct spu_builtin_description *d, tree arglist, |
| rtx target, rtx ops[]) |
| { |
| enum insn_code icode = d->icode; |
| int i = 0; |
| |
| /* Expand the arguments into rtl. */ |
| |
| if (d->parm[0] != SPU_BTI_VOID) |
| ops[i++] = target; |
| |
| for (; i < insn_data[icode].n_operands; i++) |
| { |
| tree arg = TREE_VALUE (arglist); |
| if (arg == 0) |
| abort (); |
| ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0); |
| arglist = TREE_CHAIN (arglist); |
| } |
| } |
| |
| static rtx |
| spu_expand_builtin_1 (struct spu_builtin_description *d, |
| tree arglist, rtx target) |
| { |
| rtx pat; |
| rtx ops[8]; |
| enum insn_code icode = d->icode; |
| enum machine_mode mode, tmode; |
| int i, p; |
| tree return_type; |
| |
| /* Set up ops[] with values from arglist. */ |
| expand_builtin_args (d, arglist, target, ops); |
| |
| /* Handle the target operand which must be operand 0. */ |
| i = 0; |
| if (d->parm[0] != SPU_BTI_VOID) |
| { |
| |
| /* We prefer the mode specified for the match_operand; otherwise |
| use the mode from the builtin function prototype. */ |
| tmode = insn_data[d->icode].operand[0].mode; |
| if (tmode == VOIDmode) |
| tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]); |
| |
| /* Try to use target, because not using it can lead to extra copies, |
| and when all of the registers are in use, extra copies lead to |
| extra spills. */ |
| if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode) |
| ops[0] = target; |
| else |
| target = ops[0] = gen_reg_rtx (tmode); |
| |
| if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode)) |
| abort (); |
| |
| i++; |
| } |
| |
| if (d->fcode == SPU_MASK_FOR_LOAD) |
| { |
| enum machine_mode mode = insn_data[icode].operand[1].mode; |
| tree arg; |
| rtx addr, op, pat; |
| |
| /* Get the address. */ |
| arg = TREE_VALUE (arglist); |
| gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE); |
| op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL); |
| addr = memory_address (mode, op); |
| |
| /* Negate the address. */ |
| op = gen_reg_rtx (GET_MODE (addr)); |
| emit_insn (gen_rtx_SET (VOIDmode, op, |
| gen_rtx_NEG (GET_MODE (addr), addr))); |
| op = gen_rtx_MEM (mode, op); |
| |
| pat = GEN_FCN (icode) (target, op); |
| if (!pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| } |
| |
| /* Ignore align_hint, but still expand its args in case they have |
| side effects. */ |
| if (icode == CODE_FOR_spu_align_hint) |
| return 0; |
| |
| /* Handle the rest of the operands. */ |
| for (p = 1; i < insn_data[icode].n_operands; i++, p++) |
| { |
| if (insn_data[d->icode].operand[i].mode != VOIDmode) |
| mode = insn_data[d->icode].operand[i].mode; |
| else |
| mode = TYPE_MODE (spu_builtin_types[d->parm[i]]); |
| |
| /* The mode can be VOIDmode here for labels. */ |
| |
| /* For specific intrinsics with an immediate operand, e.g., |
| si_ai(), we sometimes need to convert the scalar argument to a |
| vector argument by splatting the scalar. */ |
| if (VECTOR_MODE_P (mode) |
| && (GET_CODE (ops[i]) == CONST_INT |
| || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT |
| || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT)) |
| { |
| if (GET_CODE (ops[i]) == CONST_INT) |
| ops[i] = spu_const (mode, INTVAL (ops[i])); |
| else |
| { |
| rtx reg = gen_reg_rtx (mode); |
| enum machine_mode imode = GET_MODE_INNER (mode); |
| if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i]))) |
| ops[i] = force_reg (GET_MODE (ops[i]), ops[i]); |
| if (imode != GET_MODE (ops[i])) |
| ops[i] = convert_to_mode (imode, ops[i], |
| TYPE_UNSIGNED (spu_builtin_types |
| [d->parm[i]])); |
| emit_insn (gen_spu_splats (reg, ops[i])); |
| ops[i] = reg; |
| } |
| } |
| |
| if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode)) |
| ops[i] = spu_force_reg (mode, ops[i]); |
| |
| spu_check_builtin_parm (d, ops[i], d->parm[p]); |
| } |
| |
| switch (insn_data[icode].n_operands) |
| { |
| case 0: |
| pat = GEN_FCN (icode) (0); |
| break; |
| case 1: |
| pat = GEN_FCN (icode) (ops[0]); |
| break; |
| case 2: |
| pat = GEN_FCN (icode) (ops[0], ops[1]); |
| break; |
| case 3: |
| pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]); |
| break; |
| case 4: |
| pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]); |
| break; |
| case 5: |
| pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]); |
| break; |
| case 6: |
| pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]); |
| break; |
| default: |
| abort (); |
| } |
| |
| if (!pat) |
| abort (); |
| |
| if (d->type == B_CALL || d->type == B_BISLED) |
| emit_call_insn (pat); |
| else if (d->type == B_JUMP) |
| { |
| emit_jump_insn (pat); |
| emit_barrier (); |
| } |
| else |
| emit_insn (pat); |
| |
| return_type = spu_builtin_types[d->parm[0]]; |
| if (d->parm[0] != SPU_BTI_VOID |
| && GET_MODE (target) != TYPE_MODE (return_type)) |
| { |
| /* The target is the return value. It should always have the mode |
| of the builtin function prototype. */ |
| target = spu_force_reg (TYPE_MODE (return_type), target); |
| } |
| |
| return target; |
| } |
| |
| rtx |
| spu_expand_builtin (tree exp, |
| rtx target, |
| rtx subtarget ATTRIBUTE_UNUSED, |
| enum machine_mode mode ATTRIBUTE_UNUSED, |
| int ignore ATTRIBUTE_UNUSED) |
| { |
| tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); |
| unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS; |
| tree arglist = TREE_OPERAND (exp, 1); |
| struct spu_builtin_description *d; |
| |
| if (fcode < NUM_SPU_BUILTINS) |
| { |
| d = &spu_builtins[fcode]; |
| |
| return spu_expand_builtin_1 (d, arglist, target); |
| } |
| abort (); |
| } |
| |
| /* LLVM LOCAL begin */ |
| #ifdef INSN_SCHEDULING |
| /* Implement targetm.vectorize.builtin_mul_widen_even. */ |
| static tree |
| spu_builtin_mul_widen_even (tree type) |
| { |
| switch (TYPE_MODE (type)) |
| { |
| case V8HImode: |
| if (TYPE_UNSIGNED (type)) |
| return spu_builtins[SPU_MULE_0].fndecl; |
| else |
| return spu_builtins[SPU_MULE_1].fndecl; |
| break; |
| default: |
| return NULL_TREE; |
| } |
| } |
| |
| /* Implement targetm.vectorize.builtin_mul_widen_odd. */ |
| static tree |
| spu_builtin_mul_widen_odd (tree type) |
| { |
| switch (TYPE_MODE (type)) |
| { |
| case V8HImode: |
| if (TYPE_UNSIGNED (type)) |
| return spu_builtins[SPU_MULO_1].fndecl; |
| else |
| return spu_builtins[SPU_MULO_0].fndecl; |
| break; |
| default: |
| return NULL_TREE; |
| } |
| } |
| #endif /* INSN_SCHEDULING */ |
| /* LLVM LOCAL end */ |
| |
| /* Implement targetm.vectorize.builtin_mask_for_load. */ |
| static tree |
| spu_builtin_mask_for_load (void) |
| { |
| struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD]; |
| gcc_assert (d); |
| return d->fndecl; |
| } |