| /* Perform various loop optimizations, including strength reduction. |
| Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, |
| 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 |
| Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 2, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING. If not, write to the Free |
| Software Foundation, 59 Temple Place - Suite 330, Boston, MA |
| 02111-1307, USA. */ |
| |
| /* This is the loop optimization pass of the compiler. |
| It finds invariant computations within loops and moves them |
| to the beginning of the loop. Then it identifies basic and |
| general induction variables. |
| |
| Basic induction variables (BIVs) are pseudo registers that are set within |
| a loop only by incrementing or decrementing their value. General induction |
| variables (GIVs) are pseudo registers whose value is a linear function |
| of a basic induction variable. BIVs are recognized by `basic_induction_var'; |
| GIVs by `general_induction_var'. |
| |
| Once induction variables are identified, strength reduction is applied to the |
| general induction variables, and induction variable elimination is applied to |
| the basic induction variables. |
| |
| It also finds cases where |
| a register is set within the loop by zero-extending a narrower value |
| and changes these to zero the entire register once before the loop |
| and merely copy the low part within the loop. |
| |
| Most of the complexity is in heuristics to decide when it is worthwhile |
| to do these things. */ |
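| |
| /* A rough, source-level sketch of these transformations (the pass itself |
| works on RTL; the names below are only illustrative): |
| |
| invariant motion: in |
| for (i = 0; i < n; i++) a[i] = x + y; |
| the invariant computation x + y is moved to a temporary that is set |
| once before the loop. |
| |
| strength reduction: in |
| for (i = 0; i < n; i++) a[i] = 0; |
| the address a + i * 4 (assuming 4-byte elements) is a general |
| induction variable; it is replaced by a new pointer that is simply |
| incremented by 4 each iteration, much like *p++ = 0. */ |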
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "rtl.h" |
| #include "tm_p.h" |
| #include "function.h" |
| #include "expr.h" |
| #include "hard-reg-set.h" |
| #include "basic-block.h" |
| #include "insn-config.h" |
| #include "regs.h" |
| #include "recog.h" |
| #include "flags.h" |
| #include "real.h" |
| #include "cselib.h" |
| #include "except.h" |
| #include "toplev.h" |
| #include "predict.h" |
| #include "insn-flags.h" |
| #include "optabs.h" |
| #include "cfgloop.h" |
| #include "ggc.h" |
| |
| /* Get the loop info pointer of a loop. */ |
| #define LOOP_INFO(LOOP) ((struct loop_info *) (LOOP)->aux) |
| |
| /* Get a pointer to the loop movables structure. */ |
| #define LOOP_MOVABLES(LOOP) (&LOOP_INFO (LOOP)->movables) |
| |
| /* Get a pointer to the loop registers structure. */ |
| #define LOOP_REGS(LOOP) (&LOOP_INFO (LOOP)->regs) |
| |
| /* Get a pointer to the loop induction variables structure. */ |
| #define LOOP_IVS(LOOP) (&LOOP_INFO (LOOP)->ivs) |
| |
| /* Get the luid of an insn. Catch the error of trying to reference the LUID |
| of an insn added during loop optimization, since such insns don't have LUIDs. */ |
| |
| #define INSN_LUID(INSN) \ |
| (INSN_UID (INSN) < max_uid_for_loop ? uid_luid[INSN_UID (INSN)] \ |
| : (abort (), -1)) |
| |
| #define REGNO_FIRST_LUID(REGNO) \ |
| (REGNO_FIRST_UID (REGNO) < max_uid_for_loop \ |
| ? uid_luid[REGNO_FIRST_UID (REGNO)] \ |
| : 0) |
| #define REGNO_LAST_LUID(REGNO) \ |
| (REGNO_LAST_UID (REGNO) < max_uid_for_loop \ |
| ? uid_luid[REGNO_LAST_UID (REGNO)] \ |
| : INT_MAX) |
| |
| /* A "basic induction variable" or biv is a pseudo reg that is set |
| (within this loop) only by incrementing or decrementing it. */ |
| /* A "general induction variable" or giv is a pseudo reg whose |
| value is a linear function of a biv. */ |
| |
| /* Bivs are recognized by `basic_induction_var'; |
| Givs by `general_induction_var'. */ |
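| |
| /* An illustrative, source-level example (the names are hypothetical): in |
| |
| for (i = 0; i < n; i++) sum += a[i]; |
| |
| the register holding `i' is a biv (set only by the increment), while the |
| address a + i * 4 used by the load is a giv with src_reg == i, |
| mult_val == 4 (assuming 4-byte elements) and add_val == a. */ |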
| |
| /* An enum for the two different types of givs, those that are used |
| as memory addresses and those that are calculated into registers. */ |
| enum g_types |
| { |
| DEST_ADDR, |
| DEST_REG |
| }; |
| |
| |
| /* A `struct induction' is created for every instruction that sets |
| an induction variable (either a biv or a giv). */ |
| |
| struct induction |
| { |
| rtx insn; /* The insn that sets a biv or giv */ |
| rtx new_reg; /* New register, containing strength reduced |
| version of this giv. */ |
| rtx src_reg; /* Biv from which this giv is computed. |
| (If this is a biv, then this is the biv.) */ |
| enum g_types giv_type; /* Indicate whether DEST_ADDR or DEST_REG */ |
| rtx dest_reg; /* Destination register for insn: this is the |
| register which was the biv or giv. |
| For a biv, this equals src_reg. |
| For a DEST_ADDR type giv, this is 0. */ |
| rtx *location; /* Place in the insn where this giv occurs. |
| If GIV_TYPE is DEST_REG, this is 0. */ |
| /* For a biv, this is the place where add_val |
| was found. */ |
| enum machine_mode mode; /* The mode of this biv or giv */ |
| rtx mem; /* For DEST_ADDR, the memory object. */ |
| rtx mult_val; /* Multiplicative factor for src_reg. */ |
| rtx add_val; /* Additive constant for that product. */ |
| int benefit; /* Gain from eliminating this insn. */ |
| rtx final_value; /* If the giv is used outside the loop, and its |
| final value could be calculated, it is put |
| here, and the giv is made replaceable. Set |
| the giv to this value before the loop. */ |
| unsigned combined_with; /* The number of givs this giv has been |
| combined with. If nonzero, this giv |
| cannot combine with any other giv. */ |
| unsigned replaceable : 1; /* 1 if we can substitute the strength-reduced |
| variable for the original variable. |
| 0 means they must be kept separate and the |
| new one must be copied into the old pseudo |
| reg each time the old one is set. */ |
| unsigned not_replaceable : 1; /* Used to prevent duplicating work. This is |
| 1 if we know that the giv definitely can |
| not be made replaceable, in which case we |
| don't bother checking the variable again |
| even if further info is available. |
| Both this and the above can be zero. */ |
| unsigned ignore : 1; /* 1 prohibits further processing of giv */ |
| unsigned always_computable : 1;/* 1 if this value is computable every |
| iteration. */ |
| unsigned always_executed : 1; /* 1 if this set occurs each iteration. */ |
| unsigned maybe_multiple : 1; /* Only used for a biv and 1 if this biv |
| update may be done multiple times per |
| iteration. */ |
| unsigned cant_derive : 1; /* For giv's, 1 if this giv cannot derive |
| another giv. This occurs in many cases |
| where a giv's lifetime spans an update to |
| a biv. */ |
| unsigned maybe_dead : 1; /* 1 if this giv might be dead. In that case, |
| we won't use it to eliminate a biv; doing |
| so would probably lose. */ |
| unsigned auto_inc_opt : 1; /* 1 if this giv had its increment output next |
| to it to try to form an auto-inc address. */ |
| unsigned shared : 1; |
| unsigned no_const_addval : 1; /* 1 if add_val does not contain a const. */ |
| int lifetime; /* Length of life of this giv */ |
| rtx derive_adjustment; /* If nonzero, is an adjustment to be |
| subtracted from add_val when this giv |
| derives another. This occurs when the |
| giv spans a biv update by incrementation. */ |
| rtx ext_dependent; /* If nonzero, is a sign or zero extension |
| of a biv on which this giv is dependent. */ |
| struct induction *next_iv; /* For givs, links together all givs that are |
| based on the same biv. For bivs, links |
| together all biv entries that refer to the |
| same biv register. */ |
| struct induction *same; /* For givs, if the giv has been combined with |
| another giv, this points to the base giv. |
| The base giv will have COMBINED_WITH nonzero. |
| For bivs, if the biv has the same LOCATION |
| as another biv, this points to the base |
| biv. */ |
| struct induction *same_insn; /* If there are multiple identical givs in |
| the same insn, then all but one have this |
| field set, and they all point to the giv |
| that doesn't have this field set. */ |
| rtx last_use; /* For a giv made from a biv increment, this is |
| a substitute for the lifetime information. */ |
| }; |
| |
| |
| /* A `struct iv_class' is created for each biv. */ |
| |
| struct iv_class |
| { |
| unsigned int regno; /* Pseudo reg which is the biv. */ |
| int biv_count; /* Number of insns setting this reg. */ |
| struct induction *biv; /* List of all insns that set this reg. */ |
| int giv_count; /* Number of DEST_REG givs computed from this |
| biv. The resulting count is only used in |
| check_dbra_loop. */ |
| struct induction *giv; /* List of all insns that compute a giv |
| from this reg. */ |
| int total_benefit; /* Sum of BENEFITs of all those givs. */ |
| rtx initial_value; /* Value of reg at loop start. */ |
| rtx initial_test; /* Test performed on BIV before loop. */ |
| rtx final_value; /* Value of reg at loop end, if known. */ |
| struct iv_class *next; /* Links all class structures together. */ |
| rtx init_insn; /* insn which initializes biv, 0 if none. */ |
| rtx init_set; /* SET of INIT_INSN, if any. */ |
| unsigned incremented : 1; /* 1 if somewhere incremented/decremented */ |
| unsigned eliminable : 1; /* 1 if plausible candidate for |
| elimination. */ |
| unsigned nonneg : 1; /* 1 if we added a REG_NONNEG note for |
| this. */ |
| unsigned reversed : 1; /* 1 if we reversed the loop that this |
| biv controls. */ |
| unsigned all_reduced : 1; /* 1 if all givs using this biv have |
| been reduced. */ |
| }; |
| |
| |
| /* Definitions used by the basic induction variable discovery code. */ |
| enum iv_mode |
| { |
| UNKNOWN_INDUCT, |
| BASIC_INDUCT, |
| NOT_BASIC_INDUCT, |
| GENERAL_INDUCT |
| }; |
| |
| |
| /* A `struct iv' is created for every register. */ |
| |
| struct iv |
| { |
| enum iv_mode type; |
| union |
| { |
| struct iv_class *class; |
| struct induction *info; |
| } iv; |
| }; |
| |
| |
| #define REG_IV_TYPE(ivs, n) ivs->regs[n].type |
| #define REG_IV_INFO(ivs, n) ivs->regs[n].iv.info |
| #define REG_IV_CLASS(ivs, n) ivs->regs[n].iv.class |
| |
| |
| struct loop_ivs |
| { |
| /* Indexed by register number, contains pointer to `struct |
| iv' if register is an induction variable. */ |
| struct iv *regs; |
| |
| /* Size of regs array. */ |
| unsigned int n_regs; |
| |
| /* The head of a list which links together (via the next field) |
| every iv class for the current loop. */ |
| struct iv_class *list; |
| }; |
| |
| |
| typedef struct loop_mem_info |
| { |
| rtx mem; /* The MEM itself. */ |
| rtx reg; /* Corresponding pseudo, if any. */ |
| /* APPLE LOCAL optimization pragmas 3124235/3420242 */ |
| int optimizable; /* Nonzero if we can optimize access to this MEM. */ |
| } loop_mem_info; |
| |
| |
| |
| struct loop_reg |
| { |
| /* Number of times the reg is set during the loop being scanned. |
| During code motion, a negative value indicates a reg that has |
| been made a candidate; in particular -2 means that it is a |
| candidate that we know is equal to a constant and -1 means that |
| it is a candidate not known equal to a constant. After code |
| motion, regs moved have 0 (which is accurate now) while the |
| failed candidates have the original number of times set. |
| |
| Therefore, at all times, == 0 indicates an invariant register; |
| < 0 a conditionally invariant one. */ |
| int set_in_loop; |
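| |
| /* For instance, a reg set once to a value known equal to a constant would |
| (hypothetically) read 1 after the initial scan, -2 while it is a pending |
| candidate during code motion, 0 if it is actually moved, and 1 again |
| (its n_times_set) if the move is rejected. */ |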
| |
| /* Original value of set_in_loop; same except that this value |
| is not set negative for a reg whose sets have been made candidates |
| and not set to 0 for a reg that is moved. */ |
| int n_times_set; |
| |
| /* Contains the insn in which a register was used if it was used |
| exactly once; contains const0_rtx if it was used more than once. */ |
| rtx single_usage; |
| |
| /* Nonzero indicates that the register cannot be moved or strength |
| reduced. */ |
| char may_not_optimize; |
| |
| /* Nonzero means reg N has already been moved out of one loop. |
| This reduces the desire to move it out of another. */ |
| char moved_once; |
| }; |
| |
| |
| struct loop_regs |
| { |
| int num; /* Number of regs used in table. */ |
| int size; /* Size of table. */ |
| struct loop_reg *array; /* Register usage info array. */ |
| int multiple_uses; /* Nonzero if a reg has multiple uses. */ |
| }; |
| |
| |
| |
| struct loop_movables |
| { |
| /* Head of movable chain. */ |
| struct movable *head; |
| /* Last movable in chain. */ |
| struct movable *last; |
| }; |
| |
| |
| /* Information pertaining to a loop. */ |
| |
| struct loop_info |
| { |
| /* Nonzero if there is a subroutine call in the current loop. */ |
| int has_call; |
| /* Nonzero if there is a libcall in the current loop. */ |
| int has_libcall; |
| /* Nonzero if there is a non-constant call in the current loop. */ |
| int has_nonconst_call; |
| /* Nonzero if there is a prefetch instruction in the current loop. */ |
| int has_prefetch; |
| /* Nonzero if there is a volatile memory reference in the current |
| loop. */ |
| int has_volatile; |
| /* Nonzero if there is a tablejump in the current loop. */ |
| int has_tablejump; |
| /* Nonzero if there are ways to leave the loop other than falling |
| off the end. */ |
| int has_multiple_exit_targets; |
| /* Nonzero if there is an indirect jump in the current function. */ |
| int has_indirect_jump; |
| /* Register or constant initial loop value. */ |
| rtx initial_value; |
| /* Register or constant value used for comparison test. */ |
| rtx comparison_value; |
| /* Register or constant approximate final value. */ |
| rtx final_value; |
| /* Register or constant initial loop value with term common to |
| final_value removed. */ |
| rtx initial_equiv_value; |
| /* Register or constant final loop value with term common to |
| initial_value removed. */ |
| rtx final_equiv_value; |
| /* Register corresponding to iteration variable. */ |
| rtx iteration_var; |
| /* Constant loop increment. */ |
| rtx increment; |
| enum rtx_code comparison_code; |
| /* Holds the number of loop iterations. It is zero if the number |
| could not be calculated. Must be unsigned since the number of |
| iterations can be as high as 2^wordsize - 1. For loops with a |
| wider iterator, this number will be zero if the number of loop |
| iterations is too large for an unsigned integer to hold. */ |
| unsigned HOST_WIDE_INT n_iterations; |
| int used_count_register; |
| /* The loop iterator induction variable. */ |
| struct iv_class *iv; |
| /* List of MEMs that are stored in this loop. */ |
| rtx store_mems; |
| /* Array of MEMs that are used (read or written) in this loop, but |
| cannot be aliased by anything in this loop, except perhaps |
| themselves. In other words, if mems[i] is altered during |
| the loop, it is altered by an expression that is rtx_equal_p to |
| it. */ |
| loop_mem_info *mems; |
| /* The index of the next available slot in MEMS. */ |
| int mems_idx; |
| /* The number of elements allocated in MEMS. */ |
| int mems_allocated; |
| /* Nonzero if we don't know what MEMs were changed in the current |
| loop. This happens if the loop contains a call (in which case |
| `has_call' will also be set) or if we store into more than |
| NUM_STORES MEMs. */ |
| int unknown_address_altered; |
| /* The above doesn't count any readonly memory locations that are |
| stored. This does. */ |
| int unknown_constant_address_altered; |
| /* Count of memory write instructions discovered in the loop. */ |
| int num_mem_sets; |
| /* The insn where the first of these was found. */ |
| rtx first_loop_store_insn; |
| /* The chain of movable insns in loop. */ |
| struct loop_movables movables; |
| /* The registers used in the loop. */ |
| struct loop_regs regs; |
| /* The induction variable information in loop. */ |
| struct loop_ivs ivs; |
| /* Nonzero if call is in pre_header extended basic block. */ |
| int pre_header_has_call; |
| }; |
| |
| /* Not really meaningful values, but at least something. */ |
| #ifndef SIMULTANEOUS_PREFETCHES |
| #define SIMULTANEOUS_PREFETCHES 3 |
| #endif |
| #ifndef PREFETCH_BLOCK |
| #define PREFETCH_BLOCK 32 |
| #endif |
| #ifndef HAVE_prefetch |
| #define HAVE_prefetch 0 |
| #define CODE_FOR_prefetch 0 |
| #define gen_prefetch(a,b,c) (abort(), NULL_RTX) |
| #endif |
| |
| /* Give up the prefetch optimizations once we exceed a given threshold. |
| It is unlikely that we would be able to optimize something in a loop |
| with so many detected prefetches. */ |
| #define MAX_PREFETCHES 100 |
| /* The number of prefetch blocks that are beneficial to fetch at once before |
| a loop with a known (and low) iteration count. */ |
| #define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6 |
| /* For very tiny loops it is not worthwhile to prefetch even before the loop, |
| since it is likely that the data are already in the cache. */ |
| #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2 |
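| |
| /* With the default PREFETCH_BLOCK of 32 bytes, those two limits work out |
| to issuing at most 6 * 32 = 192 bytes of prefetches ahead of such a |
| loop, and skipping ahead-of-loop prefetching entirely when fewer than |
| 2 * 32 = 64 bytes would be covered (assuming the defaults above are not |
| overridden by the target). */ |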
| |
| /* Parameterize some prefetch heuristics so they can be turned on and off |
| easily for performance testing on new architectures. These can be |
| defined in target-dependent files. */ |
| |
| /* Prefetch is worthwhile only when loads/stores are dense. */ |
| #ifndef PREFETCH_ONLY_DENSE_MEM |
| #define PREFETCH_ONLY_DENSE_MEM 1 |
| #endif |
| |
| /* Define what we mean by "dense" loads and stores; this value divided by 256 |
| is the minimum fraction of memory references for which prefetching is |
| worthwhile (the default of 220 corresponds to roughly 86%). */ |
| #ifndef PREFETCH_DENSE_MEM |
| #define PREFETCH_DENSE_MEM 220 |
| #endif |
| |
| /* Do not prefetch for a loop whose iteration count is known to be low. */ |
| #ifndef PREFETCH_NO_LOW_LOOPCNT |
| #define PREFETCH_NO_LOW_LOOPCNT 1 |
| #endif |
| |
| /* Define what we mean by a "low" iteration count. */ |
| #ifndef PREFETCH_LOW_LOOPCNT |
| #define PREFETCH_LOW_LOOPCNT 32 |
| #endif |
| |
| /* Do not prefetch for a loop that contains a function call; such a loop is |
| probably not an internal loop. */ |
| #ifndef PREFETCH_NO_CALL |
| #define PREFETCH_NO_CALL 1 |
| #endif |
| |
| /* Do not prefetch accesses with an extreme stride. */ |
| #ifndef PREFETCH_NO_EXTREME_STRIDE |
| #define PREFETCH_NO_EXTREME_STRIDE 1 |
| #endif |
| |
| /* Define what we mean by an "extreme" stride. */ |
| #ifndef PREFETCH_EXTREME_STRIDE |
| #define PREFETCH_EXTREME_STRIDE 4096 |
| #endif |
| |
| /* Define a limit to how far apart indices can be and still be merged |
| into a single prefetch. */ |
| #ifndef PREFETCH_EXTREME_DIFFERENCE |
| #define PREFETCH_EXTREME_DIFFERENCE 4096 |
| #endif |
| |
| /* Issue prefetch instructions before the loop to fetch data to be used |
| in the first few loop iterations. */ |
| #ifndef PREFETCH_BEFORE_LOOP |
| #define PREFETCH_BEFORE_LOOP 1 |
| #endif |
| |
| /* Do not handle reversed order prefetches (negative stride). */ |
| #ifndef PREFETCH_NO_REVERSE_ORDER |
| #define PREFETCH_NO_REVERSE_ORDER 1 |
| #endif |
| |
| /* Prefetch even if the GIV is in conditional code. */ |
| #ifndef PREFETCH_CONDITIONAL |
| #define PREFETCH_CONDITIONAL 1 |
| #endif |
| |
| #define LOOP_REG_LIFETIME(LOOP, REGNO) \ |
| ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO))) |
| |
| #define LOOP_REG_GLOBAL_P(LOOP, REGNO) \ |
| ((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \ |
| || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start))) |
| |
| #define LOOP_REGNO_NREGS(REGNO, SET_DEST) \ |
| ((REGNO) < FIRST_PSEUDO_REGISTER \ |
| ? (int) hard_regno_nregs[(REGNO)][GET_MODE (SET_DEST)] : 1) |
| |
| |
| /* Vector mapping INSN_UIDs to luids. |
| The luids are like uids but always increase monotonically. |
| We use them to see whether a jump comes from outside a given loop. */ |
| |
| static int *uid_luid; |
| |
| /* Indexed by INSN_UID, contains the ordinal giving the (innermost) loop |
| number the insn is contained in. */ |
| |
| static struct loop **uid_loop; |
| |
| /* 1 + largest uid of any insn. */ |
| |
| static int max_uid_for_loop; |
| |
| /* Number of loops detected in current function. Used as index to the |
| next few tables. */ |
| |
| static int max_loop_num; |
| |
| /* Bound on pseudo register number before loop optimization. |
| A pseudo has valid regscan info if its number is < max_reg_before_loop. */ |
| static unsigned int max_reg_before_loop; |
| |
| /* The value to pass to the next call of reg_scan_update. */ |
| static int loop_max_reg; |
| |
| /* During the analysis of a loop, a chain of `struct movable's |
| is made to record all the movable insns found. |
| Then the entire chain can be scanned to decide which to move. */ |
| |
| struct movable |
| { |
| rtx insn; /* A movable insn */ |
| rtx set_src; /* The expression this reg is set from. */ |
| rtx set_dest; /* The destination of this SET. */ |
| rtx dependencies; /* When INSN is libcall, this is an EXPR_LIST |
| of any registers used within the LIBCALL. */ |
| int consec; /* Number of consecutive following insns |
| that must be moved with this one. */ |
| unsigned int regno; /* The register it sets */ |
| short lifetime; /* lifetime of that register; |
| may be adjusted when matching movables |
| that load the same value are found. */ |
| short savings; /* Number of insns we can move for this reg, |
| including other movables that force this |
| or match this one. */ |
| ENUM_BITFIELD(machine_mode) savemode : 8; /* Nonzero means it is a mode for |
| a low part that we should avoid changing when |
| clearing the rest of the reg. */ |
| unsigned int cond : 1; /* 1 if only conditionally movable */ |
| unsigned int force : 1; /* 1 means MUST move this insn */ |
| unsigned int global : 1; /* 1 means reg is live outside this loop */ |
| /* If PARTIAL is 1, GLOBAL means something different: |
| that the reg is live outside the range from where it is set |
| to the following label. */ |
| unsigned int done : 1; /* 1 inhibits further processing of this */ |
| |
| unsigned int partial : 1; /* 1 means this reg is used for zero-extending. |
| In particular, moving it does not make it |
| invariant. */ |
| unsigned int move_insn : 1; /* 1 means that we call emit_move_insn to |
| load SRC, rather than copying INSN. */ |
| unsigned int move_insn_first:1;/* Same as above, if this is necessary for the |
| first insn of a consecutive sets group. */ |
| unsigned int is_equiv : 1; /* 1 means a REG_EQUIV is present on INSN. */ |
| unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace |
| the original insn with a copy from that |
| pseudo, rather than deleting it. */ |
| struct movable *match; /* First entry for same value */ |
| struct movable *forces; /* An insn that must be moved if this is */ |
| struct movable *next; |
| }; |
| |
| |
| static FILE *loop_dump_stream; |
| |
| /* Forward declarations. */ |
| |
| static void invalidate_loops_containing_label (rtx); |
| static void find_and_verify_loops (rtx, struct loops *); |
| static void mark_loop_jump (rtx, struct loop *); |
| static void prescan_loop (struct loop *); |
| static int reg_in_basic_block_p (rtx, rtx); |
| static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx); |
| static int labels_in_range_p (rtx, int); |
| static void count_one_set (struct loop_regs *, rtx, rtx, rtx *); |
| static void note_addr_stored (rtx, rtx, void *); |
| static void note_set_pseudo_multiple_uses (rtx, rtx, void *); |
| static int loop_reg_used_before_p (const struct loop *, rtx, rtx); |
| static rtx find_regs_nested (rtx, rtx); |
| static void scan_loop (struct loop*, int); |
| #if 0 |
| static void replace_call_address (rtx, rtx, rtx); |
| #endif |
| static rtx skip_consec_insns (rtx, int); |
| static int libcall_benefit (rtx); |
| static rtx libcall_other_reg (rtx, rtx); |
| static void record_excess_regs (rtx, rtx, rtx *); |
| static void ignore_some_movables (struct loop_movables *); |
| static void force_movables (struct loop_movables *); |
| static void combine_movables (struct loop_movables *, struct loop_regs *); |
| static int num_unmoved_movables (const struct loop *); |
| static int regs_match_p (rtx, rtx, struct loop_movables *); |
| static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *, |
| struct loop_regs *); |
| static void add_label_notes (rtx, rtx); |
| static void move_movables (struct loop *loop, struct loop_movables *, int, |
| int); |
| static void loop_movables_add (struct loop_movables *, struct movable *); |
| static void loop_movables_free (struct loop_movables *); |
| static int count_nonfixed_reads (const struct loop *, rtx); |
| static void loop_bivs_find (struct loop *); |
| static void loop_bivs_init_find (struct loop *); |
| static void loop_bivs_check (struct loop *); |
| static void loop_givs_find (struct loop *); |
| static void loop_givs_check (struct loop *); |
| static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int); |
| static int loop_giv_reduce_benefit (struct loop *, struct iv_class *, |
| struct induction *, rtx); |
| static void loop_givs_dead_check (struct loop *, struct iv_class *); |
| static void loop_givs_reduce (struct loop *, struct iv_class *); |
| static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *); |
| static void loop_ivs_free (struct loop *); |
| static void strength_reduce (struct loop *, int); |
| static void find_single_use_in_loop (struct loop_regs *, rtx, rtx); |
| static int valid_initial_value_p (rtx, rtx, int, rtx); |
| static void find_mem_givs (const struct loop *, rtx, rtx, int, int); |
| static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx, |
| rtx, rtx *, int, int); |
| static void check_final_value (const struct loop *, struct induction *); |
| static void loop_ivs_dump (const struct loop *, FILE *, int); |
| static void loop_iv_class_dump (const struct iv_class *, FILE *, int); |
| static void loop_biv_dump (const struct induction *, FILE *, int); |
| static void loop_giv_dump (const struct induction *, FILE *, int); |
| static void record_giv (const struct loop *, struct induction *, rtx, rtx, |
| rtx, rtx, rtx, rtx, int, enum g_types, int, int, |
| rtx *); |
| static void update_giv_derive (const struct loop *, rtx); |
| static HOST_WIDE_INT get_monotonic_increment (struct iv_class *); |
| static bool biased_biv_fits_mode_p (const struct loop *, struct iv_class *, |
| HOST_WIDE_INT, enum machine_mode, |
| unsigned HOST_WIDE_INT); |
| static bool biv_fits_mode_p (const struct loop *, struct iv_class *, |
| HOST_WIDE_INT, enum machine_mode, bool); |
| static bool extension_within_bounds_p (const struct loop *, struct iv_class *, |
| HOST_WIDE_INT, rtx); |
| static void check_ext_dependent_givs (const struct loop *, struct iv_class *); |
| static int basic_induction_var (const struct loop *, rtx, enum machine_mode, |
| rtx, rtx, rtx *, rtx *, rtx **); |
| static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *); |
| static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *, |
| rtx *, rtx *, int, int *, enum machine_mode); |
| static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *, |
| rtx *, rtx *, rtx *); |
| static int check_dbra_loop (struct loop *, int); |
| static rtx express_from_1 (rtx, rtx, rtx); |
| static rtx combine_givs_p (struct induction *, struct induction *); |
| static int cmp_combine_givs_stats (const void *, const void *); |
| static void combine_givs (struct loop_regs *, struct iv_class *); |
| static int product_cheap_p (rtx, rtx); |
| static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int, |
| int, int); |
| static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx, |
| struct iv_class *, int, basic_block, rtx); |
| static int last_use_this_basic_block (rtx, rtx); |
| static void record_initial (rtx, rtx, void *); |
| static void update_reg_last_use (rtx, rtx); |
| static rtx next_insn_in_loop (const struct loop *, rtx); |
| static void loop_regs_scan (const struct loop *, int); |
| static int count_insns_in_loop (const struct loop *); |
| static int find_mem_in_note_1 (rtx *, void *); |
| static rtx find_mem_in_note (rtx); |
| static void load_mems (const struct loop *); |
| static int insert_loop_mem (rtx *, void *); |
| static int replace_loop_mem (rtx *, void *); |
| static void replace_loop_mems (rtx, rtx, rtx, int); |
| static int replace_loop_reg (rtx *, void *); |
| static void replace_loop_regs (rtx insn, rtx, rtx); |
| static void note_reg_stored (rtx, rtx, void *); |
| static void try_copy_prop (const struct loop *, rtx, unsigned int); |
| static void try_swap_copy_prop (const struct loop *, rtx, unsigned int); |
| static rtx check_insn_for_givs (struct loop *, rtx, int, int); |
| static rtx check_insn_for_bivs (struct loop *, rtx, int, int); |
| static rtx gen_add_mult (rtx, rtx, rtx, rtx); |
| static void loop_regs_update (const struct loop *, rtx); |
| static int iv_add_mult_cost (rtx, rtx, rtx, rtx); |
| static int loop_invariant_p (const struct loop *, rtx); |
| static rtx loop_insn_hoist (const struct loop *, rtx); |
| static void loop_iv_add_mult_emit_before (const struct loop *, rtx, rtx, rtx, |
| rtx, basic_block, rtx); |
| static rtx loop_insn_emit_before (const struct loop *, basic_block, |
| rtx, rtx); |
| static int loop_insn_first_p (rtx, rtx); |
| static rtx get_condition_for_loop (const struct loop *, rtx); |
| static void loop_iv_add_mult_sink (const struct loop *, rtx, rtx, rtx, rtx); |
| static void loop_iv_add_mult_hoist (const struct loop *, rtx, rtx, rtx, rtx); |
| static rtx extend_value_for_giv (struct induction *, rtx); |
| static rtx loop_insn_sink (const struct loop *, rtx); |
| |
| static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx); |
| static rtx loop_call_insn_emit_before (const struct loop *, basic_block, |
| rtx, rtx); |
| static rtx loop_call_insn_hoist (const struct loop *, rtx); |
| static rtx loop_insn_sink_or_swim (const struct loop *, rtx); |
| |
| static void loop_dump_aux (const struct loop *, FILE *, int); |
| static void loop_delete_insns (rtx, rtx); |
| static HOST_WIDE_INT remove_constant_addition (rtx *); |
| static rtx gen_load_of_final_value (rtx, rtx); |
| void debug_ivs (const struct loop *); |
| void debug_iv_class (const struct iv_class *); |
| void debug_biv (const struct induction *); |
| void debug_giv (const struct induction *); |
| void debug_loop (const struct loop *); |
| void debug_loops (const struct loops *); |
| |
| typedef struct loop_replace_args |
| { |
| rtx match; |
| rtx replacement; |
| rtx insn; |
| } loop_replace_args; |
| |
| /* Nonzero iff INSN is between START and END, inclusive. */ |
| #define INSN_IN_RANGE_P(INSN, START, END) \ |
| (INSN_UID (INSN) < max_uid_for_loop \ |
| && INSN_LUID (INSN) >= INSN_LUID (START) \ |
| && INSN_LUID (INSN) <= INSN_LUID (END)) |
| |
| /* Indirect_jump_in_function is computed once per function. */ |
| static int indirect_jump_in_function; |
| static int indirect_jump_in_function_p (rtx); |
| |
| static int compute_luids (rtx, rtx, int); |
| |
| static int biv_elimination_giv_has_0_offset (struct induction *, |
| struct induction *, rtx); |
| |
| /* Benefit penalty, if a giv is not replaceable, i.e. we must emit an insn to |
| copy the value of the strength-reduced giv to its original register. */ |
| static int copy_cost; |
| |
| /* Cost of using a register, to normalize the benefits of a giv. */ |
| static int reg_address_cost; |
| |
| void |
| init_loop (void) |
| { |
| rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1); |
| |
| reg_address_cost = address_cost (reg, SImode); |
| |
| copy_cost = COSTS_N_INSNS (1); |
| } |
| |
| /* Compute the mapping from uids to luids. |
| LUIDs are numbers assigned to insns, like uids, |
| except that luids increase monotonically through the code. |
| Start at insn START and stop just before END. Assign LUIDs |
| starting with PREV_LUID + 1. Return the last assigned LUID + 1. */ |
| static int |
| compute_luids (rtx start, rtx end, int prev_luid) |
| { |
| int i; |
| rtx insn; |
| |
| for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn)) |
| { |
| if (INSN_UID (insn) >= max_uid_for_loop) |
| continue; |
| /* Don't assign luids to line-number NOTEs, so that the distance in |
| luids between two insns is not affected by -g. */ |
| if (!NOTE_P (insn) |
| || NOTE_LINE_NUMBER (insn) <= 0) |
| uid_luid[INSN_UID (insn)] = ++i; |
| else |
| /* Give a line number note the same luid as preceding insn. */ |
| uid_luid[INSN_UID (insn)] = i; |
| } |
| return i + 1; |
| } |
| |
| /* Entry point of this file. Perform loop optimization |
| on the current function. F is the first insn of the function |
| and DUMPFILE is a stream for output of a trace of actions taken |
| (or 0 if none should be output). */ |
| |
| void |
| loop_optimize (rtx f, FILE *dumpfile, int flags) |
| { |
| rtx insn; |
| int i; |
| struct loops loops_data; |
| struct loops *loops = &loops_data; |
| struct loop_info *loops_info; |
| |
| loop_dump_stream = dumpfile; |
| |
| init_recog_no_volatile (); |
| |
| max_reg_before_loop = max_reg_num (); |
| loop_max_reg = max_reg_before_loop; |
| |
| regs_may_share = 0; |
| |
| /* Count the number of loops. */ |
| |
| max_loop_num = 0; |
| for (insn = f; insn; insn = NEXT_INSN (insn)) |
| { |
| if (NOTE_P (insn) |
| && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG) |
| max_loop_num++; |
| } |
| |
| /* Don't waste time if no loops. */ |
| if (max_loop_num == 0) |
| return; |
| |
| loops->num = max_loop_num; |
| |
| /* Get size to use for tables indexed by uids. |
| Leave some space for labels allocated by find_and_verify_loops. */ |
| max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32; |
| |
| uid_luid = xcalloc (max_uid_for_loop, sizeof (int)); |
| uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *)); |
| |
| /* Allocate storage for array of loops. */ |
| loops->array = xcalloc (loops->num, sizeof (struct loop)); |
| |
| /* Find and process each loop. |
| First, find them, and record them in order of their beginnings. */ |
| find_and_verify_loops (f, loops); |
| |
| /* Allocate and initialize auxiliary loop information. */ |
| loops_info = xcalloc (loops->num, sizeof (struct loop_info)); |
| for (i = 0; i < (int) loops->num; i++) |
| loops->array[i].aux = loops_info + i; |
| |
| /* Now find all register lifetimes. This must be done after |
| find_and_verify_loops, because it might reorder the insns in the |
| function. */ |
| reg_scan (f, max_reg_before_loop); |
| |
| /* This must occur after reg_scan so that registers created by gcse |
| will have entries in the register tables. |
| |
| We could have added a call to reg_scan after gcse_main in toplev.c, |
| but moving this call to init_alias_analysis is more efficient. */ |
| init_alias_analysis (); |
| |
| /* See if we went too far. Note that get_max_uid already returns |
| one more than the maximum uid of all insns. */ |
| if (get_max_uid () > max_uid_for_loop) |
| abort (); |
| /* Now reset it to the actual size we need. See above. */ |
| max_uid_for_loop = get_max_uid (); |
| |
| /* find_and_verify_loops has already called compute_luids, but it |
| might have rearranged code afterwards, so we need to recompute |
| the luids now. */ |
| compute_luids (f, NULL_RTX, 0); |
| |
| /* Don't leave gaps in uid_luid for insns that have been |
| deleted. It is possible that the first or last insn |
| using some register has been deleted by cross-jumping. |
| Make sure that uid_luid for that former insn's uid |
| points to the general area where that insn used to be. */ |
| for (i = 0; i < max_uid_for_loop; i++) |
| { |
| uid_luid[0] = uid_luid[i]; |
| if (uid_luid[0] != 0) |
| break; |
| } |
| for (i = 0; i < max_uid_for_loop; i++) |
| if (uid_luid[i] == 0) |
| uid_luid[i] = uid_luid[i - 1]; |
| |
| /* Determine if the function has an indirect jump. On some systems |
| this prevents low overhead loop instructions from being used. */ |
| indirect_jump_in_function = indirect_jump_in_function_p (f); |
| |
| /* Now scan the loops, last ones first, since this means inner ones are done |
| before outer ones. */ |
| for (i = max_loop_num - 1; i >= 0; i--) |
| { |
| struct loop *loop = &loops->array[i]; |
| |
| if (! loop->invalid && loop->end) |
| { |
| scan_loop (loop, flags); |
| ggc_collect (); |
| } |
| } |
| |
| end_alias_analysis (); |
| |
| /* Clean up. */ |
| for (i = 0; i < (int) loops->num; i++) |
| free (loops_info[i].mems); |
| |
| free (uid_luid); |
| free (uid_loop); |
| free (loops_info); |
| free (loops->array); |
| } |
| |
| /* Returns the next insn, in execution order, after INSN. START and |
| END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop, |
| respectively. LOOP->TOP, if non-NULL, is the top of the loop in the |
| insn-stream; it is used with loops that are entered near the |
| bottom. */ |
| |
| static rtx |
| next_insn_in_loop (const struct loop *loop, rtx insn) |
| { |
| insn = NEXT_INSN (insn); |
| |
| if (insn == loop->end) |
| { |
| if (loop->top) |
| /* Go to the top of the loop, and continue there. */ |
| insn = loop->top; |
| else |
| /* We're done. */ |
| insn = NULL_RTX; |
| } |
| |
| if (insn == loop->scan_start) |
| /* We're done. */ |
| insn = NULL_RTX; |
| |
| return insn; |
| } |
| |
| /* Find any register references hidden inside X and add them to |
| the dependency list DEPS. This is used to look inside CLOBBER (MEM ...) |
| patterns when checking whether a PARALLEL can be pulled out of a loop. */ |
| |
| static rtx |
| find_regs_nested (rtx deps, rtx x) |
| { |
| enum rtx_code code = GET_CODE (x); |
| if (code == REG) |
| deps = gen_rtx_EXPR_LIST (VOIDmode, x, deps); |
| else |
| { |
| const char *fmt = GET_RTX_FORMAT (code); |
| int i, j; |
| for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'e') |
| deps = find_regs_nested (deps, XEXP (x, i)); |
| else if (fmt[i] == 'E') |
| for (j = 0; j < XVECLEN (x, i); j++) |
| deps = find_regs_nested (deps, XVECEXP (x, i, j)); |
| } |
| } |
| return deps; |
| } |
| |
| /* Optimize one loop described by LOOP. */ |
| |
| /* ??? Could also move memory writes out of loops if the destination address |
| is invariant, the source is invariant, the memory write is not volatile, |
| and if we can prove that no read inside the loop can read this address |
| before the write occurs. If there is a read of this address after the |
| write, then we can also mark the memory read as invariant. */ |
| |
| static void |
| scan_loop (struct loop *loop, int flags) |
| { |
| struct loop_info *loop_info = LOOP_INFO (loop); |
| struct loop_regs *regs = LOOP_REGS (loop); |
| int i; |
| rtx loop_start = loop->start; |
| rtx loop_end = loop->end; |
| rtx p; |
| /* 1 if we are scanning insns that could be executed zero times. */ |
| int maybe_never = 0; |
| /* 1 if we are scanning insns that might never be executed |
| due to a subroutine call which might exit before they are reached. */ |
| int call_passed = 0; |
| /* Number of insns in the loop. */ |
| int insn_count; |
| int tem; |
| rtx temp, update_start, update_end; |
| /* The SET from an insn, if it is the only SET in the insn. */ |
| rtx set, set1; |
| /* Chain describing insns movable in current loop. */ |
| struct loop_movables *movables = LOOP_MOVABLES (loop); |
| /* Ratio of extra register life span we can justify |
| for saving an instruction. More if loop doesn't call subroutines |
| since in that case saving an insn makes more difference |
| and more registers are available. */ |
| int threshold; |
| int in_libcall; |
| |
| loop->top = 0; |
| |
| movables->head = 0; |
| movables->last = 0; |
| |
| /* Determine whether this loop starts with a jump down to a test at |
| the end. This will occur for a small number of loops with a test |
| that is too complex to duplicate in front of the loop. |
| |
| We search for the first insn or label in the loop, skipping NOTEs. |
| However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG |
| (because we might have a loop executed only once that contains a |
| loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END |
| (in case we have a degenerate loop). |
| |
| Note that if we mistakenly think that a loop is entered at the top |
| when, in fact, it is entered at the exit test, the only effect will be |
| slightly poorer optimization. Making the opposite error can generate |
| incorrect code. Since very few loops now start with a jump to the |
| exit test, the code here to detect that case is very conservative. */ |
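| |
| /* Roughly, the shape being detected here is (once the jump below is |
| recognized, LOOP->SCAN_START becomes L2 and LOOP->TOP becomes L1): |
| |
| NOTE_INSN_LOOP_BEG |
| jump L2 ;; unconditional jump down to the exit test |
| L1: ;; body proper |
| ... |
| L2: ;; exit test |
| conditional jump back to L1 |
| NOTE_INSN_LOOP_END |
| |
| This is only a sketch; the real layout is RTL insns and notes. */ |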
| |
| for (p = NEXT_INSN (loop_start); |
| p != loop_end |
| && !LABEL_P (p) && ! INSN_P (p) |
| && (!NOTE_P (p) |
| || (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG |
| && NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END)); |
| p = NEXT_INSN (p)) |
| ; |
| |
| loop->scan_start = p; |
| |
| /* If loop end is the end of the current function, then emit a |
| NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy |
| note insn. This is the position we use when sinking insns out of |
| the loop. */ |
| if (NEXT_INSN (loop->end) != 0) |
| loop->sink = NEXT_INSN (loop->end); |
| else |
| loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end); |
| |
| /* Set up variables describing this loop. */ |
| prescan_loop (loop); |
| threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs); |
| |
| /* If loop has a jump before the first label, |
| the true entry is the target of that jump. |
| Start scan from there. |
| But record in LOOP->TOP the place where the end-test jumps |
| back to so we can scan that after the end of the loop. */ |
| if (JUMP_P (p) |
| /* Loop entry must be unconditional jump (and not a RETURN) */ |
| && any_uncondjump_p (p) |
| && JUMP_LABEL (p) != 0 |
| /* Check to see whether the jump actually |
| jumps out of the loop (meaning it's no loop). |
| This case can happen for things like |
| do {..} while (0). If this label was generated previously |
| by loop, we can't tell anything about it and have to reject |
| the loop. */ |
| && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end)) |
| { |
| /* APPLE LOCAL begin 4130216 */ |
| rtx end_test = prev_real_insn (loop_end); |
| loop->top = next_label (loop->scan_start); |
| loop->scan_start = JUMP_LABEL (p); |
| /* Make sure that loop->top is, in fact, the target of the end-test. |
| If it is not, the logic for giv's will not work. The tree optimizers |
| can produce such loops. */ |
| if ((any_condjump_p (end_test) || any_uncondjump_p (end_test)) |
| && JUMP_LABEL (end_test) != loop->top) |
| { |
| if (loop_dump_stream) |
| fprintf (loop_dump_stream, |
| "\nLoop from %d to %d is too complex.\n\n", |
| INSN_UID (loop_start), INSN_UID (loop_end)); |
| return; |
| } |
| /* APPLE LOCAL end 4130216 */ |
| } |
| |
| /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid |
| as required by loop_reg_used_before_p. So skip such loops. (This |
| test may never be true, but it's best to play it safe.) |
| |
| Also, skip loops where we do not start scanning at a label. This |
| test also rejects loops starting with a JUMP_INSN that failed the |
| test above. */ |
| |
| if (INSN_UID (loop->scan_start) >= max_uid_for_loop |
| || !LABEL_P (loop->scan_start)) |
| { |
| if (loop_dump_stream) |
| fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n", |
| INSN_UID (loop_start), INSN_UID (loop_end)); |
| return; |
| } |
| |
| /* Allocate extra space for REGs that might be created by load_mems. |
| We allocate a little extra slop as well, in the hopes that we |
| won't have to reallocate the regs array. */ |
| loop_regs_scan (loop, loop_info->mems_idx + 16); |
| insn_count = count_insns_in_loop (loop); |
| |
| if (loop_dump_stream) |
| fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n", |
| INSN_UID (loop_start), INSN_UID (loop_end), insn_count); |
| |
| /* Scan through the loop finding insns that are safe to move. |
| Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that |
| this reg will be considered invariant for subsequent insns. |
| We consider whether subsequent insns use the reg |
| in deciding whether it is worth actually moving. |
| |
| MAYBE_NEVER is nonzero if we have passed a conditional jump insn |
| and therefore it is possible that the insns we are scanning |
| would never be executed. At such times, we must make sure |
| that it is safe to execute the insn once instead of zero times. |
| When MAYBE_NEVER is 0, all insns will be executed at least once |
| so that is not a problem. */ |
| |
| for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start); |
| p != NULL_RTX; |
| p = next_insn_in_loop (loop, p)) |
| { |
| if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX)) |
| in_libcall--; |
| if (NONJUMP_INSN_P (p)) |
| { |
| /* Do not scan past an optimization barrier. */ |
| if (GET_CODE (PATTERN (p)) == ASM_INPUT) |
| break; |
| temp = find_reg_note (p, REG_LIBCALL, NULL_RTX); |
| if (temp) |
| in_libcall++; |
| if (! in_libcall |
| && (set = single_set (p)) |
| && REG_P (SET_DEST (set)) |
| #ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED |
| && SET_DEST (set) != pic_offset_table_rtx |
| #endif |
| && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize) |
| { |
| int tem1 = 0; |
| int tem2 = 0; |
| int move_insn = 0; |
| int insert_temp = 0; |
| rtx src = SET_SRC (set); |
| rtx dependencies = 0; |
| |
| /* Figure out what to use as a source of this insn. If a |
| REG_EQUIV note is given or if a REG_EQUAL note with a |
| constant operand is specified, use it as the source and |
| mark that we should move this insn by calling |
| emit_move_insn rather than duplicating the insn. |
| |
| Otherwise, only use the REG_EQUAL contents if a REG_RETVAL |
| note is present. */ |
| temp = find_reg_note (p, REG_EQUIV, NULL_RTX); |
| if (temp) |
| src = XEXP (temp, 0), move_insn = 1; |
| else |
| { |
| temp = find_reg_note (p, REG_EQUAL, NULL_RTX); |
| if (temp && CONSTANT_P (XEXP (temp, 0))) |
| src = XEXP (temp, 0), move_insn = 1; |
| if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX)) |
| { |
| src = XEXP (temp, 0); |
| /* A libcall block can use regs that don't appear in |
| the equivalent expression. To move the libcall, |
| we must move those regs too. */ |
| dependencies = libcall_other_reg (p, src); |
| } |
| } |
| |
| /* For parallels, add any possible uses to the dependencies, as |
| we can't move the insn without resolving them first. |
| MEMs inside CLOBBERs may also reference registers; these |
| count as implicit uses. */ |
| if (GET_CODE (PATTERN (p)) == PARALLEL) |
| { |
| for (i = 0; i < XVECLEN (PATTERN (p), 0); i++) |
| { |
| rtx x = XVECEXP (PATTERN (p), 0, i); |
| if (GET_CODE (x) == USE) |
| dependencies |
| = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0), |
| dependencies); |
| else if (GET_CODE (x) == CLOBBER |
| && MEM_P (XEXP (x, 0))) |
| dependencies = find_regs_nested (dependencies, |
| XEXP (XEXP (x, 0), 0)); |
| } |
| } |
| |
| if (/* The register is used in basic blocks other |
| than the one where it is set (meaning that |
| something after this point in the loop might |
| depend on its value before the set). */ |
| ! reg_in_basic_block_p (p, SET_DEST (set)) |
| /* And the set is not guaranteed to be executed once |
| the loop starts, or the value before the set is |
| needed before the set occurs... |
| |
| ??? Note we have quadratic behavior here, mitigated |
| by the fact that the previous test will often fail for |
| large loops. Rather than re-scanning the entire loop |
| each time for register usage, we should build tables |
| of the register usage and use them here instead. */ |
| && (maybe_never |
| || loop_reg_used_before_p (loop, set, p))) |
| /* It is unsafe to move the set. However, it may be OK to |
| move the source into a new pseudo, and substitute a |
| reg-to-reg copy for the original insn. |
| |
| This code used to consider it OK to move a set of a variable |
| which was not created by the user and not used in an exit |
| test. |
| That behavior is incorrect and was removed. */ |
| insert_temp = 1; |
| |
| /* Don't try to optimize a MODE_CC set with a constant |
| source. It probably will be combined with a conditional |
| jump. */ |
| if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC |
| && CONSTANT_P (src)) |
| ; |
| /* Don't try to optimize a register that was made |
| by loop-optimization for an inner loop. |
| We don't know its life-span, so we can't compute |
| the benefit. */ |
| else if (REGNO (SET_DEST (set)) >= max_reg_before_loop) |
| ; |
| /* Don't move the source and add a reg-to-reg copy: |
| - with -Os (this certainly increases size), |
| - if the mode doesn't support copy operations (obviously), |
| - if the source is already a reg (the motion will gain nothing), |
| - if the source is a legitimate constant (likewise). */ |
| else if (insert_temp |
| && (optimize_size |
| || ! can_copy_p (GET_MODE (SET_SRC (set))) |
| || REG_P (SET_SRC (set)) |
| || (CONSTANT_P (SET_SRC (set)) |
| && LEGITIMATE_CONSTANT_P (SET_SRC (set))))) |
| ; |
| else if ((tem = loop_invariant_p (loop, src)) |
| && (dependencies == 0 |
| || (tem2 |
| = loop_invariant_p (loop, dependencies)) != 0) |
| && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1 |
| || (tem1 |
| = consec_sets_invariant_p |
| (loop, SET_DEST (set), |
| regs->array[REGNO (SET_DEST (set))].set_in_loop, |
| p))) |
| /* If the insn can cause a trap (such as divide by zero), |
| can't move it unless it's guaranteed to be executed |
| once loop is entered. Even a function call might |
| prevent the trap insn from being reached |
| (since it might exit!) */ |
| && ! ((maybe_never || call_passed) |
| && may_trap_p (src))) |
| { |
| struct movable *m; |
| int regno = REGNO (SET_DEST (set)); |
| |
| /* A potential lossage is the case where two insns |
| can be combined as long as they are both in the loop, but |
| we move one of them outside the loop. For large loops, |
| this can lose. The most common case of this is the address |
| of a function being called. |
| |
| Therefore, if this register is marked as being used |
| exactly once if we are in a loop with calls |
| (a "large loop"), see if we can replace the usage of |
| this register with the source of this SET. If we can, |
| delete this insn. |
| |
| Don't do this if P has a REG_RETVAL note or if we have |
| SMALL_REGISTER_CLASSES and SET_SRC is a hard register. */ |
| |
| if (loop_info->has_call |
| && regs->array[regno].single_usage != 0 |
| && regs->array[regno].single_usage != const0_rtx |
| && REGNO_FIRST_UID (regno) == INSN_UID (p) |
| && (REGNO_LAST_UID (regno) |
| == INSN_UID (regs->array[regno].single_usage)) |
| && regs->array[regno].set_in_loop == 1 |
| && GET_CODE (SET_SRC (set)) != ASM_OPERANDS |
| && ! side_effects_p (SET_SRC (set)) |
| && ! find_reg_note (p, REG_RETVAL, NULL_RTX) |
| && (! SMALL_REGISTER_CLASSES |
| || (! (REG_P (SET_SRC (set)) |
| && (REGNO (SET_SRC (set)) |
| < FIRST_PSEUDO_REGISTER)))) |
| && regno >= FIRST_PSEUDO_REGISTER |
| /* This test is not redundant; SET_SRC (set) might be |
| a call-clobbered register and the life of REGNO |
| might span a call. */ |
| && ! modified_between_p (SET_SRC (set), p, |
| regs->array[regno].single_usage) |
| && no_labels_between_p (p, |
| regs->array[regno].single_usage) |
| && validate_replace_rtx (SET_DEST (set), SET_SRC (set), |
| regs->array[regno].single_usage)) |
| { |
| /* Replace any usage in a REG_EQUAL note. Must copy |
| the new source, so that we don't get rtx sharing |
| between the SET_SOURCE and REG_NOTES of insn p. */ |
| REG_NOTES (regs->array[regno].single_usage) |
| = (replace_rtx |
| (REG_NOTES (regs->array[regno].single_usage), |
| SET_DEST (set), copy_rtx (SET_SRC (set)))); |
| |
| delete_insn (p); |
| for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); |
| i++) |
| regs->array[regno+i].set_in_loop = 0; |
| continue; |
| } |
| |
| m = xmalloc (sizeof (struct movable)); |
| m->next = 0; |
| m->insn = p; |
| m->set_src = src; |
| m->dependencies = dependencies; |
| m->set_dest = SET_DEST (set); |
| m->force = 0; |
| m->consec |
| = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1; |
| m->done = 0; |
| m->forces = 0; |
| m->partial = 0; |
| m->move_insn = move_insn; |
| m->move_insn_first = 0; |
| m->insert_temp = insert_temp; |
| m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0); |
| m->savemode = VOIDmode; |
| m->regno = regno; |
| /* Set M->cond if either loop_invariant_p |
| or consec_sets_invariant_p returned 2 |
| (only conditionally invariant). */ |
| m->cond = ((tem | tem1 | tem2) > 1); |
| m->global = LOOP_REG_GLOBAL_P (loop, regno); |
| m->match = 0; |
| m->lifetime = LOOP_REG_LIFETIME (loop, regno); |
| m->savings = regs->array[regno].n_times_set; |
| if (find_reg_note (p, REG_RETVAL, NULL_RTX)) |
| m->savings += libcall_benefit (p); |
| for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++) |
| regs->array[regno+i].set_in_loop = move_insn ? -2 : -1; |
| /* Add M to the end of the chain MOVABLES. */ |
| loop_movables_add (movables, m); |
| |
| if (m->consec > 0) |
| { |
| /* It is possible for the first instruction to have a |
| REG_EQUAL note but a non-invariant SET_SRC, so we must |
| remember the status of the first instruction in case |
| the last instruction doesn't have a REG_EQUAL note. */ |
| m->move_insn_first = m->move_insn; |
| |
| /* Skip this insn, not checking REG_LIBCALL notes. */ |
| p = next_nonnote_insn (p); |
| /* Skip the consecutive insns, if there are any. */ |
| p = skip_consec_insns (p, m->consec); |
| /* Back up to the last insn of the consecutive group. */ |
| p = prev_nonnote_insn (p); |
| |
| /* We must now reset m->move_insn, m->is_equiv, and |
| possibly m->set_src to correspond to the effects of |
| all the insns. */ |
| temp = find_reg_note (p, REG_EQUIV, NULL_RTX); |
| if (temp) |
| m->set_src = XEXP (temp, 0), m->move_insn = 1; |
| else |
| { |
| temp = find_reg_note (p, REG_EQUAL, NULL_RTX); |
| if (temp && CONSTANT_P (XEXP (temp, 0))) |
| m->set_src = XEXP (temp, 0), m->move_insn = 1; |
| else |
| m->move_insn = 0; |
| |
| } |
| m->is_equiv |
| = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0); |
| } |
| } |
| /* If this register is always set within a STRICT_LOW_PART |
| or set to zero, then its high bytes are constant. |
| So clear them outside the loop and within the loop |
| just load the low bytes. |
| We must check that the machine has an instruction to do so. |
| Also, if the value loaded into the register |
| depends on the same register, this cannot be done. */ |
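| /* Schematically, the pattern matched just below looks like (a |
| hypothetical sketch for a HImode low part of an SImode reg): |
| |
| (set (reg:SI r) (const_int 0)) |
| (set (strict_low_part (subreg:HI (reg:SI r) 0)) (mem:HI ...)) |
| |
| If the reg qualifies, the clearing insn becomes a "partial" movable |
| so that it can be hoisted out of the loop. */ |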
| else if (SET_SRC (set) == const0_rtx |
| && NONJUMP_INSN_P (NEXT_INSN (p)) |
| && (set1 = single_set (NEXT_INSN (p))) |
| && GET_CODE (set1) == SET |
| && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART) |
| && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG) |
| && (SUBREG_REG (XEXP (SET_DEST (set1), 0)) |
| == SET_DEST (set)) |
| && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1))) |
| { |
| int regno = REGNO (SET_DEST (set)); |
| if (regs->array[regno].set_in_loop == 2) |
| { |
| struct movable *m; |
| m = xmalloc (sizeof (struct movable)); |
| m->next = 0; |
| m->insn = p; |
| m->set_dest = SET_DEST (set); |
| m->dependencies = 0; |
| m->force = 0; |
| m->consec = 0; |
| m->done = 0; |
| m->forces = 0; |
| m->move_insn = 0; |
| m->move_insn_first = 0; |
| m->insert_temp = insert_temp; |
| m->partial = 1; |
| /* If the insn may not be executed on some cycles, |
| we can't clear the whole reg; clear just high part. |
| Not even if the reg is used only within this loop. |
| Consider this: |
| while (1) |
| while (s != t) { |
| if (foo ()) x = *s; |
| use (x); |
| } |
| Clearing x before the inner loop could clobber a value |
| being saved from the last time around the outer loop. |
| However, if the reg is not used outside this loop |
| and all uses of the register are in the same |
| basic block as the store, there is no problem. |
| |
| If this insn was made by loop, we don't know its |
| INSN_LUID and hence must make a conservative |
| assumption. */ |
| m->global = (INSN_UID (p) >= max_uid_for_loop |
| || LOOP_REG_GLOBAL_P (loop, regno) |
| || (labels_in_range_p |
| (p, REGNO_FIRST_LUID (regno)))); |
| if (maybe_never && m->global) |
| m->savemode = GET_MODE (SET_SRC (set1)); |
| else |
| m->savemode = VOIDmode; |
| m->regno = regno; |
| m->cond = 0; |
| m->match = 0; |
| m->lifetime = LOOP_REG_LIFETIME (loop, regno); |
| m->savings = 1; |
| for (i = 0; |
| i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); |
| i++) |
| regs->array[regno+i].set_in_loop = -1; |
| /* Add M to the end of the chain MOVABLES. */ |
| loop_movables_add (movables, m); |
| } |
| } |
| } |
| } |
| /* Past a call insn, we get to insns which might not be executed |
| because the call might exit. This matters for insns that trap. |
| Constant and pure call insns always return, so they don't count. */ |
| else if (CALL_P (p) && ! CONST_OR_PURE_CALL_P (p)) |
| call_passed = 1; |
| /* Past a label or a jump, we get to insns for which we |
| can't count on whether or how many times they will be |
| executed during each iteration. Therefore, we can |
| only move out sets of trivial variables |
| (those not used after the loop). */ |
| /* Similar code appears twice in strength_reduce. */ |
| else if ((LABEL_P (p) || JUMP_P (p)) |
| /* If we enter the loop in the middle, and scan around to the |
| beginning, don't set maybe_never for that. This must be an |
| unconditional jump, otherwise the code at the top of the |
| loop might never be executed. Unconditional jumps are |
| followed by a barrier then the loop_end. */ |
| && ! (JUMP_P (p) && JUMP_LABEL (p) == loop->top |
| && NEXT_INSN (NEXT_INSN (p)) == loop_end |
| && any_uncondjump_p (p))) |
| maybe_never = 1; |
| } |
| |
| /* If one movable subsumes another, ignore that other. */ |
| |
| ignore_some_movables (movables); |
| |
| /* For each movable insn, see if the reg that it loads |
| has its last use (its death) in another conditionally movable insn. |
| If so, record that the second insn "forces" the first one, |
| since the second can be moved only if the first is. */ |
| |
| force_movables (movables); |
| |
| /* See if there are multiple movable insns that load the same value. |
| If there are, make all but the first point at the first one |
| through the `match' field, and add all their priorities together |
| as the priority of the first. */ |
| |
| combine_movables (movables, regs); |
| |
| /* Now consider each movable insn to decide whether it is worth moving. |
| Store 0 in regs->array[I].set_in_loop for each reg I that is moved. |
| |
| For machines with few registers this increases code size, so do not |
| move movables when optimizing for code size on such machines. |
| (The 18 below is the value for i386.) */ |
| |
| if (!optimize_size |
| || (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call)) |
| { |
| move_movables (loop, movables, threshold, insn_count); |
| |
| /* Recalculate regs->array if move_movables has created new |
| registers. */ |
| if (max_reg_num () > regs->num) |
| { |
| loop_regs_scan (loop, 0); |
| for (update_start = loop_start; |
| PREV_INSN (update_start) |
| && !LABEL_P (PREV_INSN (update_start)); |
| update_start = PREV_INSN (update_start)) |
| ; |
| update_end = NEXT_INSN (loop_end); |
| |
| reg_scan_update (update_start, update_end, loop_max_reg); |
| loop_max_reg = max_reg_num (); |
| } |
| } |
| |
| /* Candidates whose set_in_loop count is still negative are those that |
| were not moved. Restore regs->array[I].set_in_loop for them to |
| indicate that they are not actually invariant. */ |
| for (i = 0; i < regs->num; i++) |
| if (regs->array[i].set_in_loop < 0) |
| regs->array[i].set_in_loop = regs->array[i].n_times_set; |
| |
| /* Now that we've moved some things out of the loop, we might be able to |
| hoist even more memory references. */ |
| load_mems (loop); |
| |
| /* Recalculate regs->array if load_mems has created new registers. */ |
| if (max_reg_num () > regs->num) |
| loop_regs_scan (loop, 0); |
| |
| for (update_start = loop_start; |
| PREV_INSN (update_start) |
| && !LABEL_P (PREV_INSN (update_start)); |
| update_start = PREV_INSN (update_start)) |
| ; |
| update_end = NEXT_INSN (loop_end); |
| |
| reg_scan_update (update_start, update_end, loop_max_reg); |
| loop_max_reg = max_reg_num (); |
| |
| if (flag_strength_reduce) |
| { |
| if (update_end && LABEL_P (update_end)) |
| /* Ensure our label doesn't go away. */ |
| LABEL_NUSES (update_end)++; |
| |
| strength_reduce (loop, flags); |
| |
| reg_scan_update (update_start, update_end, loop_max_reg); |
| loop_max_reg = max_reg_num (); |
| |
| if (update_end && LABEL_P (update_end) |
| && --LABEL_NUSES (update_end) == 0) |
| delete_related_insns (update_end); |
| } |
| |
| /* The movable information was needed by strength reduction above, |
| so it could not be freed earlier; free it now. */ |
| loop_movables_free (movables); |
| |
| free (regs->array); |
| regs->array = 0; |
| regs->num = 0; |
| } |
| |
| /* Add elements to *OUTPUT to record all the pseudo-regs |
| mentioned in IN_THIS but not mentioned in NOT_IN_THIS. */ |
| |
| static void |
| record_excess_regs (rtx in_this, rtx not_in_this, rtx *output) |
| { |
| enum rtx_code code; |
| const char *fmt; |
| int i; |
| |
| code = GET_CODE (in_this); |
| |
| switch (code) |
| { |
| case PC: |
| case CC0: |
| case CONST_INT: |
| case CONST_DOUBLE: |
| case CONST: |
| case SYMBOL_REF: |
| case LABEL_REF: |
| return; |
| |
| case REG: |
| if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER |
| && ! reg_mentioned_p (in_this, not_in_this)) |
| *output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output); |
| return; |
| |
| default: |
| break; |
| } |
| |
| fmt = GET_RTX_FORMAT (code); |
| for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
| { |
| int j; |
| |
| switch (fmt[i]) |
| { |
| case 'E': |
| for (j = 0; j < XVECLEN (in_this, i); j++) |
| record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output); |
| break; |
| |
| case 'e': |
| record_excess_regs (XEXP (in_this, i), not_in_this, output); |
| break; |
| } |
| } |
| } |
| |
| /* Check what regs are referred to in the libcall block ending with INSN, |
| aside from those mentioned in the equivalent value. |
| If there are none, return 0. |
| If there are one or more, return an EXPR_LIST containing all of them. */ |
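| /* (For reference: the first insn of a libcall block carries a |
| REG_LIBCALL note pointing at the last insn, the last insn carries a |
| REG_RETVAL note pointing back at the first, and the last insn |
| usually also has a REG_EQUAL note giving the value computed.) */ |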
| |
| static rtx |
| libcall_other_reg (rtx insn, rtx equiv) |
| { |
| rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX); |
| rtx p = XEXP (note, 0); |
| rtx output = 0; |
| |
| /* First, find all the regs used in the libcall block |
| that are not mentioned as inputs to the result. */ |
| |
| while (p != insn) |
| { |
| if (INSN_P (p)) |
| record_excess_regs (PATTERN (p), equiv, &output); |
| p = NEXT_INSN (p); |
| } |
| |
| return output; |
| } |
| |
| /* Return 1 if all uses of REG |
| are between INSN and the end of the basic block. */ |
| |
| static int |
| reg_in_basic_block_p (rtx insn, rtx reg) |
| { |
| int regno = REGNO (reg); |
| rtx p; |
| |
| if (REGNO_FIRST_UID (regno) != INSN_UID (insn)) |
| return 0; |
| |
| /* Search this basic block for the already recorded last use of the reg. */ |
| for (p = insn; p; p = NEXT_INSN (p)) |
| { |
| switch (GET_CODE (p)) |
| { |
| case NOTE: |
| break; |
| |
| case INSN: |
| case CALL_INSN: |
| /* Ordinary insn: if this is the last use, we win. */ |
| if (REGNO_LAST_UID (regno) == INSN_UID (p)) |
| return 1; |
| break; |
| |
| case JUMP_INSN: |
| /* Jump insn: if this is the last use, we win. */ |
| if (REGNO_LAST_UID (regno) == INSN_UID (p)) |
| return 1; |
| /* Otherwise, it's the end of the basic block, so we lose. */ |
| return 0; |
| |
| case CODE_LABEL: |
| case BARRIER: |
| /* It's the end of the basic block, so we lose. */ |
| return 0; |
| |
| default: |
| break; |
| } |
| } |
| |
| /* The "last use" that was recorded can't be found after the first |
| use. This can happen when the last use was deleted while |
| processing an inner loop, this inner loop was then completely |
| unrolled, and the outer loop is always exited after the inner loop, |
| so that everything after the first use becomes a single basic block. */ |
| return 1; |
| } |
| |
| /* Compute the benefit of eliminating the insns in the block whose |
| last insn is LAST. This may be a group of insns used to compute a |
| value directly or can contain a library call. */ |
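| /* The benefit is a rough insn count: each call insn inside the block |
| counts as 10 (a guess at the size of the library routine) and every |
| other real insn, apart from USEs and CLOBBERs, counts as 1. */ |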
| |
| static int |
| libcall_benefit (rtx last) |
| { |
| rtx insn; |
| int benefit = 0; |
| |
| for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0); |
| insn != last; insn = NEXT_INSN (insn)) |
| { |
| if (CALL_P (insn)) |
| benefit += 10; /* Assume at least this many insns in a library |
| routine. */ |
| else if (NONJUMP_INSN_P (insn) |
| && GET_CODE (PATTERN (insn)) != USE |
| && GET_CODE (PATTERN (insn)) != CLOBBER) |
| benefit++; |
| } |
| |
| return benefit; |
| } |
| |
| /* Skip COUNT insns from INSN, counting library calls as 1 insn. */ |
| |
| static rtx |
| skip_consec_insns (rtx insn, int count) |
| { |
| for (; count > 0; count--) |
| { |
| rtx temp; |
| |
| /* If first insn of libcall sequence, skip to end. */ |
| /* Do this at start of loop, since INSN is guaranteed to |
| be an insn here. */ |
| if (!NOTE_P (insn) |
| && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX))) |
| insn = XEXP (temp, 0); |
| |
| do |
| insn = NEXT_INSN (insn); |
| while (NOTE_P (insn)); |
| } |
| |
| return insn; |
| } |
| |
| /* Ignore any movable whose insn falls within a libcall |
| which is part of another movable. |
| We make use of the fact that the movable for the libcall value |
| was made later and so appears later on the chain. */ |
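| /* In other words, if an insn inside a libcall block was itself |
| recorded as a movable, and the insn setting the libcall's value is |
| also a movable, the inner movable is marked `done' so that it is |
| never moved separately. */ |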
| |
| static void |
| ignore_some_movables (struct loop_movables *movables) |
| { |
| struct movable *m, *m1; |
| |
| for (m = movables->head; m; m = m->next) |
| { |
| /* Is this a movable for the value of a libcall? */ |
| rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX); |
| if (note) |
| { |
| rtx insn; |
| /* Check for earlier movables inside that range, |
| and mark them invalid. We cannot use LUIDs here because |
| insns created by loop.c for prior loops don't have LUIDs. |
| Rather than reject all such insns from movables, we just |
| explicitly check each insn in the libcall (since invariant |
| libcalls aren't that common). */ |
| for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn)) |
| for (m1 = movables->head; m1 != m; m1 = m1->next) |
| if (m1->insn == insn) |
| m1->done = 1; |
| } |
| } |
| } |
| |
| /* For each movable insn, see if the reg that it loads |
| has its last use (its death) in another conditionally movable insn. |
| If so, record that the second insn "forces" the first one, |
| since the second can be moved only if the first is. */ |
| |
| static void |
| force_movables (struct loop_movables *movables) |
| { |
| struct movable *m, *m1; |
| |
| for (m1 = movables->head; m1; m1 = m1->next) |
| /* Omit this if moving just the (SET (REG) 0) of a zero-extend. */ |
| if (!m1->partial && !m1->done) |
| { |
| int regno = m1->regno; |
| for (m = m1->next; m; m = m->next) |
| /* ??? Could this be a bug? What if CSE caused the |
| register of M1 to be used after this insn? |
| Since CSE does not update regno_last_uid, |
| this insn M->insn might not be where it dies. |
| But very likely this doesn't matter; what matters is |
| that M's reg is computed from M1's reg. */ |
| if (INSN_UID (m->insn) == REGNO_LAST_UID (regno) |
| && !m->done) |
| break; |
| if (m != 0 && m->set_src == m1->set_dest |
| /* If m->consec, m->set_src isn't valid. */ |
| && m->consec == 0) |
| m = 0; |
| |
| /* Increase the priority of moving the first insn |
| since it permits the second to be moved as well. |
| Likewise for insns already forced by the first insn. */ |
| if (m != 0) |
| { |
| struct movable *m2; |
| |
| m->forces = m1; |
| for (m2 = m1; m2; m2 = m2->forces) |
| { |
| m2->lifetime += m->lifetime; |
| m2->savings += m->savings; |
| } |
| } |
| } |
| } |
| |
| /* Find invariant expressions that are equal and can be combined into |
| one register. */ |
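| /* For example (illustrative register numbers), if the loop contains |
| (set (reg 101) (const_int 5)) and (set (reg 105) (const_int 5)) |
| and both are eligible movables, the second is marked as matching the |
| first through the `match' field, so only one register need be loaded |
| before the loop. */ |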
| |
| static void |
| combine_movables (struct loop_movables *movables, struct loop_regs *regs) |
| { |
| struct movable *m; |
| char *matched_regs = xmalloc (regs->num); |
| enum machine_mode mode; |
| |
| /* Regs that are set more than once are not allowed to match |
| or be matched. I'm no longer sure why not. */ |
| /* Only pseudo registers are allowed to match or be matched, |
| since move_movables does not validate the change. */ |
| /* Perhaps testing m->consec_sets would be more appropriate here? */ |
| |
| for (m = movables->head; m; m = m->next) |
| if (m->match == 0 && regs->array[m->regno].n_times_set == 1 |
| && m->regno >= FIRST_PSEUDO_REGISTER |
| && !m->insert_temp |
| && !m->partial) |
| { |
| struct movable *m1; |
| int regno = m->regno; |
| |
| memset (matched_regs, 0, regs->num); |
| matched_regs[regno] = 1; |
| |
| /* We want later insns to match the first one. Don't make the first |
| one match any later ones. So start this loop at m->next. */ |
| for (m1 = m->next; m1; m1 = m1->next) |
| if (m != m1 && m1->match == 0 |
| && !m1->insert_temp |
| && regs->array[m1->regno].n_times_set == 1 |
| && m1->regno >= FIRST_PSEUDO_REGISTER |
| /* A reg used outside the loop mustn't be eliminated. */ |
| && !m1->global |
| /* A reg used for zero-extending mustn't be eliminated. */ |
| && !m1->partial |
| && (matched_regs[m1->regno] |
| || |
| ( |
| /* Can combine regs with different modes loaded from the |
| same constant only if the modes are the same or |
| if both are integer modes with M wider or the same |
| width as M1. The check for integer is redundant, but |
| safe, since the only case of differing destination |
| modes with equal sources is when both sources are |
| VOIDmode, i.e., CONST_INT. */ |
| (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest) |
| || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT |
| && GET_MODE_CLASS (GET_MODE (m1->set_dest)) == MODE_INT |
| && (GET_MODE_BITSIZE (GET_MODE (m->set_dest)) |
| >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest))))) |
| /* APPLE LOCAL combine hoisted consts */ |
| && m1->regno >= FIRST_PSEUDO_REGISTER |
| /* See if the source of M1 says it matches M. */ |
| && ((REG_P (m1->set_src) |
| && matched_regs[REGNO (m1->set_src)]) |
| || rtx_equal_for_loop_p (m->set_src, m1->set_src, |
| movables, regs)))) |
| && ((m->dependencies == m1->dependencies) |
| || rtx_equal_p (m->dependencies, m1->dependencies))) |
| { |
| m->lifetime += m1->lifetime; |
| m->savings += m1->savings; |
| m1->done = 1; |
| m1->match = m; |
| matched_regs[m1->regno] = 1; |
| } |
| } |
| |
| /* Now combine the regs used for zero-extension. |
| This can be done for those not marked `global' |
| provided their lives don't overlap. */ |
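| /* E.g. two partial movables that each clear a register being |
| zero-extended from HImode can share a single clearing insn before |
| the loop, provided neither register is live outside the loop and |
| their live ranges inside the loop do not overlap. */ |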
| |
| for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; |
| mode = GET_MODE_WIDER_MODE (mode)) |
| { |
| struct movable *m0 = 0; |
| |
| /* Combine all the registers for extension from mode MODE. |
| Don't combine any that are used outside this loop. */ |
| for (m = movables->head; m; m = m->next) |
| if (m->partial && ! m->global |
| && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn))))) |
| { |
| struct movable *m1; |
| |
| int first = REGNO_FIRST_LUID (m->regno); |
| int last = REGNO_LAST_LUID (m->regno); |
| |
| if (m0 == 0) |
| { |
| /* First one: don't check for overlap, just record it. */ |
| m0 = m; |
| continue; |
| } |
| |
| /* Make sure they extend to the same mode. |
| (Almost always true.) */ |
| if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest)) |
| continue; |
| |
| /* We already have one: check for overlap with those |
| already combined together. */ |
| for (m1 = movables->head; m1 != m; m1 = m1->next) |
| if (m1 == m0 || (m1->partial && m1->match == m0)) |
| if (! (REGNO_FIRST_LUID (m1->regno) > last |
| || REGNO_LAST_LUID (m1->regno) < first)) |
| goto overlap; |
| |
| /* No overlap: we can combine this with the others. */ |
| m0->lifetime += m->lifetime; |
| m0->savings += m->savings; |
| m->done = 1; |
| m->match = m0; |
| |
| overlap: |
| ; |
| } |
| } |
| |
| /* Clean up. */ |
| free (matched_regs); |
| } |
| |
| /* Returns the number of movable instructions in LOOP that were not |
| moved outside the loop. */ |
| |
| static int |
| num_unmoved_movables (const struct loop *loop) |
| { |
| int num = 0; |
| struct movable *m; |
| |
| for (m = LOOP_MOVABLES (loop)->head; m; m = m->next) |
| if (!m->done) |
| ++num; |
| |
| return num; |
| } |
| |
| |
| /* Return 1 if regs X and Y will become the same if moved. */ |
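| /* That is, X and Y match if their movables point at the same nonzero |
| `match' target, or if one movable is recorded as the match of the |
| other. */ |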
| |
| static int |
| regs_match_p (rtx x, rtx y, struct loop_movables *movables) |
| { |
| unsigned int xn = REGNO (x); |
| unsigned int yn = REGNO (y); |
| struct movable *mx, *my; |
| |
| for (mx = movables->head; mx; mx = mx->next) |
| if (mx->regno == xn) |
| break; |
| |
| for (my = movables->head; my; my = my->next) |
| if (my->regno == yn) |
| break; |
| |
| return (mx && my |
| && ((mx->match == my->match && mx->match != 0) |
| || mx->match == my |
| || mx == my->match)); |
| } |
| |
| /* Return 1 if X and Y are identical-looking rtx's. |
| This is the Lisp function EQUAL for rtx arguments. |
| |
| If two registers are matching movables or a movable register and an |
| equivalent constant, consider them equal. */ |
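| /* For instance (illustrative register numbers), |
| (plus:SI (reg 101) (const_int 4)) and (plus:SI (reg 105) (const_int 4)) |
| compare equal here when regs 101 and 105 are movables that match each |
| other; likewise a register whose movable records a constant set_src |
| may compare equal to that constant. */ |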
| |
| static int |
| rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables, |
| struct loop_regs *regs) |
| { |
| int i; |
| int j; |
| struct movable *m; |
| enum rtx_code code; |
| const char *fmt; |
| |
| if (x == y) |
| return 1; |
| if (x == 0 || y == 0) |
| return 0; |
| |
| code = GET_CODE (x); |
| |
| /* If we have a register and a constant, they may sometimes be |
| equal. */ |
| if (REG_P (x) && regs->array[REGNO (x)].set_in_loop == -2 |
| && CONSTANT_P (y)) |
| { |
| for (m = movables->head; m; m = m->next) |
| if (m->move_insn && m->regno == REGNO (x) |
| && rtx_equal_p (m->set_src, y)) |
| return 1; |
| } |
| else if (REG_P (y) && regs->array[REGNO (y)].set_in_loop == -2 |
| && CONSTANT_P (x)) |
| { |
| for (m = movables->head; m; m = m->next) |
| if (m->move_insn && m->regno == REGNO (y) |
| && rtx_equal_p (m->set_src, x)) |
| return 1; |
| } |
| |
| /* Otherwise, rtx's of different codes cannot be equal. */ |
| if (code != GET_CODE (y)) |
| return 0; |
| |
| /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. |
| (REG:SI x) and (REG:HI x) are NOT equivalent. */ |
| |
| if (GET_MODE (x) != GET_MODE (y)) |
| return 0; |
| |
| /* These three types of rtx's can be compared nonrecursively. */ |
| if (code == REG) |
| return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables)); |
| |
| if (code == LABEL_REF) |
| return XEXP (x, 0) == XEXP (y, 0); |
| if (code == SYMBOL_REF) |
| return XSTR (x, 0) == XSTR (y, 0); |
| |
| /* Compare the elements. If any pair of corresponding elements |
| fails to match, return 0 for the whole thing. */ |
| |
| fmt = GET_RTX_FORMAT (code); |
| for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
| { |
| switch (fmt[i]) |
| { |
| case 'w': |
| if (XWINT (x, i) != XWINT (y, i)) |
| return 0; |
| break; |
| |
| case 'i': |
| if (XINT (x, i) != XINT (y, i)) |
| return 0; |
| break; |
| |
| case 'E': |
| /* Two vectors must have the same length. */ |
| if (XVECLEN (x, i) != XVECLEN (y, i)) |
| return 0; |
| |
| /* And the corresponding elements must match. */ |
| for (j = 0; j < XVECLEN (x, i); j++) |
| if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j), |
| movables, regs) == 0) |
| return 0; |
| break; |
| |
| case 'e': |
| if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs) |
| == 0) |
| return 0; |
| break; |
| |
| case 's': |
| if (strcmp (XSTR (x, i), XSTR (y, i))) |
| return 0; |
| break; |
| |
| case 'u': |
| /* These are just backpointers, so they don't matter. */ |
| break; |
| |
| case '0': |
| break; |
| |
| /* It is believed that rtx's at this level will never |
| contain anything but integers and other rtx's, |
| except for within LABEL_REFs and SYMBOL_REFs. */ |
| default: |
| abort (); |
| } |
| } |
| return 1; |
| } |
| |
| /* If X contains any LABEL_REF's, add REG_LABEL notes for them to all |
| insns in INSNS which use the reference. LABEL_NUSES for CODE_LABEL |
| references is incremented once for each added note. */ |
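| /* E.g. if a hoisted SET_SRC contains (label_ref L), each insn in the |
| emitted sequence that mentions L gets a (REG_LABEL L) note, and |
| LABEL_NUSES (L) is bumped once per note so the label is not deleted |
| as unused. */ |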
| |
| static void |
| add_label_notes (rtx x, rtx insns) |
| { |
| enum rtx_code code = GET_CODE (x); |
| int i, j; |
| const char *fmt; |
| rtx insn; |
| |
| if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x)) |
| { |
| /* This code used to ignore labels that referred to dispatch tables to |
| avoid flow generating (slightly) worse code. |
| |
| We no longer ignore such label references (see LABEL_REF handling in |
| mark_jump_label for additional information). */ |
| for (insn = insns; insn; insn = NEXT_INSN (insn)) |
| if (reg_mentioned_p (XEXP (x, 0), insn)) |
| { |
| REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0), |
| REG_NOTES (insn)); |
| if (LABEL_P (XEXP (x, 0))) |
| LABEL_NUSES (XEXP (x, 0))++; |
| } |
| } |
| |
| fmt = GET_RTX_FORMAT (code); |
| for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'e') |
| add_label_notes (XEXP (x, i), insns); |
| else if (fmt[i] == 'E') |
| for (j = XVECLEN (x, i) - 1; j >= 0; j--) |
| add_label_notes (XVECEXP (x, i, j), insns); |
| } |
| } |
| |
| /* Scan MOVABLES, and move the insns that deserve to be moved. |
| If two matching movables are combined, replace one reg with the |
| other throughout. */ |
| |
| static void |
| move_movables (struct loop *loop, struct loop_movables *movables, |
| int threshold, int insn_count) |
| { |
| struct loop_regs *regs = LOOP_REGS (loop); |
| int nregs = regs->num; |
| rtx new_start = 0; |
| struct movable *m; |
| rtx p; |
| rtx loop_start = loop->start; |
| rtx loop_end = loop->end; |
| /* Map of pseudo-register replacements to handle combining |
| when we move several insns that load the same value |
| into different pseudo-registers. */ |
| rtx *reg_map = xcalloc (nregs, sizeof (rtx)); |
| char *already_moved = xcalloc (nregs, sizeof (char)); |
| |
| for (m = movables->head; m; m = m->next) |
| { |
| /* Describe this movable insn. */ |
| |
| if (loop_dump_stream) |
| { |
| fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ", |
| INSN_UID (m->insn), m->regno, m->lifetime); |
| if (m->consec > 0) |
| fprintf (loop_dump_stream, "consec %d, ", m->consec); |
| if (m->cond) |
| fprintf (loop_dump_stream, "cond "); |
| if (m->force) |
| fprintf (loop_dump_stream, "force "); |
| if (m->global) |
| fprintf (loop_dump_stream, "global "); |
| if (m->done) |
| fprintf (loop_dump_stream, "done "); |
| if (m->move_insn) |
| fprintf (loop_dump_stream, "move-insn "); |
| if (m->match) |
| fprintf (loop_dump_stream, "matches %d ", |
| INSN_UID (m->match->insn)); |
| if (m->forces) |
| fprintf (loop_dump_stream, "forces %d ", |
| INSN_UID (m->forces->insn)); |
| } |
| |
| /* Ignore the insn if it's already done (it matched something else). |
| Otherwise, see if it is now safe to move. */ |
| |
| if (!m->done |
| && (! m->cond |
| || (1 == loop_invariant_p (loop, m->set_src) |
| && (m->dependencies == 0 |
| || 1 == loop_invariant_p (loop, m->dependencies)) |
| && (m->consec == 0 |
| || 1 == consec_sets_invariant_p (loop, m->set_dest, |
| m->consec + 1, |
| m->insn)))) |
| && (! m->forces || m->forces->done)) |
| { |
| int regno; |
| rtx p; |
| int savings = m->savings; |
| |
| /* We have an insn that is safe to move. |
| Compute its desirability. */ |
| |
| p = m->insn; |
| regno = m->regno; |
| |
| if (loop_dump_stream) |
| fprintf (loop_dump_stream, "savings %d ", savings); |
| |
| if (regs->array[regno].moved_once && loop_dump_stream) |
| fprintf (loop_dump_stream, "halved since already moved "); |
| |
| /* An insn MUST be moved if we already moved something else |
| which is safe only if this one is moved too: that is, |
| if already_moved[REGNO] is nonzero. */ |
| |
| /* An insn is desirable to move if the new lifetime of the |
| register is no more than THRESHOLD times the old lifetime. |
| If it's not desirable, it means the loop is so big |
| that moving won't speed things up much, |
| and it is liable to make register usage worse. */ |
| |
| /* It is also desirable to move if it can be moved at no |
| extra cost because something else was already moved. */ |
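| /* Concretely, the test below moves the insn when |
| THRESHOLD * savings * lifetime >= insn_count |
| (twice insn_count if the reg was already moved out of some loop, |
| which effectively halves the threshold), or when the move is forced |
| or free because a related insn was already moved. */ |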
| |
| if (already_moved[regno] |
| || (threshold * savings * m->lifetime) >= |
| (regs->array[regno].moved_once ? insn_count * 2 : insn_count) |
| || (m->forces && m->forces->done |
| && regs->array[m->forces->regno].n_times_set == 1)) |
| { |
| int count; |
| struct movable *m1; |
| rtx first = NULL_RTX; |
| rtx newreg = NULL_RTX; |
| |
| if (m->insert_temp) |
| newreg = gen_reg_rtx (GET_MODE (m->set_dest)); |
| |
| /* Now move the insns that set the reg. */ |
| |
| if (m->partial && m->match) |
| { |
| rtx newpat, i1; |
| rtx r1, r2; |
| /* Find the end of this chain of matching regs. |
| Thus, we load each reg in the chain from that one reg. |
| And that reg is loaded with 0 directly, |
| since it has ->match == 0. */ |
| for (m1 = m; m1->match; m1 = m1->match); |
| newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)), |
| SET_DEST (PATTERN (m1->insn))); |
| i1 = loop_insn_hoist (loop, newpat); |
| |
| /* Mark the moved, invariant reg as being allowed to |
| share a hard reg with the other matching invariant. */ |
| REG_NOTES (i1) = REG_NOTES (m->insn); |
| r1 = SET_DEST (PATTERN (m->insn)); |
| r2 = SET_DEST (PATTERN (m1->insn)); |
| regs_may_share |
| = gen_rtx_EXPR_LIST (VOIDmode, r1, |
| gen_rtx_EXPR_LIST (VOIDmode, r2, |
| regs_may_share)); |
| delete_insn (m->insn); |
| |
| if (new_start == 0) |
| new_start = i1; |
| |
| if (loop_dump_stream) |
| fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1)); |
| } |
| /* If we are to re-generate the item being moved with a |
| new move insn, first delete what we have and then emit |
| the move insn before the loop. */ |
| else if (m->move_insn) |
| { |
| rtx i1, temp, seq; |
| |
| for (count = m->consec; count >= 0; count--) |
| { |
| /* If this is the first insn of a library call sequence, |
| something is very wrong. */ |
| if (!NOTE_P (p) |
| && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX))) |
| abort (); |
| |
| /* If this is the last insn of a libcall sequence, then |
| delete every insn in the sequence except the last. |
| The last insn is handled in the normal manner. */ |
| if (!NOTE_P (p) |
| && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX))) |
| { |
| temp = XEXP (temp, 0); |
| while (temp != p) |
| temp = delete_insn (temp); |
| } |
| |
| temp = p; |
| p = delete_insn (p); |
| |
| /* simplify_giv_expr expects that it can walk the insns |
| at m->insn forwards and see this old sequence we are |
| tossing here. delete_insn does preserve the next |
| pointers, but when we skip over a NOTE we must fix |
| it up. Otherwise that code walks into the non-deleted |
| insn stream. */ |
| while (p && NOTE_P (p)) |
| p = NEXT_INSN (temp) = NEXT_INSN (p); |
| |
| if (m->insert_temp) |
| { |
| /* Replace the original insn with a move from |
| our newly created temp. */ |
| start_sequence (); |
| emit_move_insn (m->set_dest, newreg); |
| seq = get_insns (); |
| end_sequence (); |
| emit_insn_before (seq, p); |
| } |
| } |
| |
| start_sequence (); |
| emit_move_insn (m->insert_temp ? newreg : m->set_dest, |
| m->set_src); |
| seq = get_insns (); |
| end_sequence (); |
| |
| add_label_notes (m->set_src, seq); |
| |
| i1 = loop_insn_hoist (loop, seq); |
| if (! find_reg_note (i1, REG_EQUAL, NULL_RTX)) |
| set_unique_reg_note (i1, |
| m->is_equiv ? REG_EQUIV : REG_EQUAL, |
| m->set_src); |
| |
| if (loop_dump_stream) |
| fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1)); |
| |
| /* The more regs we move, the less we like moving them. */ |
| threshold -= 3; |
| } |
| else |
| { |
| for (count = m->consec; count >= 0; count--) |
| { |
| rtx i1, temp; |
| |
| /* If first insn of libcall sequence, skip to end. */ |
| /* Do this at start of loop, since p is guaranteed to |
| be an insn here. */ |
| if (!NOTE_P (p) |
| && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX))) |
| p = XEXP (temp, 0); |
| |
| /* If last insn of libcall sequence, move all |
| insns except the last before the loop. The last |
| insn is handled in the normal manner. */ |
| if (!NOTE_P (p) |
| && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX))) |
| { |
| rtx fn_address = 0; |
| rtx fn_reg = 0; |
| rtx fn_address_insn = 0; |
| |
| first = 0; |
| for (temp = XEXP (temp, 0); temp != p; |
| temp = NEXT_INSN (temp)) |
| { |
| rtx body; |
| rtx n; |
| rtx next; |
| |
| if (NOTE_P (temp)) |
| continue; |
| |
| body = PATTERN (temp); |
| |
| /* Find the next insn after TEMP, |
| not counting USE or NOTE insns. */ |
| for (next = NEXT_INSN (temp); next != p; |
| next = NEXT_INSN (next)) |
| if (! (NONJUMP_INSN_P (next) |
| && GET_CODE (PATTERN (next)) == USE) |
| && !NOTE_P (next)) |
| break; |
| |
| /* If that is the call, this may be the insn |
| that loads the function address. |
| |
| Extract the function address from the insn |
| that loads it into a register. |
| If this insn was cse'd, we get incorrect code. |
| |
| So emit a new move insn that copies the |
| function address into the register that the |
| call insn will use. flow.c will delete any |
| redundant stores that we have created. */ |
| if (CALL_P (next) |
| && GET_CODE (body) == SET |
| && REG_P (SET_DEST (body)) |
| && (n = find_reg_note (temp, REG_EQUAL, |
| NULL_RTX))) |
| { |
| fn_reg = SET_SRC (body); |
| if (!REG_P (fn_reg)) |
| fn_reg = SET_DEST (body); |
| fn_address = XEXP (n, 0); |
| fn_address_insn = temp; |
| } |
| /* We have the call insn. |
| If it uses the register we suspect it might, |
| load it with the correct address directly. */ |
| if (CALL_P (temp) |
| && fn_address != 0 |
| && reg_referenced_p (fn_reg, body)) |
| loop_insn_emit_after (loop, 0, fn_address_insn, |
| gen_move_insn |
| (fn_reg, fn_address)); |
| |
| if (CALL_P (temp)) |
| { |
| i1 = loop_call_insn_hoist (loop, body); |
| /* Because the USAGE information potentially |
| contains objects other than hard registers |
| we need to copy it. */ |
| if (CALL_INSN_FUNCTION_USAGE (temp)) |
| CALL_INSN_FUNCTION_USAGE (i1) |
| = copy_rtx (CALL_INSN_FUNCTION_USAGE (temp)); |
| } |
| else |
| i1 = loop_insn_hoist (loop, body); |
| if (first == 0) |
| first = i1; |
| if (temp == fn_address_insn) |
| fn_address_insn = i1; |
| REG_NOTES (i1) = REG_NOTES (temp); |
| REG_NOTES (temp) = NULL; |
| delete_insn (temp); |
| } |
| if (new_start == 0) |
| new_start = first; |
| } |
| if (m->savemode != VOIDmode) |
| { |
| /* P sets REG to zero; but we should clear only |
| the bits that are not covered by the mode |
| m->savemode. */ |
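| /* That is, instead of hoisting the clearing insn we emit |
| REG &= (1 << GET_MODE_BITSIZE (m->savemode)) - 1 |
| before the loop, zeroing the high part while preserving the low |
| part, which may still hold a live value. */ |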
| rtx reg = m->set_dest; |
| rtx sequence; |
| rtx tem; |
| |
| start_sequence (); |
| tem = expand_simple_binop |
| (GET_MODE (reg), AND, reg, |
| GEN_INT ((((HOST_WIDE_INT) 1 |
| << GET_MODE_BITSIZE (m->savemode))) |
| - 1), |
| reg, 1, OPTAB_LIB_WIDEN); |
| if (tem == 0) |
| abort (); |
| if (tem != reg) |
| emit_move_insn (reg, tem); |
| sequence = get_insns (); |
| end_sequence (); |
| i1 = loop_insn_hoist (loop, sequence); |
| } |
| else if (CALL_P (p)) |
| { |
| i1 = loop_call_insn_hoist (loop, PATTERN (p)); |
| /* Because the USAGE information potentially |
| contains objects other than hard registers |
| we need to copy it. */ |
| if (CALL_INSN_FUNCTION_USAGE (p)) |
| CALL_INSN_FUNCTION_USAGE (i1) |
| = copy_rtx (CALL_INSN_FUNCTION_USAGE (p)); |
| } |
| else if (count == m->consec && m->move_insn_first) |
| { |
| rtx seq; |
| /* The SET_SRC might not be invariant, so we must |
| use the REG_EQUAL note. */ |
| start_sequence (); |
| emit_move_insn (m->insert_temp ? newreg : m->set_dest, |
| m->set_src); |
| seq = get_insns (); |
| end_sequence (); |
| |
| add_label_notes (m->set_src, seq); |
| |
| i1 = loop_insn_hoist (loop, seq); |
| if (! find_reg_note (i1, REG_EQUAL, NULL_RTX)) |
| set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV |
| : REG_EQUAL, m->set_src); |
| } |
| else if (m->insert_temp) |
| { |
| rtx *reg_map2 = xcalloc (REGNO (newreg), |
| sizeof (rtx)); |
| reg_map2[m->regno] = newreg; |
| |
| i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p))); |
| replace_regs (i1, reg_map2, REGNO (newreg), 1); |
| free (reg_map2); |
| } |
| else |
| i1 = loop_insn_hoist (loop, PATTERN (p)); |
| |
| if (REG_NOTES (i1) == 0) |
| { |
| REG_NOTES (i1) = REG_NOTES (p); |
| REG_NOTES (p) = NULL; |
| |
| /* If there is a REG_EQUAL note present whose value |
| is not loop invariant, then delete it, since it |
| may cause problems with later optimization passes. |
| It is possible for cse to create such notes |
| as a result of record_jump_cond. */ |
| |
| if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX)) |
| && ! loop_invariant_p (loop, XEXP (temp, 0))) |
| remove_note (i1, temp); |
| } |
| |
| if (new_start == 0) |
| new_start = i1; |
| |
| if (loop_dump_stream) |
| fprintf (loop_dump_stream, " moved to %d", |
| INSN_UID (i1)); |
| |
| /* If library call, now fix the REG_NOTES that contain |
| insn pointers, namely REG_LIBCALL on FIRST |
| and REG_RETVAL on I1. */ |
| if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX))) |
| { |
| XEXP (temp, 0) = first; |
| temp = find_reg_note (first, REG_LIBCALL, NULL_RTX); |
| XEXP (temp, 0) = i1; |
| } |
| |
| temp = p; |
| delete_insn (p); |
| p = NEXT_INSN (p); |
| |
| /* simplify_giv_expr expects that it can walk the insns |
| at m->insn forwards and see this old sequence we are |
| tossing here. delete_insn does preserve the next |
| pointers, but when we skip over a NOTE we must fix |
| it up. Otherwise that code walks into the non-deleted |
| insn stream. */ |
| while (p && NOTE_P (p)) |
| p = NEXT_INSN (temp) = NEXT_INSN (p); |
| |
| if (m->insert_temp) |
| { |
| rtx seq; |
| /* Replace the original insn with a move from |
| our newly created temp. */ |
| start_sequence (); |
| emit_move_insn (m->set_dest, newreg); |
| seq = get_insns (); |
| end_sequence (); |
| emit_insn_before (seq, p); |
| } |
| } |
| |
| /* The more regs we move, the less we like moving them. */ |
| threshold -= 3; |
| } |
| |
| m->done = 1; |
| |
| if (!m->insert_temp) |
| { |
| /* Any other movable that loads the same register |
| MUST be moved. */ |
| already_moved[regno] = 1; |
| |
| /* This reg has been moved out of one loop. */ |
| regs->array[regno].moved_once = 1; |
| |
| /* The reg set here is now invariant. */ |
| if (! m->partial) |
| { |
| int i; |
| for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++) |
| regs->array[regno+i].set_in_loop = 0; |
| } |
| |
| /* Change the length-of-life info for the register |
| to say it lives at least the full length of this loop. |
| This will help guide optimizations in outer loops. */ |
| |
| if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start)) |
| /* This is the old insn before all the moved insns. |
| We can't use the moved insn because it is out of range |
| in uid_luid. Only the old insns have luids. */ |
| REGNO_FIRST_UID (regno) = INSN_UID (loop_start); |
| if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end)) |
| REGNO_LAST_UID (regno) = INSN_UID (loop_end); |
| } |
| |
| /* Combine with this moved insn any other matching movables. */ |
| |
| if (! m->partial) |
| for (m1 = movables->head; m1; m1 = m1->next) |
| if (m1->match == m) |
| { |
| rtx temp; |
| |
| /* Schedule the reg loaded by M1 |
| for replacement so that it shares the reg of M. |
| If the modes differ (only possible in restricted |
| circumstances), make a SUBREG. |
| |
| Note this assumes that the target dependent files |
| treat REG and SUBREG equally, including within |
| GO_IF_LEGITIMATE_ADDRESS and in all the |
| predicates since we never verify that replacing the |
| original register with a SUBREG results in a |
| recognizable insn. */ |
| if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)) |
| reg_map[m1->regno] = m->set_dest; |
| else |
| reg_map[m1->regno] |
| = gen_lowpart_common (GET_MODE (m1->set_dest), |
| m->set_dest); |
| |
| /* Get rid of the matching insn |
| and prevent further processing of it. */ |
| m1->done = 1; |
| |
| /* If library call, delete all insns. */ |
| if ((temp = find_reg_note (m1->insn, REG_RETVAL, |
| NULL_RTX))) |
| delete_insn_chain (XEXP (temp, 0), m1->insn); |
| else |
| delete_insn (m1->insn); |
| |
| /* Any other movable that loads the same register |
| MUST be moved. */ |
| already_moved[m1->regno] = 1; |
| |
| /* The reg merged here is now invariant, |
| if the reg it matches is invariant. */ |
| if (! m->partial) |
| { |
| int i; |
| for (i = 0; |
| i < LOOP_REGNO_NREGS (regno, m1->set_dest); |
| i++) |
| regs->array[m1->regno+i].set_in_loop = 0; |
| } |
| } |
| } |
| else if (loop_dump_stream) |
| fprintf (loop_dump_stream, "not desirable"); |
| } |
| else if (loop_dump_stream && !m->match) |
| fprintf (loop_dump_stream, "not safe"); |
| |
| if (loop_dump_stream) |
| fprintf (loop_dump_stream, "\n"); |
| } |
| |
| if (new_start == 0) |
| new_start = loop_start; |
| |
| /* Go through all the instructions in the loop, making |
| all the register substitutions scheduled in REG_MAP. */ |
| for (p = new_start; p != loop_end; p = NEXT_INSN (p)) |
| if (INSN_P (p)) |
| { |
| replace_regs (PATTERN (p), reg_map, nregs, 0); |
| replace_regs (REG_NOTES (p), reg_map, nregs, 0); |
| INSN_CODE (p) = -1; |
| } |
| |
| /* Clean up. */ |
| free (reg_map); |
| free (already_moved); |
| } |
| |
| |
| static void |
| loop_movables_add (struct loop_movables *movables, struct movable *m) |
| { |
| if (movables->head == 0) |
| movables->head = m; |
| else |
| movables->last->next = m; |
| movables->last = m; |
| } |
| |
| |
| static void |
| loop_movables_free (struct loop_movables *movables) |
| { |
| struct movable *m; |
| struct movable *m_next; |
| |
| for (m = movables->head; m; m = m_next) |
| { |
| m_next = m->next; |
| free (m); |
| } |
| } |
| |
| #if 0 |
| /* Scan X and replace the address of any MEM in it with ADDR. |
| REG is the address that MEM should have before the replacement. */ |
| |
| static void |
| replace_call_address (rtx x, rtx reg, rtx addr) |
| { |
| enum rtx_code code; |
| int i; |
| const char *fmt; |
| |
| if (x == 0) |
| return; |
| code = GET_CODE (x); |
| switch (code) |
| { |
| case PC: |
| case CC0: |
| case CONST_INT: |
| case CONST_DOUBLE: |
| case CONST: |
| case SYMBOL_REF: |
| case LABEL_REF: |
| case REG: |
| return; |
| |
| case SET: |
| /* Short cut for very common case. */ |
| replace_call_address (XEXP (x, 1), reg, addr); |
| return; |
| |
| case CALL: |
| /* Short cut for very common case. */ |
| replace_call_address (XEXP (x, 0), reg, addr); |
| return; |
| |
| case MEM: |
| /* If this MEM uses a reg other than the one we expected, |
| something is wrong. */ |
| if (XEXP (x, 0) != reg) |
| abort (); |
| XEXP (x, 0) = addr; |
| return; |
| |
| default: |
| break; |
| } |
| |
| fmt = GET_RTX_FORMAT (code); |
| for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'e') |
| replace_call_address (XEXP (x, i), reg, addr); |
| else if (fmt[i] == 'E') |
| { |
| int j; |
| for (j = 0; j < XVECLEN (x, i); j++) |
| replace_call_address (XVECEXP (x, i, j), reg, addr); |
| } |
| } |
| } |
| #endif |
| |
| /* Return the number of memory refs to addresses that vary |
| in the rtx X. */ |
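| /* For example, (mem:SI (reg 101)) counts as one nonfixed read when |
| reg 101 is not loop-invariant, while (mem:SI (symbol_ref FOO)) counts |
| as zero because its address never varies. (Illustrative operands.) */ |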
| |
| static int |
| count_nonfixed_reads (const struct loop *loop, rtx x) |
| { |
| enum rtx_code code; |
| int i; |
| const char *fmt; |
| int value; |
| |
| if (x == 0) |
| return 0; |
| |
| code = GET_CODE (x); |
| switch (code) |
| { |
| case PC: |
| case CC0: |
| case CONST_INT: |
| case CONST_DOUBLE: |
| case CONST: |
| case SYMBOL_REF: |
| case LABEL_REF: |
| case REG: |
| return 0; |
| |
| case MEM: |
| return ((loop_invariant_p (loop, XEXP (x, 0)) != 1) |
| + count_nonfixed_reads (loop, XEXP (x, 0))); |
| |
| default: |
| break; |
| } |
| |
| value = 0; |
| fmt = GET_RTX_FORMAT (code); |
| for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'e') |
| value += count_nonfixed_reads (loop, XEXP (x, i)); |
| if (fmt[i] == 'E') |
| { |
| int j; |
| for (j = 0; j < XVECLEN (x, i); j++) |
| value += count_nonfixed_reads (loop, XVECEXP (x, i, j)); |
| } |
| } |
| return value; |
| } |
| |
| /* Scan a loop setting the elements `loops_enclosed', |
| `has_call', `has_nonconst_call', `has_volatile', `has_tablejump', |
| `unknown_address_altered', `unknown_constant_address_altered', and |
| `num_mem_sets' in LOOP. Also, fill in the array `mems' and the |
| list `store_mems' in LOOP. */ |
| |
| static void |
| prescan_loop (struct loop *loop) |
| { |
| int level = 1; |
| rtx insn; |
| struct loop_info *loop_info = LOOP_INFO (loop); |
| rtx start = loop->start; |
| rtx end = loop->end; |
| /* The label after END. Jumping here is just like falling off the |
| end of the loop. We use next_nonnote_insn instead of next_label |
| as a hedge against the (pathological) case where some actual insn |
| might end up between the two. */ |
| rtx exit_target = next_nonnote_insn (end); |
| |
| loop_info->has_indirect_jump = indirect_jump_in_function; |
| loop_info->pre_header_has_call = 0; |
| loop_info->has_call = 0; |
| loop_info->has_nonconst_call = 0; |
| loop_info->has_prefetch = 0; |
| loop_info->has_volatile = 0; |
| loop_info->has_tablejump = 0; |
| loop_info->has_multiple_exit_targets = 0; |
| loop->level = 1; |
| |
| loop_info->unknown_address_altered = 0; |
| loop_info->unknown_constant_address_altered = 0; |
| loop_info->store_mems = NULL_RTX; |
| loop_info->first_loop_store_insn = NULL_RTX; |
| loop_info->mems_idx = 0; |
| loop_info->num_mem_sets = 0; |
| |
| for (insn = start; insn && !LABEL_P (insn); |
| insn = PREV_INSN (insn)) |
| { |
| if (CALL_P (insn)) |
| { |
| loop_info->pre_header_has_call = 1; |
| break; |
| } |
| } |
| |
| for (insn = NEXT_INSN (start); insn != NEXT_INSN (end); |
| insn = NEXT_INSN (insn)) |
| { |
| switch (GET_CODE (insn)) |
| { |
| case NOTE: |
| if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG) |
| { |
| ++level; |
| /* Count number of loops contained in this one. */ |
| loop->level++; |
| } |
| else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END) |
| --level; |
| break; |
| |
| case CALL_INSN: |
| if (! CONST_OR_PURE_CALL_P (insn)) |
| { |
| loop_info->unknown_address_altered = 1; |
| loop_info->has_nonconst_call = 1; |
| } |
| else if (pure_call_p (insn)) |
| loop_info->has_nonconst_call = 1; |
| loop_info->has_call = 1; |
| if (can_throw_internal (insn)) |
| loop_info->has_multiple_exit_targets = 1; |
| break; |
| |
| case JUMP_INSN: |
| if (! loop_info->has_multiple_exit_targets) |
| { |
| rtx set = pc_set (insn); |
| |
| if (set) |
| { |
| rtx src = SET_SRC (set); |
| rtx label1, label2; |
| |
| if (GET_CODE (src) == IF_THEN_ELSE) |
| { |
| label1 = XEXP (src, 1); |
| label2 = XEXP (src, 2); |
| } |
| else |
| { |
| label1 = src; |
| label2 = NULL_RTX; |
| } |
| |
| do |
| { |
| if (label1 && label1 != pc_rtx) |
| { |
| if (GET_CODE (label1) != LABEL_REF) |
| { |
| /* Something tricky. */ |
| loop_info->has_multiple_exit_targets = 1; |
| break; |
| } |
| else if (XEXP (label1, 0) != exit_target |
| && LABEL_OUTSIDE_LOOP_P (label1)) |
| { |
| /* A jump outside the current loop. */ |
| loop_info->has_multiple_exit_targets = 1; |
| break; |
| } |
| } |
| |
| label1 = label2; |
| label2 = NULL_RTX; |
| } |
| while (label1); |
| } |
| else |
| { |
| /* A return, or something tricky. */ |
| loop_info->has_multiple_exit_targets = 1; |
| } |
| } |
| /* Fall through. */ |
| |
| case INSN: |
| if (volatile_refs_p (PATTERN (insn))) |
| loop_info->has_volatile = 1; |
| |
| if (JUMP_P (insn) |
| && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC |
| || GET_CODE (PATTERN (insn)) == ADDR_VEC)) |
| loop_info->has_tablejump = 1; |
| |
| note_stores (PATTERN (insn), note_addr_stored, loop_info); |
| if (! loop_info->first_loop_store_insn && loop_info->store_mems) |
| loop_info->first_loop_store_insn = insn; |
| |
| if (flag_non_call_exceptions && can_throw_internal (insn)) |
| loop_info->has_multiple_exit_targets = 1; |
| break; |
| |
| default: |
| break; |
| } |
| } |
| |
| /* Now, rescan the loop, setting up the LOOP_MEMS array. */ |
| if (/* An exception thrown by a called function might land us |
| anywhere. */ |
| ! loop_info->has_nonconst_call |
| /* We don't want loads for MEMs moved to a location before the |
| one at which their stack memory becomes allocated. (Note |
| that this is not a problem for malloc, etc., since those |
| require actual function calls.) */ |
| && ! current_function_calls_alloca |
| /* There are ways to leave the loop other than falling off the |
| end. */ |
| && ! loop_info->has_multiple_exit_targets) |
| for (insn = NEXT_INSN (start); insn != NEXT_INSN (end); |
| insn = NEXT_INSN (insn)) |
| for_each_rtx (&insn, insert_loop_mem, loop_info); |
| |
| /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so |
| that loop_invariant_p and load_mems can use true_dependence |
| to determine what is really clobbered. */ |
| if (loop_info->unknown_address_altered) |
| { |
| rtx mem = gen_rtx_MEM (BLKmode, const0_rtx); |
| |
| loop_info->store_mems |
| = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems); |
| } |
| if (loop_info->unknown_constant_address_altered) |
| { |
| rtx mem = gen_rtx_MEM (BLKmode, const0_rtx); |
| MEM_READONLY_P (mem) = 1; |
| loop_info->store_mems |
| = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems); |
| } |
| } |
| |
| /* Invalidate all loops containing LABEL. */ |
| |
| static void |
| invalidate_loops_containing_label (rtx label) |
| { |
| struct loop *loop; |
| for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer) |
| loop->invalid = 1; |
| } |
| |
| /* Scan the function looking for loops. Record the start and end of each loop. |
| Also mark as invalid loops any loops that contain a setjmp or are branched |
| to from outside the loop. */ |
| |
| static void |
| find_and_verify_loops (rtx f, struct loops *loops) |
| { |
| rtx insn; |
| rtx label; |
| int num_loops; |
| struct loop *current_loop; |
| struct loop *next_loop; |
| struct loop *loop; |
| |
| num_loops = loops->num; |
| |
| compute_luids (f, NULL_RTX, 0); |
| |
| /* If there are jumps to undefined labels, |
| treat them as jumps out of any/all loops. |
| This also avoids writing past end of tables when there are no loops. */ |
| uid_loop[0] = NULL; |
| |
| /* Find boundaries of loops, mark which loops are contained within |
| loops, and invalidate loops that have setjmp. */ |
| |
| num_loops = 0; |
| current_loop = NULL; |
| for (insn = f; insn; insn = NEXT_INSN (insn)) |
| { |
| if (NOTE_P (insn)) |
| switch (NOTE_LINE_NUMBER (insn)) |
| { |
| case NOTE_INSN_LOOP_BEG: |
| next_loop = loops->array + num_loops; |
| next_loop->num = num_loops; |
| num_loops++; |
| next_loop->start = insn; |
| next_loop->outer = current_loop; |
| current_loop = next_loop; |
| break; |
| |
| case NOTE_INSN_LOOP_END: |
| if (! current_loop) |
| abort (); |
| |
| current_loop->end = insn; |
| current_loop = current_loop->outer |