| /* Subroutines used for code generation on IA-32. |
| Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, |
| 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING. If not, write to |
| the Free Software Foundation, 51 Franklin Street, Fifth Floor, |
| Boston, MA 02110-1301, USA. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "tm_p.h" |
| #include "regs.h" |
| #include "hard-reg-set.h" |
| #include "real.h" |
| #include "insn-config.h" |
| #include "conditions.h" |
| #include "output.h" |
| #include "insn-codes.h" |
| #include "insn-attr.h" |
| #include "flags.h" |
| #include "except.h" |
| #include "function.h" |
| #include "recog.h" |
| #include "expr.h" |
| #include "optabs.h" |
| #include "toplev.h" |
| #include "basic-block.h" |
| #include "ggc.h" |
| #include "target.h" |
| #include "target-def.h" |
| #include "langhooks.h" |
| #include "cgraph.h" |
| #include "tree-gimple.h" |
| #include "dwarf2.h" |
| #include "tm-constrs.h" |
| |
| /* APPLE LOCAL begin pascal strings */ |
| #include "../../libcpp/internal.h" |
| extern struct cpp_reader* parse_in; |
| /* APPLE LOCAL end pascal strings */ |
| /* APPLE LOCAL begin regparmandstackparm */ |
| #include "integrate.h" |
| #include "tree-inline.h" |
| #include "splay-tree.h" |
| #include "tree-pass.h" |
| #include "c-tree.h" |
| #include "c-common.h" |
| /* APPLE LOCAL end regparmandstackparm */ |
| /* APPLE LOCAL begin dwarf call/pop 5221468 */ |
| #include "debug.h" |
| #include "dwarf2out.h" |
| /* APPLE LOCAL end dwarf call/pop 5221468 */ |
| |
| #ifndef CHECK_STACK_LIMIT |
| #define CHECK_STACK_LIMIT (-1) |
| #endif |
| |
| /* Return the index of the given mode in the mult and division cost tables. */ |
| #define MODE_INDEX(mode) \ |
| ((mode) == QImode ? 0 \ |
| : (mode) == HImode ? 1 \ |
| : (mode) == SImode ? 2 \ |
| : (mode) == DImode ? 3 \ |
| : 4) |
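| |
| /* Illustrative use (a sketch; the mult_init/divide array field names are |
| assumed from struct processor_costs in i386.h): the rtx cost code indexes |
| the per-mode tables below as, e.g., |
| |
| cost = ix86_cost->mult_init[MODE_INDEX (SImode)]; |
| |
| so QImode through DImode get their own slots and anything wider falls |
| into the "other" slot at index 4. */ |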
| |
| /* Processor costs (relative to an add) */ |
| /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */ |
| #define COSTS_N_BYTES(N) ((N) * 2) |
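| |
| /* Worked example: under the assumption above, COSTS_N_BYTES (2) == 4 == |
| COSTS_N_INSNS (1), so a 2-byte add when optimizing for size is weighted |
| the same as a 1-insn add when optimizing for speed, keeping the two |
| cost scales comparable. */ |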
| |
| static const |
| struct processor_costs size_cost = { /* costs for tuning for size */ |
| COSTS_N_BYTES (2), /* cost of an add instruction */ |
| COSTS_N_BYTES (3), /* cost of a lea instruction */ |
| COSTS_N_BYTES (2), /* variable shift costs */ |
| COSTS_N_BYTES (3), /* constant shift costs */ |
| {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ |
| COSTS_N_BYTES (3), /* HI */ |
| COSTS_N_BYTES (3), /* SI */ |
| COSTS_N_BYTES (3), /* DI */ |
| COSTS_N_BYTES (5)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ |
| COSTS_N_BYTES (3), /* HI */ |
| COSTS_N_BYTES (3), /* SI */ |
| COSTS_N_BYTES (3), /* DI */ |
| COSTS_N_BYTES (5)}, /* other */ |
| COSTS_N_BYTES (3), /* cost of movsx */ |
| COSTS_N_BYTES (3), /* cost of movzx */ |
| 0, /* "large" insn */ |
| 2, /* MOVE_RATIO */ |
| 2, /* cost for loading QImode using movzbl */ |
| {2, 2, 2}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 2, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {2, 2, 2}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {2, 2, 2}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 3, /* cost of moving MMX register */ |
| {3, 3}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {3, 3}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 3, /* cost of moving SSE register */ |
| {3, 3, 3}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {3, 3, 3}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 0, /* size of prefetch block */ |
| 0, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ |
| COSTS_N_BYTES (2), /* cost of FMUL instruction. */ |
| COSTS_N_BYTES (2), /* cost of FDIV instruction. */ |
| COSTS_N_BYTES (2), /* cost of FABS instruction. */ |
| COSTS_N_BYTES (2), /* cost of FCHS instruction. */ |
| COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ |
| }; |
| |
| /* Processor costs (relative to an add) */ |
| static const |
| struct processor_costs i386_cost = { /* 386 specific costs */ |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (3), /* variable shift costs */ |
| COSTS_N_INSNS (2), /* constant shift costs */ |
| {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (6), /* HI */ |
| COSTS_N_INSNS (6), /* SI */ |
| COSTS_N_INSNS (6), /* DI */ |
| COSTS_N_INSNS (6)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (23), /* HI */ |
| COSTS_N_INSNS (23), /* SI */ |
| COSTS_N_INSNS (23), /* DI */ |
| COSTS_N_INSNS (23)}, /* other */ |
| COSTS_N_INSNS (3), /* cost of movsx */ |
| COSTS_N_INSNS (2), /* cost of movzx */ |
| 15, /* "large" insn */ |
| 3, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {2, 4, 2}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 4, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {8, 8, 8}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {8, 8, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 8, 16}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 8, 16}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 0, /* size of prefetch block */ |
| 0, /* number of parallel prefetches */ |
| 1, /* Branch cost */ |
| COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (27), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (88), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (22), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (24), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ |
| }; |
| |
| static const |
| struct processor_costs i486_cost = { /* 486 specific costs */ |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (3), /* variable shift costs */ |
| COSTS_N_INSNS (2), /* constant shift costs */ |
| {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (12), /* HI */ |
| COSTS_N_INSNS (12), /* SI */ |
| COSTS_N_INSNS (12), /* DI */ |
| COSTS_N_INSNS (12)}, /* other */ |
| 1, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (40), /* HI */ |
| COSTS_N_INSNS (40), /* SI */ |
| COSTS_N_INSNS (40), /* DI */ |
| COSTS_N_INSNS (40)}, /* other */ |
| COSTS_N_INSNS (3), /* cost of movsx */ |
| COSTS_N_INSNS (2), /* cost of movzx */ |
| 15, /* "large" insn */ |
| 3, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {2, 4, 2}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 4, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {8, 8, 8}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {8, 8, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 8, 16}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 8, 16}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 0, /* size of prefetch block */ |
| 0, /* number of parallel prefetches */ |
| 1, /* Branch cost */ |
| COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (16), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (73), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (3), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (3), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ |
| }; |
| |
| static const |
| struct processor_costs pentium_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (4), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (11), /* HI */ |
| COSTS_N_INSNS (11), /* SI */ |
| COSTS_N_INSNS (11), /* DI */ |
| COSTS_N_INSNS (11)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (25), /* HI */ |
| COSTS_N_INSNS (25), /* SI */ |
| COSTS_N_INSNS (25), /* DI */ |
| COSTS_N_INSNS (25)}, /* other */ |
| COSTS_N_INSNS (3), /* cost of movsx */ |
| COSTS_N_INSNS (2), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 6, /* MOVE_RATIO */ |
| 6, /* cost for loading QImode using movzbl */ |
| {2, 4, 2}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 4, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {2, 2, 6}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 6}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 8, /* cost of moving MMX register */ |
| {8, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {8, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 8, 16}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 8, 16}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 0, /* size of prefetch block */ |
| 0, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (3), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (39), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (1), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (1), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ |
| }; |
| |
| static const |
| struct processor_costs pentiumpro_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (4), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (4)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (17), /* HI */ |
| COSTS_N_INSNS (17), /* SI */ |
| COSTS_N_INSNS (17), /* DI */ |
| COSTS_N_INSNS (17)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 6, /* MOVE_RATIO */ |
| 2, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 2, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {2, 2, 6}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 6}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {2, 2}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {2, 2}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {2, 2, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {2, 2, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 32, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (5), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (56), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ |
| }; |
| |
| static const |
| struct processor_costs k6_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (2), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (3), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (3), /* DI */ |
| COSTS_N_INSNS (3)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (18), /* HI */ |
| COSTS_N_INSNS (18), /* SI */ |
| COSTS_N_INSNS (18), /* DI */ |
| COSTS_N_INSNS (18)}, /* other */ |
| COSTS_N_INSNS (2), /* cost of movsx */ |
| COSTS_N_INSNS (2), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 4, /* MOVE_RATIO */ |
| 3, /* cost for loading QImode using movzbl */ |
| {4, 5, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 3, 2}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {6, 6, 6}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 4}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {2, 2}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {2, 2}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {2, 2, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {2, 2, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 6, /* MMX or SSE register to integer */ |
| 32, /* size of prefetch block */ |
| 1, /* number of parallel prefetches */ |
| 1, /* Branch cost */ |
| COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (2), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (56), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ |
| }; |
| |
| static const |
| struct processor_costs athlon_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (2), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (5), /* HI */ |
| COSTS_N_INSNS (5), /* SI */ |
| COSTS_N_INSNS (5), /* DI */ |
| COSTS_N_INSNS (5)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {3, 4, 3}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {3, 4, 3}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {4, 4, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 4}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 4, 6}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 5}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 5, /* Branch cost */ |
| COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (4), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (24), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ |
| }; |
| |
| static const |
| struct processor_costs k8_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (2), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (5)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {3, 4, 3}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {3, 4, 3}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {4, 4, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {3, 3}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 3, 6}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 5}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 5, /* Branch cost */ |
| COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (4), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (19), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ |
| }; |
| |
| static const |
| struct processor_costs pentium4_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (3), /* cost of a lea instruction */ |
| COSTS_N_INSNS (4), /* variable shift costs */ |
| COSTS_N_INSNS (4), /* constant shift costs */ |
| {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (15), /* HI */ |
| COSTS_N_INSNS (15), /* SI */ |
| COSTS_N_INSNS (15), /* DI */ |
| COSTS_N_INSNS (15)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (56), /* HI */ |
| COSTS_N_INSNS (56), /* SI */ |
| COSTS_N_INSNS (56), /* DI */ |
| COSTS_N_INSNS (56)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 16, /* "large" insn */ |
| 6, /* MOVE_RATIO */ |
| 2, /* cost for loading QImode using movzbl */ |
| {4, 5, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 3, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {2, 2, 6}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 6}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {2, 2}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {2, 2}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 12, /* cost of moving SSE register */ |
| {12, 12, 12}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {2, 2, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 10, /* MMX or SSE register to integer */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (7), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (43), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ |
| }; |
| |
| static const |
| struct processor_costs nocona_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (10), /* HI */ |
| COSTS_N_INSNS (10), /* SI */ |
| COSTS_N_INSNS (10), /* DI */ |
| COSTS_N_INSNS (10)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (66), /* HI */ |
| COSTS_N_INSNS (66), /* SI */ |
| COSTS_N_INSNS (66), /* DI */ |
| COSTS_N_INSNS (66)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 16, /* "large" insn */ |
| 17, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 3, /* cost of reg,reg fld/fst */ |
| {12, 12, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 4}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 6, /* cost of moving MMX register */ |
| {12, 12}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {12, 12}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 6, /* cost of moving SSE register */ |
| {12, 12, 12}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {12, 12, 12}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 8, /* MMX or SSE register to integer */ |
| 128, /* size of prefetch block */ |
| 8, /* number of parallel prefetches */ |
| 1, /* Branch cost */ |
| COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (40), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (3), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (3), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ |
| }; |
| /* APPLE LOCAL begin mainline */ |
| static const |
| struct processor_costs core2_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (3), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (3), /* DI */ |
| COSTS_N_INSNS (3)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (22), /* HI */ |
| COSTS_N_INSNS (22), /* SI */ |
| COSTS_N_INSNS (22), /* DI */ |
| COSTS_N_INSNS (22)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 16, /* MOVE_RATIO */ |
| 2, /* cost for loading QImode using movzbl */ |
| {6, 6, 6}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {6, 6, 6}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 4}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {6, 6}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {6, 6, 6}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 4}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 2, /* MMX or SSE register to integer */ |
| 128, /* size of prefetch block */ |
| 8, /* number of parallel prefetches */ |
| 3, /* Branch cost */ |
| COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (5), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (32), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (1), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (1), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (58), /* cost of FSQRT instruction. */ |
| }; |
| /* APPLE LOCAL end mainline */ |
| /* Generic64 should produce code tuned for Nocona and K8. */ |
| static const |
| struct processor_costs generic64_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| /* On all chips taken into consideration, lea is 2 cycles or more. With |
| this cost, however, our current implementation of synth_mult results in |
| the use of unnecessary temporary registers, causing regressions on |
| several SPECfp benchmarks. */ |
| COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (2)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 17, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {12, 12, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {8, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {8, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {8, 8, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {8, 8, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| /* Benchmarks show large regressions on the K8 sixtrack benchmark when this |
| value is increased to the perhaps more appropriate value of 5. */ |
| 3, /* Branch cost */ |
| COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (20), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (8), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (8), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ |
| }; |
| |
| /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */ |
| static const |
| struct processor_costs generic32_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (2)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 17, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {12, 12, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {8, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {8, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {8, 8, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {8, 8, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 3, /* Branch cost */ |
| COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (20), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (8), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (8), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ |
| }; |
| |
| const struct processor_costs *ix86_cost = &pentium_cost; |
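| |
| /* The pentium_cost default above is only an initial value; the option |
| override code is expected to repoint ix86_cost for the selected -mtune, |
| roughly (a sketch, assuming the processor_target_table defined with the |
| option handling later in this file): |
| |
| if (optimize_size) |
| ix86_cost = &size_cost; |
| else |
| ix86_cost = processor_target_table[ix86_tune].cost; */ |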
| |
| /* Processor feature/optimization bitmasks. */ |
| #define m_386 (1<<PROCESSOR_I386) |
| #define m_486 (1<<PROCESSOR_I486) |
| #define m_PENT (1<<PROCESSOR_PENTIUM) |
| #define m_PPRO (1<<PROCESSOR_PENTIUMPRO) |
| #define m_K6 (1<<PROCESSOR_K6) |
| #define m_ATHLON (1<<PROCESSOR_ATHLON) |
| #define m_PENT4 (1<<PROCESSOR_PENTIUM4) |
| #define m_K8 (1<<PROCESSOR_K8) |
| #define m_ATHLON_K8 (m_K8 | m_ATHLON) |
| #define m_NOCONA (1<<PROCESSOR_NOCONA) |
| /* APPLE LOCAL mainline */ |
| #define m_CORE2 (1<<PROCESSOR_CORE2) |
| #define m_GENERIC32 (1<<PROCESSOR_GENERIC32) |
| #define m_GENERIC64 (1<<PROCESSOR_GENERIC64) |
| #define m_GENERIC (m_GENERIC32 | m_GENERIC64) |
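| |
| /* The masks below are tested against the tuning bit via macros in i386.h; |
| assuming the usual pattern there, a sketch of one such test is: |
| |
| #define TUNEMASK (1 << ix86_tune) |
| #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK) |
| |
| so each const int is a bitmap saying which processors want that knob. */ |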
| |
| /* Generic instruction choice should be a common subset of the supported |
| CPUs (PPro/PENT4/NOCONA/Athlon/K8). */ |
| |
| /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it |
| for Generic64 seems like a good code-size tradeoff. We can't enable it |
| for 32-bit generic because it does not work well with PPro-based chips. */ |
| /* APPLE LOCAL begin mainline */ |
| const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_CORE2 | m_GENERIC64; |
| const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; |
| const int x86_zero_extend_with_and = m_486 | m_PENT; |
| const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC /* m_386 | m_K6 */; |
| const int x86_double_with_add = ~m_386; |
| const int x86_use_bit_test = m_386; |
| const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC; |
| const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA; |
| const int x86_3dnow_a = m_ATHLON_K8; |
| /* APPLE LOCAL end mainline */ |
| const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; |
| /* Branch hints were put in P4 based on simulation results. But after P4 |
| was made, no performance benefit was observed with branch hints. They |
| also increase the code size. As a result, icc never generates branch |
| hints. */ |
| const int x86_branch_hints = 0; |
| const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */ |
| /* We probably ought to watch for partial register stalls on the Generic32 |
| compilation setting as well. However, in the current implementation |
| partial register stalls are not eliminated very well - they can be |
| introduced via subregs synthesized by combine and can happen in |
| caller/callee saving sequences. Because this option pays back little on |
| PPro-based chips and conflicts with the partial register dependencies |
| used by Athlon/P4-based chips, it is better to leave it off for |
| generic32 for now. */ |
| const int x86_partial_reg_stall = m_PPRO; |
| /* APPLE LOCAL begin mainline */ |
| const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC; |
| const int x86_use_himode_fiop = m_386 | m_486 | m_K6; |
| const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC); |
| const int x86_use_mov0 = m_K6; |
| const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC); |
| const int x86_read_modify_write = ~m_PENT; |
| const int x86_read_modify = ~(m_PENT | m_PPRO); |
| const int x86_split_long_moves = m_PPRO; |
| const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */ |
| const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); |
| const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA; |
| const int x86_qimode_math = ~(0); |
| const int x86_promote_qi_regs = 0; |
| /* On PPro this flag is meant to avoid partial register stalls. Just like |
| x86_partial_reg_stall, this option might be considered for Generic32 if |
| our scheme for avoiding partial stalls were more effective. */ |
| const int x86_himode_math = ~(m_PPRO); |
| const int x86_promote_hi_regs = m_PPRO; |
| const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; |
| const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; |
| const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; |
| const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; |
| const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC); |
| const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; |
| const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; |
| const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; |
| const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC; |
| const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC; |
| const int x86_shift1 = ~m_486; |
| const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; |
| /* In the Generic model we have a conflict between PPro/Pentium4-based |
| chips that treat 128-bit SSE registers as single units and K8-based |
| chips that divide SSE registers into two 64-bit halves. |
| x86_sse_partial_reg_dependency promotes all store destinations to 128 |
| bits to allow register renaming on 128-bit SSE units, but this usually |
| costs one extra micro-op on 64-bit SSE units. Experimental results show |
| that disabling this option on P4 brings over a 20% SPECfp regression, |
| while enabling it on K8 brings roughly a 2.4% regression that can be |
| partly masked by careful scheduling of moves. */ |
| const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; |
| /* Set for machines where types and dependencies are resolved on SSE |
| register parts instead of whole registers, so we may maintain just the |
| lower part of scalar values in the proper format, leaving the upper |
| part undefined. */ |
| const int x86_sse_split_regs = m_ATHLON_K8; |
| const int x86_sse_typeless_stores = m_ATHLON_K8; |
| const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA; |
| const int x86_use_ffreep = m_ATHLON_K8; |
| const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6; |
| const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC); |
| |
| /* ??? Allowing interunit moves makes it all too easy for the compiler to |
| put integer data in xmm registers, which results in pretty abysmal |
| code. */ |
| /* APPLE LOCAL 5612787 mainline sse4 */ |
| const int x86_inter_unit_moves = ~(m_ATHLON_K8); |
| |
| const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32; |
| /* Some CPU cores are not able to predict more than 4 branch instructions in |
| the 16 byte window. */ |
| const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC; |
| const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_CORE2 | m_GENERIC; |
| const int x86_use_bt = m_ATHLON_K8; |
| /* APPLE LOCAL begin */ |
| /* See comment in darwin override options for what needs fixing. |
| Most of this code has been rewritten in mainline anyhow. |
| All we've done here is remove the const since we assign to |
| them in SUBTARGET_OVERRIDE_OPTIONS. */ |
| /* Compare and exchange was added for 80486. */ |
| int x86_cmpxchg = ~m_386; |
| /* Compare and exchange 8 bytes was added for pentium. */ |
| int x86_cmpxchg8b = ~(m_386 | m_486); |
| /* Compare and exchange 16 bytes was added for nocona. */ |
| /* APPLE LOCAL mainline */ |
| int x86_cmpxchg16b = m_NOCONA | m_CORE2; |
| /* Exchange and add was added for 80486. */ |
| int x86_xadd = ~m_386; |
| /* APPLE LOCAL begin mainline bswap */ |
| /* Byteswap was added for 80486. */ |
| int x86_bswap = ~m_386; |
| /* APPLE LOCAL end mainline bswap */ |
| /* APPLE LOCAL end */ |
| const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC; |
| /* APPLE LOCAL end mainline */ |
| |
| /* In case the average insn count for a single function invocation is |
| lower than this constant, emit fast (but longer) prologue and |
| epilogue code. */ |
| #define FAST_PROLOGUE_INSN_COUNT 20 |
| |
| /* Names for the low 8-bit, high 8-bit, and 16-bit registers, respectively. */ |
| static const char *const qi_reg_name[] = QI_REGISTER_NAMES; |
| static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; |
| static const char *const hi_reg_name[] = HI_REGISTER_NAMES; |
| |
| /* Array of the smallest class containing reg number REGNO, indexed by |
| REGNO. Used by REGNO_REG_CLASS in i386.h. */ |
| |
| enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = |
| { |
| /* ax, dx, cx, bx */ |
| AREG, DREG, CREG, BREG, |
| /* si, di, bp, sp */ |
| SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, |
| /* FP registers */ |
| FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, |
| FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, |
| /* arg pointer */ |
| NON_Q_REGS, |
| /* flags, fpsr, dirflag, frame */ |
| NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, |
| SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
| SSE_REGS, SSE_REGS, |
| MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, |
| MMX_REGS, MMX_REGS, |
| NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, |
| NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, |
| SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
| SSE_REGS, SSE_REGS, |
| }; |
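| |
| /* A sketch of the i386.h accessor this map feeds (assumed definition): |
| |
| #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)]) |
| |
| e.g. REGNO_REG_CLASS (1) is DREG for %edx, per the ordering above. */ |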
| |
| /* The "default" register map used in 32bit mode. */ |
| |
| int const dbx_register_map[FIRST_PSEUDO_REGISTER] = |
| { |
| 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ |
| 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ |
| -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ |
| 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ |
| 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ |
| }; |
| |
| static int const x86_64_int_parameter_registers[6] = |
| { |
| 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, |
| FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ |
| }; |
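| |
| /* Per the x86-64 psABI, the first six integer arguments are passed in |
| these registers in order; e.g. for f (a, b, c), a goes in %rdi, b in |
| %rsi, and c in %rdx. */ |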
| |
| static int const x86_64_int_return_registers[4] = |
| { |
| 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/ |
| }; |
| |
| /* The "default" register map used in 64bit mode. */ |
| int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = |
| { |
| 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ |
| 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ |
| -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ |
| 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ |
| 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ |
| 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */ |
| 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ |
| }; |
| |
| /* Define the register numbers to be used in Dwarf debugging information. |
| The SVR4 reference port C compiler uses the following register numbers |
| in its Dwarf output code: |
| 0 for %eax (gcc regno = 0) |
| 1 for %ecx (gcc regno = 2) |
| 2 for %edx (gcc regno = 1) |
| 3 for %ebx (gcc regno = 3) |
| 4 for %esp (gcc regno = 7) |
| 5 for %ebp (gcc regno = 6) |
| 6 for %esi (gcc regno = 4) |
| 7 for %edi (gcc regno = 5) |
| The following three DWARF register numbers are never generated by |
| the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 |
| believes these numbers have these meanings. |
| 8 for %eip (no gcc equivalent) |
| 9 for %eflags (gcc regno = 17) |
| 10 for %trapno (no gcc equivalent) |
| It is not at all clear how we should number the FP stack registers |
| for the x86 architecture. If the version of SDB on x86/svr4 were |
| a bit less brain dead with respect to floating-point then we would |
| have a precedent to follow with respect to DWARF register numbers |
| for x86 FP registers, but the SDB on x86/svr4 is so completely |
| broken with respect to FP registers that it is hardly worth thinking |
| of it as something to strive for compatibility with. |
| The version of x86/svr4 SDB I have at the moment does (partially) |
| seem to believe that DWARF register number 11 is associated with |
| the x86 register %st(0), but that's about all. Higher DWARF |
| register numbers don't seem to be associated with anything in |
| particular, and even for DWARF regno 11, SDB only seems to under- |
| stand that it should say that a variable lives in %st(0) (when |
| asked via an `=' command) if we said it was in DWARF regno 11, |
| but SDB still prints garbage when asked for the value of the |
| variable in question (via a `/' command). |
| (Also note that the labels SDB prints for various FP stack regs |
| when doing an `x' command are all wrong.) |
| Note that these problems generally don't affect the native SVR4 |
| C compiler because it doesn't allow the use of -O with -g and |
| because when it is *not* optimizing, it allocates a memory |
| location for each floating-point variable, and the memory |
| location is what gets described in the DWARF AT_location |
| attribute for the variable in question. |
| Regardless of the severe mental illness of the x86/svr4 SDB, we |
| do something sensible here and we use the following DWARF |
| register numbers. Note that these are all stack-top-relative |
| numbers. |
| 11 for %st(0) (gcc regno = 8) |
| 12 for %st(1) (gcc regno = 9) |
| 13 for %st(2) (gcc regno = 10) |
| 14 for %st(3) (gcc regno = 11) |
| 15 for %st(4) (gcc regno = 12) |
| 16 for %st(5) (gcc regno = 13) |
| 17 for %st(6) (gcc regno = 14) |
| 18 for %st(7) (gcc regno = 15) |
| */ |
| int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = |
| { |
| 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ |
| 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ |
| -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ |
| 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ |
| 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ |
| }; |
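| |
| /* For example, %ecx is gcc regno 2 but DWARF regno 1 (see the numbering |
| described above), so svr4_dbx_register_map[2] == 1. */ |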
| |
| /* Test and compare insns in i386.md store the information needed to |
| generate branch and scc insns here. */ |
| |
| rtx ix86_compare_op0 = NULL_RTX; |
| rtx ix86_compare_op1 = NULL_RTX; |
| rtx ix86_compare_emitted = NULL_RTX; |
| |
| /* Size of the register save area. */ |
| #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) |
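| /* With the x86-64 psABI values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and |
| SSE_REGPARM_MAX == 8, this evaluates to 6*8 + 8*16 == 176 bytes: 48 for |
| the integer registers plus 128 for the XMM registers. */ |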
| |
| /* Define the structure for the machine field in struct function. */ |
| |
| struct stack_local_entry GTY(()) |
| { |
| unsigned short mode; |
| unsigned short n; |
| rtx rtl; |
| struct stack_local_entry *next; |
| }; |
| |
| /* Structure describing stack frame layout. |
| Stack grows downward: |
| |
| [arguments] |
| <- ARG_POINTER |
| saved pc |
| |
| saved frame pointer if frame_pointer_needed |
| <- HARD_FRAME_POINTER |
| [saved regs] |
| |
| [padding1] \ |
| ) |
| [va_arg registers] ( |
| > to_allocate <- FRAME_POINTER |
| [frame] ( |
| ) |
| [padding2] / |
| */ |
| struct ix86_frame |
| { |
| int nregs; |
| int padding1; |
| int va_arg_size; |
| HOST_WIDE_INT frame; |
| int padding2; |
| int outgoing_arguments_size; |
| int red_zone_size; |
| |
| HOST_WIDE_INT to_allocate; |
| /* The offsets relative to ARG_POINTER. */ |
| HOST_WIDE_INT frame_pointer_offset; |
| HOST_WIDE_INT hard_frame_pointer_offset; |
| HOST_WIDE_INT stack_pointer_offset; |
| |
| /* When save_regs_using_mov is set, emit prologue using |
| move instead of push instructions. */ |
| bool save_regs_using_mov; |
| }; |
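| |
| /* A worked 32-bit example (illustrative): with frame_pointer_needed, the |
| saved pc and the saved frame pointer occupy 2 * UNITS_PER_WORD below |
| the arg pointer, giving hard_frame_pointer_offset == 8; to_allocate |
| then covers padding1, the va_arg registers, the frame, and padding2, |
| exactly as bracketed in the diagram above. */ |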
| |
| /* Code model option. */ |
| enum cmodel ix86_cmodel; |
| /* Asm dialect. */ |
| enum asm_dialect ix86_asm_dialect = ASM_ATT; |
| /* TLS dialects. */ |
| enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; |
| |
| /* Which unit we are generating floating point math for. */ |
| enum fpmath_unit ix86_fpmath; |
| |
| /* Which CPU we are scheduling for. */ |
| enum processor_type ix86_tune; |
| /* Which instruction set architecture to use. */ |
| enum processor_type ix86_arch; |
| |
| /* True if the SSE prefetch instruction is not a NOOP. */ |
| int x86_prefetch_sse; |
| |
| /* ix86_regparm_string as a number */ |
| /* LLVM LOCAL begin */ |
| #ifndef ENABLE_LLVM |
| static |
| #endif |
| int ix86_regparm; |
| /* LLVM LOCAL end */ |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* True if SSE population count insn supported. */ |
| int x86_popcnt; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| /* -mstackrealign option */ |
| extern int ix86_force_align_arg_pointer; |
| static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer"; |
| |
| /* Preferred alignment for stack boundary in bits. */ |
| unsigned int ix86_preferred_stack_boundary; |
| /* APPLE LOCAL begin radar 4216496, 4229407, 4120689, 4095567 */ |
| unsigned int ix86_save_preferred_stack_boundary; |
| /* APPLE LOCAL end radar 4216496, 4229407, 4120689, 4095567 */ |
| |
| /* Values 1-5: see jump.c */ |
| int ix86_branch_cost; |
| |
| /* Variables which are this size or smaller are put in the data/bss |
| or ldata/lbss sections. */ |
| |
| int ix86_section_threshold = 65536; |
| |
| /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ |
| char internal_label_prefix[16]; |
| int internal_label_prefix_len; |
| |
| static bool ix86_handle_option (size_t, const char *, int); |
| static void output_pic_addr_const (FILE *, rtx, int); |
| static void put_condition_code (enum rtx_code, enum machine_mode, |
| int, int, FILE *); |
| static const char *get_some_local_dynamic_name (void); |
| static int get_some_local_dynamic_name_1 (rtx *, void *); |
| static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx); |
| static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *, |
| rtx *); |
| static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *); |
| static enum machine_mode ix86_cc_modes_compatible (enum machine_mode, |
| enum machine_mode); |
| static rtx get_thread_pointer (int); |
| static rtx legitimize_tls_address (rtx, enum tls_model, int); |
| static void get_pc_thunk_name (char [32], unsigned int); |
| static rtx gen_push (rtx); |
| static int ix86_flags_dependent (rtx, rtx, enum attr_type); |
| static int ix86_agi_dependent (rtx, rtx, enum attr_type); |
| static struct machine_function * ix86_init_machine_status (void); |
| static int ix86_split_to_parts (rtx, rtx *, enum machine_mode); |
| static int ix86_nsaved_regs (void); |
| static void ix86_emit_save_regs (void); |
| static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT); |
| static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int); |
| static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT); |
| static HOST_WIDE_INT ix86_GOT_alias_set (void); |
| static void ix86_adjust_counter (rtx, HOST_WIDE_INT); |
| static rtx ix86_expand_aligntest (rtx, int); |
| static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx); |
| static int ix86_issue_rate (void); |
| static int ix86_adjust_cost (rtx, rtx, rtx, int); |
| static int ia32_multipass_dfa_lookahead (void); |
| static void ix86_init_mmx_sse_builtins (void); |
| static rtx x86_this_parameter (tree); |
| static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, |
| HOST_WIDE_INT, tree); |
| static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); |
| static void x86_file_start (void); |
| static void ix86_reorg (void); |
| static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*); |
| static tree ix86_build_builtin_va_list (void); |
| static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, |
| tree, int *, int); |
| static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *); |
| static bool ix86_scalar_mode_supported_p (enum machine_mode); |
| static bool ix86_vector_mode_supported_p (enum machine_mode); |
| |
| static int ix86_address_cost (rtx); |
| static bool ix86_cannot_force_const_mem (rtx); |
| static rtx ix86_delegitimize_address (rtx); |
| |
| static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; |
| |
| struct builtin_description; |
| static rtx ix86_expand_sse_comi (const struct builtin_description *, |
| tree, rtx); |
| static rtx ix86_expand_sse_compare (const struct builtin_description *, |
| tree, rtx); |
| static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx); |
| static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int); |
| static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx); |
| static rtx ix86_expand_store_builtin (enum insn_code, tree); |
| static rtx safe_vector_operand (rtx, enum machine_mode); |
| static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *); |
| static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code); |
| static int ix86_fp_comparison_fcomi_cost (enum rtx_code code); |
| static int ix86_fp_comparison_sahf_cost (enum rtx_code code); |
| static int ix86_fp_comparison_cost (enum rtx_code code); |
| static unsigned int ix86_select_alt_pic_regnum (void); |
| static int ix86_save_reg (unsigned int, int); |
| static void ix86_compute_frame_layout (struct ix86_frame *); |
| static int ix86_comp_type_attributes (tree, tree); |
| static int ix86_function_regparm (tree, tree); |
| const struct attribute_spec ix86_attribute_table[]; |
| static bool ix86_function_ok_for_sibcall (tree, tree); |
| static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *); |
| static int ix86_value_regno (enum machine_mode, tree, tree); |
| /* LLVM LOCAL - begin make global */ |
| #ifndef ENABLE_LLVM |
| static bool contains_128bit_aligned_vector_p (tree); |
| #endif |
| /* LLVM LOCAL - end make global */ |
| static rtx ix86_struct_value_rtx (tree, int); |
| static bool ix86_ms_bitfield_layout_p (tree); |
| static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *); |
| static int extended_reg_mentioned_1 (rtx *, void *); |
| static bool ix86_rtx_costs (rtx, int, int, int *); |
| static int min_insn_size (rtx); |
| static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers); |
| static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type); |
| static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, |
| tree, bool); |
| static void ix86_init_builtins (void); |
| static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int); |
| /* APPLE LOCAL mangle_type 7105099 */ |
| static const char *ix86_mangle_type (tree); |
| static tree ix86_stack_protect_fail (void); |
| static rtx ix86_internal_arg_pointer (void); |
| static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int); |
| |
| /* This function is only used on Solaris. */ |
| static void i386_solaris_elf_named_section (const char *, unsigned int, tree) |
| ATTRIBUTE_UNUSED; |
| |
| /* LLVM LOCAL begin */ |
| #ifndef ENABLE_LLVM |
| /* Register class used for passing a given 64-bit part of the argument. |
| These represent classes as documented by the PS ABI, with the exception |
| of the SSESF and SSEDF classes, which are basically the SSE class; gcc |
| just uses SFmode or DFmode moves instead of DImode to avoid |
| reformatting penalties. |
| |
| Similarly we play games with INTEGERSI_CLASS to use cheaper SImode |
| moves whenever possible (the upper half does contain padding). |
| */ |
| enum x86_64_reg_class |
| { |
| X86_64_NO_CLASS, |
| X86_64_INTEGER_CLASS, |
| X86_64_INTEGERSI_CLASS, |
| X86_64_SSE_CLASS, |
| X86_64_SSESF_CLASS, |
| X86_64_SSEDF_CLASS, |
| X86_64_SSEUP_CLASS, |
| X86_64_X87_CLASS, |
| X86_64_X87UP_CLASS, |
| X86_64_COMPLEX_X87_CLASS, |
| X86_64_MEMORY_CLASS, |
| X86_64_POINTER_CLASS |
| }; |
| #endif /* !ENABLE_LLVM */ |
| /* LLVM LOCAL end */ |
| static const char * const x86_64_reg_class_name[] = { |
| "no", "integer", "integerSI", "sse", "sseSF", "sseDF", |
| /* LLVM LOCAL */ |
| "sseup", "x87", "x87up", "cplx87", "no", "ptr" |
| }; |
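| |
| /* A sketch of the psABI classification these names describe: each |
| argument is split into 64-bit chunks and each chunk gets one class, |
| e.g. struct { long l; double d; } classifies as |
| { X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS }; anything that ends up |
| X86_64_MEMORY_CLASS is passed on the stack instead. */ |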
| |
| #define MAX_CLASSES 4 |
| |
| /* Table of constants used by fldpi, fldln2, etc. */ |
| static REAL_VALUE_TYPE ext_80387_constants_table [5]; |
| static bool ext_80387_constants_init = 0; |
| static void init_ext_80387_constants (void); |
| static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED; |
| static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; |
| static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED; |
| static section *x86_64_elf_select_section (tree decl, int reloc, |
| unsigned HOST_WIDE_INT align) |
| ATTRIBUTE_UNUSED; |
| |
| /* Initialize the GCC target structure. */ |
| #undef TARGET_ATTRIBUTE_TABLE |
| #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table |
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| # undef TARGET_MERGE_DECL_ATTRIBUTES |
| # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes |
| #endif |
| |
| #undef TARGET_COMP_TYPE_ATTRIBUTES |
| #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes |
| |
| #undef TARGET_INIT_BUILTINS |
| #define TARGET_INIT_BUILTINS ix86_init_builtins |
| #undef TARGET_EXPAND_BUILTIN |
| #define TARGET_EXPAND_BUILTIN ix86_expand_builtin |
| |
| #undef TARGET_ASM_FUNCTION_EPILOGUE |
| #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue |
| |
| #undef TARGET_ENCODE_SECTION_INFO |
| #ifndef SUBTARGET_ENCODE_SECTION_INFO |
| #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info |
| #else |
| #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO |
| #endif |
| |
| #undef TARGET_ASM_OPEN_PAREN |
| #define TARGET_ASM_OPEN_PAREN "" |
| #undef TARGET_ASM_CLOSE_PAREN |
| #define TARGET_ASM_CLOSE_PAREN "" |
| |
| #undef TARGET_ASM_ALIGNED_HI_OP |
| #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT |
| #undef TARGET_ASM_ALIGNED_SI_OP |
| #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG |
| #ifdef ASM_QUAD |
| #undef TARGET_ASM_ALIGNED_DI_OP |
| #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD |
| #endif |
| |
| #undef TARGET_ASM_UNALIGNED_HI_OP |
| #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP |
| #undef TARGET_ASM_UNALIGNED_SI_OP |
| #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP |
| #undef TARGET_ASM_UNALIGNED_DI_OP |
| #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP |
| |
| #undef TARGET_SCHED_ADJUST_COST |
| #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost |
| #undef TARGET_SCHED_ISSUE_RATE |
| #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate |
| #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD |
| #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ |
| ia32_multipass_dfa_lookahead |
| |
| #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
| #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall |
| |
| #ifdef HAVE_AS_TLS |
| #undef TARGET_HAVE_TLS |
| #define TARGET_HAVE_TLS true |
| #endif |
| #undef TARGET_CANNOT_FORCE_CONST_MEM |
| #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem |
| #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P |
| #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true |
| |
| #undef TARGET_DELEGITIMIZE_ADDRESS |
| #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address |
| |
| #undef TARGET_MS_BITFIELD_LAYOUT_P |
| #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p |
| |
| #if TARGET_MACHO |
| #undef TARGET_BINDS_LOCAL_P |
| #define TARGET_BINDS_LOCAL_P darwin_binds_local_p |
| #endif |
| |
| #undef TARGET_ASM_OUTPUT_MI_THUNK |
| #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk |
| #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
| #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk |
| |
| #undef TARGET_ASM_FILE_START |
| #define TARGET_ASM_FILE_START x86_file_start |
| |
| #undef TARGET_DEFAULT_TARGET_FLAGS |
| #define TARGET_DEFAULT_TARGET_FLAGS \ |
| (TARGET_DEFAULT \ |
| | TARGET_64BIT_DEFAULT \ |
| | TARGET_SUBTARGET_DEFAULT \ |
| | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT) |
| |
| #undef TARGET_HANDLE_OPTION |
| #define TARGET_HANDLE_OPTION ix86_handle_option |
| |
| #undef TARGET_RTX_COSTS |
| #define TARGET_RTX_COSTS ix86_rtx_costs |
| #undef TARGET_ADDRESS_COST |
| #define TARGET_ADDRESS_COST ix86_address_cost |
| |
| #undef TARGET_FIXED_CONDITION_CODE_REGS |
| #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs |
| #undef TARGET_CC_MODES_COMPATIBLE |
| #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible |
| |
| #undef TARGET_MACHINE_DEPENDENT_REORG |
| #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg |
| |
| #undef TARGET_BUILD_BUILTIN_VA_LIST |
| #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list |
| |
| #undef TARGET_MD_ASM_CLOBBERS |
| #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers |
| |
| #undef TARGET_PROMOTE_PROTOTYPES |
| #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true |
| #undef TARGET_STRUCT_VALUE_RTX |
| #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx |
| #undef TARGET_SETUP_INCOMING_VARARGS |
| #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs |
| #undef TARGET_MUST_PASS_IN_STACK |
| #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack |
| #undef TARGET_PASS_BY_REFERENCE |
| #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference |
| #undef TARGET_INTERNAL_ARG_POINTER |
| #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer |
| #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC |
| #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec |
| |
| #undef TARGET_GIMPLIFY_VA_ARG_EXPR |
| #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg |
| |
| #undef TARGET_SCALAR_MODE_SUPPORTED_P |
| #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p |
| |
| #undef TARGET_VECTOR_MODE_SUPPORTED_P |
| #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p |
| |
| #ifdef HAVE_AS_TLS |
| #undef TARGET_ASM_OUTPUT_DWARF_DTPREL |
| #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel |
| #endif |
| |
| #ifdef SUBTARGET_INSERT_ATTRIBUTES |
| #undef TARGET_INSERT_ATTRIBUTES |
| #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES |
| #endif |
| |
| /* APPLE LOCAL begin mangle_type 7105099 */ |
| #undef TARGET_MANGLE_TYPE |
| #define TARGET_MANGLE_TYPE ix86_mangle_type |
| /* APPLE LOCAL end mangle_type 7105099 */ |
| |
| #undef TARGET_STACK_PROTECT_FAIL |
| #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail |
| |
| #undef TARGET_FUNCTION_VALUE |
| #define TARGET_FUNCTION_VALUE ix86_function_value |
| |
| struct gcc_target targetm = TARGET_INITIALIZER; |
| |
| |
| /* The svr4 ABI for the i386 says that records and unions are returned |
| in memory. */ |
| #ifndef DEFAULT_PCC_STRUCT_RETURN |
| #define DEFAULT_PCC_STRUCT_RETURN 1 |
| #endif |
| |
| /* Implement TARGET_HANDLE_OPTION. */ |
| |
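/* Note that the negative forms cascade: e.g. -mno-sse must also clear the
   SSE2 and SSE3 masks, since those extensions presuppose SSE.  Recording
   the cleared bits in target_flags_explicit keeps the default-enabling
   logic in override_options from switching them back on.  */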
| static bool |
| ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) |
| { |
| switch (code) |
| { |
| case OPT_m3dnow: |
| if (!value) |
| { |
| target_flags &= ~MASK_3DNOW_A; |
| target_flags_explicit |= MASK_3DNOW_A; |
| } |
| return true; |
| |
| case OPT_mmmx: |
| if (!value) |
| { |
| target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A); |
| target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A; |
| } |
| return true; |
| |
| case OPT_msse: |
| if (!value) |
| { |
| target_flags &= ~(MASK_SSE2 | MASK_SSE3); |
| target_flags_explicit |= MASK_SSE2 | MASK_SSE3; |
| } |
| return true; |
| |
| case OPT_msse2: |
| if (!value) |
| { |
| target_flags &= ~MASK_SSE3; |
| target_flags_explicit |= MASK_SSE3; |
| } |
| return true; |
| |
| default: |
| return true; |
| } |
| } |
| |
| /* APPLE LOCAL begin 4760857 optimization pragmas. */ |
| /* Hoisted so it can be used by reset_optimization_options. */ |
| static struct ptt |
| { |
| const struct processor_costs *cost; /* Processor costs */ |
| const int target_enable; /* Target flags to enable. */ |
| const int target_disable; /* Target flags to disable. */ |
| const int align_loop; /* Default alignments. */ |
| const int align_loop_max_skip; |
| const int align_jump; |
| const int align_jump_max_skip; |
| const int align_func; |
| } |
| const processor_target_table[PROCESSOR_max] = |
| { |
| {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, |
| {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, |
| {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, |
| {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, |
| {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, |
| {&athlon_cost, 0, 0, 16, 7, 16, 7, 16}, |
| {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}, |
| {&k8_cost, 0, 0, 16, 7, 16, 7, 16}, |
| {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}, |
| /* APPLE LOCAL mainline */ |
| {&core2_cost, 0, 0, 16, 7, 16, 7, 16}, |
| {&generic32_cost, 0, 0, 16, 7, 16, 7, 16}, |
| {&generic64_cost, 0, 0, 16, 7, 16, 7, 16} |
| }; |
| /* APPLE LOCAL end 4760857 optimization pragmas. */ |
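
/* In the table above, alignment entries of zero (the Pentium 4 and Nocona
   rows) request no particular code alignment, leaving the generic
   -falign-* defaults in effect.  */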
| |
| /* Sometimes certain combinations of command options do not make |
| sense on a particular target machine. You can define a macro |
| `OVERRIDE_OPTIONS' to take account of this. This macro, if |
| defined, is executed once just after all the command options have |
| been parsed. |
| |
| Don't use this macro to turn on various extra optimizations for |
| `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ |
| |
| void |
| override_options (void) |
| { |
| int i; |
| int ix86_tune_defaulted = 0; |
| /* APPLE LOCAL mainline */ |
| int ix86_arch_specified = 0; |
| |
| /* Comes from final.c -- no real reason to change it. */ |
| #define MAX_CODE_ALIGN 16 |
| |
| /* APPLE LOCAL begin 4760857 optimization pragmas. */ |
| /* processor_target_table moved to file scope. */ |
| /* APPLE LOCAL end 4760857 optimization pragmas. */ |
| |
| static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; |
| static struct pta |
| { |
| const char *const name; /* processor name or nickname. */ |
| const enum processor_type processor; |
| const enum pta_flags |
| { |
      PTA_SSE = 1 << 0,
      PTA_SSE2 = 1 << 1,
      PTA_SSE3 = 1 << 2,
      PTA_MMX = 1 << 3,
      PTA_PREFETCH_SSE = 1 << 4,
      PTA_3DNOW = 1 << 5,
      PTA_3DNOW_A = 1 << 6,
      /* APPLE LOCAL begin 5612787 mainline sse4 */
      /* APPLE LOCAL begin mainline */
      PTA_64BIT = 1 << 7,
      PTA_SSSE3 = 1 << 8,
| /* APPLE LOCAL end mainline */ |
| PTA_CX16 = 1 << 9, |
| PTA_POPCNT = 1 << 10, |
| PTA_ABM = 1 << 11, |
| PTA_SSE4A = 1 << 12, |
| PTA_NO_SAHF = 1 << 13, |
| PTA_SSE4_1 = 1 << 14, |
| PTA_SSE4_2 = 1 << 15 |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| } flags; |
| } |
| const processor_alias_table[] = |
| { |
| {"i386", PROCESSOR_I386, 0}, |
| {"i486", PROCESSOR_I486, 0}, |
| {"i586", PROCESSOR_PENTIUM, 0}, |
| {"pentium", PROCESSOR_PENTIUM, 0}, |
| {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, |
| {"winchip-c6", PROCESSOR_I486, PTA_MMX}, |
| {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, |
| {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, |
| {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, |
| {"i686", PROCESSOR_PENTIUMPRO, 0}, |
| {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, |
| {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, |
| {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, |
| {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, |
| {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2}, |
| {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
| | PTA_MMX | PTA_PREFETCH_SSE}, |
| {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
| | PTA_MMX | PTA_PREFETCH_SSE}, |
| {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| | PTA_MMX | PTA_PREFETCH_SSE}, |
| {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT |
| | PTA_MMX | PTA_PREFETCH_SSE}, |
| /* APPLE LOCAL begin mainline */ |
| {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| | PTA_64BIT | PTA_MMX |
| | PTA_PREFETCH_SSE}, |
| /* APPLE LOCAL end mainline */ |
| {"k6", PROCESSOR_K6, PTA_MMX}, |
| {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, |
| {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, |
| {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW |
| | PTA_3DNOW_A}, |
| {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE |
| | PTA_3DNOW | PTA_3DNOW_A}, |
| {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW |
| | PTA_3DNOW_A | PTA_SSE}, |
| {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW |
| | PTA_3DNOW_A | PTA_SSE}, |
| {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW |
| | PTA_3DNOW_A | PTA_SSE}, |
| {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT |
| | PTA_SSE | PTA_SSE2 }, |
| {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT |
| | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, |
| {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT |
| | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, |
| {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT |
| | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, |
| {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT |
| | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, |
| {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ }, |
| {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ }, |
| }; |
| |
| int const pta_size = ARRAY_SIZE (processor_alias_table); |
| |
| #ifdef SUBTARGET_OVERRIDE_OPTIONS |
| SUBTARGET_OVERRIDE_OPTIONS; |
| #endif |
| |
| #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS |
| SUBSUBTARGET_OVERRIDE_OPTIONS; |
| #endif |
| |
| /* -fPIC is the default for x86_64. */ |
| if (TARGET_MACHO && TARGET_64BIT) |
| flag_pic = 2; |
| |
| /* Set the default values for switches whose default depends on TARGET_64BIT |
| in case they weren't overwritten by command line options. */ |
| if (TARGET_64BIT) |
| { |
| /* Mach-O doesn't support omitting the frame pointer for now. */ |
| if (flag_omit_frame_pointer == 2) |
| flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); |
| if (flag_asynchronous_unwind_tables == 2) |
| flag_asynchronous_unwind_tables = 1; |
| if (flag_pcc_struct_return == 2) |
| flag_pcc_struct_return = 0; |
| } |
| else |
| { |
| if (flag_omit_frame_pointer == 2) |
| flag_omit_frame_pointer = 0; |
| if (flag_asynchronous_unwind_tables == 2) |
| flag_asynchronous_unwind_tables = 0; |
| if (flag_pcc_struct_return == 2) |
| flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; |
| } |
| |
| /* Need to check -mtune=generic first. */ |
| if (ix86_tune_string) |
| { |
| if (!strcmp (ix86_tune_string, "generic") |
| || !strcmp (ix86_tune_string, "i686") |
| /* As special support for cross compilers we read -mtune=native |
| as -mtune=generic. With native compilers we won't see the |
| -mtune=native, as it was changed by the driver. */ |
| || !strcmp (ix86_tune_string, "native")) |
| { |
| if (TARGET_64BIT) |
| ix86_tune_string = "generic64"; |
| else |
| ix86_tune_string = "generic32"; |
| } |
| else if (!strncmp (ix86_tune_string, "generic", 7)) |
| error ("bad value (%s) for -mtune= switch", ix86_tune_string); |
| } |
| else |
| { |
| if (ix86_arch_string) |
| ix86_tune_string = ix86_arch_string; |
| if (!ix86_tune_string) |
| { |
| ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT]; |
| ix86_tune_defaulted = 1; |
| } |
| |
| /* ix86_tune_string is set to ix86_arch_string or defaulted. We |
| need to use a sensible tune option. */ |
| if (!strcmp (ix86_tune_string, "generic") |
| || !strcmp (ix86_tune_string, "x86-64") |
| || !strcmp (ix86_tune_string, "i686")) |
| { |
| if (TARGET_64BIT) |
| ix86_tune_string = "generic64"; |
| else |
| ix86_tune_string = "generic32"; |
| } |
| } |
| if (!strcmp (ix86_tune_string, "x86-64")) |
| warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or " |
| "-mtune=generic instead as appropriate."); |
| |
| if (!ix86_arch_string) |
| ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; |
| /* APPLE LOCAL begin mainline */ |
| else |
| ix86_arch_specified = 1; |
| /* APPLE LOCAL end mainline */ |
| if (!strcmp (ix86_arch_string, "generic")) |
| error ("generic CPU can be used only for -mtune= switch"); |
| if (!strncmp (ix86_arch_string, "generic", 7)) |
| error ("bad value (%s) for -march= switch", ix86_arch_string); |
| |
| if (ix86_cmodel_string != 0) |
| { |
| if (!strcmp (ix86_cmodel_string, "small")) |
| ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; |
| else if (!strcmp (ix86_cmodel_string, "medium")) |
| ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM; |
| else if (flag_pic) |
| sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); |
| else if (!strcmp (ix86_cmodel_string, "32")) |
| ix86_cmodel = CM_32; |
| else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) |
| ix86_cmodel = CM_KERNEL; |
| else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) |
| ix86_cmodel = CM_LARGE; |
| else |
| error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); |
| } |
| else |
| { |
| /* LLVM LOCAL begin mainline */ |
| #ifdef ENABLE_LLVM |
| /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the |
| use of rip-relative addressing. This eliminates fixups that |
| would otherwise be needed if this object is to be placed in a |
| DLL, and is essentially just as efficient as direct addressing. */ |
| if (TARGET_64BIT_MS_ABI) |
| ix86_cmodel = CM_SMALL_PIC, flag_pic = 1; |
| else if (TARGET_64BIT) |
| ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; |
| else |
| ix86_cmodel = CM_32; |
| #else |
| ix86_cmodel = CM_32; |
| if (TARGET_64BIT) |
| ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; |
| #endif |
| /* LLVM LOCAL end mainline */ |
| } |
| if (ix86_asm_string != 0) |
| { |
| if (! TARGET_MACHO |
| && !strcmp (ix86_asm_string, "intel")) |
| ix86_asm_dialect = ASM_INTEL; |
| else if (!strcmp (ix86_asm_string, "att")) |
| ix86_asm_dialect = ASM_ATT; |
| else |
| error ("bad value (%s) for -masm= switch", ix86_asm_string); |
| } |
| if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) |
| error ("code model %qs not supported in the %s bit mode", |
| ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); |
| if (ix86_cmodel == CM_LARGE) |
| sorry ("code model %<large%> not supported yet"); |
| if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) |
| sorry ("%i-bit mode not compiled in", |
| (target_flags & MASK_64BIT) ? 64 : 32); |
| |
| for (i = 0; i < pta_size; i++) |
| if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) |
| { |
| ix86_arch = processor_alias_table[i].processor; |
| /* Default cpu tuning to the architecture. */ |
| ix86_tune = ix86_arch; |
| if (processor_alias_table[i].flags & PTA_MMX |
| && !(target_flags_explicit & MASK_MMX)) |
| target_flags |= MASK_MMX; |
| if (processor_alias_table[i].flags & PTA_3DNOW |
| && !(target_flags_explicit & MASK_3DNOW)) |
| target_flags |= MASK_3DNOW; |
| if (processor_alias_table[i].flags & PTA_3DNOW_A |
| && !(target_flags_explicit & MASK_3DNOW_A)) |
| target_flags |= MASK_3DNOW_A; |
| if (processor_alias_table[i].flags & PTA_SSE |
| && !(target_flags_explicit & MASK_SSE)) |
| target_flags |= MASK_SSE; |
| if (processor_alias_table[i].flags & PTA_SSE2 |
| && !(target_flags_explicit & MASK_SSE2)) |
| target_flags |= MASK_SSE2; |
| if (processor_alias_table[i].flags & PTA_SSE3 |
| && !(target_flags_explicit & MASK_SSE3)) |
| target_flags |= MASK_SSE3; |
| /* APPLE LOCAL begin mainline */ |
| if (processor_alias_table[i].flags & PTA_SSSE3 |
| && !(target_flags_explicit & MASK_SSSE3)) |
| target_flags |= MASK_SSSE3; |
| /* APPLE LOCAL end mainline */ |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| if (processor_alias_table[i].flags & PTA_SSE4_1 |
| && !(target_flags_explicit & MASK_SSE4_1)) |
| target_flags |= MASK_SSE4_1; |
| if (processor_alias_table[i].flags & PTA_SSE4_2 |
| && !(target_flags_explicit & MASK_SSE4_2)) |
| target_flags |= MASK_SSE4_2; |
| if (processor_alias_table[i].flags & PTA_SSE4A |
| && !(target_flags_explicit & MASK_SSE4A)) |
| target_flags |= MASK_SSE4A; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) |
| x86_prefetch_sse = true; |
| if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) |
| error ("CPU you selected does not support x86-64 " |
| "instruction set"); |
| break; |
| } |
| |
| if (i == pta_size) |
| error ("bad value (%s) for -march= switch", ix86_arch_string); |
| |
| for (i = 0; i < pta_size; i++) |
| if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) |
| { |
| ix86_tune = processor_alias_table[i].processor; |
| if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) |
| { |
| if (ix86_tune_defaulted) |
| { |
| ix86_tune_string = "x86-64"; |
| for (i = 0; i < pta_size; i++) |
| if (! strcmp (ix86_tune_string, |
| processor_alias_table[i].name)) |
| break; |
| ix86_tune = processor_alias_table[i].processor; |
| } |
| else |
| error ("CPU you selected does not support x86-64 " |
| "instruction set"); |
| } |
| /* Intel CPUs have always interpreted SSE prefetch instructions as |
| NOPs; so, we can enable SSE prefetch instructions even when |
| -mtune (rather than -march) points us to a processor that has them. |
| However, the VIA C3 gives a SIGILL, so we only do that for i686 and |
| higher processors. */ |
| if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE)) |
| x86_prefetch_sse = true; |
| break; |
| } |
| if (i == pta_size) |
| error ("bad value (%s) for -mtune= switch", ix86_tune_string); |
| |
| if (optimize_size) |
| ix86_cost = &size_cost; |
| else |
| ix86_cost = processor_target_table[ix86_tune].cost; |
| target_flags |= processor_target_table[ix86_tune].target_enable; |
| target_flags &= ~processor_target_table[ix86_tune].target_disable; |
| |
| /* Arrange to set up i386_stack_locals for all functions. */ |
| init_machine_status = ix86_init_machine_status; |
| |
| /* Validate -mregparm= value. */ |
| if (ix86_regparm_string) |
| { |
| /* LLVM LOCAL begin mainline */ |
| #ifdef ENABLE_LLVM |
| if (TARGET_64BIT) |
| warning (0, "-mregparm is ignored in 64-bit mode"); |
| #endif |
| /* LLVM LOCAL end mainline */ |
| i = atoi (ix86_regparm_string); |
| if (i < 0 || i > REGPARM_MAX) |
| error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); |
| else |
| ix86_regparm = i; |
| } |
| /* LLVM LOCAL begin mainline */ |
| #ifndef ENABLE_LLVM |
| else |
| #endif |
| /* LLVM LOCAL end mainline */ |
| if (TARGET_64BIT) |
| ix86_regparm = REGPARM_MAX; |
| |
| /* If the user has provided any of the -malign-* options, |
| warn and use that value only if -falign-* is not set. |
| Remove this code in GCC 3.2 or later. */ |
| if (ix86_align_loops_string) |
| { |
| warning (0, "-malign-loops is obsolete, use -falign-loops"); |
| if (align_loops == 0) |
| { |
| i = atoi (ix86_align_loops_string); |
| if (i < 0 || i > MAX_CODE_ALIGN) |
| error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); |
| else |
| align_loops = 1 << i; |
| } |
| } |
| |
| if (ix86_align_jumps_string) |
| { |
| warning (0, "-malign-jumps is obsolete, use -falign-jumps"); |
| if (align_jumps == 0) |
| { |
| i = atoi (ix86_align_jumps_string); |
| if (i < 0 || i > MAX_CODE_ALIGN) |
| error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); |
| else |
| align_jumps = 1 << i; |
| } |
| } |
| |
| if (ix86_align_funcs_string) |
| { |
| warning (0, "-malign-functions is obsolete, use -falign-functions"); |
| if (align_functions == 0) |
| { |
| i = atoi (ix86_align_funcs_string); |
| if (i < 0 || i > MAX_CODE_ALIGN) |
| error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); |
| else |
| align_functions = 1 << i; |
| } |
| } |
| |
| /* Default align_* from the processor table. */ |
| if (align_loops == 0) |
| { |
| align_loops = processor_target_table[ix86_tune].align_loop; |
| align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; |
| } |
| if (align_jumps == 0) |
| { |
| align_jumps = processor_target_table[ix86_tune].align_jump; |
| align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; |
| } |
| if (align_functions == 0) |
| { |
| align_functions = processor_target_table[ix86_tune].align_func; |
| } |
| |
| /* Validate -mbranch-cost= value, or provide default. */ |
| ix86_branch_cost = ix86_cost->branch_cost; |
| if (ix86_branch_cost_string) |
| { |
| i = atoi (ix86_branch_cost_string); |
| if (i < 0 || i > 5) |
| error ("-mbranch-cost=%d is not between 0 and 5", i); |
| else |
| ix86_branch_cost = i; |
| } |
| if (ix86_section_threshold_string) |
| { |
| i = atoi (ix86_section_threshold_string); |
| if (i < 0) |
| error ("-mlarge-data-threshold=%d is negative", i); |
| else |
| ix86_section_threshold = i; |
| } |
| |
| if (ix86_tls_dialect_string) |
| { |
| if (strcmp (ix86_tls_dialect_string, "gnu") == 0) |
| ix86_tls_dialect = TLS_DIALECT_GNU; |
| else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) |
| ix86_tls_dialect = TLS_DIALECT_GNU2; |
| else if (strcmp (ix86_tls_dialect_string, "sun") == 0) |
| ix86_tls_dialect = TLS_DIALECT_SUN; |
| else |
| error ("bad value (%s) for -mtls-dialect= switch", |
| ix86_tls_dialect_string); |
| } |
| /* APPLE LOCAL begin mainline */ |
| if (TARGET_64BIT) |
| { |
| if (TARGET_ALIGN_DOUBLE) |
| error ("-malign-double makes no sense in the 64bit mode"); |
| if (TARGET_RTD) |
| error ("-mrtd calling convention not supported in the 64bit mode"); |
| /* APPLE LOCAL begin radar 4877693 */ |
| if (ix86_force_align_arg_pointer) |
| error ("-mstackrealign not supported in the 64bit mode"); |
| /* APPLE LOCAL end radar 4877693 */ |
| |
| /* LLVM LOCAL begin */ |
| #ifdef ENABLE_LLVM |
| target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit; |
| #endif |
| /* LLVM LOCAL end */ |
| |
| /* Enable by default the SSE and MMX builtins. Do allow the user to |
| explicitly disable any of these. In particular, disabling SSE and |
| MMX for kernel code is extremely useful. */ |
| if (!ix86_arch_specified) |
| /* LLVM LOCAL begin */ |
| #ifdef ENABLE_LLVM |
| target_flags |
| |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE) |
| & ~target_flags_explicit); |
| #else |
| target_flags |
| |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE |
| | TARGET_SUBTARGET64_DEFAULT) & ~target_flags_explicit); |
| #endif |
| /* LLVM LOCAL end */ |
| /* APPLE LOCAL begin mainline candidate */ |
| /* Disable the red zone for kernel compilation. |
| ??? Why aren't we using -mcmodel=kernel? */ |
| if (TARGET_MACHO |
| && (flag_mkernel || flag_apple_kext)) |
| target_flags |= MASK_NO_RED_ZONE; |
| /* APPLE LOCAL end mainline candidate */ |
| } |
| else |
| { |
| if (!ix86_arch_specified) |
| target_flags |= (TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit); |
| |
      /* The i386 ABI does not specify a red zone.  It can still make sense
	 to use one when the programmer takes care to keep the area below
	 the stack pointer from being clobbered.  */
| if (!(target_flags_explicit & MASK_NO_RED_ZONE)) |
| target_flags |= MASK_NO_RED_ZONE; |
| } |
| |
| /* APPLE LOCAL end mainline */ |
| /* Keep nonleaf frame pointers. */ |
| if (flag_omit_frame_pointer) |
| target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; |
| else if (TARGET_OMIT_LEAF_FRAME_POINTER) |
| /* LLVM LOCAL - Use '3' to indicate omitting leaf FPs only */ |
| flag_omit_frame_pointer = 3; |
| |
| /* If we're doing fast math, we don't care about comparison order |
| wrt NaNs. This lets us use a shorter comparison sequence. */ |
| if (flag_finite_math_only) |
| target_flags &= ~MASK_IEEE_FP; |
| |
| /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, |
| since the insns won't need emulation. */ |
| if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) |
| target_flags &= ~MASK_NO_FANCY_MATH_387; |
| |
| /* Likewise, if the target doesn't have a 387, or we've specified |
| software floating point, don't use 387 inline intrinsics. */ |
| if (!TARGET_80387) |
| target_flags |= MASK_NO_FANCY_MATH_387; |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* Turn on SSE4.1 builtins for -msse4.2. */ |
| if (TARGET_SSE4_2) |
| target_flags |= MASK_SSE4_1; |
| /* Turn on SSSE3 builtins for -msse4.1. */ |
| if (TARGET_SSE4_1) |
| target_flags |= MASK_SSSE3; |
| /* Turn on SSE3 builtins for -msse4a. */ |
| if (TARGET_SSE4A) |
| target_flags |= MASK_SSE3; |
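  /* Turn on the popcnt instruction for -msse4.2.  */
  if (TARGET_SSE4_2)
    x86_popcnt = true;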
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| /* APPLE LOCAL begin mainline */ |
| /* Turn on SSE3 builtins for -mssse3. */ |
| if (TARGET_SSSE3) |
| target_flags |= MASK_SSE3; |
| /* APPLE LOCAL end mainline */ |
| /* Turn on SSE2 builtins for -msse3. */ |
| if (TARGET_SSE3) |
| target_flags |= MASK_SSE2; |
| |
| /* Turn on SSE builtins for -msse2. */ |
| if (TARGET_SSE2) |
| target_flags |= MASK_SSE; |
| |
| /* Turn on MMX builtins for -msse. */ |
| if (TARGET_SSE) |
| { |
| target_flags |= MASK_MMX & ~target_flags_explicit; |
| x86_prefetch_sse = true; |
| } |
| |
| /* Turn on MMX builtins for 3Dnow. */ |
| if (TARGET_3DNOW) |
| target_flags |= MASK_MMX; |
| |
| /* APPLE LOCAL mainline */ |
| /* Moved this up... */ |
| /* Validate -mpreferred-stack-boundary= value, or provide default. |
| The default of 128 bits is for Pentium III's SSE __m128. We can't |
| change it because of optimize_size. Otherwise, we can't mix object |
| files compiled with -Os and -On. */ |
| ix86_preferred_stack_boundary = 128; |
| if (ix86_preferred_stack_boundary_string) |
| { |
| i = atoi (ix86_preferred_stack_boundary_string); |
| if (i < (TARGET_64BIT ? 4 : 2) || i > 12) |
| error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, |
| TARGET_64BIT ? 4 : 2); |
| else |
| ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; |
| } |
| |
| /* Accept -msseregparm only if at least SSE support is enabled. */ |
| if (TARGET_SSEREGPARM |
| && ! TARGET_SSE) |
| error ("-msseregparm used without SSE enabled"); |
| |
| ix86_fpmath = TARGET_FPMATH_DEFAULT; |
| |
| if (ix86_fpmath_string != 0) |
| { |
| if (! strcmp (ix86_fpmath_string, "387")) |
| ix86_fpmath = FPMATH_387; |
| else if (! strcmp (ix86_fpmath_string, "sse")) |
| { |
| if (!TARGET_SSE) |
| { |
| warning (0, "SSE instruction set disabled, using 387 arithmetics"); |
| ix86_fpmath = FPMATH_387; |
| } |
| else |
| ix86_fpmath = FPMATH_SSE; |
| } |
| else if (! strcmp (ix86_fpmath_string, "387,sse") |
| || ! strcmp (ix86_fpmath_string, "sse,387")) |
| { |
| if (!TARGET_SSE) |
| { |
| warning (0, "SSE instruction set disabled, using 387 arithmetics"); |
| ix86_fpmath = FPMATH_387; |
| } |
| else if (!TARGET_80387) |
| { |
| warning (0, "387 instruction set disabled, using SSE arithmetics"); |
| ix86_fpmath = FPMATH_SSE; |
| } |
| else |
| ix86_fpmath = FPMATH_SSE | FPMATH_387; |
| } |
| else |
| error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); |
| } |
| |
| /* If the i387 is disabled, then do not return values in it. */ |
| if (!TARGET_80387) |
| target_flags &= ~MASK_FLOAT_RETURNS; |
| |
| if ((x86_accumulate_outgoing_args & TUNEMASK) |
| && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) |
| && !optimize_size) |
| target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; |
| |
| /* ??? Unwind info is not correct around the CFG unless either a frame |
| pointer is present or M_A_O_A is set. Fixing this requires rewriting |
| unwind info generation to be aware of the CFG and propagating states |
| around edges. */ |
| if ((flag_unwind_tables || flag_asynchronous_unwind_tables |
| || flag_exceptions || flag_non_call_exceptions) |
| && flag_omit_frame_pointer |
| && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) |
| { |
| if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) |
| warning (0, "unwind tables currently require either a frame pointer " |
| "or -maccumulate-outgoing-args for correctness"); |
| target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; |
| } |
| |
| /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ |
| { |
| char *p; |
| ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); |
| p = strchr (internal_label_prefix, 'X'); |
| internal_label_prefix_len = p - internal_label_prefix; |
| *p = '\0'; |
| } |
| |
| /* When scheduling description is not available, disable scheduler pass |
| so it won't slow down the compilation and make x87 code slower. */ |
| /* APPLE LOCAL 5591571 */ |
| /* LLVM LOCAL begin */ |
| #ifndef ENABLE_LLVM |
| if (1 || !TARGET_SCHEDULE) |
| flag_schedule_insns_after_reload = flag_schedule_insns = 0; |
| #endif |
| /* LLVM LOCAL end */ |
| |
| /* APPLE LOCAL begin dynamic-no-pic */ |
| #if TARGET_MACHO |
| if (MACHO_DYNAMIC_NO_PIC_P) |
| { |
| if (flag_pic) |
| warning (0, "-mdynamic-no-pic overrides -fpic or -fPIC"); |
| flag_pic = 0; |
| } |
| else |
| #endif |
| if (flag_pic == 1) |
| { |
| /* Darwin's -fpic is -fPIC. */ |
| flag_pic = 2; |
| } |
| /* APPLE LOCAL end dynamic-no-pic */ |
| /* APPLE LOCAL begin 4812082 -fast */ |
| /* These flags were the best on the software H264 codec, and have therefore |
| been lumped into -fast per 4812082. They have not been evaluated on |
| any other code, except that -fno-tree-pre is known to lose on the |
| hardware accelerated version of the codec. */ |
| if (flag_fast || flag_fastf || flag_fastcp) |
| { |
| flag_omit_frame_pointer = 1; |
| flag_strict_aliasing = 1; |
| flag_tree_pre = 0; |
| target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; |
| align_loops = processor_target_table[ix86_tune].align_loop; |
| } |
| /* APPLE LOCAL end 4812082 -fast */ |
| } |
| |
/* Switch to the appropriate section for output of DECL.
| DECL is either a `VAR_DECL' node or a constant of some sort. |
| RELOC indicates whether forming the initial value of DECL requires |
| link-time relocations. */ |
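
/* Under the medium code model, large objects live in separate ".l"
   sections (.ldata, .lbss, .lrodata, ...) that the linker may place
   beyond the reach of 32-bit relocations.  */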
| |
| static section * |
| x86_64_elf_select_section (tree decl, int reloc, |
| unsigned HOST_WIDE_INT align) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && ix86_in_large_data_p (decl)) |
| { |
| const char *sname = NULL; |
| unsigned int flags = SECTION_WRITE; |
| switch (categorize_decl_for_section (decl, reloc)) |
| { |
| case SECCAT_DATA: |
| sname = ".ldata"; |
| break; |
| case SECCAT_DATA_REL: |
| sname = ".ldata.rel"; |
| break; |
| case SECCAT_DATA_REL_LOCAL: |
| sname = ".ldata.rel.local"; |
| break; |
| case SECCAT_DATA_REL_RO: |
| sname = ".ldata.rel.ro"; |
| break; |
| case SECCAT_DATA_REL_RO_LOCAL: |
| sname = ".ldata.rel.ro.local"; |
| break; |
| case SECCAT_BSS: |
| sname = ".lbss"; |
| flags |= SECTION_BSS; |
| break; |
| case SECCAT_RODATA: |
| case SECCAT_RODATA_MERGE_STR: |
| case SECCAT_RODATA_MERGE_STR_INIT: |
| case SECCAT_RODATA_MERGE_CONST: |
| sname = ".lrodata"; |
| flags = 0; |
| break; |
| case SECCAT_SRODATA: |
| case SECCAT_SDATA: |
| case SECCAT_SBSS: |
| gcc_unreachable (); |
| case SECCAT_TEXT: |
| case SECCAT_TDATA: |
| case SECCAT_TBSS: |
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
| break; |
| } |
| if (sname) |
| { |
| /* We might get called with string constants, but get_named_section |
| doesn't like them as they are not DECLs. Also, we need to set |
| flags in that case. */ |
| if (!DECL_P (decl)) |
| return get_section (sname, flags, NULL); |
| return get_named_section (decl, sname, reloc); |
| } |
| } |
| return default_elf_select_section (decl, reloc, align); |
| } |
| |
| /* Build up a unique section name, expressed as a |
| STRING_CST node, and assign it to DECL_SECTION_NAME (decl). |
| RELOC indicates whether the initial value of EXP requires |
| link-time relocations. */ |
| |
| static void |
| x86_64_elf_unique_section (tree decl, int reloc) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && ix86_in_large_data_p (decl)) |
| { |
| const char *prefix = NULL; |
| /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ |
| bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; |
| |
| switch (categorize_decl_for_section (decl, reloc)) |
| { |
| case SECCAT_DATA: |
| case SECCAT_DATA_REL: |
| case SECCAT_DATA_REL_LOCAL: |
| case SECCAT_DATA_REL_RO: |
| case SECCAT_DATA_REL_RO_LOCAL: |
| prefix = one_only ? ".gnu.linkonce.ld." : ".ldata."; |
| break; |
| case SECCAT_BSS: |
| prefix = one_only ? ".gnu.linkonce.lb." : ".lbss."; |
| break; |
| case SECCAT_RODATA: |
| case SECCAT_RODATA_MERGE_STR: |
| case SECCAT_RODATA_MERGE_STR_INIT: |
| case SECCAT_RODATA_MERGE_CONST: |
| prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata."; |
| break; |
| case SECCAT_SRODATA: |
| case SECCAT_SDATA: |
| case SECCAT_SBSS: |
| gcc_unreachable (); |
| case SECCAT_TEXT: |
| case SECCAT_TDATA: |
| case SECCAT_TBSS: |
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
| break; |
| } |
| if (prefix) |
| { |
| const char *name; |
| size_t nlen, plen; |
| char *string; |
| plen = strlen (prefix); |
| |
| name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); |
| name = targetm.strip_name_encoding (name); |
| nlen = strlen (name); |
| |
| string = alloca (nlen + plen + 1); |
| memcpy (string, prefix, plen); |
| memcpy (string + plen, name, nlen + 1); |
| |
| DECL_SECTION_NAME (decl) = build_string (nlen + plen, string); |
| return; |
| } |
| } |
| default_unique_section (decl, reloc); |
| } |
| |
| #ifdef COMMON_ASM_OP |
| /* This says how to output assembler code to declare an |
| uninitialized external linkage data object. |
| |
   For medium model x86-64 we need to use the .largecomm directive for
   large objects.  */
| void |
| x86_elf_aligned_common (FILE *file, |
| const char *name, unsigned HOST_WIDE_INT size, |
| int align) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && size > (unsigned int)ix86_section_threshold) |
| fprintf (file, ".largecomm\t"); |
| else |
| fprintf (file, "%s", COMMON_ASM_OP); |
| assemble_name (file, name); |
| fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", |
| size, align / BITS_PER_UNIT); |
| } |
| |
| /* Utility function for targets to use in implementing |
| ASM_OUTPUT_ALIGNED_BSS. */ |
| |
| void |
| x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED, |
| const char *name, unsigned HOST_WIDE_INT size, |
| int align) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && size > (unsigned int)ix86_section_threshold) |
| switch_to_section (get_named_section (decl, ".lbss", 0)); |
| else |
| switch_to_section (bss_section); |
| ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); |
| #ifdef ASM_DECLARE_OBJECT_NAME |
| last_assemble_variable_decl = decl; |
| ASM_DECLARE_OBJECT_NAME (file, name, decl); |
| #else |
  /* The standard thing is just to output a label for the object.  */
| ASM_OUTPUT_LABEL (file, name); |
| #endif /* ASM_DECLARE_OBJECT_NAME */ |
| ASM_OUTPUT_SKIP (file, size ? size : 1); |
| } |
| #endif |
| |
| void |
| /* LLVM LOCAL */ |
| optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED) |
| { |
| /* APPLE LOCAL begin disable strict aliasing; breaks too much existing code. */ |
| #if TARGET_MACHO |
| flag_strict_aliasing = 0; |
| #endif |
| /* APPLE LOCAL end disable strict aliasing; breaks too much existing code. */ |
| /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to |
| make the problem with not enough registers even worse. */ |
| /* LLVM LOCAL begin */ |
| #ifndef ENABLE_LLVM |
| #ifdef INSN_SCHEDULING |
| if (level > 1) |
| flag_schedule_insns = 0; |
| #endif |
| #endif |
| /* LLVM LOCAL end */ |
| |
| /* APPLE LOCAL begin pragma fenv */ |
| /* Trapping math is not needed by many users, and is expensive. |
| C99 permits us to default it off and we do that. It is |
| turned on when <fenv.h> is included (see darwin_pragma_fenv |
| in darwin-c.c). */ |
| flag_trapping_math = 0; |
| /* APPLE LOCAL end pragma fenv */ |
| |
| /* LLVM LOCAL begin */ |
/* Enable -fno-math-errno by default, even on systems whose libm functions set
   errno.  This is a deviation from what mainline GCC does, but it is a) good
   for performance, b) controllable with -fmath-errno, and c) the default
   behavior most people want anyway.  Numericists and others who play with or
   depend on errno can use -fmath-errno when they want it.  Because errno
   is not set on all targets anyway, their code is inherently non-portable.  */
| if (TARGET_MACHO |
| #ifdef ENABLE_LLVM |
| || 1 |
| #endif |
| ) |
| /* LLVM LOCAL end */ |
| /* The Darwin libraries never set errno, so we might as well |
| avoid calling them when that's the only reason we would. */ |
| flag_errno_math = 0; |
| |
  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
| if (optimize >= 1) |
| flag_omit_frame_pointer = 2; |
| flag_pcc_struct_return = 2; |
| flag_asynchronous_unwind_tables = 2; |
| #ifdef SUBTARGET_OPTIMIZATION_OPTIONS |
| SUBTARGET_OPTIMIZATION_OPTIONS; |
| #endif |
  /* APPLE LOCAL begin 4200243 */
  if (getenv ("RC_FORCE_SSE3"))
    target_flags |= MASK_SSE3;
  /* APPLE LOCAL end 4200243 */
}
| |
| /* APPLE LOCAL begin optimization pragmas 3124235/3420242 */ |
| /* Version of the above for use from #pragma optimization_level. Only |
| per-function flags are reset. */ |
| #if TARGET_MACHO |
| void |
| reset_optimization_options (int level ATTRIBUTE_UNUSED, int size) |
| { |
| /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to |
| make the problem with not enough registers even worse. */ |
| /* LLVM LOCAL begin */ |
| #ifndef ENABLE_LLVM |
| #ifdef INSN_SCHEDULING |
| if (level > 1) |
| flag_schedule_insns = 0; |
| #endif |
| #endif |
| /* LLVM LOCAL end */ |
| |
| /* APPLE LOCAL begin pragma fenv */ |
| /* Trapping math is not needed by many users, and is expensive. |
| C99 permits us to default it off and we do that. It is |
| turned on when <fenv.h> is included (see darwin_pragma_fenv |
| in darwin-c.c). */ |
| flag_trapping_math = 0; |
| /* APPLE LOCAL end pragma fenv */ |
| |
  /* The default values of these switches depend on TARGET_64BIT,
     which was set earlier and is not reset here.  */
| if (optimize >= 1) |
| { |
| if (TARGET_64BIT) |
| flag_omit_frame_pointer = 1; |
| else |
| flag_omit_frame_pointer = 0; |
| } |
| #ifdef SUBTARGET_OPTIMIZATION_OPTIONS |
| SUBTARGET_OPTIMIZATION_OPTIONS; |
| #endif |
| /* APPLE LOCAL begin 4760857 */ |
| if (size) |
| ix86_cost = &size_cost; |
| else |
| ix86_cost = processor_target_table[ix86_tune].cost; |
| |
| /* Default align_* from the processor table. */ |
| if (align_loops == 0) |
| { |
| align_loops = processor_target_table[ix86_tune].align_loop; |
| align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; |
| } |
| if (align_jumps == 0) |
| { |
| align_jumps = processor_target_table[ix86_tune].align_jump; |
| align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; |
| } |
| /* APPLE LOCAL end 4760857 */ |
| } |
| #endif |
| /* APPLE LOCAL end optimization pragmas 3124235/3420242 */ |
| |
| /* Table of valid machine attributes. */ |
| const struct attribute_spec ix86_attribute_table[] = |
| { |
| /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ |
| /* Stdcall attribute says callee is responsible for popping arguments |
| if they are not variable. */ |
| { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
| /* Fastcall attribute says callee is responsible for popping arguments |
| if they are not variable. */ |
| { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
| /* Cdecl attribute says the callee is a normal C declaration */ |
| { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
| /* Regparm attribute specifies how many integer arguments are to be |
| passed in registers. */ |
| { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute }, |
| /* APPLE LOCAL begin regparmandstackparm */ |
| /* regparmandstackparm means two entry points; a traditional stack-based |
| one, and another, with a mangled name, that employs regparm and |
| sseregparm. */ |
| { "regparmandstackparm", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
| { "regparmandstackparmee", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
| /* APPLE LOCAL end regparmandstackparm */ |
| /* Sseregparm attribute says we are using x86_64 calling conventions |
| for FP arguments. */ |
| { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
| /* force_align_arg_pointer says this function realigns the stack at entry. */ |
| { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, |
| false, true, true, ix86_handle_cconv_attribute }, |
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, |
| { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, |
| { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, |
| #endif |
| { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, |
| { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, |
| #ifdef SUBTARGET_ATTRIBUTE_TABLE |
| SUBTARGET_ATTRIBUTE_TABLE, |
| #endif |
| { NULL, 0, 0, false, false, false, NULL } |
| }; |
| |
| /* Decide whether we can make a sibling call to a function. DECL is the |
| declaration of the function being targeted by the call and EXP is the |
| CALL_EXPR representing the call. */ |
| |
| static bool |
| ix86_function_ok_for_sibcall (tree decl, tree exp) |
| { |
| tree func; |
| rtx a, b; |
| |
| /* APPLE LOCAL begin indirect sibcall 4087330 */ |
| /* If we are generating position-independent code, we cannot sibcall |
| optimize any indirect call, or a direct call to a global function, |
| as the PLT requires %ebx be live. (Darwin does not have a PLT.) */ |
| if (!TARGET_MACHO |
| && !TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl))) |
| return false; |
| /* APPLE LOCAL end indirect sibcall 4087330 */ |
| |
| if (decl) |
| func = decl; |
| else |
| { |
| func = TREE_TYPE (TREE_OPERAND (exp, 0)); |
| if (POINTER_TYPE_P (func)) |
| func = TREE_TYPE (func); |
| } |
| |
  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
| make a sibcall from a function that doesn't return a float to a |
| function that does or, conversely, from a function that does return |
| a float to a function that doesn't; the necessary stack adjustment |
| would not be executed. This is also the place we notice |
| differences in the return value ABI. Note that it is ok for one |
| of the functions to have void return type as long as the return |
| value of the other is passed in a register. */ |
| a = ix86_function_value (TREE_TYPE (exp), func, false); |
| b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), |
| cfun->decl, false); |
| if (STACK_REG_P (a) || STACK_REG_P (b)) |
| { |
| if (!rtx_equal_p (a, b)) |
| return false; |
| } |
| else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) |
| ; |
| else if (!rtx_equal_p (a, b)) |
| return false; |
| |
  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that no
     such register is used for passing parameters.  */
| if (!decl && !TARGET_64BIT) |
| { |
| tree type; |
| |
| /* We're looking at the CALL_EXPR, we need the type of the function. */ |
| type = TREE_OPERAND (exp, 0); /* pointer expression */ |
| type = TREE_TYPE (type); /* pointer type */ |
| type = TREE_TYPE (type); /* function type */ |
| |
| if (ix86_function_regparm (type, NULL) >= 3) |
| { |
| /* ??? Need to count the actual number of registers to be used, |
| not the possible number of registers. Fix later. */ |
| return false; |
| } |
| } |
| |
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| /* Dllimport'd functions are also called indirectly. */ |
| if (decl && DECL_DLLIMPORT_P (decl) |
| && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3) |
| return false; |
| #endif |
| |
  /* If we force-aligned the stack, then sibcalling would unalign the
| stack, which may break the called function. */ |
| if (cfun->machine->force_align_arg_pointer) |
| return false; |
| |
| /* Otherwise okay. That also includes certain types of indirect calls. */ |
| return true; |
| } |
| |
| /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm" |
| calling convention attributes; |
| arguments as in struct attribute_spec.handler. */ |
| |
| static tree |
| ix86_handle_cconv_attribute (tree *node, tree name, |
| tree args, |
| int flags ATTRIBUTE_UNUSED, |
| bool *no_add_attrs) |
| { |
| if (TREE_CODE (*node) != FUNCTION_TYPE |
| && TREE_CODE (*node) != METHOD_TYPE |
| && TREE_CODE (*node) != FIELD_DECL |
| && TREE_CODE (*node) != TYPE_DECL) |
| { |
| warning (OPT_Wattributes, "%qs attribute only applies to functions", |
| IDENTIFIER_POINTER (name)); |
| *no_add_attrs = true; |
| return NULL_TREE; |
| } |
| |
| /* Can combine regparm with all attributes but fastcall. */ |
| if (is_attribute_p ("regparm", name)) |
| { |
| tree cst; |
| |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and regparm attributes are not compatible"); |
| } |
| |
| /* APPLE LOCAL begin regparmandstackparm */ |
| if (!TARGET_64BIT |
| && (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node)) |
| || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (*node)))) |
| { |
| error ("regparmandstackparm and regparm attributes are not compatible"); |
| } |
| /* APPLE LOCAL end regparmandstackparm */ |
| |
| cst = TREE_VALUE (args); |
| if (TREE_CODE (cst) != INTEGER_CST) |
| { |
| warning (OPT_Wattributes, |
| "%qs attribute requires an integer constant argument", |
| IDENTIFIER_POINTER (name)); |
| *no_add_attrs = true; |
| } |
| else if (compare_tree_int (cst, REGPARM_MAX) > 0) |
| { |
| warning (OPT_Wattributes, "argument to %qs attribute larger than %d", |
| IDENTIFIER_POINTER (name), REGPARM_MAX); |
| *no_add_attrs = true; |
| } |
| |
| if (!TARGET_64BIT |
| && lookup_attribute (ix86_force_align_arg_pointer_string, |
| TYPE_ATTRIBUTES (*node)) |
| && compare_tree_int (cst, REGPARM_MAX-1)) |
| { |
| error ("%s functions limited to %d register parameters", |
| ix86_force_align_arg_pointer_string, REGPARM_MAX-1); |
| } |
| |
| return NULL_TREE; |
| } |
| |
| if (TARGET_64BIT) |
| { |
| /* LLVM LOCAL begin mainline */ |
| #ifdef ENABLE_LLVM |
| /* Do not warn when emulating the MS ABI. */ |
| if (!TARGET_64BIT_MS_ABI) |
| #endif |
| warning (OPT_Wattributes, "%qs attribute ignored", |
| IDENTIFIER_POINTER (name)); |
| /* LLVM LOCAL end mainline */ |
| *no_add_attrs = true; |
| return NULL_TREE; |
| } |
| |
| /* Can combine fastcall with stdcall (redundant) and sseregparm. */ |
| /* APPLE LOCAL begin regparmandstackparm */ |
| if (is_attribute_p ("fastcall", name) |
| || is_attribute_p ("regparmandstackparm", name)) |
| /* APPLE LOCAL end regparmandstackparm */ |
| { |
| if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and cdecl attributes are not compatible"); |
| } |
| if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and stdcall attributes are not compatible"); |
| } |
| if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and regparm attributes are not compatible"); |
| } |
| } |
| |
| /* Can combine stdcall with fastcall (redundant), regparm and |
| sseregparm. */ |
| else if (is_attribute_p ("stdcall", name)) |
| { |
| if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("stdcall and cdecl attributes are not compatible"); |
| } |
| /* APPLE LOCAL begin regparmandstackparm */ |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)) |
| || lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node))) |
| /* APPLE LOCAL end regparmandstackparm */ |
| { |
| error ("stdcall and fastcall attributes are not compatible"); |
| } |
| } |
| |
| /* Can combine cdecl with regparm and sseregparm. */ |
| else if (is_attribute_p ("cdecl", name)) |
| { |
| if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("stdcall and cdecl attributes are not compatible"); |
| } |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and cdecl attributes are not compatible"); |
| } |
| } |
| |
| /* Can combine sseregparm with all attributes. */ |
| |
| return NULL_TREE; |
| } |
| |
| /* Return 0 if the attributes for two types are incompatible, 1 if they |
| are compatible, and 2 if they are nearly compatible (which causes a |
| warning to be generated). */ |
| |
| static int |
| ix86_comp_type_attributes (tree type1, tree type2) |
| { |
| /* Check for mismatch of non-default calling convention. */ |
| const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; |
| |
| if (TREE_CODE (type1) != FUNCTION_TYPE) |
| return 1; |
| |
| /* Check for mismatched fastcall/regparm types. */ |
| if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1)) |
| != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2))) |
| || (ix86_function_regparm (type1, NULL) |
| != ix86_function_regparm (type2, NULL))) |
| return 0; |
| |
| /* Check for mismatched sseregparm types. */ |
| if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1)) |
| != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2))) |
| return 0; |
| |
| /* Check for mismatched return types (cdecl vs stdcall). */ |
| if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) |
| != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) |
| return 0; |
| |
| return 1; |
| } |
| |
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling the function indirectly
   or considering a libcall.  */
| |
| static int |
| ix86_function_regparm (tree type, tree decl) |
| { |
| tree attr; |
  /* APPLE LOCAL begin MERGE FIXME audit to ensure that it's ok

     We had local_regparm but the FSF didn't, and there didn't seem to
     be a merge conflict, so something is strange.  These seem to be just
     normal apple local changes.  I asked Stuart about them in email.  */
| int regparm = ix86_regparm; |
| bool user_convention = false; |
| |
| if (!TARGET_64BIT) |
| { |
| attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); |
| if (attr) |
| { |
| regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); |
| user_convention = true; |
| } |
| |
| /* APPLE LOCAL begin regparmandstackparm */ |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)) |
| || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type))) |
| /* APPLE LOCAL end regparmandstackparm */ |
| { |
| regparm = 2; |
| user_convention = true; |
| } |
| |
| /* Use register calling convention for local functions when possible. */ |
| if (!TARGET_64BIT && !user_convention && decl |
| && flag_unit_at_a_time && !profile_flag) |
| { |
| struct cgraph_local_info *i = cgraph_local_info (decl); |
| if (i && i->local) |
| { |
| int local_regparm, globals = 0, regno; |
| |
| /* Make sure no regparm register is taken by a global register |
| variable. */ |
| for (local_regparm = 0; local_regparm < 3; local_regparm++) |
| if (global_regs[local_regparm]) |
| break; |
	    /* We can't use regparm(3) for nested functions as these use
	       the static chain pointer in the third argument.  */
| if (local_regparm == 3 |
| /* APPLE LOCAL begin mainline */ |
| && (decl_function_context (decl) |
| || ix86_force_align_arg_pointer) |
| /* APPLE LOCAL end mainline */ |
| && !DECL_NO_STATIC_CHAIN (decl)) |
| local_regparm = 2; |
	    /* If the function realigns its stack pointer, the
| prologue will clobber %ecx. If we've already |
| generated code for the callee, the callee |
| DECL_STRUCT_FUNCTION is gone, so we fall back to |
| scanning the attributes for the self-realigning |
| property. */ |
| if ((DECL_STRUCT_FUNCTION (decl) |
| /* MERGE FIXME was in our version, but not in FSF 2006-05-23 */ |
| && DECL_STRUCT_FUNCTION (decl)->machine |
| && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer) |
| || (!DECL_STRUCT_FUNCTION (decl) |
| && lookup_attribute (ix86_force_align_arg_pointer_string, |
| TYPE_ATTRIBUTES (TREE_TYPE (decl))))) |
| local_regparm = 2; |
	    /* Each global register variable increases register pressure,
	       so the more global reg vars there are, the less useful the
	       regparm optimization becomes, unless requested by the user
	       explicitly.  */
| for (regno = 0; regno < 6; regno++) |
| if (global_regs[regno]) |
| globals++; |
| local_regparm |
| = globals < local_regparm ? local_regparm - globals : 0; |
| |
| if (local_regparm > regparm) |
| regparm = local_regparm; |
| } |
| } |
| } |
| /* APPLE LOCAL end MERGE FIXME audit to ensure that it's ok */ |
| return regparm; |
| } |
| |
/* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling the function
   indirectly or considering a libcall.  Otherwise return 0.  */
| |
| static int |
| ix86_function_sseregparm (tree type, tree decl) |
| { |
| /* Use SSE registers to pass SFmode and DFmode arguments if requested |
| by the sseregparm attribute. */ |
| if (TARGET_SSEREGPARM |
| || (type |
| && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) |
| { |
| if (!TARGET_SSE) |
| { |
| if (decl) |
| error ("Calling %qD with attribute sseregparm without " |
| "SSE/SSE2 enabled", decl); |
| else |
| error ("Calling %qT with attribute sseregparm without " |
| "SSE/SSE2 enabled", type); |
| return 0; |
| } |
| |
| return 2; |
| } |
| |
| /* APPLE LOCAL begin regparmandstackparm */ |
| if (type && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type))) |
| return 2; |
| /* APPLE LOCAL end regparmandstackparm */ |
| |
| /* For local functions, pass up to SSE_REGPARM_MAX SFmode |
| (and DFmode for SSE2) arguments in SSE registers, |
| even for 32-bit targets. */ |
| if (!TARGET_64BIT && decl |
| && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag) |
| { |
| struct cgraph_local_info *i = cgraph_local_info (decl); |
| if (i && i->local) |
| return TARGET_SSE2 ? 2 : 1; |
| } |
| |
| return 0; |
| } |
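| |
| /* Illustrative example (a sketch): with sseregparm, scalar float |
| arguments use SSE registers on ia32, e.g. |
| |
| double __attribute__ ((sseregparm)) scale (double x, double y); |
| |
| would receive X and Y in %xmm0 and %xmm1, provided SSE2 is |
| enabled; SCALE is a hypothetical function name. */ |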
| |
| /* Return true if EAX is live at the start of the function. Used by |
| ix86_expand_prologue to determine if we need special help before |
| calling allocate_stack_worker. */ |
| |
| static bool |
| ix86_eax_live_at_start_p (void) |
| { |
| /* Cheat. Don't bother working forward from ix86_function_regparm |
| to the function type to whether an actual argument is located in |
| eax. Instead just look at cfg info, which is still close enough |
| to correct at this point. This gives false positives for broken |
| functions that might use uninitialized data that happens to be |
| allocated in eax, but who cares? */ |
| return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0); |
| } |
| |
| /* Value is the number of bytes of arguments automatically |
| popped when returning from a subroutine call. |
| FUNDECL is the declaration node of the function (as a tree), |
| FUNTYPE is the data type of the function (as a tree), |
| or for a library call it is an identifier node for the subroutine name. |
| SIZE is the number of bytes of arguments passed on the stack. |
| |
| On the 80386, the RTD insn may be used to pop them if the number |
| of args is fixed, but if the number is variable then the caller |
| must pop them all. RTD can't be used for library calls now |
| because the library is compiled with the Unix compiler. |
| Use of RTD is a selectable option, since it is incompatible with |
| standard Unix calling sequences. If the option is not selected, |
| the caller must always pop the args. |
| |
| The attribute stdcall is equivalent to RTD on a per module basis. */ |
| |
| int |
| ix86_return_pops_args (tree fundecl, tree funtype, int size) |
| { |
| int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); |
| |
| /* Cdecl functions override -mrtd, and never pop the stack. */ |
| if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) |
| { |
| |
| /* Stdcall and fastcall functions will pop the stack if not |
| variable args. */ |
| if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) |
| || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))) |
| rtd = 1; |
| |
| if (rtd |
| && (TYPE_ARG_TYPES (funtype) == NULL_TREE |
| || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype))) |
| == void_type_node))) |
| return size; |
| } |
| |
| /* Lose any fake structure return argument if it is passed on the stack. */ |
| if (aggregate_value_p (TREE_TYPE (funtype), fundecl) |
| && !TARGET_64BIT |
| && !KEEP_AGGREGATE_RETURN_POINTER) |
| { |
| int nregs = ix86_function_regparm (funtype, fundecl); |
| |
| if (!nregs) |
| return GET_MODE_SIZE (Pmode); |
| } |
| |
| return 0; |
| } |
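| |
| /* Illustrative example (not part of the build): for |
| |
| void __attribute__ ((stdcall)) g (int a, int b); |
| |
| this function returns 8, so the callee ends in `ret $8' and pops |
| its own arguments; for a default cdecl function it returns 0 and |
| the caller cleans up the stack. */ |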
| |
| /* Argument support functions. */ |
| |
| /* Return true when register may be used to pass function parameters. */ |
| bool |
| ix86_function_arg_regno_p (int regno) |
| { |
| int i; |
| if (!TARGET_64BIT) |
| { |
| if (TARGET_MACHO) |
| return (regno < REGPARM_MAX |
| || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); |
| else |
| return (regno < REGPARM_MAX |
| || (TARGET_MMX && MMX_REGNO_P (regno) |
| && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) |
| || (TARGET_SSE && SSE_REGNO_P (regno) |
| && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); |
| } |
| |
| if (TARGET_MACHO) |
| { |
| if (SSE_REGNO_P (regno) && TARGET_SSE) |
| return true; |
| } |
| else |
| { |
| if (TARGET_SSE && SSE_REGNO_P (regno) |
| && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) |
| return true; |
| } |
| /* RAX is used as a hidden argument to varargs functions. */ |
| if (!regno) |
| return true; |
| for (i = 0; i < REGPARM_MAX; i++) |
| if (regno == x86_64_int_parameter_registers[i]) |
| return true; |
| return false; |
| } |
| |
| /* Return if we do not know how to pass TYPE solely in registers. */ |
| |
| static bool |
| ix86_must_pass_in_stack (enum machine_mode mode, tree type) |
| { |
| if (must_pass_in_stack_var_size_or_pad (mode, type)) |
| return true; |
| |
| /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! |
| The layout_type routine is crafty and tries to trick us into passing |
| currently unsupported vector types on the stack by using TImode. */ |
| return (!TARGET_64BIT && mode == TImode |
| && type && TREE_CODE (type) != VECTOR_TYPE); |
| } |
| |
| /* Initialize a variable CUM of type CUMULATIVE_ARGS |
| for a call to a function whose data type is FNTYPE. |
| For a library call, FNTYPE is 0. */ |
| |
| void |
| init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ |
| tree fntype, /* tree ptr for function decl */ |
| rtx libname, /* SYMBOL_REF of library name or 0 */ |
| tree fndecl) |
| { |
| static CUMULATIVE_ARGS zero_cum; |
| tree param, next_param; |
| |
| if (TARGET_DEBUG_ARG) |
| { |
| fprintf (stderr, "\ninit_cumulative_args ("); |
| if (fntype) |
| fprintf (stderr, "fntype code = %s, ret code = %s", |
| tree_code_name[(int) TREE_CODE (fntype)], |
| tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]); |
| else |
| fprintf (stderr, "no fntype"); |
| |
| if (libname) |
| fprintf (stderr, ", libname = %s", XSTR (libname, 0)); |
| } |
| |
| *cum = zero_cum; |
| |
| /* Set up the number of registers to use for passing arguments. */ |
| cum->nregs = ix86_regparm; |
| if (TARGET_SSE) |
| cum->sse_nregs = SSE_REGPARM_MAX; |
| if (TARGET_MMX) |
| cum->mmx_nregs = MMX_REGPARM_MAX; |
| cum->warn_sse = true; |
| cum->warn_mmx = true; |
| cum->maybe_vaarg = false; |
| |
| /* Use ecx and edx registers if function has fastcall attribute, |
| else look for regparm information. */ |
| if (fntype && !TARGET_64BIT) |
| { |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) |
| { |
| cum->nregs = 2; |
| cum->fastcall = 1; |
| } |
| else |
| cum->nregs = ix86_function_regparm (fntype, fndecl); |
| } |
| |
| /* Set up the number of SSE registers used for passing SFmode |
| and DFmode arguments. Warn for mismatching ABI. */ |
| cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl); |
| |
| /* Determine if this function has variable arguments.  This is |
| indicated by the last argument being 'void_type_node' if there |
| are no variable arguments.  If there are variable arguments, then |
| we won't pass anything in registers in 32-bit mode. */ |
| |
| if (cum->nregs || cum->mmx_nregs || cum->sse_nregs) |
| { |
| for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; |
| param != 0; param = next_param) |
| { |
| next_param = TREE_CHAIN (param); |
| if (next_param == 0 && TREE_VALUE (param) != void_type_node) |
| { |
| if (!TARGET_64BIT) |
| { |
| cum->nregs = 0; |
| cum->sse_nregs = 0; |
| cum->mmx_nregs = 0; |
| cum->warn_sse = 0; |
| cum->warn_mmx = 0; |
| cum->fastcall = 0; |
| cum->float_in_sse = 0; |
| } |
| cum->maybe_vaarg = true; |
| } |
| } |
| } |
| if ((!fntype && !libname) |
| || (fntype && !TYPE_ARG_TYPES (fntype))) |
| cum->maybe_vaarg = true; |
| |
| if (TARGET_DEBUG_ARG) |
| fprintf (stderr, ", nregs=%d )\n", cum->nregs); |
| |
| return; |
| } |
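| |
| /* Illustrative example (a sketch): for |
| |
| void __attribute__ ((fastcall)) h (int a, int b); |
| |
| the setup above yields nregs = 2 with cum->fastcall set, so A and |
| B arrive in %ecx and %edx rather than on the stack. */ |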
| |
| /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. |
| But in the case of vector types, it is some vector mode. |
| |
| When we have only some of our vector isa extensions enabled, then there |
| are some modes for which vector_mode_supported_p is false. For these |
| modes, the generic vector support in gcc will choose some non-vector mode |
| in order to implement the type. By computing the natural mode, we'll |
| select the proper ABI location for the operand and not depend on whatever |
| the middle-end decides to do with these vector types. */ |
| |
| static enum machine_mode |
| type_natural_mode (tree type) |
| { |
| enum machine_mode mode = TYPE_MODE (type); |
| |
| if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (type); |
| if ((size == 8 || size == 16) |
| /* ??? Generic code allows us to create width 1 vectors. Ignore. */ |
| && TYPE_VECTOR_SUBPARTS (type) > 1) |
| { |
| enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); |
| |
| if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) |
| mode = MIN_MODE_VECTOR_FLOAT; |
| else |
| mode = MIN_MODE_VECTOR_INT; |
| |
| /* Get the mode which has this inner mode and number of units. */ |
| for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) |
| if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) |
| && GET_MODE_INNER (mode) == innermode) |
| return mode; |
| |
| gcc_unreachable (); |
| } |
| } |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| /* Pass V1DImode objects as DImode. This is for compatibility. */ |
| if (TREE_CODE (type) == VECTOR_TYPE && mode == V1DImode && !TARGET_64BIT) |
| return DImode; |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
| |
| return mode; |
| } |
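| |
| /* Illustrative example: given |
| |
| typedef float v4f __attribute__ ((vector_size (16))); |
| |
| TYPE_MODE may be a non-vector mode when SSE is disabled, but |
| type_natural_mode still computes V4SFmode, so the ABI location |
| of the operand does not depend on which ISA extensions happen |
| to be enabled. */ |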
| |
| /* We want to pass a value in REGNO whose "natural" mode is MODE. However, |
| this may not agree with the mode that the type system has chosen for the |
| register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can |
| go ahead and use it. Otherwise we have to build a PARALLEL instead. */ |
| |
| static rtx |
| gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode, |
| unsigned int regno) |
| { |
| rtx tmp; |
| |
| if (orig_mode != BLKmode) |
| tmp = gen_rtx_REG (orig_mode, regno); |
| else |
| { |
| tmp = gen_rtx_REG (mode, regno); |
| tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); |
| tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); |
| } |
| |
| return tmp; |
| } |
| |
| /* x86-64 register passing implementation.  See the x86-64 ABI for |
| details.  The goal of this code is to classify each 8 bytes of an |
| incoming argument by register class and assign registers |
| accordingly. */ |
| |
| /* Return the union class of CLASS1 and CLASS2. |
| See the x86-64 PS ABI for details. */ |
| |
| static enum x86_64_reg_class |
| merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) |
| { |
| /* Rule #1: If both classes are equal, this is the resulting class. */ |
| if (class1 == class2) |
| return class1; |
| |
| /* Rule #2: If one of the classes is NO_CLASS, the resulting class is |
| the other class. */ |
| if (class1 == X86_64_NO_CLASS) |
| return class2; |
| if (class2 == X86_64_NO_CLASS) |
| return class1; |
| |
| /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ |
| if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) |
| return X86_64_MEMORY_CLASS; |
| |
| /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ |
| if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) |
| || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) |
| return X86_64_INTEGERSI_CLASS; |
| /* LLVM LOCAL begin */ |
| if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS |
| || class1 == X86_64_POINTER_CLASS |
| || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS |
| || class2 == X86_64_POINTER_CLASS) |
| /* LLVM LOCAL end */ |
| return X86_64_INTEGER_CLASS; |
| |
| /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, |
| MEMORY is used. */ |
| if (class1 == X86_64_X87_CLASS |
| || class1 == X86_64_X87UP_CLASS |
| || class1 == X86_64_COMPLEX_X87_CLASS |
| || class2 == X86_64_X87_CLASS |
| || class2 == X86_64_X87UP_CLASS |
| || class2 == X86_64_COMPLEX_X87_CLASS) |
| return X86_64_MEMORY_CLASS; |
| |
| /* Rule #6: Otherwise class SSE is used. */ |
| return X86_64_SSE_CLASS; |
| } |
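| |
| /* Illustrative example: for union { int i; float f; } the single |
| eightbyte is classified both INTEGERSI (from I) and SSESF (from |
| F); rule #4 merges the pair to INTEGERSI, so the union is passed |
| in one integer register. */ |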
| |
| /* Classify the argument of type TYPE and mode MODE. |
| CLASSES will be filled by the register class used to pass each word |
| of the operand. The number of words is returned. In case the parameter |
| should be passed in memory, 0 is returned. As a special case for zero |
| sized containers, classes[0] will be NO_CLASS and 1 is returned. |
| |
| BIT_OFFSET is used internally for handling records; it gives the |
| offset in bits, modulo 256, to avoid overflow cases. |
| |
| See the x86-64 PS ABI for details. |
| */ |
| |
| static int |
| classify_argument (enum machine_mode mode, tree type, |
| enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) |
| { |
| HOST_WIDE_INT bytes = |
| (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| |
| /* Variable sized entities are always passed/returned in memory. */ |
| if (bytes < 0) |
| return 0; |
| |
| if (mode != VOIDmode |
| && targetm.calls.must_pass_in_stack (mode, type)) |
| return 0; |
| |
| if (type && AGGREGATE_TYPE_P (type)) |
| { |
| int i; |
| tree field; |
| enum x86_64_reg_class subclasses[MAX_CLASSES]; |
| |
| /* On x86-64 we pass structures larger than 16 bytes on the stack. */ |
| if (bytes > 16) |
| return 0; |
| |
| for (i = 0; i < words; i++) |
| classes[i] = X86_64_NO_CLASS; |
| |
| /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to |
| signal the memory class, so handle them as a special case. */ |
| if (!words) |
| { |
| classes[0] = X86_64_NO_CLASS; |
| return 1; |
| } |
| |
| /* Classify each field of record and merge classes. */ |
| switch (TREE_CODE (type)) |
| { |
| case RECORD_TYPE: |
| /* For classes first merge in the field of the subclasses. */ |
| if (TYPE_BINFO (type)) |
| { |
| tree binfo, base_binfo; |
| int basenum; |
| |
| for (binfo = TYPE_BINFO (type), basenum = 0; |
| BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++) |
| { |
| int num; |
| int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8; |
| tree type = BINFO_TYPE (base_binfo); |
| |
| num = classify_argument (TYPE_MODE (type), |
| type, subclasses, |
| (offset + bit_offset) % 256); |
| if (!num) |
| return 0; |
| for (i = 0; i < num; i++) |
| { |
| int pos = (offset + (bit_offset % 64)) / 8 / 8; |
| classes[i + pos] = |
| merge_classes (subclasses[i], classes[i + pos]); |
| } |
| } |
| } |
| /* And now merge the fields of the structure. */ |
| for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) |
| { |
| if (TREE_CODE (field) == FIELD_DECL) |
| { |
| int num; |
| |
| if (TREE_TYPE (field) == error_mark_node) |
| continue; |
| |
| /* Bitfields are always classified as integer. Handle them |
| early, since later code would consider them to be |
| misaligned integers. */ |
| if (DECL_BIT_FIELD (field)) |
| { |
| for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; |
| i < ((int_bit_position (field) + (bit_offset % 64)) |
| + tree_low_cst (DECL_SIZE (field), 0) |
| + 63) / 8 / 8; i++) |
| classes[i] = |
| merge_classes (X86_64_INTEGER_CLASS, |
| classes[i]); |
| } |
| else |
| { |
| num = classify_argument (TYPE_MODE (TREE_TYPE (field)), |
| TREE_TYPE (field), subclasses, |
| (int_bit_position (field) |
| + bit_offset) % 256); |
| if (!num) |
| return 0; |
| for (i = 0; i < num; i++) |
| { |
| int pos = |
| (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; |
| classes[i + pos] = |
| merge_classes (subclasses[i], classes[i + pos]); |
| } |
| } |
| } |
| } |
| break; |
| |
| case ARRAY_TYPE: |
| /* Arrays are handled as small records. */ |
| { |
| int num; |
| num = classify_argument (TYPE_MODE (TREE_TYPE (type)), |
| TREE_TYPE (type), subclasses, bit_offset); |
| if (!num) |
| return 0; |
| |
| /* LLVM LOCAL begin 7387470 */ |
| for (i = 0; i < words; i++) |
| classes[i] = subclasses[i % num]; |
| |
| /* If the first register has a 32-bit class, but there are |
| more than 32-bits in the type, upgrade it to the |
| corresponding 64-bit class. */ |
| if ((bytes > 4) && |
| ((subclasses[0] == X86_64_SSESF_CLASS) || |
| (subclasses[0] == X86_64_INTEGERSI_CLASS))) { |
| classes[0] = (subclasses[0] == X86_64_SSESF_CLASS) ? |
| X86_64_SSE_CLASS : X86_64_INTEGER_CLASS; |
| /* subclasses[1] is only valid if num == 2. If it's |
| invalid, or it's set to a 32-bit class, AND there |
| are more than twelve bytes in the type, upgrade the |
| second register to 64-bits. (If we got here, the |
| first register already has a 64-bit class.) */ |
| if (bytes > 12 && |
| (num == 1 || |
| subclasses[1] == X86_64_SSESF_CLASS || |
| subclasses[1] == X86_64_INTEGERSI_CLASS)) |
| classes[1] = classes[0]; |
| } |
| } |
| break; |
| /* LLVM LOCAL end 7387470 */ |
| case UNION_TYPE: |
| case QUAL_UNION_TYPE: |
| /* Unions are similar to RECORD_TYPE but the offset is always 0. */ |
| |
| /* Unions are not derived. */ |
| gcc_assert (!TYPE_BINFO (type) |
| || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type))); |
| for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) |
| { |
| if (TREE_CODE (field) == FIELD_DECL) |
| { |
| int num; |
| |
| if (TREE_TYPE (field) == error_mark_node) |
| continue; |
| |
| num = classify_argument (TYPE_MODE (TREE_TYPE (field)), |
| TREE_TYPE (field), subclasses, |
| bit_offset); |
| if (!num) |
| return 0; |
| for (i = 0; i < num; i++) |
| classes[i] = merge_classes (subclasses[i], classes[i]); |
| } |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Final merger cleanup. */ |
| for (i = 0; i < words; i++) |
| { |
| /* If one class is MEMORY, everything should be passed in |
| memory. */ |
| if (classes[i] == X86_64_MEMORY_CLASS) |
| return 0; |
| |
| /* The X86_64_SSEUP_CLASS should always be preceded by |
| X86_64_SSE_CLASS. */ |
| if (classes[i] == X86_64_SSEUP_CLASS |
| && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) |
| classes[i] = X86_64_SSE_CLASS; |
| |
| /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ |
| if (classes[i] == X86_64_X87UP_CLASS |
| && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) |
| classes[i] = X86_64_SSE_CLASS; |
| } |
| return words; |
| } |
| |
| /* Compute the alignment needed.  We align all types to natural |
| boundaries, with the exception of XFmode, which is aligned to |
| 64 bits. */ |
| if (mode != VOIDmode && mode != BLKmode) |
| { |
| int mode_alignment = GET_MODE_BITSIZE (mode); |
| |
| if (mode == XFmode) |
| mode_alignment = 128; |
| else if (mode == XCmode) |
| mode_alignment = 256; |
| if (COMPLEX_MODE_P (mode)) |
| mode_alignment /= 2; |
| /* Misaligned fields are always returned in memory. */ |
| if (bit_offset % mode_alignment) |
| return 0; |
| } |
| |
| /* For V1xx modes, just use the base mode. */ |
| if (VECTOR_MODE_P (mode) |
| && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) |
| mode = GET_MODE_INNER (mode); |
| |
| /* Classification of atomic types. */ |
| switch (mode) |
| { |
| case SDmode: |
| case DDmode: |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case TDmode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| return 2; |
| case DImode: |
| /* LLVM LOCAL begin */ |
| if (POINTER_TYPE_P(type)) |
| { |
| classes[0] = X86_64_POINTER_CLASS; |
| return 1; |
| } |
| /* fall through */ |
| /* LLVM LOCAL end */ |
| case SImode: |
| case HImode: |
| case QImode: |
| case CSImode: |
| case CHImode: |
| case CQImode: |
| if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) |
| classes[0] = X86_64_INTEGERSI_CLASS; |
| else |
| classes[0] = X86_64_INTEGER_CLASS; |
| return 1; |
| case CDImode: |
| case TImode: |
| classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
| return 2; |
| case CTImode: |
| return 0; |
| case SFmode: |
| if (!(bit_offset % 64)) |
| classes[0] = X86_64_SSESF_CLASS; |
| else |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case DFmode: |
| classes[0] = X86_64_SSEDF_CLASS; |
| return 1; |
| case XFmode: |
| classes[0] = X86_64_X87_CLASS; |
| classes[1] = X86_64_X87UP_CLASS; |
| return 2; |
| case TFmode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| return 2; |
| case SCmode: |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case DCmode: |
| classes[0] = X86_64_SSEDF_CLASS; |
| classes[1] = X86_64_SSEDF_CLASS; |
| return 2; |
| case XCmode: |
| classes[0] = X86_64_COMPLEX_X87_CLASS; |
| return 1; |
| case TCmode: |
| /* This mode is larger than 16 bytes. */ |
| return 0; |
| case V4SFmode: |
| case V4SImode: |
| case V16QImode: |
| case V8HImode: |
| case V2DFmode: |
| case V2DImode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| return 2; |
| case V2SFmode: |
| case V2SImode: |
| case V4HImode: |
| case V8QImode: |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case BLKmode: |
| case VOIDmode: |
| return 0; |
| default: |
| gcc_assert (VECTOR_MODE_P (mode)); |
| |
| if (bytes > 16) |
| return 0; |
| |
| gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); |
| |
| if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) |
| classes[0] = X86_64_INTEGERSI_CLASS; |
| else |
| classes[0] = X86_64_INTEGER_CLASS; |
| classes[1] = X86_64_INTEGER_CLASS; |
| return 1 + (bytes > 8); |
| } |
| } |
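| |
| /* Illustrative example (a sketch of the classification above): |
| |
| struct s { double d; int i; }; |
| |
| occupies two eightbytes; the first is classified SSEDF and the |
| second INTEGER, so the struct is passed in one SSE and one |
| integer register (e.g. %xmm0 and %rdi for a first argument). */ |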
| |
| /* Examine the argument and return the number of registers required in |
| each class.  Return 0 iff the parameter should be passed in memory. */ |
| static int |
| examine_argument (enum machine_mode mode, tree type, int in_return, |
| int *int_nregs, int *sse_nregs) |
| { |
| enum x86_64_reg_class class[MAX_CLASSES]; |
| int n = classify_argument (mode, type, class, 0); |
| |
| *int_nregs = 0; |
| *sse_nregs = 0; |
| if (!n) |
| return 0; |
| for (n--; n >= 0; n--) |
| switch (class[n]) |
| { |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| /* LLVM LOCAL */ |
| case X86_64_POINTER_CLASS: |
| (*int_nregs)++; |
| break; |
| case X86_64_SSE_CLASS: |
| case X86_64_SSESF_CLASS: |
| case X86_64_SSEDF_CLASS: |
| (*sse_nregs)++; |
| break; |
| case X86_64_NO_CLASS: |
| case X86_64_SSEUP_CLASS: |
| break; |
| case X86_64_X87_CLASS: |
| case X86_64_X87UP_CLASS: |
| if (!in_return) |
| return 0; |
| break; |
| case X86_64_COMPLEX_X87_CLASS: |
| return in_return ? 2 : 0; |
| case X86_64_MEMORY_CLASS: |
| gcc_unreachable (); |
| } |
| return 1; |
| } |
| |
| /* Construct container for the argument used by GCC interface. See |
| FUNCTION_ARG for the detailed description. */ |
| |
| static rtx |
| construct_container (enum machine_mode mode, enum machine_mode orig_mode, |
| tree type, int in_return, int nintregs, int nsseregs, |
| const int *intreg, int sse_regno) |
| { |
| /* The following variables hold the static issued_error state. */ |
| static bool issued_sse_arg_error; |
| static bool issued_sse_ret_error; |
| static bool issued_x87_ret_error; |
| |
| enum machine_mode tmpmode; |
| int bytes = |
| (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| enum x86_64_reg_class class[MAX_CLASSES]; |
| int n; |
| int i; |
| int nexps = 0; |
| int needed_sseregs, needed_intregs; |
| rtx exp[MAX_CLASSES]; |
| rtx ret; |
| |
| n = classify_argument (mode, type, class, 0); |
| if (TARGET_DEBUG_ARG) |
| { |
| if (!n) |
| fprintf (stderr, "Memory class\n"); |
| else |
| { |
| fprintf (stderr, "Classes:"); |
| for (i = 0; i < n; i++) |
| { |
| fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]); |
| } |
| fprintf (stderr, "\n"); |
| } |
| } |
| if (!n) |
| return NULL; |
| if (!examine_argument (mode, type, in_return, &needed_intregs, |
| &needed_sseregs)) |
| return NULL; |
| if (needed_intregs > nintregs || needed_sseregs > nsseregs) |
| return NULL; |
| |
| /* We allowed the user to turn off SSE for kernel mode. Don't crash if |
| some less clueful developer tries to use floating-point anyway. */ |
| if (needed_sseregs && !TARGET_SSE) |
| { |
| if (in_return) |
| { |
| if (!issued_sse_ret_error) |
| { |
| error ("SSE register return with SSE disabled"); |
| issued_sse_ret_error = true; |
| } |
| } |
| else if (!issued_sse_arg_error) |
| { |
| error ("SSE register argument with SSE disabled"); |
| issued_sse_arg_error = true; |
| } |
| return NULL; |
| } |
| |
| /* Likewise, error if the ABI requires us to return values in the |
| x87 registers and the user specified -mno-80387. */ |
| if (!TARGET_80387 && in_return) |
| for (i = 0; i < n; i++) |
| if (class[i] == X86_64_X87_CLASS |
| || class[i] == X86_64_X87UP_CLASS |
| || class[i] == X86_64_COMPLEX_X87_CLASS) |
| { |
| if (!issued_x87_ret_error) |
| { |
| error ("x87 register return with x87 disabled"); |
| issued_x87_ret_error = true; |
| } |
| return NULL; |
| } |
| |
| /* First construct the simple cases.  Avoid SCmode, since we want to |
| use a single register to pass this type. */ |
| if (n == 1 && mode != SCmode) |
| switch (class[0]) |
| { |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| /* LLVM LOCAL */ |
| case X86_64_POINTER_CLASS: |
| return gen_rtx_REG (mode, intreg[0]); |
| case X86_64_SSE_CLASS: |
| case X86_64_SSESF_CLASS: |
| case X86_64_SSEDF_CLASS: |
| return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno)); |
| case X86_64_X87_CLASS: |
| case X86_64_COMPLEX_X87_CLASS: |
| return gen_rtx_REG (mode, FIRST_STACK_REG); |
| case X86_64_NO_CLASS: |
| /* Zero sized array, struct or class. */ |
| return NULL; |
| default: |
| gcc_unreachable (); |
| } |
| if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS |
| && mode != BLKmode) |
| return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); |
| if (n == 2 |
| && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS) |
| return gen_rtx_REG (XFmode, FIRST_STACK_REG); |
| if (n == 2 && class[0] == X86_64_INTEGER_CLASS |
| && class[1] == X86_64_INTEGER_CLASS |
| && (mode == CDImode || mode == TImode || mode == TFmode) |
| && intreg[0] + 1 == intreg[1]) |
| return gen_rtx_REG (mode, intreg[0]); |
| |
| /* Otherwise figure out the entries of the PARALLEL. */ |
| for (i = 0; i < n; i++) |
| { |
| switch (class[i]) |
| { |
| case X86_64_NO_CLASS: |
| break; |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| /* LLVM LOCAL */ |
| case X86_64_POINTER_CLASS: |
| /* Merge TImodes on aligned occasions here too. */ |
| if (i * 8 + 8 > bytes) |
| tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); |
| else if (class[i] == X86_64_INTEGERSI_CLASS) |
| tmpmode = SImode; |
| else |
| tmpmode = DImode; |
| /* We've requested 24 bytes we don't have a mode for.  Use DImode. */ |
| if (tmpmode == BLKmode) |
| tmpmode = DImode; |
| exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (tmpmode, *intreg), |
| GEN_INT (i*8)); |
| intreg++; |
| break; |
| case X86_64_SSESF_CLASS: |
| exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (SFmode, |
| SSE_REGNO (sse_regno)), |
| GEN_INT (i*8)); |
| sse_regno++; |
| break; |
| case X86_64_SSEDF_CLASS: |
| exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (DFmode, |
| SSE_REGNO (sse_regno)), |
| GEN_INT (i*8)); |
| sse_regno++; |
| break; |
| case X86_64_SSE_CLASS: |
| if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS) |
| tmpmode = TImode; |
| else |
| tmpmode = DImode; |
| exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (tmpmode, |
| SSE_REGNO (sse_regno)), |
| GEN_INT (i*8)); |
| if (tmpmode == TImode) |
| i++; |
| sse_regno++; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Empty aligned struct, union or class. */ |
| if (nexps == 0) |
| return NULL; |
| |
| ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); |
| for (i = 0; i < nexps; i++) |
| XVECEXP (ret, 0, i) = exp [i]; |
| return ret; |
| } |
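| |
| /* Illustrative sketch: for the struct { double d; int i; } example |
| above, construct_container builds roughly |
| |
| (parallel [(expr_list (reg:DF xmm0) (const_int 0)) |
|            (expr_list (reg:DI di) (const_int 8))]) |
| |
| i.e. a DFmode piece at byte offset 0 in %xmm0 and a DImode piece |
| at byte offset 8 in %rdi; the actual register numbers depend on |
| how many earlier arguments consumed them. */ |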
| |
| /* Update the data in CUM to advance over an argument |
| of mode MODE and data type TYPE. |
| (TYPE is null for libcalls where that information may not be available.) */ |
| |
| void |
| function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
| tree type, int named) |
| { |
| int bytes = |
| (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| |
| if (type) |
| mode = type_natural_mode (type); |
| |
| if (TARGET_DEBUG_ARG) |
| fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, " |
| "mode=%s, named=%d)\n\n", |
| words, cum->words, cum->nregs, cum->sse_nregs, |
| GET_MODE_NAME (mode), named); |
| |
| if (TARGET_64BIT) |
| { |
| int int_nregs, sse_nregs; |
| if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)) |
| cum->words += words; |
| else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) |
| { |
| cum->nregs -= int_nregs; |
| cum->sse_nregs -= sse_nregs; |
| cum->regno += int_nregs; |
| cum->sse_regno += sse_nregs; |
| } |
| else |
| cum->words += words; |
| } |
| else |
| { |
| switch (mode) |
| { |
| default: |
| break; |
| |
| case BLKmode: |
| if (bytes < 0) |
| break; |
| /* FALLTHRU */ |
| |
| case DImode: |
| case SImode: |
| case HImode: |
| case QImode: |
| cum->words += words; |
| cum->nregs -= words; |
| cum->regno += words; |
| |
| if (cum->nregs <= 0) |
| { |
| cum->nregs = 0; |
| cum->regno = 0; |
| } |
| break; |
| |
| case DFmode: |
| if (cum->float_in_sse < 2) |
| break; |
| case SFmode: |
| if (cum->float_in_sse < 1) |
| break; |
| /* FALLTHRU */ |
| |
| case TImode: |
| case V16QImode: |
| case V8HImode: |
| case V4SImode: |
| case V2DImode: |
| case V4SFmode: |
| case V2DFmode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| cum->sse_words += words; |
| cum->sse_nregs -= 1; |
| cum->sse_regno += 1; |
| if (cum->sse_nregs <= 0) |
| { |
| cum->sse_nregs = 0; |
| cum->sse_regno = 0; |
| } |
| } |
| break; |
| |
| case V8QImode: |
| case V4HImode: |
| case V2SImode: |
| case V2SFmode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| cum->mmx_words += words; |
| cum->mmx_nregs -= 1; |
| cum->mmx_regno += 1; |
| if (cum->mmx_nregs <= 0) |
| { |
| cum->mmx_nregs = 0; |
| cum->mmx_regno = 0; |
| } |
| } |
| break; |
| } |
| } |
| } |
| |
| /* Define where to put the arguments to a function. |
| Value is zero to push the argument on the stack, |
| or a hard register in which to store the argument. |
| |
| MODE is the argument's machine mode. |
| TYPE is the data type of the argument (as a tree). |
| This is null for libcalls where that information may |
| not be available. |
| CUM is a variable of type CUMULATIVE_ARGS which gives info about |
| the preceding args and about the function being called. |
| NAMED is nonzero if this argument is a named parameter |
| (otherwise it is an extra parameter matching an ellipsis). */ |
| |
| rtx |
| function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode, |
| tree type, int named) |
| { |
| enum machine_mode mode = orig_mode; |
| rtx ret = NULL_RTX; |
| int bytes = |
| (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| static bool warnedsse, warnedmmx; |
| |
| /* To simplify the code below, represent vector types with a vector mode |
| even if MMX/SSE are not active. */ |
| if (type && TREE_CODE (type) == VECTOR_TYPE) |
| mode = type_natural_mode (type); |
| |
| /* Handle a hidden AL argument containing the number of SSE registers |
| used for varargs x86-64 functions.  For the i386 ABI, just return |
| constm1_rtx to avoid any AL settings. */ |
| if (mode == VOIDmode) |
| { |
| if (TARGET_64BIT) |
| return GEN_INT (cum->maybe_vaarg |
| ? (cum->sse_nregs < 0 |
| ? SSE_REGPARM_MAX |
| : cum->sse_regno) |
| : -1); |
| else |
| return constm1_rtx; |
| } |
| if (TARGET_64BIT) |
| ret = construct_container (mode, orig_mode, type, 0, cum->nregs, |
| cum->sse_nregs, |
| &x86_64_int_parameter_registers [cum->regno], |
| cum->sse_regno); |
| else |
| switch (mode) |
| { |
| /* For now, pass fp/complex values on the stack. */ |
| default: |
| break; |
| |
| case BLKmode: |
| if (bytes < 0) |
| break; |
| /* FALLTHRU */ |
| case DImode: |
| case SImode: |
| case HImode: |
| case QImode: |
| if (words <= cum->nregs) |
| { |
| int regno = cum->regno; |
| |
| /* Fastcall allocates the first two DWORD (SImode) or |
| smaller arguments to ECX and EDX. */ |
| if (cum->fastcall) |
| { |
| if (mode == BLKmode || mode == DImode) |
| break; |
| |
| /* ECX, not EAX, is the first allocated register. */ |
| if (regno == 0) |
| regno = 2; |
| } |
| ret = gen_rtx_REG (mode, regno); |
| } |
| break; |
| case DFmode: |
| if (cum->float_in_sse < 2) |
| break; |
| case SFmode: |
| if (cum->float_in_sse < 1) |
| break; |
| /* FALLTHRU */ |
| case TImode: |
| case V16QImode: |
| case V8HImode: |
| case V4SImode: |
| case V2DImode: |
| case V4SFmode: |
| case V2DFmode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| if (!TARGET_SSE && !warnedsse && cum->warn_sse) |
| { |
| warnedsse = true; |
| warning (0, "SSE vector argument without SSE enabled " |
| "changes the ABI"); |
| } |
| if (cum->sse_nregs) |
| ret = gen_reg_or_parallel (mode, orig_mode, |
| cum->sse_regno + FIRST_SSE_REG); |
| } |
| break; |
| case V8QImode: |
| case V4HImode: |
| case V2SImode: |
| case V2SFmode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) |
| { |
| warnedmmx = true; |
| warning (0, "MMX vector argument without MMX enabled " |
| "changes the ABI"); |
| } |
| if (cum->mmx_nregs) |
| ret = gen_reg_or_parallel (mode, orig_mode, |
| cum->mmx_regno + FIRST_MMX_REG); |
| } |
| break; |
| } |
| |
| if (TARGET_DEBUG_ARG) |
| { |
| fprintf (stderr, |
| "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ", |
| words, cum->words, cum->nregs, GET_MODE_NAME (mode), named); |
| |
| if (ret) |
| print_simple_rtl (stderr, ret); |
| else |
| fprintf (stderr, ", stack"); |
| |
| fprintf (stderr, " )\n"); |
| } |
| |
| return ret; |
| } |
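| |
| /* Illustrative example: for a varargs call on x86-64 the VOIDmode |
| case above supplies the hidden AL value, e.g. printf ("%f", 1.0) |
| makes the caller emit `movl $1, %eax' because one SSE register |
| carries an argument. */ |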
| |
| /* A C expression that indicates when an argument must be passed by |
| reference. If nonzero for an argument, a copy of that argument is |
| made in memory and a pointer to the argument is passed instead of |
| the argument itself. The pointer is passed in whatever way is |
| appropriate for passing a pointer to that type. */ |
| |
| static bool |
| ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, |
| enum machine_mode mode ATTRIBUTE_UNUSED, |
| tree type, bool named ATTRIBUTE_UNUSED) |
| { |
| /* LLVM LOCAL begin mainline */ |
| #ifdef ENABLE_LLVM |
| /* See Windows x64 Software Convention. */ |
| if (TARGET_64BIT_MS_ABI) |
| { |
| int msize = (int) GET_MODE_SIZE (mode); |
| if (type) |
| { |
| /* Arrays are passed by reference. */ |
| if (TREE_CODE (type) == ARRAY_TYPE) |
| return true; |
| |
| if (AGGREGATE_TYPE_P (type)) |
| { |
| /* Structs/unions of sizes other than 8, 16, 32, or 64 bits |
| are passed by reference. */ |
| msize = int_size_in_bytes (type); |
| } |
| } |
| |
| /* __m128 is passed by reference. */ |
| switch (msize) { |
| case 1: case 2: case 4: case 8: |
| break; |
| default: |
| return true; |
| } |
| } |
| else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1) |
| return 1; |
| |
| return 0; |
| #else |
| if (!TARGET_64BIT) |
| return 0; |
| |
| if (type && int_size_in_bytes (type) == -1) |
| { |
| if (TARGET_DEBUG_ARG) |
| fprintf (stderr, "function_arg_pass_by_reference\n"); |
| return 1; |
| } |
| |
| return 0; |
| #endif |
| /* LLVM LOCAL end mainline */ |
| } |
| |
| /* Return true when TYPE should be 128-bit aligned for the 32-bit |
| argument passing ABI.  Only called if TARGET_SSE. */ |
| /* LLVM LOCAL - begin make global */ |
| #ifndef ENABLE_LLVM |
| static |
| #endif |
| bool |
| /* LLVM LOCAL - end make global */ |
| contains_128bit_aligned_vector_p (tree type) |
| { |
| enum machine_mode mode = TYPE_MODE (type); |
| if (SSE_REG_MODE_P (mode) |
| && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) |
| return true; |
| if (TYPE_ALIGN (type) < 128) |
| return false; |
| |
| if (AGGREGATE_TYPE_P (type)) |
| { |
| /* Walk the aggregates recursively. */ |
| switch (TREE_CODE (type)) |
| { |
| case RECORD_TYPE: |
| case UNION_TYPE: |
| case QUAL_UNION_TYPE: |
| { |
| tree field; |
| |
| if (TYPE_BINFO (type)) |
| { |
| tree binfo, base_binfo; |
| int i; |
| |
| for (binfo = TYPE_BINFO (type), i = 0; |
| BINFO_BASE_ITERATE (binfo, i, base_binfo); i++) |
| if (contains_128bit_aligned_vector_p |
| (BINFO_TYPE (base_binfo))) |
| return true; |
| } |
| /* And now walk the fields of the structure. */ |
| for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) |
| { |
| if (TREE_CODE (field) == FIELD_DECL |
| && contains_128bit_aligned_vector_p (TREE_TYPE (field))) |
| return true; |
| } |
| break; |
| } |
| |
| case ARRAY_TYPE: |
| /* Just in case some language passes arrays by value. */ |
| if (contains_128bit_aligned_vector_p (TREE_TYPE (type))) |
| return true; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| return false; |
| } |
| |
| /* Gives the alignment boundary, in bits, of an argument with the |
| specified mode and type. */ |
| |
| int |
| ix86_function_arg_boundary (enum machine_mode mode, tree type) |
| { |
| int align; |
| if (type) |
| align = TYPE_ALIGN (type); |
| else |
| align = GET_MODE_ALIGNMENT (mode); |
| /* APPLE LOCAL begin unbreak ppc64 abi 5103220 */ |
| if (type && integer_zerop (TYPE_SIZE (type))) |
| align = PARM_BOUNDARY; |
| /* APPLE LOCAL end unbreak ppc64 abi 5103220 */ |
| if (align < PARM_BOUNDARY) |
| align = PARM_BOUNDARY; |
| if (!TARGET_64BIT) |
| { |
| /* i386 ABI defines all arguments to be 4 byte aligned. We have to |
| make an exception for SSE modes since these require 128bit |
| alignment. |
| |
| The handling here differs from field_alignment. ICC aligns MMX |
| arguments to 4 byte boundaries, while structure fields are aligned |
| to 8 byte boundaries. */ |
| if (!TARGET_SSE) |
| align = PARM_BOUNDARY; |
| else if (!type) |
| { |
| if (!SSE_REG_MODE_P (mode)) |
| align = PARM_BOUNDARY; |
| } |
| else |
| { |
| if (!contains_128bit_aligned_vector_p (type)) |
| align = PARM_BOUNDARY; |
| } |
| } |
| if (align > 128) |
| align = 128; |
| return align; |
| } |
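| |
| /* Illustrative example: on ia32 with SSE enabled, an __m128 argument |
| (or any type containing a 128-bit aligned vector) gets a 128-bit |
| boundary from the function above, while a plain int or double |
| stays at PARM_BOUNDARY (32 bits). */ |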
| |
| /* Return true if N is a possible register number of function value. */ |
| bool |
| ix86_function_value_regno_p (int regno) |
| { |
| if (TARGET_MACHO) |
| { |
| if (!TARGET_64BIT) |
| { |
| return ((regno) == 0 |
| || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) |
| || ((regno) == FIRST_SSE_REG && TARGET_SSE)); |
| } |
| return ((regno) == 0 || (regno) == FIRST_FLOAT_REG |
| || ((regno) == FIRST_SSE_REG && TARGET_SSE) |
| || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)); |
| } |
| else |
| { |
| if (regno == 0 |
| || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) |
| || (regno == FIRST_SSE_REG && TARGET_SSE)) |
| return true; |
| |
| if (!TARGET_64BIT |
| && (regno == FIRST_MMX_REG && TARGET_MMX)) |
| return true; |
| |
| return false; |
| } |
| } |
| |
| /* Define how to find the value returned by a function. |
| VALTYPE is the data type of the value (as a tree). |
| If the precise function being called is known, FUNC is its FUNCTION_DECL; |
| otherwise, FUNC is 0. */ |
| rtx |
| ix86_function_value (tree valtype, tree fntype_or_decl, |
| bool outgoing ATTRIBUTE_UNUSED) |
| { |
| enum machine_mode natmode = type_natural_mode (valtype); |
| |
| if (TARGET_64BIT) |
| { |
| rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype, |
| 1, REGPARM_MAX, SSE_REGPARM_MAX, |
| x86_64_int_return_registers, 0); |
| /* For zero-sized structures, construct_container returns NULL, but we |
| need to keep the rest of the compiler happy by returning a |
| meaningful value. */ |
| if (!ret) |
| ret = gen_rtx_REG (TYPE_MODE (valtype), 0); |
| return ret; |
| } |
| else |
| { |
| tree fn = NULL_TREE, fntype; |
| if (fntype_or_decl |
| && DECL_P (fntype_or_decl)) |
| fn = fntype_or_decl; |
| fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; |
| return gen_rtx_REG (TYPE_MODE (valtype), |
| ix86_value_regno (natmode, fn, fntype)); |
| } |
| } |
| |
| /* APPLE LOCAL begin radar 4781080 */ |
| /* Return true iff we must generate a call to objcMsgSend for an |
| fp-returning method. */ |
| bool |
| ix86_objc_fpreturn_msgcall (tree ret_type, bool no_long_double) |
| { |
| if (no_long_double) |
| return TARGET_64BIT && SCALAR_FLOAT_TYPE_P (ret_type) |
| && TYPE_MODE (ret_type) != XFmode; |
| else |
| return SCALAR_FLOAT_TYPE_P (ret_type); |
| } |
| /* APPLE LOCAL end radar 4781080 */ |
| |
| /* Return true iff type is returned in memory. */ |
| int |
| ix86_return_in_memory (tree type) |
| { |
| int needed_intregs, needed_sseregs, size; |
| enum machine_mode mode = type_natural_mode (type); |
| |
| if (TARGET_64BIT) |
| return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs); |
| |
| if (mode == BLKmode) |
| return 1; |
| |
| size = int_size_in_bytes (type); |
| |
| if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) |
| return 0; |
| |
| if (VECTOR_MODE_P (mode) || mode == TImode) |
| { |
| /* User-created vectors small enough to fit in EAX. */ |
| if (size < 8) |
| return 0; |
| |
| /* MMX/3dNow values are returned in MM0, |
| except when it doesn't exist. */ |
| if (size == 8) |
| /* APPLE LOCAL begin radar 4875125. */ |
| /* Undo the mainline patch which broke MACHO ABI compatibility. */ |
| return (TARGET_MACHO) ? 1 : (TARGET_MMX ? 0 : 1); |
| /* APPLE LOCAL end radar 4875125. */ |
| |
| /* SSE values are returned in XMM0, except when it doesn't exist. */ |
| if (size == 16) |
| return (TARGET_SSE ? 0 : 1); |
| } |
| |
| if (mode == XFmode) |
| return 0; |
| |
| if (mode == TDmode) |
| return 1; |
| |
| if (size > 12) |
| return 1; |
| return 0; |
| } |
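| |
| /* Illustrative examples of the rules above on ia32: a 16-byte |
| struct { int a[4]; } (BLKmode) is returned in memory; an XFmode |
| long double is returned in registers; a 16-byte vector is |
| returned in %xmm0 only when SSE is enabled. */ |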
| |
| /* When returning SSE vector types, we have a choice of either |
| (1) being ABI incompatible with a -march switch, or |
| (2) generating an error. |
| Given no good solution, I think the safest thing is one warning. |
| The user won't be able to use -Werror, but.... |
| |
| Choose the STRUCT_VALUE_RTX hook because that's (at present) only |
| called in response to actually generating a caller or callee that |
| uses such a type. As opposed to RETURN_IN_MEMORY, which is called |
| via aggregate_value_p for general type probing from tree-ssa. */ |
| |
| static rtx |
| ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED) |
| { |
| static bool warnedsse, warnedmmx; |
| |
| if (type) |
| { |
| /* Look at the return type of the function, not the function type. */ |
| enum machine_mode mode = TYPE_MODE (TREE_TYPE (type)); |
| |
| if (!TARGET_SSE && !warnedsse) |
| { |
| if (mode == TImode |
| || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) |
| { |
| warnedsse = true; |
| warning (0, "SSE vector return without SSE enabled " |
| "changes the ABI"); |
| } |
| } |
| |
| if (!TARGET_MMX && !warnedmmx) |
| { |
| if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) |
| { |
| warnedmmx = true; |
| warning (0, "MMX vector return without MMX enabled " |
| "changes the ABI"); |
| } |
| } |
| } |
| |
| return NULL; |
| } |
| |
| /* Define how to find the value returned by a library function |
| assuming the value has mode MODE. */ |
| rtx |
| ix86_libcall_value (enum machine_mode mode) |
| { |
| if (TARGET_64BIT) |
| { |
| switch (mode) |
| { |
| case SFmode: |
| case SCmode: |
| case DFmode: |
| case DCmode: |
| case TFmode: |
| case SDmode: |
| case DDmode: |
| case TDmode: |
| return gen_rtx_REG (mode, FIRST_SSE_REG); |
| case XFmode: |
| case XCmode: |
| return gen_rtx_REG (mode, FIRST_FLOAT_REG); |
| case TCmode: |
| return NULL; |
| default: |
| return gen_rtx_REG (mode, 0); |
| } |
| } |
| else |
| return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL)); |
| } |
| |
| /* Given a mode, return the register to use for a return value. */ |
| |
| static int |
| ix86_value_regno (enum machine_mode mode, tree func, tree fntype) |
| { |
| gcc_assert (!TARGET_64BIT); |
| |
| /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where |
| we normally prevent this case when MMX is not available.  However, |
| some ABIs may require the result to be returned like DImode. */ |
| if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) |
| return TARGET_MMX ? FIRST_MMX_REG : 0; |
| |
| /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where |
| we prevent this case when SSE is not available.  However, some ABIs |
| may require the result to be returned like integer TImode. */ |
| if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) |
| return TARGET_SSE ? FIRST_SSE_REG : 0; |
| |
| /* Decimal floating point values can go in %eax, unlike other float modes. */ |
| if (DECIMAL_FLOAT_MODE_P (mode)) |
| return 0; |
| |
| /* APPLE LOCAL begin regparmandstackparm */ |
| if (SSE_FLOAT_MODE_P(mode) |
| && fntype && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (fntype))) |
| return FIRST_SSE_REG; |
| /* APPLE LOCAL end regparmandstackparm */ |
| |
| /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */ |
| if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387) |
| return 0; |
| |
| /* Floating point return values in %st(0), except for local functions when |
| SSE math is enabled or for functions with sseregparm attribute. */ |
| if ((func || fntype) |
| && (mode == SFmode || mode == DFmode)) |
| { |
| int sse_level = ix86_function_sseregparm (fntype, func); |
| if ((sse_level >= 1 && mode == SFmode) |
| || (sse_level == 2 && mode == DFmode)) |
| return FIRST_SSE_REG; |
| } |
| |
| return FIRST_FLOAT_REG; |
| } |
| |
| /* Create the va_list data type. */ |
| |
| static tree |
| ix86_build_builtin_va_list (void) |
| { |
| tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; |
| |
| /* For i386 we use a plain pointer to the argument area. */ |
| if (!TARGET_64BIT) |
| return build_pointer_type (char_type_node); |
| |
| record = (*lang_hooks.types.make_type) (RECORD_TYPE); |
| type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); |
| |
| f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), |
| unsigned_type_node); |
| f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), |
| unsigned_type_node); |
| f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), |
| ptr_type_node); |
| f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), |
| ptr_type_node); |
| |
| va_list_gpr_counter_field = f_gpr; |
| va_list_fpr_counter_field = f_fpr; |
| |
| DECL_FIELD_CONTEXT (f_gpr) = record; |
| DECL_FIELD_CONTEXT (f_fpr) = record; |
| DECL_FIELD_CONTEXT (f_ovf) = record; |
| DECL_FIELD_CONTEXT (f_sav) = record; |
| |
| TREE_CHAIN (record) = type_decl; |
| TYPE_NAME (record) = type_decl; |
| TYPE_FIELDS (record) = f_gpr; |
| TREE_CHAIN (f_gpr) = f_fpr; |
| TREE_CHAIN (f_fpr) = f_ovf; |
| TREE_CHAIN (f_ovf) = f_sav; |
| |
| layout_type (record); |
| |
| /* The correct type is an array type of one element. */ |
| return build_array_type (record, build_index_type (size_zero_node)); |
| } |
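| |
| /* At the C level, the record built above corresponds to the |
| familiar definition from the x86-64 psABI (illustration only): |
| |
| typedef struct __va_list_tag { |
|   unsigned int gp_offset; |
|   unsigned int fp_offset; |
|   void *overflow_arg_area; |
|   void *reg_save_area; |
| } va_list[1]; */ |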
| |
| /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ |
| |
| static void |
| ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
| tree type, int *pretend_size ATTRIBUTE_UNUSED, |
| int no_rtl) |
| { |
| CUMULATIVE_ARGS next_cum; |
| rtx save_area = NULL_RTX, mem; |
| rtx label; |
| rtx label_ref; |
| rtx tmp_reg; |
| rtx nsse_reg; |
| int set; |
| tree fntype; |
| int stdarg_p; |
| int i; |
| |
| if (!TARGET_64BIT) |
| return; |
| |
| if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size) |
| return; |
| |
| /* Indicate that we need to allocate space on the stack for the |
| varargs save area. */ |
| ix86_save_varrargs_registers = 1; |
| |
| cfun->stack_alignment_needed = 128; |
| |
| fntype = TREE_TYPE (current_function_decl); |
| stdarg_p = (TYPE_ARG_TYPES (fntype) != 0 |
| && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype))) |
| != void_type_node)); |
| |
| /* For varargs, we do not want to skip the dummy va_dcl argument. |
| For stdargs, we do want to skip the last named argument. */ |
| next_cum = *cum; |
| if (stdarg_p) |
| function_arg_advance (&next_cum, mode, type, 1); |
| |
| if (!no_rtl) |
| save_area = frame_pointer_rtx; |
| |
| set = get_varargs_alias_set (); |
| |
| for (i = next_cum.regno; |
| i < ix86_regparm |
| && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD; |
| i++) |
| { |
| mem = gen_rtx_MEM (Pmode, |
| plus_constant (save_area, i * UNITS_PER_WORD)); |
| MEM_NOTRAP_P (mem) = 1; |
| set_mem_alias_set (mem, set); |
| emit_move_insn (mem, gen_rtx_REG (Pmode, |
| x86_64_int_parameter_registers[i])); |
| } |
| |
| if (next_cum.sse_nregs && cfun->va_list_fpr_size) |
| { |
| /* Now emit code to save SSE registers.  The AX parameter contains the |
| number of SSE parameter registers used to call this function.  We use |
| the sse_prologue_save insn template, which produces a computed jump |
| across the SSE saves.  We need some preparation work to get this |
| working. */ |
| |
| label = gen_label_rtx (); |
| label_ref = gen_rtx_LABEL_REF (Pmode, label); |
| |
| /* Compute the address to jump to: |
| label - 4*eax + nnamed_sse_arguments*4 (each save insn |
| is 4 bytes long, as arranged below). */ |
| tmp_reg = gen_reg_rtx (Pmode); |
| nsse_reg = gen_reg_rtx (Pmode); |
| emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0))); |
| emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, |
| gen_rtx_MULT (Pmode, nsse_reg, |
| GEN_INT (4)))); |
| if (next_cum.sse_regno) |
| emit_move_insn |
| (nsse_reg, |
| gen_rtx_CONST (DImode, |
| gen_rtx_PLUS (DImode, |
| label_ref, |
| GEN_INT (next_cum.sse_regno * 4)))); |
| else |
| emit_move_insn (nsse_reg, label_ref); |
| emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg)); |
| |
| /* Compute the address of the memory block we save into.  We always |
| use a pointer pointing 127 bytes after the first byte to store; |
| this is needed to keep each instruction's size limited to 4 |
| bytes. */ |
| tmp_reg = gen_reg_rtx (Pmode); |
| emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, |
| plus_constant (save_area, |
| 8 * REGPARM_MAX + 127))); |
| mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); |
| MEM_NOTRAP_P (mem) = 1; |
| set_mem_alias_set (mem, set); |
| set_mem_align (mem, BITS_PER_WORD); |
| |
| /* And finally do the dirty job! */ |
| emit_insn (gen_sse_prologue_save (mem, nsse_reg, |
| GEN_INT (next_cum.sse_regno), label)); |
| } |
| |
| } |
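| |
| /* Illustrative layout of the register save area built above |
| (offsets from reg_save_area, following the psABI): |
| |
| 0 .. 47     %rdi, %rsi, %rdx, %rcx, %r8, %r9 (8 bytes each) |
| 48 .. 175   %xmm0 .. %xmm7 (16 bytes each) */ |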
| |
| /* Implement va_start. */ |
| |
| void |
| ix86_va_start (tree valist, rtx nextarg) |
| { |
| HOST_WIDE_INT words, n_gpr, n_fpr; |
| tree f_gpr, f_fpr, f_ovf, f_sav; |
| tree gpr, fpr, ovf, sav, t; |
| tree type; |
| |
| /* Only the 64-bit target needs something special. */ |
| if (!TARGET_64BIT) |
| { |
| std_expand_builtin_va_start (valist, nextarg); |
| return; |
| } |
| |
| f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); |
| f_fpr = TREE_CHAIN (f_gpr); |
| f_ovf = TREE_CHAIN (f_fpr); |
| f_sav = TREE_CHAIN (f_ovf); |
| |
| valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist); |
| gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); |
| fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); |
| ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); |
| sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); |
| |
| /* Count number of gp and fp argument registers used. */ |
| words = current_function_args_info.words; |
| n_gpr = current_function_args_info.regno; |
| n_fpr = current_function_args_info.sse_regno; |
| |
| if (TARGET_DEBUG_ARG) |
| fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n", |
| (int) words, (int) n_gpr, (int) n_fpr); |
| |
| if (cfun->va_list_gpr_size) |
| { |
| type = TREE_TYPE (gpr); |
| t = build2 (MODIFY_EXPR, type, gpr, |
| build_int_cst (type, n_gpr * 8)); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| } |
| |
| if (cfun->va_list_fpr_size) |
| { |
| type = TREE_TYPE (fpr); |
| t = build2 (MODIFY_EXPR, type, fpr, |
| build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX)); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| } |
| |
| /* Find the overflow area. */ |
| type = TREE_TYPE (ovf); |
| t = make_tree (type, virtual_incoming_args_rtx); |
| if (words != 0) |
| t = build2 (PLUS_EXPR, type, t, |
| build_int_cst (type, words * UNITS_PER_WORD)); |
| t = build2 (MODIFY_EXPR, type, ovf, t); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| |
| if (cfun->va_list_gpr_size || cfun->va_list_fpr_size) |
| { |
| /* Find the register save area. |
| The function prologue saves it right above the stack frame. */ |
| type = TREE_TYPE (sav); |
| t = make_tree (type, frame_pointer_rtx); |
| t = build2 (MODIFY_EXPR, type, sav, t); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| } |
| } |
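| |
| /* Illustrative sketch of the values stored above: for |
| f (int a, ...) after one integer register has been consumed, |
| |
| gp_offset          = 1 * 8 |
| fp_offset          = 0 * 16 + 8 * REGPARM_MAX |
| overflow_arg_area  = first stack-passed argument |
| reg_save_area      = base of the save area in the frame  */ |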
| |
| /* Implement va_arg. */ |
| |
| tree |
| ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) |
| { |
| static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; |
| tree f_gpr, f_fpr, f_ovf, f_sav; |
| tree gpr, fpr, ovf, sav, t; |
| int size, rsize; |
| tree lab_false, lab_over = NULL_TREE; |
| tree addr, t2; |
| rtx container; |
| int indirect_p = 0; |
| tree ptrtype; |
| enum machine_mode nat_mode; |
| |
| /* Only the 64-bit target needs something special. */ |
| if (!TARGET_64BIT) |
| return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); |
| |
| f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); |
| f_fpr = TREE_CHAIN (f_gpr); |
| f_ovf = TREE_CHAIN (f_fpr); |
| f_sav = TREE_CHAIN (f_ovf); |
| |
| valist = build_va_arg_indirect_ref (valist); |
| gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); |
| fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); |
| ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); |
| sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); |
| |
| indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); |
| if (indirect_p) |
| type = build_pointer_type (type); |
| size = int_size_in_bytes (type); |
| rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| |
| nat_mode = type_natural_mode (type); |
| container = construct_container (nat_mode, TYPE_MODE (type), type, 0, |
| REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0); |
| |
| /* Pull the value out of the saved registers. */ |
| |
| addr = create_tmp_var (ptr_type_node, "addr"); |
| DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set (); |
| |
| if (container) |
| { |
| int needed_intregs, needed_sseregs; |
| bool need_temp; |
| tree int_addr, sse_addr; |
| |
| lab_false = create_artificial_label (); |
| lab_over = create_artificial_label (); |
| |
| examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); |
| |
| need_temp = (!REG_P (container) |
| && ((needed_intregs && TYPE_ALIGN (type) > 64) |
| || TYPE_ALIGN (type) > 128)); |
| |
| /* In case we are passing a structure, verify that it is a consecutive |
| block in the register save area.  If not, we need to do moves. */ |
| if (!need_temp && !REG_P (container)) |
| { |
| /* Verify that all registers are strictly consecutive. */ |
| if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) |
| { |
| int i; |
| |
| for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) |
| { |
| rtx slot = XVECEXP (container, 0, i); |
| if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i |
| || INTVAL (XEXP (slot, 1)) != i * 16) |
| need_temp = 1; |
| } |
| } |
| else |
| { |
| int i; |
| |
| for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) |
| { |
| rtx slot = XVECEXP (container, 0, i); |
| if (REGNO (XEXP (slot, 0)) != (unsigned int) i |
| || INTVAL (XEXP (slot, 1)) != i * 8) |
| need_temp = 1; |
| } |
| } |
| } |
| if (!need_temp) |
| { |
| int_addr = addr; |
| sse_addr = addr; |
| } |
| else |
| { |
| int_addr = create_tmp_var (ptr_type_node, "int_addr"); |
| DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set (); |
| sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); |
| DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set (); |
| } |
| |
| /* First ensure that we fit completely in registers. */ |
| if (needed_intregs) |
| { |
| t = build_int_cst (TREE_TYPE (gpr), |
| (REGPARM_MAX - needed_intregs + 1) * 8); |
| t = build2 (GE_EXPR, boolean_type_node, gpr, t); |
| t2 = build1 (GOTO_EXPR, void_type_node, lab_false); |
| t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); |
| gimplify_and_add (t, pre_p); |
| } |
| if (needed_sseregs) |
| { |
| t = build_int_cst (TREE_TYPE (fpr), |
| (SSE_REGPARM_MAX - needed_sseregs + 1) * 16 |
| + REGPARM_MAX * 8); |
| t = build2 (GE_EXPR, boolean_type_node, fpr, t); |
| t2 = build1 (GOTO_EXPR, void_type_node, lab_false); |
| t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); |
| gimplify_and_add (t, pre_p); |
| } |
| |
| /* Compute index to start of area used for integer regs. */ |
| if (needed_intregs) |
| { |
| /* int_addr = gpr + sav; */ |
| t = fold_convert (ptr_type_node, gpr); |
| t = build2 (PLUS_EXPR, ptr_type_node, sav, t); |
| t = build2 (MODIFY_EXPR, void_type_node, int_addr, t); |
| gimplify_and_add (t, pre_p); |
| } |
| if (needed_sseregs) |
| { |
| /* sse_addr = fpr + sav; */ |
| t = fold_convert (ptr_type_node, fpr); |
| t = build2 (PLUS_EXPR, ptr_type_node, sav, t); |
| t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t); |
| gimplify_and_add (t, pre_p); |
| } |
| if (need_temp) |
| { |
| int i; |
| tree temp = create_tmp_var (type, "va_arg_tmp"); |
| |
| /* addr = &temp; */ |
| t = build1 (ADDR_EXPR, build_pointer_type (type), temp); |
| t = build2 (MODIFY_EXPR, void_type_node, addr, t); |
| gimplify_and_add (t, pre_p); |
| |
| for (i = 0; i < XVECLEN (container, 0); i++) |
| { |
| rtx slot = XVECEXP (container, 0, i); |
| rtx reg = XEXP (slot, 0); |
| enum machine_mode mode = GET_MODE (reg); |
| tree piece_type = lang_hooks.types.type_for_mode (mode, 1); |
| tree addr_type = build_pointer_type (piece_type); |
| tree src_addr, src; |
| int src_offset; |
| tree dest_addr, dest; |
| |
| if (SSE_REGNO_P (REGNO (reg))) |
| { |
| src_addr = sse_addr; |
| src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; |
| } |
| else |
| { |
| src_addr = int_addr; |
| src_offset = REGNO (reg) * 8; |
| } |
| src_addr = fold_convert (addr_type, src_addr); |
| src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr, |
| size_int (src_offset))); |
| src = build_va_arg_indirect_ref (src_addr); |
| |
| dest_addr = fold_convert (addr_type, addr); |
| dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr, |
| size_int (INTVAL (XEXP (slot, 1))))); |
| dest = build_va_arg_indirect_ref (dest_addr); |
| |
| t = build2 (MODIFY_EXPR, void_type_node, dest, src); |
| gimplify_and_add (t, pre_p); |
| } |
| } |
| |
| if (needed_intregs) |
| { |
| t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, |
| build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); |
| t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t); |
| gimplify_and_add (t, pre_p); |
| } |
| if (needed_sseregs) |
| { |
| t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, |
| build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); |
| t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t); |
| gimplify_and_add (t, pre_p); |
| } |
| |
| t = build1 (GOTO_EXPR, void_type_node, lab_over); |
| gimplify_and_add (t, pre_p); |
| |
| t = build1 (LABEL_EXPR, void_type_node, lab_false); |
| append_to_statement_list (t, pre_p); |
| } |
| |
| /* ... otherwise out of the overflow area. */ |
| |
| /* Care for on-stack alignment if needed. */ |
| if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64 |
| || integer_zerop (TYPE_SIZE (type))) |
| t = ovf; |
| else |
| { |
| HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8; |
| t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf, |
| build_int_cst (TREE_TYPE (ovf), align - 1)); |
| t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, |
| build_int_cst (TREE_TYPE (t), -align)); |
| } |
| gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); |
| |
| t2 = build2 (MODIFY_EXPR, void_type_node, addr, t); |
| gimplify_and_add (t2, pre_p); |
| |
| t = build2 (PLUS_EXPR, TREE_TYPE (t), t, |
| build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD)); |
| t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); |
| gimplify_and_add (t, pre_p); |
| |
| if (container) |
| { |
| t = build1 (LABEL_EXPR, void_type_node, lab_over); |
| append_to_statement_list (t, pre_p); |
| } |
| |
| ptrtype = build_pointer_type (type); |
| addr = fold_convert (ptrtype, addr); |
| |
| if (indirect_p) |
| addr = build_va_arg_indirect_ref (addr); |
| return build_va_arg_indirect_ref (addr); |
| } |
| |
| /* Return nonzero if OPNUM's MEM should be matched |
| in movabs* patterns. */ |
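| /* The movabs* patterns move between %rax (or a subregister of it) and |
| a MEM using a 64-bit absolute address, such as |
| "movabs 0x1122334455667788, %rax"; the MEM may only be matched when |
| it is not volatile, unless volatile_ok is set. */ |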
| |
| int |
| ix86_check_movabs (rtx insn, int opnum) |
| { |
| rtx set, mem; |
| |
| set = PATTERN (insn); |
| if (GET_CODE (set) == PARALLEL) |
| set = XVECEXP (set, 0, 0); |
| gcc_assert (GET_CODE (set) == SET); |
| mem = XEXP (set, opnum); |
| while (GET_CODE (mem) == SUBREG) |
| mem = SUBREG_REG (mem); |
| gcc_assert (GET_CODE (mem) == MEM); |
| return (volatile_ok || !MEM_VOLATILE_P (mem)); |
| } |
| |
| /* Initialize the table of extra 80387 mathematical constants. */ |
| |
| static void |
| init_ext_80387_constants (void) |
| { |
| static const char * cst[5] = |
| { |
| "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ |
| "0.6931471805599453094286904741849753009", /* 1: fldln2 */ |
| "1.4426950408889634073876517827983434472", /* 2: fldl2e */ |
| "3.3219280948873623478083405569094566090", /* 3: fldl2t */ |
| "3.1415926535897932385128089594061862044", /* 4: fldpi */ |
| }; |
| int i; |
| |
| for (i = 0; i < 5; i++) |
| { |
| real_from_string (&ext_80387_constants_table[i], cst[i]); |
| /* Ensure each constant is rounded to XFmode precision. */ |
| real_convert (&ext_80387_constants_table[i], |
| XFmode, &ext_80387_constants_table[i]); |
| } |
| |
| ext_80387_constants_init = 1; |
| } |
| |
| /* Return a code (1..7) identifying the special instruction that can load |
| the constant X, 0 if there is none, or -1 if X is not a floating-point |
| CONST_DOUBLE at all. */ |
| |
| int |
| standard_80387_constant_p (rtx x) |
| { |
| if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x))) |
| return -1; |
| |
| if (x == CONST0_RTX (GET_MODE (x))) |
| return 1; |
| if (x == CONST1_RTX (GET_MODE (x))) |
| return 2; |
| |
| /* For XFmode constants, try to find a special 80387 instruction when |
| optimizing for size or on those CPUs that benefit from them. */ |
| if (GET_MODE (x) == XFmode |
| && (optimize_size || x86_ext_80387_constants & TUNEMASK)) |
| { |
| REAL_VALUE_TYPE r; |
| int i; |
| |
| if (! ext_80387_constants_init) |
| init_ext_80387_constants (); |
| |
| REAL_VALUE_FROM_CONST_DOUBLE (r, x); |
| for (i = 0; i < 5; i++) |
| if (real_identical (&r, &ext_80387_constants_table[i])) |
| return i + 3; |
| } |
| |
| return 0; |
| } |
| |
| /* Return the opcode of the special instruction to be used to load |
| the constant X. */ |
| |
| const char * |
| standard_80387_constant_opcode (rtx x) |
| { |
| switch (standard_80387_constant_p (x)) |
| { |
| case 1: |
| return "fldz"; |
| case 2: |
| return "fld1"; |
| case 3: |
| return "fldlg2"; |
| case 4: |
| return "fldln2"; |
| case 5: |
| return "fldl2e"; |
| case 6: |
| return "fldl2t"; |
| case 7: |
| return "fldpi"; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Return the CONST_DOUBLE representing the 80387 constant that is |
| loaded by the specified special instruction. The argument IDX |
| matches the return value from standard_80387_constant_p. */ |
| |
| rtx |
| standard_80387_constant_rtx (int idx) |
| { |
| int i; |
| |
| if (! ext_80387_constants_init) |
| init_ext_80387_constants (); |
| |
| switch (idx) |
| { |
| case 3: |
| case 4: |
| case 5: |
| case 6: |
| case 7: |
| i = idx - 3; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], |
| XFmode); |
| } |
| |
| /* Return 1 if MODE is a valid vector mode for SSE. */ |
| static int |
| standard_sse_mode_p (enum machine_mode mode) |
| { |
| switch (mode) |
| { |
| case V16QImode: |
| case V8HImode: |
| case V4SImode: |
| case V2DImode: |
| case V4SFmode: |
| case V2DFmode: |
| return 1; |
| |
| default: |
| return 0; |
| } |
| } |
| |
| /* Return nonzero if X is a constant we can load into an SSE register |
| without using memory: 1 for an all-zeros constant, 2 for an all-ones |
| constant when SSE2 is available (-1 when it is not). Return 0 |
| otherwise. */ |
| int |
| standard_sse_constant_p (rtx x) |
| { |
| enum machine_mode mode = GET_MODE (x); |
| |
| if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) |
| return 1; |
| if (vector_all_ones_operand (x, mode) |
| && standard_sse_mode_p (mode)) |
| return TARGET_SSE2 ? 2 : -1; |
| |
| return 0; |
| } |
| |
| /* Return the opcode of the special instruction to be used to load |
| the constant X. */ |
| |
| const char * |
| standard_sse_constant_opcode (rtx insn, rtx x) |
| { |
| switch (standard_sse_constant_p (x)) |
| { |
| case 1: |
| if (get_attr_mode (insn) == MODE_V4SF) |
| return "xorps\t%0, %0"; |
| else if (get_attr_mode (insn) == MODE_V2DF) |
| return "xorpd\t%0, %0"; |
| else |
| return "pxor\t%0, %0"; |
| case 2: |
| return "pcmpeqd\t%0, %0"; |
| } |
| gcc_unreachable (); |
| } |
| |
| /* Return 1 if OP contains a symbol reference. */ |
| |
| int |
| symbolic_reference_mentioned_p (rtx op) |
| { |
| const char *fmt; |
| int i; |
| |
| if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) |
| return 1; |
| |
| fmt = GET_RTX_FORMAT (GET_CODE (op)); |
| for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'E') |
| { |
| int j; |
| |
| for (j = XVECLEN (op, i) - 1; j >= 0; j--) |
| if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) |
| return 1; |
| } |
| |
| else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /* Return 1 if it is appropriate to emit `ret' instructions in the |
| body of a function. Do this only if the epilogue is simple, needing a |
| couple of insns. Prior to reloading, we can't tell how many registers |
| must be saved, so return 0 then. Return 0 if there is no frame |
| marker to de-allocate. */ |
| |
| int |
| ix86_can_use_return_insn_p (void) |
| { |
| struct ix86_frame frame; |
| |
| if (! reload_completed || frame_pointer_needed) |
| return 0; |
| |
| /* Don't allow more than 32768 bytes of arguments to be popped, since |
| that's all we can do with one instruction. */ |
| if (current_function_pops_args |
| && current_function_args_size >= 32768) |
| return 0; |
| |
| ix86_compute_frame_layout (&frame); |
| return frame.to_allocate == 0 && frame.nregs == 0; |
| } |
| |
| /* Value should be nonzero if functions must have frame pointers. |
| Zero means the frame pointer need not be set up (and parms may |
| be accessed via the stack pointer) in functions that seem suitable. */ |
| |
| int |
| ix86_frame_pointer_required (void) |
| { |
| /* If we accessed previous frames, then the generated code expects |
| to be able to access the saved ebp value in our frame. */ |
| if (cfun->machine->accesses_prev_frame) |
| return 1; |
| |
| /* Several x86 OSes need a frame pointer for other reasons, |
| usually pertaining to setjmp. */ |
| if (SUBTARGET_FRAME_POINTER_REQUIRED) |
| return 1; |
| |
| /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off |
| the frame pointer by default. Turn it back on now if we've not |
| got a leaf function. */ |
| if (TARGET_OMIT_LEAF_FRAME_POINTER |
| && (!current_function_is_leaf |
| || ix86_current_function_calls_tls_descriptor)) |
| return 1; |
| |
| if (current_function_profile) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* Record that the current function accesses previous call frames. */ |
| |
| void |
| ix86_setup_frame_addresses (void) |
| { |
| cfun->machine->accesses_prev_frame = 1; |
| } |
| |
| #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO |
| # define USE_HIDDEN_LINKONCE 1 |
| #else |
| # define USE_HIDDEN_LINKONCE 0 |
| #endif |
| |
| /* APPLE LOCAL 5695218 */ |
| static GTY(()) int pic_labels_used; |
| |
| /* Fill in the label name that should be used for a pc thunk for |
| the given register. */ |
| |
| static void |
| get_pc_thunk_name (char name[32], unsigned int regno) |
| { |
| gcc_assert (!TARGET_64BIT); |
| |
| /* APPLE LOCAL deep branch prediction pic-base. */ |
| if (USE_HIDDEN_LINKONCE || TARGET_MACHO) |
| sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); |
| else |
| ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); |
| } |
| |
| |
| /* For -fpic, emit for each PIC register used a thunk that loads the |
| register with the return address of the caller and then returns. */ |
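| /* For example, the thunk emitted for %ebx looks like: |
| |
| __i686.get_pc_thunk.bx: |
| movl (%esp), %ebx |
| ret |
| |
| leaving the caller's return address (the pic base) in %ebx. */ |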
| |
| void |
| ix86_file_end (void) |
| { |
| rtx xops[2]; |
| int regno; |
| |
| for (regno = 0; regno < 8; ++regno) |
| { |
| char name[32]; |
| |
| if (! ((pic_labels_used >> regno) & 1)) |
| continue; |
| |
| get_pc_thunk_name (name, regno); |
| |
| #if TARGET_MACHO |
| if (TARGET_MACHO) |
| { |
| switch_to_section (darwin_sections[text_coal_section]); |
| fputs ("\t.weak_definition\t", asm_out_file); |
| assemble_name (asm_out_file, name); |
| fputs ("\n\t.private_extern\t", asm_out_file); |
| assemble_name (asm_out_file, name); |
| fputs ("\n", asm_out_file); |
| ASM_OUTPUT_LABEL (asm_out_file, name); |
| } |
| else |
| #endif |
| if (USE_HIDDEN_LINKONCE) |
| { |
| tree decl; |
| |
| decl = build_decl (FUNCTION_DECL, get_identifier (name), |
| error_mark_node); |
| TREE_PUBLIC (decl) = 1; |
| TREE_STATIC (decl) = 1; |
| DECL_ONE_ONLY (decl) = 1; |
| |
| (*targetm.asm_out.unique_section) (decl, 0); |
| switch_to_section (get_named_section (decl, NULL, 0)); |
| |
| (*targetm.asm_out.globalize_label) (asm_out_file, name); |
| fputs ("\t.hidden\t", asm_out_file); |
| assemble_name (asm_out_file, name); |
| fputc ('\n', asm_out_file); |
| ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); |
| } |
| /* APPLE LOCAL begin deep branch prediction pic-base */ |
| #if TARGET_MACHO |
| else if (TARGET_MACHO) |
| { |
| switch_to_section (darwin_sections[text_coal_section]); |
| fputs (".weak_definition\t", asm_out_file); |
| assemble_name (asm_out_file, name); |
| fputs ("\n.private_extern\t", asm_out_file); |
| assemble_name (asm_out_file, name); |
| fputs ("\n", asm_out_file); |
| ASM_OUTPUT_LABEL (asm_out_file, name); |
| } |
| #endif |
| /* APPLE LOCAL end deep branch prediction pic-base */ |
| else |
| { |
| switch_to_section (text_section); |
| ASM_OUTPUT_LABEL (asm_out_file, name); |
| } |
| |
| xops[0] = gen_rtx_REG (SImode, regno); |
| xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); |
| output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); |
| output_asm_insn ("ret", xops); |
| } |
| |
| if (NEED_INDICATE_EXEC_STACK) |
| file_end_indicate_exec_stack (); |
| } |
| |
| /* Emit code for the SET_GOT patterns. */ |
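| /* For a 32-bit PIC load of %ebx without deep branch prediction, the |
| classic emitted sequence looks roughly like: |
| |
| call .L2 |
| .L2: popl %ebx |
| addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx |
| |
| (label syntax varies by target; on Darwin the add is not emitted). |
| With deep branch prediction, the call/pop pair is replaced by a call |
| to a pc thunk so call and return predictions stay paired. */ |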
| |
| const char * |
| output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) |
| { |
| rtx xops[3]; |
| |
| xops[0] = dest; |
| xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); |
| |
| if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) |
| { |
| xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); |
| |
| if (!flag_pic) |
| output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); |
| else |
| /* APPLE LOCAL begin dwarf call/pop 5221468 */ |
| { |
| output_asm_insn ("call\t%a2", xops); |
| |
| /* If necessary, report the effect that the instruction has on |
| the unwind info. */ |
| #if defined (DWARF2_UNWIND_INFO) |
| if (flag_asynchronous_unwind_tables |
| #if !defined (HAVE_prologue) |
| && !ACCUMULATE_OUTGOING_ARGS |
| #endif |
| && dwarf2out_do_frame ()) |
| { |
| rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| GEN_INT (-4))); |
| insn = make_insn_raw (insn); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| dwarf2out_frame_debug (insn, true); |
| } |
| #endif |
| } |
| /* APPLE LOCAL end dwarf call/pop 5221468 */ |
| |
| #if TARGET_MACHO |
| /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This |
| is what will be referenced by the Mach-O PIC subsystem. */ |
| if (!label) |
| ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); |
| #endif |
| |
| (*targetm.asm_out.internal_label) (asm_out_file, "L", |
| CODE_LABEL_NUMBER (XEXP (xops[2], 0))); |
| |
| if (flag_pic) |
| /* APPLE LOCAL begin dwarf call/pop 5221468 */ |
| { |
| output_asm_insn ("pop{l}\t%0", xops); |
| |
| /* If necessary, report the effect that the instruction has on |
| the unwind info. We've already done this for delay slots |
| and call instructions. */ |
| #if defined (DWARF2_UNWIND_INFO) |
| if (flag_asynchronous_unwind_tables |
| #if !defined (HAVE_prologue) |
| && !ACCUMULATE_OUTGOING_ARGS |
| #endif |
| && dwarf2out_do_frame ()) |
| { |
| rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| GEN_INT (4))); |
| insn = make_insn_raw (insn); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| dwarf2out_frame_debug (insn, true); |
| } |
| #endif |
| } |
| /* APPLE LOCAL end dwarf call/pop 5221468 */ |
| } |
| else |
| { |
| char name[32]; |
| get_pc_thunk_name (name, REGNO (dest)); |
| pic_labels_used |= 1 << REGNO (dest); |
| |
| xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); |
| xops[2] = gen_rtx_MEM (QImode, xops[2]); |
| output_asm_insn ("call\t%X2", xops); |
| /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This |
| is what will be referenced by the Mach-O PIC subsystem. */ |
| #if TARGET_MACHO |
| if (!label) |
| ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); |
| else |
| targetm.asm_out.internal_label (asm_out_file, "L", |
| CODE_LABEL_NUMBER (label)); |
| #endif |
| } |
| |
| if (TARGET_MACHO) |
| return ""; |
| |
| if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) |
| output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops); |
| else |
| output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops); |
| |
| return ""; |
| } |
| |
| /* Generate a "push" pattern for input ARG. */ |
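| /* For example, gen_push (hard_frame_pointer_rtx) yields |
| (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp)), |
| i.e. a "pushl %ebp", assuming Pmode is SImode. */ |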
| |
| static rtx |
| gen_push (rtx arg) |
| { |
| return gen_rtx_SET (VOIDmode, |
| gen_rtx_MEM (Pmode, |
| gen_rtx_PRE_DEC (Pmode, |
| stack_pointer_rtx)), |
| arg); |
| } |
| |
| /* Return the number of an unused call-clobbered register that is |
| available for the entire function, or INVALID_REGNUM if there is none. */ |
| |
| static unsigned int |
| ix86_select_alt_pic_regnum (void) |
| { |
| if (current_function_is_leaf && !current_function_profile |
| && !ix86_current_function_calls_tls_descriptor) |
| { |
| int i; |
| for (i = 2; i >= 0; --i) |
| if (!regs_ever_live[i]) |
| return i; |
| } |
| |
| return INVALID_REGNUM; |
| } |
| |
| /* APPLE LOCAL begin 5695218 */ |
| /* Reload may introduce references to the PIC base register that do not |
| directly reference pic_offset_table_rtx. In the rare event that we choose |
| an alternate PIC register, run through all the insns and rewrite every |
| reference to the original PIC_OFFSET_TABLE_REGNUM to our new one. */ |
| static void |
| ix86_globally_replace_pic_reg (unsigned int new_pic_regno) |
| { |
| rtx insn; |
| const int nregs = PIC_OFFSET_TABLE_REGNUM + 1; |
| rtx reg_map[FIRST_PSEUDO_REGISTER]; |
| memset (reg_map, 0, nregs * sizeof (rtx)); |
| pic_offset_table_rtx = gen_rtx_REG (SImode, new_pic_regno); |
| reg_map[REAL_PIC_OFFSET_TABLE_REGNUM] = pic_offset_table_rtx; |
| |
| push_topmost_sequence (); |
| for (insn = get_insns (); insn != NULL; insn = NEXT_INSN (insn)) |
| { |
| if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN) |
| { |
| replace_regs (PATTERN (insn), reg_map, nregs, 1); |
| replace_regs (REG_NOTES (insn), reg_map, nregs, 1); |
| } |
| #if defined (TARGET_TOC) |
| else if (GET_CODE (insn) == CALL_INSN) |
| { |
| if ( !SIBLING_CALL_P (insn)) |
| abort (); |
| } |
| #endif |
| } |
| pop_topmost_sequence (); |
| |
| regs_ever_live[new_pic_regno] = 1; |
| regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 0; |
| #if defined (TARGET_TOC) |
| cfun->machine->substitute_pic_base_reg = new_pic_regno; |
| #endif |
| } |
| /* APPLE LOCAL end 5695218 */ |
| |
| /* Return 1 if we need to save REGNO. */ |
| static int |
| ix86_save_reg (unsigned int regno, int maybe_eh_return) |
| { |
| /* APPLE LOCAL begin CW asm blocks */ |
| /* For an asm function, we don't save any registers, instead, the |
| user is responsible. */ |
| if (cfun->iasm_asm_function) |
| return 0; |
| /* APPLE LOCAL end CW asm blocks */ |
| |
| if (pic_offset_table_rtx |
| && regno == REAL_PIC_OFFSET_TABLE_REGNUM |
| /* APPLE LOCAL begin 5695218 */ |
| && (current_function_uses_pic_offset_table |
| || current_function_profile |
| || current_function_calls_eh_return |
| || current_function_uses_const_pool)) |
| /* APPLE LOCAL end 5695218 */ |
| { |
| if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) |
| return 0; |
| return 1; |
| } |
| |
| if (current_function_calls_eh_return && maybe_eh_return) |
| { |
| unsigned i; |
| for (i = 0; ; i++) |
| { |
| unsigned test = EH_RETURN_DATA_REGNO (i); |
| if (test == INVALID_REGNUM) |
| break; |
| if (test == regno) |
| return 1; |
| } |
| } |
| |
| if (cfun->machine->force_align_arg_pointer |
| && regno == REGNO (cfun->machine->force_align_arg_pointer)) |
| return 1; |
| |
| /* APPLE LOCAL begin 5695218 */ |
| /* In order to get accurate usage info for the PIC register, we've |
| been forced to break and un-break the call_used_regs and |
| fixed_regs vectors. Ignore them when considering the PIC |
| register. */ |
| if (regno == REAL_PIC_OFFSET_TABLE_REGNUM |
| && regs_ever_live[regno]) |
| return 1; |
| /* APPLE LOCAL end 5695218 */ |
| |
| return (regs_ever_live[regno] |
| && !call_used_regs[regno] |
| && !fixed_regs[regno] |
| && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); |
| } |
| |
| /* Return number of registers to be saved on the stack. */ |
| |
| static int |
| ix86_nsaved_regs (void) |
| { |
| int nregs = 0; |
| int regno; |
| |
| for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) |
| if (ix86_save_reg (regno, true)) |
| nregs++; |
| return nregs; |
| } |
| |
| /* Return the offset between two registers, one to be eliminated, and the other |
| its replacement, at the start of a routine. */ |
| |
| HOST_WIDE_INT |
| ix86_initial_elimination_offset (int from, int to) |
| { |
| struct ix86_frame frame; |
| ix86_compute_frame_layout (&frame); |
| |
| if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
| return frame.hard_frame_pointer_offset; |
| else if (from == FRAME_POINTER_REGNUM |
| && to == HARD_FRAME_POINTER_REGNUM) |
| return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; |
| else |
| { |
| gcc_assert (to == STACK_POINTER_REGNUM); |
| |
| if (from == ARG_POINTER_REGNUM) |
| return frame.stack_pointer_offset; |
| |
| gcc_assert (from == FRAME_POINTER_REGNUM); |
| return frame.stack_pointer_offset - frame.frame_pointer_offset; |
| } |
| } |
| |
| /* Fill in the ix86_frame structure describing the frame of the currently compiled function. */ |
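| /* A sketch of the layout being computed, with offsets measured |
| downward from the CFA (the value of %esp just before the call) and |
| ignoring the red-zone adjustment made at the end: |
| |
| return address <- CFA |
| saved %ebp (if frame_pointer_needed) <- hard_frame_pointer_offset |
| register save area (nregs words) |
| va-arg register save area |
| padding1 <- frame_pointer_offset |
| local variables (get_frame_size () bytes) |
| outgoing arguments area |
| padding2 <- stack_pointer_offset */ |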
| |
| static void |
| ix86_compute_frame_layout (struct ix86_frame *frame) |
| { |
| HOST_WIDE_INT total_size; |
| unsigned int stack_alignment_needed; |
| HOST_WIDE_INT offset; |
| unsigned int preferred_alignment; |
| HOST_WIDE_INT size = get_frame_size (); |
| |
| frame->nregs = ix86_nsaved_regs (); |
| total_size = size; |
| |
| stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; |
| preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; |
| |
| /* During reload iteration the number of registers saved can change. |
| Recompute the value as needed. Do not recompute when the number of |
| registers didn't change, as reload makes multiple calls to this |
| function and does not expect the decision to change within a single |
| iteration. */ |
| if (!optimize_size |
| && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) |
| { |
| int count = frame->nregs; |
| |
| cfun->machine->use_fast_prologue_epilogue_nregs = count; |
| /* The fast prologue uses move instead of push to save registers. This |
| is significantly longer, but also executes faster, as modern hardware |
| can execute the moves in parallel but can't do that for push/pop. |
| |
| Be careful about choosing which prologue to emit: when the function |
| takes many instructions to execute, we may as well use the slow |
| version, and likewise when the function is known to be outside a hot |
| spot (this is known with feedback only). Weight the size of the |
| function by the number of registers to save, as it is cheap to use |
| one or two push instructions but very slow to use many of them. */ |
| if (count) |
| count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; |
| if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL |
| || (flag_branch_probabilities |
| && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) |
| cfun->machine->use_fast_prologue_epilogue = false; |
| else |
| cfun->machine->use_fast_prologue_epilogue |
| = !expensive_function_p (count); |
| } |
| if (TARGET_PROLOGUE_USING_MOVE |
| && cfun->machine->use_fast_prologue_epilogue) |
| frame->save_regs_using_mov = true; |
| else |
| frame->save_regs_using_mov = false; |
| |
| |
| /* Skip return address and saved base pointer. */ |
| offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; |
| |
| frame->hard_frame_pointer_offset = offset; |
| |
| /* Do some sanity checking of stack_alignment_needed and |
| preferred_alignment, since the i386 port is the only one using these |
| features, and they may break easily. */ |
| |
| gcc_assert (!size || stack_alignment_needed); |
| gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); |
| gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); |
| gcc_assert (stack_alignment_needed |
| <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) |
| stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; |
| |
| /* Register save area */ |
| offset += frame->nregs * UNITS_PER_WORD; |
| |
| /* Va-arg area */ |
| if (ix86_save_varrargs_registers) |
| { |
| offset += X86_64_VARARGS_SIZE; |
| frame->va_arg_size = X86_64_VARARGS_SIZE; |
| } |
| else |
| frame->va_arg_size = 0; |
| |
| /* Align start of frame for local function. */ |
| frame->padding1 = ((offset + stack_alignment_needed - 1) |
| & -stack_alignment_needed) - offset; |
| |
| offset += frame->padding1; |
| |
| /* Frame pointer points here. */ |
| frame->frame_pointer_offset = offset; |
| |
| offset += size; |
| |
| /* Add the outgoing arguments area. This can be skipped if we have |
| eliminated all function calls as dead code. |
| Skipping is, however, impossible when the function calls alloca: the |
| alloca expander assumes that the last |
| current_function_outgoing_args_size bytes of the stack frame are |
| unused. */ |
| if (ACCUMULATE_OUTGOING_ARGS |
| && (!current_function_is_leaf || current_function_calls_alloca |
| || ix86_current_function_calls_tls_descriptor)) |
| { |
| offset += current_function_outgoing_args_size; |
| frame->outgoing_arguments_size = current_function_outgoing_args_size; |
| } |
| else |
| frame->outgoing_arguments_size = 0; |
| |
| /* Align stack boundary. Only needed if we're calling another function |
| or using alloca. */ |
| if (!current_function_is_leaf || current_function_calls_alloca |
| || ix86_current_function_calls_tls_descriptor) |
| frame->padding2 = ((offset + preferred_alignment - 1) |
| & -preferred_alignment) - offset; |
| else |
| frame->padding2 = 0; |
| |
| offset += frame->padding2; |
| |
| /* We've reached end of stack frame. */ |
| frame->stack_pointer_offset = offset; |
| |
| /* Size prologue needs to allocate. */ |
| frame->to_allocate = |
| (size + frame->padding1 + frame->padding2 |
| + frame->outgoing_arguments_size + frame->va_arg_size); |
| |
| if ((!frame->to_allocate && frame->nregs <= 1) |
| || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)) |
| frame->save_regs_using_mov = false; |
| |
| if (TARGET_RED_ZONE && current_function_sp_is_unchanging |
| && current_function_is_leaf |
| && !ix86_current_function_calls_tls_descriptor) |
| { |
| frame->red_zone_size = frame->to_allocate; |
| if (frame->save_regs_using_mov) |
| frame->red_zone_size += frame->nregs * UNITS_PER_WORD; |
| if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) |
| frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; |
| } |
| else |
| frame->red_zone_size = 0; |
| frame->to_allocate -= frame->red_zone_size; |
| frame->stack_pointer_offset -= frame->red_zone_size; |
| #if 0 |
| fprintf (stderr, "nregs: %i\n", frame->nregs); |
| fprintf (stderr, "size: %i\n", size); |
| fprintf (stderr, "alignment1: %i\n", stack_alignment_needed); |
| fprintf (stderr, "padding1: %i\n", frame->padding1); |
| fprintf (stderr, "va_arg: %i\n", frame->va_arg_size); |
| fprintf (stderr, "padding2: %i\n", frame->padding2); |
| fprintf (stderr, "to_allocate: %i\n", frame->to_allocate); |
| fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size); |
| fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset); |
| fprintf (stderr, "hard_frame_pointer_offset: %i\n", |
| frame->hard_frame_pointer_offset); |
| fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset); |
| #endif |
| } |
| |
| /* Emit code to save registers in the prologue. */ |
| |
| static void |
| ix86_emit_save_regs (void) |
| { |
| unsigned int regno; |
| rtx insn; |
| |
| for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; ) |
| if (ix86_save_reg (regno, true)) |
| { |
| insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| |
| /* Emit code to save registers using MOV insns. The first register |
| is stored at POINTER + OFFSET. */ |
| static void |
| ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) |
| { |
| unsigned int regno; |
| rtx insn; |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (ix86_save_reg (regno, true)) |
| { |
| insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), |
| Pmode, offset), |
| gen_rtx_REG (Pmode, regno)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| offset += UNITS_PER_WORD; |
| } |
| } |
| |
| /* Expand a prologue or epilogue stack adjustment. |
| The pattern exists to put a dependency on all ebp-based memory accesses. |
| STYLE should be negative if instructions should be marked as frame |
| related, zero if the %r11 register is live and cannot be freely used, |
| and positive otherwise. */ |
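| /* For example, the prologue below calls |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (-allocate), -1) |
| to allocate the frame and mark the adjustment as frame related. */ |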
| |
| static void |
| pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style) |
| { |
| rtx insn; |
| |
| if (! TARGET_64BIT) |
| insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset)); |
| else if (x86_64_immediate_operand (offset, DImode)) |
| insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset)); |
| else |
| { |
| rtx r11; |
| /* r11 is used by indirect sibcall return as well, set before the |
| epilogue and used after the epilogue. At the moment, an indirect |
| sibcall shouldn't be used together with huge frame sizes in one |
| function, because of the frame_size check in sibcall.c. */ |
| gcc_assert (style); |
| r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); |
| insn = emit_insn (gen_rtx_SET (DImode, r11, offset)); |
| /* APPLE LOCAL async unwind info 5949469 */ |
| if (style < 0 /* || flag_asynchronous_unwind_tables*/) |
| RTX_FRAME_RELATED_P (insn) = 1; |
| insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11, |
| offset)); |
| } |
| if (style < 0) |
| RTX_FRAME_RELATED_P (insn) = 1; |
| /* APPLE LOCAL begin async unwind info 5949350 5949469 */ |
| #if 0 |
| else if (flag_asynchronous_unwind_tables |
| && (src == hard_frame_pointer_rtx |
| || src == stack_pointer_rtx)) |
| RTX_FRAME_RELATED_P (insn) = 1; |
| #endif |
| /* APPLE LOCAL end async unwind info 5949350 5949469 */ |
| } |
| |
| /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ |
| |
| static rtx |
| ix86_internal_arg_pointer (void) |
| { |
| bool has_force_align_arg_pointer = |
| (0 != lookup_attribute (ix86_force_align_arg_pointer_string, |
| TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))); |
| if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN |
| && DECL_NAME (current_function_decl) |
| && MAIN_NAME_P (DECL_NAME (current_function_decl)) |
| && DECL_FILE_SCOPE_P (current_function_decl)) |
| || ix86_force_align_arg_pointer |
| || has_force_align_arg_pointer) |
| { |
| /* Nested functions can't realign the stack due to a register |
| conflict. */ |
| if (DECL_CONTEXT (current_function_decl) |
| && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL) |
| { |
| if (ix86_force_align_arg_pointer) |
| warning (0, "-mstackrealign ignored for nested functions"); |
| if (has_force_align_arg_pointer) |
| error ("%s not supported for nested functions", |
| ix86_force_align_arg_pointer_string); |
| return virtual_incoming_args_rtx; |
| } |
| cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2); |
| return copy_to_reg (cfun->machine->force_align_arg_pointer); |
| } |
| else |
| return virtual_incoming_args_rtx; |
| } |
| |
| /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. |
| This is called from dwarf2out.c to emit call frame instructions |
| for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */ |
| static void |
| ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index) |
| { |
| rtx unspec = SET_SRC (pattern); |
| gcc_assert (GET_CODE (unspec) == UNSPEC); |
| |
| switch (index) |
| { |
| case UNSPEC_REG_SAVE: |
| dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0), |
| SET_DEST (pattern)); |
| break; |
| case UNSPEC_DEF_CFA: |
| dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)), |
| INTVAL (XVECEXP (unspec, 0, 0))); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* APPLE LOCAL begin 3399553 */ |
| /* Calculate the value of FLT_ROUNDS into DEST. |
| |
| The rounding mode is in bits 11:10 of FPSR, and has the following |
| settings: |
| 00 Round to nearest |
| 01 Round to -inf |
| 10 Round to +inf |
| 11 Round to 0 |
| |
| FLT_ROUNDS, on the other hand, expects the following: |
| -1 Undefined |
| 0 Round to 0 |
| 1 Round to nearest |
| 2 Round to +inf |
| 3 Round to -inf |
| |
| To perform the conversion, we do: |
| (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3) |
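| |
| As a check: FPSR bits 11:10 = 01 (round to -inf) gives |
| ((0 | 2) + 1) & 3 = 3, and bits 11:10 = 11 (round to 0) gives |
| ((1 | 2) + 1) & 3 = 0, matching the FLT_ROUNDS values above. |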
| */ |
| void |
| ix86_expand_flt_rounds (rtx dest) |
| { |
| rtx mem = assign_stack_temp (HImode, GET_MODE_SIZE (HImode), 0); |
| rtx temp = gen_reg_rtx (SImode); |
| |
| /* Step #1: Read FPSR. Unfortunately, this can only be done into a |
| 16-bit memory location. */ |
| emit_insn (gen_x86_fnstcw_1 (mem)); |
| |
| /* Step #2: Copy into a register. */ |
| emit_insn (gen_zero_extendhisi2 (dest, mem)); |
| |
| /* Step #3: Perform conversion described above. */ |
| emit_insn (gen_andsi3 (temp, dest, GEN_INT (0x400))); |
| emit_insn (gen_andsi3 (dest, dest, GEN_INT (0x800))); |
| emit_insn (gen_lshrsi3 (temp, temp, GEN_INT (9))); |
| emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (11))); |
| emit_insn (gen_iorsi3 (dest, dest, temp)); |
| emit_insn (gen_addsi3 (dest, dest, const1_rtx)); |
| emit_insn (gen_andsi3 (dest, dest, GEN_INT (3))); |
| } |
| /* APPLE LOCAL end 3399553 */ |
| |
| /* APPLE LOCAL begin fix-and-continue x86 */ |
| #ifndef TARGET_FIX_AND_CONTINUE |
| #define TARGET_FIX_AND_CONTINUE 0 |
| #endif |
| /* APPLE LOCAL end fix-and-continue x86 */ |
| |
| /* Expand the prologue into a bunch of separate insns. */ |
| |
| void |
| ix86_expand_prologue (void) |
| { |
| rtx insn; |
| bool pic_reg_used; |
| struct ix86_frame frame; |
| HOST_WIDE_INT allocate; |
| |
| /* APPLE LOCAL begin fix-and-continue x86 */ |
| if (TARGET_FIX_AND_CONTINUE) |
| { |
| /* gdb on darwin arranges to forward a function from the old |
| address by modifying the first 6 instructions of the function |
| to branch to the overriding function. This is necessary to |
| permit function pointers that point to the old function to |
| actually forward to the new function. */ |
| emit_insn (gen_nop ()); |
| emit_insn (gen_nop ()); |
| emit_insn (gen_nop ()); |
| emit_insn (gen_nop ()); |
| emit_insn (gen_nop ()); |
| emit_insn (gen_nop ()); |
| } |
| /* APPLE LOCAL end fix-and-continue x86 */ |
| |
| ix86_compute_frame_layout (&frame); |
| |
| if (cfun->machine->force_align_arg_pointer) |
| { |
| rtx x, y; |
| |
| /* Grab the argument pointer. */ |
| x = plus_constant (stack_pointer_rtx, 4); |
| y = cfun->machine->force_align_arg_pointer; |
| insn = emit_insn (gen_rtx_SET (VOIDmode, y, x)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| /* The unwind info consists of two parts: install the fafp as the cfa, |
| and record the fafp as the "save register" of the stack pointer. |
| The latter is there so that the unwinder can see where it should |
| restore the stack pointer across the alignment "and" insn below. */ |
| x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA); |
| x = gen_rtx_SET (VOIDmode, y, x); |
| RTX_FRAME_RELATED_P (x) = 1; |
| y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx), |
| UNSPEC_REG_SAVE); |
| y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y); |
| RTX_FRAME_RELATED_P (y) = 1; |
| x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)); |
| x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL); |
| REG_NOTES (insn) = x; |
| |
| /* Align the stack. */ |
| emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (-16))); |
| |
| /* And here we cheat like madmen with the unwind info. We force the |
| cfa register back to sp+4, which is exactly what it was at the |
| start of the function. Re-pushing the return address results in |
| the return at the same spot relative to the cfa, and thus is |
| correct wrt the unwind info. */ |
| x = cfun->machine->force_align_arg_pointer; |
| x = gen_frame_mem (Pmode, plus_constant (x, -4)); |
| insn = emit_insn (gen_push (x)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| x = GEN_INT (4); |
| x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA); |
| x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); |
| x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL); |
| REG_NOTES (insn) = x; |
| } |
| |
| /* Note: AT&T enter does NOT have reversed args. Enter is probably |
| slower on all targets. Also sdb doesn't like it. */ |
| |
| if (frame_pointer_needed) |
| { |
| insn = emit_insn (gen_push (hard_frame_pointer_rtx)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
| allocate = frame.to_allocate; |
| |
| if (!frame.save_regs_using_mov) |
| ix86_emit_save_regs (); |
| else |
| allocate += frame.nregs * UNITS_PER_WORD; |
| |
| /* When using the red zone, we may start saving registers before |
| allocating the stack frame, saving one cycle of the prologue. */ |
| if (TARGET_RED_ZONE && frame.save_regs_using_mov) |
| ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx |
| : stack_pointer_rtx, |
| -frame.nregs * UNITS_PER_WORD); |
| |
| if (allocate == 0) |
| ; |
| else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) |
| /* APPLE LOCAL begin CW asm blocks */ |
| { |
| if (! cfun->iasm_asm_function) |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (-allocate), -1); |
| } |
| /* APPLE LOCAL end CW asm blocks */ |
| else |
| { |
| /* Only valid for Win32. */ |
| rtx eax = gen_rtx_REG (SImode, 0); |
| bool eax_live = ix86_eax_live_at_start_p (); |
| rtx t; |
| |
| gcc_assert (!TARGET_64BIT); |
| |
| if (eax_live) |
| { |
| emit_insn (gen_push (eax)); |
| allocate -= 4; |
| } |
| |
| emit_move_insn (eax, GEN_INT (allocate)); |
| |
| insn = emit_insn (gen_allocate_stack_worker (eax)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); |
| t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); |
| REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, |
| t, REG_NOTES (insn)); |
| |
| if (eax_live) |
| { |
| if (frame_pointer_needed) |
| t = plus_constant (hard_frame_pointer_rtx, |
| allocate |
| - frame.to_allocate |
| - frame.nregs * UNITS_PER_WORD); |
| else |
| t = plus_constant (stack_pointer_rtx, allocate); |
| emit_move_insn (eax, gen_rtx_MEM (SImode, t)); |
| } |
| } |
| |
| if (frame.save_regs_using_mov && !TARGET_RED_ZONE) |
| { |
| if (!frame_pointer_needed || !frame.to_allocate) |
| ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); |
| else |
| ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, |
| -frame.nregs * UNITS_PER_WORD); |
| } |
| |
| pic_reg_used = false; |
| /* APPLE LOCAL begin 5695218 */ |
| if (pic_offset_table_rtx && regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] |
| && !TARGET_64BIT) |
| { |
| unsigned int alt_pic_reg_used; |
| |
| alt_pic_reg_used = ix86_select_alt_pic_regnum (); |
| /* APPLE LOCAL end 5695218 */ |
| |
| if (alt_pic_reg_used != INVALID_REGNUM) |
| /* APPLE LOCAL begin 5695218 */ |
| /* REGNO (pic_offset_table_rtx) = alt_pic_reg_used; */ |
| ix86_globally_replace_pic_reg (alt_pic_reg_used); |
| /* APPLE LOCAL end 5695218 */ |
| |
| pic_reg_used = true; |
| } |
| |
| if (pic_reg_used) |
| { |
| if (TARGET_64BIT) |
| insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); |
| else |
| insn = emit_insn (gen_set_got (pic_offset_table_rtx)); |
| |
| /* Even with accurate pre-reload life analysis, we can wind up |
| deleting all references to the pic register after reload. |
| Consider if cross-jumping unifies two sides of a branch |
| controlled by a comparison vs the only read from a global. |
| In which case, allow the set_got to be deleted, though we're |
| too late to do anything about the ebx save in the prologue. */ |
| REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL); |
| } |
| |
| /* Prevent function calls from being scheduled before the call to mcount. |
| In the pic_reg_used case, make sure that the got load isn't deleted. */ |
| if (current_function_profile) |
| emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx)); |
| } |
| |
| /* Emit code to restore saved registers using MOV insns. First register |
| is restored from POINTER + OFFSET. */ |
| static void |
| ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, |
| int maybe_eh_return) |
| { |
| int regno; |
| rtx base_address = gen_rtx_MEM (Pmode, pointer); |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (ix86_save_reg (regno, maybe_eh_return)) |
| { |
| /* Ensure that adjust_address won't be forced to produce a pointer |
| outside the range allowed by the x86-64 instruction set. */ |
| if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) |
| { |
| rtx r11; |
| |
| r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); |
| emit_move_insn (r11, GEN_INT (offset)); |
| emit_insn (gen_adddi3 (r11, r11, pointer)); |
| base_address = gen_rtx_MEM (Pmode, r11); |
| offset = 0; |
| } |
| emit_move_insn (gen_rtx_REG (Pmode, regno), |
| adjust_address (base_address, Pmode, offset)); |
| offset += UNITS_PER_WORD; |
| } |
| } |
| |
| /* Restore function stack, frame, and registers. */ |
| |
| void |
| ix86_expand_epilogue (int style) |
| { |
| int regno; |
| int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging; |
| struct ix86_frame frame; |
| HOST_WIDE_INT offset; |
| |
| ix86_compute_frame_layout (&frame); |
| |
| /* Calculate start of saved registers relative to ebp. Special care |
| must be taken for the normal return case of a function using |
| eh_return: the eax and edx registers are marked as saved, but not |
| restored along this path. */ |
| offset = frame.nregs; |
| if (current_function_calls_eh_return && style != 2) |
| offset -= 2; |
| offset *= -UNITS_PER_WORD; |
| |
| /* APPLE LOCAL begin CW asm blocks */ |
| /* For an asm function, don't generate an epilogue. */ |
| if (cfun->iasm_asm_function) |
| { |
| emit_jump_insn (gen_return_internal ()); |
| return; |
| } |
| /* APPLE LOCAL end CW asm blocks */ |
| |
| /* If we're only restoring one register and sp is not valid, then |
| use a move instruction to restore the register, since it's |
| less work than reloading sp and popping the register. |
| |
| The default code results in a stack adjustment using an add/lea |
| instruction, while this code results in a LEAVE instruction (or |
| discrete equivalent), so it is profitable in some other cases as |
| well, especially when there are no registers to restore. We also use |
| this code when TARGET_USE_LEAVE and there is exactly one register to |
| pop. This heuristic may need some tuning in future. */ |
| if ((!sp_valid && frame.nregs <= 1) |
| || (TARGET_EPILOGUE_USING_MOVE |
| && cfun->machine->use_fast_prologue_epilogue |
| && (frame.nregs > 1 || frame.to_allocate)) |
| || (frame_pointer_needed && !frame.nregs && frame.to_allocate) |
| || (frame_pointer_needed && TARGET_USE_LEAVE |
| && cfun->machine->use_fast_prologue_epilogue |
| && frame.nregs == 1) |
| || current_function_calls_eh_return) |
| { |
| /* Restore registers. We can use ebp or esp to address the memory |
| locations. If both are available, default to ebp, since offsets are |
| known to be small. The only exception is when esp points directly to |
| the end of the block of saved registers, where we may simplify the |
| addressing mode. */ |
| |
| if (!frame_pointer_needed || (sp_valid && !frame.to_allocate)) |
| ix86_emit_restore_regs_using_mov (stack_pointer_rtx, |
| frame.to_allocate, style == 2); |
| else |
| ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, |
| offset, style == 2); |
| |
| /* eh_return epilogues need %ecx added to the stack pointer. */ |
| if (style == 2) |
| { |
| rtx tmp, sa = EH_RETURN_STACKADJ_RTX; |
| |
| if (frame_pointer_needed) |
| { |
| tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); |
| tmp = plus_constant (tmp, UNITS_PER_WORD); |
| emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); |
| |
| tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); |
| emit_move_insn (hard_frame_pointer_rtx, tmp); |
| |
| pro_epilogue_adjust_stack (stack_pointer_rtx, sa, |
| const0_rtx, style); |
| } |
| else |
| { |
| tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); |
| tmp = plus_constant (tmp, (frame.to_allocate |
| + frame.nregs * UNITS_PER_WORD)); |
| emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); |
| } |
| } |
| else if (!frame_pointer_needed) |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (frame.to_allocate |
| + frame.nregs * UNITS_PER_WORD), |
| style); |
| /* If not an i386, mov & pop is faster than "leave". */ |
| else if (TARGET_USE_LEAVE || optimize_size |
| || !cfun->machine->use_fast_prologue_epilogue) |
| emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); |
| else |
| { |
| pro_epilogue_adjust_stack (stack_pointer_rtx, |
| hard_frame_pointer_rtx, |
| const0_rtx, style); |
| if (TARGET_64BIT) |
| emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); |
| else |
| emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); |
| } |
| } |
| else |
| { |
| /* First step is to deallocate the stack frame so that we can |
| pop the registers. */ |
| if (!sp_valid) |
| { |
| gcc_assert (frame_pointer_needed); |
| pro_epilogue_adjust_stack (stack_pointer_rtx, |
| hard_frame_pointer_rtx, |
| GEN_INT (offset), style); |
| } |
| else if (frame.to_allocate) |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (frame.to_allocate), style); |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (ix86_save_reg (regno, false)) |
| { |
| if (TARGET_64BIT) |
| emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno))); |
| else |
| emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno))); |
| } |
| if (frame_pointer_needed) |
| { |
| /* Leave results in shorter dependency chains on CPUs that are |
| able to grok it fast. */ |
| if (TARGET_USE_LEAVE) |
| emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); |
| else if (TARGET_64BIT) |
| emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); |
| else |
| emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); |
| } |
| } |
| |
| if (cfun->machine->force_align_arg_pointer) |
| { |
| emit_insn (gen_addsi3 (stack_pointer_rtx, |
| cfun->machine->force_align_arg_pointer, |
| GEN_INT (-4))); |
| } |
| |
| /* Sibcall epilogues don't want a return instruction. */ |
| if (style == 0) |
| return; |
| |
| if (current_function_pops_args && current_function_args_size) |
| { |
| rtx popc = GEN_INT (current_function_pops_args); |
| |
| /* i386 can only pop 64K bytes. If asked to pop more, pop the |
| return address, do an explicit add, and jump indirectly to the |
| caller. */ |
| |
| if (current_function_pops_args >= 65536) |
| { |
| rtx ecx = gen_rtx_REG (SImode, 2); |
| |
| /* There is no "pascal" calling convention in the 64-bit ABI. */ |
| gcc_assert (!TARGET_64BIT); |
| |
| emit_insn (gen_popsi1 (ecx)); |
| emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); |
| emit_jump_insn (gen_return_indirect_internal (ecx)); |
| } |
| else |
| emit_jump_insn (gen_return_pop_internal (popc)); |
| } |
| else |
| emit_jump_insn (gen_return_internal ()); |
| } |
| |
| /* Reset state that compiling the function may have modified, such as |
| the hard register backing pic_offset_table_rtx. */ |
| |
| static void |
| ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, |
| HOST_WIDE_INT size ATTRIBUTE_UNUSED) |
| { |
| if (pic_offset_table_rtx) |
| REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM; |
| #if TARGET_MACHO |
| /* Mach-O doesn't support labels at the end of objects, so if |
| it looks like we might want one, insert a NOP. */ |
| { |
| rtx insn = get_last_insn (); |
| while (insn |
| && NOTE_P (insn) |
| && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL) |
| insn = PREV_INSN (insn); |
| if (insn |
| && (LABEL_P (insn) |
| || (NOTE_P (insn) |
| && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))) |
| fputs ("\tnop\n", file); |
| } |
| #endif |
| |
| } |
| |
| /* Extract the parts of an RTL expression that is a valid memory address |
| for an instruction. Return 0 if the structure of the address is |
| grossly off. Return -1 if the address contains an ASHIFT, so it is not |
| strictly valid, but is still used for computing the length of an lea |
| instruction. */ |
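| /* For example, the address (plus (plus (mult (reg ecx) (const_int 4)) |
| (reg ebx)) (const_int 12)) -- 12(%ebx,%ecx,4) in AT&T syntax -- |
| decomposes into base %ebx, index %ecx, scale 4 and disp 12. */ |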
| |
| int |
| ix86_decompose_address (rtx addr, struct ix86_address *out) |
| { |
| rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; |
| rtx base_reg, index_reg; |
| HOST_WIDE_INT scale = 1; |
| rtx scale_rtx = NULL_RTX; |
| int retval = 1; |
| enum ix86_address_seg seg = SEG_DEFAULT; |
| |
| if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG) |
| base = addr; |
| else if (GET_CODE (addr) == PLUS) |
| { |
| rtx addends[4], op; |
| int n = 0, i; |
| |
| op = addr; |
| do |
| { |
| if (n >= 4) |
| return 0; |
| addends[n++] = XEXP (op, 1); |
| op = XEXP (op, 0); |
| } |
| while (GET_CODE (op) == PLUS); |
| if (n >= 4) |
| return 0; |
| addends[n] = op; |
| |
| for (i = n; i >= 0; --i) |
| { |
| op = addends[i]; |
| switch (GET_CODE (op)) |
| { |
| case MULT: |
| if (index) |
| return 0; |
| index = XEXP (op, 0); |
| scale_rtx = XEXP (op, 1); |
| break; |
| |
| case UNSPEC: |
| if (XINT (op, 1) == UNSPEC_TP |
| && TARGET_TLS_DIRECT_SEG_REFS |
| && seg == SEG_DEFAULT) |
| seg = TARGET_64BIT ? SEG_FS : SEG_GS; |
| else |
| return 0; |
| break; |
| |
| case REG: |
| case SUBREG: |
| if (!base) |
| base = op; |
| else if (!index) |
| index = op; |
| else |
| return 0; |
| break; |
| |
| case CONST: |
| case CONST_INT: |
| case SYMBOL_REF: |
| case LABEL_REF: |
| if (disp) |
| return 0; |
| disp = op; |
| break; |
| |
| default: |
| return 0; |
| } |
| } |
| } |
| else if (GET_CODE (addr) == MULT) |
| { |
| index = XEXP (addr, 0); /* index*scale */ |
| scale_rtx = XEXP (addr, 1); |
| } |
| else if (GET_CODE (addr) == ASHIFT) |
| { |
| rtx tmp; |
| |
| /* We're called for lea too, which implements ashift on occasion. */ |
| index = XEXP (addr, 0); |
| tmp = XEXP (addr, 1); |
| if (GET_CODE (tmp) != CONST_INT) |
| return 0; |
| scale = INTVAL (tmp); |
| if ((unsigned HOST_WIDE_INT) scale > 3) |
| return 0; |
| scale = 1 << scale; |
| retval = -1; |
| } |
| else |
| disp = addr; /* displacement */ |
| |
| /* Extract the integral value of scale. */ |
| if (scale_rtx) |
| { |
| if (GET_CODE (scale_rtx) != CONST_INT) |
| return 0; |
| scale = INTVAL (scale_rtx); |
| } |
| |
| base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base; |
| index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index; |
| |
| /* Allow the arg pointer and stack pointer as an index if there is no scaling. */ |
| if (base_reg && index_reg && scale == 1 |
| && (index_reg == arg_pointer_rtx |
| || index_reg == frame_pointer_rtx |
| || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) |
| { |
| rtx tmp; |
| tmp = base, base = index, index = tmp; |
| tmp = base_reg, base_reg = index_reg, index_reg = tmp; |
| } |
| |
| /* Special case: %ebp cannot be encoded as a base without a displacement. */ |
| if ((base_reg == hard_frame_pointer_rtx |
| || base_reg == frame_pointer_rtx |
| || base_reg == arg_pointer_rtx) && !disp) |
| disp = const0_rtx; |
| |
| /* Special case: on K6, [%esi] causes the instruction to be vector |
| decoded. Avoid this by transforming it to [%esi+0]. */ |
| if (ix86_tune == PROCESSOR_K6 && !optimize_size |
| && base_reg && !index_reg && !disp |
| && REG_P (base_reg) |
| && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG) |
| disp = const0_rtx; |
| |
| /* Special case: encode reg+reg instead of reg*2. */ |
| if (!base && index && scale && scale == 2) |
| base = index, base_reg = index_reg, scale = 1; |
| |
| /* Special case: scaling cannot be encoded without base or displacement. */ |
| if (!base && !disp && index && scale != 1) |
| disp = const0_rtx; |
| |
| out->base = base; |
| out->index = index; |
| out->disp = disp; |
| out->scale = scale; |
| out->seg = seg; |
| |
| return retval; |
| } |
| |
| /* Return the cost of the memory address X. |
| For i386, it is better to use a complex address than to let gcc copy |
| the address into a reg and make a new pseudo. But not if the address |
| requires two regs - that would mean more pseudos with longer |
| lifetimes. */ |
| static int |
| ix86_address_cost (rtx x) |
| { |
| struct ix86_address parts; |
| int cost = 1; |
| int ok = ix86_decompose_address (x, &parts); |
| |
| gcc_assert (ok); |
| |
| if (parts.base && GET_CODE (parts.base) == SUBREG) |
| parts.base = SUBREG_REG (parts.base); |
| if (parts.index && GET_CODE (parts.index) == SUBREG) |
| parts.index = SUBREG_REG (parts.index); |
| |
| /* More complex memory references are better. */ |
| if (parts.disp && parts.disp != const0_rtx) |
| cost--; |
| if (parts.seg != SEG_DEFAULT) |
| cost--; |
| |
| /* Attempt to minimize number of registers in the address. */ |
| if ((parts.base |
| && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) |
| || (parts.index |
| && (!REG_P (parts.index) |
| || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) |
| cost++; |
| |
| if (parts.base |
| && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) |
| && parts.index |
| && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) |
| && parts.base != parts.index) |
| cost++; |
| |
| /* The AMD-K6 doesn't like addresses with the ModR/M byte set to |
| 00_xxx_100b, since its predecode logic can't detect the length of such |
| instructions and decoding degenerates to the vector decoder. Increase |
| the cost of such addresses here. The penalty is at least 2 cycles. It |
| may be worthwhile to split such addresses or even to refuse them |
| entirely. |
| |
| The following addressing modes are affected: |
| [base+scale*index] |
| [scale*index+disp] |
| [base+index] |
| |
| The first and last cases may be avoidable by explicitly coding the |
| zero displacement in the memory address, but I don't have an AMD-K6 |
| machine handy to check this theory. */ |
| |
| if (TARGET_K6 |
| && ((!parts.disp && parts.base && parts.index && parts.scale != 1) |
| || (parts.disp && !parts.base && parts.index && parts.scale != 1) |
| || (!parts.disp && parts.base && parts.index && parts.scale == 1))) |
| cost += 10; |
| |
| return cost; |
| } |
| |
| /* If X is a machine specific address (i.e. a symbol or label being |
| referenced as a displacement from the GOT implemented using an |
| UNSPEC), then return the base term. Otherwise return X. */ |
| |
| rtx |
| ix86_find_base_term (rtx x) |
| { |
| rtx term; |
| |
| if (TARGET_64BIT) |
| { |
| if (GET_CODE (x) != CONST) |
| return x; |
| term = XEXP (x, 0); |
| if (GET_CODE (term) == PLUS |
| && (GET_CODE (XEXP (term, 1)) == CONST_INT |
| || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) |
| term = XEXP (term, 0); |
| if (GET_CODE (term) != UNSPEC |
| || XINT (term, 1) != UNSPEC_GOTPCREL) |
| return x; |
| |
| term = XVECEXP (term, 0, 0); |
| |
| if (GET_CODE (term) != SYMBOL_REF |
| && GET_CODE (term) != LABEL_REF) |
| return x; |
| |
| return term; |
| } |
| |
| term = ix86_delegitimize_address (x); |
| |
| if (GET_CODE (term) != SYMBOL_REF |
| && GET_CODE (term) != LABEL_REF) |
| return x; |
| |
| return term; |
| } |
| |
| /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as |
| this is used to form addresses to local data when -fPIC is in |
| use. */ |
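| /* For example, (minus (symbol_ref "_foo") (symbol_ref "<pic base>")) |
| is accepted; the "<pic base>" name is matched literally below. */ |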
| |
| static bool |
| darwin_local_data_pic (rtx disp) |
| { |
| if (GET_CODE (disp) == MINUS) |
| { |
| if (GET_CODE (XEXP (disp, 0)) == LABEL_REF |
| || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF) |
| if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF) |
| { |
| const char *sym_name = XSTR (XEXP (disp, 1), 0); |
| if (! strcmp (sym_name, "<pic base>")) |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /* Determine if a given RTX is a valid constant. We already know this |
| satisfies CONSTANT_P. */ |
| |
| bool |
| legitimate_constant_p (rtx x) |
| { |
| switch (GET_CODE (x)) |
| { |
| case CONST: |
| x = XEXP (x, 0); |
| |
| if (GET_CODE (x) == PLUS) |
| { |
| if (GET_CODE (XEXP (x, 1)) != CONST_INT) |
| return false; |
| x = XEXP (x, 0); |
| } |
| |
| if (TARGET_MACHO && darwin_local_data_pic (x)) |
| return true; |
| |
| /* Only some unspecs are valid as "constants". */ |
| if (GET_CODE (x) == UNSPEC) |
| switch (XINT (x, 1)) |
| { |
| case UNSPEC_GOTOFF: |
| return TARGET_64BIT; |
| case UNSPEC_TPOFF: |
| case UNSPEC_NTPOFF: |
| x = XVECEXP (x, 0, 0); |
| return (GET_CODE (x) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); |
| case UNSPEC_DTPOFF: |
| x = XVECEXP (x, 0, 0); |
| return (GET_CODE (x) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); |
| default: |
| return false; |
| } |
| |
| /* We must have drilled down to a symbol. */ |
| if (GET_CODE (x) == LABEL_REF) |
| return true; |
| if (GET_CODE (x) != SYMBOL_REF) |
| return false; |
| /* FALLTHRU */ |
| |
| case SYMBOL_REF: |
| /* TLS symbols are never valid. */ |
| if (SYMBOL_REF_TLS_MODEL (x)) |
| return false; |
| /* APPLE LOCAL begin dynamic-no-pic */ |
| #if TARGET_MACHO |
| if (TARGET_MACHO && MACHO_DYNAMIC_NO_PIC_P) |
| return machopic_symbol_defined_p (x); |
| #endif |
| break; |
| |
| case PLUS: |
| { |
| rtx left = XEXP (x, 0); |
| rtx right = XEXP (x, 1); |
| bool left_is_constant = legitimate_constant_p (left); |
| bool right_is_constant = legitimate_constant_p (right); |
| return left_is_constant && right_is_constant; |
| } |
| break; |
| /* APPLE LOCAL end dynamic-no-pic */ |
| |
| case CONST_DOUBLE: |
| if (GET_MODE (x) == TImode |
| && x != CONST0_RTX (TImode) |
| && !TARGET_64BIT) |
| return false; |
| break; |
| |
| case CONST_VECTOR: |
| /* APPLE LOCAL begin radar 4874197 mainline candidate */ |
| if (standard_sse_constant_p (x)) |
| /* APPLE LOCAL end radar 4874197 mainline candidate */ |
| return true; |
| return false; |
| |
| default: |
| break; |
| } |
| |
| /* Otherwise we handle everything else in the move patterns. */ |
| return true; |
| } |
| |
| /* Determine if it's legal to put X into the constant pool. This |
| is not possible for the address of thread-local symbols, which |
| is checked above. */ |
| |
| static bool |
| ix86_cannot_force_const_mem (rtx x) |
| { |
| /* We can always put integral constants and vectors in memory. */ |
| switch (GET_CODE (x)) |
| { |
| case CONST_INT: |
| case CONST_DOUBLE: |
| case CONST_VECTOR: |
| return false; |
| |
| default: |
| break; |
| } |
| return !legitimate_constant_p (x); |
| } |
| |
| /* Determine if a given RTX is a valid constant address. */ |
| |
| bool |
| constant_address_p (rtx x) |
| { |
| return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1); |
| } |
| |
| /* Nonzero if the constant value X is a legitimate general operand |
| when generating PIC code. It is given that flag_pic is on and |
| that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ |
| |
| bool |
| legitimate_pic_operand_p (rtx x) |
| { |
| rtx inner; |
| |
| switch (GET_CODE (x)) |
| { |
| case CONST: |
| inner = XEXP (x, 0); |
| if (GET_CODE (inner) == PLUS |
| && GET_CODE (XEXP (inner, 1)) == CONST_INT) |
| inner = XEXP (inner, 0); |
| |
| /* Only some unspecs are valid as "constants". */ |
| if (GET_CODE (inner) == UNSPEC) |
| switch (XINT (inner, 1)) |
| { |
| case UNSPEC_GOTOFF: |
| return TARGET_64BIT; |
| case UNSPEC_TPOFF: |
| x = XVECEXP (inner, 0, 0); |
| return (GET_CODE (x) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); |
| default: |
| return false; |
| } |
| /* FALLTHRU */ |
| |
| case SYMBOL_REF: |
| case LABEL_REF: |
| return legitimate_pic_address_disp_p (x); |
| |
| default: |
| return true; |
| } |
| } |
| |
| /* Determine if a given CONST RTX is a valid memory displacement |
| in PIC mode. */ |
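| /* For illustration (a sketch; symbol names are hypothetical), in 32bit |
| PIC a typical valid displacement is |
| (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) |
| for local data, or |
| (const (unspec [(symbol_ref "f")] UNSPEC_GOT)) |
| for a GOT slot. */ |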
| |
| int |
| legitimate_pic_address_disp_p (rtx disp) |
| { |
| bool saw_plus; |
| |
| /* In 64bit mode we can allow direct addresses of symbols and labels |
| when they are not dynamic symbols. */ |
| if (TARGET_64BIT) |
| { |
| rtx op0 = disp, op1; |
| |
| switch (GET_CODE (disp)) |
| { |
| case LABEL_REF: |
| return true; |
| |
| case CONST: |
| if (GET_CODE (XEXP (disp, 0)) != PLUS) |
| break; |
| op0 = XEXP (XEXP (disp, 0), 0); |
| op1 = XEXP (XEXP (disp, 0), 1); |
| if (GET_CODE (op1) != CONST_INT |
| || INTVAL (op1) >= 16*1024*1024 |
| || INTVAL (op1) < -16*1024*1024) |
| break; |
| if (GET_CODE (op0) == LABEL_REF) |
| return true; |
| if (GET_CODE (op0) != SYMBOL_REF) |
| break; |
| /* FALLTHRU */ |
| |
| case SYMBOL_REF: |
| /* TLS references should always be enclosed in UNSPEC. */ |
| if (SYMBOL_REF_TLS_MODEL (op0)) |
| return false; |
| /* APPLE LOCAL begin fix-and-continue 6227434 */ |
| #if TARGET_MACHO |
| if (machopic_data_defined_p (op0)) |
| return true; |
| |
| /* Under -mfix-and-continue, even local storage is |
| addressed via the GOT, so that the value of local |
| statics is preserved when a function is "fixed." */ |
| if (indirect_data (op0)) |
| return false; |
| #endif |
| /* APPLE LOCAL end fix-and-continue 6227434 */ |
| if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)) |
| return true; |
| break; |
| |
| default: |
| break; |
| } |
| } |
| if (GET_CODE (disp) != CONST) |
| return 0; |
| disp = XEXP (disp, 0); |
| |
| if (TARGET_64BIT) |
| { |
| /* It is unsafe to allow PLUS expressions; they could exceed the |
| allowed displacement range of GOT references. We should not need |
| these anyway. */ |
| if (GET_CODE (disp) != UNSPEC |
| || (XINT (disp, 1) != UNSPEC_GOTPCREL |
| && XINT (disp, 1) != UNSPEC_GOTOFF)) |
| return 0; |
| |
| if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF |
| && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) |
| return 0; |
| return 1; |
| } |
| |
| saw_plus = false; |
| if (GET_CODE (disp) == PLUS) |
| { |
| if (GET_CODE (XEXP (disp, 1)) != CONST_INT) |
| return 0; |
| disp = XEXP (disp, 0); |
| saw_plus = true; |
| } |
| |
| if (TARGET_MACHO && darwin_local_data_pic (disp)) |
| return 1; |
| |
| if (GET_CODE (disp) != UNSPEC) |
| return 0; |
| |
| switch (XINT (disp, 1)) |
| { |
| case UNSPEC_GOT: |
| if (saw_plus) |
| return false; |
| return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF; |
| case UNSPEC_GOTOFF: |
| /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. |
| While the ABI also specifies a 32bit relocation, we don't produce |
| it in the small PIC model at all. */ |
| if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF |
| || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) |
| && !TARGET_64BIT) |
| return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode); |
| return false; |
| case UNSPEC_GOTTPOFF: |
| case UNSPEC_GOTNTPOFF: |
| case UNSPEC_INDNTPOFF: |
| if (saw_plus) |
| return false; |
| disp = XVECEXP (disp, 0, 0); |
| return (GET_CODE (disp) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); |
| case UNSPEC_NTPOFF: |
| disp = XVECEXP (disp, 0, 0); |
| return (GET_CODE (disp) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); |
| case UNSPEC_DTPOFF: |
| disp = XVECEXP (disp, 0, 0); |
| return (GET_CODE (disp) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); |
| } |
| |
| return 0; |
| } |
| |
| /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid |
| memory address for an instruction. The MODE argument is the machine mode |
| for the MEM expression that wants to use this address. |
| |
| It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should |
| convert common non-canonical forms to canonical form so that they will |
| be recognized. */ |
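| /* For illustration (a sketch only), the canonical shape decomposed |
| below is |
| (plus (plus (mult (reg index) (const_int scale)) (reg base)) |
| (const_int disp)) |
| with each part optional; e.g. (plus (reg %ebx) (const_int 8)) has |
| base %ebx, no index, scale 1 and displacement 8. */ |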
| |
| int |
| legitimate_address_p (enum machine_mode mode, rtx addr, int strict) |
| { |
| struct ix86_address parts; |
| rtx base, index, disp; |
| HOST_WIDE_INT scale; |
| const char *reason = NULL; |
| rtx reason_rtx = NULL_RTX; |
| |
| if (TARGET_DEBUG_ADDR) |
| { |
| fprintf (stderr, |
| "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n", |
| GET_MODE_NAME (mode), strict); |
| debug_rtx (addr); |
| } |
| |
| if (ix86_decompose_address (addr, &parts) <= 0) |
| { |
| reason = "decomposition failed"; |
| goto report_error; |
| } |
| |
| base = parts.base; |
| index = parts.index; |
| disp = parts.disp; |
| scale = parts.scale; |
| |
| /* Validate base register. |
| |
| Don't allow SUBREGs that span more than a word here. They can lead |
| to spill failures when the base is one word of a two-word structure, |
| which is represented internally as a DImode int. */ |
| |
| if (base) |
| { |
| rtx reg; |
| reason_rtx = base; |
| |
| if (REG_P (base)) |
| reg = base; |
| else if (GET_CODE (base) == SUBREG |
| && REG_P (SUBREG_REG (base)) |
| && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base))) |
| <= UNITS_PER_WORD) |
| reg = SUBREG_REG (base); |
| else |
| { |
| reason = "base is not a register"; |
| goto report_error; |
| } |
| |
| if (GET_MODE (base) != Pmode) |
| { |
| reason = "base is not in Pmode"; |
| goto report_error; |
| } |
| |
| if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) |
| || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) |
| { |
| reason = "base is not valid"; |
| goto report_error; |
| } |
| } |
| |
| /* Validate index register. |
| |
| Don't allow SUBREGs that span more than a word here -- same as above. */ |
| |
| if (index) |
| { |
| rtx reg; |
| reason_rtx = index; |
| |
| if (REG_P (index)) |
| reg = index; |
| else if (GET_CODE (index) == SUBREG |
| && REG_P (SUBREG_REG (index)) |
| && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index))) |
| <= UNITS_PER_WORD) |
| reg = SUBREG_REG (index); |
| else |
| { |
| reason = "index is not a register"; |
| goto report_error; |
| } |
| |
| if (GET_MODE (index) != Pmode) |
| { |
| reason = "index is not in Pmode"; |
| goto report_error; |
| } |
| |
| if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) |
| || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) |
| { |
| reason = "index is not valid"; |
| goto report_error; |
| } |
| } |
| |
| /* Validate scale factor. */ |
| if (scale != 1) |
| { |
| reason_rtx = GEN_INT (scale); |
| if (!index) |
| { |
| reason = "scale without index"; |
| goto report_error; |
| } |
| |
| if (scale != 2 && scale != 4 && scale != 8) |
| { |
| reason = "scale is not a valid multiplier"; |
| goto report_error; |
| } |
| } |
| |
| /* Validate displacement. */ |
| if (disp) |
| { |
| reason_rtx = disp; |
| |
| if (GET_CODE (disp) == CONST |
| && GET_CODE (XEXP (disp, 0)) == UNSPEC) |
| switch (XINT (XEXP (disp, 0), 1)) |
| { |
| /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit |
| when used. While the ABI also specifies 32bit relocations, we don't |
| produce them at all and use IP-relative addressing instead. */ |
| case UNSPEC_GOT: |
| case UNSPEC_GOTOFF: |
| gcc_assert (flag_pic); |
| if (!TARGET_64BIT) |
| goto is_legitimate_pic; |
| reason = "64bit address unspec"; |
| goto report_error; |
| |
| case UNSPEC_GOTPCREL: |
| gcc_assert (flag_pic); |
| goto is_legitimate_pic; |
| |
| case UNSPEC_GOTTPOFF: |
| case UNSPEC_GOTNTPOFF: |
| case UNSPEC_INDNTPOFF: |
| case UNSPEC_NTPOFF: |
| case UNSPEC_DTPOFF: |
| break; |
| |
| default: |
| reason = "invalid address unspec"; |
| goto report_error; |
| } |
| |
| else if (SYMBOLIC_CONST (disp) |
| && (flag_pic |
| || (TARGET_MACHO |
| #if TARGET_MACHO |
| && MACHOPIC_INDIRECT |
| && !machopic_operand_p (disp) |
| #endif |
| ))) |
| { |
| |
| is_legitimate_pic: |
| if (TARGET_64BIT && (index || base)) |
| { |
| /* foo@dtpoff(%rX) is ok. */ |
| if (GET_CODE (disp) != CONST |
| || GET_CODE (XEXP (disp, 0)) != PLUS |
| || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC |
| || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT |
| || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF |
| && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) |
| { |
| reason = "non-constant pic memory reference"; |
| goto report_error; |
| } |
| } |
| /* APPLE LOCAL begin dynamic-no-pic */ |
| else if (flag_pic && ! legitimate_pic_address_disp_p (disp)) |
| { |
| reason = "displacement is an invalid pic construct"; |
| goto report_error; |
| } |
| #if TARGET_MACHO |
| else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp)) |
| { |
| reason = "displacment must be referenced via non_lazy_pointer"; |
| goto report_error; |
| } |
| #endif |
| /* APPLE LOCAL end dynamic-no-pic */ |
| |
| /* This code used to verify that a symbolic pic displacement |
| includes the pic_offset_table_rtx register. |
| |
| While this is a good idea, unfortunately these constructs may |
| be created by the "adds using lea" optimization for incorrect |
| code like: |
| |
| int a; |
| int foo(int i) |
| { |
| return *(&a+i); |
| } |
| |
| This code is nonsensical, but results in addressing the |
| GOT table with a pic_offset_table_rtx base. We can't |
| just refuse it easily, since it gets matched by the |
| "addsi3" pattern, which later gets split to an lea if the |
| output register differs from the input. While this |
| could be handled by a separate addsi pattern for this case |
| that never results in an lea, disabling this test seems to |
| be the easier and correct fix for the crash. */ |
| } |
| else if (GET_CODE (disp) != LABEL_REF |
| && GET_CODE (disp) != CONST_INT |
| && (GET_CODE (disp) != CONST |
| || !legitimate_constant_p (disp)) |
| && (GET_CODE (disp) != SYMBOL_REF |
| || !legitimate_constant_p (disp))) |
| { |
| reason = "displacement is not constant"; |
| goto report_error; |
| } |
| else if (TARGET_64BIT |
| && !x86_64_immediate_operand (disp, VOIDmode)) |
| { |
| reason = "displacement is out of range"; |
| goto report_error; |
| } |
| } |
| |
| /* Everything looks valid. */ |
| if (TARGET_DEBUG_ADDR) |
| fprintf (stderr, "Success.\n"); |
| return TRUE; |
| |
| report_error: |
| if (TARGET_DEBUG_ADDR) |
| { |
| fprintf (stderr, "Error: %s\n", reason); |
| debug_rtx (reason_rtx); |
| } |
| return FALSE; |
| } |
| |
| /* Return a unique alias set for the GOT. */ |
| |
| static HOST_WIDE_INT |
| ix86_GOT_alias_set (void) |
| { |
| static HOST_WIDE_INT set = -1; |
| if (set == -1) |
| set = new_alias_set (); |
| return set; |
| } |
| |
| /* Return a legitimate reference for ORIG (an address) using the |
| register REG. If REG is 0, a new pseudo is generated. |
| |
| There are two types of references that must be handled: |
| |
| 1. Global data references must load the address from the GOT, via |
| the PIC reg. An insn is emitted to do this load, and the reg is |
| returned. |
| |
| 2. Static data references, constant pool addresses, and code labels |
| compute the address as an offset from the GOT, whose base is in |
| the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to |
| differentiate them from global data objects. The returned |
| address is the PIC reg + an unspec constant. |
| |
| GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC |
| reg also appears in the address. */ |
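| /* For illustration (a sketch of the RTL shapes built below), in 32bit |
| non-Mach-O PIC the two cases produce, schematically: |
| global: (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT)))) |
| local: (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF))) */ |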
| |
| static rtx |
| legitimize_pic_address (rtx orig, rtx reg) |
| { |
| rtx addr = orig; |
| rtx new = orig; |
| rtx base; |
| |
| #if TARGET_MACHO |
| if (TARGET_MACHO && !TARGET_64BIT) |
| { |
| if (reg == 0) |
| reg = gen_reg_rtx (Pmode); |
| /* Use the generic Mach-O PIC machinery. */ |
| return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); |
| } |
| #endif |
| |
| if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) |
| new = addr; |
| else if (TARGET_64BIT |
| && ix86_cmodel != CM_SMALL_PIC |
| && local_symbolic_operand (addr, Pmode)) |
| { |
| rtx tmpreg; |
| /* This symbol may be referenced via a displacement from the PIC |
| base address (@GOTOFF). */ |
| |
| if (reload_in_progress) |
| regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; |
| if (GET_CODE (addr) == CONST) |
| addr = XEXP (addr, 0); |
| if (GET_CODE (addr) == PLUS) |
| { |
| new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF); |
| new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1)); |
| } |
| else |
| new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); |
| new = gen_rtx_CONST (Pmode, new); |
| if (!reg) |
| tmpreg = gen_reg_rtx (Pmode); |
| else |
| tmpreg = reg; |
| emit_move_insn (tmpreg, new); |
| |
| if (reg != 0) |
| { |
| new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx, |
| tmpreg, 1, OPTAB_DIRECT); |
| new = reg; |
| } |
| else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg); |
| } |
| else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode)) |
| { |
| /* This symbol may be referenced via a displacement from the PIC |
| base address (@GOTOFF). */ |
| |
| if (reload_in_progress) |
| regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; |
| if (GET_CODE (addr) == CONST) |
| addr = XEXP (addr, 0); |
| if (GET_CODE (addr) == PLUS) |
| { |
| new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF); |
| new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1)); |
| } |
| else |
| new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); |
| new = gen_rtx_CONST (Pmode, new); |
| new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); |
| |
| if (reg != 0) |
| { |
| emit_move_insn (reg, new); |
| new = reg; |
| } |
| } |
| else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) |
| { |
| if (TARGET_64BIT) |
| { |
| new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); |
| new = gen_rtx_CONST (Pmode, new); |
| new = gen_const_mem (Pmode, new); |
| set_mem_alias_set (new, ix86_GOT_alias_set ()); |
| |
| if (reg == 0) |
| reg = gen_reg_rtx (Pmode); |
| /* Use gen_movsi directly; otherwise the address is loaded |
| into a register for CSE. We don't want to CSE these addresses; |
| instead we CSE addresses from the GOT table, so skip this. */ |
| emit_insn (gen_movsi (reg, new)); |
| new = reg; |
| } |
| else |
| { |
| /* This symbol must be referenced via a load from the |
| Global Offset Table (@GOT). */ |
| |
| if (reload_in_progress) |
| regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; |
| new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); |
| new = gen_rtx_CONST (Pmode, new); |
| new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); |
| new = gen_const_mem (Pmode, new); |
| set_mem_alias_set (new, ix86_GOT_alias_set ()); |
| |
| if (reg == 0) |
| reg = gen_reg_rtx (Pmode); |
| emit_move_insn (reg, new); |
| new = reg; |
| } |
| } |
| else |
| { |
| if (GET_CODE (addr) == CONST_INT |
| && !x86_64_immediate_operand (addr, VOIDmode)) |
| { |
| if (reg) |
| { |
| emit_move_insn (reg, addr); |
| new = reg; |
| } |
| else |
| new = force_reg (Pmode, addr); |
| } |
| else if (GET_CODE (addr) == CONST) |
| { |
| addr = XEXP (addr, 0); |
| |
| /* We must match stuff we generate before. Assume the only |
| unspecs that can get here are ours. Not that we could do |
| anything with them anyway.... */ |
| if (GET_CODE (addr) == UNSPEC |
| || (GET_CODE (addr) == PLUS |
| && GET_CODE (XEXP (addr, 0)) == UNSPEC)) |
| return orig; |
| gcc_assert (GET_CODE (addr) == PLUS); |
| } |
| if (GET_CODE (addr) == PLUS) |
| { |
| rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); |
| |
| /* Check first to see if this is a constant offset from a @GOTOFF |
| symbol reference. */ |
| if (local_symbolic_operand (op0, Pmode) |
| && GET_CODE (op1) == CONST_INT) |
| { |
| if (!TARGET_64BIT) |
| { |
| if (reload_in_progress) |
| regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; |
| new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), |
| UNSPEC_GOTOFF); |
| new = gen_rtx_PLUS (Pmode, new, op1); |
| new = gen_rtx_CONST (Pmode, new); |
| new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); |
| |
| if (reg != 0) |
| { |
| emit_move_insn (reg, new); |
| new = reg; |
| } |
| } |
| else |
| { |
| if (INTVAL (op1) < -16*1024*1024 |
| || INTVAL (op1) >= 16*1024*1024) |
| { |
| if (!x86_64_immediate_operand (op1, Pmode)) |
| op1 = force_reg (Pmode, op1); |
| new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); |
| } |
| } |
| } |
| else |
| { |
| base = legitimize_pic_address (XEXP (addr, 0), reg); |
| new = legitimize_pic_address (XEXP (addr, 1), |
| base == reg ? NULL_RTX : reg); |
| |
| if (GET_CODE (new) == CONST_INT) |
| new = plus_constant (base, INTVAL (new)); |
| else |
| { |
| if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1))) |
| { |
| base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0)); |
| new = XEXP (new, 1); |
| } |
| new = gen_rtx_PLUS (Pmode, base, new); |
| /* APPLE LOCAL begin fix-and-continue 6358507 */ |
| if (!legitimate_address_p (Pmode, new, FALSE)) |
| new = force_reg (Pmode, new); |
| /* APPLE LOCAL end fix-and-continue 6358507 */ |
| } |
| } |
| } |
| } |
| return new; |
| } |
| |
| /* Load the thread pointer. If TO_REG is true, force it into a register. */ |
| |
| static rtx |
| get_thread_pointer (int to_reg) |
| { |
| rtx tp, reg, insn; |
| |
| tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); |
| if (!to_reg) |
| return tp; |
| |
| reg = gen_reg_rtx (Pmode); |
| insn = gen_rtx_SET (VOIDmode, reg, tp); |
| insn = emit_insn (insn); |
| |
| return reg; |
| } |
| |
| /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is |
| false if we expect this to be used for a memory address and true if |
| we expect to load the address into a register. */ |
| |
| static rtx |
| legitimize_tls_address (rtx x, enum tls_model model, int for_mov) |
| { |
| rtx dest, base, off, pic, tp; |
| int type; |
| |
| switch (model) |
| { |
| case TLS_MODEL_GLOBAL_DYNAMIC: |
| dest = gen_reg_rtx (Pmode); |
| tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; |
| |
| if (TARGET_64BIT && ! TARGET_GNU2_TLS) |
| { |
| rtx rax = gen_rtx_REG (Pmode, 0), insns; |
| |
| start_sequence (); |
| emit_call_insn (gen_tls_global_dynamic_64 (rax, x)); |
| insns = get_insns (); |
| end_sequence (); |
| |
| emit_libcall_block (insns, dest, rax, x); |
| } |
| else if (TARGET_64BIT && TARGET_GNU2_TLS) |
| emit_insn (gen_tls_global_dynamic_64 (dest, x)); |
| else |
| emit_insn (gen_tls_global_dynamic_32 (dest, x)); |
| |
| if (TARGET_GNU2_TLS) |
| { |
| dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); |
| |
| set_unique_reg_note (get_last_insn (), REG_EQUIV, x); |
| } |
| break; |
| |
| case TLS_MODEL_LOCAL_DYNAMIC: |
| base = gen_reg_rtx (Pmode); |
| tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; |
| |
| if (TARGET_64BIT && ! TARGET_GNU2_TLS) |
| { |
| rtx rax = gen_rtx_REG (Pmode, 0), insns, note; |
| |
| start_sequence (); |
| emit_call_insn (gen_tls_local_dynamic_base_64 (rax)); |
| insns = get_insns (); |
| end_sequence (); |
| |
| note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL); |
| note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note); |
| emit_libcall_block (insns, base, rax, note); |
| } |
| else if (TARGET_64BIT && TARGET_GNU2_TLS) |
| emit_insn (gen_tls_local_dynamic_base_64 (base)); |
| else |
| emit_insn (gen_tls_local_dynamic_base_32 (base)); |
| |
| if (TARGET_GNU2_TLS) |
| { |
| rtx x = ix86_tls_module_base (); |
| |
| set_unique_reg_note (get_last_insn (), REG_EQUIV, |
| gen_rtx_MINUS (Pmode, x, tp)); |
| } |
| |
| off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); |
| off = gen_rtx_CONST (Pmode, off); |
| |
| dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); |
| |
| if (TARGET_GNU2_TLS) |
| { |
| dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); |
| |
| set_unique_reg_note (get_last_insn (), REG_EQUIV, x); |
| } |
| |
| break; |
| |
| case TLS_MODEL_INITIAL_EXEC: |
| if (TARGET_64BIT) |
| { |
| pic = NULL; |
| type = UNSPEC_GOTNTPOFF; |
| } |
| else if (flag_pic) |
| { |
| if (reload_in_progress) |
| regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; |
| pic = pic_offset_table_rtx; |
| type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; |
| } |
| else if (!TARGET_ANY_GNU_TLS) |
| { |
| pic = gen_reg_rtx (Pmode); |
| emit_insn (gen_set_got (pic)); |
| type = UNSPEC_GOTTPOFF; |
| } |
| else |
| { |
| pic = NULL; |
| type = UNSPEC_INDNTPOFF; |
| } |
| |
| off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); |
| off = gen_rtx_CONST (Pmode, off); |
| if (pic) |
| off = gen_rtx_PLUS (Pmode, pic, off); |
| off = gen_const_mem (Pmode, off); |
| set_mem_alias_set (off, ix86_GOT_alias_set ()); |
| |
| if (TARGET_64BIT || TARGET_ANY_GNU_TLS) |
| { |
| base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); |
| off = force_reg (Pmode, off); |
| return gen_rtx_PLUS (Pmode, base, off); |
| } |
| else |
| { |
| base = get_thread_pointer (true); |
| dest = gen_reg_rtx (Pmode); |
| emit_insn (gen_subsi3 (dest, base, off)); |
| } |
| break; |
| |
| case TLS_MODEL_LOCAL_EXEC: |
| off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), |
| (TARGET_64BIT || TARGET_ANY_GNU_TLS) |
| ? UNSPEC_NTPOFF : UNSPEC_TPOFF); |
| off = gen_rtx_CONST (Pmode, off); |
| |
| if (TARGET_64BIT || TARGET_ANY_GNU_TLS) |
| { |
| base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); |
| return gen_rtx_PLUS (Pmode, base, off); |
| } |
| else |
| { |
| base = get_thread_pointer (true); |
| dest = gen_reg_rtx (Pmode); |
| emit_insn (gen_subsi3 (dest, base, off)); |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return dest; |
| } |
| |
| /* Try machine-dependent ways of modifying an illegitimate address |
| to be legitimate. If we find one, return the new, valid address. |
| This macro is used in only one place: `memory_address' in explow.c. |
| |
| OLDX is the address as it was before break_out_memory_refs was called. |
| In some cases it is useful to look at this to decide what needs to be done. |
| |
| MODE and WIN are passed so that this macro can use |
| GO_IF_LEGITIMATE_ADDRESS. |
| |
| It is always safe for this macro to do nothing. It exists to recognize |
| opportunities to optimize the output. |
| |
| For the 80386, we handle X+REG by loading X into a register R and |
| using R+REG. R will go in a general reg and indexing will be used. |
| However, if REG is a broken-out memory address or multiplication, |
| nothing needs to be done because REG can certainly go in a general reg. |
| |
| When -fpic is used, special handling is needed for symbolic references. |
| See comments by legitimize_pic_address in i386.c for details. */ |
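| /* For illustration (a sketch; names are hypothetical), given |
| x = (plus (symbol_ref "a") (reg R)) without -fpic, the symbol is |
| forced into a fresh register R2 and the result is |
| (plus (reg R2) (reg R)), which base+index addressing can handle. */ |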
| |
| rtx |
| legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) |
| { |
| int changed = 0; |
| unsigned log; |
| |
| if (TARGET_DEBUG_ADDR) |
| { |
| fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n", |
| GET_MODE_NAME (mode)); |
| debug_rtx (x); |
| } |
| |
| log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; |
| if (log) |
| return legitimize_tls_address (x, log, false); |
| if (GET_CODE (x) == CONST |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF |
| && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) |
| { |
| rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false); |
| return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); |
| } |
| |
| if (flag_pic && SYMBOLIC_CONST (x)) |
| return legitimize_pic_address (x, 0); |
| /* APPLE LOCAL begin dynamic-no-pic */ |
| #if TARGET_MACHO |
| if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) |
| return machopic_indirect_data_reference (x, 0); |
| #endif |
| /* APPLE LOCAL end dynamic-no-pic */ |
| |
| /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ |
| if (GET_CODE (x) == ASHIFT |
| && GET_CODE (XEXP (x, 1)) == CONST_INT |
| && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) |
| { |
| changed = 1; |
| log = INTVAL (XEXP (x, 1)); |
| x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), |
| GEN_INT (1 << log)); |
| } |
| |
| if (GET_CODE (x) == PLUS) |
| { |
| /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ |
| |
| if (GET_CODE (XEXP (x, 0)) == ASHIFT |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT |
| && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) |
| { |
| changed = 1; |
| log = INTVAL (XEXP (XEXP (x, 0), 1)); |
| XEXP (x, 0) = gen_rtx_MULT (Pmode, |
| force_reg (Pmode, XEXP (XEXP (x, 0), 0)), |
| GEN_INT (1 << log)); |
| } |
| |
| if (GET_CODE (XEXP (x, 1)) == ASHIFT |
| && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT |
| && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) |
| { |
| changed = 1; |
| log = INTVAL (XEXP (XEXP (x, 1), 1)); |
| XEXP (x, 1) = gen_rtx_MULT (Pmode, |
| force_reg (Pmode, XEXP (XEXP (x, 1), 0)), |
| GEN_INT (1 << log)); |
| } |
| |
| /* Put multiply first if it isn't already. */ |
| if (GET_CODE (XEXP (x, 1)) == MULT) |
| { |
| rtx tmp = XEXP (x, 0); |
| XEXP (x, 0) = XEXP (x, 1); |
| XEXP (x, 1) = tmp; |
| changed = 1; |
| } |
| |
| /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) |
| into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be |
| created by virtual register instantiation, register elimination, and |
| similar optimizations. */ |
| if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) |
| { |
| changed = 1; |
| x = gen_rtx_PLUS (Pmode, |
| gen_rtx_PLUS (Pmode, XEXP (x, 0), |
| XEXP (XEXP (x, 1), 0)), |
| XEXP (XEXP (x, 1), 1)); |
| } |
| |
| /* Canonicalize |
| (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) |
| into (plus (plus (mult (reg) (const)) (reg)) (const)). */ |
| else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS |
| && CONSTANT_P (XEXP (x, 1))) |
| { |
| rtx constant; |
| rtx other = NULL_RTX; |
| |
| if (GET_CODE (XEXP (x, 1)) == CONST_INT) |
| { |
| constant = XEXP (x, 1); |
| other = XEXP (XEXP (XEXP (x, 0), 1), 1); |
| } |
| else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT) |
| { |
| constant = XEXP (XEXP (XEXP (x, 0), 1), 1); |
| other = XEXP (x, 1); |
| } |
| else |
| constant = 0; |
| |
| if (constant) |
| { |
| changed = 1; |
| x = gen_rtx_PLUS (Pmode, |
| gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), |
| XEXP (XEXP (XEXP (x, 0), 1), 0)), |
| plus_constant (other, INTVAL (constant))); |
| } |
| } |
| |
| if (changed && legitimate_address_p (mode, x, FALSE)) |
| return x; |
| |
| if (GET_CODE (XEXP (x, 0)) == MULT) |
| { |
| changed = 1; |
| XEXP (x, 0) = force_operand (XEXP (x, 0), 0); |
| } |
| |
| if (GET_CODE (XEXP (x, 1)) == MULT) |
| { |
| changed = 1; |
| XEXP (x, 1) = force_operand (XEXP (x, 1), 0); |
| } |
| |
| if (changed |
| && GET_CODE (XEXP (x, 1)) == REG |
| && GET_CODE (XEXP (x, 0)) == REG) |
| return x; |
| |
| if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) |
| { |
| changed = 1; |
| x = legitimize_pic_address (x, 0); |
| } |
| |
| if (changed && legitimate_address_p (mode, x, FALSE)) |
| return x; |
| |
| if (GET_CODE (XEXP (x, 0)) == REG) |
| { |
| rtx temp = gen_reg_rtx (Pmode); |
| rtx val = force_operand (XEXP (x, 1), temp); |
| if (val != temp) |
| emit_move_insn (temp, val); |
| |
| XEXP (x, 1) = temp; |
| return x; |
| } |
| |
| else if (GET_CODE (XEXP (x, 1)) == REG) |
| { |
| rtx temp = gen_reg_rtx (Pmode); |
| rtx val = force_operand (XEXP (x, 0), temp); |
| if (val != temp) |
| emit_move_insn (temp, val); |
| |
| XEXP (x, 0) = temp; |
| return x; |
| } |
| } |
| |
| return x; |
| } |
| |
| /* Print an integer constant expression in assembler syntax. Addition |
| and subtraction are the only arithmetic that may appear in these |
| expressions. FILE is the stdio stream to write to, X is the rtx, and |
| CODE is the operand print code from the output string. */ |
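| /* For example (a sketch; the symbol name is hypothetical), |
| (const (plus (unspec [(symbol_ref "x")] UNSPEC_GOTOFF) (const_int 4))) |
| prints as "4+x@GOTOFF", with the integer constant first. */ |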
| |
| static void |
| output_pic_addr_const (FILE *file, rtx x, int code) |
| { |
| char buf[256]; |
| |
| switch (GET_CODE (x)) |
| { |
| case PC: |
| gcc_assert (flag_pic); |
| putc ('.', file); |
| break; |
| |
| case SYMBOL_REF: |
| /* APPLE LOCAL begin axe stubs 5571540 */ |
| if (! TARGET_MACHO || |
| #if TARGET_MACHO |
| ! darwin_stubs || |
| #endif |
| TARGET_64BIT) |
| /* APPLE LOCAL end axe stubs 5571540 */ |
| output_addr_const (file, x); |
| else |
| { |
| const char *name = XSTR (x, 0); |
| |
| /* Mark the decl as referenced so that cgraph will output the function. */ |
| if (SYMBOL_REF_DECL (x)) |
| mark_decl_referenced (SYMBOL_REF_DECL (x)); |
| |
| #if TARGET_MACHO |
| if (MACHOPIC_INDIRECT |
| && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) |
| name = machopic_indirection_name (x, /*stub_p=*/true); |
| #endif |
| assemble_name (file, name); |
| } |
| if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) |
| fputs ("@PLT", file); |
| break; |
| |
| case LABEL_REF: |
| x = XEXP (x, 0); |
| /* FALLTHRU */ |
| case CODE_LABEL: |
| ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); |
| assemble_name (asm_out_file, buf); |
| break; |
| |
| case CONST_INT: |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); |
| break; |
| |
| case CONST: |
| /* This used to output parentheses around the expression, |
| but that does not work on the 386 (either ATT or BSD assembler). */ |
| output_pic_addr_const (file, XEXP (x, 0), code); |
| break; |
| |
| case CONST_DOUBLE: |
| if (GET_MODE (x) == VOIDmode) |
| { |
| /* We can use %d if the number is <32 bits and positive. */ |
| if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) |
| fprintf (file, "0x%lx%08lx", |
| (unsigned long) CONST_DOUBLE_HIGH (x), |
| (unsigned long) CONST_DOUBLE_LOW (x)); |
| else |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); |
| } |
| else |
| /* We can't handle floating point constants; |
| PRINT_OPERAND must handle them. */ |
| output_operand_lossage ("floating constant misused"); |
| break; |
| |
| case PLUS: |
| /* Some assemblers need integer constants to appear first. */ |
| if (GET_CODE (XEXP (x, 0)) == CONST_INT) |
| { |
| output_pic_addr_const (file, XEXP (x, 0), code); |
| putc ('+', file); |
| output_pic_addr_const (file, XEXP (x, 1), code); |
| } |
| else |
| { |
| gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT); |
| output_pic_addr_const (file, XEXP (x, 1), code); |
| putc ('+', file); |
| output_pic_addr_const (file, XEXP (x, 0), code); |
| } |
| break; |
| |
| case MINUS: |
| if (!TARGET_MACHO) |
| putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); |
| output_pic_addr_const (file, XEXP (x, 0), code); |
| putc ('-', file); |
| output_pic_addr_const (file, XEXP (x, 1), code); |
| if (!TARGET_MACHO) |
| putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); |
| break; |
| |
| case UNSPEC: |
| gcc_assert (XVECLEN (x, 0) == 1); |
| output_pic_addr_const (file, XVECEXP (x, 0, 0), code); |
| switch (XINT (x, 1)) |
| { |
| case UNSPEC_GOT: |
| fputs ("@GOT", file); |
| break; |
| case UNSPEC_GOTOFF: |
| fputs ("@GOTOFF", file); |
| break; |
| case UNSPEC_GOTPCREL: |
| fputs ("@GOTPCREL(%rip)", file); |
| break; |
| case UNSPEC_GOTTPOFF: |
| /* FIXME: This might be @TPOFF in Sun ld too. */ |
| fputs ("@GOTTPOFF", file); |
| break; |
| case UNSPEC_TPOFF: |
| fputs ("@TPOFF", file); |
| break; |
| case UNSPEC_NTPOFF: |
| if (TARGET_64BIT) |
| fputs ("@TPOFF", file); |
| else |
| fputs ("@NTPOFF", file); |
| break; |
| case UNSPEC_DTPOFF: |
| fputs ("@DTPOFF", file); |
| break; |
| case UNSPEC_GOTNTPOFF: |
| if (TARGET_64BIT) |
| fputs ("@GOTTPOFF(%rip)", file); |
| else |
| fputs ("@GOTNTPOFF", file); |
| break; |
| case UNSPEC_INDNTPOFF: |
| fputs ("@INDNTPOFF", file); |
| break; |
| default: |
| output_operand_lossage ("invalid UNSPEC as operand"); |
| break; |
| } |
| break; |
| |
| default: |
| output_operand_lossage ("invalid expression as operand"); |
| } |
| } |
| |
| /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. |
| We need to emit DTP-relative relocations. */ |
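| /* E.g. for a 4-byte entry this emits (assuming ASM_LONG expands to |
| ".long" and "x" is a hypothetical symbol): |
| .long x@DTPOFF |
| and for an 8-byte entry: |
| .long x@DTPOFF, 0 */ |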
| |
| static void |
| i386_output_dwarf_dtprel (FILE *file, int size, rtx x) |
| { |
| fputs (ASM_LONG, file); |
| output_addr_const (file, x); |
| fputs ("@DTPOFF", file); |
| switch (size) |
| { |
| case 4: |
| break; |
| case 8: |
| fputs (", 0", file); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* In the name of slightly smaller debug output, and to cater to |
| general assembler lossage, recognize PIC+GOTOFF and turn it back |
| into a direct symbol reference. |
| |
| On Darwin, this is necessary to avoid a crash, because Darwin |
| has a different PIC label for each routine but the DWARF debugging |
| information is not associated with any particular routine, so it's |
| necessary to remove references to the PIC label from RTL stored by |
| the DWARF output code. */ |
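| /* For illustration (a sketch; the symbol name is hypothetical and |
| %ebx stands for PIC_OFFSET_TABLE_REGNUM), in 32bit code |
| (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))) |
| is turned back into (symbol_ref "x"). */ |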
| |
| static rtx |
| ix86_delegitimize_address (rtx orig_x) |
| { |
| rtx x = orig_x; |
| /* reg_addend is NULL or a multiple of some register. */ |
| rtx reg_addend = NULL_RTX; |
| /* const_addend is NULL or a const_int. */ |
| rtx const_addend = NULL_RTX; |
| /* This is the result, or NULL. */ |
| rtx result = NULL_RTX; |
| |
| if (GET_CODE (x) == MEM) |
| x = XEXP (x, 0); |
| |
| if (TARGET_64BIT) |
| { |
| if (GET_CODE (x) != CONST |
| || GET_CODE (XEXP (x, 0)) != UNSPEC |
| || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL |
| || GET_CODE (orig_x) != MEM) |
| return orig_x; |
| return XVECEXP (XEXP (x, 0), 0, 0); |
| } |
| |
| if (GET_CODE (x) != PLUS |
| || GET_CODE (XEXP (x, 1)) != CONST) |
| return orig_x; |
| |
| if (GET_CODE (XEXP (x, 0)) == REG |
| && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) |
| /* %ebx + GOT/GOTOFF */ |
| ; |
| else if (GET_CODE (XEXP (x, 0)) == PLUS) |
| { |
| /* %ebx + %reg * scale + GOT/GOTOFF */ |
| reg_addend = XEXP (x, 0); |
| if (GET_CODE (XEXP (reg_addend, 0)) == REG |
| && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM) |
| reg_addend = XEXP (reg_addend, 1); |
| else if (GET_CODE (XEXP (reg_addend, 1)) == REG |
| && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM) |
| reg_addend = XEXP (reg_addend, 0); |
| else |
| return orig_x; |
| if (GET_CODE (reg_addend) != REG |
| && GET_CODE (reg_addend) != MULT |
| && GET_CODE (reg_addend) != ASHIFT) |
| return orig_x; |
| } |
| else |
| return orig_x; |
| |
| x = XEXP (XEXP (x, 1), 0); |
| if (GET_CODE (x) == PLUS |
| && GET_CODE (XEXP (x, 1)) == CONST_INT) |
| { |
| const_addend = XEXP (x, 1); |
| x = XEXP (x, 0); |
| } |
| |
| if (GET_CODE (x) == UNSPEC |
| && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM) |
| || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM))) |
| result = XVECEXP (x, 0, 0); |
| |
| if (TARGET_MACHO && darwin_local_data_pic (x) |
| && GET_CODE (orig_x) != MEM) |
| result = XEXP (x, 0); |
| |
| if (! result) |
| return orig_x; |
| |
| if (const_addend) |
| result = gen_rtx_PLUS (Pmode, result, const_addend); |
| if (reg_addend) |
| result = gen_rtx_PLUS (Pmode, reg_addend, result); |
| return result; |
| } |
| |
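| /* Print to FILE the assembler condition-code suffix (e.g. "e", "ne", |
| "g") for comparison CODE in MODE. If REVERSE is nonzero, print the |
| suffix for the reversed condition. FP selects the fcmov-compatible |
| spellings for the unsigned and ordered tests. */ |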
| static void |
| put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse, |
| int fp, FILE *file) |
| { |
| const char *suffix; |
| |
| if (mode == CCFPmode || mode == CCFPUmode) |
| { |
| enum rtx_code second_code, bypass_code; |
| ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); |
| gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN); |
| code = ix86_fp_compare_code_to_integer (code); |
| mode = CCmode; |
| } |
| if (reverse) |
| code = reverse_condition (code); |
| |
| switch (code) |
| { |
| case EQ: |
| suffix = "e"; |
| break; |
| case NE: |
| suffix = "ne"; |
| break; |
| case GT: |
| gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); |
| suffix = "g"; |
| break; |
| case GTU: |
| /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. |
| Those same assemblers have the same but opposite lossage on cmov. */ |
| gcc_assert (mode == CCmode); |
| suffix = fp ? "nbe" : "a"; |
| break; |
| case LT: |
| switch (mode) |
| { |
| case CCNOmode: |
| case CCGOCmode: |
| suffix = "s"; |
| break; |
| |
| case CCmode: |
| case CCGCmode: |
| suffix = "l"; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| break; |
| case LTU: |
| gcc_assert (mode == CCmode); |
| suffix = "b"; |
| break; |
| case GE: |
| switch (mode) |
| { |
| case CCNOmode: |
| case CCGOCmode: |
| suffix = "ns"; |
| break; |
| |
| case CCmode: |
| case CCGCmode: |
| suffix = "ge"; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| break; |
| case GEU: |
| /* ??? As above. */ |
| gcc_assert (mode == CCmode); |
| suffix = fp ? "nb" : "ae"; |
| break; |
| case LE: |
| gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); |
| suffix = "le"; |
| break; |
| case LEU: |
| gcc_assert (mode == CCmode); |
| suffix = "be"; |
| break; |
| case UNORDERED: |
| suffix = fp ? "u" : "p"; |
| break; |
| case ORDERED: |
| suffix = fp ? "nu" : "np"; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| fputs (suffix, file); |
| } |
| |
| /* Print the name of register X to FILE based on its machine mode and number. |
| If CODE is 'w', pretend the mode is HImode. |
| If CODE is 'b', pretend the mode is QImode. |
| If CODE is 'k', pretend the mode is SImode. |
| If CODE is 'q', pretend the mode is DImode. |
| If CODE is 'h', pretend the reg is the 'high' byte register. |
| If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */ |
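| /* E.g. for (reg:SI %eax), code 'w' prints "%ax", code 'b' prints |
| "%al" and code 'h' prints "%ah" (AT&T dialect; a sketch only). */ |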
| |
| void |
| print_reg (rtx x, int code, FILE *file) |
| { |
| gcc_assert (REGNO (x) != ARG_POINTER_REGNUM |
| && REGNO (x) != FRAME_POINTER_REGNUM |
| && REGNO (x) != FLAGS_REG |
| && REGNO (x) != FPSR_REG); |
| |
| if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) |
| putc ('%', file); |
| |
| if (code == 'w' || MMX_REG_P (x)) |
| code = 2; |
| else if (code == 'b') |
| code = 1; |
| else if (code == 'k') |
| code = 4; |
| else if (code == 'q') |
| code = 8; |
| else if (code == 'y') |
| code = 3; |
| else if (code == 'h') |
| code = 0; |
| else |
| code = GET_MODE_SIZE (GET_MODE (x)); |
| |
| /* Irritatingly, AMD extended registers use a different naming |
| convention from the normal registers. */ |
| if (REX_INT_REG_P (x)) |
| { |
| gcc_assert (TARGET_64BIT); |
| switch (code) |
| { |
| case 0: |
| error ("extended registers have no high halves"); |
| break; |
| case 1: |
| fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); |
| break; |
| case 2: |
| fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); |
| break; |
| case 4: |
| fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); |
| break; |
| case 8: |
| fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); |
| break; |
| default: |
| error ("unsupported operand size for extended register"); |
| break; |
| } |
| return; |
| } |
| switch (code) |
| { |
| case 3: |
| if (STACK_TOP_P (x)) |
| { |
| fputs ("st(0)", file); |
| break; |
| } |
| /* FALLTHRU */ |
| case 8: |
| case 4: |
| case 12: |
| if (! ANY_FP_REG_P (x)) |
| putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); |
| /* FALLTHRU */ |
| case 16: |
| case 2: |
| normal: |
| fputs (hi_reg_name[REGNO (x)], file); |
| break; |
| case 1: |
| if (REGNO (x) >= ARRAY_SIZE (qi_reg_name)) |
| goto normal; |
| fputs (qi_reg_name[REGNO (x)], file); |
| break; |
| case 0: |
| if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name)) |
| goto normal; |
| fputs (qi_high_reg_name[REGNO (x)], file); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Locate some local-dynamic symbol still in use by this function |
| so that we can print its name in some tls_local_dynamic_base |
| pattern. */ |
| |
| static const char * |
| get_some_local_dynamic_name (void) |
| { |
| rtx insn; |
| |
| if (cfun->machine->some_ld_name) |
| return cfun->machine->some_ld_name; |
| |
| for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) |
| if (INSN_P (insn) |
| && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) |
| return cfun->machine->some_ld_name; |
| |
| gcc_unreachable (); |
| } |
| |
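| /* Callback for for_each_rtx, used by get_some_local_dynamic_name: |
| record the name of the first local-dynamic TLS symbol found and |
| return nonzero to stop the traversal. */ |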
| static int |
| get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) |
| { |
| rtx x = *px; |
| |
| if (GET_CODE (x) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) |
| { |
| cfun->machine->some_ld_name = XSTR (x, 0); |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /* Meaning of CODE: |
| L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. |
| C -- print opcode suffix for set/cmov insn. |
| c -- like C, but print reversed condition |
| F,f -- likewise, but for floating-point. |
| O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", |
| otherwise nothing |
| R -- print the prefix for register names. |
| z -- print the opcode suffix for the size of the current operand. |
| * -- print a star (in certain assembler syntax) |
| A -- print an absolute memory reference. |
| w -- print the operand as if it's a "word" (HImode) even if it isn't. |
| s -- print a shift double count, followed by the assembler's argument |
| delimiter. |
| b -- print the QImode name of the register for the indicated operand. |
| %b0 would print %al if operands[0] is reg 0. |
| w -- likewise, print the HImode name of the register. |
| k -- likewise, print the SImode name of the register. |
| q -- likewise, print the DImode name of the register. |
| h -- print the QImode name for a "high" register, either ah, bh, ch or dh. |
| y -- print "st(0)" instead of "st" as a register. |
| D -- print condition for SSE cmp instruction. |
| P -- if PIC, print an @PLT suffix. |
| X -- don't print any sort of PIC '@' suffix for a symbol. |
| & -- print some in-use local-dynamic symbol name. |
| H -- print a memory address offset by 8; used for sse high-parts |
| */ |
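| /* For illustration (a sketch; the operands are hypothetical): with |
| operands[0] = (reg:HI %ax), "%k0" prints "%eax" and, on a 64bit |
| target, "%q0" prints "%rax". */ |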
| |
| void |
| print_operand (FILE *file, rtx x, int code) |
| { |
| if (code) |
| { |
| switch (code) |
| { |
| case '*': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('*', file); |
| return; |
| |
| case '&': |
| assemble_name (file, get_some_local_dynamic_name ()); |
| return; |
| |
| case 'A': |
| switch (ASSEMBLER_DIALECT) |
| { |
| case ASM_ATT: |
| putc ('*', file); |
| break; |
| |
| case ASM_INTEL: |
| /* Intel syntax. For absolute addresses, registers should not |
| be surrounded by brackets. */ |
| if (GET_CODE (x) != REG) |
| { |
| putc ('[', file); |
| PRINT_OPERAND (file, x, 0); |
| putc (']', file); |
| return; |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| PRINT_OPERAND (file, x, 0); |
| return; |
| |
| case 'L': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('l', file); |
| return; |
| |
| case 'W': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('w', file); |
| return; |
| |
| case 'B': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('b', file); |
| return; |
| |
| case 'Q': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('l', file); |
| return; |
| |
| case 'S': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('s', file); |
| return; |
| |
| case 'T': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('t', file); |
| return; |
| |
| case 'z': |
| /* 387 opcodes don't get size suffixes if the operands are |
| registers. */ |
| if (STACK_REG_P (x)) |
| return; |
| |
| /* Likewise if using Intel opcodes. */ |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| return; |
| |
| /* Derive the opcode suffix from the size of the operand. */ |
| switch (GET_MODE_SIZE (GET_MODE (x))) |
| { |
| case 2: |
| #ifdef HAVE_GAS_FILDS_FISTS |
| putc ('s', file); |
| #endif |
| return; |
| |
| case 4: |
| if (GET_MODE (x) == SFmode) |
| { |
| putc ('s', file); |
| return; |
| } |
| else |
| putc ('l', file); |
| return; |
| |
| case 12: |
| case 16: |
| putc ('t', file); |
| return; |
| |
| case 8: |
| if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
| { |
| #ifdef GAS_MNEMONICS |
| putc ('q', file); |
| #else |
| putc ('l', file); |
| putc ('l', file); |
| #endif |
| } |
| else |
| putc ('l', file); |
| return; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| case 'b': |
| case 'w': |
| case 'k': |
| case 'q': |
| case 'h': |
| case 'y': |
| case 'X': |
| case 'P': |
| break; |
| |
| case 's': |
| if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT) |
| { |
| PRINT_OPERAND (file, x, 0); |
| putc (',', file); |
| } |
| return; |
| |
| case 'D': |
| /* A little bit of brain damage here: the SSE compare instructions |
| use completely different names for the comparisons than the fp |
| conditional moves do. */ |
| switch (GET_CODE (x)) |
| { |
| case EQ: |
| case UNEQ: |
| fputs ("eq", file); |
| break; |
| case LT: |
| case UNLT: |
| fputs ("lt", file); |
| break; |
| case LE: |
| case UNLE: |
| fputs ("le", file); |
| break; |
| case UNORDERED: |
| fputs ("unord", file); |
| break; |
| case NE: |
| case LTGT: |
| fputs ("neq", file); |
| break; |
| case UNGE: |
| case GE: |
| fputs ("nlt", file); |
| break; |
| case UNGT: |
| case GT: |
| fputs ("nle", file); |
| break; |
| case ORDERED: |
| fputs ("ord", file); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| return; |
| case 'O': |
| #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| { |
| switch (GET_MODE (x)) |
| { |
| case HImode: putc ('w', file); break; |
| case SImode: |
| case SFmode: putc ('l', file); break; |
| case DImode: |
| case DFmode: putc ('q', file); break; |
| default: gcc_unreachable (); |
| } |
| putc ('.', file); |
| } |
| #endif |
| return; |
| case 'C': |
| put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); |
| return; |
| case 'F': |
| #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('.', file); |
| #endif |
| put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); |
| return; |
| |
| /* Like above, but reverse condition */ |
| case 'c': |
| /* Check to see if argument to %c is really a constant |
| and not a condition code which needs to be reversed. */ |
| if (!COMPARISON_P (x)) |
| { |
| output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); |
| return; |
| } |
| put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); |
| return; |
| case 'f': |
| #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('.', file); |
| #endif |
| put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); |
| return; |
| |
| case 'H': |
| /* It doesn't actually matter what mode we use here, as we're |
| only going to use this for printing. */ |
| x = adjust_address_nv (x, DImode, 8); |
| break; |
| |
| case '+': |
| { |
| rtx x; |
| |
| if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) |
| return; |
| |
| x = find_reg_note (current_output_insn, REG_BR_PROB, 0); |
| if (x) |
| { |
| int pred_val = INTVAL (XEXP (x, 0)); |
| |
| if (pred_val < REG_BR_PROB_BASE * 45 / 100 |
| || pred_val > REG_BR_PROB_BASE * 55 / 100) |
| { |
| int taken = pred_val > REG_BR_PROB_BASE / 2; |
| int cputaken = final_forward_branch_p (current_output_insn) == 0; |
| |
| /* Emit hints only where the default branch prediction |
| heuristics would fail. */ |
| if (taken != cputaken) |
| { |
| /* We use the 0x3e (DS) prefix for taken branches and |
| the 0x2e (CS) prefix for not-taken branches. */ |
| if (taken) |
| fputs ("ds ; ", file); |
| else |
| fputs ("cs ; ", file); |
| } |
| } |
| } |
| return; |
| } |
| default: |
| output_operand_lossage ("invalid operand code '%c'", code); |
| } |
| } |
| |
| if (GET_CODE (x) == REG) |
| print_reg (x, code, file); |
| |
| else if (GET_CODE (x) == MEM) |
| { |
| /* No `byte ptr' prefix for call instructions. */ |
| if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') |
| { |
| const char * size; |
| switch (GET_MODE_SIZE (GET_MODE (x))) |
| { |
| case 1: size = "BYTE"; break; |
| case 2: size = "WORD"; break; |
| case 4: size = "DWORD"; break; |
| case 8: size = "QWORD"; break; |
| case 12: size = "XWORD"; break; |
| case 16: size = "XMMWORD"; break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Check for explicit size override (codes 'b', 'w' and 'k') */ |
| if (code == 'b') |
| size = "BYTE"; |
| else if (code == 'w') |
| size = "WORD"; |
| else if (code == 'k') |
| size = "DWORD"; |
| |
| fputs (size, file); |
| fputs (" PTR ", file); |
| } |
| |
| x = XEXP (x, 0); |
| /* Avoid (%rip) for call operands. */ |
| if (CONSTANT_ADDRESS_P (x) && code == 'P' |
| && GET_CODE (x) != CONST_INT) |
| output_addr_const (file, x); |
| else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) |
| output_operand_lossage ("invalid constraints for operand"); |
| else |
| output_address (x); |
| } |
| |
| else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) |
| { |
| REAL_VALUE_TYPE r; |
| long l; |
| |
| REAL_VALUE_FROM_CONST_DOUBLE (r, x); |
| REAL_VALUE_TO_TARGET_SINGLE (r, l); |
| |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('$', file); |
| fprintf (file, "0x%08lx", l); |
| } |
| |
| /* These float cases don't actually occur as immediate operands. */ |
| else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) |
| { |
| char dstr[30]; |
| |
| real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); |
| fprintf (file, "%s", dstr); |
| } |
| |
| else if (GET_CODE (x) == CONST_DOUBLE |
| && GET_MODE (x) == XFmode) |
| { |
| char dstr[30]; |
| |
| real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); |
| fprintf (file, "%s", dstr); |
| } |
| |
| else |
| { |
| /* We have patterns that allow zero sets of memory, for instance. |
| In 64-bit mode, we should probably support all 8-byte vectors, |
| since we can in fact encode that into an immediate. */ |
| if (GET_CODE (x) == CONST_VECTOR) |
| { |
| gcc_assert (x == CONST0_RTX (GET_MODE (x))); |
| x = const0_rtx; |
| } |
| |
| if (code != 'P') |
| { |
| if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) |
| { |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('$', file); |
| } |
| else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF |
| || GET_CODE (x) == LABEL_REF) |
| { |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('$', file); |
| else |
| fputs ("OFFSET FLAT:", file); |
| } |
| } |
| if (GET_CODE (x) == CONST_INT) |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); |
| /* APPLE LOCAL begin dynamic-no-pic */ |
| else if (flag_pic || (TARGET_MACHO && MACHOPIC_INDIRECT)) |
| /* APPLE LOCAL end dynamic-no-pic */ |
| output_pic_addr_const (file, x, code); |
| else |
| output_addr_const (file, x); |
| } |
| } |
| |
| /* Print a memory operand whose address is ADDR. */ |
| |
| void |
| print_operand_address (FILE *file, rtx addr) |
| { |
| struct ix86_address parts; |
| rtx base, index, disp; |
| int scale; |
| int ok = ix86_decompose_address (addr, &parts); |
| |
| gcc_assert (ok); |
| |
| base = parts.base; |
| index = parts.index; |
| disp = parts.disp; |
| scale = parts.scale; |
| |
| switch (parts.seg) |
| { |
| case SEG_DEFAULT: |
| break; |
| case SEG_FS: |
| case SEG_GS: |
| if (USER_LABEL_PREFIX[0] == 0) |
| putc ('%', file); |
| fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (!base && !index) |
| { |
| /* A displacement-only address requires special attention. */ |
| |
| if (GET_CODE (disp) == CONST_INT) |
| { |
| if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT) |
| { |
| if (USER_LABEL_PREFIX[0] == 0) |
| putc ('%', file); |
| fputs ("ds:", file); |
| } |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); |
| } |
| else if (flag_pic) |
| output_pic_addr_const (file, disp, 0); |
| else |
| output_addr_const (file, disp); |
| |
| /* Use the one-byte-shorter RIP-relative addressing for 64bit mode. */ |
| if (TARGET_64BIT) |
| { |
| if (GET_CODE (disp) == CONST |
| && GET_CODE (XEXP (disp, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) |
| disp = XEXP (XEXP (disp, 0), 0); |
| if (GET_CODE (disp) == LABEL_REF |
| || (GET_CODE (disp) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (disp) == 0)) |
| fputs ("(%rip)", file); |
| } |
| } |
| else |
| { |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| { |
| if (disp) |
| { |
| if (flag_pic) |
| output_pic_addr_const (file, disp, 0); |
| else if (GET_CODE (disp) == LABEL_REF) |
| output_asm_label (disp); |
| else |
| output_addr_const (file, disp); |
| } |
| |
| putc ('(', file); |
| if (base) |
| print_reg (base, 0, file); |
| if (index) |
| { |
| putc (',', file); |
| print_reg (index, 0, file); |
| if (scale != 1) |
| fprintf (file, ",%d", scale); |
| } |
| putc (')', file); |
| } |
| else |
| { |
| rtx offset = NULL_RTX; |
| |
| if (disp) |
| { |
| /* Pull out the offset of a symbol; print any symbol itself. */ |
| if (GET_CODE (disp) == CONST |
| && GET_CODE (XEXP (disp, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) |
| { |
| offset = XEXP (XEXP (disp, 0), 1); |
| disp = gen_rtx_CONST (VOIDmode, |
| XEXP (XEXP (disp, 0), 0)); |
| } |
| |
| if (flag_pic) |
| output_pic_addr_const (file, disp, 0); |
| else if (GET_CODE (disp) == LABEL_REF) |
| output_asm_label (disp); |
| else if (GET_CODE (disp) == CONST_INT) |
| offset = disp; |
| else |
| output_addr_const (file, disp); |
| } |
| |
| putc ('[', file); |
| if (base) |
| { |
| print_reg (base, 0, file); |
| if (offset) |
| { |
| if (INTVAL (offset) >= 0) |
| putc ('+', file); |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); |
| } |
| } |
| else if (offset) |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); |
| else |
| putc ('0', file); |
| |
| if (index) |
| { |
| putc ('+', file); |
| print_reg (index, 0, file); |
| if (scale != 1) |
| fprintf (file, "*%d", scale); |
| } |
| putc (']', file); |
| } |
| } |
| } |
| |
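| /* Print to FILE the @-relocation suffix for a TLS UNSPEC X; return |
| false if X is not an UNSPEC we recognize. */ |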
| bool |
| output_addr_const_extra (FILE *file, rtx x) |
| { |
| rtx op; |
| |
| if (GET_CODE (x) != UNSPEC) |
| return false; |
| |
| op = XVECEXP (x, 0, 0); |
| switch (XINT (x, 1)) |
| { |
| case UNSPEC_GOTTPOFF: |
| output_addr_const (file, op); |
| /* FIXME: This might be @TPOFF in Sun ld. */ |
| fputs ("@GOTTPOFF", file); |
| break; |
| case UNSPEC_TPOFF: |
| output_addr_const (file, op); |
| fputs ("@TPOFF", file); |
| break; |
| case UNSPEC_NTPOFF: |
| output_addr_const (file, op); |
| if (TARGET_64BIT) |
| fputs ("@TPOFF", file); |
| else |
| fputs ("@NTPOFF", file); |
| break; |
| case UNSPEC_DTPOFF: |
| output_addr_const (file, op); |
| fputs ("@DTPOFF", file); |
| break; |
| case UNSPEC_GOTNTPOFF: |
| output_addr_const (file, op); |
| if (TARGET_64BIT) |
| fputs ("@GOTTPOFF(%rip)", file); |
| else |
| fputs ("@GOTNTPOFF", file); |
| break; |
| case UNSPEC_INDNTPOFF: |
| output_addr_const (file, op); |
| fputs ("@INDNTPOFF", file); |
| break; |
| |
| default: |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* Split one or more DImode RTL references into pairs of SImode |
| references. The RTL can be REG, offsettable MEM, integer constant, or |
| CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to |
| split and "num" is its length. lo_half and hi_half are output arrays |
| that parallel "operands". */ |
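| /* For example (an illustrative case, not from the original comment): |
| splitting a DImode pseudo (reg:DI 60) yields (subreg:SI (reg:DI 60) 0) |
| as the low half and (subreg:SI (reg:DI 60) 4) as the high half, while |
| an offsettable MEM is split with adjust_address into two SImode MEMs |
| at byte offsets 0 and 4. */ |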
| |
| void |
| split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) |
| { |
| while (num--) |
| { |
| rtx op = operands[num]; |
| |
| /* simplify_subreg refuses to split volatile memory addresses, |
| but we still have to handle them. */ |
| if (GET_CODE (op) == MEM) |
| { |
| lo_half[num] = adjust_address (op, SImode, 0); |
| hi_half[num] = adjust_address (op, SImode, 4); |
| } |
| else |
| { |
| lo_half[num] = simplify_gen_subreg (SImode, op, |
| GET_MODE (op) == VOIDmode |
| ? DImode : GET_MODE (op), 0); |
| hi_half[num] = simplify_gen_subreg (SImode, op, |
| GET_MODE (op) == VOIDmode |
| ? DImode : GET_MODE (op), 4); |
| } |
| } |
| } |
| |
| /* Split one or more TImode RTL references into pairs of DImode |
| references. The RTL can be REG, offsettable MEM, integer constant, or |
| CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to |
| split and "num" is its length. lo_half and hi_half are output arrays |
| that parallel "operands". */ |
| |
| void |
| split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) |
| { |
| while (num--) |
| { |
| rtx op = operands[num]; |
| |
| /* simplify_subreg refuses to split volatile memory addresses, but we |
| still have to handle them. */ |
| if (GET_CODE (op) == MEM) |
| { |
| lo_half[num] = adjust_address (op, DImode, 0); |
| hi_half[num] = adjust_address (op, DImode, 8); |
| } |
| else |
| { |
| lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); |
| hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); |
| } |
| } |
| } |
| |
| /* Output code to perform a 387 binary operation in INSN, one of PLUS, |
| MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] |
| is the expression of the binary operation. The output may either be |
| emitted here, or returned to the caller, like all output_* functions. |
| |
| There is no guarantee that the operands are the same mode, as they |
| might be within FLOAT or FLOAT_EXTEND expressions. */ |
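| /* The templates built below use the output_asm_insn dual-dialect |
| syntax: in "addss\t{%2, %0|%0, %2}" the text before the `|' is used |
| for ASM_ATT and the text after it for ASM_INTEL, so one string |
| serves both assembler dialects. */ |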
| |
| #ifndef SYSV386_COMPAT |
| /* Set to 1 for compatibility with brain-damaged assemblers. No-one |
| wants to fix the assemblers because that causes incompatibility |
| with gcc. No-one wants to fix gcc because that causes |
| incompatibility with assemblers... You can use |
| -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ |
| #define SYSV386_COMPAT 1 |
| #endif |
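| |
| /* Concretely: for fsub/fdiv with a non-%st(0) destination, AT&T |
| derived assemblers swap the sense of the `r' (reversed) suffix, so |
| the templates below emit opposite suffixes in the two dialects when |
| SYSV386_COMPAT is set; see the MINUS/DIV cases. */ |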
| |
| const char * |
| output_387_binary_op (rtx insn, rtx *operands) |
| { |
| static char buf[30]; |
| const char *p; |
| const char *ssep; |
| int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]); |
| |
| #ifdef ENABLE_CHECKING |
| /* Even if we do not want to check the inputs, this documents the |
| input constraints, which helps in understanding the following code. */ |
| if (STACK_REG_P (operands[0]) |
| && ((REG_P (operands[1]) |
| && REGNO (operands[0]) == REGNO (operands[1]) |
| && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM)) |
| || (REG_P (operands[2]) |
| && REGNO (operands[0]) == REGNO (operands[2]) |
| && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM))) |
| && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) |
| ; /* ok */ |
| else |
| gcc_assert (is_sse); |
| #endif |
| |
| switch (GET_CODE (operands[3])) |
| { |
| case PLUS: |
| if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT |
| || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) |
| p = "fiadd"; |
| else |
| p = "fadd"; |
| ssep = "add"; |
| break; |
| |
| case MINUS: |
| if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT |
| || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) |
| p = "fisub"; |
| else |
| p = "fsub"; |
| ssep = "sub"; |
| break; |
| |
| case MULT: |
| if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT |
| || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) |
| p = "fimul"; |
| else |
| p = "fmul"; |
| ssep = "mul"; |
| break; |
| |
| case DIV: |
| if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT |
| || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) |
| p = "fidiv"; |
| else |
| p = "fdiv"; |
| ssep = "div"; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (is_sse) |
| { |
| strcpy (buf, ssep); |
| if (GET_MODE (operands[0]) == SFmode) |
| strcat (buf, "ss\t{%2, %0|%0, %2}"); |
| else |
| strcat (buf, "sd\t{%2, %0|%0, %2}"); |
| return buf; |
| } |
| strcpy (buf, p); |
| |
| switch (GET_CODE (operands[3])) |
| { |
| case MULT: |
| case PLUS: |
| if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) |
| { |
| rtx temp = operands[2]; |
| operands[2] = operands[1]; |
| operands[1] = temp; |
| } |
| |
| /* We know operands[0] == operands[1]. */ |
| |
| if (GET_CODE (operands[2]) == MEM) |
| { |
| p = "%z2\t%2"; |
| break; |
| } |
| |
| if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) |
| { |
| if (STACK_TOP_P (operands[0])) |
| /* How is it that we are storing to a dead operand[2]? |
| Well, presumably operands[1] is dead too. We can't |
| store the result to st(0) as st(0) gets popped on this |
| instruction. Instead store to operands[2] (which I |
| think has to be st(1)). st(1) will be popped later. |
| gcc <= 2.8.1 didn't have this check and generated |
| assembly code that the Unixware assembler rejected. */ |
| p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ |
| else |
| p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ |
| break; |
| } |
| |
| if (STACK_TOP_P (operands[0])) |
| p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ |
| else |
| p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ |
| break; |
| |
| case MINUS: |
| case DIV: |
| if (GET_CODE (operands[1]) == MEM) |
| { |
| p = "r%z1\t%1"; |
| break; |
| } |
| |
| if (GET_CODE (operands[2]) == MEM) |
| { |
| p = "%z2\t%2"; |
| break; |
| } |
| |
| if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) |
| { |
| #if SYSV386_COMPAT |
| /* The SystemV/386 SVR3.2 assembler, and probably all AT&T |
| derived assemblers, confusingly reverse the direction of |
| the operation for fsub{r} and fdiv{r} when the |
| destination register is not st(0). The Intel assembler |
| doesn't have this brain damage. Read !SYSV386_COMPAT to |
| figure out what the hardware really does. */ |
| if (STACK_TOP_P (operands[0])) |
| p = "{p\t%0, %2|rp\t%2, %0}"; |
| else |
| p = "{rp\t%2, %0|p\t%0, %2}"; |
| #else |
| if (STACK_TOP_P (operands[0])) |
| /* As above for fmul/fadd, we can't store to st(0). */ |
| p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ |
| else |
| p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ |
| #endif |
| break; |
| } |
| |
| if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
| { |
| #if SYSV386_COMPAT |
| if (STACK_TOP_P (operands[0])) |
| p = "{rp\t%0, %1|p\t%1, %0}"; |
| else |
| p = "{p\t%1, %0|rp\t%0, %1}"; |
| #else |
| if (STACK_TOP_P (operands[0])) |
| p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ |
| else |
| p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ |
| #endif |
| break; |
| } |
| |
| if (STACK_TOP_P (operands[0])) |
| { |
| if (STACK_TOP_P (operands[1])) |
| p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ |
| else |
| p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ |
| break; |
| } |
| else if (STACK_TOP_P (operands[1])) |
| { |
| #if SYSV386_COMPAT |
| p = "{\t%1, %0|r\t%0, %1}"; |
| #else |
| p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ |
| #endif |
| } |
| else |
| { |
| #if SYSV386_COMPAT |
| p = "{r\t%2, %0|\t%0, %2}"; |
| #else |
| p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ |
| #endif |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| strcat (buf, p); |
| return buf; |
| } |
| |
| /* Return the mode needed for ENTITY in the optimize_mode_switching pass. */ |
| |
| int |
| ix86_mode_needed (int entity, rtx insn) |
| { |
| enum attr_i387_cw mode; |
| |
| /* The mode UNINITIALIZED is used to store the control word after a |
| function call or ASM pattern. The mode ANY specifies that the insn |
| has no requirements on the control word and makes no changes to the |
| bits we are interested in. */ |
| |
| if (CALL_P (insn) |
| || (NONJUMP_INSN_P (insn) |
| && (asm_noperands (PATTERN (insn)) >= 0 |
| || GET_CODE (PATTERN (insn)) == ASM_INPUT))) |
| return I387_CW_UNINITIALIZED; |
| |
| if (recog_memoized (insn) < 0) |
| return I387_CW_ANY; |
| |
| mode = get_attr_i387_cw (insn); |
| |
| switch (entity) |
| { |
| case I387_TRUNC: |
| if (mode == I387_CW_TRUNC) |
| return mode; |
| break; |
| |
| case I387_FLOOR: |
| if (mode == I387_CW_FLOOR) |
| return mode; |
| break; |
| |
| case I387_CEIL: |
| if (mode == I387_CW_CEIL) |
| return mode; |
| break; |
| |
| case I387_MASK_PM: |
| if (mode == I387_CW_MASK_PM) |
| return mode; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return I387_CW_ANY; |
| } |
| |
| /* Output code to initialize the control word copies used by the |
| trunc?f?i and rounding patterns. MODE selects which rounding mode |
| to set up; the resulting control word is stored in the stack slot |
| for that mode. */ |
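| /* The x87 control word encodes the rounding mode in bits 10-11 (the |
| RC field): 00 = to nearest, 01 = down (toward -inf), 10 = up (toward |
| +inf), 11 = toward zero (truncate). Hence the 0x0400, 0x0800 and |
| 0x0c00 constants below; 0x0020 sets the PM bit, masking the |
| precision exception. */ |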
| |
| void |
| emit_i387_cw_initialization (int mode) |
| { |
| rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); |
| rtx new_mode; |
| |
| int slot; |
| |
| rtx reg = gen_reg_rtx (HImode); |
| |
| emit_insn (gen_x86_fnstcw_1 (stored_mode)); |
| emit_move_insn (reg, stored_mode); |
| |
| if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size) |
| { |
| switch (mode) |
| { |
| case I387_CW_TRUNC: |
| /* round toward zero (truncate) */ |
| emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); |
| slot = SLOT_CW_TRUNC; |
| break; |
| |
| case I387_CW_FLOOR: |
| /* round down toward -oo */ |
| emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); |
| emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); |
| slot = SLOT_CW_FLOOR; |
| break; |
| |
| case I387_CW_CEIL: |
| /* round up toward +oo */ |
| emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); |
| emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); |
| slot = SLOT_CW_CEIL; |
| break; |
| |
| case I387_CW_MASK_PM: |
| /* mask precision exception for nearbyint() */ |
| emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); |
| slot = SLOT_CW_MASK_PM; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| else |
| { |
| switch (mode) |
| { |
| case I387_CW_TRUNC: |
| /* round toward zero (truncate) */ |
| emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); |
| slot = SLOT_CW_TRUNC; |
| break; |
| |
| case I387_CW_FLOOR: |
| /* round down toward -oo */ |
| emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4))); |
| slot = SLOT_CW_FLOOR; |
| break; |
| |
| case I387_CW_CEIL: |
| /* round up toward +oo */ |
| emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8))); |
| slot = SLOT_CW_CEIL; |
| break; |
| |
| case I387_CW_MASK_PM: |
| /* mask precision exception for nearbyint() */ |
| emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); |
| slot = SLOT_CW_MASK_PM; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| gcc_assert (slot < MAX_386_STACK_LOCALS); |
| |
| new_mode = assign_386_stack_local (HImode, slot); |
| emit_move_insn (new_mode, reg); |
| } |
| |
| /* Output code for INSN to convert a float to a signed int. OPERANDS |
| are the insn operands. The output may be [HSD]Imode and the input |
| operand may be [SDX]Fmode. */ |
| |
| const char * |
| output_fix_trunc (rtx insn, rtx *operands, int fisttp) |
| { |
| int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; |
| int dimode_p = GET_MODE (operands[0]) == DImode; |
| int round_mode = get_attr_i387_cw (insn); |
| |
| /* Jump through a hoop or two for DImode, since the hardware has no |
| non-popping instruction. We used to do this a different way, but |
| that was somewhat fragile and broke with post-reload splitters. */ |
| if ((dimode_p || fisttp) && !stack_top_dies) |
| output_asm_insn ("fld\t%y1", operands); |
| |
| gcc_assert (STACK_TOP_P (operands[1])); |
| gcc_assert (GET_CODE (operands[0]) == MEM); |
| |
| if (fisttp) |
| output_asm_insn ("fisttp%z0\t%0", operands); |
| else |
| { |
| if (round_mode != I387_CW_ANY) |
| output_asm_insn ("fldcw\t%3", operands); |
| if (stack_top_dies || dimode_p) |
| output_asm_insn ("fistp%z0\t%0", operands); |
| else |
| output_asm_insn ("fist%z0\t%0", operands); |
| if (round_mode != I387_CW_ANY) |
| output_asm_insn ("fldcw\t%2", operands); |
| } |
| |
| return ""; |
| } |
| |
| /* Output code for x87 ffreep insn. The OPNO argument, which may only |
| have the values zero or one, indicates the ffreep insn's operand |
| from the OPERANDS array. */ |
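| /* When the assembler does not know the ffreep mnemonic, the raw |
| two-byte opcode is emitted instead: ffreep %st(i) encodes as |
| 0xdf 0xc0+i, which `.word' writes little-endian, e.g. .word 0xc0df |
| for %st(0). */ |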
| |
| static const char * |
| output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) |
| { |
| if (TARGET_USE_FFREEP) |
| #if HAVE_AS_IX86_FFREEP |
| return opno ? "ffreep\t%y1" : "ffreep\t%y0"; |
| #else |
| switch (REGNO (operands[opno])) |
| { |
| case FIRST_STACK_REG + 0: return ".word\t0xc0df"; |
| case FIRST_STACK_REG + 1: return ".word\t0xc1df"; |
| case FIRST_STACK_REG + 2: return ".word\t0xc2df"; |
| case FIRST_STACK_REG + 3: return ".word\t0xc3df"; |
| case FIRST_STACK_REG + 4: return ".word\t0xc4df"; |
| case FIRST_STACK_REG + 5: return ".word\t0xc5df"; |
| case FIRST_STACK_REG + 6: return ".word\t0xc6df"; |
| case FIRST_STACK_REG + 7: return ".word\t0xc7df"; |
| } |
| #endif |
| |
| return opno ? "fstp\t%y1" : "fstp\t%y0"; |
| } |
| |
| |
| /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi |
| should be used. UNORDERED_P is true when fucom should be used. */ |
| |
| const char * |
| output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p) |
| { |
| int stack_top_dies; |
| rtx cmp_op0, cmp_op1; |
| int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]); |
| |
| if (eflags_p) |
| { |
| cmp_op0 = operands[0]; |
| cmp_op1 = operands[1]; |
| } |
| else |
| { |
| cmp_op0 = operands[1]; |
| cmp_op1 = operands[2]; |
| } |
| |
| if (is_sse) |
| { |
| if (GET_MODE (operands[0]) == SFmode) |
| if (unordered_p) |
| return "ucomiss\t{%1, %0|%0, %1}"; |
| else |
| return "comiss\t{%1, %0|%0, %1}"; |
| else |
| if (unordered_p) |
| return "ucomisd\t{%1, %0|%0, %1}"; |
| else |
| return "comisd\t{%1, %0|%0, %1}"; |
| } |
| |
| gcc_assert (STACK_TOP_P (cmp_op0)); |
| |
| stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; |
| |
| if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1))) |
| { |
| if (stack_top_dies) |
| { |
| output_asm_insn ("ftst\n\tfnstsw\t%0", operands); |
| return output_387_ffreep (operands, 1); |
| } |
| else |
| return "ftst\n\tfnstsw\t%0"; |
| } |
| |
| if (STACK_REG_P (cmp_op1) |
| && stack_top_dies |
| && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) |
| && REGNO (cmp_op1) != FIRST_STACK_REG) |
| { |
| /* If the top of the 387 stack dies, and the other operand is also |
| a stack register that dies, then this must be a `fcompp' float |
| compare. */ |
| |
| if (eflags_p) |
| { |
| /* There is no double popping fcomi variant. Fortunately, |
| eflags is immune from the fstp's cc clobbering. */ |
| if (unordered_p) |
| output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); |
| else |
| output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); |
| return output_387_ffreep (operands, 0); |
| } |
| else |
| { |
| if (unordered_p) |
| return "fucompp\n\tfnstsw\t%0"; |
| else |
| return "fcompp\n\tfnstsw\t%0"; |
| } |
| } |
| else |
| { |
| /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */ |
| |
| static const char * const alt[16] = |
| { |
| "fcom%z2\t%y2\n\tfnstsw\t%0", |
| "fcomp%z2\t%y2\n\tfnstsw\t%0", |
| "fucom%z2\t%y2\n\tfnstsw\t%0", |
| "fucomp%z2\t%y2\n\tfnstsw\t%0", |
| |
| "ficom%z2\t%y2\n\tfnstsw\t%0", |
| "ficomp%z2\t%y2\n\tfnstsw\t%0", |
| NULL, |
| NULL, |
| |
| "fcomi\t{%y1, %0|%0, %y1}", |
| "fcomip\t{%y1, %0|%0, %y1}", |
| "fucomi\t{%y1, %0|%0, %y1}", |
| "fucomip\t{%y1, %0|%0, %y1}", |
| |
| NULL, |
| NULL, |
| NULL, |
| NULL |
| }; |
| |
| int mask; |
| const char *ret; |
| |
| mask = eflags_p << 3; |
| mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2; |
| mask |= unordered_p << 1; |
| mask |= stack_top_dies; |
| |
| gcc_assert (mask < 16); |
| ret = alt[mask]; |
| gcc_assert (ret); |
| |
| return ret; |
| } |
| } |
| |
| void |
| ix86_output_addr_vec_elt (FILE *file, int value) |
| { |
| const char *directive = ASM_LONG; |
| |
| #ifdef ASM_QUAD |
| if (TARGET_64BIT) |
| directive = ASM_QUAD; |
| #else |
| gcc_assert (!TARGET_64BIT); |
| #endif |
| |
| fprintf (file, "%s%s%d\n", directive, LPREFIX, value); |
| } |
| |
| void |
| ix86_output_addr_diff_elt (FILE *file, int value, int rel) |
| { |
| if (TARGET_64BIT) |
| fprintf (file, "%s%s%d-%s%d\n", |
| ASM_LONG, LPREFIX, value, LPREFIX, rel); |
| else if (HAVE_AS_GOTOFF_IN_DATA) |
| fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); |
| #if TARGET_MACHO |
| else if (TARGET_MACHO) |
| { |
| fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value); |
| machopic_output_function_base_name (file); |
| fprintf(file, "\n"); |
| } |
| #endif |
| else |
| asm_fprintf (file, "%s%U%s+[.-%s%d]\n", |
| ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value); |
| } |
| |
| /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate |
| for the target. */ |
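| /* "xor reg, reg" is the shorter encoding and breaks any dependency |
| on the old register value, but it clobbers the flags, so the xor |
| form below is wrapped in a PARALLEL with a flags CLOBBER; "mov $0, |
| reg" is kept for targets where TARGET_USE_MOV0 says the mov is |
| preferable. */ |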
| |
| void |
| ix86_expand_clear (rtx dest) |
| { |
| rtx tmp; |
| |
| /* We play register width games, which are only valid after reload. */ |
| gcc_assert (reload_completed); |
| |
| /* Avoid HImode and its attendant prefix byte. */ |
| if (GET_MODE_SIZE (GET_MODE (dest)) < 4) |
| dest = gen_rtx_REG (SImode, REGNO (dest)); |
| |
| tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); |
| |
| /* This predicate should match that for movsi_xor and movdi_xor_rex64. */ |
| if (reload_completed && (!TARGET_USE_MOV0 || optimize_size)) |
| { |
| rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17)); |
| tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); |
| } |
| |
| emit_insn (tmp); |
| } |
| |
| /* X is an unchanging MEM. If it is a constant pool reference, return |
| the constant pool rtx, else NULL. */ |
| |
| rtx |
| maybe_get_pool_constant (rtx x) |
| { |
| x = ix86_delegitimize_address (XEXP (x, 0)); |
| |
| if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) |
| return get_pool_constant (x); |
| |
| return NULL_RTX; |
| } |
| |
| void |
| ix86_expand_move (enum machine_mode mode, rtx operands[]) |
| { |
| int strict = (reload_in_progress || reload_completed); |
| /* APPLE LOCAL dynamic-no-pic */ |
| rtx insn, op0, op1; |
| enum tls_model model; |
| |
| op0 = operands[0]; |
| op1 = operands[1]; |
| |
| if (GET_CODE (op1) == SYMBOL_REF) |
| { |
| model = SYMBOL_REF_TLS_MODEL (op1); |
| if (model) |
| { |
| op1 = legitimize_tls_address (op1, model, true); |
| op1 = force_operand (op1, op0); |
| if (op1 == op0) |
| return; |
| } |
| } |
| else if (GET_CODE (op1) == CONST |
| && GET_CODE (XEXP (op1, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF) |
| { |
| model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0)); |
| if (model) |
| { |
| rtx addend = XEXP (XEXP (op1, 0), 1); |
| op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true); |
| op1 = force_operand (op1, NULL); |
| op1 = expand_simple_binop (Pmode, PLUS, op1, addend, |
| op0, 1, OPTAB_DIRECT); |
| if (op1 == op0) |
| return; |
| } |
| } |
| |
| /* APPLE LOCAL begin dynamic-no-pic */ |
| /* Allow Mach-O and Mach-O for x86_64 to coexist. */ |
| if (((TARGET_MACHO && MACHOPIC_INDIRECT) |
| || flag_pic) |
| && mode == Pmode && symbolic_operand (op1, Pmode)) |
| /* APPLE LOCAL end dynamic-no-pic */ |
| { |
| if (TARGET_MACHO && !TARGET_64BIT) |
| { |
| #if TARGET_MACHO |
| /* APPLE LOCAL begin dynamic-no-pic */ |
| if (MACHOPIC_INDIRECT) |
| { |
| rtx temp = ((reload_in_progress |
| || ((op0 && GET_CODE (op0) == REG) |
| && mode == Pmode)) |
| ? op0 : gen_reg_rtx (Pmode)); |
| op1 = machopic_indirect_data_reference (op1, temp); |
| if (MACHOPIC_PURE) |
| op1 = machopic_legitimize_pic_address (op1, mode, |
| temp == op1 ? 0 : temp); |
| } |
| if (op0 != op1 && GET_CODE (op0) != MEM) |
| { |
| insn = gen_rtx_SET (VOIDmode, op0, op1); |
| emit_insn (insn); |
| return; |
| } |
| if (GET_CODE (op0) == MEM) |
| op1 = force_reg (Pmode, op1); |
| else |
| { |
| rtx temp = op0; |
| if (GET_CODE (temp) != REG) |
| temp = gen_reg_rtx (Pmode); |
| temp = legitimize_pic_address (op1, temp); |
| if (temp == op0) |
| return; |
| op1 = temp; |
| } |
| /* APPLE LOCAL end dynamic-no-pic */ |
| #endif |
| } |
| else |
| { |
| if (GET_CODE (op0) == MEM) |
| op1 = force_reg (Pmode, op1); |
| else |
| op1 = legitimize_address (op1, op1, Pmode); |
| } |
| } |
| else |
| { |
| if (GET_CODE (op0) == MEM |
| && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) |
| || !push_operand (op0, mode)) |
| && GET_CODE (op1) == MEM) |
| op1 = force_reg (mode, op1); |
| |
| if (push_operand (op0, mode) |
| && ! general_no_elim_operand (op1, mode)) |
| op1 = copy_to_mode_reg (mode, op1); |
| |
| /* Force large constants in 64-bit compilation into a register |
| to get them CSEd. */ |
| if (TARGET_64BIT && mode == DImode |
| && immediate_operand (op1, mode) |
| && !x86_64_zext_immediate_operand (op1, VOIDmode) |
| && !register_operand (op0, mode) |
| && optimize && !reload_completed && !reload_in_progress) |
| op1 = copy_to_mode_reg (mode, op1); |
| |
| if (FLOAT_MODE_P (mode)) |
| { |
| /* If we are loading a floating point constant to a register, |
| force the value to memory now, since we'll get better code |
| out the back end. */ |
| |
| if (strict) |
| ; |
| else if (GET_CODE (op1) == CONST_DOUBLE) |
| { |
| op1 = validize_mem (force_const_mem (mode, op1)); |
| if (!register_operand (op0, mode)) |
| { |
| rtx temp = gen_reg_rtx (mode); |
| emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); |
| emit_move_insn (op0, temp); |
| return; |
| } |
| } |
| } |
| } |
| |
| emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); |
| } |
| |
| void |
| ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) |
| { |
| rtx op0 = operands[0], op1 = operands[1]; |
| /* APPLE LOCAL begin radar 4614623 */ |
| cfun->uses_vector = 1; |
| /* APPLE LOCAL end radar 4614623 */ |
| |
| /* Force constants other than zero into memory. We do not know how |
| the instructions used to build constants modify the upper 64 bits |
| of the register; once we have that information we may be able |
| to handle some of them more efficiently. */ |
| if ((reload_in_progress | reload_completed) == 0 |
| && register_operand (op0, mode) |
| && CONSTANT_P (op1) |
| && standard_sse_constant_p (op1) <= 0) |
| op1 = validize_mem (force_const_mem (mode, op1)); |
| |
| /* Make operand1 a register if it isn't already. */ |
| if (!no_new_pseudos |
| && !register_operand (op0, mode) |
| && !register_operand (op1, mode)) |
| { |
| emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); |
| return; |
| } |
| |
| emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); |
| } |
| |
| /* Implement the movmisalign patterns for SSE. Non-SSE modes go |
| straight to ix86_expand_vector_move. */ |
| |
| void |
| ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) |
| { |
| rtx op0, op1, m; |
| |
| op0 = operands[0]; |
| op1 = operands[1]; |
| |
| if (MEM_P (op1)) |
| { |
| /* If we're optimizing for size, movups is the smallest. */ |
| if (optimize_size) |
| { |
| op0 = gen_lowpart (V4SFmode, op0); |
| op1 = gen_lowpart (V4SFmode, op1); |
| emit_insn (gen_sse_movups (op0, op1)); |
| return; |
| } |
| |
| /* ??? If we have typed data, then it would appear that using |
| movdqu is the only way to get unaligned data loaded with |
| integer type. */ |
| if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
| { |
| op0 = gen_lowpart (V16QImode, op0); |
| op1 = gen_lowpart (V16QImode, op1); |
| emit_insn (gen_sse2_movdqu (op0, op1)); |
| return; |
| } |
| |
| if (TARGET_SSE2 && mode == V2DFmode) |
| { |
| rtx zero; |
| |
| /* When SSE registers are split into halves, we can avoid |
| writing to the top half twice. */ |
| if (TARGET_SSE_SPLIT_REGS) |
| { |
| emit_insn (gen_rtx_CLOBBER (VOIDmode, op0)); |
| zero = op0; |
| } |
| else |
| { |
| /* ??? Not sure about the best option for the Intel chips. |
| The following would seem to satisfy; the register is |
| entirely cleared, breaking the dependency chain. We |
| then store to the upper half, with a dependency depth |
| of one. A rumor has it that Intel recommends two movsd |
| followed by an unpacklpd, but this is unconfirmed. And |
| given that the dependency depth of the unpacklpd would |
| still be one, I'm not sure why this would be better. */ |
| zero = CONST0_RTX (V2DFmode); |
| } |
| |
| m = adjust_address (op1, DFmode, 0); |
| emit_insn (gen_sse2_loadlpd (op0, zero, m)); |
| m = adjust_address (op1, DFmode, 8); |
| emit_insn (gen_sse2_loadhpd (op0, op0, m)); |
| } |
| else |
| { |
| if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) |
| emit_move_insn (op0, CONST0_RTX (mode)); |
| else |
| emit_insn (gen_rtx_CLOBBER (VOIDmode, op0)); |
| |
| if (mode != V4SFmode) |
| op0 = gen_lowpart (V4SFmode, op0); |
| m = adjust_address (op1, V2SFmode, 0); |
| emit_insn (gen_sse_loadlps (op0, op0, m)); |
| m = adjust_address (op1, V2SFmode, 8); |
| emit_insn (gen_sse_loadhps (op0, op0, m)); |
| } |
| } |
| else if (MEM_P (op0)) |
| { |
| /* If we're optimizing for size, movups is the smallest. */ |
| if (optimize_size) |
| { |
| op0 = gen_lowpart (V4SFmode, op0); |
| op1 = gen_lowpart (V4SFmode, op1); |
| emit_insn (gen_sse_movups (op0, op1)); |
| return; |
| } |
| |
| /* ??? Similar to above, only less clear because of the "typeless |
| stores" issue. */ |
| if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES |
| && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) |
| { |
| op0 = gen_lowpart (V16QImode, op0); |
| op1 = gen_lowpart (V16QImode, op1); |
| emit_insn (gen_sse2_movdqu (op0, op1)); |
| return; |
| } |
| |
| if (TARGET_SSE2 && mode == V2DFmode) |
| { |
| m = adjust_address (op0, DFmode, 0); |
| emit_insn (gen_sse2_storelpd (m, op1)); |
| m = adjust_address (op0, DFmode, 8); |
| emit_insn (gen_sse2_storehpd (m, op1)); |
| } |
| else |
| { |
| if (mode != V4SFmode) |
| op1 = gen_lowpart (V4SFmode, op1); |
| m = adjust_address (op0, V2SFmode, 0); |
| emit_insn (gen_sse_storelps (m, op1)); |
| m = adjust_address (op0, V2SFmode, 8); |
| emit_insn (gen_sse_storehps (m, op1)); |
| } |
| } |
| else |
| gcc_unreachable (); |
| } |
| |
| /* Expand a push in MODE. This is some mode for which we do not support |
| proper push instructions, at least from the registers that we expect |
| the value to live in. */ |
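| /* The expansion is simply an explicit stack adjustment followed by a |
| store through the stack pointer, e.g. on x86_32 roughly: |
| sub $N, %esp |
| mov ..., (%esp) |
| rather than a push instruction proper. */ |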
| |
| void |
| ix86_expand_push (enum machine_mode mode, rtx x) |
| { |
| rtx tmp; |
| |
| tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx, |
| GEN_INT (-GET_MODE_SIZE (mode)), |
| stack_pointer_rtx, 1, OPTAB_DIRECT); |
| if (tmp != stack_pointer_rtx) |
| emit_move_insn (stack_pointer_rtx, tmp); |
| |
| tmp = gen_rtx_MEM (mode, stack_pointer_rtx); |
| emit_move_insn (tmp, x); |
| } |
| |
| /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the |
| destination to use for the operation. If different from the true |
| destination in operands[0], a copy operation will be required. */ |
| |
| rtx |
| ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, |
| rtx operands[]) |
| { |
| int matching_memory; |
| rtx src1, src2, dst; |
| |
| dst = operands[0]; |
| src1 = operands[1]; |
| src2 = operands[2]; |
| |
| /* Recognize <var1> = <value> <op> <var1> for commutative operators */ |
| if (GET_RTX_CLASS (code) == RTX_COMM_ARITH |
| && (rtx_equal_p (dst, src2) |
| || immediate_operand (src1, mode))) |
| { |
| rtx temp = src1; |
| src1 = src2; |
| src2 = temp; |
| } |
| |
| /* If the destination is memory, and we do not have matching source |
| operands, do things in registers. */ |
| matching_memory = 0; |
| if (GET_CODE (dst) == MEM) |
| { |
| if (rtx_equal_p (dst, src1)) |
| matching_memory = 1; |
| else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH |
| && rtx_equal_p (dst, src2)) |
| matching_memory = 2; |
| else |
| dst = gen_reg_rtx (mode); |
| } |
| |
| /* Both source operands cannot be in memory. */ |
| if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM) |
| { |
| if (matching_memory != 2) |
| src2 = force_reg (mode, src2); |
| else |
| src1 = force_reg (mode, src1); |
| } |
| |
| /* If the operation is not commutative, source 1 cannot be a constant |
| or non-matching memory. */ |
| if ((CONSTANT_P (src1) |
| || (!matching_memory && GET_CODE (src1) == MEM)) |
| && GET_RTX_CLASS (code) != RTX_COMM_ARITH) |
| src1 = force_reg (mode, src1); |
| |
| src1 = operands[1] = src1; |
| src2 = operands[2] = src2; |
| return dst; |
| } |
| |
| /* Similarly, but assume that the destination has already been |
| set up properly. */ |
| |
| void |
| ix86_fixup_binary_operands_no_copy (enum rtx_code code, |
| enum machine_mode mode, rtx operands[]) |
| { |
| rtx dst = ix86_fixup_binary_operands (code, mode, operands); |
| gcc_assert (dst == operands[0]); |
| } |
| |
| /* Attempt to expand a binary operator. Make the expansion closer to the |
| actual machine than just general_operand, which will allow 3 separate |
| memory references (one output, two input) in a single insn. */ |
| |
| void |
| ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, |
| rtx operands[]) |
| { |
| rtx src1, src2, dst, op, clob; |
| |
| dst = ix86_fixup_binary_operands (code, mode, operands); |
| src1 = operands[1]; |
| src2 = operands[2]; |
| |
| /* Emit the instruction. */ |
| |
| op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2)); |
| if (reload_in_progress) |
| { |
| /* Reload doesn't know about the flags register, and doesn't know that |
| it doesn't want to clobber it. We can only do this with PLUS. */ |
| gcc_assert (code == PLUS); |
| emit_insn (op); |
| } |
| else |
| { |
| clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); |
| emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); |
| } |
| |
| /* Fix up the destination if needed. */ |
| if (dst != operands[0]) |
| emit_move_insn (operands[0], dst); |
| } |
| |
| /* Return TRUE or FALSE depending on whether the binary operator meets the |
| appropriate constraints. */ |
| |
| int |
| ix86_binary_operator_ok (enum rtx_code code, |
| enum machine_mode mode ATTRIBUTE_UNUSED, |
| rtx operands[3]) |
| { |
| /* Both source operands cannot be in memory. */ |
| if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM) |
| return 0; |
| /* If the operation is not commutative, source 1 cannot be a constant. */ |
| if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH) |
| return 0; |
| /* If the destination is memory, we must have a matching source operand. */ |
| if (GET_CODE (operands[0]) == MEM |
| && ! (rtx_equal_p (operands[0], operands[1]) |
| || (GET_RTX_CLASS (code) == RTX_COMM_ARITH |
| && rtx_equal_p (operands[0], operands[2])))) |
| return 0; |
| /* If the operation is not commutative and source 1 is memory, we must |
| have a matching destination. */ |
| if (GET_CODE (operands[1]) == MEM |
| && GET_RTX_CLASS (code) != RTX_COMM_ARITH |
| && ! rtx_equal_p (operands[0], operands[1])) |
| return 0; |
| return 1; |
| } |
| |
| /* Attempt to expand a unary operator. Make the expansion closer to the |
| actual machine than just general_operand, which will allow 2 separate |
| memory references (one output, one input) in a single insn. */ |
| |
| void |
| ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, |
| rtx operands[]) |
| { |
| int matching_memory; |
| rtx src, dst, op, clob; |
| |
| dst = operands[0]; |
| src = operands[1]; |
| |
| /* If the destination is memory, and we do not have matching source |
| operands, do things in registers. */ |
| matching_memory = 0; |
| if (MEM_P (dst)) |
| { |
| if (rtx_equal_p (dst, src)) |
| matching_memory = 1; |
| else |
| dst = gen_reg_rtx (mode); |
| } |
| |
| /* When source operand is memory, destination must match. */ |
| if (MEM_P (src) && !matching_memory) |
| src = force_reg (mode, src); |
| |
| /* Emit the instruction. */ |
| |
| op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src)); |
| if (reload_in_progress || code == NOT) |
| { |
| /* Reload doesn't know about the flags register, and doesn't know that |
| it doesn't want to clobber it. */ |
| gcc_assert (code == NOT); |
| emit_insn (op); |
| } |
| else |
| { |
| clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); |
| emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); |
| } |
| |
| /* Fix up the destination if needed. */ |
| if (dst != operands[0]) |
| emit_move_insn (operands[0], dst); |
| } |
| |
| /* Return TRUE or FALSE depending on whether the unary operator meets the |
| appropriate constraints. */ |
| |
| int |
| ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, |
| enum machine_mode mode ATTRIBUTE_UNUSED, |
| rtx operands[2] ATTRIBUTE_UNUSED) |
| { |
| /* If one of operands is memory, source and destination must match. */ |
| if ((GET_CODE (operands[0]) == MEM |
| || GET_CODE (operands[1]) == MEM) |
| && ! rtx_equal_p (operands[0], operands[1])) |
| return FALSE; |
| return TRUE; |
| } |
| |
| /* APPLE LOCAL begin 4176531 4424891 */ |
| static void |
| ix86_expand_vector_move2 (enum machine_mode mode, rtx op0, rtx op1) |
| { |
| rtx operands[2]; |
| operands[0] = op0; |
| operands[1] = op1; |
| ix86_expand_vector_move (mode, operands); |
| } |
| |
| static rtvec |
| gen_2_4_rtvec (int scalars_per_vector, rtx val, enum machine_mode mode) |
| { |
| rtvec rval; |
| switch (scalars_per_vector) |
| { |
| case 2: rval = gen_rtvec (2, val, CONST0_RTX (mode)); |
| break; |
| case 4: rval = gen_rtvec (4, val, CONST0_RTX (mode), |
| CONST0_RTX (mode), CONST0_RTX (mode)); |
| break; |
| default: gcc_unreachable (); |
| } |
| return rval; |
| } |
| |
| /* Convert a DFmode value in an SSE register into an unsigned SImode |
| value. When -fpmath=387, this is done with an x87 |
| st(0)_FP->signed-int-64 conversion, ignoring the upper 32 bits of |
| the result. On x86_64, there is an equivalent SSE |
| %xmm->signed-int-64 conversion. On x86_32, we don't have the |
| instruction, nor the 64-bit destination register it requires. Do |
| the conversion inline in the SSE registers. Requires SSE2. For |
| x86_32, -mfpmath=sse, !optimize_size only. */ |
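| /* In outline, the sequence emitted below computes (a sketch): |
| large = (value >= 2**31) ? ~0 : 0; |
| value = min (value, 4294967295.0); |
| value = max (value, 0.0); |
| if (large) value -= 2**31; |
| result = (int) value; |
| result ^= (large << 31); |
| where the final xor restores the 2**31 bias via the sign bit. */ |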
| const char * |
| ix86_expand_convert_uns_DF2SI_sse (rtx operands[]) |
| { |
| rtx int_zero_as_fp, int_maxval_as_fp, int_two31_as_fp; |
| REAL_VALUE_TYPE rvt_zero, rvt_int_maxval, rvt_int_two31; |
| rtx int_zero_as_xmm, int_maxval_as_xmm; |
| rtx fp_value = operands[1]; |
| rtx target = operands[0]; |
| rtx large_xmm; |
| rtx large_xmm_v2di; |
| rtx le_op; |
| rtx zero_or_two31_xmm; |
| rtx final_result_rtx; |
| rtx v_rtx; |
| rtx incoming_value; |
| |
| cfun->uses_vector = 1; |
| |
| real_from_integer (&rvt_zero, DFmode, 0ULL, 0ULL, 1); |
| int_zero_as_fp = const_double_from_real_value (rvt_zero, DFmode); |
| |
| real_from_integer (&rvt_int_maxval, DFmode, 0xffffffffULL, 0ULL, 1); |
| int_maxval_as_fp = const_double_from_real_value (rvt_int_maxval, DFmode); |
| |
| real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1); |
| int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode); |
| |
| incoming_value = force_reg (GET_MODE (operands[1]), operands[1]); |
| |
| gcc_assert (ix86_preferred_stack_boundary >= 128); |
| |
| fp_value = gen_reg_rtx (V2DFmode); |
| ix86_expand_vector_move2 (V2DFmode, fp_value, |
| gen_rtx_SUBREG (V2DFmode, incoming_value, 0)); |
| large_xmm = gen_reg_rtx (V2DFmode); |
| |
| v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, |
| gen_2_4_rtvec (2, int_two31_as_fp, DFmode)); |
| ix86_expand_vector_move2 (DFmode, large_xmm, v_rtx); |
| le_op = gen_rtx_fmt_ee (LE, V2DFmode, |
| gen_rtx_SUBREG (V2DFmode, fp_value, 0), large_xmm); |
| /* large_xmm = (fp_value >= 2**31) ? -1 : 0 ; */ |
| emit_insn (gen_sse2_vmmaskcmpv2df3 (large_xmm, large_xmm, fp_value, le_op)); |
| |
| int_maxval_as_xmm = gen_reg_rtx (V2DFmode); |
| v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, |
| gen_2_4_rtvec (2, int_maxval_as_fp, DFmode)); |
| ix86_expand_vector_move2 (DFmode, int_maxval_as_xmm, v_rtx); |
| |
| emit_insn (gen_sse2_vmsminv2df3 (fp_value, fp_value, int_maxval_as_xmm)); |
| |
| int_zero_as_xmm = gen_reg_rtx (V2DFmode); |
| v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, |
| gen_2_4_rtvec (2, int_zero_as_fp, DFmode)); |
| |
| ix86_expand_vector_move2 (DFmode, int_zero_as_xmm, v_rtx); |
| |
| emit_insn (gen_sse2_vmsmaxv2df3 (fp_value, fp_value, int_zero_as_xmm)); |
| |
| zero_or_two31_xmm = gen_reg_rtx (V2DFmode); |
| v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, |
| gen_2_4_rtvec (2, int_two31_as_fp, DFmode)); |
| ix86_expand_vector_move2 (DFmode, zero_or_two31_xmm, v_rtx); |
| |
| /* zero_or_two31 = (large_xmm) ? 2**31 : 0; */ |
| emit_insn (gen_andv2df3 (zero_or_two31_xmm, zero_or_two31_xmm, large_xmm)); |
| /* if (large_xmm) fp_value -= 2**31; */ |
| emit_insn (gen_subv2df3 (fp_value, fp_value, zero_or_two31_xmm)); |
| /* assert (0 <= fp_value && fp_value < 2**31); |
| int_result = trunc (fp_value); */ |
| final_result_rtx = gen_reg_rtx (V4SImode); |
| emit_insn (gen_sse2_cvttpd2dq (final_result_rtx, fp_value)); |
| |
| large_xmm_v2di = gen_reg_rtx (V2DImode); |
| emit_move_insn (large_xmm_v2di, gen_rtx_SUBREG (V2DImode, large_xmm, 0)); |
| emit_insn (gen_ashlv2di3 (large_xmm_v2di, large_xmm_v2di, |
| gen_rtx_CONST_INT (SImode, 31))); |
| |
| emit_insn (gen_xorv4si3 (final_result_rtx, final_result_rtx, |
| gen_rtx_SUBREG (V4SImode, large_xmm_v2di, 0))); |
| if (!rtx_equal_p (target, final_result_rtx)) |
| emit_insn (gen_sse2_stored (target, final_result_rtx)); |
| return ""; |
| } |
| |
| /* Convert an SFmode value in an SSE register into an unsigned SImode |
| value. When -fpmath=387, this is done with an x87 |
| st(0)_FP->signed-int-64 conversion, subsequently ignoring the upper |
| 32 bits of the result. On x86_64, there is an equivalent SSE |
| %xmm->signed-int-64 conversion. On x86_32, we don't have the |
| instruction, nor the 64-bit destination register it requires. Do |
| the conversion inline in the SSE registers. Requires SSE2. For |
| x86_32, -mfpmath=sse, !optimize_size only. */ |
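| /* In outline, the sequence emitted below computes (a sketch): |
| value = max (value, 0.0); |
| above31 = (value >= 2**31) ? ~0 : 0; |
| above32 = (value >= 2**32) ? ~0 : 0; |
| if (above31) value -= 2**31; |
| result = (int) value + (above31 << 31); |
| result |= above32; |
| where the final or saturates the result to 0xffffffff when the |
| input was too large to represent. */ |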
| const char * |
| ix86_expand_convert_uns_SF2SI_sse (rtx operands[]) |
| { |
| rtx int_zero_as_fp, int_two31_as_fp, int_two32_as_fp; |
| REAL_VALUE_TYPE rvt_zero, rvt_int_two31, rvt_int_two32; |
| rtx int_zero_as_xmm; |
| rtx fp_value = operands[1]; |
| rtx target = operands[0]; |
| rtx large_xmm; |
| rtx two31_xmm, two32_xmm; |
| rtx above_two31_xmm, above_two32_xmm; |
| rtx zero_or_two31_SI_xmm; |
| rtx le_op; |
| rtx zero_or_two31_SF_xmm; |
| rtx int_result_xmm; |
| rtx v_rtx; |
| rtx incoming_value; |
| #ifdef ENABLE_LLVM |
| gcc_assert(0 && "rtx floating point conversion?"); |
| return 0; |
| #endif |
| |
| cfun->uses_vector = 1; |
| |
| real_from_integer (&rvt_zero, SFmode, 0ULL, 0ULL, 1); |
| int_zero_as_fp = const_double_from_real_value (rvt_zero, SFmode); |
| |
| real_from_integer (&rvt_int_two31, SFmode, 0x80000000ULL, 0ULL, 1); |
| int_two31_as_fp = const_double_from_real_value (rvt_int_two31, SFmode); |
| |
| /* LLVM LOCAL begin */ |
| #ifndef ENABLE_LLVM |
| /* This warns if HOST_WIDE_INT < 64. */ |
| real_from_integer (&rvt_int_two32, SFmode, (HOST_WIDE_INT)0x100000000ULL, |
| 0ULL, 1); |
| #endif |
| /* LLVM LOCAL end */ |
| int_two32_as_fp = const_double_from_real_value (rvt_int_two32, SFmode); |
| |
| incoming_value = force_reg (GET_MODE (operands[1]), operands[1]); |
| |
| gcc_assert (ix86_preferred_stack_boundary >= 128); |
| |
| fp_value = gen_reg_rtx (V4SFmode); |
| ix86_expand_vector_move2 (V4SFmode, fp_value, |
| gen_rtx_SUBREG (V4SFmode, incoming_value, 0)); |
| large_xmm = gen_reg_rtx (V4SFmode); |
| |
| /* fp_value = MAX (fp_value, 0.0); */ |
| /* Preclude negative values; truncate at zero. */ |
| int_zero_as_xmm = gen_reg_rtx (V4SFmode); |
| v_rtx = gen_rtx_CONST_VECTOR (V4SFmode, |
| gen_2_4_rtvec (4, int_zero_as_fp, SFmode)); |
| ix86_expand_vector_move2 (SFmode, int_zero_as_xmm, v_rtx); |
| emit_insn (gen_sse_vmsmaxv4sf3 (fp_value, fp_value, int_zero_as_xmm)); |
| |
| /* two31_xmm = 0x80000000; */ |
| two31_xmm = gen_reg_rtx (V4SFmode); |
| v_rtx = gen_rtx_CONST_VECTOR (V4SFmode, |
| gen_2_4_rtvec (4, int_two31_as_fp, SFmode)); |
| ix86_expand_vector_move2 (SFmode, two31_xmm, v_rtx); |
| |
| /* zero_or_two31_SF_xmm = 0x80000000; */ |
| zero_or_two31_SF_xmm = gen_reg_rtx (V4SFmode); |
| ix86_expand_vector_move2 (SFmode, zero_or_two31_SF_xmm, two31_xmm); |
| |
| /* above_two31_xmm = (fp_value >= 2**31) ? 0xffff_ffff : 0 ; */ |
| above_two31_xmm = gen_reg_rtx (V4SFmode); |
| ix86_expand_vector_move2 (SFmode, above_two31_xmm, two31_xmm); |
| le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two31_xmm, |
| gen_rtx_SUBREG (V4SFmode, two31_xmm, 0)); |
| emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two31_xmm, above_two31_xmm, |
| fp_value, le_op)); |
| |
| /* two32_xmm = 0x1_0000_0000; */ |
| two32_xmm = gen_reg_rtx (V4SFmode); |
| v_rtx = gen_rtx_CONST_VECTOR (V4SFmode, |
| gen_2_4_rtvec (4, int_two32_as_fp, SFmode)); |
| ix86_expand_vector_move2 (SFmode, two32_xmm, v_rtx); |
| |
| /* above_two32_xmm = (fp_value >= 2**32) ? 0xffff_ffff : 0 ; */ |
| above_two32_xmm = gen_reg_rtx (V4SFmode); |
| ix86_expand_vector_move2 (SFmode, above_two32_xmm, two32_xmm); |
| le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two32_xmm, |
| gen_rtx_SUBREG (V4SFmode, two32_xmm, 0)); |
| emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two32_xmm, above_two32_xmm, |
| fp_value, le_op)); |
| |
| /* zero_or_two31_SF_xmm = (above_two31_xmm) ? 2**31 : 0; */ |
| emit_insn (gen_andv4sf3 (zero_or_two31_SF_xmm, zero_or_two31_SF_xmm, |
| above_two31_xmm)); |
| |
| /* zero_or_two31_SI_xmm = (above_two31_xmm << 31), |
| i.e. 0x8000_0000 where fp_value >= 2**31, else 0. */ |
| zero_or_two31_SI_xmm = gen_reg_rtx (V4SImode); |
| emit_move_insn (zero_or_two31_SI_xmm, |
| gen_rtx_SUBREG (V4SImode, above_two31_xmm, 0)); |
| emit_insn (gen_ashlv4si3 (zero_or_two31_SI_xmm, zero_or_two31_SI_xmm, |
| gen_rtx_CONST_INT (SImode, 31))); |
| |
| /* if (above_two31_xmm) fp_value -= 2**31; */ |
| /* If the input FP value is greater than 2**31, subtract that amount |
| from the FP value before conversion. We'll re-add that amount as |
| an integer after the conversion. */ |
| emit_insn (gen_subv4sf3 (fp_value, fp_value, zero_or_two31_SF_xmm)); |
| |
| /* assert (0.0 <= fp_value && fp_value < 2**31); |
| int_result_xmm = trunc (fp_value); */ |
| /* Apply the SSE double -> signed_int32 conversion to our biased, |
| clamped SF value. */ |
| int_result_xmm = gen_reg_rtx (V4SImode); |
| emit_insn (gen_sse2_cvttps2dq (int_result_xmm, fp_value)); |
| |
| /* int_result_xmm += zero_or_two_31_SI_xmm; */ |
| /* Restore the 2**31 bias we may have subtracted earlier. If the |
| input FP value was between 2**31 and 2**32, this will unbias the |
| result. |
| |
| input_fp_value < 2**31: this won't change the value |
| 2**31 <= input_fp_value < 2**32: |
| this will restore the 2**31 bias we subtracted earlier |
| input_fp_value >= 2**32: this insn doesn't matter; |
| the next insn will clobber this result |
| */ |
| emit_insn (gen_addv4si3 (int_result_xmm, int_result_xmm, |
| zero_or_two31_SI_xmm)); |
| |
| /* int_result_xmm |= above_two32_xmm; */ |
| /* If the input value was greater than 2**32, force the integral |
| result to 0xffff_ffff. */ |
| emit_insn (gen_iorv4si3 (int_result_xmm, int_result_xmm, |
| gen_rtx_SUBREG (V4SImode, above_two32_xmm, 0))); |
| |
| if (!rtx_equal_p (target, int_result_xmm)) |
| emit_insn (gen_sse2_stored (target, int_result_xmm)); |
| return ""; |
| } |
| |
| /* Convert an unsigned DImode value into a DFmode, using only SSE. |
| Expects the 64-bit DImode to be supplied as two 32-bit parts in two |
| SSE %xmm registers; result returned in an %xmm register. Requires |
| SSE2; will use SSE3 if available. For x86_32, -mfpmath=sse, |
| !optimize_size only. */ |
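| /* The conversion relies on the usual exponent-insertion trick: a |
| double whose high word is 0x43300000 and whose low word is LO has |
| the value 2**52 + LO, and one built with 0x45300000 over HI has the |
| value 2**84 + HI * 2**32. Subtracting the constant biases 2**52 |
| and 2**84 and summing the two halves reconstructs HI * 2**32 + LO |
| exactly. */ |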
| const char * |
| ix86_expand_convert_uns_DI2DF_sse (rtx operands[]) |
| { |
| REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; |
| rtx bias_lo_rtx, bias_hi_rtx; |
| rtx target = operands[0]; |
| rtx fp_value = operands[1]; |
| rtx fp_value_hi, fp_value_lo; |
| rtx fp_value_hi_xmm, fp_value_lo_xmm; |
| rtx int_xmm; |
| rtx final_result_xmm, result_lo_xmm; |
| rtx biases, exponents; |
| rtvec biases_rtvec, exponents_rtvec; |
| |
| #ifdef ENABLE_LLVM |
| gcc_assert(0 && "rtx floating point conversion?"); |
| return 0; |
| #endif |
| |
| cfun->uses_vector = 1; |
| |
| gcc_assert (ix86_preferred_stack_boundary >= 128); |
| |
| int_xmm = gen_reg_rtx (V4SImode); |
| |
| fp_value = force_reg (GET_MODE (operands[1]), operands[1]); |
| |
| fp_value_lo = gen_rtx_SUBREG (SImode, fp_value, 0); |
| fp_value_lo_xmm = gen_reg_rtx (V4SImode); |
| emit_insn (gen_sse2_loadld (fp_value_lo_xmm, CONST0_RTX (V4SImode), |
| fp_value_lo)); |
| |
| fp_value_hi = gen_rtx_SUBREG (SImode, fp_value, 4); |
| fp_value_hi_xmm = gen_reg_rtx (V4SImode); |
| emit_insn (gen_sse2_loadld (fp_value_hi_xmm, CONST0_RTX (V4SImode), |
| fp_value_hi)); |
| |
| ix86_expand_vector_move2 (V4SImode, int_xmm, fp_value_hi_xmm); |
| emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, fp_value_lo_xmm)); |
| |
| exponents_rtvec = gen_rtvec (4, GEN_INT (0x45300000UL), |
| GEN_INT (0x43300000UL), |
| CONST0_RTX (SImode), CONST0_RTX (SImode)); |
| exponents = validize_mem ( |
| force_const_mem (V4SImode, gen_rtx_CONST_VECTOR (V4SImode, |
| exponents_rtvec))); |
| emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents)); |
| |
| final_result_xmm = gen_reg_rtx (V2DFmode); |
| ix86_expand_vector_move2 (V2DFmode, final_result_xmm, |
| gen_rtx_SUBREG (V2DFmode, int_xmm, 0)); |
| |
| /* Integral versions of the DFmode 'exponents' above. */ |
| /* LLVM LOCAL begin */ |
| #ifndef ENABLE_LLVM |
| /* These get warnings if HOST_WIDE_INT < 64 */ |
| REAL_VALUE_FROM_INT (bias_lo_rvt, 0x00000000000000ULL, 0x100000ULL, DFmode); |
| REAL_VALUE_FROM_INT (bias_hi_rvt, 0x10000000000000ULL, 0x000000ULL, DFmode); |
| #endif |
| /* LLVM LOCAL end */ |
| bias_lo_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_lo_rvt, DFmode); |
| bias_hi_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_hi_rvt, DFmode); |
| biases_rtvec = gen_rtvec (2, bias_lo_rtx, bias_hi_rtx); |
| biases = validize_mem (force_const_mem (V2DFmode, |
| gen_rtx_CONST_VECTOR (V2DFmode, |
| biases_rtvec))); |
| emit_insn (gen_subv2df3 (final_result_xmm, final_result_xmm, biases)); |
| |
| if (TARGET_SSE3) |
| { |
| emit_insn (gen_sse3_haddv2df3 (final_result_xmm, final_result_xmm, |
| final_result_xmm)); |
| } |
| else |
| { |
| result_lo_xmm = gen_reg_rtx (V2DFmode); |
| ix86_expand_vector_move2 (V2DFmode, result_lo_xmm, final_result_xmm); |
| emit_insn (gen_sse2_unpckhpd (final_result_xmm, final_result_xmm, |
| final_result_xmm)); |
| emit_insn (gen_addv2df3 (final_result_xmm, final_result_xmm, |
| result_lo_xmm)); |
| } |
| |
| if (!rtx_equal_p (target, final_result_xmm)) |
| emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0)); |
| |
| return ""; |
| } |
| /* APPLE LOCAL end 4176531 4424891 */ |
| |
| /* APPLE LOCAL begin 4424891 */ |
| /* Convert an unsigned SImode value into a DFmode, using only SSE. |
| Result returned in an %xmm register. For x86_32, -mfpmath=sse, |
| !optimize_size only. */ |
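| /* In outline: the input is biased by subtracting 2**31 (adding |
| MIN_INT in SImode), converted with the signed cvtdq2pd, and then |
| 2**31.0 is added back in DFmode, which is exact since DFmode can |
| represent every 32-bit integer. */ |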
| const char * |
| ix86_expand_convert_uns_SI2DF_sse (rtx operands[]) |
| { |
| REAL_VALUE_TYPE rvt_int_two31; |
| rtx int_value_reg; |
| rtx fp_value_xmm, fp_value_as_int_xmm; |
| rtx final_result_xmm; |
| rtx int_two31_as_fp, int_two31_as_fp_vec; |
| rtx v_rtx; |
| rtx target = operands[0]; |
| |
| gcc_assert (ix86_preferred_stack_boundary >= 128); |
| gcc_assert (GET_MODE (operands[1]) == SImode); |
| |
| cfun->uses_vector = 1; |
| |
| int_value_reg = gen_reg_rtx (SImode); |
| emit_move_insn (int_value_reg, operands[1]); |
| emit_insn (gen_addsi3 (int_value_reg, int_value_reg, |
| GEN_INT (-2147483648LL /* MIN_INT */))); |
| |
| fp_value_as_int_xmm = gen_reg_rtx (V4SImode); |
| emit_insn (gen_sse2_loadld (fp_value_as_int_xmm, CONST0_RTX (V4SImode), |
| int_value_reg)); |
| |
| fp_value_xmm = gen_reg_rtx (V2DFmode); |
| emit_insn (gen_sse2_cvtdq2pd (fp_value_xmm, |
| gen_rtx_SUBREG (V4SImode, |
| fp_value_as_int_xmm, 0))); |
| |
| real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1); |
| int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode); |
| v_rtx = gen_rtx_CONST_VECTOR (V2DFmode, |
| gen_2_4_rtvec (2, int_two31_as_fp, DFmode)); |
| |
| int_two31_as_fp_vec = validize_mem (force_const_mem (V2DFmode, v_rtx)); |
| |
| final_result_xmm = gen_reg_rtx (V2DFmode); |
| emit_move_insn (final_result_xmm, fp_value_xmm); |
| emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm, |
| int_two31_as_fp_vec)); |
| |
| if (!rtx_equal_p (target, final_result_xmm)) |
| emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0)); |
| |
| return ""; |
| } |
| |
| /* Convert a signed DImode value into a DFmode, using only SSE. |
| Result returned in an %xmm register. For x86_32, -mfpmath=sse, |
| !optimize_size only. */ |
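| /* The value is decomposed as (double) (signed) HI * 2**32 |
| + (double) (unsigned) LO: the high word goes through the signed |
| cvtsi2sd and a multiply by 2**32, the low word reuses the unsigned |
| SI->DF expander above, and the two parts are summed. */ |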
| const char * |
| ix86_expand_convert_sign_DI2DF_sse (rtx operands[]) |
| { |
| rtx my_operands[2]; |
| REAL_VALUE_TYPE rvt_int_two32; |
| rtx rvt_int_two32_vec; |
| rtx fp_value_hi_xmm, fp_value_hi_shifted_xmm; |
| rtx final_result_xmm; |
| rtx int_two32_as_fp, int_two32_as_fp_vec; |
| rtx target = operands[0]; |
| rtx input = force_reg (DImode, operands[1]); |
| /* LLVM LOCAL begin */ |
| #ifdef ENABLE_LLVM |
| gcc_assert(0 && "rtx floating point conversion?"); |
| return 0; |
| #endif |
| /* LLVM LOCAL end */ |
| |
| gcc_assert (ix86_preferred_stack_boundary >= 128); |
| gcc_assert (GET_MODE (input) == DImode); |
| |
| cfun->uses_vector = 1; |
| |
| fp_value_hi_xmm = gen_reg_rtx (V2DFmode); |
| emit_insn (gen_sse2_cvtsi2sd (fp_value_hi_xmm, fp_value_hi_xmm, |
| gen_rtx_SUBREG (SImode, input, 4))); |
| |
| /* LLVM LOCAL begin */ |
| #ifndef ENABLE_LLVM |
| /* This gets warnings when HOST_WIDE_INT < 64 */ |
| real_from_integer (&rvt_int_two32, DFmode, 0x100000000ULL, 0ULL, 1); |
| #endif |
| /* LLVM LOCAL end */ |
| int_two32_as_fp = const_double_from_real_value (rvt_int_two32, DFmode); |
| rvt_int_two32_vec = gen_rtx_CONST_VECTOR (V2DFmode, |
| gen_2_4_rtvec (2, int_two32_as_fp, DFmode)); |
| |
| int_two32_as_fp_vec = validize_mem (force_const_mem (V2DFmode, |
| rvt_int_two32_vec)); |
| |
| fp_value_hi_shifted_xmm = gen_reg_rtx (V2DFmode); |
| emit_move_insn (fp_value_hi_shifted_xmm, fp_value_hi_xmm); |
| emit_insn (gen_sse2_vmmulv2df3 (fp_value_hi_shifted_xmm, |
| fp_value_hi_shifted_xmm, |
| int_two32_as_fp_vec)); |
| |
| my_operands[0] = gen_reg_rtx (DFmode); |
| my_operands[1] = gen_rtx_SUBREG (SImode, input, 0); |
| (void) ix86_expand_convert_uns_SI2DF_sse (my_operands); |
| |
| final_result_xmm = REG_P (target) && GET_MODE (target) == V2DFmode |
| ? target : gen_reg_rtx (V2DFmode); |
| emit_move_insn (final_result_xmm, gen_rtx_SUBREG (V2DFmode, |
| my_operands[0], 0)); |
| emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm, |
| fp_value_hi_shifted_xmm)); |
| |
| if (!rtx_equal_p (target, final_result_xmm)) |
| emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0)); |
| |
| return ""; |
| } |
| /* APPLE LOCAL end 4424891 */ |
| |
| /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders. |
| Create a mask for the sign bit in MODE for an SSE register. If VECT is |
| true, then replicate the mask for all elements of the vector register. |
| If INVERT is true, then create a mask excluding the sign bit. */ |
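| /* For DFmode this yields the bit pattern 0x8000000000000000 in each |
| element (or its complement when INVERT): a V2DF vector of two such |
| values when VECT, otherwise the mask in element 0 with +0.0 in the |
| remaining elements. */ |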
| |
| rtx |
| ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) |
| { |
| enum machine_mode vec_mode; |
| HOST_WIDE_INT hi, lo; |
| int shift = 63; |
| rtvec v; |
| rtx mask; |
| |
| /* Find the sign bit, sign extended to 2*HWI. */ |
| if (mode == SFmode) |
| lo = 0x80000000, hi = lo < 0; |
| else if (HOST_BITS_PER_WIDE_INT >= 64) |
| lo = (HOST_WIDE_INT)1 << shift, hi = -1; |
| else |
| lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); |
| |
| if (invert) |
| lo = ~lo, hi = ~hi; |
| |
| /* Force this value into the low part of a fp vector constant. */ |
| mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode); |
| mask = gen_lowpart (mode, mask); |
| |
| if (mode == SFmode) |
| { |
| if (vect) |
| v = gen_rtvec (4, mask, mask, mask, mask); |
| else |
| v = gen_rtvec (4, mask, CONST0_RTX (SFmode), |
| CONST0_RTX (SFmode), CONST0_RTX (SFmode)); |
| vec_mode = V4SFmode; |
| } |
| else |
| { |
| if (vect) |
| v = gen_rtvec (2, mask, mask); |
| else |
| v = gen_rtvec (2, mask, CONST0_RTX (DFmode)); |
| vec_mode = V2DFmode; |
| } |
| |
| return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v)); |
| } |
| |
| /* Generate code for floating point ABS or NEG. */ |
| |
| void |
| ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, |
| rtx operands[]) |
| { |
| rtx mask, set, use, clob, dst, src; |
| bool matching_memory; |
| bool use_sse = false; |
| bool vector_mode = VECTOR_MODE_P (mode); |
| enum machine_mode elt_mode = mode; |
| |
| if (vector_mode) |
| { |
| elt_mode = GET_MODE_INNER (mode); |
| use_sse = true; |
| } |
| else if (TARGET_SSE_MATH) |
| use_sse = SSE_FLOAT_MODE_P (mode); |
| |
| /* NEG and ABS performed with SSE use bitwise mask operations. |
| Create the appropriate mask now. */ |
| if (use_sse) |
| mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS); |
| else |
| mask = NULL_RTX; |
| |
| dst = operands[0]; |
| src = operands[1]; |
| |
| /* If the destination is memory, and we don't have matching source |
| operands or we're using the x87, do things in registers. */ |
| matching_memory = false; |
| if (MEM_P (dst)) |
| { |
| if (use_sse && rtx_equal_p (dst, src)) |
| matching_memory = true; |
| else |
| dst = gen_reg_rtx (mode); |
| } |
| if (MEM_P (src) && !matching_memory) |
| src = force_reg (mode, src); |
| |
| if (vector_mode) |
| { |
| set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask); |
| set = gen_rtx_SET (VOIDmode, dst, set); |
| emit_insn (set); |
| } |
| else |
| { |
| set = gen_rtx_fmt_e (code, mode, src); |
| set = gen_rtx_SET (VOIDmode, dst, set); |
| if (mask) |
| { |
| use = gen_rtx_USE (VOIDmode, mask); |
| clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); |
| emit_insn (gen_rtx_PARALLEL (VOIDmode, |
| gen_rtvec (3, set, use, clob))); |
| } |
| else |
| emit_insn (set); |
| } |
| |
| if (dst != operands[0]) |
| emit_move_insn (operands[0], dst); |
| } |
| |
| /* Expand a copysign operation. Special case operand 0 being a constant. */ |
| |
| void |
| ix86_expand_copysign (rtx operands[]) |
| { |
| enum machine_mode mode, vmode; |
| rtx dest, op0, op1, mask, nmask; |
| |
| dest = operands[0]; |
| op0 = operands[1]; |
| op1 = operands[2]; |
| |
| mode = GET_MODE (dest); |
| vmode = mode == SFmode ? V4SFmode : V2DFmode; |
| |
| if (GET_CODE (op0) == CONST_DOUBLE) |
| { |
| rtvec v; |
| |
| if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) |
| op0 = simplify_unary_operation (ABS, mode, op0, mode); |
| |
| if (op0 == CONST0_RTX (mode)) |
| op0 = CONST0_RTX (vmode); |
| else |
| { |
| if (mode == SFmode) |
| v = gen_rtvec (4, op0, CONST0_RTX (SFmode), |
| CONST0_RTX (SFmode), CONST0_RTX (SFmode)); |
| else |
| v = gen_rtvec (2, op0, CONST0_RTX (DFmode)); |
| op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v)); |
| } |
| |
| mask = ix86_build_signbit_mask (mode, 0, 0); |
| |
| if (mode == SFmode) |
| emit_insn (gen_copysignsf3_const (dest, op0, op1, mask)); |
| else |
| emit_insn (gen_copysigndf3_const (dest, op0, op1, mask)); |
| } |
| else |
| { |
| nmask = ix86_build_signbit_mask (mode, 0, 1); |
| mask = ix86_build_signbit_mask (mode, 0, 0); |
| |
| if (mode == SFmode) |
| emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask)); |
| else |
| emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask)); |
| } |
| } |
| |
| /* Deconstruct a copysign operation into bit masks. Operand 0 is known to |
| be a constant, and so has already been expanded into a vector constant. */ |
| |
| void |
| ix86_split_copysign_const (rtx operands[]) |
| { |
| enum machine_mode mode, vmode; |
| rtx dest, op0, op1, mask, x; |
| |
| dest = operands[0]; |
| op0 = operands[1]; |
| op1 = operands[2]; |
| mask = operands[3]; |
| |
| mode = GET_MODE (dest); |
| vmode = GET_MODE (mask); |
| |
| dest = simplify_gen_subreg (vmode, dest, mode, 0); |
| x = gen_rtx_AND (vmode, dest, mask); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
| |
| if (op0 != CONST0_RTX (vmode)) |
| { |
| x = gen_rtx_IOR (vmode, dest, op0); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
| } |
| } |
| |
| /* Deconstruct a copysign operation into bit masks. Operand 0 is variable, |
| so we have to do two masks. */ |
| |
| void |
| ix86_split_copysign_var (rtx operands[]) |
| { |
| enum machine_mode mode, vmode; |
| rtx dest, scratch, op0, op1, mask, nmask, x; |
| |
| dest = operands[0]; |
| scratch = operands[1]; |
| op0 = operands[2]; |
| op1 = operands[3]; |
| nmask = operands[4]; |
| mask = operands[5]; |
| |
| mode = GET_MODE (dest); |
| vmode = GET_MODE (mask); |
| |
| if (rtx_equal_p (op0, op1)) |
| { |
| /* Shouldn't happen often (it's useless, obviously), but when it does |
| we'd generate incorrect code if we continue below. */ |
| emit_move_insn (dest, op0); |
| return; |
| } |
| |
| if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ |
| { |
| gcc_assert (REGNO (op1) == REGNO (scratch)); |
| |
| x = gen_rtx_AND (vmode, scratch, mask); |
| emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); |
| |
| dest = mask; |
| op0 = simplify_gen_subreg (vmode, op0, mode, 0); |
| x = gen_rtx_NOT (vmode, dest); |
| x = gen_rtx_AND (vmode, x, op0); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
| } |
| else |
| { |
| if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ |
| { |
| x = gen_rtx_AND (vmode, scratch, mask); |
| } |
| else /* alternative 2,4 */ |
| { |
| gcc_assert (REGNO (mask) == REGNO (scratch)); |
| op1 = simplify_gen_subreg (vmode, op1, mode, 0); |
| x = gen_rtx_AND (vmode, scratch, op1); |
| } |
| emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); |
| |
| if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ |
| { |
| dest = simplify_gen_subreg (vmode, op0, mode, 0); |
| x = gen_rtx_AND (vmode, dest, nmask); |
| } |
| else /* alternative 3,4 */ |
| { |
| gcc_assert (REGNO (nmask) == REGNO (dest)); |
| dest = nmask; |
| op0 = simplify_gen_subreg (vmode, op0, mode, 0); |
| x = gen_rtx_AND (vmode, dest, op0); |
| } |
| emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
| } |
| |
| x = gen_rtx_IOR (vmode, dest, scratch); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
| } |
| |
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE.  */
| |
| int |
| ix86_match_ccmode (rtx insn, enum machine_mode req_mode) |
| { |
| rtx set; |
| enum machine_mode set_mode; |
| |
| set = PATTERN (insn); |
| if (GET_CODE (set) == PARALLEL) |
| set = XVECEXP (set, 0, 0); |
| gcc_assert (GET_CODE (set) == SET); |
| gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); |
| |
| set_mode = GET_MODE (SET_DEST (set)); |
| switch (set_mode) |
| { |
| case CCNOmode: |
| if (req_mode != CCNOmode |
| && (req_mode != CCmode |
| || XEXP (SET_SRC (set), 1) != const0_rtx)) |
| return 0; |
| break; |
| case CCmode: |
| if (req_mode == CCGCmode) |
| return 0; |
| /* FALLTHRU */ |
| case CCGCmode: |
| if (req_mode == CCGOCmode || req_mode == CCNOmode) |
| return 0; |
| /* FALLTHRU */ |
| case CCGOCmode: |
| if (req_mode == CCZmode) |
| return 0; |
| /* FALLTHRU */ |
| case CCZmode: |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return (GET_MODE (SET_SRC (set)) == set_mode); |
| } |
| |
| /* Generate insn patterns to do an integer compare of OPERANDS. */ |
| |
| static rtx |
| ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) |
| { |
| enum machine_mode cmpmode; |
| rtx tmp, flags; |
| |
| cmpmode = SELECT_CC_MODE (code, op0, op1); |
| flags = gen_rtx_REG (cmpmode, FLAGS_REG); |
| |
| /* This is very simple, but making the interface the same as in the |
| FP case makes the rest of the code easier. */ |
| tmp = gen_rtx_COMPARE (cmpmode, op0, op1); |
| emit_insn (gen_rtx_SET (VOIDmode, flags, tmp)); |
| |
| /* Return the test that should be put into the flags user, i.e. |
| the bcc, scc, or cmov instruction. */ |
| return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); |
| } |
| |
| /* Figure out whether to use ordered or unordered fp comparisons. |
| Return the appropriate mode to use. */ |
| |
| enum machine_mode |
| ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED) |
| { |
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
| return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; |
| } |
| |
| enum machine_mode |
| ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) |
| { |
| if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) |
| return ix86_fp_compare_mode (code); |
| switch (code) |
| { |
| /* Only zero flag is needed. */ |
| case EQ: /* ZF=0 */ |
| case NE: /* ZF!=0 */ |
| return CCZmode; |
| /* Codes needing carry flag. */ |
| case GEU: /* CF=0 */ |
| case GTU: /* CF=0 & ZF=0 */ |
| case LTU: /* CF=1 */ |
| case LEU: /* CF=1 | ZF=1 */ |
| return CCmode; |
      /* Codes possibly doable only with the sign flag when
	 comparing against zero.  */
| case GE: /* SF=OF or SF=0 */ |
| case LT: /* SF<>OF or SF=1 */ |
| if (op1 == const0_rtx) |
| return CCGOCmode; |
      else
	/* For other cases the carry flag is not required.  */
| return CCGCmode; |
      /* Codes doable only with the sign flag when comparing
	 against zero, but for which we lack a jump instruction,
	 so we use relational tests against overflow, which thus
	 needs to be zero.  */
| case GT: /* ZF=0 & SF=OF */ |
| case LE: /* ZF=1 | SF<>OF */ |
| if (op1 == const0_rtx) |
| return CCNOmode; |
| else |
| return CCGCmode; |
      /* The strcmp pattern does (use flags), and combine may ask us
	 for a proper mode.  */
| case USE: |
| return CCmode; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Return the fixed registers used for condition codes. */ |
| |
| static bool |
| ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) |
| { |
| *p1 = FLAGS_REG; |
| *p2 = FPSR_REG; |
| return true; |
| } |
| |
| /* If two condition code modes are compatible, return a condition code |
| mode which is compatible with both. Otherwise, return |
| VOIDmode. */ |
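
/* For example, users of CCZmode and CCNOmode can share one full CCmode
   compare, while CCGCmode and CCGOCmode users combine to CCGCmode.  */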
| |
| static enum machine_mode |
| ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2) |
| { |
| if (m1 == m2) |
| return m1; |
| |
| if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) |
| return VOIDmode; |
| |
| if ((m1 == CCGCmode && m2 == CCGOCmode) |
| || (m1 == CCGOCmode && m2 == CCGCmode)) |
| return CCGCmode; |
| |
| switch (m1) |
| { |
| default: |
| gcc_unreachable (); |
| |
| case CCmode: |
| case CCGCmode: |
| case CCGOCmode: |
| case CCNOmode: |
| case CCZmode: |
| switch (m2) |
| { |
| default: |
| return VOIDmode; |
| |
| case CCmode: |
| case CCGCmode: |
| case CCGOCmode: |
| case CCNOmode: |
| case CCZmode: |
| return CCmode; |
| } |
| |
| case CCFPmode: |
| case CCFPUmode: |
| /* These are only compatible with themselves, which we already |
| checked above. */ |
| return VOIDmode; |
| } |
| } |
| |
| /* Return true if we should use an FCOMI instruction for this fp comparison. */ |
| |
| int |
| ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED) |
| { |
| enum rtx_code swapped_code = swap_condition (code); |
| return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code)) |
| || (ix86_fp_comparison_cost (swapped_code) |
| == ix86_fp_comparison_fcomi_cost (swapped_code))); |
| } |
| |
| /* Swap, force into registers, or otherwise massage the two operands |
| to a fp comparison. The operands are updated in place; the new |
| comparison code is returned. */ |
| |
| static enum rtx_code |
| ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) |
| { |
| enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); |
| rtx op0 = *pop0, op1 = *pop1; |
| enum machine_mode op_mode = GET_MODE (op0); |
| int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); |
| |
| /* All of the unordered compare instructions only work on registers. |
| The same is true of the fcomi compare instructions. The XFmode |
| compare instructions require registers except when comparing |
| against zero or when converting operand 1 from fixed point to |
| floating point. */ |
| |
| if (!is_sse |
| && (fpcmp_mode == CCFPUmode |
| || (op_mode == XFmode |
| && ! (standard_80387_constant_p (op0) == 1 |
| || standard_80387_constant_p (op1) == 1) |
| && GET_CODE (op1) != FLOAT) |
| || ix86_use_fcomi_compare (code))) |
| { |
| op0 = force_reg (op_mode, op0); |
| op1 = force_reg (op_mode, op1); |
| } |
| else |
| { |
| /* %%% We only allow op1 in memory; op0 must be st(0). So swap |
| things around if they appear profitable, otherwise force op0 |
| into a register. */ |
| |
| if (standard_80387_constant_p (op0) == 0 |
| || (GET_CODE (op0) == MEM |
| && ! (standard_80387_constant_p (op1) == 0 |
| || GET_CODE (op1) == MEM))) |
| { |
| rtx tmp; |
| tmp = op0, op0 = op1, op1 = tmp; |
| code = swap_condition (code); |
| } |
| |
| if (GET_CODE (op0) != REG) |
| op0 = force_reg (op_mode, op0); |
| |
| if (CONSTANT_P (op1)) |
| { |
| int tmp = standard_80387_constant_p (op1); |
| if (tmp == 0) |
| op1 = validize_mem (force_const_mem (op_mode, op1)); |
| else if (tmp == 1) |
| { |
| if (TARGET_CMOVE) |
| op1 = force_reg (op_mode, op1); |
| } |
| else |
| op1 = force_reg (op_mode, op1); |
| } |
| } |
| |
| /* Try to rearrange the comparison to make it cheaper. */ |
| if (ix86_fp_comparison_cost (code) |
| > ix86_fp_comparison_cost (swap_condition (code)) |
| && (GET_CODE (op1) == REG || !no_new_pseudos)) |
| { |
| rtx tmp; |
| tmp = op0, op0 = op1, op1 = tmp; |
| code = swap_condition (code); |
| if (GET_CODE (op0) != REG) |
| op0 = force_reg (op_mode, op0); |
| } |
| |
| *pop0 = op0; |
| *pop1 = op1; |
| return code; |
| } |
| |
/* Convert the comparison codes we use to represent an FP comparison to the
   integer code that will result in a proper branch.  Return UNKNOWN if no
   such code is available.  */
| |
| enum rtx_code |
| ix86_fp_compare_code_to_integer (enum rtx_code code) |
| { |
| switch (code) |
| { |
| case GT: |
| return GTU; |
| case GE: |
| return GEU; |
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
| default: |
| return UNKNOWN; |
| } |
| } |
| |
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, set its value to UNKNOWN.
   We never require more than two branches.  */
| |
| void |
| ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code, |
| enum rtx_code *first_code, |
| enum rtx_code *second_code) |
| { |
| *first_code = code; |
| *bypass_code = UNKNOWN; |
| *second_code = UNKNOWN; |
| |
| /* The fcomi comparison sets flags as follows: |
| |
| cmp ZF PF CF |
| > 0 0 0 |
| < 0 0 1 |
| = 1 0 0 |
| un 1 1 1 */ |
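
  /* A plain LT test (CF=1) would thus also fire for an unordered result;
     this is why the IEEE cases below either branch around the main test
     with an UNORDERED bypass (LT, LE, EQ) or add a second branch that
     also accepts the unordered outcome (NE, UNGE, UNGT).  */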
| |
| switch (code) |
| { |
| case GT: /* GTU - CF=0 & ZF=0 */ |
| case GE: /* GEU - CF=0 */ |
| case ORDERED: /* PF=0 */ |
| case UNORDERED: /* PF=1 */ |
| case UNEQ: /* EQ - ZF=1 */ |
| case UNLT: /* LTU - CF=1 */ |
| case UNLE: /* LEU - CF=1 | ZF=1 */ |
| case LTGT: /* EQ - ZF=0 */ |
| break; |
| case LT: /* LTU - CF=1 - fails on unordered */ |
| *first_code = UNLT; |
| *bypass_code = UNORDERED; |
| break; |
| case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ |
| *first_code = UNLE; |
| *bypass_code = UNORDERED; |
| break; |
| case EQ: /* EQ - ZF=1 - fails on unordered */ |
| *first_code = UNEQ; |
| *bypass_code = UNORDERED; |
| break; |
| case NE: /* NE - ZF=0 - fails on unordered */ |
| *first_code = LTGT; |
| *second_code = UNORDERED; |
| break; |
| case UNGE: /* GEU - CF=0 - fails on unordered */ |
| *first_code = GE; |
| *second_code = UNORDERED; |
| break; |
| case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ |
| *first_code = GT; |
| *second_code = UNORDERED; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| if (!TARGET_IEEE_FP) |
| { |
| *second_code = UNKNOWN; |
| *bypass_code = UNKNOWN; |
| } |
| } |
| |
/* Return cost of comparison done using fcom + arithmetic operations on AX.
   All following functions use the number of instructions as a cost metric.
   In the future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */
| static int |
| ix86_fp_comparison_arithmetics_cost (enum rtx_code code) |
| { |
| if (!TARGET_IEEE_FP) |
| return 4; |
| /* The cost of code output by ix86_expand_fp_compare. */ |
| switch (code) |
| { |
| case UNLE: |
| case UNLT: |
| case LTGT: |
| case GT: |
| case GE: |
| case UNORDERED: |
| case ORDERED: |
| case UNEQ: |
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Return cost of comparison done using fcomi operation. |
| See ix86_fp_comparison_arithmetics_cost for the metrics. */ |
| static int |
| ix86_fp_comparison_fcomi_cost (enum rtx_code code) |
| { |
| enum rtx_code bypass_code, first_code, second_code; |
| /* Return arbitrarily high cost when instruction is not supported - this |
| prevents gcc from using it. */ |
| if (!TARGET_CMOVE) |
| return 1024; |
| ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
| return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2; |
| } |
| |
| /* Return cost of comparison done using sahf operation. |
| See ix86_fp_comparison_arithmetics_cost for the metrics. */ |
| static int |
| ix86_fp_comparison_sahf_cost (enum rtx_code code) |
| { |
| enum rtx_code bypass_code, first_code, second_code; |
  /* Return arbitrarily high cost when instruction is not preferred - this
     keeps gcc from using it.  */
| if (!TARGET_USE_SAHF && !optimize_size) |
| return 1024; |
| ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
| return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; |
| } |
| |
| /* Compute cost of the comparison done using any method. |
| See ix86_fp_comparison_arithmetics_cost for the metrics. */ |
| static int |
| ix86_fp_comparison_cost (enum rtx_code code) |
| { |
  int fcomi_cost, sahf_cost, arithmetics_cost;
| int min; |
| |
| fcomi_cost = ix86_fp_comparison_fcomi_cost (code); |
| sahf_cost = ix86_fp_comparison_sahf_cost (code); |
| |
| min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); |
| if (min > sahf_cost) |
| min = sahf_cost; |
| if (min > fcomi_cost) |
| min = fcomi_cost; |
| return min; |
| } |
| |
| /* Generate insn patterns to do a floating point compare of OPERANDS. */ |
| |
| static rtx |
| ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, |
| rtx *second_test, rtx *bypass_test) |
| { |
| enum machine_mode fpcmp_mode, intcmp_mode; |
| rtx tmp, tmp2; |
| int cost = ix86_fp_comparison_cost (code); |
| enum rtx_code bypass_code, first_code, second_code; |
| |
| fpcmp_mode = ix86_fp_compare_mode (code); |
| code = ix86_prepare_fp_compare_args (code, &op0, &op1); |
| |
| if (second_test) |
| *second_test = NULL_RTX; |
| if (bypass_test) |
| *bypass_test = NULL_RTX; |
| |
| ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
| |
| /* Do fcomi/sahf based test when profitable. */ |
| if ((bypass_code == UNKNOWN || bypass_test) |
| && (second_code == UNKNOWN || second_test) |
| && ix86_fp_comparison_arithmetics_cost (code) > cost) |
| { |
| if (TARGET_CMOVE) |
| { |
| tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); |
| tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), |
| tmp); |
| emit_insn (tmp); |
| } |
| else |
| { |
| tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); |
| tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); |
| if (!scratch) |
| scratch = gen_reg_rtx (HImode); |
| emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); |
| emit_insn (gen_x86_sahf_1 (scratch)); |
| } |
| |
| /* The FP codes work out to act like unsigned. */ |
| intcmp_mode = fpcmp_mode; |
| code = first_code; |
| if (bypass_code != UNKNOWN) |
| *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, |
| gen_rtx_REG (intcmp_mode, FLAGS_REG), |
| const0_rtx); |
| if (second_code != UNKNOWN) |
| *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, |
| gen_rtx_REG (intcmp_mode, FLAGS_REG), |
| const0_rtx); |
| } |
| else |
| { |
| /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ |
| tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); |
| tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); |
| if (!scratch) |
| scratch = gen_reg_rtx (HImode); |
| emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); |
| |
| /* In the unordered case, we have to check C2 for NaN's, which |
| doesn't happen to work out to anything nice combination-wise. |
| So do some bit twiddling on the value we've got in AH to come |
| up with an appropriate set of condition codes. */ |
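
	  /* After the fnstsw above, the FPSW condition bits sit in AH as
	     C0 = 0x01, C2 = 0x04 and C3 = 0x40; fcom sets C3/C2/C0 to
	     0/0/0 for >, 0/0/1 for <, 1/0/0 for = and 1/1/1 for unordered.
	     The masks below (0x45 = C3|C2|C0, 0x44 = C3|C2, etc.) test
	     the relevant combinations of those bits.  */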
| |
| intcmp_mode = CCNOmode; |
| switch (code) |
| { |
| case GT: |
| case UNGT: |
| if (code == GT || !TARGET_IEEE_FP) |
| { |
| emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); |
| code = EQ; |
| } |
| else |
| { |
| emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); |
| emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); |
| emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); |
| intcmp_mode = CCmode; |
| code = GEU; |
| } |
| break; |
| case LT: |
| case UNLT: |
| if (code == LT && TARGET_IEEE_FP) |
| { |
| emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); |
| emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); |
| intcmp_mode = CCmode; |
| code = EQ; |
| } |
| else |
| { |
| emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); |
| code = NE; |
| } |
| break; |
| case GE: |
| case UNGE: |
| if (code == GE || !TARGET_IEEE_FP) |
| { |
| emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); |
| code = EQ; |
| } |
| else |
| { |
| emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); |
| emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, |
| GEN_INT (0x01))); |
| code = NE; |
| } |
| break; |
| case LE: |
| case UNLE: |
| if (code == LE && TARGET_IEEE_FP) |
| { |
| emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); |
| emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); |
| emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); |
| intcmp_mode = CCmode; |
| code = LTU; |
| } |
| else |
| { |
| emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); |
| code = NE; |
| } |
| break; |
| case EQ: |
| case UNEQ: |
| if (code == EQ && TARGET_IEEE_FP) |
| { |
| emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); |
| emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); |
| intcmp_mode = CCmode; |
| code = EQ; |
| } |
| else |
| { |
| emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); |
		code = NE;
	      }
	    break;
| case NE: |
| case LTGT: |
| if (code == NE && TARGET_IEEE_FP) |
| { |
| emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); |
| emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, |
| GEN_INT (0x40))); |
| code = NE; |
| } |
| else |
| { |
| emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); |
| code = EQ; |
| } |
| break; |
| |
| case UNORDERED: |
| emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); |
| code = NE; |
| break; |
| case ORDERED: |
| emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); |
| code = EQ; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Return the test that should be put into the flags user, i.e. |
| the bcc, scc, or cmov instruction. */ |
| return gen_rtx_fmt_ee (code, VOIDmode, |
| gen_rtx_REG (intcmp_mode, FLAGS_REG), |
| const0_rtx); |
| } |
| |
| rtx |
| ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test) |
| { |
| rtx op0, op1, ret; |
| op0 = ix86_compare_op0; |
| op1 = ix86_compare_op1; |
| |
| if (second_test) |
| *second_test = NULL_RTX; |
| if (bypass_test) |
| *bypass_test = NULL_RTX; |
| |
| if (ix86_compare_emitted) |
| { |
| ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx); |
| ix86_compare_emitted = NULL_RTX; |
| } |
| else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) |
| ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, |
| second_test, bypass_test); |
| else |
| ret = ix86_expand_int_compare (code, op0, op1); |
| |
| return ret; |
| } |
| |
/* Return true if the CODE will result in a nontrivial jump sequence.  */
| bool |
| ix86_fp_jump_nontrivial_p (enum rtx_code code) |
| { |
| enum rtx_code bypass_code, first_code, second_code; |
| if (!TARGET_CMOVE) |
| return true; |
| ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
| return bypass_code != UNKNOWN || second_code != UNKNOWN; |
| } |
| |
| void |
| ix86_expand_branch (enum rtx_code code, rtx label) |
| { |
| rtx tmp; |
| |
  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non-NULL.  */
| if (ix86_compare_emitted) |
| goto simple; |
| |
| switch (GET_MODE (ix86_compare_op0)) |
| { |
| case QImode: |
| case HImode: |
| case SImode: |
| simple: |
| tmp = ix86_expand_compare (code, NULL, NULL); |
| tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, |
| gen_rtx_LABEL_REF (VOIDmode, label), |
| pc_rtx); |
| emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); |
| return; |
| |
| case SFmode: |
| case DFmode: |
| case XFmode: |
| { |
| rtvec vec; |
| int use_fcomi; |
| enum rtx_code bypass_code, first_code, second_code; |
| |
| code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, |
| &ix86_compare_op1); |
| |
| ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
| |
| /* Check whether we will use the natural sequence with one jump. If |
| so, we can expand jump early. Otherwise delay expansion by |
| creating compound insn to not confuse optimizers. */ |
| if (bypass_code == UNKNOWN && second_code == UNKNOWN |
| && TARGET_CMOVE) |
| { |
| ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1, |
| gen_rtx_LABEL_REF (VOIDmode, label), |
| pc_rtx, NULL_RTX, NULL_RTX); |
| } |
| else |
| { |
| tmp = gen_rtx_fmt_ee (code, VOIDmode, |
| ix86_compare_op0, ix86_compare_op1); |
| tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, |
| gen_rtx_LABEL_REF (VOIDmode, label), |
| pc_rtx); |
| tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp); |
| |
| use_fcomi = ix86_use_fcomi_compare (code); |
| vec = rtvec_alloc (3 + !use_fcomi); |
| RTVEC_ELT (vec, 0) = tmp; |
| RTVEC_ELT (vec, 1) |
| = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18)); |
| RTVEC_ELT (vec, 2) |
| = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17)); |
| if (! use_fcomi) |
| RTVEC_ELT (vec, 3) |
| = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode)); |
| |
| emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec)); |
| } |
| return; |
| } |
| |
    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* FALLTHRU */
    case TImode:
      /* Expand DImode/TImode branch into multiple compare+branch.  */
| { |
| rtx lo[2], hi[2], label2; |
| enum rtx_code code1, code2, code3; |
| enum machine_mode submode; |
| |
| if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1)) |
| { |
| tmp = ix86_compare_op0; |
| ix86_compare_op0 = ix86_compare_op1; |
| ix86_compare_op1 = tmp; |
| code = swap_condition (code); |
| } |
| if (GET_MODE (ix86_compare_op0) == DImode) |
| { |
| split_di (&ix86_compare_op0, 1, lo+0, hi+0); |
| split_di (&ix86_compare_op1, 1, lo+1, hi+1); |
| submode = SImode; |
| } |
| else |
| { |
| split_ti (&ix86_compare_op0, 1, lo+0, hi+0); |
| split_ti (&ix86_compare_op1, 1, lo+1, hi+1); |
| submode = DImode; |
| } |
| |
| /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to |
| avoid two branches. This costs one extra insn, so disable when |
| optimizing for size. */ |
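
	/* For example, a DImode (a != b) test becomes a single SImode
	   test of ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) against zero.  */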
| |
| if ((code == EQ || code == NE) |
| && (!optimize_size |
| || hi[1] == const0_rtx || lo[1] == const0_rtx)) |
| { |
| rtx xor0, xor1; |
| |
| xor1 = hi[0]; |
| if (hi[1] != const0_rtx) |
| xor1 = expand_binop (submode, xor_optab, xor1, hi[1], |
| NULL_RTX, 0, OPTAB_WIDEN); |
| |
| xor0 = lo[0]; |
| if (lo[1] != const0_rtx) |
| xor0 = expand_binop (submode, xor_optab, xor0, lo[1], |
| NULL_RTX, 0, OPTAB_WIDEN); |
| |
| tmp = expand_binop (submode, ior_optab, xor1, xor0, |
| NULL_RTX, 0, OPTAB_WIDEN); |
| |
| ix86_compare_op0 = tmp; |
| ix86_compare_op1 = const0_rtx; |
| ix86_expand_branch (code, label); |
| return; |
| } |
| |
| /* Otherwise, if we are doing less-than or greater-or-equal-than, |
| op1 is a constant and the low word is zero, then we can just |
| examine the high word. */ |
| |
| if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx) |
| switch (code) |
| { |
| case LT: case LTU: case GE: case GEU: |
| ix86_compare_op0 = hi[0]; |
| ix86_compare_op1 = hi[1]; |
| ix86_expand_branch (code, label); |
| return; |
| default: |
| break; |
| } |
| |
| /* Otherwise, we need two or three jumps. */ |
| |
| label2 = gen_label_rtx (); |
| |
| code1 = code; |
| code2 = swap_condition (code); |
| code3 = unsigned_condition (code); |
| |
| switch (code) |
| { |
| case LT: case GT: case LTU: case GTU: |
| break; |
| |
| case LE: code1 = LT; code2 = GT; break; |
| case GE: code1 = GT; code2 = LT; break; |
| case LEU: code1 = LTU; code2 = GTU; break; |
| case GEU: code1 = GTU; code2 = LTU; break; |
| |
| case EQ: code1 = UNKNOWN; code2 = NE; break; |
| case NE: code2 = UNKNOWN; break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* |
| * a < b => |
| * if (hi(a) < hi(b)) goto true; |
| * if (hi(a) > hi(b)) goto false; |
| * if (lo(a) < lo(b)) goto true; |
| * false: |
| */ |
| |
| ix86_compare_op0 = hi[0]; |
| ix86_compare_op1 = hi[1]; |
| |
| if (code1 != UNKNOWN) |
| ix86_expand_branch (code1, label); |
| if (code2 != UNKNOWN) |
| ix86_expand_branch (code2, label2); |
| |
| ix86_compare_op0 = lo[0]; |
| ix86_compare_op1 = lo[1]; |
| ix86_expand_branch (code3, label); |
| |
| if (code2 != UNKNOWN) |
| emit_label (label2); |
| return; |
| } |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Split branch based on floating point condition. */ |
| void |
| ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, |
| rtx target1, rtx target2, rtx tmp, rtx pushed) |
| { |
| rtx second, bypass; |
| rtx label = NULL_RTX; |
| rtx condition; |
| int bypass_probability = -1, second_probability = -1, probability = -1; |
| rtx i; |
| |
| if (target2 != pc_rtx) |
| { |
| rtx tmp = target2; |
| code = reverse_condition_maybe_unordered (code); |
| target2 = target1; |
| target1 = tmp; |
| } |
| |
| condition = ix86_expand_fp_compare (code, op1, op2, |
| tmp, &second, &bypass); |
| |
| /* Remove pushed operand from stack. */ |
| if (pushed) |
| ix86_free_from_memory (GET_MODE (pushed)); |
| |
| if (split_branch_probability >= 0) |
| { |
      /* Distribute the probabilities across the jumps.
	 Assume that BYPASS and SECOND are always tests
	 for UNORDERED.  */
| probability = split_branch_probability; |
| |
      /* A value of 1 is low enough that there is no need to update the
	 probability.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
| if (bypass) |
| bypass_probability = 1; |
| if (second) |
| second_probability = 1; |
| } |
| if (bypass != NULL_RTX) |
| { |
| label = gen_label_rtx (); |
| i = emit_jump_insn (gen_rtx_SET |
| (VOIDmode, pc_rtx, |
| gen_rtx_IF_THEN_ELSE (VOIDmode, |
| bypass, |
| gen_rtx_LABEL_REF (VOIDmode, |
| label), |
| pc_rtx))); |
| if (bypass_probability >= 0) |
| REG_NOTES (i) |
| = gen_rtx_EXPR_LIST (REG_BR_PROB, |
| GEN_INT (bypass_probability), |
| REG_NOTES (i)); |
| } |
| i = emit_jump_insn (gen_rtx_SET |
| (VOIDmode, pc_rtx, |
| gen_rtx_IF_THEN_ELSE (VOIDmode, |
| condition, target1, target2))); |
| if (probability >= 0) |
| REG_NOTES (i) |
| = gen_rtx_EXPR_LIST (REG_BR_PROB, |
| GEN_INT (probability), |
| REG_NOTES (i)); |
| if (second != NULL_RTX) |
| { |
| i = emit_jump_insn (gen_rtx_SET |
| (VOIDmode, pc_rtx, |
| gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, |
| target2))); |
| if (second_probability >= 0) |
| REG_NOTES (i) |
| = gen_rtx_EXPR_LIST (REG_BR_PROB, |
| GEN_INT (second_probability), |
| REG_NOTES (i)); |
| } |
| if (label != NULL_RTX) |
| emit_label (label); |
| } |
| |
| int |
| ix86_expand_setcc (enum rtx_code code, rtx dest) |
| { |
| rtx ret, tmp, tmpreg, equiv; |
| rtx second_test, bypass_test; |
| |
| if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode)) |
| return 0; /* FAIL */ |
| |
| gcc_assert (GET_MODE (dest) == QImode); |
| |
| ret = ix86_expand_compare (code, &second_test, &bypass_test); |
| PUT_MODE (ret, QImode); |
| |
| tmp = dest; |
| tmpreg = dest; |
| |
| emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); |
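  /* When the FP comparison was split (see ix86_fp_comparison_codes), fold
     the extra test in here: a bypass test is reversed and ANDed in
     (e.g. IEEE EQ = uneq & ordered), while a second test is ORed in
     (e.g. IEEE NE = ltgt | unordered).  */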
| if (bypass_test || second_test) |
| { |
| rtx test = second_test; |
| int bypass = 0; |
| rtx tmp2 = gen_reg_rtx (QImode); |
| if (bypass_test) |
| { |
| gcc_assert (!second_test); |
| test = bypass_test; |
| bypass = 1; |
| PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); |
| } |
| PUT_MODE (test, QImode); |
| emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); |
| |
| if (bypass) |
| emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); |
| else |
| emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); |
| } |
| |
| /* Attach a REG_EQUAL note describing the comparison result. */ |
| if (ix86_compare_op0 && ix86_compare_op1) |
| { |
| equiv = simplify_gen_relational (code, QImode, |
| GET_MODE (ix86_compare_op0), |
| ix86_compare_op0, ix86_compare_op1); |
| set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); |
| } |
| |
| return 1; /* DONE */ |
| } |
| |
| /* Expand comparison setting or clearing carry flag. Return true when |
| successful and set pop for the operation. */ |
| static bool |
| ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) |
| { |
| enum machine_mode mode = |
| GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); |
| |
  /* Do not handle DImode compares that go through the special path.  Also,
     we can't deal with FP compares yet.  This is possible to add.  */
| if (mode == (TARGET_64BIT ? TImode : DImode)) |
| return false; |
| if (FLOAT_MODE_P (mode)) |
| { |
| rtx second_test = NULL, bypass_test = NULL; |
| rtx compare_op, compare_seq; |
| |
      /* Shortcut: the following common codes never translate
	 into carry flag compares.  */
| if (code == EQ || code == NE || code == UNEQ || code == LTGT |
| || code == ORDERED || code == UNORDERED) |
| return false; |
| |
      /* These comparisons require the zero flag; swap the operands so
	 they no longer do.  */
| if ((code == GT || code == UNLE || code == LE || code == UNGT) |
| && !TARGET_IEEE_FP) |
| { |
| rtx tmp = op0; |
| op0 = op1; |
| op1 = tmp; |
| code = swap_condition (code); |
| } |
| |
      /* Try to expand the comparison and verify that we end up with a carry
	 flag based comparison.  This fails to be true only when we decide
	 to expand the comparison using arithmetic, which is not a common
	 scenario.  */
| start_sequence (); |
| compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, |
| &second_test, &bypass_test); |
| compare_seq = get_insns (); |
| end_sequence (); |
| |
| if (second_test || bypass_test) |
| return false; |
| if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode |
| || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) |
| code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); |
| else |
| code = GET_CODE (compare_op); |
| if (code != LTU && code != GEU) |
| return false; |
| emit_insn (compare_seq); |
| *pop = compare_op; |
| return true; |
| } |
| if (!INTEGRAL_MODE_P (mode)) |
| return false; |
| switch (code) |
| { |
| case LTU: |
| case GEU: |
| break; |
| |
| /* Convert a==0 into (unsigned)a<1. */ |
| case EQ: |
| case NE: |
| if (op1 != const0_rtx) |
| return false; |
| op1 = const1_rtx; |
| code = (code == EQ ? LTU : GEU); |
| break; |
| |
| /* Convert a>b into b<a or a>=b-1. */ |
| case GTU: |
| case LEU: |
| if (GET_CODE (op1) == CONST_INT) |
| { |
| op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); |
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into a register.  */
| if (op1 == const0_rtx |
| || !x86_64_immediate_operand (op1, GET_MODE (op1))) |
| return false; |
| code = (code == GTU ? GEU : LTU); |
| } |
| else |
| { |
| rtx tmp = op1; |
| op1 = op0; |
| op0 = tmp; |
| code = (code == GTU ? LTU : GEU); |
| } |
| break; |
| |
| /* Convert a>=0 into (unsigned)a<0x80000000. */ |
| case LT: |
| case GE: |
| if (mode == DImode || op1 != const0_rtx) |
| return false; |
| op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); |
| code = (code == LT ? GEU : LTU); |
| break; |
| case LE: |
| case GT: |
| if (mode == DImode || op1 != constm1_rtx) |
| return false; |
| op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); |
| code = (code == LE ? GEU : LTU); |
| break; |
| |
| default: |
| return false; |
| } |
  /* Swapping operands may cause a constant to appear as the first operand.  */
| if (!nonimmediate_operand (op0, VOIDmode)) |
| { |
| if (no_new_pseudos) |
| return false; |
| op0 = force_reg (mode, op0); |
| } |
| ix86_compare_op0 = op0; |
| ix86_compare_op1 = op1; |
| *pop = ix86_expand_compare (code, NULL, NULL); |
| gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); |
| return true; |
| } |
| |
| int |
| ix86_expand_int_movcc (rtx operands[]) |
| { |
| enum rtx_code code = GET_CODE (operands[1]), compare_code; |
| rtx compare_seq, compare_op; |
| rtx second_test, bypass_test; |
| enum machine_mode mode = GET_MODE (operands[0]); |
  bool sign_bit_compare_p = false;
| |
| start_sequence (); |
| compare_op = ix86_expand_compare (code, &second_test, &bypass_test); |
| compare_seq = get_insns (); |
| end_sequence (); |
| |
| compare_code = GET_CODE (compare_op); |
| |
| if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) |
| || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) |
| sign_bit_compare_p = true; |
| |
| /* Don't attempt mode expansion here -- if we had to expand 5 or 6 |
| HImode insns, we'd be swallowed in word prefix ops. */ |
| |
| if ((mode != HImode || TARGET_FAST_PREFIX) |
| && (mode != (TARGET_64BIT ? TImode : DImode)) |
| && GET_CODE (operands[2]) == CONST_INT |
| && GET_CODE (operands[3]) == CONST_INT) |
| { |
| rtx out = operands[0]; |
| HOST_WIDE_INT ct = INTVAL (operands[2]); |
| HOST_WIDE_INT cf = INTVAL (operands[3]); |
| HOST_WIDE_INT diff; |
| |
| diff = ct - cf; |
    /* Sign bit compares are better done using shifts than using sbb.  */
| if (sign_bit_compare_p |
| || ix86_expand_carry_flag_compare (code, ix86_compare_op0, |
| ix86_compare_op1, &compare_op)) |
| { |
| /* Detect overlap between destination and compare sources. */ |
| rtx tmp = out; |
| |
| if (!sign_bit_compare_p) |
| { |
| bool fpcmp = false; |
| |
| compare_code = GET_CODE (compare_op); |
| |
| if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode |
| || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) |
| { |
| fpcmp = true; |
| compare_code = ix86_fp_compare_code_to_integer (compare_code); |
| } |
| |
	      /* To simplify the rest of the code, restrict to the GEU case.  */
| if (compare_code == LTU) |
| { |
| HOST_WIDE_INT tmp = ct; |
| ct = cf; |
| cf = tmp; |
| compare_code = reverse_condition (compare_code); |
| code = reverse_condition (code); |
| } |
| else |
| { |
| if (fpcmp) |
| PUT_CODE (compare_op, |
| reverse_condition_maybe_unordered |
| (GET_CODE (compare_op))); |
| else |
| PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); |
| } |
| diff = ct - cf; |
| |
| if (reg_overlap_mentioned_p (out, ix86_compare_op0) |
| || reg_overlap_mentioned_p (out, ix86_compare_op1)) |
| tmp = gen_reg_rtx (mode); |
| |
| if (mode == DImode) |
| emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); |
| else |
| emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); |
| } |
| else |
| { |
| if (code == GT || code == GE) |
| code = reverse_condition (code); |
| else |
| { |
| HOST_WIDE_INT tmp = ct; |
| ct = cf; |
| cf = tmp; |
| diff = ct - cf; |
| } |
| tmp = emit_store_flag (tmp, code, ix86_compare_op0, |
| ix86_compare_op1, VOIDmode, 0, -1); |
| } |
| |
| if (diff == 1) |
| { |
| /* |
| * cmpl op0,op1 |
| * sbbl dest,dest |
| * [addl dest, ct] |
| * |
| * Size 5 - 8. |
| */ |
| if (ct) |
| tmp = expand_simple_binop (mode, PLUS, |
| tmp, GEN_INT (ct), |
| copy_rtx (tmp), 1, OPTAB_DIRECT); |
| } |
| else if (cf == -1) |
| { |
| /* |
| * cmpl op0,op1 |
| * sbbl dest,dest |
| * orl $ct, dest |
| * |
| * Size 8. |
| */ |
| tmp = expand_simple_binop (mode, IOR, |
| tmp, GEN_INT (ct), |
| copy_rtx (tmp), 1, OPTAB_DIRECT); |
| } |
| else if (diff == -1 && ct) |
| { |
| /* |
| * cmpl op0,op1 |
| * sbbl dest,dest |
| * notl dest |
| * [addl dest, cf] |
| * |
| * Size 8 - 11. |
| */ |
| tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); |
| if (cf) |
| tmp = expand_simple_binop (mode, PLUS, |
| copy_rtx (tmp), GEN_INT (cf), |
| copy_rtx (tmp), 1, OPTAB_DIRECT); |
| } |
| else |
| { |
| /* |
| * cmpl op0,op1 |
| * sbbl dest,dest |
| * [notl dest] |
| * andl cf - ct, dest |
| * [addl dest, ct] |
| * |
| * Size 8 - 11. |
| */ |
| |
| if (cf == 0) |
| { |
| cf = ct; |
| ct = 0; |
| tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); |
| } |
| |
| tmp = expand_simple_binop (mode, AND, |
| copy_rtx (tmp), |
| gen_int_mode (cf - ct, mode), |
| copy_rtx (tmp), 1, OPTAB_DIRECT); |
| if (ct) |
| tmp = expand_simple_binop (mode, PLUS, |
| copy_rtx (tmp), GEN_INT (ct), |
| copy_rtx (tmp), 1, OPTAB_DIRECT); |
| } |
| |
| if (!rtx_equal_p (tmp, out)) |
| emit_move_insn (copy_rtx (out), copy_rtx (tmp)); |
| |
| return 1; /* DONE */ |
| } |
| |
| if (diff < 0) |
| { |
| HOST_WIDE_INT tmp; |
| tmp = ct, ct = cf, cf = tmp; |
| diff = -diff; |
| if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) |
| { |
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition to a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
| compare_code = reverse_condition_maybe_unordered (compare_code); |
| code = reverse_condition_maybe_unordered (code); |
| } |
| else |
| { |
| compare_code = reverse_condition (compare_code); |
| code = reverse_condition (code); |
| } |
| } |
| |
| compare_code = UNKNOWN; |
| if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT |
| && GET_CODE (ix86_compare_op1) == CONST_INT) |
| { |
| if (ix86_compare_op1 == const0_rtx |
| && (code == LT || code == GE)) |
| compare_code = code; |
| else if (ix86_compare_op1 == constm1_rtx) |
| { |
| if (code == LE) |
| compare_code = LT; |
| else if (code == GT) |
| compare_code = GE; |
| } |
| } |
| |
| /* Optimize dest = (op0 < 0) ? -1 : cf. */ |
| if (compare_code != UNKNOWN |
| && GET_MODE (ix86_compare_op0) == GET_MODE (out) |
| && (cf == -1 || ct == -1)) |
| { |
	      /* If the lea code below could be used, only optimize
		 if it results in a 2-insn sequence.  */
| |
| if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 |
| || diff == 3 || diff == 5 || diff == 9) |
| || (compare_code == LT && ct == -1) |
| || (compare_code == GE && cf == -1)) |
| { |
| /* |
| * notl op1 (if necessary) |
| * sarl $31, op1 |
| * orl cf, op1 |
| */ |
| if (ct != -1) |
| { |
| cf = ct; |
| ct = -1; |
| code = reverse_condition (code); |
| } |
| |
| out = emit_store_flag (out, code, ix86_compare_op0, |
| ix86_compare_op1, VOIDmode, 0, -1); |
| |
| out = expand_simple_binop (mode, IOR, |
| out, GEN_INT (cf), |
| out, 1, OPTAB_DIRECT); |
| if (out != operands[0]) |
| emit_move_insn (operands[0], out); |
| |
| return 1; /* DONE */ |
| } |
| } |
| |
| |
| if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 |
| || diff == 3 || diff == 5 || diff == 9) |
| && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) |
| && (mode != DImode |
| || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) |
| { |
| /* |
| * xorl dest,dest |
| * cmpl op1,op2 |
| * setcc dest |
| * lea cf(dest*(ct-cf)),dest |
| * |
| * Size 14. |
| * |
| * This also catches the degenerate setcc-only case. |
| */ |
| |
| rtx tmp; |
| int nops; |
| |
| out = emit_store_flag (out, code, ix86_compare_op0, |
| ix86_compare_op1, VOIDmode, 0, 1); |
| |
| nops = 0; |
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     the arithmetic done in the proper mode to match.  */
| if (diff == 1) |
| tmp = copy_rtx (out); |
| else |
| { |
| rtx out1; |
| out1 = copy_rtx (out); |
| tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); |
| nops++; |
| if (diff & 1) |
| { |
| tmp = gen_rtx_PLUS (mode, tmp, out1); |
| nops++; |
| } |
| } |
| if (cf != 0) |
| { |
| tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); |
| nops++; |
| } |
| if (!rtx_equal_p (tmp, out)) |
| { |
| if (nops == 1) |
| out = force_operand (tmp, copy_rtx (out)); |
| else |
| emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); |
| } |
| if (!rtx_equal_p (out, operands[0])) |
| emit_move_insn (operands[0], copy_rtx (out)); |
| |
| return 1; /* DONE */ |
| } |
| |
| /* |
| * General case: Jumpful: |
| * xorl dest,dest cmpl op1, op2 |
| * cmpl op1, op2 movl ct, dest |
| * setcc dest jcc 1f |
| * decl dest movl cf, dest |
| * andl (cf-ct),dest 1: |
| * addl ct,dest |
| * |
| * Size 20. Size 14. |
| * |
| * This is reasonably steep, but branch mispredict costs are |
| * high on modern cpus, so consider failing only if optimizing |
| * for space. |
| */ |
| |
| if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) |
| && BRANCH_COST >= 2) |
| { |
| if (cf == 0) |
| { |
| cf = ct; |
| ct = 0; |
| if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) |
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition to a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
| code = reverse_condition_maybe_unordered (code); |
| else |
| { |
| code = reverse_condition (code); |
| if (compare_code != UNKNOWN) |
| compare_code = reverse_condition (compare_code); |
| } |
| } |
| |
| if (compare_code != UNKNOWN) |
| { |
| /* notl op1 (if needed) |
| sarl $31, op1 |
| andl (cf-ct), op1 |
| addl ct, op1 |
| |
| For x < 0 (resp. x <= -1) there will be no notl, |
| so if possible swap the constants to get rid of the |
| complement. |
| True/false will be -1/0 while code below (store flag |
| followed by decrement) is 0/-1, so the constants need |
| to be exchanged once more. */ |
| |
| if (compare_code == GE || !cf) |
| { |
| code = reverse_condition (code); |
| compare_code = LT; |
| } |
| else |
| { |
| HOST_WIDE_INT tmp = cf; |
| cf = ct; |
| ct = tmp; |
| } |
| |
| out = emit_store_flag (out, code, ix86_compare_op0, |
| ix86_compare_op1, VOIDmode, 0, -1); |
| } |
| else |
| { |
| out = emit_store_flag (out, code, ix86_compare_op0, |
| ix86_compare_op1, VOIDmode, 0, 1); |
| |
| out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, |
| copy_rtx (out), 1, OPTAB_DIRECT); |
| } |
| |
| out = expand_simple_binop (mode, AND, copy_rtx (out), |
| gen_int_mode (cf - ct, mode), |
| copy_rtx (out), 1, OPTAB_DIRECT); |
| if (ct) |
| out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), |
| copy_rtx (out), 1, OPTAB_DIRECT); |
| if (!rtx_equal_p (out, operands[0])) |
| emit_move_insn (operands[0], copy_rtx (out)); |
| |
| return 1; /* DONE */ |
| } |
| } |
| |
| if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) |
| { |
| /* Try a few things more with specific constants and a variable. */ |
| |
| optab op; |
| rtx var, orig_out, out, tmp; |
| |
| if (BRANCH_COST <= 2) |
| return 0; /* FAIL */ |
| |
      /* If one of the two operands is an interesting constant, load the
	 other constant via a recursive call and mask the variable in with
	 a logical operation.  */
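
      /* For example, (cond ? 0 : var) is computed by recursing to get
	 tmp = cond ? 0 : -1 and then ANDing the variable in: var & tmp.
	 Likewise (cond ? -1 : var) uses tmp = cond ? -1 : 0 and an IOR.  */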
| |
| if (GET_CODE (operands[2]) == CONST_INT) |
| { |
| var = operands[3]; |
| if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) |
| operands[3] = constm1_rtx, op = and_optab; |
| else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) |
| operands[3] = const0_rtx, op = ior_optab; |
| else |
| return 0; /* FAIL */ |
| } |
| else if (GET_CODE (operands[3]) == CONST_INT) |
| { |
| var = operands[2]; |
| if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) |
| operands[2] = constm1_rtx, op = and_optab; |
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
| operands[2] = const0_rtx, op = ior_optab; |
| else |
| return 0; /* FAIL */ |
| } |
| else |
| return 0; /* FAIL */ |
| |
| orig_out = operands[0]; |
| tmp = gen_reg_rtx (mode); |
| operands[0] = tmp; |
| |
| /* Recurse to get the constant loaded. */ |
| if (ix86_expand_int_movcc (operands) == 0) |
| return 0; /* FAIL */ |
| |
| /* Mask in the interesting variable. */ |
| out = expand_binop (mode, op, var, tmp, orig_out, 0, |
| OPTAB_WIDEN); |
| if (!rtx_equal_p (out, orig_out)) |
| emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); |
| |
| return 1; /* DONE */ |
| } |
| |
| /* |
| * For comparison with above, |
| * |
| * movl cf,dest |
| * movl ct,tmp |
| * cmpl op1,op2 |
| * cmovcc tmp,dest |
| * |
| * Size 15. |
| */ |
| |
| if (! nonimmediate_operand (operands[2], mode)) |
| operands[2] = force_reg (mode, operands[2]); |
| if (! nonimmediate_operand (operands[3], mode)) |
| operands[3] = force_reg (mode, operands[3]); |
| |
| if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) |
| { |
| rtx tmp = gen_reg_rtx (mode); |
| emit_move_insn (tmp, operands[3]); |
| operands[3] = tmp; |
| } |
| if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) |
| { |
| rtx tmp = gen_reg_rtx (mode); |
| emit_move_insn (tmp, operands[2]); |
| operands[2] = tmp; |
| } |
| |
| if (! register_operand (operands[2], VOIDmode) |
| && (mode == QImode |
| || ! register_operand (operands[3], VOIDmode))) |
| operands[2] = force_reg (mode, operands[2]); |
| |
| if (mode == QImode |
| && ! register_operand (operands[3], VOIDmode)) |
| operands[3] = force_reg (mode, operands[3]); |
| |
| emit_insn (compare_seq); |
| emit_insn (gen_rtx_SET (VOIDmode, operands[0], |
| gen_rtx_IF_THEN_ELSE (mode, |
| compare_op, operands[2], |
| operands[3]))); |
| if (bypass_test) |
| emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), |
| gen_rtx_IF_THEN_ELSE (mode, |
| bypass_test, |
| copy_rtx (operands[3]), |
| copy_rtx (operands[0])))); |
| if (second_test) |
| emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), |
| gen_rtx_IF_THEN_ELSE (mode, |
| second_test, |
| copy_rtx (operands[2]), |
| copy_rtx (operands[0])))); |
| |
| return 1; /* DONE */ |
| } |
| |
| /* Swap, force into registers, or otherwise massage the two operands |
| to an sse comparison with a mask result. Thus we differ a bit from |
| ix86_prepare_fp_compare_args which expects to produce a flags result. |
| |
| The DEST operand exists to help determine whether to commute commutative |
| operators. The POP0/POP1 operands are updated in place. The new |
| comparison code is returned, or UNKNOWN if not implementable. */ |
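
/* The SSE compare instructions implement only the EQ, LT, LE, UNORD, NEQ,
   NLT (= UNGE) and NLE (= UNGT) predicates, plus ORD, which is why the
   GE/GT/UNLE/UNLT forms are handled by swapping the operands.  */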
| |
| static enum rtx_code |
| ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, |
| rtx *pop0, rtx *pop1) |
| { |
| rtx tmp; |
| |
| switch (code) |
| { |
| case LTGT: |
| case UNEQ: |
| /* We have no LTGT as an operator. We could implement it with |
| NE & ORDERED, but this requires an extra temporary. It's |
| not clear that it's worth it. */ |
| return UNKNOWN; |
| |
| case LT: |
| case LE: |
| case UNGT: |
| case UNGE: |
| /* These are supported directly. */ |
| break; |
| |
| case EQ: |
| case NE: |
| case UNORDERED: |
| case ORDERED: |
| /* For commutative operators, try to canonicalize the destination |
| operand to be first in the comparison - this helps reload to |
| avoid extra moves. */ |
| if (!dest || !rtx_equal_p (dest, *pop1)) |
| break; |
| /* FALLTHRU */ |
| |
| case GE: |
| case GT: |
| case UNLE: |
| case UNLT: |
| /* These are not supported directly. Swap the comparison operands |
| to transform into something that is supported. */ |
| tmp = *pop0; |
| *pop0 = *pop1; |
| *pop1 = tmp; |
| code = swap_condition (code); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return code; |
| } |
| |
| /* Detect conditional moves that exactly match min/max operational |
| semantics. Note that this is IEEE safe, as long as we don't |
| interchange the operands. |
| |
| Returns FALSE if this conditional move doesn't match a MIN/MAX, |
| and TRUE if the operation is successful and instructions are emitted. */ |
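
/* Note: the hardware minss/minps family returns its second operand when
   the comparison involves a NaN, which is exactly the semantics of
   "a < b ? a : b"; swapping the operands would change the NaN result.  */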
| |
| static bool |
| ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, |
| rtx cmp_op1, rtx if_true, rtx if_false) |
| { |
| enum machine_mode mode; |
| bool is_min; |
| rtx tmp; |
| |
| if (code == LT) |
| ; |
| else if (code == UNGE) |
| { |
| tmp = if_true; |
| if_true = if_false; |
| if_false = tmp; |
| } |
| else |
| return false; |
| |
| if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) |
| is_min = true; |
| else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) |
| is_min = false; |
| else |
| return false; |
| |
| mode = GET_MODE (dest); |
| |
| /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, |
| but MODE may be a vector mode and thus not appropriate. */ |
| if (!flag_finite_math_only || !flag_unsafe_math_optimizations) |
| { |
| int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; |
| rtvec v; |
| |
| if_true = force_reg (mode, if_true); |
| v = gen_rtvec (2, if_true, if_false); |
| tmp = gen_rtx_UNSPEC (mode, v, u); |
| } |
| else |
| { |
| code = is_min ? SMIN : SMAX; |
| tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); |
| } |
| |
| emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); |
| return true; |
| } |
| |
| /* Expand an sse vector comparison. Return the register with the result. */ |
| |
| static rtx |
| ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, |
| rtx op_true, rtx op_false) |
| { |
| enum machine_mode mode = GET_MODE (dest); |
| rtx x; |
| |
| cmp_op0 = force_reg (mode, cmp_op0); |
| if (!nonimmediate_operand (cmp_op1, mode)) |
| cmp_op1 = force_reg (mode, cmp_op1); |
| |
| if (optimize |
| || reg_overlap_mentioned_p (dest, op_true) |
| || reg_overlap_mentioned_p (dest, op_false)) |
| dest = gen_reg_rtx (mode); |
| |
| x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
| |
| return dest; |
| } |
| |
| /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical |
| operations. This is used for both scalar and vector conditional moves. */ |
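/* The general case computes (CMP & OP_TRUE) | (~CMP & OP_FALSE), mapping
   onto the andps/andnps/orps family; when one arm is zero the sequence
   degenerates to a single AND or ANDN.  */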
| |
| static void |
| ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) |
| { |
| enum machine_mode mode = GET_MODE (dest); |
| rtx t2, t3, x; |
| |
| if (op_false == CONST0_RTX (mode)) |
| { |
| op_true = force_reg (mode, op_true); |
| x = gen_rtx_AND (mode, cmp, op_true); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
| } |
| else if (op_true == CONST0_RTX (mode)) |
| { |
| op_false = force_reg (mode, op_false); |
| x = gen_rtx_NOT (mode, cmp); |
| x = gen_rtx_AND (mode, x, op_false); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
| } |
| else |
| { |
| op_true = force_reg (mode, op_true); |
| op_false = force_reg (mode, op_false); |
| |
| t2 = gen_reg_rtx (mode); |
| if (optimize) |
| t3 = gen_reg_rtx (mode); |
| else |
| t3 = dest; |
| |
| x = gen_rtx_AND (mode, op_true, cmp); |
| emit_insn (gen_rtx_SET (VOIDmode, t2, x)); |
| |
| x = gen_rtx_NOT (mode, cmp); |
| x = gen_rtx_AND (mode, x, op_false); |
| emit_insn (gen_rtx_SET (VOIDmode, t3, x)); |
| |
| x = gen_rtx_IOR (mode, t3, t2); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
| } |
| } |
| |
| /* Expand a floating-point conditional move. Return true if successful. */ |
| |
| int |
| ix86_expand_fp_movcc (rtx operands[]) |
| { |
| enum machine_mode mode = GET_MODE (operands[0]); |
| enum rtx_code code = GET_CODE (operands[1]); |
| rtx tmp, compare_op, second_test, bypass_test; |
| |
| if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) |
| { |
| enum machine_mode cmode; |
| |
| /* Since we've no cmove for sse registers, don't force bad register |
| allocation just to gain access to it. Deny movcc when the |
| comparison mode doesn't match the move mode. */ |
| cmode = GET_MODE (ix86_compare_op0); |
| if (cmode == VOIDmode) |
| cmode = GET_MODE (ix86_compare_op1); |
| if (cmode != mode) |
| return 0; |
| |
| code = ix86_prepare_sse_fp_compare_args (operands[0], code, |
| &ix86_compare_op0, |
| &ix86_compare_op1); |
| if (code == UNKNOWN) |
| return 0; |
| |
| if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0, |
| ix86_compare_op1, operands[2], |
| operands[3])) |
| return 1; |
| |
| tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0, |
| ix86_compare_op1, operands[2], operands[3]); |
| ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); |
| return 1; |
| } |
| |
| /* The floating point conditional move instructions don't directly |
| support conditions resulting from a signed integer comparison. */ |
| |
| compare_op = ix86_expand_compare (code, &second_test, &bypass_test); |
| |
| if (!fcmov_comparison_operator (compare_op, VOIDmode)) |
| { |
| gcc_assert (!second_test && !bypass_test); |
| tmp = gen_reg_rtx (QImode); |
| ix86_expand_setcc (code, tmp); |
| code = NE; |
| ix86_compare_op0 = tmp; |
| ix86_compare_op1 = const0_rtx; |
| compare_op = ix86_expand_compare (code, &second_test, &bypass_test); |
| } |
| if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) |
| { |
| tmp = gen_reg_rtx (mode); |
| emit_move_insn (tmp, operands[3]); |
| operands[3] = tmp; |
| } |
| if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) |
| { |
| tmp = gen_reg_rtx (mode); |
| emit_move_insn (tmp, operands[2]); |
| operands[2] = tmp; |
| } |
| |
| emit_insn (gen_rtx_SET (VOIDmode, operands[0], |
| gen_rtx_IF_THEN_ELSE (mode, compare_op, |
| operands[2], operands[3]))); |
| if (bypass_test) |
| emit_insn (gen_rtx_SET (VOIDmode, operands[0], |
| gen_rtx_IF_THEN_ELSE (mode, bypass_test, |
| operands[3], operands[0]))); |
| if (second_test) |
| emit_insn (gen_rtx_SET (VOIDmode, operands[0], |
| gen_rtx_IF_THEN_ELSE (mode, second_test, |
| operands[2], operands[0]))); |
| |
| return 1; |
| } |
| |
| /* Expand a floating-point vector conditional move; a vcond operation |
| rather than a movcc operation. */ |
| |
| bool |
| ix86_expand_fp_vcond (rtx operands[]) |
| { |
| enum rtx_code code = GET_CODE (operands[3]); |
| rtx cmp; |
| |
| code = ix86_prepare_sse_fp_compare_args (operands[0], code, |
| &operands[4], &operands[5]); |
| if (code == UNKNOWN) |
| return false; |
| |
| if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], |
| operands[5], operands[1], operands[2])) |
| return true; |
| |
| cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], |
| operands[1], operands[2]); |
| ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); |
| return true; |
| } |
| |
| /* Expand a signed integral vector conditional move. */ |
| |
| bool |
| ix86_expand_int_vcond (rtx operands[]) |
| { |
| enum machine_mode mode = GET_MODE (operands[0]); |
| enum rtx_code code = GET_CODE (operands[3]); |
| bool negate = false; |
| rtx x, cop0, cop1; |
| |
| cop0 = operands[4]; |
| cop1 = operands[5]; |
| |
| /* Canonicalize the comparison to EQ, GT, GTU. */ |
| switch (code) |
| { |
| case EQ: |
| case GT: |
| case GTU: |
| break; |
| |
| case NE: |
| case LE: |
| case LEU: |
| code = reverse_condition (code); |
| negate = true; |
| break; |
| |
| case GE: |
| case GEU: |
| code = reverse_condition (code); |
| negate = true; |
| /* FALLTHRU */ |
| |
| case LT: |
| case LTU: |
| code = swap_condition (code); |
| x = cop0, cop0 = cop1, cop1 = x; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* Only SSE4.1/SSE4.2 supports V2DImode. */ |
| if (mode == V2DImode) |
| { |
| switch (code) |
| { |
| case EQ: |
| /* SSE4.1 supports EQ. */ |
| if (!TARGET_SSE4_1) |
| return false; |
| break; |
| |
| case GT: |
| case GTU: |
| /* SSE4.2 supports GT/GTU. */ |
| if (!TARGET_SSE4_2) |
| return false; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| /* Unsigned parallel compare is not supported by the hardware. Play some |
| tricks to turn this into a signed comparison against 0. */ |
| if (code == GTU) |
| { |
| cop0 = force_reg (mode, cop0); |
| |
| switch (mode) |
| { |
| case V4SImode: |
| { |
| rtx t1, t2, mask; |
| |
| /* Perform a parallel modulo subtraction. */ |
| t1 = gen_reg_rtx (mode); |
| emit_insn (gen_subv4si3 (t1, cop0, cop1)); |
| |
| /* Extract the original sign bit of op0. */ |
| mask = GEN_INT (-0x80000000); |
| mask = gen_rtx_CONST_VECTOR (mode, |
| gen_rtvec (4, mask, mask, mask, mask)); |
| mask = force_reg (mode, mask); |
| t2 = gen_reg_rtx (mode); |
| emit_insn (gen_andv4si3 (t2, cop0, mask)); |
| |
| /* XOR it back into the result of the subtraction. This results |
| in the sign bit set iff we saw unsigned underflow. */ |
| x = gen_reg_rtx (mode); |
| emit_insn (gen_xorv4si3 (x, t1, t2)); |
| |
| code = GT; |
| } |
| break; |
| |
| case V16QImode: |
| case V8HImode: |
| /* Perform a parallel unsigned saturating subtraction. */ |
| x = gen_reg_rtx (mode); |
| emit_insn (gen_rtx_SET (VOIDmode, x, |
| gen_rtx_US_MINUS (mode, cop0, cop1))); |
| |
| code = EQ; |
| negate = !negate; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| cop0 = x; |
| cop1 = CONST0_RTX (mode); |
| } |
| |
| x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, |
| operands[1+negate], operands[2-negate]); |
| |
| ix86_expand_sse_movcc (operands[0], x, operands[1+negate], |
| operands[2-negate]); |
| return true; |
| } |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is |
| true if we should do zero extension, else sign extension. HIGH_P is |
| true if we want the N/2 high elements, else the low elements. */ |
| |
| void |
| ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p) |
| { |
| enum machine_mode imode = GET_MODE (operands[1]); |
| rtx (*unpack)(rtx, rtx, rtx); |
| rtx se, dest; |
| |
| switch (imode) |
| { |
| case V16QImode: |
| if (high_p) |
| unpack = gen_vec_interleave_highv16qi; |
| else |
| unpack = gen_vec_interleave_lowv16qi; |
| break; |
| case V8HImode: |
| if (high_p) |
| unpack = gen_vec_interleave_highv8hi; |
| else |
| unpack = gen_vec_interleave_lowv8hi; |
| break; |
| case V4SImode: |
| if (high_p) |
| unpack = gen_vec_interleave_highv4si; |
| else |
| unpack = gen_vec_interleave_lowv4si; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| dest = gen_lowpart (imode, operands[0]); |
| |
| if (unsigned_p) |
| se = force_reg (imode, CONST0_RTX (imode)); |
| else |
| se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), |
| operands[1], pc_rtx, pc_rtx); |
| |
| emit_insn (unpack (dest, operands[1], se)); |
| } |
| |
| /* This function performs the same task as ix86_expand_sse_unpack, |
| but with SSE4.1 instructions. */ |
| |
| void |
| ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) |
| { |
| enum machine_mode imode = GET_MODE (operands[1]); |
| rtx (*unpack)(rtx, rtx); |
| rtx src, dest; |
| |
| switch (imode) |
| { |
| case V16QImode: |
| if (unsigned_p) |
| unpack = gen_sse4_1_zero_extendv8qiv8hi2; |
| else |
| unpack = gen_sse4_1_extendv8qiv8hi2; |
| break; |
| case V8HImode: |
| if (unsigned_p) |
| unpack = gen_sse4_1_zero_extendv4hiv4si2; |
| else |
| unpack = gen_sse4_1_extendv4hiv4si2; |
| break; |
| case V4SImode: |
| if (unsigned_p) |
| unpack = gen_sse4_1_zero_extendv2siv2di2; |
| else |
| unpack = gen_sse4_1_extendv2siv2di2; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| dest = operands[0]; |
| if (high_p) |
| { |
      /* Shift the high 8 bytes down into the low 8 bytes.  */
| src = gen_reg_rtx (imode); |
| emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src), |
| gen_lowpart (TImode, operands[1]), |
| GEN_INT (64))); |
| } |
| else |
| src = operands[1]; |
| |
| emit_insn (unpack (dest, src)); |
| } |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
| int |
| ix86_expand_int_addcc (rtx operands[]) |
| { |
| enum rtx_code code = GET_CODE (operands[1]); |
| rtx compare_op; |
| rtx val = const0_rtx; |
| bool fpcmp = false; |
| enum machine_mode mode = GET_MODE (operands[0]); |
| |
| if (operands[3] != const1_rtx |
| && operands[3] != constm1_rtx) |
| return 0; |
| if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0, |
| ix86_compare_op1, &compare_op)) |
| return 0; |
| code = GET_CODE (compare_op); |
| |
| if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode |
| || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) |
| { |
| fpcmp = true; |
| code = ix86_fp_compare_code_to_integer (code); |
| } |
| |
| if (code != LTU) |
| { |
| val = constm1_rtx; |
| if (fpcmp) |
| PUT_CODE (compare_op, |
| reverse_condition_maybe_unordered |
| (GET_CODE (compare_op))); |
| else |
| PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); |
| } |
| PUT_MODE (compare_op, mode); |
| |
| /* Construct either adc or sbb insn. */ |
| if ((code == LTU) == (operands[3] == constm1_rtx)) |
| { |
| switch (GET_MODE (operands[0])) |
| { |
| case QImode: |
| emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op)); |
| break; |
| case HImode: |
| emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op)); |
| break; |
| case SImode: |
| emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op)); |
| break; |
| case DImode: |
| emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op)); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| else |
| { |
| switch (GET_MODE (operands[0])) |
| { |
| case QImode: |
| emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op)); |
| break; |
| case HImode: |
| emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op)); |
| break; |
| case SImode: |
| emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op)); |
| break; |
| case DImode: |
| emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op)); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| return 1; /* DONE */ |
| } |
| |
| |
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */
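
/* For instance (editorial example): on a 32-bit target a DFmode operand
   yields two SImode parts (low word first) and an XFmode operand yields
   three, while on a 64-bit target an XFmode operand yields a DImode
   part plus an SImode part.  */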
| |
| static int |
| ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) |
| { |
| int size; |
| |
| if (!TARGET_64BIT) |
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
| else |
| size = (GET_MODE_SIZE (mode) + 4) / 8; |
| |
| gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand))); |
| gcc_assert (size >= 2 && size <= 3); |
| |
| /* Optimize constant pool reference to immediates. This is used by fp |
| moves, that force all constants to memory to allow combining. */ |
| if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand)) |
| { |
| rtx tmp = maybe_get_pool_constant (operand); |
| if (tmp) |
| operand = tmp; |
| } |
| |
| if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand)) |
| { |
      /* The only non-offsettable memories we handle are pushes.  */
| int ok = push_operand (operand, VOIDmode); |
| |
| gcc_assert (ok); |
| |
| operand = copy_rtx (operand); |
| PUT_MODE (operand, Pmode); |
| parts[0] = parts[1] = parts[2] = operand; |
| return size; |
| } |
| |
| if (GET_CODE (operand) == CONST_VECTOR) |
| { |
| enum machine_mode imode = int_mode_for_mode (mode); |
| /* Caution: if we looked through a constant pool memory above, |
| the operand may actually have a different mode now. That's |
| ok, since we want to pun this all the way back to an integer. */ |
| operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); |
| gcc_assert (operand != NULL); |
| mode = imode; |
| } |
| |
| if (!TARGET_64BIT) |
| { |
| if (mode == DImode) |
| split_di (&operand, 1, &parts[0], &parts[1]); |
| else |
| { |
| if (REG_P (operand)) |
| { |
| gcc_assert (reload_completed); |
| parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0); |
| parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1); |
| if (size == 3) |
| parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2); |
| } |
| else if (offsettable_memref_p (operand)) |
| { |
| operand = adjust_address (operand, SImode, 0); |
| parts[0] = operand; |
| parts[1] = adjust_address (operand, SImode, 4); |
| if (size == 3) |
| parts[2] = adjust_address (operand, SImode, 8); |
| } |
| else if (GET_CODE (operand) == CONST_DOUBLE) |
| { |
| REAL_VALUE_TYPE r; |
| long l[4]; |
| |
| REAL_VALUE_FROM_CONST_DOUBLE (r, operand); |
| switch (mode) |
| { |
| case XFmode: |
| REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l); |
| parts[2] = gen_int_mode (l[2], SImode); |
| break; |
| case DFmode: |
| REAL_VALUE_TO_TARGET_DOUBLE (r, l); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| parts[1] = gen_int_mode (l[1], SImode); |
| parts[0] = gen_int_mode (l[0], SImode); |
| } |
| else |
| gcc_unreachable (); |
| } |
| } |
| else |
| { |
| if (mode == TImode) |
| split_ti (&operand, 1, &parts[0], &parts[1]); |
| if (mode == XFmode || mode == TFmode) |
| { |
	  enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
| if (REG_P (operand)) |
| { |
| gcc_assert (reload_completed); |
| parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); |
| parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); |
| } |
| else if (offsettable_memref_p (operand)) |
| { |
| operand = adjust_address (operand, DImode, 0); |
| parts[0] = operand; |
| parts[1] = adjust_address (operand, upper_mode, 8); |
| } |
| else if (GET_CODE (operand) == CONST_DOUBLE) |
| { |
| REAL_VALUE_TYPE r; |
| long l[4]; |
| |
| REAL_VALUE_FROM_CONST_DOUBLE (r, operand); |
| real_to_target (l, &r, mode); |
| |
	      /* Do not use a shift by 32, to avoid a warning on 32-bit hosts.  */
| if (HOST_BITS_PER_WIDE_INT >= 64) |
| parts[0] |
| = gen_int_mode |
| ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1)) |
| + ((((HOST_WIDE_INT) l[1]) << 31) << 1), |
| DImode); |
| else |
| parts[0] = immed_double_const (l[0], l[1], DImode); |
| |
| if (upper_mode == SImode) |
| parts[1] = gen_int_mode (l[2], SImode); |
| else if (HOST_BITS_PER_WIDE_INT >= 64) |
| parts[1] |
| = gen_int_mode |
| ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1)) |
| + ((((HOST_WIDE_INT) l[3]) << 31) << 1), |
| DImode); |
| else |
| parts[1] = immed_double_const (l[2], l[3], DImode); |
| } |
| else |
| gcc_unreachable (); |
| } |
| } |
| |
| return size; |
| } |
| |
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order;
   operands 5-7 contain the output values.  */
| |
| void |
| ix86_split_long_move (rtx operands[]) |
| { |
| rtx part[2][3]; |
| int nparts; |
| int push = 0; |
| int collisions = 0; |
| enum machine_mode mode = GET_MODE (operands[0]); |
| |
  /* The DFmode expanders may ask us to move a double.  For a 64-bit
     target this is a single move; by hiding that fact here we simplify
     the i386.md splitters.  */
| if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT) |
| { |
| /* Optimize constant pool reference to immediates. This is used by |
| fp moves, that force all constants to memory to allow combining. */ |
| |
| if (GET_CODE (operands[1]) == MEM |
| && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF |
| && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) |
| operands[1] = get_pool_constant (XEXP (operands[1], 0)); |
| if (push_operand (operands[0], VOIDmode)) |
| { |
| operands[0] = copy_rtx (operands[0]); |
| PUT_MODE (operands[0], Pmode); |
| } |
| else |
| operands[0] = gen_lowpart (DImode, operands[0]); |
| operands[1] = gen_lowpart (DImode, operands[1]); |
| emit_move_insn (operands[0], operands[1]); |
| return; |
| } |
| |
  /* The only non-offsettable memory we handle is a push.  */
| if (push_operand (operands[0], VOIDmode)) |
| push = 1; |
| else |
| gcc_assert (GET_CODE (operands[0]) != MEM |
| || offsettable_memref_p (operands[0])); |
| |
| nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); |
| ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); |
| |
  /* When emitting a push, watch out for source operands on the stack.  */
| if (push && GET_CODE (operands[1]) == MEM |
| && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) |
| { |
| /* APPLE LOCAL begin 4099768 */ |
| if (nparts == 3 && TARGET_128BIT_LONG_DOUBLE && mode == XFmode) |
| part[1][2] = adjust_address (part[1][2], SImode, 4); |
| /* APPLE LOCAL end 4099768 */ |
| if (nparts == 3) |
| part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]), |
| XEXP (part[1][2], 0)); |
| part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]), |
| XEXP (part[1][1], 0)); |
| } |
| |
  /* We need to do the copy in the right order in case an address
     register of the source overlaps the destination.  */
| if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM) |
| { |
| if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))) |
| collisions++; |
| if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0))) |
| collisions++; |
| if (nparts == 3 |
| && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0))) |
| collisions++; |
| |
| /* Collision in the middle part can be handled by reordering. */ |
| if (collisions == 1 && nparts == 3 |
| && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0))) |
| { |
| rtx tmp; |
| tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; |
| tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; |
| } |
| |
| /* If there are more collisions, we can't handle it by reordering. |
| Do an lea to the last part and use only one colliding move. */ |
| else if (collisions > 1) |
| { |
| rtx base; |
| |
| collisions = 1; |
| |
| base = part[0][nparts - 1]; |
| |
	  /* Handle the case when the last part isn't valid for lea.
	     This happens in 64-bit mode when storing the 12-byte XFmode.  */
| if (GET_MODE (base) != Pmode) |
| base = gen_rtx_REG (Pmode, REGNO (base)); |
| |
| emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); |
| part[1][0] = replace_equiv_address (part[1][0], base); |
| part[1][1] = replace_equiv_address (part[1][1], |
| plus_constant (base, UNITS_PER_WORD)); |
| if (nparts == 3) |
| part[1][2] = replace_equiv_address (part[1][2], |
| plus_constant (base, 8)); |
| } |
| } |
| |
| if (push) |
| { |
| if (!TARGET_64BIT) |
| { |
| if (nparts == 3) |
| { |
| if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) |
| emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4))); |
| emit_move_insn (part[0][2], part[1][2]); |
| } |
| } |
| else |
| { |
	  /* In 64-bit mode we don't have a 32-bit push available.  If the
	     operand is a register, that is OK: we just use the larger
	     counterpart.  We also retype memories; these come from an
	     attempt to avoid a REX prefix when moving the second half of
	     a TFmode value.  */
| if (GET_MODE (part[1][1]) == SImode) |
| { |
| switch (GET_CODE (part[1][1])) |
| { |
| case MEM: |
| part[1][1] = adjust_address (part[1][1], DImode, 0); |
| break; |
| |
| case REG: |
| part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (GET_MODE (part[1][0]) == SImode) |
| part[1][0] = part[1][1]; |
| } |
| } |
| emit_move_insn (part[0][1], part[1][1]); |
| emit_move_insn (part[0][0], part[1][0]); |
| return; |
| } |
| |
  /* Choose the correct order so we do not overwrite the source before
     it is copied.  */
| if ((REG_P (part[0][0]) |
| && REG_P (part[1][1]) |
| && (REGNO (part[0][0]) == REGNO (part[1][1]) |
| || (nparts == 3 |
| && REGNO (part[0][0]) == REGNO (part[1][2])))) |
| || (collisions > 0 |
| && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) |
| { |
| if (nparts == 3) |
| { |
| operands[2] = part[0][2]; |
| operands[3] = part[0][1]; |
| operands[4] = part[0][0]; |
| operands[5] = part[1][2]; |
| operands[6] = part[1][1]; |
| operands[7] = part[1][0]; |
| } |
| else |
| { |
| operands[2] = part[0][1]; |
| operands[3] = part[0][0]; |
| operands[5] = part[1][1]; |
| operands[6] = part[1][0]; |
| } |
| } |
| else |
| { |
| if (nparts == 3) |
| { |
| operands[2] = part[0][0]; |
| operands[3] = part[0][1]; |
| operands[4] = part[0][2]; |
| operands[5] = part[1][0]; |
| operands[6] = part[1][1]; |
| operands[7] = part[1][2]; |
| } |
| else |
| { |
| operands[2] = part[0][0]; |
| operands[3] = part[0][1]; |
| operands[5] = part[1][0]; |
| operands[6] = part[1][1]; |
| } |
| } |
| |
| /* If optimizing for size, attempt to locally unCSE nonzero constants. */ |
| if (optimize_size) |
| { |
| if (GET_CODE (operands[5]) == CONST_INT |
| && operands[5] != const0_rtx |
| && REG_P (operands[2])) |
| { |
| if (GET_CODE (operands[6]) == CONST_INT |
| && INTVAL (operands[6]) == INTVAL (operands[5])) |
| operands[6] = operands[2]; |
| |
| if (nparts == 3 |
| && GET_CODE (operands[7]) == CONST_INT |
| && INTVAL (operands[7]) == INTVAL (operands[5])) |
| operands[7] = operands[2]; |
| } |
| |
| if (nparts == 3 |
| && GET_CODE (operands[6]) == CONST_INT |
| && operands[6] != const0_rtx |
| && REG_P (operands[3]) |
| && GET_CODE (operands[7]) == CONST_INT |
| && INTVAL (operands[7]) == INTVAL (operands[6])) |
| operands[7] = operands[3]; |
| } |
| |
| emit_move_insn (operands[2], operands[5]); |
| emit_move_insn (operands[3], operands[6]); |
| if (nparts == 3) |
| emit_move_insn (operands[4], operands[7]); |
| |
| return; |
| } |
| |
| /* Helper function of ix86_split_ashl used to generate an SImode/DImode |
| left shift by a constant, either using a single shift or |
| a sequence of add instructions. */ |
| |
| static void |
| ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) |
| { |
| if (count == 1) |
| { |
| emit_insn ((mode == DImode |
| ? gen_addsi3 |
| : gen_adddi3) (operand, operand, operand)); |
| } |
| else if (!optimize_size |
| && count * ix86_cost->add <= ix86_cost->shift_const) |
| { |
| int i; |
      for (i = 0; i < count; i++)
| { |
| emit_insn ((mode == DImode |
| ? gen_addsi3 |
| : gen_adddi3) (operand, operand, operand)); |
| } |
| } |
| else |
| emit_insn ((mode == DImode |
| ? gen_ashlsi3 |
| : gen_ashldi3) (operand, operand, GEN_INT (count))); |
| } |
| |
| void |
| ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) |
| { |
| rtx low[2], high[2]; |
| int count; |
| const int single_width = mode == DImode ? 32 : 64; |
| |
| if (GET_CODE (operands[2]) == CONST_INT) |
| { |
| (mode == DImode ? split_di : split_ti) (operands, 2, low, high); |
| count = INTVAL (operands[2]) & (single_width * 2 - 1); |
| |
| if (count >= single_width) |
| { |
| emit_move_insn (high[0], low[1]); |
| emit_move_insn (low[0], const0_rtx); |
| |
| if (count > single_width) |
| ix86_expand_ashl_const (high[0], count - single_width, mode); |
| } |
| else |
| { |
| if (!rtx_equal_p (operands[0], operands[1])) |
| emit_move_insn (operands[0], operands[1]); |
| emit_insn ((mode == DImode |
| ? gen_x86_shld_1 |
| : gen_x86_64_shld) (high[0], low[0], GEN_INT (count))); |
| ix86_expand_ashl_const (low[0], count, mode); |
| } |
| return; |
| } |
| |
| (mode == DImode ? split_di : split_ti) (operands, 1, low, high); |
| |
| if (operands[1] == const1_rtx) |
| { |
      /* Assuming we've chosen QImode-capable registers, 1 << N can be
	 done with two 32/64-bit shifts, no branches, no cmoves.  */
| if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) |
| { |
| rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); |
| |
| ix86_expand_clear (low[0]); |
| ix86_expand_clear (high[0]); |
| emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width))); |
| |
| d = gen_lowpart (QImode, low[0]); |
| d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); |
| s = gen_rtx_EQ (QImode, flags, const0_rtx); |
| emit_insn (gen_rtx_SET (VOIDmode, d, s)); |
| |
| d = gen_lowpart (QImode, high[0]); |
| d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); |
| s = gen_rtx_NE (QImode, flags, const0_rtx); |
| emit_insn (gen_rtx_SET (VOIDmode, d, s)); |
| } |
| |
| /* Otherwise, we can get the same results by manually performing |
| a bit extract operation on bit 5/6, and then performing the two |
| shifts. The two methods of getting 0/1 into low/high are exactly |
| the same size. Avoiding the shift in the bit extract case helps |
| pentium4 a bit; no one else seems to care much either way. */ |
| else |
| { |
| rtx x; |
| |
| if (TARGET_PARTIAL_REG_STALL && !optimize_size) |
| x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]); |
| else |
| x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]); |
| emit_insn (gen_rtx_SET (VOIDmode, high[0], x)); |
| |
| emit_insn ((mode == DImode |
| ? gen_lshrsi3 |
| : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6))); |
| emit_insn ((mode == DImode |
| ? gen_andsi3 |
| : gen_anddi3) (high[0], high[0], GEN_INT (1))); |
| emit_move_insn (low[0], high[0]); |
| emit_insn ((mode == DImode |
| ? gen_xorsi3 |
| : gen_xordi3) (low[0], low[0], GEN_INT (1))); |
| } |
| |
| emit_insn ((mode == DImode |
| ? gen_ashlsi3 |
| : gen_ashldi3) (low[0], low[0], operands[2])); |
| emit_insn ((mode == DImode |
| ? gen_ashlsi3 |
| : gen_ashldi3) (high[0], high[0], operands[2])); |
| return; |
| } |
| |
| if (operands[1] == constm1_rtx) |
| { |
| /* For -1 << N, we can avoid the shld instruction, because we |
| know that we're shifting 0...31/63 ones into a -1. */ |
| emit_move_insn (low[0], constm1_rtx); |
| if (optimize_size) |
| emit_move_insn (high[0], low[0]); |
| else |
| emit_move_insn (high[0], constm1_rtx); |
| } |
| else |
| { |
| if (!rtx_equal_p (operands[0], operands[1])) |
| emit_move_insn (operands[0], operands[1]); |
| |
| (mode == DImode ? split_di : split_ti) (operands, 1, low, high); |
| emit_insn ((mode == DImode |
| ? gen_x86_shld_1 |
| : gen_x86_64_shld) (high[0], low[0], operands[2])); |
| } |
| |
| emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2])); |
| |
| if (TARGET_CMOVE && scratch) |
| { |
| ix86_expand_clear (scratch); |
| emit_insn ((mode == DImode |
| ? gen_x86_shift_adj_1 |
| : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch)); |
| } |
| else |
| emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); |
| } |
| |
| void |
| ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) |
| { |
| rtx low[2], high[2]; |
| int count; |
| const int single_width = mode == DImode ? 32 : 64; |
| |
| if (GET_CODE (operands[2]) == CONST_INT) |
| { |
| (mode == DImode ? split_di : split_ti) (operands, 2, low, high); |
| count = INTVAL (operands[2]) & (single_width * 2 - 1); |
| |
| if (count == single_width * 2 - 1) |
| { |
| emit_move_insn (high[0], high[1]); |
| emit_insn ((mode == DImode |
| ? gen_ashrsi3 |
| : gen_ashrdi3) (high[0], high[0], |
| GEN_INT (single_width - 1))); |
| emit_move_insn (low[0], high[0]); |
| |
| } |
| else if (count >= single_width) |
| { |
| emit_move_insn (low[0], high[1]); |
| emit_move_insn (high[0], low[0]); |
| emit_insn ((mode == DImode |
| ? gen_ashrsi3 |
| : gen_ashrdi3) (high[0], high[0], |
| GEN_INT (single_width - 1))); |
| if (count > single_width) |
| emit_insn ((mode == DImode |
| ? gen_ashrsi3 |
| : gen_ashrdi3) (low[0], low[0], |
| GEN_INT (count - single_width))); |
| } |
| else |
| { |
| if (!rtx_equal_p (operands[0], operands[1])) |
| emit_move_insn (operands[0], operands[1]); |
| emit_insn ((mode == DImode |
| ? gen_x86_shrd_1 |
| : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); |
| emit_insn ((mode == DImode |
| ? gen_ashrsi3 |
| : gen_ashrdi3) (high[0], high[0], GEN_INT (count))); |
| } |
| } |
| else |
| { |
| if (!rtx_equal_p (operands[0], operands[1])) |
| emit_move_insn (operands[0], operands[1]); |
| |
| (mode == DImode ? split_di : split_ti) (operands, 1, low, high); |
| |
| emit_insn ((mode == DImode |
| ? gen_x86_shrd_1 |
| : gen_x86_64_shrd) (low[0], high[0], operands[2])); |
| emit_insn ((mode == DImode |
| ? gen_ashrsi3 |
| : gen_ashrdi3) (high[0], high[0], operands[2])); |
| |
| if (TARGET_CMOVE && scratch) |
| { |
| emit_move_insn (scratch, high[0]); |
| emit_insn ((mode == DImode |
| ? gen_ashrsi3 |
| : gen_ashrdi3) (scratch, scratch, |
| GEN_INT (single_width - 1))); |
| emit_insn ((mode == DImode |
| ? gen_x86_shift_adj_1 |
| : gen_x86_64_shift_adj) (low[0], high[0], operands[2], |
| scratch)); |
| } |
| else |
| emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); |
| } |
| } |
| |
| void |
| ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) |
| { |
| rtx low[2], high[2]; |
| int count; |
| const int single_width = mode == DImode ? 32 : 64; |
| |
| if (GET_CODE (operands[2]) == CONST_INT) |
| { |
| (mode == DImode ? split_di : split_ti) (operands, 2, low, high); |
| count = INTVAL (operands[2]) & (single_width * 2 - 1); |
| |
| if (count >= single_width) |
| { |
| emit_move_insn (low[0], high[1]); |
| ix86_expand_clear (high[0]); |
| |
| if (count > single_width) |
| emit_insn ((mode == DImode |
| ? gen_lshrsi3 |
| : gen_lshrdi3) (low[0], low[0], |
| GEN_INT (count - single_width))); |
| } |
| else |
| { |
| if (!rtx_equal_p (operands[0], operands[1])) |
| emit_move_insn (operands[0], operands[1]); |
| emit_insn ((mode == DImode |
| ? gen_x86_shrd_1 |
| : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); |
| emit_insn ((mode == DImode |
| ? gen_lshrsi3 |
| : gen_lshrdi3) (high[0], high[0], GEN_INT (count))); |
| } |
| } |
| else |
| { |
| if (!rtx_equal_p (operands[0], operands[1])) |
| emit_move_insn (operands[0], operands[1]); |
| |
| (mode == DImode ? split_di : split_ti) (operands, 1, low, high); |
| |
| emit_insn ((mode == DImode |
| ? gen_x86_shrd_1 |
| : gen_x86_64_shrd) (low[0], high[0], operands[2])); |
| emit_insn ((mode == DImode |
| ? gen_lshrsi3 |
| : gen_lshrdi3) (high[0], high[0], operands[2])); |
| |
| /* Heh. By reversing the arguments, we can reuse this pattern. */ |
| if (TARGET_CMOVE && scratch) |
| { |
| ix86_expand_clear (scratch); |
| emit_insn ((mode == DImode |
| ? gen_x86_shift_adj_1 |
| : gen_x86_64_shift_adj) (low[0], high[0], operands[2], |
| scratch)); |
| } |
| else |
| emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); |
| } |
| } |
| |
/* Helper function for the string operations below.  Test whether
   (VARIABLE & VALUE) is zero; if it is, jump to the returned label.  */
| static rtx |
| ix86_expand_aligntest (rtx variable, int value) |
| { |
| rtx label = gen_label_rtx (); |
| rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); |
| if (GET_MODE (variable) == DImode) |
| emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); |
| else |
| emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); |
| emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), |
| 1, label); |
| return label; |
| } |
| |
/* Decrement COUNTREG by VALUE.  */
| static void |
| ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) |
| { |
| if (GET_MODE (countreg) == DImode) |
| emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); |
| else |
| emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); |
| } |
| |
/* Zero-extend EXP, which may be in SImode, into a Pmode register.  */
| rtx |
| ix86_zero_extend_to_Pmode (rtx exp) |
| { |
| rtx r; |
| if (GET_MODE (exp) == VOIDmode) |
| return force_reg (Pmode, exp); |
| if (GET_MODE (exp) == Pmode) |
| return copy_to_mode_reg (Pmode, exp); |
| r = gen_reg_rtx (Pmode); |
| emit_insn (gen_zero_extendsidi2 (r, exp)); |
| return r; |
| } |
| |
/* Expand a string move (memcpy) operation.  Use i386 string operations
   when profitable.  ix86_expand_clrmem contains similar code.  */
| int |
| ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) |
| { |
| rtx srcreg, destreg, countreg, srcexp, destexp; |
| enum machine_mode counter_mode; |
| HOST_WIDE_INT align = 0; |
| unsigned HOST_WIDE_INT count = 0; |
| |
| if (GET_CODE (align_exp) == CONST_INT) |
| align = INTVAL (align_exp); |
| |
| /* Can't use any of this if the user has appropriated esi or edi. */ |
| if (global_regs[4] || global_regs[5]) |
| return 0; |
| |
| /* This simple hack avoids all inlining code and simplifies code below. */ |
| if (!TARGET_ALIGN_STRINGOPS) |
| align = 64; |
| |
| if (GET_CODE (count_exp) == CONST_INT) |
| { |
| count = INTVAL (count_exp); |
| if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) |
| return 0; |
| } |
| |
  /* Figure out the proper mode for the counter.  For 32-bit targets it
     is always SImode; for 64-bit targets use SImode when possible,
     otherwise DImode.  Set COUNT to the number of bytes copied when
     known at compile time.  */
| if (!TARGET_64BIT |
| || GET_MODE (count_exp) == SImode |
| || x86_64_zext_immediate_operand (count_exp, VOIDmode)) |
| counter_mode = SImode; |
| else |
| counter_mode = DImode; |
| |
| gcc_assert (counter_mode == SImode || counter_mode == DImode); |
| |
| destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); |
| if (destreg != XEXP (dst, 0)) |
| dst = replace_equiv_address_nv (dst, destreg); |
| srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); |
| if (srcreg != XEXP (src, 0)) |
| src = replace_equiv_address_nv (src, srcreg); |
| |
  /* When optimizing for size, emit a simple rep ; movsb instruction for
     counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3) bytes, while the other sequence is either 4
     or 7 bytes, but we don't know whether the upper 24 (resp. 56) bits
     of %ecx will be known to be zero or not.  The rep; movsb sequence
     causes higher register pressure though, so take that into account.  */
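  /* Worked example (editorial): for count == 10 the inline sequence
     costs 10/4 + (10 & 3) = 2 + 2 = 4 bytes (two 1-byte movsl plus one
     2-byte movsw), beating the 7-byte movl $10, %ecx; rep; movsb.  */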
| |
| if ((!optimize || optimize_size) |
| && (count == 0 |
| || ((count & 0x03) |
| && (!optimize_size |
| || count > 5 * 4 |
| || (count & 3) + count / 4 > 6)))) |
| { |
| emit_insn (gen_cld ()); |
| countreg = ix86_zero_extend_to_Pmode (count_exp); |
| destexp = gen_rtx_PLUS (Pmode, destreg, countreg); |
| srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg); |
| emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg, |
| destexp, srcexp)); |
| } |
| |
  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro, ensure 8-byte
     alignment to allow rep movsl acceleration.  */
| |
| else if (count != 0 |
| && (align >= 8 |
| || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) |
| || optimize_size || count < (unsigned int) 64)) |
| { |
| unsigned HOST_WIDE_INT offset = 0; |
| int size = TARGET_64BIT && !optimize_size ? 8 : 4; |
| rtx srcmem, dstmem; |
| |
| emit_insn (gen_cld ()); |
| if (count & ~(size - 1)) |
| { |
| if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4) |
| { |
| enum machine_mode movs_mode = size == 4 ? SImode : DImode; |
| |
| while (offset < (count & ~(size - 1))) |
| { |
| srcmem = adjust_automodify_address_nv (src, movs_mode, |
| srcreg, offset); |
| dstmem = adjust_automodify_address_nv (dst, movs_mode, |
| destreg, offset); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| offset += size; |
| } |
| } |
| else |
| { |
| countreg = GEN_INT ((count >> (size == 4 ? 2 : 3)) |
| & (TARGET_64BIT ? -1 : 0x3fffffff)); |
| countreg = copy_to_mode_reg (counter_mode, countreg); |
| countreg = ix86_zero_extend_to_Pmode (countreg); |
| |
| destexp = gen_rtx_ASHIFT (Pmode, countreg, |
| GEN_INT (size == 4 ? 2 : 3)); |
| srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); |
| destexp = gen_rtx_PLUS (Pmode, destexp, destreg); |
| |
| emit_insn (gen_rep_mov (destreg, dst, srcreg, src, |
| countreg, destexp, srcexp)); |
| offset = count & ~(size - 1); |
| } |
| } |
| if (size == 8 && (count & 0x04)) |
| { |
| srcmem = adjust_automodify_address_nv (src, SImode, srcreg, |
| offset); |
| dstmem = adjust_automodify_address_nv (dst, SImode, destreg, |
| offset); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| offset += 4; |
| } |
| if (count & 0x02) |
| { |
| srcmem = adjust_automodify_address_nv (src, HImode, srcreg, |
| offset); |
| dstmem = adjust_automodify_address_nv (dst, HImode, destreg, |
| offset); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| offset += 2; |
| } |
| if (count & 0x01) |
| { |
| srcmem = adjust_automodify_address_nv (src, QImode, srcreg, |
| offset); |
| dstmem = adjust_automodify_address_nv (dst, QImode, destreg, |
| offset); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| } |
| } |
  /* The generic code, based on the glibc implementation:
     - align the destination to 4 bytes (8-byte alignment is used for
       PentiumPro, allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
| else |
| { |
| rtx countreg2; |
| rtx label = NULL; |
| rtx srcmem, dstmem; |
| int desired_alignment = (TARGET_PENTIUMPRO |
| && (count == 0 || count >= (unsigned int) 260) |
| ? 8 : UNITS_PER_WORD); |
| /* Get rid of MEM_OFFSETs, they won't be accurate. */ |
| dst = change_address (dst, BLKmode, destreg); |
| src = change_address (src, BLKmode, srcreg); |
| |
      /* In case we don't know anything about the alignment, default to
	 the library version, since it is usually equally fast and
	 results in shorter code.

	 Also fall back to a call when we know that the count is large
	 and call overhead will not be important.  */
| if (!TARGET_INLINE_ALL_STRINGOPS |
| && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) |
| return 0; |
| |
| if (TARGET_SINGLE_STRINGOP) |
| emit_insn (gen_cld ()); |
| |
| countreg2 = gen_reg_rtx (Pmode); |
| countreg = copy_to_mode_reg (counter_mode, count_exp); |
| |
      /* We don't use loops to align the destination or to copy parts
	 smaller than 4 bytes, because gcc is able to optimize such code
	 better (when the destination or the count really is aligned,
	 gcc is often able to predict the branches) and because it is
	 friendlier to hardware branch prediction.

	 Using loops is beneficial for the generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later
	 or add some customizability to this code.  */
| |
| if (count == 0 && align < desired_alignment) |
| { |
| label = gen_label_rtx (); |
| emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), |
| LEU, 0, counter_mode, 1, label); |
| } |
| if (align <= 1) |
| { |
| rtx label = ix86_expand_aligntest (destreg, 1); |
| srcmem = change_address (src, QImode, srcreg); |
| dstmem = change_address (dst, QImode, destreg); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| ix86_adjust_counter (countreg, 1); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| if (align <= 2) |
| { |
| rtx label = ix86_expand_aligntest (destreg, 2); |
| srcmem = change_address (src, HImode, srcreg); |
| dstmem = change_address (dst, HImode, destreg); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| ix86_adjust_counter (countreg, 2); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| if (align <= 4 && desired_alignment > 4) |
| { |
| rtx label = ix86_expand_aligntest (destreg, 4); |
| srcmem = change_address (src, SImode, srcreg); |
| dstmem = change_address (dst, SImode, destreg); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| ix86_adjust_counter (countreg, 4); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| |
| if (label && desired_alignment > 4 && !TARGET_64BIT) |
| { |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| label = NULL_RTX; |
| } |
| if (!TARGET_SINGLE_STRINGOP) |
| emit_insn (gen_cld ()); |
| if (TARGET_64BIT) |
| { |
| emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), |
| GEN_INT (3))); |
| destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); |
| } |
| else |
| { |
| emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); |
| destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); |
| } |
| srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); |
| destexp = gen_rtx_PLUS (Pmode, destexp, destreg); |
| emit_insn (gen_rep_mov (destreg, dst, srcreg, src, |
| countreg2, destexp, srcexp)); |
| |
| if (label) |
| { |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) |
| { |
| srcmem = change_address (src, SImode, srcreg); |
| dstmem = change_address (dst, SImode, destreg); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| } |
| if ((align <= 4 || count == 0) && TARGET_64BIT) |
| { |
| rtx label = ix86_expand_aligntest (countreg, 4); |
| srcmem = change_address (src, SImode, srcreg); |
| dstmem = change_address (dst, SImode, destreg); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| if (align > 2 && count != 0 && (count & 2)) |
| { |
| srcmem = change_address (src, HImode, srcreg); |
| dstmem = change_address (dst, HImode, destreg); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| } |
| if (align <= 2 || count == 0) |
| { |
| rtx label = ix86_expand_aligntest (countreg, 2); |
| srcmem = change_address (src, HImode, srcreg); |
| dstmem = change_address (dst, HImode, destreg); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| if (align > 1 && count != 0 && (count & 1)) |
| { |
| srcmem = change_address (src, QImode, srcreg); |
| dstmem = change_address (dst, QImode, destreg); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| } |
| if (align <= 1 || count == 0) |
| { |
| rtx label = ix86_expand_aligntest (countreg, 1); |
| srcmem = change_address (src, QImode, srcreg); |
| dstmem = change_address (dst, QImode, destreg); |
| emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| } |
| |
| return 1; |
| } |
| |
/* Expand a string clear operation (bzero).  Use i386 string operations
   when profitable.  ix86_expand_movmem contains similar code.  */
| int |
| ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp) |
| { |
| rtx destreg, zeroreg, countreg, destexp; |
| enum machine_mode counter_mode; |
| HOST_WIDE_INT align = 0; |
| unsigned HOST_WIDE_INT count = 0; |
| |
| if (GET_CODE (align_exp) == CONST_INT) |
| align = INTVAL (align_exp); |
| |
| /* Can't use any of this if the user has appropriated esi. */ |
| if (global_regs[4]) |
| return 0; |
| |
| /* This simple hack avoids all inlining code and simplifies code below. */ |
| if (!TARGET_ALIGN_STRINGOPS) |
| align = 32; |
| |
| if (GET_CODE (count_exp) == CONST_INT) |
| { |
| count = INTVAL (count_exp); |
| if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) |
| return 0; |
| } |
  /* Figure out the proper mode for the counter.  For 32-bit targets it
     is always SImode; for 64-bit targets use SImode when possible,
     otherwise DImode.  Set COUNT to the number of bytes cleared when
     known at compile time.  */
| if (!TARGET_64BIT |
| || GET_MODE (count_exp) == SImode |
| || x86_64_zext_immediate_operand (count_exp, VOIDmode)) |
| counter_mode = SImode; |
| else |
| counter_mode = DImode; |
| |
| destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); |
| if (destreg != XEXP (dst, 0)) |
| dst = replace_equiv_address_nv (dst, destreg); |
| |
| |
  /* When optimizing for size, emit a simple rep ; stosb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb sequence
     is 7 bytes long, so if optimizing for size and the count is small
     enough that some stosl, stosw and stosb instructions without rep
     are shorter, fall through into the next branch.  */
| |
| if ((!optimize || optimize_size) |
| && (count == 0 |
| || ((count & 0x03) |
| && (!optimize_size || (count & 0x03) + (count >> 2) > 7)))) |
| { |
| emit_insn (gen_cld ()); |
| |
| countreg = ix86_zero_extend_to_Pmode (count_exp); |
| zeroreg = copy_to_mode_reg (QImode, const0_rtx); |
| destexp = gen_rtx_PLUS (Pmode, destreg, countreg); |
| emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp)); |
| } |
| else if (count != 0 |
| && (align >= 8 |
| || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) |
| || optimize_size || count < (unsigned int) 64)) |
| { |
| int size = TARGET_64BIT && !optimize_size ? 8 : 4; |
| unsigned HOST_WIDE_INT offset = 0; |
| |
| emit_insn (gen_cld ()); |
| |
| zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx); |
| if (count & ~(size - 1)) |
| { |
| unsigned HOST_WIDE_INT repcount; |
| unsigned int max_nonrep; |
| |
| repcount = count >> (size == 4 ? 2 : 3); |
| if (!TARGET_64BIT) |
| repcount &= 0x3fffffff; |
| |
| /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes. |
| movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN |
| bytes. In both cases the latter seems to be faster for small |
| values of N. */ |
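	  /* Worked example (editorial): clearing 16 bytes with size == 4
	     takes four 1-byte stosl insns (4 bytes of code), versus 7
	     bytes for movl $4, %ecx; rep; stosl; at repcount == 7 the
	     two are the same size, hence max_nonrep == 7 below.  */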
| max_nonrep = size == 4 ? 7 : 4; |
| if (!optimize_size) |
| switch (ix86_tune) |
| { |
| case PROCESSOR_PENTIUM4: |
| case PROCESSOR_NOCONA: |
| max_nonrep = 3; |
| break; |
| default: |
| break; |
| } |
| |
| if (repcount <= max_nonrep) |
| while (repcount-- > 0) |
| { |
| rtx mem = adjust_automodify_address_nv (dst, |
| GET_MODE (zeroreg), |
| destreg, offset); |
| emit_insn (gen_strset (destreg, mem, zeroreg)); |
| offset += size; |
| } |
| else |
| { |
| countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount)); |
| countreg = ix86_zero_extend_to_Pmode (countreg); |
| destexp = gen_rtx_ASHIFT (Pmode, countreg, |
| GEN_INT (size == 4 ? 2 : 3)); |
| destexp = gen_rtx_PLUS (Pmode, destexp, destreg); |
| emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, |
| destexp)); |
| offset = count & ~(size - 1); |
| } |
| } |
| if (size == 8 && (count & 0x04)) |
| { |
| rtx mem = adjust_automodify_address_nv (dst, SImode, destreg, |
| offset); |
| emit_insn (gen_strset (destreg, mem, |
| gen_rtx_SUBREG (SImode, zeroreg, 0))); |
| offset += 4; |
| } |
| if (count & 0x02) |
| { |
| rtx mem = adjust_automodify_address_nv (dst, HImode, destreg, |
| offset); |
| emit_insn (gen_strset (destreg, mem, |
| gen_rtx_SUBREG (HImode, zeroreg, 0))); |
| offset += 2; |
| } |
| if (count & 0x01) |
| { |
| rtx mem = adjust_automodify_address_nv (dst, QImode, destreg, |
| offset); |
| emit_insn (gen_strset (destreg, mem, |
| gen_rtx_SUBREG (QImode, zeroreg, 0))); |
| } |
| } |
| else |
| { |
| rtx countreg2; |
| rtx label = NULL; |
| /* Compute desired alignment of the string operation. */ |
| int desired_alignment = (TARGET_PENTIUMPRO |
| && (count == 0 || count >= (unsigned int) 260) |
| ? 8 : UNITS_PER_WORD); |
| |
      /* In case we don't know anything about the alignment, default to
	 the library version, since it is usually equally fast and
	 results in shorter code.

	 Also fall back to a call when we know that the count is large
	 and call overhead will not be important.  */
| if (!TARGET_INLINE_ALL_STRINGOPS |
| && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) |
| return 0; |
| |
| if (TARGET_SINGLE_STRINGOP) |
| emit_insn (gen_cld ()); |
| |
| countreg2 = gen_reg_rtx (Pmode); |
| countreg = copy_to_mode_reg (counter_mode, count_exp); |
| zeroreg = copy_to_mode_reg (Pmode, const0_rtx); |
| /* Get rid of MEM_OFFSET, it won't be accurate. */ |
| dst = change_address (dst, BLKmode, destreg); |
| |
| if (count == 0 && align < desired_alignment) |
| { |
| label = gen_label_rtx (); |
| emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), |
| LEU, 0, counter_mode, 1, label); |
| } |
| if (align <= 1) |
| { |
| rtx label = ix86_expand_aligntest (destreg, 1); |
| emit_insn (gen_strset (destreg, dst, |
| gen_rtx_SUBREG (QImode, zeroreg, 0))); |
| ix86_adjust_counter (countreg, 1); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| if (align <= 2) |
| { |
| rtx label = ix86_expand_aligntest (destreg, 2); |
| emit_insn (gen_strset (destreg, dst, |
| gen_rtx_SUBREG (HImode, zeroreg, 0))); |
| ix86_adjust_counter (countreg, 2); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| if (align <= 4 && desired_alignment > 4) |
| { |
| rtx label = ix86_expand_aligntest (destreg, 4); |
| emit_insn (gen_strset (destreg, dst, |
| (TARGET_64BIT |
| ? gen_rtx_SUBREG (SImode, zeroreg, 0) |
| : zeroreg))); |
| ix86_adjust_counter (countreg, 4); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| |
| if (label && desired_alignment > 4 && !TARGET_64BIT) |
| { |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| label = NULL_RTX; |
| } |
| |
| if (!TARGET_SINGLE_STRINGOP) |
| emit_insn (gen_cld ()); |
| if (TARGET_64BIT) |
| { |
| emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), |
| GEN_INT (3))); |
| destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); |
| } |
| else |
| { |
| emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); |
| destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); |
| } |
| destexp = gen_rtx_PLUS (Pmode, destexp, destreg); |
| emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp)); |
| |
| if (label) |
| { |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| |
| if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) |
| emit_insn (gen_strset (destreg, dst, |
| gen_rtx_SUBREG (SImode, zeroreg, 0))); |
| if (TARGET_64BIT && (align <= 4 || count == 0)) |
| { |
| rtx label = ix86_expand_aligntest (countreg, 4); |
| emit_insn (gen_strset (destreg, dst, |
| gen_rtx_SUBREG (SImode, zeroreg, 0))); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| if (align > 2 && count != 0 && (count & 2)) |
| emit_insn (gen_strset (destreg, dst, |
| gen_rtx_SUBREG (HImode, zeroreg, 0))); |
| if (align <= 2 || count == 0) |
| { |
| rtx label = ix86_expand_aligntest (countreg, 2); |
| emit_insn (gen_strset (destreg, dst, |
| gen_rtx_SUBREG (HImode, zeroreg, 0))); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| if (align > 1 && count != 0 && (count & 1)) |
| emit_insn (gen_strset (destreg, dst, |
| gen_rtx_SUBREG (QImode, zeroreg, 0))); |
| if (align <= 1 || count == 0) |
| { |
| rtx label = ix86_expand_aligntest (countreg, 1); |
| emit_insn (gen_strset (destreg, dst, |
| gen_rtx_SUBREG (QImode, zeroreg, 0))); |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| } |
| } |
| return 1; |
| } |
| |
| /* Expand strlen. */ |
| int |
| ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) |
| { |
| rtx addr, scratch1, scratch2, scratch3, scratch4; |
| |
  /* The generic case of the strlen expander is long.  Avoid expanding
     it unless TARGET_INLINE_ALL_STRINGOPS.  */
| |
| if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 |
| && !TARGET_INLINE_ALL_STRINGOPS |
| && !optimize_size |
| && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)) |
| return 0; |
| |
| addr = force_reg (Pmode, XEXP (src, 0)); |
| scratch1 = gen_reg_rtx (Pmode); |
| |
| if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 |
| && !optimize_size) |
| { |
      /* Well, it seems that some optimizer does not combine a call like
	 foo (strlen (bar), strlen (bar));
	 when the move and the subtraction are done here.  It does
	 calculate the length just once when these instructions are done
	 inside of output_strlen_unroll().  But I think since
	 &bar[strlen (bar)] is often used and I use one fewer register
	 for the lifetime of output_strlen_unroll() this is better.  */
| |
| emit_move_insn (out, addr); |
| |
| ix86_expand_strlensi_unroll_1 (out, src, align); |
| |
| /* strlensi_unroll_1 returns the address of the zero at the end of |
| the string, like memchr(), so compute the length by subtracting |
| the start address. */ |
| if (TARGET_64BIT) |
| emit_insn (gen_subdi3 (out, out, addr)); |
| else |
| emit_insn (gen_subsi3 (out, out, addr)); |
| } |
| else |
| { |
| rtx unspec; |
| scratch2 = gen_reg_rtx (Pmode); |
| scratch3 = gen_reg_rtx (Pmode); |
| scratch4 = force_reg (Pmode, constm1_rtx); |
| |
| emit_move_insn (scratch3, addr); |
| eoschar = force_reg (QImode, eoschar); |
| |
| emit_insn (gen_cld ()); |
| src = replace_equiv_address_nv (src, scratch3); |
| |
| /* If .md starts supporting :P, this can be done in .md. */ |
| unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, |
| scratch4), UNSPEC_SCAS); |
| emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); |
| if (TARGET_64BIT) |
| { |
| emit_insn (gen_one_cmpldi2 (scratch2, scratch1)); |
| emit_insn (gen_adddi3 (out, scratch2, constm1_rtx)); |
| } |
| else |
| { |
| emit_insn (gen_one_cmplsi2 (scratch2, scratch1)); |
| emit_insn (gen_addsi3 (out, scratch2, constm1_rtx)); |
| } |
| } |
| return 1; |
| } |
| |
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */
| |
| static void |
| ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) |
| { |
| int align; |
| rtx tmp; |
| rtx align_2_label = NULL_RTX; |
| rtx align_3_label = NULL_RTX; |
| rtx align_4_label = gen_label_rtx (); |
| rtx end_0_label = gen_label_rtx (); |
| rtx mem; |
| rtx tmpreg = gen_reg_rtx (SImode); |
| rtx scratch = gen_reg_rtx (SImode); |
| rtx cmp; |
| |
| align = 0; |
| if (GET_CODE (align_rtx) == CONST_INT) |
| align = INTVAL (align_rtx); |
| |
| /* Loop to check 1..3 bytes for null to get an aligned pointer. */ |
| |
| /* Is there a known alignment and is it less than 4? */ |
| if (align < 4) |
| { |
| rtx scratch1 = gen_reg_rtx (Pmode); |
| emit_move_insn (scratch1, out); |
| /* Is there a known alignment and is it not 2? */ |
| if (align != 2) |
| { |
| align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ |
| align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ |
| |
| /* Leave just the 3 lower bits. */ |
| align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), |
| NULL_RTX, 0, OPTAB_WIDEN); |
| |
| emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, |
| Pmode, 1, align_4_label); |
| emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, |
| Pmode, 1, align_2_label); |
| emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, |
| Pmode, 1, align_3_label); |
| } |
| else |
| { |
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether the pointer is aligned to 4 bytes.  */
| |
| align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, |
| NULL_RTX, 0, OPTAB_WIDEN); |
| |
| emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, |
| Pmode, 1, align_4_label); |
| } |
| |
| mem = change_address (src, QImode, out); |
| |
| /* Now compare the bytes. */ |
| |
      /* Compare the first few unaligned bytes on a byte-by-byte basis.  */
| emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, |
| QImode, 1, end_0_label); |
| |
| /* Increment the address. */ |
| if (TARGET_64BIT) |
| emit_insn (gen_adddi3 (out, out, const1_rtx)); |
| else |
| emit_insn (gen_addsi3 (out, out, const1_rtx)); |
| |
      /* Not needed with an alignment of 2.  */
| if (align != 2) |
| { |
| emit_label (align_2_label); |
| |
| emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, |
| end_0_label); |
| |
| if (TARGET_64BIT) |
| emit_insn (gen_adddi3 (out, out, const1_rtx)); |
| else |
| emit_insn (gen_addsi3 (out, out, const1_rtx)); |
| |
| emit_label (align_3_label); |
| } |
| |
| emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, |
| end_0_label); |
| |
| if (TARGET_64BIT) |
| emit_insn (gen_adddi3 (out, out, const1_rtx)); |
| else |
| emit_insn (gen_addsi3 (out, out, const1_rtx)); |
| } |
| |
  /* Generate a loop to check 4 bytes at a time.  It is not a good idea
     to align this loop: doing so only enlarges the program and does not
     speed it up.  */
| emit_label (align_4_label); |
| |
| mem = change_address (src, SImode, out); |
| emit_move_insn (scratch, mem); |
| if (TARGET_64BIT) |
| emit_insn (gen_adddi3 (out, out, GEN_INT (4))); |
| else |
| emit_insn (gen_addsi3 (out, out, GEN_INT (4))); |
| |
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
| |
| emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); |
| emit_insn (gen_one_cmplsi2 (scratch, scratch)); |
| emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); |
| emit_insn (gen_andsi3 (tmpreg, tmpreg, |
| gen_int_mode (0x80808080, SImode))); |
| emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, |
| align_4_label); |
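
  /* Editorial restatement of the zero-byte test emitted above: for a
     32-bit word X,

	((X - 0x01010101) & ~X & 0x80808080) != 0

     iff some byte of X is zero.  If no byte is zero, the per-byte
     subtraction of 1 never borrows across bytes and can set bit 7 of a
     byte only when that byte's high bit was already set, which ~X then
     masks away.  If some byte is zero, the lowest such byte wraps to
     0xff and is flagged.  */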
| |
| if (TARGET_CMOVE) |
| { |
| rtx reg = gen_reg_rtx (SImode); |
| rtx reg2 = gen_reg_rtx (Pmode); |
| emit_move_insn (reg, tmpreg); |
| emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); |
| |
| /* If zero is not in the first two bytes, move two bytes forward. */ |
| emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); |
| tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); |
| tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); |
| emit_insn (gen_rtx_SET (VOIDmode, tmpreg, |
| gen_rtx_IF_THEN_ELSE (SImode, tmp, |
| reg, |
| tmpreg))); |
      /* Emit the lea manually to avoid clobbering the flags.  */
| emit_insn (gen_rtx_SET (SImode, reg2, |
| gen_rtx_PLUS (Pmode, out, const2_rtx))); |
| |
| tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); |
| tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); |
| emit_insn (gen_rtx_SET (VOIDmode, out, |
| gen_rtx_IF_THEN_ELSE (Pmode, tmp, |
| reg2, |
| out))); |
| |
| } |
| else |
| { |
| rtx end_2_label = gen_label_rtx (); |
| /* Is zero in the first two bytes? */ |
| |
| emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); |
| tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); |
| tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); |
| tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, |
| gen_rtx_LABEL_REF (VOIDmode, end_2_label), |
| pc_rtx); |
| tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); |
| JUMP_LABEL (tmp) = end_2_label; |
| |
| /* Not in the first two. Move two bytes forward. */ |
| emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); |
| if (TARGET_64BIT) |
| emit_insn (gen_adddi3 (out, out, const2_rtx)); |
| else |
| emit_insn (gen_addsi3 (out, out, const2_rtx)); |
| |
| emit_label (end_2_label); |
| |
| } |
| |
| /* Avoid branch in fixing the byte. */ |
| tmpreg = gen_lowpart (QImode, tmpreg); |
| emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); |
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
| if (TARGET_64BIT) |
| emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp)); |
| else |
| emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp)); |
| |
| emit_label (end_0_label); |
| } |
| |
| void |
| ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, |
| rtx callarg2 ATTRIBUTE_UNUSED, |
| rtx pop, int sibcall) |
| { |
| rtx use = NULL, call; |
| |
| if (pop == const0_rtx) |
| pop = NULL; |
| gcc_assert (!TARGET_64BIT || !pop); |
| |
| if (TARGET_MACHO && !TARGET_64BIT) |
| { |
| #if TARGET_MACHO |
| if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) |
| fnaddr = machopic_indirect_call_target (fnaddr); |
| #endif |
| } |
| else |
| { |
| /* Static functions and indirect calls don't need the pic register. */ |
| if (! TARGET_64BIT && flag_pic |
| && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF |
| && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) |
| use_reg (&use, pic_offset_table_rtx); |
| } |
| |
| if (TARGET_64BIT && INTVAL (callarg2) >= 0) |
| { |
| rtx al = gen_rtx_REG (QImode, 0); |
| emit_move_insn (al, callarg2); |
| use_reg (&use, al); |
| } |
| |
| if (! call_insn_operand (XEXP (fnaddr, 0), Pmode)) |
| { |
| fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); |
| fnaddr = gen_rtx_MEM (QImode, fnaddr); |
| } |
| if (sibcall && TARGET_64BIT |
| && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode)) |
| { |
| rtx addr; |
| addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); |
| fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */); |
| emit_move_insn (fnaddr, addr); |
| fnaddr = gen_rtx_MEM (QImode, fnaddr); |
| } |
| |
| call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); |
| if (retval) |
| call = gen_rtx_SET (VOIDmode, retval, call); |
| if (pop) |
| { |
| pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); |
| pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); |
| call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); |
| } |
| |
| call = emit_call_insn (call); |
| if (use) |
| CALL_INSN_FUNCTION_USAGE (call) = use; |
| } |
| |
| |
| /* Clear stack slot assignments remembered from previous functions. |
| This is called from INIT_EXPANDERS once before RTL is emitted for each |
| function. */ |
| |
| static struct machine_function * |
| ix86_init_machine_status (void) |
| { |
| struct machine_function *f; |
| |
| f = ggc_alloc_cleared (sizeof (struct machine_function)); |
| f->use_fast_prologue_epilogue_nregs = -1; |
| f->tls_descriptor_call_expanded_p = 0; |
| |
| return f; |
| } |
| |
| /* Return a MEM corresponding to a stack slot with mode MODE. |
| Allocate a new slot if necessary. |
| |
| The RTL for a function can have several slots available: N is |
| which slot to use. */ |
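
/* For example, the x87/integer spill code elsewhere in this file requests
a scratch word with assign_386_stack_local (SImode, SLOT_TEMP); repeated
requests with the same mode and slot number return the same MEM. */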
| |
| rtx |
| assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) |
| { |
| struct stack_local_entry *s; |
| |
| gcc_assert (n < MAX_386_STACK_LOCALS); |
| |
| /* Virtual slot is valid only before vregs are instantiated. */ |
| gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated); |
| |
| for (s = ix86_stack_locals; s; s = s->next) |
| if (s->mode == mode && s->n == n) |
| return s->rtl; |
| |
| s = (struct stack_local_entry *) |
| ggc_alloc (sizeof (struct stack_local_entry)); |
| s->n = n; |
| s->mode = mode; |
| s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); |
| |
| s->next = ix86_stack_locals; |
| ix86_stack_locals = s; |
| return s->rtl; |
| } |
| |
| /* Construct the SYMBOL_REF for the tls_get_addr function. */ |
| |
| static GTY(()) rtx ix86_tls_symbol; |
| rtx |
| ix86_tls_get_addr (void) |
| { |
| |
| if (!ix86_tls_symbol) |
| { |
| ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, |
| (TARGET_ANY_GNU_TLS |
| && !TARGET_64BIT) |
| ? "___tls_get_addr" |
| : "__tls_get_addr"); |
| } |
| |
| return ix86_tls_symbol; |
| } |
| |
| /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ |
| |
| static GTY(()) rtx ix86_tls_module_base_symbol; |
| rtx |
| ix86_tls_module_base (void) |
| { |
| |
| if (!ix86_tls_module_base_symbol) |
| { |
| ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode, |
| "_TLS_MODULE_BASE_"); |
| SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) |
| |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; |
| } |
| |
| return ix86_tls_module_base_symbol; |
| } |
| |
| /* Calculate the length of the memory address in the instruction |
| encoding. Does not include the one-byte modrm, opcode, or prefix. */ |
| |
| int |
| memory_address_length (rtx addr) |
| { |
| struct ix86_address parts; |
| rtx base, index, disp; |
| int len; |
| int ok; |
| |
| if (GET_CODE (addr) == PRE_DEC |
| || GET_CODE (addr) == POST_INC |
| || GET_CODE (addr) == PRE_MODIFY |
| || GET_CODE (addr) == POST_MODIFY) |
| return 0; |
| |
| ok = ix86_decompose_address (addr, &parts); |
| gcc_assert (ok); |
| |
| if (parts.base && GET_CODE (parts.base) == SUBREG) |
| parts.base = SUBREG_REG (parts.base); |
| if (parts.index && GET_CODE (parts.index) == SUBREG) |
| parts.index = SUBREG_REG (parts.index); |
| |
| base = parts.base; |
| index = parts.index; |
| disp = parts.disp; |
| len = 0; |
| |
| /* Rule of thumb: |
| - esp as the base always wants an index, |
| - ebp as the base always wants a displacement. */ |
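
/* For example: (%eax) needs no extra bytes; (%esp) needs a SIB byte and
(%ebp) a disp8, one byte each; 8(%eax) needs a one-byte disp8 while
0x1234(%eax) needs a four-byte disp32; and (%eax,%ebx,4) needs a SIB
byte on top of any displacement. */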
| |
| /* Register Indirect. */ |
| if (base && !index && !disp) |
| { |
| /* esp (for its index) and ebp (for its displacement) need |
| the two-byte modrm form. */ |
| if (addr == stack_pointer_rtx |
| || addr == arg_pointer_rtx |
| || addr == frame_pointer_rtx |
| || addr == hard_frame_pointer_rtx) |
| len = 1; |
| } |
| |
| /* Direct Addressing. */ |
| else if (disp && !base && !index) |
| len = 4; |
| |
| else |
| { |
| /* Find the length of the displacement constant. */ |
| if (disp) |
| { |
| if (base && satisfies_constraint_K (disp)) |
| len = 1; |
| else |
| len = 4; |
| } |
| /* ebp always wants a displacement. */ |
| else if (base == hard_frame_pointer_rtx) |
| len = 1; |
| |
| /* An index requires the two-byte modrm form.... */ |
| if (index |
| /* ...like esp, which always wants an index. */ |
| || base == stack_pointer_rtx |
| || base == arg_pointer_rtx |
| || base == frame_pointer_rtx) |
| len += 1; |
| } |
| |
| return len; |
| } |
| |
/* Compute the default value for the "length_immediate" attribute.  When
SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
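/* For example, "addl $0x12345678, %eax" carries a four-byte immediate,
while with SHORTFORM set "addl $4, %eax" satisfies the 'K' constraint
(signed 8-bit) and counts as a single byte. */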
| int |
| ix86_attr_length_immediate_default (rtx insn, int shortform) |
| { |
| int len = 0; |
| int i; |
| extract_insn_cached (insn); |
| for (i = recog_data.n_operands - 1; i >= 0; --i) |
| if (CONSTANT_P (recog_data.operand[i])) |
| { |
| gcc_assert (!len); |
| if (shortform && satisfies_constraint_K (recog_data.operand[i])) |
| len = 1; |
| else |
| { |
switch (get_attr_mode (insn))
{
case MODE_QI:
len += 1;
break;
case MODE_HI:
len += 2;
break;
case MODE_SI:
len += 4;
break;
/* Immediates for DImode instructions are encoded as 32-bit
sign-extended values. */
case MODE_DI:
len += 4;
break;
default:
fatal_insn ("unknown insn mode", insn);
}
| } |
| } |
| return len; |
| } |
/* Compute the default value for the "length_address" attribute. */
| int |
| ix86_attr_length_address_default (rtx insn) |
| { |
| int i; |
| |
| if (get_attr_type (insn) == TYPE_LEA) |
| { |
| rtx set = PATTERN (insn); |
| |
| if (GET_CODE (set) == PARALLEL) |
| set = XVECEXP (set, 0, 0); |
| |
| gcc_assert (GET_CODE (set) == SET); |
| |
| return memory_address_length (SET_SRC (set)); |
| } |
| |
| extract_insn_cached (insn); |
| for (i = recog_data.n_operands - 1; i >= 0; --i) |
| if (GET_CODE (recog_data.operand[i]) == MEM) |
| { |
| return memory_address_length (XEXP (recog_data.operand[i], 0)); |
| break; |
| } |
| return 0; |
| } |
| |
| /* Return the maximum number of instructions a cpu can issue. */ |
| |
| static int |
| ix86_issue_rate (void) |
| { |
| switch (ix86_tune) |
| { |
| case PROCESSOR_PENTIUM: |
| case PROCESSOR_K6: |
| return 2; |
| |
| case PROCESSOR_PENTIUMPRO: |
| case PROCESSOR_PENTIUM4: |
| case PROCESSOR_ATHLON: |
| case PROCESSOR_K8: |
| case PROCESSOR_NOCONA: |
| case PROCESSOR_GENERIC32: |
| case PROCESSOR_GENERIC64: |
| return 3; |
| /* APPLE LOCAL begin mainline */ |
| case PROCESSOR_CORE2: |
| return 4; |
| /* APPLE LOCAL end mainline */ |
| |
| default: |
| return 1; |
| } |
| } |
| |
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
set by DEP_INSN and nothing else set by DEP_INSN. */
| |
| static int |
| ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) |
| { |
| rtx set, set2; |
| |
| /* Simplify the test for uninteresting insns. */ |
| if (insn_type != TYPE_SETCC |
| && insn_type != TYPE_ICMOV |
| && insn_type != TYPE_FCMOV |
| && insn_type != TYPE_IBR) |
| return 0; |
| |
| if ((set = single_set (dep_insn)) != 0) |
| { |
| set = SET_DEST (set); |
| set2 = NULL_RTX; |
| } |
| else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL |
| && XVECLEN (PATTERN (dep_insn), 0) == 2 |
| && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET |
| && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) |
| { |
| set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); |
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
| } |
| else |
| return 0; |
| |
| if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) |
| return 0; |
| |
| /* This test is true if the dependent insn reads the flags but |
| not any other potentially set register. */ |
| if (!reg_overlap_mentioned_p (set, PATTERN (insn))) |
| return 0; |
| |
| if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory |
| address with operands set by DEP_INSN. */ |
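
/* For example, on Pentium a load like "movl 4(%eax), %ebx" issued right
after an insn that writes %eax hits an address generation interlock;
ix86_adjust_cost below charges an extra cycle for that case. */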
| |
| static int |
| ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) |
| { |
| rtx addr; |
| |
| if (insn_type == TYPE_LEA |
| && TARGET_PENTIUM) |
| { |
| addr = PATTERN (insn); |
| |
| if (GET_CODE (addr) == PARALLEL) |
| addr = XVECEXP (addr, 0, 0); |
| |
| gcc_assert (GET_CODE (addr) == SET); |
| |
| addr = SET_SRC (addr); |
| } |
| else |
| { |
| int i; |
| extract_insn_cached (insn); |
| for (i = recog_data.n_operands - 1; i >= 0; --i) |
| if (GET_CODE (recog_data.operand[i]) == MEM) |
| { |
| addr = XEXP (recog_data.operand[i], 0); |
| goto found; |
| } |
| return 0; |
| found:; |
| } |
| |
| return modified_in_p (addr, dep_insn); |
| } |
| |
| static int |
| ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) |
| { |
| enum attr_type insn_type, dep_insn_type; |
| enum attr_memory memory; |
| rtx set, set2; |
| int dep_insn_code_number; |
| |
| /* Anti and output dependencies have zero cost on all CPUs. */ |
| if (REG_NOTE_KIND (link) != 0) |
| return 0; |
| |
| dep_insn_code_number = recog_memoized (dep_insn); |
| |
| /* If we can't recognize the insns, we can't really do anything. */ |
| if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) |
| return cost; |
| |
| insn_type = get_attr_type (insn); |
| dep_insn_type = get_attr_type (dep_insn); |
| |
| switch (ix86_tune) |
| { |
| case PROCESSOR_PENTIUM: |
| /* Address Generation Interlock adds a cycle of latency. */ |
| if (ix86_agi_dependent (insn, dep_insn, insn_type)) |
| cost += 1; |
| |
| /* ??? Compares pair with jump/setcc. */ |
| if (ix86_flags_dependent (insn, dep_insn, insn_type)) |
| cost = 0; |
| |
| /* Floating point stores require value to be ready one cycle earlier. */ |
| if (insn_type == TYPE_FMOV |
| && get_attr_memory (insn) == MEMORY_STORE |
| && !ix86_agi_dependent (insn, dep_insn, insn_type)) |
| cost += 1; |
| break; |
| |
| case PROCESSOR_PENTIUMPRO: |
| memory = get_attr_memory (insn); |
| |
| /* INT->FP conversion is expensive. */ |
| if (get_attr_fp_int_src (dep_insn)) |
| cost += 5; |
| |
| /* There is one cycle extra latency between an FP op and a store. */ |
| if (insn_type == TYPE_FMOV |
| && (set = single_set (dep_insn)) != NULL_RTX |
| && (set2 = single_set (insn)) != NULL_RTX |
| && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) |
| && GET_CODE (SET_DEST (set2)) == MEM) |
| cost += 1; |
| |
/* Show the ability of the reorder buffer to hide the latency of a load
by executing it in parallel with the previous instruction when the
previous instruction is not needed to compute the address. */
| if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
| && !ix86_agi_dependent (insn, dep_insn, insn_type)) |
| { |
/* Claim moves take one cycle, as the core can issue one load at a
time and the next load can start a cycle later. */
| if (dep_insn_type == TYPE_IMOV |
| || dep_insn_type == TYPE_FMOV) |
| cost = 1; |
| else if (cost > 1) |
| cost--; |
| } |
| break; |
| |
| case PROCESSOR_K6: |
| memory = get_attr_memory (insn); |
| |
| /* The esp dependency is resolved before the instruction is really |
| finished. */ |
| if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) |
| && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) |
| return 1; |
| |
| /* INT->FP conversion is expensive. */ |
| if (get_attr_fp_int_src (dep_insn)) |
| cost += 5; |
| |
/* Show the ability of the reorder buffer to hide the latency of a load
by executing it in parallel with the previous instruction when the
previous instruction is not needed to compute the address. */
| if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
| && !ix86_agi_dependent (insn, dep_insn, insn_type)) |
| { |
/* Claim moves take one cycle, as the core can issue one load at a
time and the next load can start a cycle later. */
| if (dep_insn_type == TYPE_IMOV |
| || dep_insn_type == TYPE_FMOV) |
| cost = 1; |
| else if (cost > 2) |
| cost -= 2; |
| else |
| cost = 1; |
| } |
| break; |
| |
| case PROCESSOR_ATHLON: |
| case PROCESSOR_K8: |
| case PROCESSOR_GENERIC32: |
| case PROCESSOR_GENERIC64: |
| memory = get_attr_memory (insn); |
| |
/* Show the ability of the reorder buffer to hide the latency of a load
by executing it in parallel with the previous instruction when the
previous instruction is not needed to compute the address. */
| if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
| && !ix86_agi_dependent (insn, dep_insn, insn_type)) |
| { |
| enum attr_unit unit = get_attr_unit (insn); |
| int loadcost = 3; |
| |
| /* Because of the difference between the length of integer and |
| floating unit pipeline preparation stages, the memory operands |
| for floating point are cheaper. |
| |
??? For Athlon the difference is most probably 2. */
| if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) |
| loadcost = 3; |
| else |
| loadcost = TARGET_ATHLON ? 2 : 0; |
| |
| if (cost >= loadcost) |
| cost -= loadcost; |
| else |
| cost = 0; |
| } |
| |
| default: |
| break; |
| } |
| |
| return cost; |
| } |
| |
/* How many alternative schedules to try.  This should be as wide as the
scheduling freedom in the DFA, but no wider.  Making this value too
large results in extra work for the scheduler. */
| |
| static int |
| ia32_multipass_dfa_lookahead (void) |
| { |
| if (ix86_tune == PROCESSOR_PENTIUM) |
| return 2; |
| |
| if (ix86_tune == PROCESSOR_PENTIUMPRO |
| || ix86_tune == PROCESSOR_K6) |
| return 1; |
| |
| else |
| return 0; |
| } |
| |
| |
| /* Compute the alignment given to a constant that is being placed in memory. |
| EXP is the constant and ALIGN is the alignment that the object would |
| ordinarily have. |
| The value of this function is used instead of that alignment to align |
| the object. */ |
| |
| int |
| ix86_constant_alignment (tree exp, int align) |
| { |
| if (TREE_CODE (exp) == REAL_CST) |
| { |
| if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) |
| return 64; |
| else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) |
| return 128; |
| } |
| else if (!optimize_size && TREE_CODE (exp) == STRING_CST |
| && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) |
| return BITS_PER_WORD; |
| |
| /* APPLE LOCAL begin 4090661 */ |
| #if TARGET_MACHO |
| /* Without this, static arrays initialized to strings get aligned |
| to 32 bytes. These go in cstring, so would result in a lot of extra |
| padding in files with a couple of small strings. 4090661. */ |
| else if (TREE_CODE (exp) == STRING_CST) |
| { |
| if (TREE_STRING_LENGTH (exp) >= 31 && !optimize_size) |
| return BITS_PER_WORD; |
| else |
| return 8; |
| } |
| #endif |
| /* APPLE LOCAL end 4090661 */ |
| return align; |
| } |
| |
| /* Compute the alignment for a static variable. |
| TYPE is the data type, and ALIGN is the alignment that |
| the object would ordinarily have. The value of this function is used |
| instead of that alignment to align the object. */ |
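
/* For example, with -O2 a 64-byte global character array is 512 bits
wide, which meets the 256-bit cap computed below, so its alignment is
raised to 256 bits (32 bytes). */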
| |
| int |
| ix86_data_alignment (tree type, int align) |
| { |
| int max_align = optimize_size ? BITS_PER_WORD : 256; |
| |
| if (AGGREGATE_TYPE_P (type) |
| && TYPE_SIZE (type) |
| && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST |
| && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align |
| || TREE_INT_CST_HIGH (TYPE_SIZE (type))) |
| && align < max_align) |
| align = max_align; |
| |
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
to a 16-byte boundary. */
| if (TARGET_64BIT) |
| { |
| if (AGGREGATE_TYPE_P (type) |
| && TYPE_SIZE (type) |
| && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST |
| && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 |
| || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) |
| return 128; |
| } |
| |
| if (TREE_CODE (type) == ARRAY_TYPE) |
| { |
| if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) |
| return 64; |
| if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) |
| return 128; |
| } |
| else if (TREE_CODE (type) == COMPLEX_TYPE) |
| { |
| |
| if (TYPE_MODE (type) == DCmode && align < 64) |
| return 64; |
| if (TYPE_MODE (type) == XCmode && align < 128) |
| return 128; |
| } |
| else if ((TREE_CODE (type) == RECORD_TYPE |
| || TREE_CODE (type) == UNION_TYPE |
| || TREE_CODE (type) == QUAL_UNION_TYPE) |
| && TYPE_FIELDS (type)) |
| { |
| if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) |
| return 64; |
| if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) |
| return 128; |
| } |
| else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE |
| || TREE_CODE (type) == INTEGER_TYPE) |
| { |
| if (TYPE_MODE (type) == DFmode && align < 64) |
| return 64; |
| if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) |
| return 128; |
| } |
| |
| return align; |
| } |
| |
/* Compute the alignment for a local variable.
TYPE is the data type, and ALIGN is the alignment that
the object would ordinarily have.  The value of this function is used
instead of that alignment to align the object. */
| |
| int |
| ix86_local_alignment (tree type, int align) |
| { |
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
to a 16-byte boundary. */
| if (TARGET_64BIT) |
| { |
| if (AGGREGATE_TYPE_P (type) |
| && TYPE_SIZE (type) |
| && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST |
| && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 |
| || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) |
| return 128; |
| } |
| if (TREE_CODE (type) == ARRAY_TYPE) |
| { |
| if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) |
| return 64; |
| if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) |
| return 128; |
| } |
| else if (TREE_CODE (type) == COMPLEX_TYPE) |
| { |
| if (TYPE_MODE (type) == DCmode && align < 64) |
| return 64; |
| if (TYPE_MODE (type) == XCmode && align < 128) |
| return 128; |
| } |
| else if ((TREE_CODE (type) == RECORD_TYPE |
| || TREE_CODE (type) == UNION_TYPE |
| || TREE_CODE (type) == QUAL_UNION_TYPE) |
| && TYPE_FIELDS (type)) |
| { |
| if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) |
| return 64; |
| if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) |
| return 128; |
| } |
| else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE |
| || TREE_CODE (type) == INTEGER_TYPE) |
| { |
| |
| if (TYPE_MODE (type) == DFmode && align < 64) |
| return 64; |
| if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) |
| return 128; |
| } |
| return align; |
| } |
| |
| /* Emit RTL insns to initialize the variable parts of a trampoline. |
| FNADDR is an RTX for the address of the function's pure code. |
| CXT is an RTX for the static chain value for the function. */ |
| void |
| x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) |
| { |
| if (!TARGET_64BIT) |
| { |
| /* Compute offset from the end of the jmp to the target function. */ |
| rtx disp = expand_binop (SImode, sub_optab, fnaddr, |
| plus_constant (tramp, 10), |
| NULL_RTX, 1, OPTAB_DIRECT); |
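
/* The ten bytes emitted below decode as
b9 <cxt:4>    movl $CXT, %ecx
e9 <disp:4>   jmp FNADDR
so the static chain arrives in %ecx and DISP is measured from the end
of the jmp, i.e. from TRAMP + 10. */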
| emit_move_insn (gen_rtx_MEM (QImode, tramp), |
| gen_int_mode (0xb9, QImode)); |
| emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); |
| emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), |
| gen_int_mode (0xe9, QImode)); |
| emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); |
| } |
| else |
| { |
| int offset = 0; |
/* Try to load the address using the shorter movl instead of movabs.
We may want to support movq for kernel mode, but the kernel does not
use trampolines at the moment. */
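/* Schematically, the bytes emitted below decode as
41 bb <fnaddr:4>   movl $FNADDR, %r11d   (or 49 bb <fnaddr:8>, movabs)
49 ba <cxt:8>      movabs $CXT, %r10
49 ff e3           jmp *%r11
loading the target into r11 and the static chain into r10. */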
| if (x86_64_zext_immediate_operand (fnaddr, VOIDmode)) |
| { |
| fnaddr = copy_to_mode_reg (DImode, fnaddr); |
| emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), |
| gen_int_mode (0xbb41, HImode)); |
| emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), |
| gen_lowpart (SImode, fnaddr)); |
| offset += 6; |
| } |
| else |
| { |
| emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), |
| gen_int_mode (0xbb49, HImode)); |
| emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), |
| fnaddr); |
| offset += 10; |
| } |
| /* Load static chain using movabs to r10. */ |
| emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), |
| gen_int_mode (0xba49, HImode)); |
| emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), |
| cxt); |
| offset += 10; |
/* Jump to r11. */
| emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), |
| gen_int_mode (0xff49, HImode)); |
| emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), |
| gen_int_mode (0xe3, QImode)); |
| offset += 3; |
| gcc_assert (offset <= TRAMPOLINE_SIZE); |
| } |
| |
| #ifdef ENABLE_EXECUTE_STACK |
| emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), |
| LCT_NORMAL, VOIDmode, 1, tramp, Pmode); |
| #endif |
| } |
| |
| /* LLVM LOCAL begin */ |
| /* Move ix86_builtins to i386.h */ |
| /* LLVM LOCAL end */ |
| #define def_builtin(MASK, NAME, TYPE, CODE) \ |
| do { \ |
| if ((MASK) & target_flags \ |
| && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \ |
| lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ |
| NULL, NULL_TREE); \ |
| } while (0) |
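
/* A typical use, e.g. registering the SSE ldmxcsr builtin from
ix86_init_mmx_sse_builtins:

def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr",
void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);

The decl is created only when every ISA bit in MASK is enabled, and
MASK_64BIT additionally requires TARGET_64BIT. */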
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* Like def_builtin, but also marks the function decl "const". */ |
| |
| static inline tree |
| def_builtin_const (int mask, const char *name, tree type, |
| enum ix86_builtins code) |
| { |
| tree decl = NULL_TREE; |
| if ((mask) & target_flags |
| && (!((mask) & MASK_64BIT) || TARGET_64BIT)) |
| decl = lang_hooks.builtin_function (name, type, code, BUILT_IN_MD, |
| NULL, NULL_TREE); |
| |
| if (decl) |
| TREE_READONLY (decl) = 1; |
| return decl; |
| } |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| /* Bits for builtin_description.flag. */ |
| |
/* Set when we don't support the comparison natively, and should swap
the comparison operands in order to support it. */
| #define BUILTIN_DESC_SWAP_OPERANDS 1 |
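
/* For example, SSE has no native greater-than packed compare, so
"__builtin_ia32_cmpgtps" below is described as an LT comparison with
BUILTIN_DESC_SWAP_OPERANDS set and is expanded as CMPLTPS with its
operands exchanged. */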
| |
| struct builtin_description |
| { |
| const unsigned int mask; |
| const enum insn_code icode; |
| const char *const name; |
| const enum ix86_builtins code; |
| const enum rtx_code comparison; |
| const unsigned int flag; |
| }; |
| |
| /* APPLE LOCAL begin 4299257 */ |
| static const struct builtin_description bdesc_comi[] = |
| { |
| { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, |
| { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, |
| { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, |
| { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, |
| }; |
| static const struct builtin_description bdesc_ucomi[] = |
| { |
| { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 }, |
| { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 }, |
| { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, |
| { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, |
| }; |
| /* APPLE LOCAL end 4299257 */ |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| static const struct builtin_description bdesc_ptest[] = |
| { |
| /* SSE4.1 */ |
| { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 }, |
| }; |
| |
| static const struct builtin_description bdesc_pcmpestr[] = |
| { |
| /* SSE4.2 */ |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode }, |
| }; |
| |
| static const struct builtin_description bdesc_pcmpistr[] = |
| { |
| /* SSE4.2 */ |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode }, |
| }; |
| |
| static const struct builtin_description bdesc_crc32[] = |
| { |
| /* SSE4.2 */ |
| { MASK_SSE4_2 | MASK_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 }, |
| { MASK_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 }, |
| }; |
| |
/* SSE builtins that take 3 arguments, where the last argument must be an
immediate or xmm0. */
| static const struct builtin_description bdesc_sse_3arg[] = |
| { |
| /* SSE4.1 */ |
| { MASK_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 }, |
| }; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| static const struct builtin_description bdesc_2arg[] = |
| { |
| /* SSE */ |
| { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, |
| |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 }, |
| |
| { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, |
| |
| { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, |
| |
| { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, |
| |
| /* MMX */ |
| { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| { MASK_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, |
| |
| { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, |
| |
| { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, |
| { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, |
| |
| { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, |
| |
| { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, |
| { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, |
| |
| { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, |
| |
| { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, |
| { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, |
| { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, |
| { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, |
| |
| { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, |
| |
| /* Special. */ |
| { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, |
| |
| { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, |
| { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, |
| |
| { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ashlv4hi2si, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ashlv2si2si, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ashlv1di3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ashlv1di2si, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, |
| |
| { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_lshrv4hi2si, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_lshrv2si2si, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_lshrv1di3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_lshrv1di2si, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, |
| |
| { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ashrv4hi2si, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_ashrv2si2si, 0, IX86_BUILTIN_PSRADI, 0, 0 }, |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
| |
| { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, |
| { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, |
| |
| /* SSE2 */ |
| { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, |
| BUILTIN_DESC_SWAP_OPERANDS }, |
| { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, |
| |
| /* SSE2 MMX */ |
| { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, |
| |
| { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, |
| { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, |
| { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, |
| { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, |
| { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, |
| { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, |
| { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, |
| { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, |
| /* APPLE LOCAL 5612787 mainline sse4 */ |
| { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, |
| |
| /* APPLE LOCAL 5612787 mainline sse4 */ |
| { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, |
| { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, |
| |
| /* SSE3 MMX */ |
| { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, |
| { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, |
| { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, |
| { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, |
| { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, |
| /* APPLE LOCAL begin mainline */ |
| { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }, |
| |
| /* SSSE3 MMX */ |
| { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 }, |
| /* APPLE LOCAL 5612787 mainline sse4 */ |
| { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }, |
| /* APPLE LOCAL end mainline */ |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* SSE4.1 */ |
| { MASK_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 }, |
| |
| /* SSE4.2 */ |
| { MASK_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 }, |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| }; |
| |
| static const struct builtin_description bdesc_1arg[] = |
| { |
| { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, |
| |
| { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, |
| |
| { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, |
| { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, |
| { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, |
| { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, |
| { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, |
| { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, |
| |
| { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, |
| { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, |
| |
| /* SSE3 */ |
| { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, |
| { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, |
| /* APPLE LOCAL begin mainline */ |
| |
| /* SSSE3 */ |
| { MASK_SSSE3, CODE_FOR_ssse3_pabsv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pabsv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pabsv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pabsv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 }, |
| { MASK_SSSE3, CODE_FOR_ssse3_pabsv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 }, |
| /* APPLE LOCAL 5612787 mainline sse4 */ |
| { MASK_SSSE3, CODE_FOR_ssse3_pabsv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 }, |
| /* APPLE LOCAL end mainline */ |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* SSE4.1 */ |
| { MASK_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 }, |
| |
/* Fake 1-arg builtins whose 2nd argument is a constant narrower than 8 bits; see the note after this table. */
| { MASK_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 }, |
| { MASK_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 }, |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| }; |
| |
| static void |
| ix86_init_builtins (void) |
| { |
| if (TARGET_MMX) |
| ix86_init_mmx_sse_builtins (); |
| |
| /* APPLE LOCAL begin constant cfstrings */ |
| #ifdef SUBTARGET_INIT_BUILTINS |
| SUBTARGET_INIT_BUILTINS; |
| #endif |
| /* APPLE LOCAL end constant cfstrings */ |
| } |
| |
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
is zero. Otherwise, if TARGET_SSE is not set, only the MMX builtins
are defined. */
| static void |
| ix86_init_mmx_sse_builtins (void) |
| { |
| const struct builtin_description * d; |
| size_t i; |
| |
| tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode); |
| tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); |
| tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); |
| tree V2DI_type_node |
| = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); |
| tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); |
| tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); |
| tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); |
| tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); |
| tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); |
| tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); |
| /* APPLE LOCAL 4656532 use V1DImode for _m64 */ |
| tree V1DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, V1DImode); |
| |
| tree pchar_type_node = build_pointer_type (char_type_node); |
| tree pcchar_type_node = build_pointer_type ( |
| build_type_variant (char_type_node, 1, 0)); |
| tree pfloat_type_node = build_pointer_type (float_type_node); |
| tree pcfloat_type_node = build_pointer_type ( |
| build_type_variant (float_type_node, 1, 0)); |
| tree pv2si_type_node = build_pointer_type (V2SI_type_node); |
| tree pv2di_type_node = build_pointer_type (V2DI_type_node); |
| /* APPLE LOCAL 4656532 use V1DImode for _m64 */ |
| tree pv1di_type_node = build_pointer_type (V1DI_type_node); |
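
/* Note on naming: each *_ftype_* variable below encodes a builtin
signature as RESULT_ftype_ARGUMENTS; for instance v4sf_ftype_v4sf_int
is the type of a function taking (V4SF, int) and returning V4SF. */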
| |
| /* Comparisons. */ |
| tree int_ftype_v4sf_v4sf |
| = build_function_type_list (integer_type_node, |
| V4SF_type_node, V4SF_type_node, NULL_TREE); |
| tree v4si_ftype_v4sf_v4sf |
| = build_function_type_list (V4SI_type_node, |
| V4SF_type_node, V4SF_type_node, NULL_TREE); |
| /* MMX/SSE/integer conversions. */ |
| tree int_ftype_v4sf |
| = build_function_type_list (integer_type_node, |
| V4SF_type_node, NULL_TREE); |
| tree int64_ftype_v4sf |
| = build_function_type_list (long_long_integer_type_node, |
| V4SF_type_node, NULL_TREE); |
| tree int_ftype_v8qi |
| = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); |
| tree v4sf_ftype_v4sf_int |
| = build_function_type_list (V4SF_type_node, |
| V4SF_type_node, integer_type_node, NULL_TREE); |
| tree v4sf_ftype_v4sf_int64 |
| = build_function_type_list (V4SF_type_node, |
| V4SF_type_node, long_long_integer_type_node, |
| NULL_TREE); |
| tree v4sf_ftype_v4sf_v2si |
| = build_function_type_list (V4SF_type_node, |
| V4SF_type_node, V2SI_type_node, NULL_TREE); |
| |
| /* Miscellaneous. */ |
| tree v8qi_ftype_v4hi_v4hi |
| = build_function_type_list (V8QI_type_node, |
| V4HI_type_node, V4HI_type_node, NULL_TREE); |
| tree v4hi_ftype_v2si_v2si |
| = build_function_type_list (V4HI_type_node, |
| V2SI_type_node, V2SI_type_node, NULL_TREE); |
| tree v4sf_ftype_v4sf_v4sf_int |
| = build_function_type_list (V4SF_type_node, |
| V4SF_type_node, V4SF_type_node, |
| integer_type_node, NULL_TREE); |
| tree v2si_ftype_v4hi_v4hi |
| = build_function_type_list (V2SI_type_node, |
| V4HI_type_node, V4HI_type_node, NULL_TREE); |
| tree v4hi_ftype_v4hi_int |
| = build_function_type_list (V4HI_type_node, |
| V4HI_type_node, integer_type_node, NULL_TREE); |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| tree v4hi_ftype_v4hi_v1di |
| = build_function_type_list (V4HI_type_node, |
| V4HI_type_node, V1DI_type_node, |
| NULL_TREE); |
| tree v2si_ftype_v2si_int |
| = build_function_type_list (V2SI_type_node, |
| V2SI_type_node, integer_type_node, NULL_TREE); |
| tree v2si_ftype_v2si_v1di |
| = build_function_type_list (V2SI_type_node, |
| V2SI_type_node, V1DI_type_node, NULL_TREE); |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
| |
| tree void_ftype_void |
| = build_function_type (void_type_node, void_list_node); |
| tree void_ftype_unsigned |
| = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); |
| tree void_ftype_unsigned_unsigned |
| = build_function_type_list (void_type_node, unsigned_type_node, |
| unsigned_type_node, NULL_TREE); |
| tree void_ftype_pcvoid_unsigned_unsigned |
| = build_function_type_list (void_type_node, const_ptr_type_node, |
| unsigned_type_node, unsigned_type_node, |
| NULL_TREE); |
| tree unsigned_ftype_void |
| = build_function_type (unsigned_type_node, void_list_node); |
| tree v2si_ftype_v4sf |
| = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); |
| /* Loads/stores. */ |
| tree void_ftype_v8qi_v8qi_pchar |
| = build_function_type_list (void_type_node, |
| V8QI_type_node, V8QI_type_node, |
| pchar_type_node, NULL_TREE); |
| tree v4sf_ftype_pcfloat |
| = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); |
/* @@@ the type is bogus: the memory operand of loadhps/loadlps really
points at two packed floats, not at a V2SI. */
| tree v4sf_ftype_v4sf_pv2si |
| = build_function_type_list (V4SF_type_node, |
| V4SF_type_node, pv2si_type_node, NULL_TREE); |
| tree void_ftype_pv2si_v4sf |
| = build_function_type_list (void_type_node, |
| pv2si_type_node, V4SF_type_node, NULL_TREE); |
| tree void_ftype_pfloat_v4sf |
| = build_function_type_list (void_type_node, |
| pfloat_type_node, V4SF_type_node, NULL_TREE); |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| tree void_ftype_pv1di_v1di |
| = build_function_type_list (void_type_node, |
| pv1di_type_node, V1DI_type_node, NULL_TREE); |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
| tree void_ftype_pv2di_v2di |
| = build_function_type_list (void_type_node, |
| pv2di_type_node, V2DI_type_node, NULL_TREE); |
| /* Normal vector unops. */ |
| tree v4sf_ftype_v4sf |
| = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); |
| /* APPLE LOCAL begin mainline */ |
| tree v16qi_ftype_v16qi |
| = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); |
| tree v8hi_ftype_v8hi |
| = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); |
| tree v4si_ftype_v4si |
| = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); |
| tree v8qi_ftype_v8qi |
| = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); |
| tree v4hi_ftype_v4hi |
| = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); |
| /* APPLE LOCAL end mainline */ |
| |
| /* Normal vector binops. */ |
| tree v4sf_ftype_v4sf_v4sf |
| = build_function_type_list (V4SF_type_node, |
| V4SF_type_node, V4SF_type_node, NULL_TREE); |
| tree v8qi_ftype_v8qi_v8qi |
| = build_function_type_list (V8QI_type_node, |
| V8QI_type_node, V8QI_type_node, NULL_TREE); |
| tree v4hi_ftype_v4hi_v4hi |
| = build_function_type_list (V4HI_type_node, |
| V4HI_type_node, V4HI_type_node, NULL_TREE); |
| tree v2si_ftype_v2si_v2si |
| = build_function_type_list (V2SI_type_node, |
| V2SI_type_node, V2SI_type_node, NULL_TREE); |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| tree v1di_ftype_v1di_v1di |
| = build_function_type_list (V1DI_type_node, |
| V1DI_type_node, V1DI_type_node, NULL_TREE); |
| /* APPLE LOCAL begin 4684674 */ |
| tree v1di_ftype_v1di_int |
| = build_function_type_list (V1DI_type_node, |
| V1DI_type_node, integer_type_node, NULL_TREE); |
| /* APPLE LOCAL end 4684674 */ |
| /* APPLE LOCAL begin 4656532 */ |
| tree v1di_ftype_v1di_v1di_int |
| = build_function_type_list (V1DI_type_node, |
| V1DI_type_node, |
| V1DI_type_node, |
| integer_type_node, NULL_TREE); |
| /* APPLE LOCAL end 4656532 */ |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
| |
| tree v2si_ftype_v2sf |
| = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); |
| tree v2sf_ftype_v2si |
| = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); |
| tree v2si_ftype_v2si |
| = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); |
| tree v2sf_ftype_v2sf |
| = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); |
| tree v2sf_ftype_v2sf_v2sf |
| = build_function_type_list (V2SF_type_node, |
| V2SF_type_node, V2SF_type_node, NULL_TREE); |
| tree v2si_ftype_v2sf_v2sf |
| = build_function_type_list (V2SI_type_node, |
| V2SF_type_node, V2SF_type_node, NULL_TREE); |
| tree pint_type_node = build_pointer_type (integer_type_node); |
| tree pdouble_type_node = build_pointer_type (double_type_node); |
| tree pcdouble_type_node = build_pointer_type ( |
| build_type_variant (double_type_node, 1, 0)); |
| tree int_ftype_v2df_v2df |
| = build_function_type_list (integer_type_node, |
| V2DF_type_node, V2DF_type_node, NULL_TREE); |
| |
| tree void_ftype_pcvoid |
| = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); |
| tree v4sf_ftype_v4si |
| = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); |
| tree v4si_ftype_v4sf |
| = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); |
| tree v2df_ftype_v4si |
| = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); |
| tree v4si_ftype_v2df |
| = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); |
| tree v2si_ftype_v2df |
| = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); |
| tree v4sf_ftype_v2df |
| = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); |
| tree v2df_ftype_v2si |
| = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); |
| tree v2df_ftype_v4sf |
| = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); |
| tree int_ftype_v2df |
| = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); |
| tree int64_ftype_v2df |
| = build_function_type_list (long_long_integer_type_node, |
| V2DF_type_node, NULL_TREE); |
| tree v2df_ftype_v2df_int |
| = build_function_type_list (V2DF_type_node, |
| V2DF_type_node, integer_type_node, NULL_TREE); |
| tree v2df_ftype_v2df_int64 |
| = build_function_type_list (V2DF_type_node, |
| V2DF_type_node, long_long_integer_type_node, |
| NULL_TREE); |
| tree v4sf_ftype_v4sf_v2df |
| = build_function_type_list (V4SF_type_node, |
| V4SF_type_node, V2DF_type_node, NULL_TREE); |
| tree v2df_ftype_v2df_v4sf |
| = build_function_type_list (V2DF_type_node, |
| V2DF_type_node, V4SF_type_node, NULL_TREE); |
| tree v2df_ftype_v2df_v2df_int |
| = build_function_type_list (V2DF_type_node, |
| V2DF_type_node, V2DF_type_node, |
| integer_type_node, |
| NULL_TREE); |
| tree v2df_ftype_v2df_pcdouble |
| = build_function_type_list (V2DF_type_node, |
| V2DF_type_node, pcdouble_type_node, NULL_TREE); |
| tree void_ftype_pdouble_v2df |
| = build_function_type_list (void_type_node, |
| pdouble_type_node, V2DF_type_node, NULL_TREE); |
| tree void_ftype_pint_int |
| = build_function_type_list (void_type_node, |
| pint_type_node, integer_type_node, NULL_TREE); |
| tree void_ftype_v16qi_v16qi_pchar |
| = build_function_type_list (void_type_node, |
| V16QI_type_node, V16QI_type_node, |
| pchar_type_node, NULL_TREE); |
| tree v2df_ftype_pcdouble |
| = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); |
| tree v2df_ftype_v2df_v2df |
| = build_function_type_list (V2DF_type_node, |
| V2DF_type_node, V2DF_type_node, NULL_TREE); |
| tree v16qi_ftype_v16qi_v16qi |
| = build_function_type_list (V16QI_type_node, |
| V16QI_type_node, V16QI_type_node, NULL_TREE); |
| tree v8hi_ftype_v8hi_v8hi |
| = build_function_type_list (V8HI_type_node, |
| V8HI_type_node, V8HI_type_node, NULL_TREE); |
| tree v4si_ftype_v4si_v4si |
| = build_function_type_list (V4SI_type_node, |
| V4SI_type_node, V4SI_type_node, NULL_TREE); |
| tree v2di_ftype_v2di_v2di |
| = build_function_type_list (V2DI_type_node, |
| V2DI_type_node, V2DI_type_node, NULL_TREE); |
| tree v2di_ftype_v2df_v2df |
| = build_function_type_list (V2DI_type_node, |
| V2DF_type_node, V2DF_type_node, NULL_TREE); |
| tree v2df_ftype_v2df |
| = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); |
| tree v2di_ftype_v2di_int |
| = build_function_type_list (V2DI_type_node, |
| V2DI_type_node, integer_type_node, NULL_TREE); |
| /* APPLE LOCAL begin mainline */ |
| tree v2di_ftype_v2di_v2di_int |
| = build_function_type_list (V2DI_type_node, V2DI_type_node, |
| V2DI_type_node, integer_type_node, NULL_TREE); |
| /* APPLE LOCAL end mainline */ |
| tree v4si_ftype_v4si_int |
| = build_function_type_list (V4SI_type_node, |
| V4SI_type_node, integer_type_node, NULL_TREE); |
| tree v8hi_ftype_v8hi_int |
| = build_function_type_list (V8HI_type_node, |
| V8HI_type_node, integer_type_node, NULL_TREE); |
| tree v4si_ftype_v8hi_v8hi |
| = build_function_type_list (V4SI_type_node, |
| V8HI_type_node, V8HI_type_node, NULL_TREE); |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| tree v1di_ftype_v8qi_v8qi |
| = build_function_type_list (V1DI_type_node, |
| V8QI_type_node, V8QI_type_node, NULL_TREE); |
| tree v1di_ftype_v2si_v2si |
| = build_function_type_list (V1DI_type_node, |
| V2SI_type_node, V2SI_type_node, NULL_TREE); |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| tree v2di_ftype_v16qi_v16qi |
| = build_function_type_list (V2DI_type_node, |
| V16QI_type_node, V16QI_type_node, NULL_TREE); |
| tree v2di_ftype_v4si_v4si |
| = build_function_type_list (V2DI_type_node, |
| V4SI_type_node, V4SI_type_node, NULL_TREE); |
| tree int_ftype_v16qi |
| = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); |
| tree v16qi_ftype_pcchar |
| = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); |
| tree void_ftype_pchar_v16qi |
| = build_function_type_list (void_type_node, |
| pchar_type_node, V16QI_type_node, NULL_TREE); |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| tree v2di_ftype_v2di_unsigned_unsigned |
| = build_function_type_list (V2DI_type_node, V2DI_type_node, |
| unsigned_type_node, unsigned_type_node, |
| NULL_TREE); |
| tree v2di_ftype_v2di_v2di_unsigned_unsigned |
| = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, |
| unsigned_type_node, unsigned_type_node, |
| NULL_TREE); |
| tree v2di_ftype_v2di_v16qi |
| = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, |
| NULL_TREE); |
| tree v2df_ftype_v2df_v2df_v2df |
| = build_function_type_list (V2DF_type_node, |
| V2DF_type_node, V2DF_type_node, |
| V2DF_type_node, NULL_TREE); |
| tree v4sf_ftype_v4sf_v4sf_v4sf |
| = build_function_type_list (V4SF_type_node, |
| V4SF_type_node, V4SF_type_node, |
| V4SF_type_node, NULL_TREE); |
| tree v8hi_ftype_v16qi |
| = build_function_type_list (V8HI_type_node, V16QI_type_node, |
| NULL_TREE); |
| tree v4si_ftype_v16qi |
| = build_function_type_list (V4SI_type_node, V16QI_type_node, |
| NULL_TREE); |
| tree v2di_ftype_v16qi |
| = build_function_type_list (V2DI_type_node, V16QI_type_node, |
| NULL_TREE); |
| tree v4si_ftype_v8hi |
| = build_function_type_list (V4SI_type_node, V8HI_type_node, |
| NULL_TREE); |
| tree v2di_ftype_v8hi |
| = build_function_type_list (V2DI_type_node, V8HI_type_node, |
| NULL_TREE); |
| tree v2di_ftype_v4si |
| = build_function_type_list (V2DI_type_node, V4SI_type_node, |
| NULL_TREE); |
| tree v2di_ftype_pv2di |
| = build_function_type_list (V2DI_type_node, pv2di_type_node, |
| NULL_TREE); |
| tree v16qi_ftype_v16qi_v16qi_int |
| = build_function_type_list (V16QI_type_node, V16QI_type_node, |
| V16QI_type_node, integer_type_node, |
| NULL_TREE); |
| tree v16qi_ftype_v16qi_v16qi_v16qi |
| = build_function_type_list (V16QI_type_node, V16QI_type_node, |
| V16QI_type_node, V16QI_type_node, |
| NULL_TREE); |
| tree v8hi_ftype_v8hi_v8hi_int |
| = build_function_type_list (V8HI_type_node, V8HI_type_node, |
| V8HI_type_node, integer_type_node, |
| NULL_TREE); |
| tree v4si_ftype_v4si_v4si_int |
| = build_function_type_list (V4SI_type_node, V4SI_type_node, |
| V4SI_type_node, integer_type_node, |
| NULL_TREE); |
| tree int_ftype_v2di_v2di |
| = build_function_type_list (integer_type_node, |
| V2DI_type_node, V2DI_type_node, |
| NULL_TREE); |
| tree int_ftype_v16qi_int_v16qi_int_int |
| = build_function_type_list (integer_type_node, |
| V16QI_type_node, |
| integer_type_node, |
| V16QI_type_node, |
| integer_type_node, |
| integer_type_node, |
| NULL_TREE); |
| tree v16qi_ftype_v16qi_int_v16qi_int_int |
| = build_function_type_list (V16QI_type_node, |
| V16QI_type_node, |
| integer_type_node, |
| V16QI_type_node, |
| integer_type_node, |
| integer_type_node, |
| NULL_TREE); |
| tree int_ftype_v16qi_v16qi_int |
| = build_function_type_list (integer_type_node, |
| V16QI_type_node, |
| V16QI_type_node, |
| integer_type_node, |
| NULL_TREE); |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| tree float80_type; |
| tree float128_type; |
| tree ftype; |
| |
| /* LLVM LOCAL begin */ |
| #ifdef ENABLE_LLVM |
| /* LLVM doesn't initialize the RTL backend, so build_vector_type will assign |
| all of these types BLKmode. This interferes with i386.c-specific |
| argument passing routines. As such, give them the correct modes here |
| manually. */ |
| TYPE_MODE (V16QI_type_node) = V16QImode; |
| TYPE_MODE (V2SI_type_node) = V2SImode; |
| TYPE_MODE (V2SF_type_node) = V2SFmode; |
| TYPE_MODE (V2DI_type_node) = V2DImode; |
| TYPE_MODE (V2DF_type_node) = V2DFmode; |
| TYPE_MODE (V4SF_type_node) = V4SFmode; |
| TYPE_MODE (V4SI_type_node) = V4SImode; |
| TYPE_MODE (V4HI_type_node) = V4HImode; |
| TYPE_MODE (V8QI_type_node) = V8QImode; |
| TYPE_MODE (V8HI_type_node) = V8HImode; |
| TYPE_MODE (V1DI_type_node) = V1DImode; |
| #endif |
| /* LLVM LOCAL end */ |
| |
| /* The __float80 type. */ |
| if (TYPE_MODE (long_double_type_node) == XFmode) |
| (*lang_hooks.types.register_builtin_type) (long_double_type_node, |
| "__float80"); |
| else |
| { |
| float80_type = make_node (REAL_TYPE); |
| TYPE_PRECISION (float80_type) = 80; |
| layout_type (float80_type); |
| (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); |
| } |
| |
| if (TARGET_64BIT) |
| { |
| float128_type = make_node (REAL_TYPE); |
| TYPE_PRECISION (float128_type) = 128; |
| layout_type (float128_type); |
| (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); |
| } |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* Add all SSE builtins that are more or less simple operations on |
| three operands. */ |
| for (i = 0, d = bdesc_sse_3arg; |
| i < ARRAY_SIZE (bdesc_sse_3arg); |
| i++, d++) |
| { |
| /* Use one of the operands; the target can have a different mode for |
| mask-generating compares. */ |
| enum machine_mode mode; |
| tree type; |
| |
| if (d->name == 0) |
| continue; |
| mode = insn_data[d->icode].operand[1].mode; |
| |
| switch (mode) |
| { |
| case V16QImode: |
| type = v16qi_ftype_v16qi_v16qi_int; |
| break; |
| case V8HImode: |
| type = v8hi_ftype_v8hi_v8hi_int; |
| break; |
| case V4SImode: |
| type = v4si_ftype_v4si_v4si_int; |
| break; |
| case V2DImode: |
| type = v2di_ftype_v2di_v2di_int; |
| break; |
| case V2DFmode: |
| type = v2df_ftype_v2df_v2df_int; |
| break; |
| case V4SFmode: |
| type = v4sf_ftype_v4sf_v4sf_int; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Override for variable blends. */ |
| switch (d->icode) |
| { |
| case CODE_FOR_sse4_1_blendvpd: |
| type = v2df_ftype_v2df_v2df_v2df; |
| break; |
| case CODE_FOR_sse4_1_blendvps: |
| type = v4sf_ftype_v4sf_v4sf_v4sf; |
| break; |
| case CODE_FOR_sse4_1_pblendvb: |
| type = v16qi_ftype_v16qi_v16qi_v16qi; |
| break; |
| default: |
| break; |
| } |
| |
| def_builtin (d->mask, d->name, type, d->code); |
| } |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
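/* Illustrative note on the loop above: for most 3-arg entries the third
operand is an immediate, so e.g. a V2DFmode insn is declared as
v2df_ftype_v2df_v2df_int; the variable blends are the exception, since
their selector lives in xmm0, giving blendvpd the all-vector type
v2df_ftype_v2df_v2df_v2df instead. */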
| |
| /* Add all builtins that are more or less simple operations on two |
| operands. */ |
| for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) |
| { |
| /* Use one of the operands; the target can have a different mode for |
| mask-generating compares. */ |
| enum machine_mode mode; |
| tree type; |
| |
| if (d->name == 0) |
| continue; |
| mode = insn_data[d->icode].operand[1].mode; |
| |
| switch (mode) |
| { |
| case V16QImode: |
| type = v16qi_ftype_v16qi_v16qi; |
| break; |
| case V8HImode: |
| type = v8hi_ftype_v8hi_v8hi; |
| break; |
| case V4SImode: |
| type = v4si_ftype_v4si_v4si; |
| break; |
| case V2DImode: |
| type = v2di_ftype_v2di_v2di; |
| break; |
| case V2DFmode: |
| type = v2df_ftype_v2df_v2df; |
| break; |
| case V4SFmode: |
| type = v4sf_ftype_v4sf_v4sf; |
| break; |
| case V8QImode: |
| type = v8qi_ftype_v8qi_v8qi; |
| break; |
| case V4HImode: |
| type = v4hi_ftype_v4hi_v4hi; |
| break; |
| case V2SImode: |
| type = v2si_ftype_v2si_v2si; |
| break; |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| case V1DImode: |
| type = v1di_ftype_v1di_v1di; |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Override for comparisons. */ |
| if (d->icode == CODE_FOR_sse_maskcmpv4sf3 |
| || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3) |
| type = v4si_ftype_v4sf_v4sf; |
| |
| if (d->icode == CODE_FOR_sse2_maskcmpv2df3 |
| || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) |
| type = v2di_ftype_v2df_v2df; |
| |
| def_builtin (d->mask, d->name, type, d->code); |
| } |
| /* APPLE LOCAL begin mainline */ |
| /* Add all builtins that are more or less simple operations on 1 operand. */ |
| for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) |
| { |
| enum machine_mode mode; |
| tree type; |
| |
| if (d->name == 0) |
| continue; |
| mode = insn_data[d->icode].operand[1].mode; |
| |
| switch (mode) |
| { |
| case V16QImode: |
| type = v16qi_ftype_v16qi; |
| break; |
| case V8HImode: |
| type = v8hi_ftype_v8hi; |
| break; |
| case V4SImode: |
| type = v4si_ftype_v4si; |
| break; |
| case V2DFmode: |
| type = v2df_ftype_v2df; |
| break; |
| case V4SFmode: |
| type = v4sf_ftype_v4sf; |
| break; |
| case V8QImode: |
| type = v8qi_ftype_v8qi; |
| break; |
| case V4HImode: |
| type = v4hi_ftype_v4hi; |
| break; |
| case V2SImode: |
| type = v2si_ftype_v2si; |
| break; |
| |
| default: |
gcc_unreachable ();
| } |
| |
| def_builtin (d->mask, d->name, type, d->code); |
| } |
| /* APPLE LOCAL end mainline */ |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* pcmpestr[im] insns. */ |
| for (i = 0, d = bdesc_pcmpestr; |
| i < ARRAY_SIZE (bdesc_pcmpestr); |
| i++, d++) |
| { |
| if (d->code == IX86_BUILTIN_PCMPESTRM128) |
| ftype = v16qi_ftype_v16qi_int_v16qi_int_int; |
| else |
| ftype = int_ftype_v16qi_int_v16qi_int_int; |
| def_builtin (d->mask, d->name, ftype, d->code); |
| } |
| |
| /* pcmpistr[im] insns. */ |
| for (i = 0, d = bdesc_pcmpistr; |
| i < ARRAY_SIZE (bdesc_pcmpistr); |
| i++, d++) |
| { |
| if (d->code == IX86_BUILTIN_PCMPISTRM128) |
| ftype = v16qi_ftype_v16qi_v16qi_int; |
| else |
| ftype = int_ftype_v16qi_v16qi_int; |
| def_builtin (d->mask, d->name, ftype, d->code); |
| } |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| /* Add the remaining MMX insns with somewhat more complicated types. */ |
| def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSLLW); |
| def_builtin (MASK_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI); |
| def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSLLD); |
| def_builtin (MASK_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI); |
| def_builtin (MASK_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ); |
| def_builtin (MASK_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI); |
| |
| def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRLW); |
| def_builtin (MASK_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI); |
| def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRLD); |
| def_builtin (MASK_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI); |
| def_builtin (MASK_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ); |
| def_builtin (MASK_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI); |
| |
| def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRAW); |
| def_builtin (MASK_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI); |
| def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRAD); |
| def_builtin (MASK_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI); |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
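/* Illustrative note: each MMX shift above comes in two forms. The "...i"
variants (e.g. __builtin_ia32_psllwi) take the count as a plain int; the
others (e.g. __builtin_ia32_psllw) take it in an MMX register, which
this APPLE LOCAL change models as V1DI. */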
| |
| def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); |
| def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); |
| |
| /* APPLE LOCAL 4299257 */ |
| /* comi insns. */ |
| for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) |
| if (d->mask == MASK_SSE2) |
| def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); |
| else |
| def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); |
| |
| /* APPLE LOCAL begin 4299257 */ |
| /* ucomi insns. */ |
| for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++) |
| if (d->mask == MASK_SSE2) |
| def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); |
| else |
| def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); |
| /* APPLE LOCAL end 4299257 */ |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* ptest insns. */ |
| for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++) |
| def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code); |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); |
| def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); |
| def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); |
| |
| def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); |
| def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); |
| def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); |
| def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); |
| def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); |
| def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); |
| def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); |
| def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); |
| def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); |
| def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); |
| def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); |
| |
| def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); |
| |
| def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); |
| def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); |
| |
| def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); |
| def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); |
| def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); |
| def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); |
| |
| def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); |
| def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); |
| def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pv1di_v1di, IX86_BUILTIN_MOVNTQ); |
| |
| def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); |
| |
| def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
| |
| def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); |
| def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); |
| def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); |
| def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); |
| def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); |
| def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); |
| |
| def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); |
| |
| /* Original 3DNow! */ |
| def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); |
| def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); |
| |
| /* 3DNow! extension as used in the Athlon CPU. */ |
| def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); |
| def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); |
| def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); |
| def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); |
| def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); |
| def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); |
| |
| /* SSE2 */ |
| def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); |
| def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD); |
| def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); |
| def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); |
| def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); |
| def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); |
| def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); |
| def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); |
| def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); |
| def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); |
| def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); |
| def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); |
| def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); |
| def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); |
| def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); |
| def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); |
| |
| /* APPLE LOCAL 4656532 use V1DImode for _m64 */ |
| def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ); |
| def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); |
| /* APPLE LOCAL 5919583 */ |
| def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128_BYTESHIFT); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); |
| /* APPLE LOCAL 5919583 */ |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128_BYTESHIFT); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); |
| def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); |
| |
| def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); |
| |
| /* Prescott New Instructions. */ |
| def_builtin (MASK_SSE3, "__builtin_ia32_monitor", |
| void_ftype_pcvoid_unsigned_unsigned, |
| IX86_BUILTIN_MONITOR); |
| def_builtin (MASK_SSE3, "__builtin_ia32_mwait", |
| void_ftype_unsigned_unsigned, |
| IX86_BUILTIN_MWAIT); |
| def_builtin (MASK_SSE3, "__builtin_ia32_movshdup", |
| v4sf_ftype_v4sf, |
| IX86_BUILTIN_MOVSHDUP); |
| def_builtin (MASK_SSE3, "__builtin_ia32_movsldup", |
| v4sf_ftype_v4sf, |
| IX86_BUILTIN_MOVSLDUP); |
| def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", |
| v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); |
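
/* Illustrative usage of the monitor/mwait builtins defined above
(argument shapes per void_ftype_pcvoid_unsigned_unsigned and
void_ftype_unsigned_unsigned):

__builtin_ia32_monitor (addr, 0, 0);   -- arm address monitoring on ADDR
__builtin_ia32_mwait (0, 0);           -- wait until a store hits it  */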
| |
| /* APPLE LOCAL begin 4099020 */ |
| ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); |
| def_builtin (MASK_SSE, "__builtin_ia32_movqv4si", ftype, IX86_BUILTIN_MOVQ); |
| ftype = build_function_type_list (V4SI_type_node, pv2si_type_node, NULL_TREE); |
| def_builtin (MASK_SSE, "__builtin_ia32_loadlv4si", ftype, IX86_BUILTIN_LOADQ); |
| ftype = build_function_type_list (void_type_node, pv2si_type_node, V4SI_type_node, NULL_TREE); |
| def_builtin (MASK_SSE, "__builtin_ia32_storelv4si", ftype, IX86_BUILTIN_STOREQ); |
| /* APPLE LOCAL end 4099020 */ |
| |
| /* APPLE LOCAL begin 4656532 */ |
| /* Merom New Instructions. */ |
| def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128", |
| v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128); |
| /* LLVM LOCAL - This is an MMX builtin */ |
| def_builtin (MASK_MMX, "__builtin_ia32_palignr", v1di_ftype_v1di_v1di_int, |
| IX86_BUILTIN_PALIGNR); |
| |
| /* APPLE LOCAL end 4656532 */ |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* SSE4.1. */ |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128); |
| def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD); |
| def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS); |
| def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD); |
| def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS); |
| |
| /* SSE4.2. */ |
| ftype = build_function_type_list (unsigned_type_node, |
| unsigned_type_node, |
| unsigned_char_type_node, |
| NULL_TREE); |
| def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI); |
| ftype = build_function_type_list (unsigned_type_node, |
| unsigned_type_node, |
| short_unsigned_type_node, |
| NULL_TREE); |
| def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI); |
| ftype = build_function_type_list (unsigned_type_node, |
| unsigned_type_node, |
| unsigned_type_node, |
| NULL_TREE); |
| def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI); |
| ftype = build_function_type_list (long_long_unsigned_type_node, |
| long_long_unsigned_type_node, |
| long_long_unsigned_type_node, |
| NULL_TREE); |
| def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI); |
| |
/* AMDFAM10 SSE4A new builtins. */
| def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD); |
| def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS); |
| def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI); |
| def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ); |
| def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI); |
| def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ); |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| /* Access to the vec_init patterns. */ |
| ftype = build_function_type_list (V2SI_type_node, integer_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si", |
| ftype, IX86_BUILTIN_VEC_INIT_V2SI); |
| |
| ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, |
| short_integer_type_node, |
| short_integer_type_node, |
| short_integer_type_node, NULL_TREE); |
| def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi", |
| ftype, IX86_BUILTIN_VEC_INIT_V4HI); |
| |
| ftype = build_function_type_list (V8QI_type_node, char_type_node, |
| char_type_node, char_type_node, |
| char_type_node, char_type_node, |
| char_type_node, char_type_node, |
| char_type_node, NULL_TREE); |
| def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi", |
| ftype, IX86_BUILTIN_VEC_INIT_V8QI); |
| |
| /* Access to the vec_extract patterns. */ |
| ftype = build_function_type_list (double_type_node, V2DF_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df", |
| ftype, IX86_BUILTIN_VEC_EXT_V2DF); |
| |
| ftype = build_function_type_list (long_long_integer_type_node, |
| V2DI_type_node, integer_type_node, |
| NULL_TREE); |
| def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di", |
| ftype, IX86_BUILTIN_VEC_EXT_V2DI); |
| |
| ftype = build_function_type_list (float_type_node, V4SF_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf", |
| ftype, IX86_BUILTIN_VEC_EXT_V4SF); |
| |
| ftype = build_function_type_list (intSI_type_node, V4SI_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si", |
| ftype, IX86_BUILTIN_VEC_EXT_V4SI); |
| |
| /* APPLE LOCAL begin radar 4469713 */ |
| /* The return type of the builtin function should be an unsigned instead |
| of a signed type. */ |
| ftype = build_function_type_list (unsigned_intHI_type_node, V8HI_type_node, |
| /* APPLE LOCAL end radar 4469713 */ |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi", |
| ftype, IX86_BUILTIN_VEC_EXT_V8HI); |
| |
| ftype = build_function_type_list (intHI_type_node, V4HI_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", |
| ftype, IX86_BUILTIN_VEC_EXT_V4HI); |
| |
| ftype = build_function_type_list (intSI_type_node, V2SI_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si", |
| ftype, IX86_BUILTIN_VEC_EXT_V2SI); |
| |
| ftype = build_function_type_list (intQI_type_node, V16QI_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI); |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* Access to the vec_set patterns. */ |
| ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, |
| intDI_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE4_1 | MASK_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI); |
| |
| ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, |
| float_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF); |
| |
| ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, |
| intSI_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI); |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, |
| intHI_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi", |
| ftype, IX86_BUILTIN_VEC_SET_V8HI); |
| |
| ftype = build_function_type_list (V4HI_type_node, V4HI_type_node, |
| intHI_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi", |
| ftype, IX86_BUILTIN_VEC_SET_V4HI); |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, |
| intQI_type_node, |
| integer_type_node, NULL_TREE); |
| def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI); |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| } |
| |
| /* Errors in the source file can cause expand_expr to return const0_rtx |
| where we expect a vector. To avoid crashing, use one of the vector |
| clear instructions. */ |
| static rtx |
| safe_vector_operand (rtx x, enum machine_mode mode) |
| { |
| if (x == const0_rtx) |
| x = CONST0_RTX (mode); |
| return x; |
| } |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
/* Subroutine of ix86_expand_builtin to take care of SSE insns with
4 operands. The third argument must either be a constant that fits in
8 bits or, for the variable blend forms, the xmm0 register. */
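
/* For example (illustrative), __builtin_ia32_roundsd takes a 4-bit
rounding-control immediate as its third argument:

__v2df r = __builtin_ia32_roundsd (a, b, 0x04);   -- 0x04: use MXCSR mode

Non-immediate or out-of-range values fall into the error paths below. */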
| |
| static rtx |
| ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp, |
| rtx target) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (exp); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (exp)); |
| tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp))); |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| rtx op2 = expand_normal (arg2); |
| enum machine_mode tmode = insn_data[icode].operand[0].mode; |
| enum machine_mode mode1 = insn_data[icode].operand[1].mode; |
| enum machine_mode mode2 = insn_data[icode].operand[2].mode; |
| enum machine_mode mode3 = insn_data[icode].operand[3].mode; |
| |
| if (VECTOR_MODE_P (mode1)) |
| op0 = safe_vector_operand (op0, mode1); |
| if (VECTOR_MODE_P (mode2)) |
| op1 = safe_vector_operand (op1, mode2); |
| if (VECTOR_MODE_P (mode3)) |
| op2 = safe_vector_operand (op2, mode3); |
| |
| if (optimize |
| || target == 0 |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) |
| op0 = copy_to_mode_reg (mode1, op0); |
| if ((optimize && !register_operand (op1, mode2)) |
| || !(*insn_data[icode].operand[2].predicate) (op1, mode2)) |
| op1 = copy_to_mode_reg (mode2, op1); |
| |
| if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) |
| switch (icode) |
| { |
| case CODE_FOR_sse4_1_blendvpd: |
| case CODE_FOR_sse4_1_blendvps: |
| case CODE_FOR_sse4_1_pblendvb: |
| op2 = copy_to_mode_reg (mode3, op2); |
| break; |
| |
| case CODE_FOR_sse4_1_roundsd: |
| case CODE_FOR_sse4_1_roundss: |
| error ("the third argument must be a 4-bit immediate"); |
| return const0_rtx; |
| |
| default: |
| error ("the third argument must be an 8-bit immediate"); |
| return const0_rtx; |
| } |
| |
| pat = GEN_FCN (icode) (target, op0, op1, op2); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| } |
| |
| /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */ |
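
/* For example (illustrative), accumulating a CRC32 over a single byte:

unsigned crc2 = __builtin_ia32_crc32qi (crc, byte);

Note the second operand may have to be copied into a register and
narrowed to the insn's mode, which is what the subreg dance below does. */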
| |
| static rtx |
| ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (exp); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (exp)); |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| enum machine_mode tmode = insn_data[icode].operand[0].mode; |
| enum machine_mode mode0 = insn_data[icode].operand[1].mode; |
| enum machine_mode mode1 = insn_data[icode].operand[2].mode; |
| |
| if (optimize |
| || !target |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| |
| if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) |
| op0 = copy_to_mode_reg (mode0, op0); |
| if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) |
| { |
| op1 = copy_to_reg (op1); |
| op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0); |
| } |
| |
| pat = GEN_FCN (icode) (target, op0, op1); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| } |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| /* Subroutine of ix86_expand_builtin to take care of binop insns. */ |
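
/* For example (illustrative), a call such as __builtin_ia32_addps (a, b)
reaches this routine via the bdesc_2arg table with the icode of the
addv4sf3 pattern; the arglist supplies the two vector operands. */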
| |
| static rtx |
| ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target) |
| { |
| rtx pat, xops[3]; |
| tree arg0 = TREE_VALUE (arglist); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| enum machine_mode tmode = insn_data[icode].operand[0].mode; |
| enum machine_mode mode0 = insn_data[icode].operand[1].mode; |
| enum machine_mode mode1 = insn_data[icode].operand[2].mode; |
| |
| if (VECTOR_MODE_P (mode0)) |
| op0 = safe_vector_operand (op0, mode0); |
| if (VECTOR_MODE_P (mode1)) |
| op1 = safe_vector_operand (op1, mode1); |
| |
| if (optimize || !target |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| |
if (GET_MODE (op1) == SImode && mode1 == TImode)
{
/* The pattern wants this operand in TImode, but it arrived as a
plain SImode value: load it into the low element of a V4SI
register and use the TImode view of that register. */
rtx x = gen_reg_rtx (V4SImode);
emit_insn (gen_sse2_loadd (x, op1));
op1 = gen_lowpart (TImode, x);
}
| |
/* The operands must already be in the modes the insn pattern expects
(VOIDmode constants are allowed through). */
| gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) |
| && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); |
| |
| if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) |
| op0 = copy_to_mode_reg (mode0, op0); |
| if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) |
| op1 = copy_to_mode_reg (mode1, op1); |
| |
| /* ??? Using ix86_fixup_binary_operands is problematic when |
| we've got mismatched modes. Fake it. */ |
| |
| xops[0] = target; |
| xops[1] = op0; |
| xops[2] = op1; |
| |
| if (tmode == mode0 && tmode == mode1) |
| { |
| target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops); |
| op0 = xops[1]; |
| op1 = xops[2]; |
| } |
| else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops)) |
| { |
| op0 = force_reg (mode0, op0); |
| op1 = force_reg (mode1, op1); |
| target = gen_reg_rtx (tmode); |
| } |
| |
| pat = GEN_FCN (icode) (target, op0, op1); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| } |
| |
| /* Subroutine of ix86_expand_builtin to take care of stores. */ |
| |
| static rtx |
| ix86_expand_store_builtin (enum insn_code icode, tree arglist) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (arglist); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| enum machine_mode mode0 = insn_data[icode].operand[0].mode; |
| enum machine_mode mode1 = insn_data[icode].operand[1].mode; |
| |
| if (VECTOR_MODE_P (mode1)) |
| op1 = safe_vector_operand (op1, mode1); |
| |
| op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); |
| op1 = copy_to_mode_reg (mode1, op1); |
| |
| pat = GEN_FCN (icode) (op0, op1); |
| if (pat) |
| emit_insn (pat); |
| return 0; |
| } |
| |
| /* Subroutine of ix86_expand_builtin to take care of unop insns. */ |
| |
| static rtx |
| ix86_expand_unop_builtin (enum insn_code icode, tree arglist, |
| rtx target, int do_load) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (arglist); |
| rtx op0 = expand_normal (arg0); |
| enum machine_mode tmode = insn_data[icode].operand[0].mode; |
| enum machine_mode mode0 = insn_data[icode].operand[1].mode; |
| |
| if (optimize || !target |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| if (do_load) |
| op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); |
| else |
| { |
| if (VECTOR_MODE_P (mode0)) |
| op0 = safe_vector_operand (op0, mode0); |
| |
| if ((optimize && !register_operand (op0, mode0)) |
| || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) |
| op0 = copy_to_mode_reg (mode0, op0); |
| } |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| switch (icode) |
| { |
| case CODE_FOR_sse4_1_roundpd: |
| case CODE_FOR_sse4_1_roundps: |
| { |
| tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| rtx op1 = expand_normal (arg1); |
| enum machine_mode mode1 = insn_data[icode].operand[2].mode; |
| |
| if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) |
| { |
| error ("the second argument must be a 4-bit immediate"); |
| return const0_rtx; |
| } |
| pat = GEN_FCN (icode) (target, op0, op1); |
| } |
| break; |
| default: |
| pat = GEN_FCN (icode) (target, op0); |
| break; |
| } |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| } |
| |
| /* Subroutine of ix86_expand_builtin to take care of three special unop insns: |
| sqrtss, rsqrtss, rcpss. */ |
| |
| static rtx |
| ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (arglist); |
| rtx op1, op0 = expand_normal (arg0); |
| enum machine_mode tmode = insn_data[icode].operand[0].mode; |
| enum machine_mode mode0 = insn_data[icode].operand[1].mode; |
| |
| if (optimize || !target |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| |
| if (VECTOR_MODE_P (mode0)) |
| op0 = safe_vector_operand (op0, mode0); |
| |
| if ((optimize && !register_operand (op0, mode0)) |
| || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) |
| op0 = copy_to_mode_reg (mode0, op0); |
| |
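| /* The vm* scalar patterns take two source operands: the computation |
| input and the vector supplying the untouched upper elements. The |
| builtins pass the same value for both, so OP1 simply reuses OP0. */ |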
| op1 = op0; |
| if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) |
| op1 = copy_to_mode_reg (mode0, op1); |
| |
| pat = GEN_FCN (icode) (target, op0, op1); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| } |
| |
| /* Subroutine of ix86_expand_builtin to take care of comparison insns. */ |
| |
| static rtx |
| ix86_expand_sse_compare (const struct builtin_description *d, tree arglist, |
| rtx target) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (arglist); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| rtx op2; |
| enum machine_mode tmode = insn_data[d->icode].operand[0].mode; |
| enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; |
| enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; |
| enum rtx_code comparison = d->comparison; |
| |
| if (VECTOR_MODE_P (mode0)) |
| op0 = safe_vector_operand (op0, mode0); |
| if (VECTOR_MODE_P (mode1)) |
| op1 = safe_vector_operand (op1, mode1); |
| |
| /* Swap operands if we have a comparison that isn't available in |
| hardware. */ |
| if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) |
| { |
| rtx tmp = gen_reg_rtx (mode1); |
| emit_move_insn (tmp, op1); |
| op1 = op0; |
| op0 = tmp; |
| } |
| |
| if (optimize || !target |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| |
| if ((optimize && !register_operand (op0, mode0)) |
| || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) |
| op0 = copy_to_mode_reg (mode0, op0); |
| if ((optimize && !register_operand (op1, mode1)) |
| || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) |
| op1 = copy_to_mode_reg (mode1, op1); |
| |
| op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); |
| pat = GEN_FCN (d->icode) (target, op0, op1, op2); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| } |
| |
| /* Subroutine of ix86_expand_builtin to take care of comi insns. */ |
| |
| static rtx |
| ix86_expand_sse_comi (const struct builtin_description *d, tree arglist, |
| rtx target) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (arglist); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; |
| enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; |
| enum rtx_code comparison = d->comparison; |
| |
| if (VECTOR_MODE_P (mode0)) |
| op0 = safe_vector_operand (op0, mode0); |
| if (VECTOR_MODE_P (mode1)) |
| op1 = safe_vector_operand (op1, mode1); |
| |
| /* Swap operands if we have a comparison that isn't available in |
| hardware. */ |
| if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) |
| { |
| rtx tmp = op1; |
| op1 = op0; |
| op0 = tmp; |
| } |
| |
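| /* Materialize the boolean result as the low byte of a zeroed SImode |
| register; the STRICT_LOW_PART set emitted below writes only that |
| byte. */ |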
| target = gen_reg_rtx (SImode); |
| emit_move_insn (target, const0_rtx); |
| target = gen_rtx_SUBREG (QImode, target, 0); |
| |
| if ((optimize && !register_operand (op0, mode0)) |
| || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) |
| op0 = copy_to_mode_reg (mode0, op0); |
| if ((optimize && !register_operand (op1, mode1)) |
| || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) |
| op1 = copy_to_mode_reg (mode1, op1); |
| |
| pat = GEN_FCN (d->icode) (op0, op1); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| emit_insn (gen_rtx_SET (VOIDmode, |
| gen_rtx_STRICT_LOW_PART (VOIDmode, target), |
| gen_rtx_fmt_ee (comparison, QImode, |
| SET_DEST (pat), |
| const0_rtx))); |
| |
| return SUBREG_REG (target); |
| } |
| |
| /* APPLE LOCAL begin 4299257 */ |
| /* Subroutine of ix86_expand_builtin to take care of ucomi insns. */ |
| |
| static rtx |
| ix86_expand_sse_ucomi (const struct builtin_description *d, tree arglist, |
| rtx target) |
| { |
| tree arg0 = TREE_VALUE (arglist); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; |
| enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; |
| enum machine_mode scalar_mode; |
| enum rtx_code comparison = d->comparison; |
| |
| if (VECTOR_MODE_P (mode0)) |
| op0 = safe_vector_operand (op0, mode0); |
| if (VECTOR_MODE_P (mode1)) |
| op1 = safe_vector_operand (op1, mode1); |
| |
| /* Swap operands if we have a comparison that isn't available in |
| hardware. */ |
| if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) |
| { |
| rtx tmp = op1; |
| op1 = op0; |
| op0 = tmp; |
| } |
| |
| target = gen_reg_rtx (SImode); |
| emit_move_insn (target, const0_rtx); |
| target = gen_rtx_SUBREG (QImode, target, 0); |
| |
| gcc_assert (mode0 == V4SFmode || mode0 == V2DFmode); |
| gcc_assert (mode1 == V4SFmode || mode1 == V2DFmode); |
| |
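| /* Only the low scalar element of each vector operand takes part in |
| the comparison: extract it with a subreg and let ix86_expand_setcc |
| emit the compare and setcc sequence. */ |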
| scalar_mode = (mode0 == V4SFmode) ? SFmode : DFmode; |
| op0 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode0, op0), 0); |
| op1 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode1, op1), 0); |
| |
| ix86_compare_op0 = op0; |
| ix86_compare_op1 = op1; |
| if (ix86_expand_setcc (comparison, target)) |
| return SUBREG_REG (target); |
| |
| return NULL_RTX; |
| } |
| /* APPLE LOCAL end 4299257 */ |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| /* Subroutine of ix86_expand_builtin to take care of ptest insns. */ |
| |
| static rtx |
| ix86_expand_sse_ptest (const struct builtin_description *d, tree arglist, |
| rtx target) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (arglist); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; |
| enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; |
| enum rtx_code comparison = d->comparison; |
| |
| if (VECTOR_MODE_P (mode0)) |
| op0 = safe_vector_operand (op0, mode0); |
| if (VECTOR_MODE_P (mode1)) |
| op1 = safe_vector_operand (op1, mode1); |
| |
| target = gen_reg_rtx (SImode); |
| emit_move_insn (target, const0_rtx); |
| target = gen_rtx_SUBREG (QImode, target, 0); |
| |
| if ((optimize && !register_operand (op0, mode0)) |
| || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) |
| op0 = copy_to_mode_reg (mode0, op0); |
| if ((optimize && !register_operand (op1, mode1)) |
| || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) |
| op1 = copy_to_mode_reg (mode1, op1); |
| |
| pat = GEN_FCN (d->icode) (op0, op1); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| emit_insn (gen_rtx_SET (VOIDmode, |
| gen_rtx_STRICT_LOW_PART (VOIDmode, target), |
| gen_rtx_fmt_ee (comparison, QImode, |
| SET_DEST (pat), |
| const0_rtx))); |
| |
| return SUBREG_REG (target); |
| } |
| |
| /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ |
| |
| static rtx |
| ix86_expand_sse_pcmpestr (const struct builtin_description *d, |
| tree arglist, rtx target) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (arglist); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); |
| tree arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))); |
| tree arg4 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))))); |
| rtx scratch0, scratch1; |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| rtx op2 = expand_normal (arg2); |
| rtx op3 = expand_normal (arg3); |
| rtx op4 = expand_normal (arg4); |
| enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; |
| |
| tmode0 = insn_data[d->icode].operand[0].mode; |
| tmode1 = insn_data[d->icode].operand[1].mode; |
| modev2 = insn_data[d->icode].operand[2].mode; |
| modei3 = insn_data[d->icode].operand[3].mode; |
| modev4 = insn_data[d->icode].operand[4].mode; |
| modei5 = insn_data[d->icode].operand[5].mode; |
| modeimm = insn_data[d->icode].operand[6].mode; |
| |
| if (VECTOR_MODE_P (modev2)) |
| op0 = safe_vector_operand (op0, modev2); |
| if (VECTOR_MODE_P (modev4)) |
| op2 = safe_vector_operand (op2, modev4); |
| |
| if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2)) |
| op0 = copy_to_mode_reg (modev2, op0); |
| if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3)) |
| op1 = copy_to_mode_reg (modei3, op1); |
| if ((optimize && !register_operand (op2, modev4)) |
| || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4)) |
| op2 = copy_to_mode_reg (modev4, op2); |
| if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5)) |
| op3 = copy_to_mode_reg (modei5, op3); |
| |
| if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm)) |
| { |
| error ("the fifth argument must be a 8-bit immediate"); |
| return const0_rtx; |
| } |
| |
| if (d->code == IX86_BUILTIN_PCMPESTRI128) |
| { |
| if (optimize || !target |
| || GET_MODE (target) != tmode0 |
| || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0)) |
| target = gen_reg_rtx (tmode0); |
| |
| scratch1 = gen_reg_rtx (tmode1); |
| |
| pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); |
| } |
| else if (d->code == IX86_BUILTIN_PCMPESTRM128) |
| { |
| if (optimize || !target |
| || GET_MODE (target) != tmode1 |
| || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1)) |
| target = gen_reg_rtx (tmode1); |
| |
| scratch0 = gen_reg_rtx (tmode0); |
| |
| pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); |
| } |
| else |
| { |
| gcc_assert (d->flag); |
| |
| scratch0 = gen_reg_rtx (tmode0); |
| scratch1 = gen_reg_rtx (tmode1); |
| |
| pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); |
| } |
| |
| if (! pat) |
| return 0; |
| |
| emit_insn (pat); |
| |
| if (d->flag) |
| { |
| target = gen_reg_rtx (SImode); |
| emit_move_insn (target, const0_rtx); |
| target = gen_rtx_SUBREG (QImode, target, 0); |
| |
| emit_insn |
| (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), |
| gen_rtx_fmt_ee (EQ, QImode, |
| gen_rtx_REG ((enum machine_mode) d->flag, |
| FLAGS_REG), |
| const0_rtx))); |
| return SUBREG_REG (target); |
| } |
| else |
| return target; |
| } |
| |
| |
| /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ |
| |
| static rtx |
| ix86_expand_sse_pcmpistr (const struct builtin_description *d, |
| tree arglist, rtx target) |
| { |
| rtx pat; |
| tree arg0 = TREE_VALUE (arglist); |
| tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); |
| rtx scratch0, scratch1; |
| rtx op0 = expand_normal (arg0); |
| rtx op1 = expand_normal (arg1); |
| rtx op2 = expand_normal (arg2); |
| enum machine_mode tmode0, tmode1, modev2, modev3, modeimm; |
| |
| tmode0 = insn_data[d->icode].operand[0].mode; |
| tmode1 = insn_data[d->icode].operand[1].mode; |
| modev2 = insn_data[d->icode].operand[2].mode; |
| modev3 = insn_data[d->icode].operand[3].mode; |
| modeimm = insn_data[d->icode].operand[4].mode; |
| |
| if (VECTOR_MODE_P (modev2)) |
| op0 = safe_vector_operand (op0, modev2); |
| if (VECTOR_MODE_P (modev3)) |
| op1 = safe_vector_operand (op1, modev3); |
| |
| if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2)) |
| op0 = copy_to_mode_reg (modev2, op0); |
| if ((optimize && !register_operand (op1, modev3)) |
| || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3)) |
| op1 = copy_to_mode_reg (modev3, op1); |
| |
| if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm)) |
| { |
| error ("the third argument must be a 8-bit immediate"); |
| return const0_rtx; |
| } |
| |
| if (d->code == IX86_BUILTIN_PCMPISTRI128) |
| { |
| if (optimize || !target |
| || GET_MODE (target) != tmode0 |
| || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0)) |
| target = gen_reg_rtx (tmode0); |
| |
| scratch1 = gen_reg_rtx (tmode1); |
| |
| pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); |
| } |
| else if (d->code == IX86_BUILTIN_PCMPISTRM128) |
| { |
| if (optimize || !target |
| || GET_MODE (target) != tmode1 |
| || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1)) |
| target = gen_reg_rtx (tmode1); |
| |
| scratch0 = gen_reg_rtx (tmode0); |
| |
| pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); |
| } |
| else |
| { |
| gcc_assert (d->flag); |
| |
| scratch0 = gen_reg_rtx (tmode0); |
| scratch1 = gen_reg_rtx (tmode1); |
| |
| pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); |
| } |
| |
| if (! pat) |
| return 0; |
| |
| emit_insn (pat); |
| |
| if (d->flag) |
| { |
| target = gen_reg_rtx (SImode); |
| emit_move_insn (target, const0_rtx); |
| target = gen_rtx_SUBREG (QImode, target, 0); |
| |
| emit_insn |
| (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), |
| gen_rtx_fmt_ee (EQ, QImode, |
| gen_rtx_REG ((enum machine_mode) d->flag, |
| FLAGS_REG), |
| const0_rtx))); |
| return SUBREG_REG (target); |
| } |
| else |
| return target; |
| } |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| /* Return the integer constant in ARG. Constrain it to be in the range |
| of the subparts of VEC_TYPE; issue an error if not. */ |
| |
| static int |
| get_element_number (tree vec_type, tree arg) |
| { |
| unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; |
| |
| if (!host_integerp (arg, 1) |
| || (elt = tree_low_cst (arg, 1), elt > max)) |
| { |
| error ("selector must be an integer constant in the range 0..%wi", max); |
| return 0; |
| } |
| |
| return elt; |
| } |
| |
| /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around |
| ix86_expand_vector_init. We DO have language-level syntax for this, in |
| the form of (type){ init-list }. Except that since we can't place emms |
| instructions from inside the compiler, we can't allow the use of MMX |
| registers unless the user explicitly asks for it. So we do *not* define |
| vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead |
| we have builtins invoked by mmintrin.h that give us license to emit |
| these sorts of instructions. */ |
| |
| static rtx |
| ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target) |
| { |
| enum machine_mode tmode = TYPE_MODE (type); |
| enum machine_mode inner_mode = GET_MODE_INNER (tmode); |
| int i, n_elt = GET_MODE_NUNITS (tmode); |
| rtvec v = rtvec_alloc (n_elt); |
| |
| gcc_assert (VECTOR_MODE_P (tmode)); |
| |
| for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist)) |
| { |
| rtx x = expand_normal (TREE_VALUE (arglist)); |
| RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); |
| } |
| |
| gcc_assert (arglist == NULL); |
| |
| if (!target || !register_operand (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| |
| ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); |
| return target; |
| } |
| |
| /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around |
| ix86_expand_vector_extract. They would be redundant (for non-MMX) if we |
| had a language-level syntax for referencing vector elements. */ |
| |
| static rtx |
| ix86_expand_vec_ext_builtin (tree arglist, rtx target) |
| { |
| enum machine_mode tmode, mode0; |
| tree arg0, arg1; |
| int elt; |
| rtx op0; |
| |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| |
| op0 = expand_normal (arg0); |
| elt = get_element_number (TREE_TYPE (arg0), arg1); |
| |
| tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); |
| mode0 = TYPE_MODE (TREE_TYPE (arg0)); |
| gcc_assert (VECTOR_MODE_P (mode0)); |
| |
| op0 = force_reg (mode0, op0); |
| |
| if (optimize || !target || !register_operand (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| |
| ix86_expand_vector_extract (true, target, op0, elt); |
| |
| return target; |
| } |
| |
| /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around |
| ix86_expand_vector_set. They would be redundant (for non-MMX) if we had |
| a language-level syntax for referencing vector elements. */ |
| |
| static rtx |
| ix86_expand_vec_set_builtin (tree arglist) |
| { |
| enum machine_mode tmode, mode1; |
| tree arg0, arg1, arg2; |
| int elt; |
| rtx op0, op1, target; |
| |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); |
| |
| tmode = TYPE_MODE (TREE_TYPE (arg0)); |
| mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); |
| gcc_assert (VECTOR_MODE_P (tmode)); |
| |
| op0 = expand_expr (arg0, NULL_RTX, tmode, 0); |
| op1 = expand_expr (arg1, NULL_RTX, mode1, 0); |
| elt = get_element_number (TREE_TYPE (arg0), arg2); |
| |
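| /* If the scalar value was expanded in some other mode, convert it |
| to the element mode; the trailing true treats the value as |
| unsigned. */ |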
| if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) |
| op1 = convert_modes (mode1, GET_MODE (op1), op1, true); |
| |
| op0 = force_reg (tmode, op0); |
| op1 = force_reg (mode1, op1); |
| |
| /* OP0 is the source of these builtin functions and shouldn't be |
| modified. Create a copy, use it and return it as target. */ |
| target = gen_reg_rtx (tmode); |
| emit_move_insn (target, op0); |
| ix86_expand_vector_set (true, target, op1, elt); |
| |
| return target; |
| } |
| |
| /* Expand an expression EXP that calls a built-in function, |
| with result going to TARGET if that's convenient |
| (and in mode MODE if that's convenient). |
| SUBTARGET may be used as the target for computing one of EXP's operands. |
| IGNORE is nonzero if the value is to be ignored. */ |
| |
| static rtx |
| ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, |
| enum machine_mode mode ATTRIBUTE_UNUSED, |
| int ignore ATTRIBUTE_UNUSED) |
| { |
| const struct builtin_description *d; |
| size_t i; |
| enum insn_code icode; |
| tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); |
| tree arglist = TREE_OPERAND (exp, 1); |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| tree arg0, arg1, arg2, arg3; |
| rtx op0, op1, op2, op3, pat; |
| /* APPLE LOCAL ssse3 */ |
| enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| unsigned int fcode = DECL_FUNCTION_CODE (fndecl); |
| |
| switch (fcode) |
| { |
| case IX86_BUILTIN_EMMS: |
| emit_insn (gen_mmx_emms ()); |
| return 0; |
| |
| case IX86_BUILTIN_SFENCE: |
| emit_insn (gen_sse_sfence ()); |
| return 0; |
| |
| case IX86_BUILTIN_MASKMOVQ: |
| case IX86_BUILTIN_MASKMOVDQU: |
| icode = (fcode == IX86_BUILTIN_MASKMOVQ |
| ? CODE_FOR_mmx_maskmovq |
| : CODE_FOR_sse2_maskmovdqu); |
| /* Note the arg order is different from the operand order. */ |
| arg1 = TREE_VALUE (arglist); |
| arg2 = TREE_VALUE (TREE_CHAIN (arglist)); |
| arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| op2 = expand_normal (arg2); |
| mode0 = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| mode2 = insn_data[icode].operand[2].mode; |
| |
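| /* The address argument becomes the memory destination: force it |
| into a register and wrap it in a MEM. */ |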
| op0 = force_reg (Pmode, op0); |
| op0 = gen_rtx_MEM (mode1, op0); |
| |
| if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) |
| op0 = copy_to_mode_reg (mode0, op0); |
| if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) |
| op1 = copy_to_mode_reg (mode1, op1); |
| if (! (*insn_data[icode].operand[2].predicate) (op2, mode2)) |
| op2 = copy_to_mode_reg (mode2, op2); |
| pat = GEN_FCN (icode) (op0, op1, op2); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return 0; |
| |
| case IX86_BUILTIN_SQRTSS: |
| return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target); |
| case IX86_BUILTIN_RSQRTSS: |
| return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target); |
| case IX86_BUILTIN_RCPSS: |
| return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target); |
| |
| /* APPLE LOCAL begin 4099020 */ |
| case IX86_BUILTIN_LOADQ: |
| return ix86_expand_unop_builtin (CODE_FOR_sse_loadqv4si, arglist, target, 1); |
| |
| case IX86_BUILTIN_MOVQ: |
| return ix86_expand_unop_builtin (CODE_FOR_sse_movqv4si, arglist, target, 0); |
| |
| case IX86_BUILTIN_STOREQ: |
| return ix86_expand_store_builtin (CODE_FOR_sse_storeqv4si, arglist); |
| /* APPLE LOCAL end 4099020 */ |
| |
| case IX86_BUILTIN_LOADUPS: |
| return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1); |
| |
| case IX86_BUILTIN_STOREUPS: |
| return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist); |
| |
| case IX86_BUILTIN_LOADHPS: |
| case IX86_BUILTIN_LOADLPS: |
| case IX86_BUILTIN_LOADHPD: |
| case IX86_BUILTIN_LOADLPD: |
| icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps |
| : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps |
| : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd |
| : CODE_FOR_sse2_loadlpd); |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| tmode = insn_data[icode].operand[0].mode; |
| mode0 = insn_data[icode].operand[1].mode; |
| mode1 = insn_data[icode].operand[2].mode; |
| |
| op0 = force_reg (mode0, op0); |
| op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1)); |
| if (optimize || target == 0 |
| || GET_MODE (target) != tmode |
| || !register_operand (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| pat = GEN_FCN (icode) (target, op0, op1); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| |
| case IX86_BUILTIN_STOREHPS: |
| case IX86_BUILTIN_STORELPS: |
| icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps |
| : CODE_FOR_sse_storelps); |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| mode0 = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| |
| op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); |
| op1 = force_reg (mode1, op1); |
| |
| pat = GEN_FCN (icode) (op0, op1); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return const0_rtx; |
| |
| case IX86_BUILTIN_MOVNTPS: |
| return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist); |
| case IX86_BUILTIN_MOVNTQ: |
| /* APPLE LOCAL 4656532 use V1DImode for _m64 */ |
| return ix86_expand_store_builtin (CODE_FOR_sse_movntv1di, arglist); |
| case IX86_BUILTIN_LDMXCSR: |
| op0 = expand_normal (TREE_VALUE (arglist)); |
| target = assign_386_stack_local (SImode, SLOT_VIRTUAL); |
| emit_move_insn (target, op0); |
| emit_insn (gen_sse_ldmxcsr (target)); |
| return 0; |
| |
| case IX86_BUILTIN_STMXCSR: |
| target = assign_386_stack_local (SImode, SLOT_VIRTUAL); |
| emit_insn (gen_sse_stmxcsr (target)); |
| return copy_to_mode_reg (SImode, target); |
| |
| case IX86_BUILTIN_SHUFPS: |
| case IX86_BUILTIN_SHUFPD: |
| icode = (fcode == IX86_BUILTIN_SHUFPS |
| ? CODE_FOR_sse_shufps |
| : CODE_FOR_sse2_shufpd); |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| op2 = expand_normal (arg2); |
| tmode = insn_data[icode].operand[0].mode; |
| mode0 = insn_data[icode].operand[1].mode; |
| mode1 = insn_data[icode].operand[2].mode; |
| mode2 = insn_data[icode].operand[3].mode; |
| |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) |
| op0 = copy_to_mode_reg (mode0, op0); |
| if ((optimize && !register_operand (op1, mode1)) |
| || !(*insn_data[icode].operand[2].predicate) (op1, mode1)) |
| op1 = copy_to_mode_reg (mode1, op1); |
| if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) |
| { |
| /* @@@ better error message */ |
| error ("mask must be an immediate"); |
| return gen_reg_rtx (tmode); |
| } |
| if (optimize || target == 0 |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| pat = GEN_FCN (icode) (target, op0, op1, op2); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| |
| case IX86_BUILTIN_PSHUFW: |
| case IX86_BUILTIN_PSHUFD: |
| case IX86_BUILTIN_PSHUFHW: |
| case IX86_BUILTIN_PSHUFLW: |
| icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw |
| : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw |
| : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd |
| : CODE_FOR_mmx_pshufw); |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| tmode = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| mode2 = insn_data[icode].operand[2].mode; |
| |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) |
| op0 = copy_to_mode_reg (mode1, op0); |
| if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) |
| { |
| /* @@@ better error message */ |
| error ("mask must be an immediate"); |
| return const0_rtx; |
| } |
| if (target == 0 |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| pat = GEN_FCN (icode) (target, op0, op1); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| |
| case IX86_BUILTIN_PSLLWI128: |
| icode = CODE_FOR_ashlv8hi3; |
| goto do_pshifti; |
| case IX86_BUILTIN_PSLLDI128: |
| icode = CODE_FOR_ashlv4si3; |
| goto do_pshifti; |
| case IX86_BUILTIN_PSLLQI128: |
| icode = CODE_FOR_ashlv2di3; |
| goto do_pshifti; |
| case IX86_BUILTIN_PSRAWI128: |
| icode = CODE_FOR_ashrv8hi3; |
| goto do_pshifti; |
| case IX86_BUILTIN_PSRADI128: |
| icode = CODE_FOR_ashrv4si3; |
| goto do_pshifti; |
| case IX86_BUILTIN_PSRLWI128: |
| icode = CODE_FOR_lshrv8hi3; |
| goto do_pshifti; |
| case IX86_BUILTIN_PSRLDI128: |
| icode = CODE_FOR_lshrv4si3; |
| goto do_pshifti; |
| case IX86_BUILTIN_PSRLQI128: |
| icode = CODE_FOR_lshrv2di3; |
| goto do_pshifti; |
| do_pshifti: |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); |
| op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); |
| |
| /* APPLE LOCAL begin radar 5543378 mainline candidate */ |
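| /* The SSE shifts treat every count wider than the element size |
| alike, so a constant count outside 0..255 can be canonicalized to |
| 255 without changing the result. */ |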
| if (GET_CODE (op1) == CONST_INT) |
| { |
| if (INTVAL (op1) < 0 || INTVAL (op1) > 255) |
| op1 = GEN_INT (255); |
| } |
| else |
| { |
| mode2 = insn_data[icode].operand[2].mode; |
| if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) |
| { |
| op1 = copy_to_reg (op1); |
| if (GET_MODE (op1) != mode2) |
| op1 = convert_to_mode (mode2, op1, 0); |
| } |
| } |
| /* APPLE LOCAL end radar 5543378 mainline candidate */ |
| |
| tmode = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) |
| op0 = copy_to_reg (op0); |
| |
| target = gen_reg_rtx (tmode); |
| pat = GEN_FCN (icode) (target, op0, op1); |
| if (!pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| |
| case IX86_BUILTIN_PSLLW128: |
| icode = CODE_FOR_ashlv8hi3; |
| goto do_pshift; |
| case IX86_BUILTIN_PSLLD128: |
| icode = CODE_FOR_ashlv4si3; |
| goto do_pshift; |
| case IX86_BUILTIN_PSLLQ128: |
| icode = CODE_FOR_ashlv2di3; |
| goto do_pshift; |
| case IX86_BUILTIN_PSRAW128: |
| icode = CODE_FOR_ashrv8hi3; |
| goto do_pshift; |
| case IX86_BUILTIN_PSRAD128: |
| icode = CODE_FOR_ashrv4si3; |
| goto do_pshift; |
| case IX86_BUILTIN_PSRLW128: |
| icode = CODE_FOR_lshrv8hi3; |
| goto do_pshift; |
| case IX86_BUILTIN_PSRLD128: |
| icode = CODE_FOR_lshrv4si3; |
| goto do_pshift; |
| case IX86_BUILTIN_PSRLQ128: |
| icode = CODE_FOR_lshrv2di3; |
| goto do_pshift; |
| do_pshift: |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); |
| op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); |
| |
| tmode = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) |
| op0 = copy_to_reg (op0); |
| |
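| /* The variable shift count arrives as a full __m128i; reinterpret |
| it as TImode, which is how the shift patterns expect their second |
| operand. */ |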
| op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0); |
| if (! (*insn_data[icode].operand[2].predicate) (op1, TImode)) |
| op1 = copy_to_reg (op1); |
| |
| target = gen_reg_rtx (tmode); |
| pat = GEN_FCN (icode) (target, op0, op1); |
| if (!pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| |
| /* APPLE LOCAL begin 5919583 */ |
| case IX86_BUILTIN_PSLLDQI128: |
| case IX86_BUILTIN_PSRLDQI128: |
| case IX86_BUILTIN_PSLLDQI128_BYTESHIFT: |
| case IX86_BUILTIN_PSRLDQI128_BYTESHIFT: |
| icode = ((fcode == IX86_BUILTIN_PSLLDQI128 |
| || fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT) |
| ? CODE_FOR_sse2_ashlti3 |
| : CODE_FOR_sse2_lshrti3); |
| /* APPLE LOCAL end 5919583 */ |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| tmode = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| mode2 = insn_data[icode].operand[2].mode; |
| |
| /* APPLE LOCAL begin 5919583 */ |
| if (! CONST_INT_P (op1)) |
| { |
| error ("shift must be an immediate"); |
| return const0_rtx; |
| } |
| /* The _mm_srli_si128/_mm_slli_si128 primitives are defined with |
| a byte-shift count; inside of GCC, we prefer to specify the |
| width of a shift in bits. The original non-BYTESHIFT |
| primitives were problematic due to the "*8" in their macro |
| bodies; we have moved the "*8" here to resolve this. The |
| original builtins are still supported because many developers |
| rely upon them. */ |
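| /* For example, a byte count of 2 (as written in |
| _mm_slli_si128 (x, 2)) is rewritten here into a 16-bit shift. */ |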
| if (fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT |
| || fcode == IX86_BUILTIN_PSRLDQI128_BYTESHIFT) |
| op1 = GEN_INT (INTVAL (op1) * 8); |
| /* APPLE LOCAL end 5919583 */ |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) |
| { |
| op0 = copy_to_reg (op0); |
| op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); |
| } |
| if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) |
| { |
| error ("shift must be an immediate"); |
| return const0_rtx; |
| } |
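| /* The patterns compute in TImode, but the intrinsics return an |
| __m128i; allocate the result as V2DImode and hand the pattern a |
| TImode view of it. */ |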
| target = gen_reg_rtx (V2DImode); |
| pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), |
| op0, op1); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| |
| case IX86_BUILTIN_FEMMS: |
| emit_insn (gen_mmx_femms ()); |
| return NULL_RTX; |
| |
| case IX86_BUILTIN_PAVGUSB: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target); |
| |
| case IX86_BUILTIN_PF2ID: |
| return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0); |
| |
| case IX86_BUILTIN_PFACC: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFADD: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFCMPEQ: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFCMPGE: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFCMPGT: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFMAX: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFMIN: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFMUL: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFRCP: |
| return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0); |
| |
| case IX86_BUILTIN_PFRCPIT1: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFRCPIT2: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFRSQIT1: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFRSQRT: |
| return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0); |
| |
| case IX86_BUILTIN_PFSUB: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFSUBR: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PI2FD: |
| return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0); |
| |
| case IX86_BUILTIN_PMULHRW: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target); |
| |
| case IX86_BUILTIN_PF2IW: |
| return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0); |
| |
| case IX86_BUILTIN_PFNACC: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PFPNACC: |
| return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target); |
| |
| case IX86_BUILTIN_PI2FW: |
| return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0); |
| |
| case IX86_BUILTIN_PSWAPDSI: |
| return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0); |
| |
| case IX86_BUILTIN_PSWAPDSF: |
| return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0); |
| |
| case IX86_BUILTIN_SQRTSD: |
| return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target); |
| case IX86_BUILTIN_LOADUPD: |
| return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1); |
| case IX86_BUILTIN_STOREUPD: |
| return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist); |
| |
| case IX86_BUILTIN_MFENCE: |
| emit_insn (gen_sse2_mfence ()); |
| return 0; |
| case IX86_BUILTIN_LFENCE: |
| emit_insn (gen_sse2_lfence ()); |
| return 0; |
| |
| case IX86_BUILTIN_CLFLUSH: |
| arg0 = TREE_VALUE (arglist); |
| op0 = expand_normal (arg0); |
| icode = CODE_FOR_sse2_clflush; |
| if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode)) |
| op0 = copy_to_mode_reg (Pmode, op0); |
| |
| emit_insn (gen_sse2_clflush (op0)); |
| return 0; |
| |
| case IX86_BUILTIN_MOVNTPD: |
| return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist); |
| case IX86_BUILTIN_MOVNTDQ: |
| return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist); |
| case IX86_BUILTIN_MOVNTI: |
| return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist); |
| |
| case IX86_BUILTIN_LOADDQU: |
| return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1); |
| case IX86_BUILTIN_STOREDQU: |
| return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist); |
| |
| case IX86_BUILTIN_MONITOR: |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| op2 = expand_normal (arg2); |
| if (!REG_P (op0)) |
| op0 = copy_to_mode_reg (Pmode, op0); |
| if (!REG_P (op1)) |
| op1 = copy_to_mode_reg (SImode, op1); |
| if (!REG_P (op2)) |
| op2 = copy_to_mode_reg (SImode, op2); |
| if (!TARGET_64BIT) |
| emit_insn (gen_sse3_monitor (op0, op1, op2)); |
| else |
| emit_insn (gen_sse3_monitor64 (op0, op1, op2)); |
| return 0; |
| |
| case IX86_BUILTIN_MWAIT: |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| if (!REG_P (op0)) |
| op0 = copy_to_mode_reg (SImode, op0); |
| if (!REG_P (op1)) |
| op1 = copy_to_mode_reg (SImode, op1); |
| emit_insn (gen_sse3_mwait (op0, op1)); |
| return 0; |
| |
| case IX86_BUILTIN_LDDQU: |
| return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, |
| target, 1); |
| /* APPLE LOCAL begin mainline */ |
| case IX86_BUILTIN_PALIGNR: |
| case IX86_BUILTIN_PALIGNR128: |
| if (fcode == IX86_BUILTIN_PALIGNR) |
| { |
| /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */ |
| icode = CODE_FOR_ssse3_palignrv1di; |
| mode = V1DImode; |
| /* APPLE LOCAL end 4656532 use V1DImode for _m64 */ |
| } |
| else |
| { |
| icode = CODE_FOR_ssse3_palignrti; |
| mode = V2DImode; |
| } |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); |
| op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); |
| op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); |
| op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); |
| tmode = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| mode2 = insn_data[icode].operand[2].mode; |
| mode3 = insn_data[icode].operand[3].mode; |
| |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) |
| { |
| op0 = copy_to_reg (op0); |
| op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); |
| } |
| if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) |
| { |
| op1 = copy_to_reg (op1); |
| op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0); |
| } |
| if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) |
| { |
| error ("shift must be an immediate"); |
| return const0_rtx; |
| } |
| target = gen_reg_rtx (mode); |
| pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0), |
| op0, op1, op2); |
| if (! pat) |
| return 0; |
| emit_insn (pat); |
| return target; |
| |
| /* APPLE LOCAL end mainline */ |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| case IX86_BUILTIN_MOVNTDQA: |
| return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, arglist, |
| target, 1); |
| |
| case IX86_BUILTIN_MOVNTSD: |
| return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist); |
| |
| case IX86_BUILTIN_MOVNTSS: |
| return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist); |
| |
| case IX86_BUILTIN_INSERTQ: |
| case IX86_BUILTIN_EXTRQ: |
| icode = (fcode == IX86_BUILTIN_EXTRQ |
| ? CODE_FOR_sse4a_extrq |
| : CODE_FOR_sse4a_insertq); |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| tmode = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| mode2 = insn_data[icode].operand[2].mode; |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) |
| op0 = copy_to_mode_reg (mode1, op0); |
| if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) |
| op1 = copy_to_mode_reg (mode2, op1); |
| if (optimize || target == 0 |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| pat = GEN_FCN (icode) (target, op0, op1); |
| if (! pat) |
| return NULL_RTX; |
| emit_insn (pat); |
| return target; |
| |
| case IX86_BUILTIN_EXTRQI: |
| icode = CODE_FOR_sse4a_extrqi; |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| op2 = expand_normal (arg2); |
| tmode = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| mode2 = insn_data[icode].operand[2].mode; |
| mode3 = insn_data[icode].operand[3].mode; |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) |
| op0 = copy_to_mode_reg (mode1, op0); |
| if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) |
| { |
| error ("index mask must be an immediate"); |
| return gen_reg_rtx (tmode); |
| } |
| if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) |
| { |
| error ("length mask must be an immediate"); |
| return gen_reg_rtx (tmode); |
| } |
| if (optimize || target == 0 |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| pat = GEN_FCN (icode) (target, op0, op1, op2); |
| if (! pat) |
| return NULL_RTX; |
| emit_insn (pat); |
| return target; |
| |
| case IX86_BUILTIN_INSERTQI: |
| icode = CODE_FOR_sse4a_insertqi; |
| arg0 = TREE_VALUE (arglist); |
| arg1 = TREE_VALUE (TREE_CHAIN (arglist)); |
| arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); |
| arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))); |
| op0 = expand_normal (arg0); |
| op1 = expand_normal (arg1); |
| op2 = expand_normal (arg2); |
| op3 = expand_normal (arg3); |
| tmode = insn_data[icode].operand[0].mode; |
| mode1 = insn_data[icode].operand[1].mode; |
| mode2 = insn_data[icode].operand[2].mode; |
| mode3 = insn_data[icode].operand[3].mode; |
| mode4 = insn_data[icode].operand[4].mode; |
| |
| if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) |
| op0 = copy_to_mode_reg (mode1, op0); |
| |
| if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) |
| op1 = copy_to_mode_reg (mode2, op1); |
| |
| if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) |
| { |
| error ("index mask must be an immediate"); |
| return gen_reg_rtx (tmode); |
| } |
| if (! (*insn_data[icode].operand[4].predicate) (op3, mode4)) |
| { |
| error ("length mask must be an immediate"); |
| return gen_reg_rtx (tmode); |
| } |
| if (optimize || target == 0 |
| || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| pat = GEN_FCN (icode) (target, op0, op1, op2, op3); |
| if (! pat) |
| return NULL_RTX; |
| emit_insn (pat); |
| return target; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| case IX86_BUILTIN_VEC_INIT_V2SI: |
| case IX86_BUILTIN_VEC_INIT_V4HI: |
| case IX86_BUILTIN_VEC_INIT_V8QI: |
| return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target); |
| |
| case IX86_BUILTIN_VEC_EXT_V2DF: |
| case IX86_BUILTIN_VEC_EXT_V2DI: |
| case IX86_BUILTIN_VEC_EXT_V4SF: |
| case IX86_BUILTIN_VEC_EXT_V4SI: |
| case IX86_BUILTIN_VEC_EXT_V8HI: |
| case IX86_BUILTIN_VEC_EXT_V16QI: |
| case IX86_BUILTIN_VEC_EXT_V2SI: |
| case IX86_BUILTIN_VEC_EXT_V4HI: |
| return ix86_expand_vec_ext_builtin (arglist, target); |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| case IX86_BUILTIN_VEC_SET_V2DI: |
| case IX86_BUILTIN_VEC_SET_V4SF: |
| case IX86_BUILTIN_VEC_SET_V4SI: |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| case IX86_BUILTIN_VEC_SET_V8HI: |
| case IX86_BUILTIN_VEC_SET_V4HI: |
| /* APPLE LOCAL 5612787 mainline sse4 */ |
| case IX86_BUILTIN_VEC_SET_V16QI: |
| return ix86_expand_vec_set_builtin (arglist); |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| case IX86_BUILTIN_INFQ: |
| { |
| REAL_VALUE_TYPE inf; |
| rtx tmp; |
| |
| real_inf (&inf); |
| tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode); |
| |
| tmp = validize_mem (force_const_mem (mode, tmp)); |
| |
| if (target == 0) |
| target = gen_reg_rtx (mode); |
| |
| emit_move_insn (target, tmp); |
| return target; |
| } |
| |
| case IX86_BUILTIN_FABSQ: |
| return ix86_expand_unop_builtin (CODE_FOR_abstf2, arglist, target, 0); |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| default: |
| break; |
| } |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| for (i = 0, d = bdesc_sse_3arg; |
| i < ARRAY_SIZE (bdesc_sse_3arg); |
| i++, d++) |
| if (d->code == fcode) |
| return ix86_expand_sse_4_operands_builtin (d->icode, |
| arglist, |
| target); |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) |
| if (d->code == fcode) |
| { |
| /* Compares are treated specially. */ |
| if (d->icode == CODE_FOR_sse_maskcmpv4sf3 |
| || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3 |
| || d->icode == CODE_FOR_sse2_maskcmpv2df3 |
| || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) |
| return ix86_expand_sse_compare (d, arglist, target); |
| |
| return ix86_expand_binop_builtin (d->icode, arglist, target); |
| } |
| |
| for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) |
| if (d->code == fcode) |
| return ix86_expand_unop_builtin (d->icode, arglist, target, 0); |
| |
| for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) |
| if (d->code == fcode) |
| return ix86_expand_sse_comi (d, arglist, target); |
| |
| /* APPLE LOCAL begin 4299257 */ |
| for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++) |
| if (d->code == fcode) |
| return ix86_expand_sse_ucomi (d, arglist, target); |
| /* APPLE LOCAL end 4299257 */ |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++) |
| if (d->code == fcode) |
| return ix86_expand_sse_ptest (d, arglist, target); |
| |
| for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++) |
| if (d->code == fcode) |
| return ix86_expand_crc32 (d->icode, arglist, target); |
| |
| for (i = 0, d = bdesc_pcmpestr; |
| i < ARRAY_SIZE (bdesc_pcmpestr); |
| i++, d++) |
| if (d->code == fcode) |
| return ix86_expand_sse_pcmpestr (d, arglist, target); |
| |
| for (i = 0, d = bdesc_pcmpistr; |
| i < ARRAY_SIZE (bdesc_pcmpistr); |
| i++, d++) |
| if (d->code == fcode) |
| return ix86_expand_sse_pcmpistr (d, arglist, target); |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| |
| gcc_unreachable (); |
| } |
| |
| /* Store OPERAND to memory after reload has completed. This means |
| that we can't easily use assign_stack_local. */ |
| rtx |
| ix86_force_to_memory (enum machine_mode mode, rtx operand) |
| { |
| rtx result; |
| |
| gcc_assert (reload_completed); |
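| /* With a red zone we may store below the stack pointer without |
| adjusting it; the ABI guarantees that this area is not clobbered by |
| signal or interrupt handlers. */ |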
| if (TARGET_RED_ZONE) |
| { |
| result = gen_rtx_MEM (mode, |
| gen_rtx_PLUS (Pmode, |
| stack_pointer_rtx, |
| GEN_INT (-RED_ZONE_SIZE))); |
| emit_move_insn (result, operand); |
| } |
| else if (!TARGET_RED_ZONE && TARGET_64BIT) |
| { |
| switch (mode) |
| { |
| case HImode: |
| case SImode: |
| operand = gen_lowpart (DImode, operand); |
| /* FALLTHRU */ |
| case DImode: |
| emit_insn ( |
| gen_rtx_SET (VOIDmode, |
| gen_rtx_MEM (DImode, |
| gen_rtx_PRE_DEC (DImode, |
| stack_pointer_rtx)), |
| operand)); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| result = gen_rtx_MEM (mode, stack_pointer_rtx); |
| } |
| else |
| { |
| switch (mode) |
| { |
| case DImode: |
| { |
| rtx operands[2]; |
| split_di (&operand, 1, operands, operands + 1); |
| emit_insn ( |
| gen_rtx_SET (VOIDmode, |
| gen_rtx_MEM (SImode, |
| gen_rtx_PRE_DEC (Pmode, |
| stack_pointer_rtx)), |
| operands[1])); |
| emit_insn ( |
| gen_rtx_SET (VOIDmode, |
| gen_rtx_MEM (SImode, |
| gen_rtx_PRE_DEC (Pmode, |
| stack_pointer_rtx)), |
| operands[0])); |
| } |
| break; |
| case HImode: |
| /* Store HImodes as SImodes. */ |
| operand = gen_lowpart (SImode, operand); |
| /* FALLTHRU */ |
| case SImode: |
| emit_insn ( |
| gen_rtx_SET (VOIDmode, |
| gen_rtx_MEM (GET_MODE (operand), |
| gen_rtx_PRE_DEC (SImode, |
| stack_pointer_rtx)), |
| operand)); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| result = gen_rtx_MEM (mode, stack_pointer_rtx); |
| } |
| return result; |
| } |
| |
| /* Free the operand from memory. */ |
| void |
| ix86_free_from_memory (enum machine_mode mode) |
| { |
| if (!TARGET_RED_ZONE) |
| { |
| int size; |
| |
| if (mode == DImode || TARGET_64BIT) |
| size = 8; |
| else |
| size = 4; |
| /* Use LEA to deallocate stack space. In peephole2 it will be converted |
| to a pop or add instruction if registers are available. */ |
| emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| GEN_INT (size)))); |
| } |
| } |
| |
| /* Put float CONST_DOUBLE in the constant pool instead of fp regs. |
| QImode must go into class Q_REGS. |
| Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and |
| movdf to do mem-to-mem moves through integer regs. */ |
| enum reg_class |
| ix86_preferred_reload_class (rtx x, enum reg_class class) |
| { |
| enum machine_mode mode = GET_MODE (x); |
| |
| /* We're only allowed to return a subclass of CLASS. Many of the |
| following checks fail for NO_REGS, so eliminate that early. */ |
| if (class == NO_REGS) |
| return NO_REGS; |
| |
| /* All classes can load zeros. */ |
| if (x == CONST0_RTX (mode)) |
| return class; |
| |
| /* Force constants into memory if we are loading a (nonzero) constant into |
| an MMX or SSE register. This is because there are no MMX/SSE instructions |
| to load from a constant. */ |
| if (CONSTANT_P (x) |
| && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))) |
| return NO_REGS; |
| |
| /* APPLE LOCAL begin */ |
| /* MERGE FIXME - ensure that 3501055 is fixed. */ |
| /* MERGE FIXME - ensure that 4206991 is fixed. */ |
| /* APPLE LOCAL end */ |
| /* Prefer SSE regs only, if we can use them for math. */ |
| if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)) |
| return SSE_CLASS_P (class) ? class : NO_REGS; |
| |
| /* Floating-point constants need more complex checks. */ |
| if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) |
| { |
| /* General regs can load everything. */ |
| if (reg_class_subset_p (class, GENERAL_REGS)) |
| return class; |
| |
| /* Floats can load 0 and 1 plus some others. Note that we eliminated |
| zero above. We only want to wind up preferring 80387 registers if |
| we plan on doing computation with them. */ |
| if (TARGET_80387 |
| && standard_80387_constant_p (x)) |
| { |
| /* Limit class to non-sse. */ |
| if (class == FLOAT_SSE_REGS) |
| return FLOAT_REGS; |
| if (class == FP_TOP_SSE_REGS) |
| return FP_TOP_REG; |
| if (class == FP_SECOND_SSE_REGS) |
| return FP_SECOND_REG; |
| if (class == FLOAT_INT_REGS || class == FLOAT_REGS) |
| return class; |
| } |
| |
| return NO_REGS; |
| } |
| |
| /* Generally when we see PLUS here, it's the function invariant |
| (plus soft-fp const_int), which can only be computed into general |
| regs. */ |
| if (GET_CODE (x) == PLUS) |
| return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS; |
| |
| /* QImode constants are easy to load, but non-constant QImode data |
| must go into Q_REGS. */ |
| if (GET_MODE (x) == QImode && !CONSTANT_P (x)) |
| { |
| if (reg_class_subset_p (class, Q_REGS)) |
| return class; |
| if (reg_class_subset_p (Q_REGS, class)) |
| return Q_REGS; |
| return NO_REGS; |
| } |
| |
| return class; |
| } |
| |
| /* Discourage putting floating-point values in SSE registers unless |
| SSE math is being used, and likewise for the 387 registers. */ |
| enum reg_class |
| ix86_preferred_output_reload_class (rtx x, enum reg_class class) |
| { |
| enum machine_mode mode = GET_MODE (x); |
| |
| /* Restrict the output reload class to the register bank that we are doing |
| math on. If we would like not to return a subset of CLASS, reject this |
| alternative: if reload cannot do this, it will still use its choice. */ |
| if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) |
| return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS; |
| |
| if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode)) |
| { |
| if (class == FP_TOP_SSE_REGS) |
| return FP_TOP_REG; |
| else if (class == FP_SECOND_SSE_REGS) |
| return FP_SECOND_REG; |
| else |
| return FLOAT_CLASS_P (class) ? class : NO_REGS; |
| } |
| |
| return class; |
| } |
| |
| /* If we are copying between general and FP registers, we need a memory |
| location. The same is true for SSE and MMX registers. |
| |
| The macro can't work reliably when one of the CLASSES is a class |
| containing registers from multiple units (SSE, MMX, integer). We avoid |
| this by never combining those units in a single alternative in the |
| machine description. |
| Ensure that this constraint holds to avoid unexpected surprises. |
| |
| When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not |
| enforce these sanity checks. */ |
| |
| int |
| ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, |
| enum machine_mode mode, int strict) |
| { |
| if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) |
| || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) |
| || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) |
| || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) |
| || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) |
| || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) |
| { |
| gcc_assert (!strict); |
| return true; |
| } |
| |
| if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) |
| return true; |
| |
| /* ??? This is a lie. We do have moves between mmx/general, and for |
| mmx/sse2. But by saying we need secondary memory we discourage the |
| register allocator from using the mmx registers unless needed. */ |
| if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) |
| return true; |
| |
| if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) |
| { |
| /* SSE1 doesn't have any direct moves from other classes. */ |
| if (!TARGET_SSE2) |
| return true; |
| |
| /* If the target says that inter-unit moves are more expensive |
| than moving through memory, then don't generate them. */ |
| if (!TARGET_INTER_UNIT_MOVES && !optimize_size) |
| return true; |
| |
| /* Between SSE and general, we have moves no larger than word size. */ |
| if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| return true; |
| |
| /* ??? For the cost of one register reformat penalty, we could use |
| the same instructions to move SFmode and DFmode data, but the |
| relevant move patterns don't support those alternatives. */ |
| if (mode == SFmode || mode == DFmode) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Return true if the registers in CLASS cannot represent the change from |
| modes FROM to TO. */ |
| |
| bool |
| ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, |
| enum reg_class class) |
| { |
| if (from == to) |
| return false; |
| |
| /* x87 registers can't do subreg at all, as all values are reformatted |
| to extended precision. */ |
| if (MAYBE_FLOAT_CLASS_P (class)) |
| return true; |
| |
| if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class)) |
| { |
| /* Vector registers do not support QI or HImode loads. If we don't |
| disallow a change to these modes, reload will assume it's ok to |
| drop the subreg from (subreg:SI (reg:HI 100) 0). This affects |
| the vec_dupv4hi pattern. */ |
| if (GET_MODE_SIZE (from) < 4) |
| return true; |
| |
| /* Vector registers do not support subreg with nonzero offsets, which |
| are otherwise valid for integer registers. Since we can't see |
| whether we have a nonzero offset from here, prohibit all |
| nonparadoxical subregs changing size. */ |
| if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Return the cost of moving data from a register in class CLASS1 to |
| one in class CLASS2. |
| |
| It is not required that the cost always equal 2 when FROM is the same as TO; |
| on some machines it is expensive to move between registers if they are not |
| general registers. */ |
| |
| int |
| ix86_register_move_cost (enum machine_mode mode, enum reg_class class1, |
| enum reg_class class2) |
| { |
| /* In case we require secondary memory, compute the cost of the store |
| followed by the load. To avoid bad register allocation choices, this |
| needs to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ |
| |
| if (ix86_secondary_memory_needed (class1, class2, mode, 0)) |
| { |
| int cost = 1; |
| |
| cost += MAX (MEMORY_MOVE_COST (mode, class1, 0), |
| MEMORY_MOVE_COST (mode, class1, 1)); |
| cost += MAX (MEMORY_MOVE_COST (mode, class2, 0), |
| MEMORY_MOVE_COST (mode, class2, 1)); |
| |
| /* When copying from a general purpose register we may emit multiple |
| stores followed by a single load, causing a memory size mismatch stall. |
| Count this as an arbitrarily high cost of 20. */ |
| if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode)) |
| cost += 20; |
| |
| /* In the case of FP/MMX moves, the registers actually overlap, and we |
| have to switch modes in order to treat them differently. */ |
| if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) |
| || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) |
| cost += 20; |
| |
| return cost; |
| } |
| |
| /* Moves between SSE/MMX and integer unit are expensive. */ |
| if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) |
| || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) |
| return ix86_cost->mmxsse_to_integer; |
| if (MAYBE_FLOAT_CLASS_P (class1)) |
| return ix86_cost->fp_move; |
| if (MAYBE_SSE_CLASS_P (class1)) |
| return ix86_cost->sse_move; |
| if (MAYBE_MMX_CLASS_P (class1)) |
| return ix86_cost->mmx_move; |
| return 2; |
| } |
| |
| /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ |
| |
| bool |
| ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) |
| { |
| /* The flags register, and only the flags register, can hold CCmode values. */ |
| if (CC_REGNO_P (regno)) |
| return GET_MODE_CLASS (mode) == MODE_CC; |
| if (GET_MODE_CLASS (mode) == MODE_CC |
| || GET_MODE_CLASS (mode) == MODE_RANDOM |
| || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) |
| return 0; |
| if (FP_REGNO_P (regno)) |
| return VALID_FP_MODE_P (mode); |
| if (SSE_REGNO_P (regno)) |
| { |
| /* We implement the move patterns for all vector modes into and |
| out of SSE registers, even when no operation instructions |
| are available. */ |
| return (VALID_SSE_REG_MODE (mode) |
| || VALID_SSE2_REG_MODE (mode) |
| || VALID_MMX_REG_MODE (mode) |
| || VALID_MMX_REG_MODE_3DNOW (mode)); |
| } |
| if (MMX_REGNO_P (regno)) |
| { |
| /* We implement the move patterns for 3DNOW modes even in MMX mode, |
| so if the register is available at all, then we can move data of |
| the given mode into or out of it. */ |
| return (VALID_MMX_REG_MODE (mode) |
| || VALID_MMX_REG_MODE_3DNOW (mode)); |
| } |
| |
| if (mode == QImode) |
| { |
| /* Take care with QImode values - they can live in non-QI regs, |
| but then they do cause partial register stalls. */ |
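| /* Background note: only the four legacy byte registers %al/%dl/%cl/%bl |
| (regnos 0-3) have byte subregisters in 32-bit mode; with a REX prefix |
| in 64-bit mode every GPR gains a byte form, hence the TARGET_64BIT |
| test below. */ |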
| if (regno < 4 || TARGET_64BIT) |
| return 1; |
| if (!TARGET_PARTIAL_REG_STALL) |
| return 1; |
| return reload_in_progress || reload_completed; |
| } |
| /* We handle both integer and floats in the general purpose registers. */ |
| else if (VALID_INT_MODE_P (mode)) |
| return 1; |
| else if (VALID_FP_MODE_P (mode)) |
| return 1; |
| /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go |
| on to use that value in smaller contexts, this can easily force a |
| pseudo to be allocated to GENERAL_REGS. Since this is no worse than |
| supporting DImode, allow it. */ |
| else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a |
| tieable integer mode. */ |
| |
| static bool |
| ix86_tieable_integer_mode_p (enum machine_mode mode) |
| { |
| switch (mode) |
| { |
| case HImode: |
| case SImode: |
| return true; |
| |
| case QImode: |
| return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; |
| |
| case DImode: |
| /* APPLE LOCAL 5695218 convert int to logical bool */ |
| return !!TARGET_64BIT; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* Return true if MODE1 is accessible in a register that can hold MODE2 |
| without copying. That is, all register classes that can hold MODE2 |
| can also hold MODE1. */ |
| |
| bool |
| ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) |
| { |
| if (mode1 == mode2) |
| return true; |
| |
| if (ix86_tieable_integer_mode_p (mode1) |
| && ix86_tieable_integer_mode_p (mode2)) |
| return true; |
| |
| /* MODE2 being XFmode implies fp stack or general regs, which means we |
| can tie any smaller floating point modes to it. Note that we do not |
| tie this with TFmode. */ |
| if (mode2 == XFmode) |
| return mode1 == SFmode || mode1 == DFmode; |
| |
| /* MODE2 being DFmode implies fp stack, general or sse regs, which means |
| that we can tie it with SFmode. */ |
| if (mode2 == DFmode) |
| return mode1 == SFmode; |
| |
| /* If MODE2 is only appropriate for an SSE register, then tie with |
| any other mode acceptable to SSE registers. */ |
| if (GET_MODE_SIZE (mode2) >= 8 |
| && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) |
| return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1); |
| |
| /* If MODE2 is appropriate for an MMX (or SSE) register, then tie |
| with any other mode acceptable to MMX registers. */ |
| if (GET_MODE_SIZE (mode2) == 8 |
| && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) |
| return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1); |
| |
| return false; |
| } |
| |
| /* Return the cost of moving data of mode M between a |
| register and memory. A value of 2 is the default; this cost is |
| relative to those in `REGISTER_MOVE_COST'. |
| |
| If moving between registers and memory is more expensive than |
| between two registers, you should define this macro to express the |
| relative cost. |
| |
| Also model the increased cost of moving QImode values in non |
| Q_REGS classes. |
| */ |
| int |
| ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in) |
| { |
| if (FLOAT_CLASS_P (class)) |
| { |
| int index; |
| switch (mode) |
| { |
| case SFmode: |
| index = 0; |
| break; |
| case DFmode: |
| index = 1; |
| break; |
| case XFmode: |
| index = 2; |
| break; |
| default: |
| return 100; |
| } |
| return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; |
| } |
| if (SSE_CLASS_P (class)) |
| { |
| int index; |
| switch (GET_MODE_SIZE (mode)) |
| { |
| case 4: |
| index = 0; |
| break; |
| case 8: |
| index = 1; |
| break; |
| case 16: |
| index = 2; |
| break; |
| default: |
| return 100; |
| } |
| return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; |
| } |
| if (MMX_CLASS_P (class)) |
| { |
| int index; |
| switch (GET_MODE_SIZE (mode)) |
| { |
| case 4: |
| index = 0; |
| break; |
| case 8: |
| index = 1; |
| break; |
| default: |
| return 100; |
| } |
| return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; |
| } |
| switch (GET_MODE_SIZE (mode)) |
| { |
| case 1: |
| if (in) |
| return (Q_CLASS_P (class) ? ix86_cost->int_load[0] |
| : ix86_cost->movzbl_load); |
| else |
| return (Q_CLASS_P (class) ? ix86_cost->int_store[0] |
| : ix86_cost->int_store[0] + 4); |
| break; |
| case 2: |
| return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; |
| default: |
| /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */ |
| if (mode == TFmode) |
| mode = XFmode; |
| return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2]) |
| * (((int) GET_MODE_SIZE (mode) |
| + UNITS_PER_WORD - 1) / UNITS_PER_WORD)); |
| } |
| } |
| |
| /* Compute a (partial) cost for rtx X. Return true if the complete |
| cost has been computed, and false if subexpressions should be |
| scanned. In either case, *TOTAL contains the cost result. */ |
| |
| static bool |
| ix86_rtx_costs (rtx x, int code, int outer_code, int *total) |
| { |
| enum machine_mode mode = GET_MODE (x); |
| |
| switch (code) |
| { |
| case CONST_INT: |
| case CONST: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) |
| *total = 3; |
| else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) |
| *total = 2; |
| else if (flag_pic && SYMBOLIC_CONST (x) |
| && (!TARGET_64BIT |
| || (GET_CODE (x) != LABEL_REF |
| && (GET_CODE (x) != SYMBOL_REF |
| || !SYMBOL_REF_LOCAL_P (x))))) |
| *total = 1; |
| else |
| *total = 0; |
| return true; |
| |
| case CONST_DOUBLE: |
| if (mode == VOIDmode) |
| *total = 0; |
| else |
| switch (standard_80387_constant_p (x)) |
| { |
| case 0: |
| case -1: |
| /* Start with (MEM (SYMBOL_REF)), since that's where |
| it'll probably end up. Add a penalty for size. */ |
| *total = (COSTS_N_INSNS (1) |
| + (flag_pic != 0 && !TARGET_64BIT) |
| + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); |
| break; |
| case 1: /* 0.0 */ |
| *total = 1; |
| break; |
| default: /* Other constants */ |
| *total = 2; |
| break; |
| } |
| return true; |
| |
| case ZERO_EXTEND: |
| /* Zero extension is often completely free on x86_64, so make |
| it as cheap as possible. */ |
| if (TARGET_64BIT && mode == DImode |
| && GET_MODE (XEXP (x, 0)) == SImode) |
| *total = 1; |
| else if (TARGET_ZERO_EXTEND_WITH_AND) |
| *total = ix86_cost->add; |
| else |
| *total = ix86_cost->movzx; |
| return false; |
| |
| case SIGN_EXTEND: |
| *total = ix86_cost->movsx; |
| return false; |
| |
| case ASHIFT: |
| if (GET_CODE (XEXP (x, 1)) == CONST_INT |
| && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) |
| { |
| HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
| if (value == 1) |
| { |
| *total = ix86_cost->add; |
| return false; |
| } |
| if ((value == 2 || value == 3) |
| && ix86_cost->lea <= ix86_cost->shift_const) |
| { |
| *total = ix86_cost->lea; |
| return false; |
| } |
| } |
| /* FALLTHRU */ |
| |
| case ROTATE: |
| case ASHIFTRT: |
| case LSHIFTRT: |
| case ROTATERT: |
| if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) |
| { |
| if (GET_CODE (XEXP (x, 1)) == CONST_INT) |
| { |
| if (INTVAL (XEXP (x, 1)) > 32) |
| *total = ix86_cost->shift_const + COSTS_N_INSNS (2); |
| else |
| *total = ix86_cost->shift_const * 2; |
| } |
| else |
| { |
| if (GET_CODE (XEXP (x, 1)) == AND) |
| *total = ix86_cost->shift_var * 2; |
| else |
| *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2); |
| } |
| } |
| else |
| { |
| if (GET_CODE (XEXP (x, 1)) == CONST_INT) |
| *total = ix86_cost->shift_const; |
| else |
| *total = ix86_cost->shift_var; |
| } |
| return false; |
| |
| case MULT: |
| if (FLOAT_MODE_P (mode)) |
| { |
| *total = ix86_cost->fmul; |
| return false; |
| } |
| else |
| { |
| rtx op0 = XEXP (x, 0); |
| rtx op1 = XEXP (x, 1); |
| int nbits; |
| if (GET_CODE (XEXP (x, 1)) == CONST_INT) |
| { |
| unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); |
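| /* Count the bits set in the multiplier (Kernighan's trick: each |
| iteration of value &= value - 1 clears the lowest set bit). */ |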
| for (nbits = 0; value != 0; value &= value - 1) |
| nbits++; |
| } |
| else |
| /* This is arbitrary. */ |
| nbits = 7; |
| |
| /* Compute costs correctly for widening multiplication. */ |
| if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) |
| && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 |
| == GET_MODE_SIZE (mode)) |
| { |
| int is_mulwiden = 0; |
| enum machine_mode inner_mode = GET_MODE (op0); |
| |
| if (GET_CODE (op0) == GET_CODE (op1)) |
| is_mulwiden = 1, op1 = XEXP (op1, 0); |
| else if (GET_CODE (op1) == CONST_INT) |
| { |
| if (GET_CODE (op0) == SIGN_EXTEND) |
| is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) |
| == INTVAL (op1); |
| else |
| is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); |
| } |
| |
| if (is_mulwiden) |
| op0 = XEXP (op0, 0), mode = GET_MODE (op0); |
| } |
| |
| *total = (ix86_cost->mult_init[MODE_INDEX (mode)] |
| + nbits * ix86_cost->mult_bit |
| + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code)); |
| |
| return true; |
| } |
| |
| case DIV: |
| case UDIV: |
| case MOD: |
| case UMOD: |
| if (FLOAT_MODE_P (mode)) |
| *total = ix86_cost->fdiv; |
| else |
| *total = ix86_cost->divide[MODE_INDEX (mode)]; |
| return false; |
| |
| case PLUS: |
| if (FLOAT_MODE_P (mode)) |
| *total = ix86_cost->fadd; |
| else if (GET_MODE_CLASS (mode) == MODE_INT |
| && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) |
| { |
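| /* The shapes matched below are the ones LEA can encode; e.g. |
| (plus (plus (mult reg 4) reg) const) costs as a single |
| "leal const(%base,%index,4), %dest". (Illustrative note.) */ |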
| if (GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT |
| && CONSTANT_P (XEXP (x, 1))) |
| { |
| HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); |
| if (val == 2 || val == 4 || val == 8) |
| { |
| *total = ix86_cost->lea; |
| *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); |
| *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), |
| outer_code); |
| *total += rtx_cost (XEXP (x, 1), outer_code); |
| return true; |
| } |
| } |
| else if (GET_CODE (XEXP (x, 0)) == MULT |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) |
| { |
| HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); |
| if (val == 2 || val == 4 || val == 8) |
| { |
| *total = ix86_cost->lea; |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); |
| *total += rtx_cost (XEXP (x, 1), outer_code); |
| return true; |
| } |
| } |
| else if (GET_CODE (XEXP (x, 0)) == PLUS) |
| { |
| *total = ix86_cost->lea; |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); |
| *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); |
| *total += rtx_cost (XEXP (x, 1), outer_code); |
| return true; |
| } |
| } |
| /* FALLTHRU */ |
| |
| case MINUS: |
| if (FLOAT_MODE_P (mode)) |
| { |
| *total = ix86_cost->fadd; |
| return false; |
| } |
| /* FALLTHRU */ |
| |
| case AND: |
| case IOR: |
| case XOR: |
| if (!TARGET_64BIT && mode == DImode) |
| { |
| *total = (ix86_cost->add * 2 |
| + (rtx_cost (XEXP (x, 0), outer_code) |
| << (GET_MODE (XEXP (x, 0)) != DImode)) |
| + (rtx_cost (XEXP (x, 1), outer_code) |
| << (GET_MODE (XEXP (x, 1)) != DImode))); |
| return true; |
| } |
| /* FALLTHRU */ |
| |
| case NEG: |
| if (FLOAT_MODE_P (mode)) |
| { |
| *total = ix86_cost->fchs; |
| return false; |
| } |
| /* FALLTHRU */ |
| |
| case NOT: |
| if (!TARGET_64BIT && mode == DImode) |
| *total = ix86_cost->add * 2; |
| else |
| *total = ix86_cost->add; |
| return false; |
| |
| case COMPARE: |
| if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT |
| && XEXP (XEXP (x, 0), 1) == const1_rtx |
| && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT |
| && XEXP (x, 1) == const0_rtx) |
| { |
| /* This kind of construct is implemented using test[bwl]. |
| Treat it as if we had an AND. */ |
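| /* E.g. (compare (zero_extract x 1 5) (const_int 0)) becomes |
| "testl $0x20, x", a single-bit test against an immediate mask. */ |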
| *total = (ix86_cost->add |
| + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code) |
| + rtx_cost (const1_rtx, outer_code)); |
| return true; |
| } |
| return false; |
| |
| case FLOAT_EXTEND: |
| if (!TARGET_SSE_MATH |
| || mode == XFmode |
| || (mode == DFmode && !TARGET_SSE2)) |
| /* For standard 80387 constants, raise the cost to prevent |
| compress_float_constant() from generating a load from memory. */ |
| switch (standard_80387_constant_p (XEXP (x, 0))) |
| { |
| case -1: |
| case 0: |
| *total = 0; |
| break; |
| case 1: /* 0.0 */ |
| *total = 1; |
| break; |
| default: |
| *total = (x86_ext_80387_constants & TUNEMASK |
| || optimize_size |
| ? 1 : 0); |
| } |
| return false; |
| |
| case ABS: |
| if (FLOAT_MODE_P (mode)) |
| *total = ix86_cost->fabs; |
| return false; |
| |
| case SQRT: |
| if (FLOAT_MODE_P (mode)) |
| *total = ix86_cost->fsqrt; |
| return false; |
| |
| case UNSPEC: |
| if (XINT (x, 1) == UNSPEC_TP) |
| *total = 0; |
| return false; |
| |
| default: |
| return false; |
| } |
| } |
| |
| #if TARGET_MACHO |
| |
| static int current_machopic_label_num; |
| |
| /* Given a symbol name and its associated stub, write out the |
| definition of the stub. */ |
| |
| void |
| machopic_output_stub (FILE *file, const char *symb, const char *stub) |
| { |
| unsigned int length; |
| char *binder_name, *symbol_name, lazy_ptr_name[32]; |
| int label = ++current_machopic_label_num; |
| |
| /* For 64-bit we shouldn't get here. */ |
| gcc_assert (!TARGET_64BIT); |
| |
| /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ |
| symb = (*targetm.strip_name_encoding) (symb); |
| |
| length = strlen (stub); |
| binder_name = alloca (length + 32); |
| GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); |
| |
| length = strlen (symb); |
| symbol_name = alloca (length + 32); |
| GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); |
| |
| sprintf (lazy_ptr_name, "L%d$lz", label); |
| |
| /* APPLE LOCAL begin deep branch prediction pic-base */ |
| /* APPLE LOCAL begin AT&T-style stub 4164563 */ |
| /* Choose one of four possible sections for this stub. */ |
| if (MACHOPIC_ATT_STUB) |
| switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); |
| else if (MACHOPIC_PURE) |
| /* APPLE LOCAL end AT&T-style stub 4164563 */ |
| { |
| if (TARGET_DEEP_BRANCH_PREDICTION) |
| switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); |
| else |
| switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); |
| } |
| else |
| /* APPLE LOCAL end deep branch prediction pic-base */ |
| switch_to_section (darwin_sections[machopic_symbol_stub_section]); |
| |
| fprintf (file, "%s:\n", stub); |
| fprintf (file, "\t.indirect_symbol %s\n", symbol_name); |
| |
| /* APPLE LOCAL begin use %ecx in stubs 4146993 */ |
| /* APPLE LOCAL begin deep branch prediction pic-base */ |
| /* APPLE LOCAL begin AT&T-style stub 4164563 */ |
| if (MACHOPIC_ATT_STUB) |
| { |
| fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n"); |
| } |
| else if (MACHOPIC_PURE) |
| /* APPLE LOCAL end AT&T-style stub 4164563 */ |
| { |
| /* PIC stub. */ |
| if (TARGET_DEEP_BRANCH_PREDICTION) |
| { |
| /* 25-byte PIC stub using "CALL get_pc_thunk". */ |
| rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); |
| output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */ |
| fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label); |
| } |
| else |
| { |
| /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */ |
| fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label); |
| fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label); |
| } |
| fprintf (file, "\tjmp\t*%%ecx\n"); |
| } |
| else /* 16-byte -mdynamic-no-pic stub. */ |
| fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); |
| |
| /* APPLE LOCAL begin AT&T-style stub 4164563 */ |
| /* The AT&T-style ("self-modifying") stub is not lazily bound, thus |
| it needs no stub-binding-helper. */ |
| if (MACHOPIC_ATT_STUB) |
| return; |
| /* APPLE LOCAL end AT&T-style stub 4164563 */ |
| |
| /* The "stub_binding_helper" is a fragment that gets executed only |
| once, the first time this stub is invoked (then it becomes "dead |
| code"). It asks the dynamic linker to set the |
| lazy_symbol_pointer to point at the function we want |
| (e.g. printf) so that subsequent invocations of this stub go |
| directly to that dynamically-linked callee. Other UN*X systems |
| use similar stubs, but those are generated by the static linker |
| and never appear in assembly files. */ |
| /* APPLE LOCAL end deep branch prediction pic-base */ |
| fprintf (file, "%s:\n", binder_name); |
| |
| /* APPLE LOCAL begin deep branch prediction pic-base * tabify insns */ |
| if (MACHOPIC_PURE) |
| { |
| fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name); |
| fprintf (file, "\tpushl\t%%ecx\n"); |
| } |
| else |
| fprintf (file, "\t pushl\t$%s\n", lazy_ptr_name); |
| |
| fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); |
| /* APPLE LOCAL end deep branch prediction pic-base * tabify insns */ |
| /* APPLE LOCAL end use %ecx in stubs 4146993 */ |
| |
| /* APPLE LOCAL begin deep branch prediction pic-base. */ |
| /* N.B. Keep the correspondence of these |
| 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the |
| old-pic/new-pic/non-pic stubs; altering this will break |
| compatibility with existing dylibs. */ |
| if (MACHOPIC_PURE) |
| { |
| /* PIC stubs. */ |
| if (TARGET_DEEP_BRANCH_PREDICTION) |
| /* 25-byte PIC stub using "CALL get_pc_thunk". */ |
| switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); |
| else |
| /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */ |
| switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); |
| } |
| else |
| /* 16-byte -mdynamic-no-pic stub. */ |
| switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]); |
| |
| fprintf (file, "%s:\n", lazy_ptr_name); |
| fprintf (file, "\t.indirect_symbol %s\n", symbol_name); |
| fprintf (file, "\t.long\t%s\n", binder_name); |
| } |
| /* APPLE LOCAL end deep branch prediction pic-base */ |
| |
| void |
| darwin_x86_file_end (void) |
| { |
| darwin_file_end (); |
| ix86_file_end (); |
| } |
| |
| /* APPLE LOCAL begin 4457939 stack alignment mishandled */ |
| void |
| ix86_darwin_init_expanders (void) |
| { |
| /* <rdar://problem/4471596> stack alignment is not handled properly |
| |
| Please remove this entire function when addressing this |
| Radar. Please be sure to delete the definition of INIT_EXPANDERS |
| in i386/darwin.h as well. */ |
| /* The Darwin/x86_32 stack pointer will be 16-byte aligned at every |
| CALL, but the frame pointer, when used, will be offset 8 bytes |
| from 16-byte alignment (the size of the return address plus the |
| saved frame pointer). */ |
| if (cfun && cfun->emit |
| && cfun->emit->regno_pointer_align) |
| { |
| REGNO_POINTER_ALIGN (STACK_POINTER_REGNUM) = STACK_BOUNDARY; |
| REGNO_POINTER_ALIGN (FRAME_POINTER_REGNUM) = BITS_PER_WORD; |
| REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = BITS_PER_WORD; |
| REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) = BITS_PER_WORD; |
| } |
| } |
| /* APPLE LOCAL end 4457939 stack alignment mishandled */ |
| #endif /* TARGET_MACHO */ |
| |
| /* Order the registers for the register allocator. */ |
| |
| void |
| x86_order_regs_for_local_alloc (void) |
| { |
| int pos = 0; |
| int i; |
| |
| /* First allocate the local general purpose registers. */ |
| for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| if (GENERAL_REGNO_P (i) && call_used_regs[i]) |
| reg_alloc_order [pos++] = i; |
| |
| /* Global general purpose registers. */ |
| for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| if (GENERAL_REGNO_P (i) && !call_used_regs[i]) |
| reg_alloc_order [pos++] = i; |
| |
| /* x87 registers come first in case we are doing FP math |
| using them. */ |
| if (!TARGET_SSE_MATH) |
| for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
| reg_alloc_order [pos++] = i; |
| |
| /* SSE registers. */ |
| for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) |
| reg_alloc_order [pos++] = i; |
| for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) |
| reg_alloc_order [pos++] = i; |
| |
| /* x87 registers. */ |
| if (TARGET_SSE_MATH) |
| for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) |
| reg_alloc_order [pos++] = i; |
| |
| for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) |
| reg_alloc_order [pos++] = i; |
| |
| /* Initialize the rest of the array, as we do not allocate some |
| registers at all. */ |
| while (pos < FIRST_PSEUDO_REGISTER) |
| reg_alloc_order [pos++] = 0; |
| } |
| |
| /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in |
| struct attribute_spec.handler. */ |
| static tree |
| ix86_handle_struct_attribute (tree *node, tree name, |
| tree args ATTRIBUTE_UNUSED, |
| int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) |
| { |
| tree *type = NULL; |
| if (DECL_P (*node)) |
| { |
| if (TREE_CODE (*node) == TYPE_DECL) |
| type = &TREE_TYPE (*node); |
| } |
| else |
| type = node; |
| |
| if (!(type && (TREE_CODE (*type) == RECORD_TYPE |
| || TREE_CODE (*type) == UNION_TYPE))) |
| { |
| warning (OPT_Wattributes, "%qs attribute ignored", |
| IDENTIFIER_POINTER (name)); |
| *no_add_attrs = true; |
| } |
| |
| else if ((is_attribute_p ("ms_struct", name) |
| && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) |
| || ((is_attribute_p ("gcc_struct", name) |
| && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) |
| { |
| warning (OPT_Wattributes, "%qs incompatible attribute ignored", |
| IDENTIFIER_POINTER (name)); |
| *no_add_attrs = true; |
| } |
| |
| return NULL_TREE; |
| } |
| |
| static bool |
| ix86_ms_bitfield_layout_p (tree record_type) |
| { |
| return (TARGET_MS_BITFIELD_LAYOUT && |
| !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) |
| || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); |
| } |
| |
| /* Returns an expression indicating where the this parameter is |
| located on entry to the FUNCTION. */ |
| |
| static rtx |
| x86_this_parameter (tree function) |
| { |
| tree type = TREE_TYPE (function); |
| |
| if (TARGET_64BIT) |
| { |
| int n = aggregate_value_p (TREE_TYPE (type), type) != 0; |
| return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); |
| } |
| |
| if (ix86_function_regparm (type, function) > 0) |
| { |
| tree parm; |
| |
| parm = TYPE_ARG_TYPES (type); |
| /* Figure out whether or not the function has a variable number of |
| arguments. */ |
| for (; parm; parm = TREE_CHAIN (parm)) |
| if (TREE_VALUE (parm) == void_type_node) |
| break; |
| /* If not, the this parameter is in the first argument. */ |
| if (parm) |
| { |
| int regno = 0; |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) |
| regno = 2; |
| return gen_rtx_REG (SImode, regno); |
| } |
| } |
| |
| if (aggregate_value_p (TREE_TYPE (type), type)) |
| return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); |
| else |
| return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); |
| } |
| |
| /* Determine whether x86_output_mi_thunk can succeed. */ |
| |
| static bool |
| x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED, |
| HOST_WIDE_INT delta ATTRIBUTE_UNUSED, |
| HOST_WIDE_INT vcall_offset, tree function) |
| { |
| /* 64-bit can handle anything. */ |
| if (TARGET_64BIT) |
| return true; |
| |
| /* For 32-bit, everything's fine if we have one free register. */ |
| if (ix86_function_regparm (TREE_TYPE (function), function) < 3) |
| return true; |
| |
| /* Need a free register for vcall_offset. */ |
| if (vcall_offset) |
| return false; |
| |
| /* Need a free register for GOT references. */ |
| if (flag_pic && !(*targetm.binds_local_p) (function)) |
| return false; |
| |
| /* Otherwise ok. */ |
| return true; |
| } |
| |
| /* Output the assembler code for a thunk function. THUNK_DECL is the |
| declaration for the thunk function itself, FUNCTION is the decl for |
| the target function. DELTA is an immediate constant offset to be |
| added to THIS. If VCALL_OFFSET is nonzero, the word at |
| *(*this + vcall_offset) should be added to THIS. */ |
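| /* E.g. a 32-bit thunk with DELTA == 4 and no VCALL_OFFSET, for a |
| locally-bound FUNCTION taking its arguments on the stack, comes out |
| as roughly: |
| addl $4, 4(%esp) |
| jmp function |
| since the this pointer sits in the first stack argument slot. */ |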
| |
| static void |
| x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, |
| tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, |
| HOST_WIDE_INT vcall_offset, tree function) |
| { |
| rtx xops[3]; |
| rtx this = x86_this_parameter (function); |
| rtx this_reg, tmp; |
| |
| /* If VCALL_OFFSET, we'll need THIS in a register. Might as well |
| pull it in now and let DELTA benefit. */ |
| if (REG_P (this)) |
| this_reg = this; |
| else if (vcall_offset) |
| { |
| /* Put the this parameter into %eax. */ |
| xops[0] = this; |
| xops[1] = this_reg = gen_rtx_REG (Pmode, 0); |
| output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); |
| } |
| else |
| this_reg = NULL_RTX; |
| |
| /* Adjust the this parameter by a fixed constant. */ |
| if (delta) |
| { |
| xops[0] = GEN_INT (delta); |
| xops[1] = this_reg ? this_reg : this; |
| if (TARGET_64BIT) |
| { |
| if (!x86_64_general_operand (xops[0], DImode)) |
| { |
| tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); |
| xops[1] = tmp; |
| output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); |
| xops[0] = tmp; |
| xops[1] = this; |
| } |
| output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); |
| } |
| else |
| output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); |
| } |
| |
| /* Adjust the this parameter by a value stored in the vtable. */ |
| if (vcall_offset) |
| { |
| if (TARGET_64BIT) |
| tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); |
| else |
| { |
| int tmp_regno = 2 /* ECX */; |
| if (lookup_attribute ("fastcall", |
| TYPE_ATTRIBUTES (TREE_TYPE (function)))) |
| tmp_regno = 0 /* EAX */; |
| tmp = gen_rtx_REG (SImode, tmp_regno); |
| } |
| |
| xops[0] = gen_rtx_MEM (Pmode, this_reg); |
| xops[1] = tmp; |
| if (TARGET_64BIT) |
| output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); |
| else |
| output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); |
| |
| /* Adjust the this parameter. */ |
| xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); |
| if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) |
| { |
| rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); |
| xops[0] = GEN_INT (vcall_offset); |
| xops[1] = tmp2; |
| output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); |
| xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); |
| } |
| xops[1] = this_reg; |
| if (TARGET_64BIT) |
| output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); |
| else |
| output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); |
| } |
| |
| /* If necessary, drop THIS back to its stack slot. */ |
| if (this_reg && this_reg != this) |
| { |
| xops[0] = this_reg; |
| xops[1] = this; |
| output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); |
| } |
| |
| xops[0] = XEXP (DECL_RTL (function), 0); |
| if (TARGET_64BIT) |
| { |
| if (!flag_pic || (*targetm.binds_local_p) (function)) |
| output_asm_insn ("jmp\t%P0", xops); |
| else |
| { |
| tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); |
| tmp = gen_rtx_CONST (Pmode, tmp); |
| tmp = gen_rtx_MEM (QImode, tmp); |
| xops[0] = tmp; |
| output_asm_insn ("jmp\t%A0", xops); |
| } |
| } |
| else |
| { |
| if (!flag_pic || (*targetm.binds_local_p) (function)) |
| output_asm_insn ("jmp\t%P0", xops); |
| else |
| #if TARGET_MACHO |
| if (TARGET_MACHO) |
| { |
| rtx sym_ref = XEXP (DECL_RTL (function), 0); |
| /* APPLE LOCAL begin axe stubs 5571540 */ |
| if (darwin_stubs) |
| sym_ref = (gen_rtx_SYMBOL_REF |
| (Pmode, |
| machopic_indirection_name (sym_ref, /*stub_p=*/true))); |
| tmp = gen_rtx_MEM (QImode, sym_ref); |
| /* APPLE LOCAL end axe stubs 5571540 */ |
| xops[0] = tmp; |
| output_asm_insn ("jmp\t%0", xops); |
| } |
| else |
| #endif /* TARGET_MACHO */ |
| { |
| tmp = gen_rtx_REG (SImode, 2 /* ECX */); |
| output_set_got (tmp, NULL_RTX); |
| |
| xops[1] = tmp; |
| output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); |
| output_asm_insn ("jmp\t{*}%1", xops); |
| } |
| } |
| } |
| |
| static void |
| x86_file_start (void) |
| { |
| default_file_start (); |
| #if TARGET_MACHO |
| darwin_file_start (); |
| #endif |
| if (X86_FILE_START_VERSION_DIRECTIVE) |
| fputs ("\t.version\t\"01.01\"\n", asm_out_file); |
| if (X86_FILE_START_FLTUSED) |
| fputs ("\t.global\t__fltused\n", asm_out_file); |
| if (ix86_asm_dialect == ASM_INTEL) |
| fputs ("\t.intel_syntax\n", asm_out_file); |
| } |
| |
| int |
| x86_field_alignment (tree field, int computed) |
| { |
| enum machine_mode mode; |
| tree type = TREE_TYPE (field); |
| |
| if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) |
| return computed; |
| /* APPLE LOCAL begin mac68k alignment */ |
| #if TARGET_MACHO |
| if (OPTION_ALIGN_MAC68K) |
| { |
| if (computed >= 128) |
| return computed; |
| return MIN (computed, 16); |
| } |
| #endif |
| /* APPLE LOCAL end mac68k alignment */ |
| mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE |
| ? get_inner_array_type (type) : type); |
| if (mode == DFmode || mode == DCmode |
| || GET_MODE_CLASS (mode) == MODE_INT |
| || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) |
| return MIN (32, computed); |
| return computed; |
| } |
| |
| /* Output assembler code to FILE to increment profiler label # LABELNO |
| for profiling a function entry. */ |
| void |
| x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) |
| { |
| if (TARGET_64BIT) |
| if (flag_pic) |
| { |
| #ifndef NO_PROFILE_COUNTERS |
| fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); |
| #endif |
| fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); |
| } |
| else |
| { |
| #ifndef NO_PROFILE_COUNTERS |
| fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); |
| #endif |
| fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); |
| } |
| else if (flag_pic) |
| { |
| #ifndef NO_PROFILE_COUNTERS |
| fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", |
| LPREFIX, labelno, PROFILE_COUNT_REGISTER); |
| #endif |
| fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); |
| } |
| else |
| { |
| #ifndef NO_PROFILE_COUNTERS |
| fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, |
| PROFILE_COUNT_REGISTER); |
| #endif |
| fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); |
| } |
| } |
| |
| /* We don't have exact information about the insn sizes, but we may quite |
| safely assume that we know about all 1-byte insns and about memory |
| address sizes. This is enough to eliminate unnecessary padding in |
| 99% of cases. */ |
| |
| static int |
| min_insn_size (rtx insn) |
| { |
| int l = 0; |
| |
| if (!INSN_P (insn) || !active_insn_p (insn)) |
| return 0; |
| |
| /* Discard alignments we've emitted ourselves, as well as jump tables. */ |
| if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE |
| && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) |
| return 0; |
| if (GET_CODE (insn) == JUMP_INSN |
| && (GET_CODE (PATTERN (insn)) == ADDR_VEC |
| || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) |
| return 0; |
| |
| /* Important case - calls are always 5 bytes. |
| It is common to have many calls in a row. */ |
| if (GET_CODE (insn) == CALL_INSN |
| && symbolic_reference_mentioned_p (PATTERN (insn)) |
| && !SIBLING_CALL_P (insn)) |
| return 5; |
| if (get_attr_length (insn) <= 1) |
| return 1; |
| |
| /* For normal instructions we may rely on the address sizes and on the |
| presence of a symbol to require 4 bytes of encoding. This is not |
| the case for jumps, whose references are PC relative. */ |
| if (GET_CODE (insn) != JUMP_INSN) |
| { |
| l = get_attr_length_address (insn); |
| if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) |
| l = 4; |
| } |
| if (l) |
| return 1+l; |
| else |
| return 2; |
| } |
| |
| /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte |
| window. */ |
| |
| static void |
| ix86_avoid_jump_misspredicts (void) |
| { |
| rtx insn, start = get_insns (); |
| int nbytes = 0, njumps = 0; |
| int isjump = 0; |
| |
| /* Look for all minimal intervals of instructions containing 4 jumps. |
| The intervals are bounded by START and INSN. NBYTES is the total |
| size of the instructions in the interval, including INSN and not |
| including START. When NBYTES is smaller than 16 bytes, it is |
| possible that the ends of START and INSN land in the same 16-byte |
| page. |
| |
| The smallest offset in the page at which INSN can start is the case |
| where START ends at offset 0. The offset of INSN is then |
| NBYTES - sizeof (INSN). We add a p2align to the 16-byte window with |
| maxskip 17 - NBYTES + sizeof (INSN). |
| */ |
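| /* Worked example: if INSN is the fourth jump and the interval totals |
| NBYTES = 12 with sizeof (INSN) = 2, the code below emits an align |
| of 15 - 12 + 2 = 5 bytes, enough to push INSN out of any 16-byte |
| window holding the three preceding jumps. */ |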
| for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) |
| { |
| |
| nbytes += min_insn_size (insn); |
| if (dump_file) |
| fprintf(dump_file, "Insn %i estimated to %i bytes\n", |
| INSN_UID (insn), min_insn_size (insn)); |
| if ((GET_CODE (insn) == JUMP_INSN |
| && GET_CODE (PATTERN (insn)) != ADDR_VEC |
| && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) |
| || GET_CODE (insn) == CALL_INSN) |
| njumps++; |
| else |
| continue; |
| |
| while (njumps > 3) |
| { |
| start = NEXT_INSN (start); |
| if ((GET_CODE (start) == JUMP_INSN |
| && GET_CODE (PATTERN (start)) != ADDR_VEC |
| && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) |
| || GET_CODE (start) == CALL_INSN) |
| njumps--, isjump = 1; |
| else |
| isjump = 0; |
| nbytes -= min_insn_size (start); |
| } |
| gcc_assert (njumps >= 0); |
| if (dump_file) |
| fprintf (dump_file, "Interval %i to %i has %i bytes\n", |
| INSN_UID (start), INSN_UID (insn), nbytes); |
| |
| if (njumps == 3 && isjump && nbytes < 16) |
| { |
| int padsize = 15 - nbytes + min_insn_size (insn); |
| |
| if (dump_file) |
| fprintf (dump_file, "Padding insn %i by %i bytes!\n", |
| INSN_UID (insn), padsize); |
| emit_insn_before (gen_align (GEN_INT (padsize)), insn); |
| } |
| } |
| } |
| |
| /* AMD Athlon works faster |
| when RET is not the destination of a conditional jump or directly |
| preceded by another jump instruction. We avoid the penalty by |
| inserting a NOP just before the RET instruction in such cases. */ |
| static void |
| ix86_pad_returns (void) |
| { |
| edge e; |
| edge_iterator ei; |
| |
| FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) |
| { |
| basic_block bb = e->src; |
| rtx ret = BB_END (bb); |
| rtx prev; |
| bool replace = false; |
| |
| if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN |
| || !maybe_hot_bb_p (bb)) |
| continue; |
| for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) |
| if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL) |
| break; |
| if (prev && GET_CODE (prev) == CODE_LABEL) |
| { |
| edge e; |
| edge_iterator ei; |
| |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| if (EDGE_FREQUENCY (e) && e->src->index >= 0 |
| && !(e->flags & EDGE_FALLTHRU)) |
| replace = true; |
| } |
| if (!replace) |
| { |
| prev = prev_active_insn (ret); |
| if (prev |
| && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev)) |
| || GET_CODE (prev) == CALL_INSN)) |
| replace = true; |
| /* Empty functions get a branch mispredict even when the jump |
| destination is not visible to us. */ |
| if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) |
| replace = true; |
| } |
| if (replace) |
| { |
| emit_insn_before (gen_return_internal_long (), ret); |
| delete_insn (ret); |
| } |
| } |
| } |
| |
| /* Implement machine specific optimizations. We implement padding of |
| returns for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte |
| window. */ |
| static void |
| ix86_reorg (void) |
| { |
| if (TARGET_PAD_RETURNS && optimize && !optimize_size) |
| ix86_pad_returns (); |
| if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size) |
| ix86_avoid_jump_misspredicts (); |
| } |
| |
| /* Return nonzero when a QImode register that must be represented via |
| a REX prefix is used. */ |
| bool |
| x86_extended_QIreg_mentioned_p (rtx insn) |
| { |
| int i; |
| extract_insn_cached (insn); |
| for (i = 0; i < recog_data.n_operands; i++) |
| if (REG_P (recog_data.operand[i]) |
| && REGNO (recog_data.operand[i]) >= 4) |
| return true; |
| return false; |
| } |
| |
| /* Return nonzero when P points to a register encoded via a REX prefix. |
| Called via for_each_rtx. */ |
| static int |
| extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) |
| { |
| unsigned int regno; |
| if (!REG_P (*p)) |
| return 0; |
| regno = REGNO (*p); |
| return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); |
| } |
| |
| /* Return true when INSN mentions a register that must be encoded using |
| a REX prefix. */ |
| bool |
| x86_extended_reg_mentioned_p (rtx insn) |
| { |
| return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL); |
| } |
| |
| /* Generate an unsigned DImode/SImode to FP conversion. This is the same code |
| optabs would emit if we didn't have TFmode patterns. */ |
| |
| void |
| x86_emit_floatuns (rtx operands[2]) |
| { |
| rtx neglab, donelab, i0, i1, f0, in, out; |
| enum machine_mode mode, inmode; |
| |
| inmode = GET_MODE (operands[1]); |
| /* APPLE LOCAL begin 4176531 4424891 */ |
| mode = GET_MODE (operands[0]); |
| if (!TARGET_64BIT && mode == DFmode && !optimize_size) |
| { |
| switch (inmode) |
| { |
| case SImode: |
| ix86_expand_convert_uns_SI2DF_sse (operands); |
| break; |
| case DImode: |
| ix86_expand_convert_uns_DI2DF_sse (operands); |
| break; |
| default: |
| abort (); |
| break; |
| } |
| return; |
| } |
| /* APPLE LOCAL end 4176531 4424891 */ |
| |
| out = operands[0]; |
| in = force_reg (inmode, operands[1]); |
| /* APPLE LOCAL begin one line deletion 4424891 */ |
| /* APPLE LOCAL end one line deletion 4424891 */ |
| neglab = gen_label_rtx (); |
| donelab = gen_label_rtx (); |
| i1 = gen_reg_rtx (Pmode); |
| f0 = gen_reg_rtx (mode); |
| |
| emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab); |
| |
| emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); |
| emit_jump_insn (gen_jump (donelab)); |
| emit_barrier (); |
| |
| emit_label (neglab); |
| |
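| /* The input has the sign bit set, so a signed FLOAT would be wrong. |
| Compute i0 = (in >> 1) | (in & 1) - halve the value while keeping |
| the low bit so rounding stays correct - convert i0 to FP, then |
| double the result. (Descriptive note.) */ |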
| i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT); |
| i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT); |
| i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); |
| expand_float (f0, i0, 0); |
| emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); |
| |
| emit_label (donelab); |
| } |
| |
| /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector |
| with all elements equal to VAR. Return true if successful. */ |
| |
| static bool |
| ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, |
| rtx target, rtx val) |
| { |
| enum machine_mode smode, wsmode, wvmode; |
| rtx x; |
| |
| switch (mode) |
| { |
| case V2SImode: |
| case V2SFmode: |
| if (!mmx_ok) |
| return false; |
| /* FALLTHRU */ |
| |
| case V2DFmode: |
| case V2DImode: |
| case V4SFmode: |
| case V4SImode: |
| val = force_reg (GET_MODE_INNER (mode), val); |
| x = gen_rtx_VEC_DUPLICATE (mode, val); |
| emit_insn (gen_rtx_SET (VOIDmode, target, x)); |
| return true; |
| |
| case V4HImode: |
| if (!mmx_ok) |
| return false; |
| if (TARGET_SSE || TARGET_3DNOW_A) |
| { |
| val = gen_lowpart (SImode, val); |
| x = gen_rtx_TRUNCATE (HImode, val); |
| x = gen_rtx_VEC_DUPLICATE (mode, x); |
| emit_insn (gen_rtx_SET (VOIDmode, target, x)); |
| return true; |
| } |
| else |
| { |
| smode = HImode; |
| wsmode = SImode; |
| wvmode = V2SImode; |
| goto widen; |
| } |
| |
| case V8QImode: |
| if (!mmx_ok) |
| return false; |
| smode = QImode; |
| wsmode = HImode; |
| wvmode = V4HImode; |
| goto widen; |
| case V8HImode: |
| if (TARGET_SSE2) |
| { |
| rtx tmp1, tmp2; |
| /* Extend HImode to SImode using a paradoxical SUBREG. */ |
| tmp1 = gen_reg_rtx (SImode); |
| emit_move_insn (tmp1, gen_lowpart (SImode, val)); |
| /* Insert the SImode value as low element of V4SImode vector. */ |
| tmp2 = gen_reg_rtx (V4SImode); |
| tmp1 = gen_rtx_VEC_MERGE (V4SImode, |
| gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), |
| CONST0_RTX (V4SImode), |
| const1_rtx); |
| emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); |
| /* Cast the V4SImode vector back to a V8HImode vector. */ |
| tmp1 = gen_reg_rtx (V8HImode); |
| emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); |
| /* Duplicate the low short through the whole low SImode word. */ |
| emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); |
| /* Cast the V8HImode vector back to a V4SImode vector. */ |
| tmp2 = gen_reg_rtx (V4SImode); |
| emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); |
| /* Replicate the low element of the V4SImode vector. */ |
| emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); |
| /* Cast the V4SImode vector back to V8HImode, and store in target. */ |
| emit_move_insn (target, gen_lowpart (V8HImode, tmp2)); |
| return true; |
| } |
| smode = HImode; |
| wsmode = SImode; |
| wvmode = V4SImode; |
| goto widen; |
| case V16QImode: |
| if (TARGET_SSE2) |
| { |
| rtx tmp1, tmp2; |
| /* Extend QImode to SImode using a paradoxical SUBREG. */ |
| tmp1 = gen_reg_rtx (SImode); |
| emit_move_insn (tmp1, gen_lowpart (SImode, val)); |
| /* Insert the SImode value as low element of V4SImode vector. */ |
| tmp2 = gen_reg_rtx (V4SImode); |
| tmp1 = gen_rtx_VEC_MERGE (V4SImode, |
| gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), |
| CONST0_RTX (V4SImode), |
| const1_rtx); |
| emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); |
| /* Cast the V4SImode vector back to a V16QImode vector. */ |
| tmp1 = gen_reg_rtx (V16QImode); |
| emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); |
| /* Duplicate the low byte through the whole low SImode word. */ |
| emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); |
| emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); |
| /* Cast the V16QImode vector back to a V4SImode vector. */ |
| tmp2 = gen_reg_rtx (V4SImode); |
| emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); |
| /* Replicate the low element of the V4SImode vector. */ |
| emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); |
| /* Cast the V4SImode vector back to V16QImode, and store in target. */ |
| emit_move_insn (target, gen_lowpart (V16QImode, tmp2)); |
| return true; |
| } |
| smode = QImode; |
| wsmode = HImode; |
| wvmode = V8HImode; |
| goto widen; |
| widen: |
| /* Replicate the value once into the next wider mode and recurse. */ |
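| /* E.g. a QImode VAL of 0xab becomes the HImode value 0xabab via |
| (val << 8) | val, and the recursion then duplicates that HImode |
| value across the wider V4HImode vector. (Illustrative note.) */ |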
| val = convert_modes (wsmode, smode, val, true); |
| x = expand_simple_binop (wsmode, ASHIFT, val, |
| GEN_INT (GET_MODE_BITSIZE (smode)), |
| NULL_RTX, 1, OPTAB_LIB_WIDEN); |
| val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); |
| |
| x = gen_reg_rtx (wvmode); |
| if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val)) |
| gcc_unreachable (); |
| emit_move_insn (target, gen_lowpart (mode, x)); |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector |
| whose ONE_VAR element is VAR, and other elements are zero. Return true |
| if successful. */ |
| |
| static bool |
| ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, |
| rtx target, rtx var, int one_var) |
| { |
| enum machine_mode vsimode; |
| rtx new_target; |
| rtx x, tmp; |
| |
| switch (mode) |
| { |
| case V2SFmode: |
| case V2SImode: |
| if (!mmx_ok) |
| return false; |
| /* FALLTHRU */ |
| |
| case V2DFmode: |
| case V2DImode: |
| if (one_var != 0) |
| return false; |
| var = force_reg (GET_MODE_INNER (mode), var); |
| x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); |
| emit_insn (gen_rtx_SET (VOIDmode, target, x)); |
| return true; |
| |
| case V4SFmode: |
| case V4SImode: |
| if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) |
| new_target = gen_reg_rtx (mode); |
| else |
| new_target = target; |
| var = force_reg (GET_MODE_INNER (mode), var); |
| x = gen_rtx_VEC_DUPLICATE (mode, var); |
| x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); |
| emit_insn (gen_rtx_SET (VOIDmode, new_target, x)); |
| if (one_var != 0) |
| { |
| /* We need to shuffle the value to the correct position, so |
| create a new pseudo to store the intermediate result. */ |
| |
| /* With SSE2, we can use the integer shuffle insns. */ |
| if (mode != V4SFmode && TARGET_SSE2) |
| { |
| emit_insn (gen_sse2_pshufd_1 (new_target, new_target, |
| GEN_INT (1), |
| GEN_INT (one_var == 1 ? 0 : 1), |
| GEN_INT (one_var == 2 ? 0 : 1), |
| GEN_INT (one_var == 3 ? 0 : 1))); |
| if (target != new_target) |
| emit_move_insn (target, new_target); |
| return true; |
| } |
| |
| /* Otherwise convert the intermediate result to V4SFmode and |
| use the SSE1 shuffle instructions. */ |
| if (mode != V4SFmode) |
| { |
| tmp = gen_reg_rtx (V4SFmode); |
| emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); |
| } |
| else |
| tmp = new_target; |
| |
| emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp, |
| GEN_INT (1), |
| GEN_INT (one_var == 1 ? 0 : 1), |
| GEN_INT (one_var == 2 ? 0+4 : 1+4), |
| GEN_INT (one_var == 3 ? 0+4 : 1+4))); |
| |
| if (mode != V4SFmode) |
| emit_move_insn (target, gen_lowpart (V4SImode, tmp)); |
| else if (tmp != target) |
| emit_move_insn (target, tmp); |
| } |
| else if (target != new_target) |
| emit_move_insn (target, new_target); |
| return true; |
| |
| case V8HImode: |
| case V16QImode: |
| vsimode = V4SImode; |
| goto widen; |
| case V4HImode: |
| case V8QImode: |
| if (!mmx_ok) |
| return false; |
| vsimode = V2SImode; |
| goto widen; |
| widen: |
| if (one_var != 0) |
| return false; |
| |
| /* Zero extend the variable element to SImode and recurse. */ |
| var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); |
| |
| x = gen_reg_rtx (vsimode); |
| if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, |
| var, one_var)) |
| gcc_unreachable (); |
| |
| emit_move_insn (target, gen_lowpart (mode, x)); |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector |
| consisting of the values in VALS. It is known that all elements |
| except ONE_VAR are constants. Return true if successful. */ |
| |
| static bool |
| ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode, |
| rtx target, rtx vals, int one_var) |
| { |
| rtx var = XVECEXP (vals, 0, one_var); |
| enum machine_mode wmode; |
| rtx const_vec, x; |
| |
| const_vec = copy_rtx (vals); |
| XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); |
| const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); |
| |
| switch (mode) |
| { |
| case V2DFmode: |
| case V2DImode: |
| case V2SFmode: |
| case V2SImode: |
| /* For the two element vectors, it's just as easy to use |
| the general case. */ |
| return false; |
| |
| case V4SFmode: |
| case V4SImode: |
| case V8HImode: |
| case V4HImode: |
| break; |
| |
| case V16QImode: |
| wmode = V8HImode; |
| goto widen; |
| case V8QImode: |
| wmode = V4HImode; |
| goto widen; |
| widen: |
| /* There's no way to set one QImode entry easily. Combine |
| the variable value with its adjacent constant value, and |
| promote to an HImode set. */ |
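| /* E.g. for ONE_VAR == 5 in V16QImode, the variable byte is paired |
| with constant byte 4 (one_var ^ 1), packed into HImode element 2 |
| (one_var >> 1), and stored with a single HImode element set. */ |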
| x = XVECEXP (vals, 0, one_var ^ 1); |
| if (one_var & 1) |
| { |
| var = convert_modes (HImode, QImode, var, true); |
| var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), |
| NULL_RTX, 1, OPTAB_LIB_WIDEN); |
| x = GEN_INT (INTVAL (x) & 0xff); |
| } |
| else |
| { |
| var = convert_modes (HImode, QImode, var, true); |
| x = gen_int_mode (INTVAL (x) << 8, HImode); |
| } |
| if (x != const0_rtx) |
| var = expand_simple_binop (HImode, IOR, var, x, var, |
| 1, OPTAB_LIB_WIDEN); |
| |
| x = gen_reg_rtx (wmode); |
| emit_move_insn (x, gen_lowpart (wmode, const_vec)); |
| ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); |
| |
| emit_move_insn (target, gen_lowpart (mode, x)); |
| return true; |
| |
| default: |
| return false; |
| } |
| |
| emit_move_insn (target, const_vec); |
| ix86_expand_vector_set (mmx_ok, target, var, one_var); |
| return true; |
| } |
| |
| /* A subroutine of ix86_expand_vector_init. Handle the most general case: |
| all values variable, and none identical. */ |
| |
| static void |
| ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode, |
| rtx target, rtx vals) |
| { |
| enum machine_mode half_mode = GET_MODE_INNER (mode); |
| rtx op0 = NULL, op1 = NULL; |
| bool use_vec_concat = false; |
| |
| switch (mode) |
| { |
| case V2SFmode: |
| case V2SImode: |
| if (!mmx_ok && !TARGET_SSE) |
| break; |
| /* FALLTHRU */ |
| |
| case V2DFmode: |
| case V2DImode: |
| /* For the two element vectors, we always implement VEC_CONCAT. */ |
| op0 = XVECEXP (vals, 0, 0); |
| op1 = XVECEXP (vals, 0, 1); |
| use_vec_concat = true; |
| break; |
| |
| case V4SFmode: |
| half_mode = V2SFmode; |
| goto half; |
| case V4SImode: |
| half_mode = V2SImode; |
| goto half; |
| half: |
| { |
| rtvec v; |
| |
| /* For V4SF and V4SI, we implement a concat of two V2 vectors. |
| Recurse to load the two halves. */ |
| |
| op0 = gen_reg_rtx (half_mode); |
| v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1)); |
| ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v)); |
| |
| op1 = gen_reg_rtx (half_mode); |
| v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3)); |
| ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v)); |
| |
| use_vec_concat = true; |
| } |
| break; |
| |
| case V8HImode: |
| case V16QImode: |
| case V4HImode: |
| case V8QImode: |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (use_vec_concat) |
| { |
| if (!register_operand (op0, half_mode)) |
| op0 = force_reg (half_mode, op0); |
| if (!register_operand (op1, half_mode)) |
| op1 = force_reg (half_mode, op1); |
| |
| emit_insn (gen_rtx_SET (VOIDmode, target, |
| gen_rtx_VEC_CONCAT (mode, op0, op1))); |
| } |
| else |
| { |
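| /* Fallback: pack the elements of each word-sized chunk together |
| with shifts and IORs, then assemble the vector from the resulting |
| word_mode pieces. */ |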
| int i, j, n_elts, n_words, n_elt_per_word; |
| enum machine_mode inner_mode; |
| rtx words[4], shift; |
| |
| inner_mode = GET_MODE_INNER (mode); |
| n_elts = GET_MODE_NUNITS (mode); |
| n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; |
| n_elt_per_word = n_elts / n_words; |
| shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); |
| |
| for (i = 0; i < n_words; ++i) |
| { |
| rtx word = NULL_RTX; |
| |
| for (j = 0; j < n_elt_per_word; ++j) |
| { |
| rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); |
| elt = convert_modes (word_mode, inner_mode, elt, true); |
| |
| if (j == 0) |
| word = elt; |
| else |
| { |
| word = expand_simple_binop (word_mode, ASHIFT, word, shift, |
| word, 1, OPTAB_LIB_WIDEN); |
| word = expand_simple_binop (word_mode, IOR, word, elt, |
| word, 1, OPTAB_LIB_WIDEN); |
| } |
| } |
| |
| words[i] = word; |
| } |
| |
| if (n_words == 1) |
| emit_move_insn (target, gen_lowpart (mode, words[0])); |
| else if (n_words == 2) |
| { |
| rtx tmp = gen_reg_rtx (mode); |
| emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp)); |
| emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); |
| emit_move_insn (gen_highpart (word_mode, tmp), words[1]); |
| emit_move_insn (target, tmp); |
| } |
| else if (n_words == 4) |
| { |
| rtx tmp = gen_reg_rtx (V4SImode); |
| vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); |
| ix86_expand_vector_init_general (false, V4SImode, tmp, vals); |
| emit_move_insn (target, gen_lowpart (mode, tmp)); |
| } |
| else |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Initialize vector TARGET via VALS. Suppress the use of MMX |
| instructions unless MMX_OK is true. */ |
| |
| void |
| ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) |
| { |
| enum machine_mode mode = GET_MODE (target); |
| enum machine_mode inner_mode = GET_MODE_INNER (mode); |
| int n_elts = GET_MODE_NUNITS (mode); |
| int n_var = 0, one_var = -1; |
| bool all_same = true, all_const_zero = true; |
| int i; |
| rtx x; |
| |
| for (i = 0; i < n_elts; ++i) |
| { |
| x = XVECEXP (vals, 0, i); |
| if (!CONSTANT_P (x)) |
| n_var++, one_var = i; |
| else if (x != CONST0_RTX (inner_mode)) |
| all_const_zero = false; |
| if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) |
| all_same = false; |
| } |
| |
| /* Constants are best loaded from the constant pool. */ |
| if (n_var == 0) |
| { |
| emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); |
| return; |
| } |
| |
| /* If all values are identical, broadcast the value. */ |
| if (all_same |
| && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, |
| XVECEXP (vals, 0, 0))) |
| return; |
| |
| /* Values where only one field is non-constant are best loaded from |
| the pool and overwritten via move later. */ |
| if (n_var == 1) |
| { |
| if (all_const_zero |
| && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, |
| XVECEXP (vals, 0, one_var), |
| one_var)) |
| return; |
| |
| if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) |
| return; |
| } |
| |
| ix86_expand_vector_init_general (mmx_ok, mode, target, vals); |
| } |
| |
| void |
| ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) |
| { |
| enum machine_mode mode = GET_MODE (target); |
| enum machine_mode inner_mode = GET_MODE_INNER (mode); |
| bool use_vec_merge = false; |
| rtx tmp; |
| |
| switch (mode) |
| { |
| case V2SFmode: |
| case V2SImode: |
| if (mmx_ok) |
| { |
| tmp = gen_reg_rtx (GET_MODE_INNER (mode)); |
| ix86_expand_vector_extract (true, tmp, target, 1 - elt); |
| if (elt == 0) |
| tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); |
| else |
| tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); |
| emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); |
| return; |
| } |
| break; |
| |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| case V2DImode: |
| use_vec_merge = TARGET_SSE4_1; |
| if (use_vec_merge) |
| break; |
| |
| case V2DFmode: |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| { |
| rtx op0, op1; |
| |
| /* For the two element vectors, we implement a VEC_CONCAT with |
| the extraction of the other element. */ |
| |
| tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); |
| tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); |
| |
| if (elt == 0) |
| op0 = val, op1 = tmp; |
| else |
| op0 = tmp, op1 = val; |
| |
| tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); |
| emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); |
| } |
| return; |
| |
| case V4SFmode: |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| use_vec_merge = TARGET_SSE4_1; |
| if (use_vec_merge) |
| break; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
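| /* In the gen_sse_shufps_1 calls below, selector values 0-3 pick |
| elements of the first source operand and 4-7 (the "+4" forms) pick |
| elements of the second; each sequence uses this to splice the new |
| element X back together with the preserved elements. */ |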
| switch (elt) |
| { |
| case 0: |
| use_vec_merge = true; |
| break; |
| |
| case 1: |
| /* tmp = target = A B C D */ |
| tmp = copy_to_reg (target); |
| /* target = A A B B */ |
| emit_insn (gen_sse_unpcklps (target, target, target)); |
| /* target = X A B B */ |
| ix86_expand_vector_set (false, target, val, 0); |
| /* target = A X C D */ |
| emit_insn (gen_sse_shufps_1 (target, target, tmp, |
| GEN_INT (1), GEN_INT (0), |
| GEN_INT (2+4), GEN_INT (3+4))); |
| return; |
| |
| case 2: |
| /* tmp = target = A B C D */ |
| tmp = copy_to_reg (target); |
| /* tmp = X B C D */ |
| ix86_expand_vector_set (false, tmp, val, 0); |
| /* target = A B X D */ |
| emit_insn (gen_sse_shufps_1 (target, target, tmp, |
| GEN_INT (0), GEN_INT (1), |
| GEN_INT (0+4), GEN_INT (3+4))); |
| return; |
| |
| case 3: |
| /* tmp = target = A B C D */ |
| tmp = copy_to_reg (target); |
| /* tmp = X B C D */ |
| ix86_expand_vector_set (false, tmp, val, 0); |
| /* target = A B C X */ |
| emit_insn (gen_sse_shufps_1 (target, target, tmp, |
| GEN_INT (0), GEN_INT (1), |
| GEN_INT (2+4), GEN_INT (0+4))); |
| return; |
| |
| default: |
| gcc_unreachable (); |
| } |
| break; |
| |
| case V4SImode: |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| use_vec_merge = TARGET_SSE4_1; |
| if (use_vec_merge) |
| break; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| /* Element 0 handled by vec_merge below. */ |
| if (elt == 0) |
| { |
| use_vec_merge = true; |
| break; |
| } |
| |
| if (TARGET_SSE2) |
| { |
| /* With SSE2, use integer shuffles to swap element 0 and ELT, |
| store into element 0, then shuffle them back. */ |
| |
| rtx order[4]; |
| |
| order[0] = GEN_INT (elt); |
| order[1] = const1_rtx; |
| order[2] = const2_rtx; |
| order[3] = GEN_INT (3); |
| order[elt] = const0_rtx; |
| |
| emit_insn (gen_sse2_pshufd_1 (target, target, order[0], |
| order[1], order[2], order[3])); |
| |
| ix86_expand_vector_set (false, target, val, 0); |
| |
| emit_insn (gen_sse2_pshufd_1 (target, target, order[0], |
| order[1], order[2], order[3])); |
| } |
| else |
| { |
| /* For SSE1, we have to reuse the V4SF code. */ |
| ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target), |
| gen_lowpart (SFmode, val), elt); |
| } |
| return; |
| |
| case V8HImode: |
| use_vec_merge = TARGET_SSE2; |
| break; |
| case V4HImode: |
| use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); |
| break; |
| |
| case V16QImode: |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| use_vec_merge = TARGET_SSE4_1; |
| break; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| case V8QImode: |
| default: |
| break; |
| } |
| |
| if (use_vec_merge) |
| { |
| tmp = gen_rtx_VEC_DUPLICATE (mode, val); |
| tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt)); |
| emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); |
| } |
| else |
| { |
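| /* No direct insertion pattern applies: spill the whole vector to a |
| stack slot, overwrite the ELT'th element in memory, and reload |
| the vector. */ |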
| rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); |
| |
| emit_move_insn (mem, target); |
| |
| tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); |
| emit_move_insn (tmp, val); |
| |
| emit_move_insn (target, mem); |
| } |
| } |
| |
| void |
| ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) |
| { |
| enum machine_mode mode = GET_MODE (vec); |
| enum machine_mode inner_mode = GET_MODE_INNER (mode); |
| bool use_vec_extr = false; |
| rtx tmp; |
| |
| switch (mode) |
| { |
| case V2SImode: |
| case V2SFmode: |
| if (!mmx_ok) |
| break; |
| /* FALLTHRU */ |
| |
| case V2DFmode: |
| case V2DImode: |
| use_vec_extr = true; |
| break; |
| |
| case V4SFmode: |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| use_vec_extr = TARGET_SSE4_1; |
| if (use_vec_extr) |
| break; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| switch (elt) |
| { |
| case 0: |
| tmp = vec; |
| break; |
| |
| case 1: |
| case 3: |
| tmp = gen_reg_rtx (mode); |
| emit_insn (gen_sse_shufps_1 (tmp, vec, vec, |
| GEN_INT (elt), GEN_INT (elt), |
| GEN_INT (elt+4), GEN_INT (elt+4))); |
| break; |
| |
| case 2: |
| tmp = gen_reg_rtx (mode); |
| emit_insn (gen_sse_unpckhps (tmp, vec, vec)); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| vec = tmp; |
| use_vec_extr = true; |
| elt = 0; |
| break; |
| |
| case V4SImode: |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| use_vec_extr = TARGET_SSE4_1; |
| if (use_vec_extr) |
| break; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| if (TARGET_SSE2) |
| { |
| switch (elt) |
| { |
| case 0: |
| tmp = vec; |
| break; |
| |
| case 1: |
| case 3: |
| tmp = gen_reg_rtx (mode); |
| emit_insn (gen_sse2_pshufd_1 (tmp, vec, |
| GEN_INT (elt), GEN_INT (elt), |
| GEN_INT (elt), GEN_INT (elt))); |
| break; |
| |
| case 2: |
| tmp = gen_reg_rtx (mode); |
| emit_insn (gen_sse2_punpckhdq (tmp, vec, vec)); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| vec = tmp; |
| use_vec_extr = true; |
| elt = 0; |
| } |
| else |
| { |
| /* For SSE1, we have to reuse the V4SF code. */ |
| ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), |
| gen_lowpart (V4SFmode, vec), elt); |
| return; |
| } |
| break; |
| |
| case V8HImode: |
| use_vec_extr = TARGET_SSE2; |
| break; |
| case V4HImode: |
| use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); |
| break; |
| |
| case V16QImode: |
| /* APPLE LOCAL begin 5612787 mainline sse4 */ |
| use_vec_extr = TARGET_SSE4_1; |
| break; |
| /* APPLE LOCAL end 5612787 mainline sse4 */ |
| case V8QImode: |
| /* ??? Could extract the appropriate HImode element and shift. */ |
| default: |
| break; |
| } |
| |
| if (use_vec_extr) |
| { |
| tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); |
| tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); |
| |
| /* Let the rtl optimizers know about the zero extension performed. */ |
| /* APPLE LOCAL 5612787 mainline sse4 */ |
| if (inner_mode == QImode || inner_mode == HImode) |
| { |
| tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); |
| target = gen_lowpart (SImode, target); |
| } |
| |
| emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); |
| } |
| else |
| { |
| rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); |
| |
| emit_move_insn (mem, vec); |
| |
| tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); |
| emit_move_insn (target, tmp); |
| } |
| } |
| |
| /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary |
| pattern to reduce; DEST is the destination; IN is the input vector. */ |
| |
| void |
| ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) |
| { |
| rtx tmp1, tmp2, tmp3; |
| |
| tmp1 = gen_reg_rtx (V4SFmode); |
| tmp2 = gen_reg_rtx (V4SFmode); |
| tmp3 = gen_reg_rtx (V4SFmode); |
| |
| emit_insn (gen_sse_movhlps (tmp1, in, in)); |
| emit_insn (fn (tmp2, tmp1, in)); |
| |
| emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2, |
| GEN_INT (1), GEN_INT (1), |
| GEN_INT (1+4), GEN_INT (1+4))); |
| emit_insn (fn (dest, tmp2, tmp3)); |
| } |
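| |
| /* Data flow (illustrative), for IN = [a b c d] and a commutative FN |
| such as the addv4sf3 pattern: movhlps gives tmp1 = [c d c d]; the |
| first FN gives tmp2 with element 0 = (a FN c) and element 1 = |
| (b FN d); the shufps broadcasts element 1 of tmp2 into tmp3; the |
| final FN leaves (a FN c) FN (b FN d), the full reduction, in |
| element 0 of DEST. */ |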
| |
| /* Target hook for scalar_mode_supported_p. */ |
| static bool |
| ix86_scalar_mode_supported_p (enum machine_mode mode) |
| { |
| if (DECIMAL_FLOAT_MODE_P (mode)) |
| return true; |
| else |
| return default_scalar_mode_supported_p (mode); |
| } |
| |
| /* Implements target hook vector_mode_supported_p. */ |
| static bool |
| ix86_vector_mode_supported_p (enum machine_mode mode) |
| { |
| if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) |
| return true; |
| if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) |
| return true; |
| if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) |
| return true; |
| if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) |
| return true; |
| return false; |
| } |
| |
| /* Worker function for TARGET_MD_ASM_CLOBBERS. |
| |
| We do this in the new i386 backend to maintain source compatibility |
| with the old cc0-based compiler. */ |
| |
| static tree |
| ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED, |
| tree inputs ATTRIBUTE_UNUSED, |
| tree clobbers) |
| { |
| clobbers = tree_cons (NULL_TREE, build_string (5, "flags"), |
| clobbers); |
| clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"), |
| clobbers); |
| clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"), |
| clobbers); |
| return clobbers; |
| } |
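| |
| /* The effect (illustrative) is that a user-level |
| asm ("cld" : : : "memory"); |
| is processed as though it had also listed "flags", "fpsr" and |
| "dirflag" among its clobbers. */ |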
| |
| /* Return true if this goes in large data/bss. */ |
| |
| static bool |
| ix86_in_large_data_p (tree exp) |
| { |
| if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) |
| return false; |
| |
| /* Functions are never large data. */ |
| if (TREE_CODE (exp) == FUNCTION_DECL) |
| return false; |
| |
| if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) |
| { |
| const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); |
| if (strcmp (section, ".ldata") == 0 |
| || strcmp (section, ".lbss") == 0) |
| return true; |
| return false; |
| } |
| else |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); |
| |
| /* If this is an incomplete type with size 0, then we can't put it |
| in data because it might be too big when completed. */ |
| if (!size || size > ix86_section_threshold) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Worker function for TARGET_ENCODE_SECTION_INFO. */ |
| static void |
| ix86_encode_section_info (tree decl, rtx rtl, int first) |
| { |
| default_encode_section_info (decl, rtl, first); |
| |
| if (TREE_CODE (decl) == VAR_DECL |
| && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) |
| && ix86_in_large_data_p (decl)) |
| SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; |
| } |
| |
| /* Worker function for REVERSE_CONDITION. */ |
| |
| enum rtx_code |
| ix86_reverse_condition (enum rtx_code code, enum machine_mode mode) |
| { |
| return (mode != CCFPmode && mode != CCFPUmode |
| ? reverse_condition (code) |
| : reverse_condition_maybe_unordered (code)); |
| } |
| |
| /* Output code to perform an x87 FP register move, from OPERANDS[1] |
| to OPERANDS[0]. */ |
| |
| const char * |
| output_387_reg_move (rtx insn, rtx *operands) |
| { |
| if (REG_P (operands[1]) |
| && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
| { |
| if (REGNO (operands[0]) == FIRST_STACK_REG) |
| return output_387_ffreep (operands, 0); |
| return "fstp\t%y0"; |
| } |
| if (STACK_TOP_P (operands[0])) |
| return "fld%z1\t%y1"; |
| return "fst\t%y0"; |
| } |
| |
| /* Output code to perform a conditional jump to LABEL, if the C2 flag |
| in the FP status register is set. */ |
| |
| void |
| ix86_emit_fp_unordered_jump (rtx label) |
| { |
| rtx reg = gen_reg_rtx (HImode); |
| rtx temp; |
| |
| emit_insn (gen_x86_fnstsw_1 (reg)); |
| |
| if (TARGET_USE_SAHF) |
| { |
| emit_insn (gen_x86_sahf_1 (reg)); |
| |
| temp = gen_rtx_REG (CCmode, FLAGS_REG); |
| temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); |
| } |
| else |
| { |
| emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04))); |
| |
| temp = gen_rtx_REG (CCNOmode, FLAGS_REG); |
| temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); |
| } |
| |
| temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, |
| gen_rtx_LABEL_REF (VOIDmode, label), |
| pc_rtx); |
| temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); |
| emit_jump_insn (temp); |
| } |
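| |
| /* Why this works (x87 status word layout): fnstsw puts C2 in bit 10 |
| of AX, i.e. bit 2 (mask 0x04) of AH. sahf copies AH into EFLAGS, |
| where that bit lands in PF, hence the UNORDERED test on the sahf |
| path and the explicit test against 0x04 on the other. */ |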
| |
| /* Output code to perform a log1p XFmode calculation. */ |
| |
| void |
| ix86_emit_i387_log1p (rtx op0, rtx op1) |
| { |
| rtx label1 = gen_label_rtx (); |
| rtx label2 = gen_label_rtx (); |
| |
| rtx tmp = gen_reg_rtx (XFmode); |
| rtx tmp2 = gen_reg_rtx (XFmode); |
| |
| emit_insn (gen_absxf2 (tmp, op1)); |
| emit_insn (gen_cmpxf (tmp, |
| CONST_DOUBLE_FROM_REAL_VALUE ( |
| REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), |
| XFmode))); |
| emit_jump_insn (gen_bge (label1)); |
| |
| emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ |
| emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1)); |
| emit_jump (label2); |
| |
| emit_label (label1); |
| emit_move_insn (tmp, CONST1_RTX (XFmode)); |
| emit_insn (gen_addxf3 (tmp, op1, tmp)); |
| emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ |
| emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp)); |
| |
| emit_label (label2); |
| } |
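| |
| /* The magic constant above is 1 - sqrt(2)/2 ~= 0.2928932188..., the |
| bound within which fyl2xp1 (computing y * log2(x + 1)) is |
| specified to be accurate. Below the bound we compute |
| ln(2) * log2(1 + x) directly with fyl2xp1; above it we form 1 + x |
| explicitly and use fyl2x, since log1p(x) = ln(2) * log2(1 + x) |
| either way. */ |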
| |
| /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ |
| |
| static void |
| i386_solaris_elf_named_section (const char *name, unsigned int flags, |
| tree decl) |
| { |
| /* With Binutils 2.15, the "@unwind" marker must be specified on |
| every occurrence of the ".eh_frame" section, not just the first |
| one. */ |
| if (TARGET_64BIT |
| && strcmp (name, ".eh_frame") == 0) |
| { |
| fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, |
| flags & SECTION_WRITE ? "aw" : "a"); |
| return; |
| } |
| default_elf_asm_named_section (name, flags, decl); |
| } |
| |
| /* APPLE LOCAL begin regparmandstackparm */ |
| |
| /* Mark the function type *PT with the 'regparmandstackparmee' |
| attribute, selecting the SSE-based calling convention. */ |
| static void |
| ix86_make_regparmandstackparmee (tree *pt) |
| { |
| decl_attributes (pt, |
| tree_cons (get_identifier ("regparmandstackparmee"), |
| NULL_TREE, TYPE_ATTRIBUTES (*pt)), 0); |
| } |
| |
| /* Splay tree mapping fndecls marked 'regparmandstackparm' to their $3SSE equivalents. */ |
| static splay_tree ix86_darwin_regparmandstackparm_st; |
| /* Cache for regparmandstackparm fntypes. */ |
| static splay_tree ix86_darwin_fntype_st; |
| |
| /* Append "$3SSE" to an ID, returning a new IDENTIFIER_NODE. */ |
| static tree |
| ix86_darwin_regparmandstackparm_mangle_name (tree id) |
| { |
| static const char *mangle_suffix = "$3SSE"; |
| unsigned int mangle_length = strlen (mangle_suffix); |
| const char *name; |
| unsigned int orig_length; |
| char *buf; |
| |
| if (!id) |
| return NULL_TREE; |
| |
| name = IDENTIFIER_POINTER (id); |
| orig_length = strlen (name); |
| buf = alloca (orig_length + mangle_length + 1); |
| |
| strcpy (buf, name); |
| strcat (buf, mangle_suffix); |
| return get_identifier (buf); /* get_identifier copies the string, so the alloca'd BUF can go away. */ |
| } |
| |
| /* Given the "normal" TRAD_FNDECL marked with 'regparmandstackparm', |
| return a duplicate fndecl marked 'regparmandstackparmee' (note trailing |
| 'ee'). Enter them as a pair in the splay tree ST, if non-null; |
| looking up the TRAD_FNDECL will return the new one. */ |
| static tree |
| ix86_darwin_regparmandstackparm_dup_fndecl (tree trad_fndecl, splay_tree st) |
| { |
| tree fntype; |
| tree new_fndecl; |
| |
| fntype = TREE_TYPE (trad_fndecl); |
| |
| /* NEW_FNDECL will be compiled with the XMM-based calling |
| convention, and TRAD_FNDECL (the original) will be compiled with |
| the traditional stack-based calling convention. */ |
| new_fndecl = copy_node (trad_fndecl); |
| DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0; |
| allocate_struct_function (new_fndecl); |
| DECL_STRUCT_FUNCTION (new_fndecl)->function_end_locus |
| = DECL_STRUCT_FUNCTION (trad_fndecl)->function_end_locus; |
| DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl = |
| DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl; |
| DECL_RESULT (new_fndecl) = copy_node (DECL_RESULT (trad_fndecl)); |
| DECL_CONTEXT (DECL_RESULT (new_fndecl)) = new_fndecl; |
| SET_DECL_ASSEMBLER_NAME (new_fndecl, 0); |
| DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl)); |
| TYPE_ATTRIBUTES (TREE_TYPE (new_fndecl)) |
| = copy_list (TYPE_ATTRIBUTES (TREE_TYPE (trad_fndecl))); |
| ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl)); |
| /* Kludge: block copied from tree-inline.c(save_body). Should |
| be refactored into a common shareable routine. */ |
| { |
| tree *parg; |
| |
| for (parg = &DECL_ARGUMENTS (new_fndecl); |
| *parg; |
| parg = &TREE_CHAIN (*parg)) |
| { |
| tree new = copy_node (*parg); |
| |
| lang_hooks.dup_lang_specific_decl (new); |
| DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (*parg); |
| DECL_CONTEXT (new) = new_fndecl; |
| /* Note: it may be possible to move the original parameters |
| with the function body, making this splay tree |
| unnecessary. */ |
| if (st) |
| splay_tree_insert (st, (splay_tree_key) *parg, (splay_tree_value) new); |
| TREE_CHAIN (new) = TREE_CHAIN (*parg); |
| *parg = new; |
| } |
| |
| if (DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl) |
| { |
| tree old = DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl; |
| tree new = copy_node (old); |
| |
| lang_hooks.dup_lang_specific_decl (new); |
| DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (old); |
| DECL_CONTEXT (new) = new_fndecl; |
| if (st) |
| splay_tree_insert (st, (splay_tree_key) old, (splay_tree_value) new); |
| TREE_CHAIN (new) = TREE_CHAIN (old); |
| DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl = new; |
| } |
| |
| if (st) |
| splay_tree_insert (st, (splay_tree_key) DECL_RESULT (trad_fndecl), |
| (splay_tree_value) DECL_RESULT (new_fndecl)); |
| } |
| #if 0 |
| /* Testing Kludge: If TREE_READONLY is set, cgen can and |
| occasionally will delete "pure" (no side-effect) calls to a |
| library function. Cleared here to preclude this when |
| test-building libraries. */ |
| TREE_READONLY (new_fndecl) = false; |
| #endif |
| |
| return new_fndecl; |
| } |
| |
| /* FNDECL has no body, but user has marked it as a regparmandstackparm |
| item. Create a corresponding regparmandstackparm decl for it, and |
| arrange for calls to be redirected to the regparmandstackparm |
| version. */ |
| static tree |
| ix86_darwin_regparmandstackparm_extern_decl (tree trad_fndecl) |
| { |
| tree new_fndecl; |
| |
| /* new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, (splay_tree)0); */ |
| new_fndecl = copy_node (trad_fndecl); |
| DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl)); |
| DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0; |
| SET_DECL_ASSEMBLER_NAME (new_fndecl, 0); |
| ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl)); |
| cgraph_finalize_function (new_fndecl, /* nested = */ true); |
| if (!ix86_darwin_regparmandstackparm_st) |
| ix86_darwin_regparmandstackparm_st |
| = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); |
| splay_tree_insert (ix86_darwin_regparmandstackparm_st, |
| (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl); |
| return new_fndecl; |
| } |
| |
| /* Invoked after all functions have been seen and digested, but before |
| any inlining decisions have been made. Walk the callgraph, seeking |
| calls to functions that have regparmandstackparm variants. Rewrite the |
| calls, directing them to the new 'regparmandstackparmee' versions. */ |
| void |
| ix86_darwin_redirect_calls(void) |
| { |
| struct cgraph_node *fastcall_node, *node; |
| struct cgraph_edge *edge, *next_edge; |
| tree addr, fastcall_decl, orig_fntype; |
| splay_tree_node call_stn, type_stn; |
| |
| if (!flag_unit_at_a_time) |
| return; |
| |
| if (!ix86_darwin_fntype_st) |
| ix86_darwin_fntype_st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); |
| |
| if (!ix86_darwin_regparmandstackparm_st) |
| ix86_darwin_regparmandstackparm_st |
| = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); |
| |
| /* Extern decls marked "regparmandstackparm" beget regparmandstackparmee |
| decls. */ |
| for (node = cgraph_nodes; node; node = node->next) |
| if (!DECL_SAVED_TREE (node->decl) |
| && lookup_attribute ("regparmandstackparm", |
| TYPE_ATTRIBUTES (TREE_TYPE (node->decl))) |
| && !lookup_attribute ("regparmandstackparmee", |
| TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))) |
| { |
| fastcall_decl = ix86_darwin_regparmandstackparm_extern_decl (node->decl); |
| splay_tree_insert (ix86_darwin_regparmandstackparm_st, |
| (splay_tree_key) node->decl, |
| (splay_tree_value) fastcall_decl); |
| } |
| |
| /* Walk the callgraph, rewriting calls as we go. */ |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| call_stn = splay_tree_lookup (ix86_darwin_regparmandstackparm_st, |
| (splay_tree_key)node->decl); |
| /* If this function was in our splay-tree, we previously created |
| a regparmandstackparm version of it. */ |
| if (call_stn) |
| { |
| fastcall_decl = (tree)call_stn->value; |
| fastcall_node = cgraph_node (fastcall_decl); |
| /* Redirect all calls to this fn to the regparmandstackparm |
| version. */ |
| for (edge = next_edge = node->callers ; edge ; edge = next_edge) |
| { |
| tree call, stmt; |
| next_edge = next_edge->next_caller; |
| cgraph_redirect_edge_callee (edge, fastcall_node); |
| /* APPLE LOCAL */ |
| /* MERGE FIXME call_expr -> call_stmt */ |
| stmt = edge->call_stmt; |
| call = get_call_expr_in (stmt); |
| addr = TREE_OPERAND (call, 0); |
| TREE_OPERAND (addr, 0) = fastcall_decl; |
| orig_fntype = TREE_TYPE (addr); |
| /* Likewise, revise the TYPE of the ADDR node between |
| the CALL_EXPR and the FNDECL. This type determines |
| the parameters and calling convention applied to this |
| CALL_EXPR. */ |
| type_stn = splay_tree_lookup (ix86_darwin_fntype_st, (splay_tree_key)orig_fntype); |
| if (type_stn) |
| TREE_TYPE (addr) = (tree)type_stn->value; |
| else |
| { |
| ix86_make_regparmandstackparmee (&TREE_TYPE (addr)); |
| splay_tree_insert (ix86_darwin_fntype_st, |
| (splay_tree_key)orig_fntype, |
| (splay_tree_value)TREE_TYPE (addr)); |
| } |
| } |
| } |
| } |
| } |
| |
| /* Information necessary to re-context a function body. */ |
| typedef struct { |
| tree old_context; |
| tree new_context; |
| splay_tree decl_map; |
| } recontext_data; |
| |
| /* Visit every node of a function body; if it points at the |
| OLD_CONTEXT, re-direct it to the NEW_CONTEXT. Invoked via |
| walk_tree. DECL_MAP is a splay tree that maps the original |
| parameters to new ones. */ |
| static tree |
| ix86_darwin_re_context_1 (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, |
| void *data) |
| { |
| tree t; |
| recontext_data *rcd; |
| enum tree_code_class class; |
| splay_tree_node n; |
| |
| if (!tp) |
| return NULL_TREE; |
| |
| t = *tp; |
| if (!t) |
| return NULL_TREE; |
| |
| rcd = (recontext_data *)data; |
| n = splay_tree_lookup (rcd->decl_map, (splay_tree_key) t); |
| if (n) |
| { |
| *tp = (tree)n->value; |
| return NULL_TREE; |
| } |
| |
| class = TREE_CODE_CLASS (TREE_CODE (t)); |
| if (class != tcc_declaration) |
| return NULL_TREE; |
| |
| if (DECL_CONTEXT (t) == rcd->old_context) |
| DECL_CONTEXT (t) = rcd->new_context; |
| |
| return NULL_TREE; |
| } |
| |
| /* Walk a function body, updating every pointer to OLD_CONTEXT to |
| NEW_CONTEXT. TP is the top of the function body, and ST is a splay |
| tree of replacements for the parameters. */ |
| static tree |
| ix86_darwin_re_context (tree *tp, tree old_context, tree new_context, splay_tree st) |
| { |
| recontext_data rcd; |
| tree ret; |
| |
| rcd.old_context = old_context; |
| rcd.new_context = new_context; |
| rcd.decl_map = st; |
| |
| ret = walk_tree (tp, ix86_darwin_re_context_1, |
| (void *)&rcd, (struct pointer_set_t *)0); |
| return ret; |
| } |
| |
| /* Given TRAD_FNDECL, create a regparmandstackparm variant and hang the |
| DECL_SAVED_TREE body there. Create a new, one-statement body for |
| TRAD_FNDECL that calls the new one. If the return types are |
| compatible (e.g. non-FP), the call can usually be sibcalled. The |
| inliner will often copy the body from NEW_FNDECL into TRAD_FNDECL, |
| and we do nothing to prevent this. */ |
| static void |
| ix86_darwin_regparmandstackparm_wrapper (tree trad_fndecl) |
| { |
| tree new_fndecl; |
| splay_tree st; |
| tree bind, block, call, clone_parm, modify, parmlist, rdecl, rtn, stmt_list, type; |
| tree_stmt_iterator tsi; |
| |
| st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); |
| new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, st); |
| |
| for (parmlist = NULL, clone_parm = DECL_ARGUMENTS (trad_fndecl); |
| clone_parm; |
| clone_parm = TREE_CHAIN (clone_parm)) |
| { |
| gcc_assert (clone_parm); |
| DECL_ABSTRACT_ORIGIN (clone_parm) = NULL; |
| parmlist = tree_cons (NULL, clone_parm, parmlist); |
| } |
| |
| /* We built this list backwards; fix now. */ |
| parmlist = nreverse (parmlist); |
| type = TREE_TYPE (TREE_TYPE (trad_fndecl)); |
| call = build_function_call (new_fndecl, parmlist); |
| TREE_TYPE (call) = type; |
| if (type == void_type_node) |
| rtn = call; |
| else if (0 && ix86_return_in_memory (type)) |
| { |
| /* Return without a RESULT_DECL: RETURN_EXPR (CALL). */ |
| rtn = make_node (RETURN_EXPR); |
| TREE_OPERAND (rtn, 0) = call; |
| TREE_TYPE (rtn) = type; |
| } |
| else /* RETURN_EXPR(MODIFY(RESULT_DECL, CALL)). */ |
| { |
| rdecl = make_node (RESULT_DECL); |
| TREE_TYPE (rdecl) = type; |
| DECL_MODE (rdecl) = TYPE_MODE (type); |
| DECL_RESULT (trad_fndecl) = rdecl; |
| DECL_CONTEXT (rdecl) = trad_fndecl; |
| modify = build_modify_expr (rdecl, NOP_EXPR, call); |
| TREE_TYPE (modify) = type; |
| rtn = make_node (RETURN_EXPR); |
| TREE_OPERAND (rtn, 0) = modify; |
| TREE_TYPE (rtn) = type; |
| } |
| stmt_list = alloc_stmt_list (); |
| tsi = tsi_start (stmt_list); |
| tsi_link_after (&tsi, rtn, TSI_NEW_STMT); |
| |
| /* This wrapper consists of "return <my_name>$3SSE (<my_arguments>);" |
| thus it has no local variables. */ |
| block = make_node (BLOCK); |
| TREE_USED (block) = true; |
| bind = make_node (BIND_EXPR); |
| BIND_EXPR_BLOCK (bind) = block; |
| BIND_EXPR_BODY (bind) = stmt_list; |
| TREE_TYPE (bind) = void_type_node; |
| TREE_SIDE_EFFECTS (bind) = true; |
| |
| DECL_SAVED_TREE (trad_fndecl) = bind; |
| |
| /* DECL_ABSTRACT_ORIGIN (new_fndecl) = NULL; *//* ? */ |
| |
| ix86_darwin_re_context (&new_fndecl, trad_fndecl, new_fndecl, st); |
| ix86_darwin_re_context (&DECL_SAVED_TREE (new_fndecl), trad_fndecl, new_fndecl, st); |
| splay_tree_delete (st); |
| gimplify_function_tree (new_fndecl); |
| cgraph_finalize_function (new_fndecl, /* nested = */ true); |
| gimplify_function_tree (trad_fndecl); |
| if (!ix86_darwin_regparmandstackparm_st) |
| ix86_darwin_regparmandstackparm_st |
| = splay_tree_new (splay_tree_compare_pointers, NULL, NULL); |
| splay_tree_insert (ix86_darwin_regparmandstackparm_st, |
| (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl); |
| } |
| |
| /* Entry point into the regparmandstackparm stuff. FNDECL might be marked |
| 'regparmandstackparm'; if it is, create the fast version, etc. */ |
| void |
| ix86_darwin_handle_regparmandstackparm (tree fndecl) |
| { |
| static unsigned int already_running = 0; |
| |
| /* Bail out if there is no FNDECL; ALREADY_RUNNING guards against |
| recursive invocation. */ |
| if (!fndecl || already_running) |
| return; |
| |
| already_running++; |
| |
| if (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (TREE_TYPE (fndecl))) |
| && !lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) |
| { |
| if (DECL_STRUCT_FUNCTION (fndecl) && DECL_STRUCT_FUNCTION (fndecl)->stdarg) |
| error ("regparmandstackparm is incompatible with varargs"); |
| else if (DECL_SAVED_TREE (fndecl)) |
| ix86_darwin_regparmandstackparm_wrapper (fndecl); |
| } |
| |
| already_running--; |
| } |
| /* APPLE LOCAL end regparmandstackparm */ |
| |
| /* APPLE LOCAL begin CW asm blocks */ |
| #include <ctype.h> |
| #include "config/asm.h" |
| |
| /* Additional register names accepted for inline assembly that would |
| otherwise not be registers. This table must be sorted for |
| bsearch. */ |
| static const char *iasm_additional_names[] = { |
| "AH", "AL", "AX", "BH", "BL", "BP", "BX", "CH", "CL", "CX", "DH", |
| "DI", "DL", "DX", "EAX", "EBP", "EBX", "ECX", "EDI", "EDX", "ESI", |
| "ESP", "MM0", "MM1", "MM2", "MM3", "MM4", "MM5", "MM6", "MM7", "R10", |
| "R11", "R12", "R13", "R14", "R15", "R8", "R9", "RAX", "RBP", "RBX", |
| "RCX", "RDI", "RDX", "RSI", "RSP", "SI", "SP", "ST", "ST(1)", "ST(2)", |
| "ST(3)", "ST(4)", "ST(5)", "ST(6)", "ST(7)", "XMM0", "XMM1", "XMM10", |
| "XMM11", "XMM12", "XMM13", "XMM14", "XMM15", "XMM2", "XMM3", "XMM4", |
| "XMM5", "XMM6", "XMM7", "XMM8", "XMM9" }; |
| |
| /* Comparison function for bsearch to find additional register names. */ |
| static int |
| iasm_reg_comp (const void *a, const void *b) |
| { |
| const char *const *x = a; |
| const char *const *y = b; |
| int c = strcasecmp (*x, *y); |
| return c; |
| } |
| |
| /* Translate some register names seen in CW asm into GCC standard |
| forms. */ |
| |
| const char * |
| i386_iasm_register_name (const char *regname, char *buf) |
| { |
| const char **r; |
| |
| /* If we can find the named register, return it. */ |
| if (decode_reg_name (regname) >= 0) |
| { |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| return regname; |
| sprintf (buf, "%%%s", regname); |
| return buf; |
| } |
| |
| /* If we can find a lower-case version of any register in |
| iasm_additional_names, return it. */ |
| r = bsearch (®name, iasm_additional_names, |
| sizeof (iasm_additional_names) / sizeof (iasm_additional_names[0]), |
| sizeof (iasm_additional_names[0]), iasm_reg_comp); |
| if (r) |
| { |
| char *p; |
| const char *q; |
| q = regname = *r; |
| p = buf; |
| if (ASSEMBLER_DIALECT != ASM_INTEL) |
| *p++ = '%'; |
| regname = p; |
| while ((*p++ = tolower (*q++))) |
| ; |
| if (decode_reg_name (regname) >= 0) |
| return buf; |
| } |
| |
| return NULL; |
| } |
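| |
| /* Usage sketch (illustrative): with AT&T output syntax, passing |
| "EAX" falls into the bsearch path, which lowercases the name into |
| BUF behind a '%' and returns "%eax"; plain "eax" already decodes |
| and is returned as "%eax" via sprintf; a non-register name yields |
| NULL. BUF must have room for '%', the name and a NUL. */ |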
| |
| /* Return true iff the opcode wants memory to be stable. We arrange |
| for a memory clobber in these instances. */ |
| bool |
| iasm_memory_clobber (const char *ARG_UNUSED (opcode)) |
| { |
| return true; |
| } |
| |
| /* Return true iff the operands need swapping. */ |
| |
| bool |
| iasm_x86_needs_swapping (const char *opcode) |
| { |
| /* Don't swap if output format is the same as input format. */ |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| return false; |
| |
| /* These don't need swapping. */ |
| if (strcasecmp (opcode, "bound") == 0) |
| return false; |
| if (strcasecmp (opcode, "invlpga") == 0) |
| return false; |
| if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1)) |
| return false; |
| |
| return true; |
| } |
| |
| /* Swap operands given in MS-style (Intel) ordering into AT&T |
| ordering when the output dialect is AT&T syntax. */ |
| |
| static tree |
| iasm_x86_swap_operands (const char *opcode, tree args) |
| { |
| int noperands; |
| |
| if (iasm_x86_needs_swapping (opcode) == false) |
| return args; |
| |
| #if 0 |
| /* GAS also checks the type of the arguments to determine if they |
| need swapping. */ |
| if ((argtype[0]&Imm) && (argtype[1]&Imm)) |
| return args; |
| #endif |
| noperands = list_length (args); |
| if (noperands == 2 || noperands == 3) |
| { |
| /* Swap first and last (1 and 2 or 1 and 3). */ |
| return nreverse (args); |
| } |
| return args; |
| } |
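| |
| /* Example (illustrative): the MS-syntax source "mov eax, ebx" |
| carries its destination first; for AT&T output, nreverse turns the |
| operand list around so it prints as "movl %ebx, %eax". For |
| three-operand forms, reversing the whole list swaps operands 1 and |
| 3 while leaving the middle operand in place. */ |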
| |
| /* Map a register name to a high level tree type for a VAR_DECL of |
| that type, whose RTL will refer to the given register. */ |
| |
| static tree |
| iasm_type_for (tree arg) |
| { |
| tree type = NULL_TREE; |
| |
| if (IDENTIFIER_LENGTH (arg) > 2 |
| && IDENTIFIER_POINTER (arg)[0] == '%') |
| { |
| enum machine_mode mode = VOIDmode; |
| if (IDENTIFIER_POINTER (arg)[1] == 'e') |
| mode = SImode; |
| /* LLVM LOCAL begin */ |
| /* Accept H registers for LLVM; this totally obviates the need for ASM_USES. */ |
| #ifndef ENABLE_LLVM |
| else if (/* IDENTIFIER_POINTER (arg)[2] == 'h' |
| || */ IDENTIFIER_POINTER (arg)[2] == 'l') |
| mode = QImode; |
| #else |
| else if (IDENTIFIER_POINTER (arg)[2] == 'h' |
| || IDENTIFIER_POINTER (arg)[2] == 'l') |
| mode = QImode; |
| #endif |
| /* LLVM LOCAL end */ |
| else if (IDENTIFIER_POINTER (arg)[2] == 'x') |
| mode = HImode; |
| else if (IDENTIFIER_POINTER (arg)[1] == 'r') |
| mode = DImode; |
| else if (IDENTIFIER_POINTER (arg)[1] == 'x') |
| mode = SFmode; |
| else if (IDENTIFIER_POINTER (arg)[1] == 'm') |
| /* LLVM LOCAL - begin Force MMX to use a vector mode: PR1222. */ |
| #ifdef ENABLE_LLVM |
| mode = V2SImode; |
| #else |
| mode = SFmode; |
| #endif |
| /* LLVM LOCAL - end Force MMX to use a vector mode: PR1222. */ |
| |
| if (mode != VOIDmode) |
| type = lang_hooks.types.type_for_mode (mode, 1); |
| } |
| |
| return type; |
| } |
| |
| /* We raise a named register into a VAR_DECL of an appropriate type |
| whose RTL refers to the register, so that reload doesn't run out |
| of registers. */ |
| |
| tree |
| iasm_raise_reg (tree arg) |
| { |
| int regno = decode_reg_name (IDENTIFIER_POINTER (arg)); |
| if (regno >= 0) |
| { |
| tree decl = NULL_TREE; |
| |
| decl = lookup_name (arg); |
| if (decl == error_mark_node) |
| decl = 0; |
| if (decl == 0 || !DECL_ASM_BLOCK_REGISTER (decl)) |
| { |
| tree type = iasm_type_for (arg); |
| if (type) |
| { |
| decl = build_decl (VAR_DECL, arg, type); |
| DECL_ARTIFICIAL (decl) = 1; |
| DECL_REGISTER (decl) = 1; |
| C_DECL_REGISTER (decl) = 1; |
| DECL_HARD_REGISTER (decl) = 1; |
| DECL_ASM_BLOCK_REGISTER (decl) = 1; |
| set_user_assembler_name (decl, IDENTIFIER_POINTER (arg)); |
| decl = lang_hooks.decls.pushdecl (decl); |
| } |
| } |
| |
| if (decl) |
| return decl; |
| } |
| |
| return arg; |
| } |
| |
| /* Allow constants and readonly variables to be used in instructions |
| in places that require constants. */ |
| |
| static tree |
| iasm_default_conv (tree e) |
| { |
| if (e == NULL_TREE) |
| return e; |
| |
| if (TREE_CODE (e) == CONST_DECL) |
| e = DECL_INITIAL (e); |
| |
| if (DECL_P (e) && DECL_MODE (e) != BLKmode) |
| e = decl_constant_value (e); |
| return e; |
| } |
| |
| /* Return true iff the operand is suitable as the offset of a |
| memory operand. */ |
| |
| static bool |
| iasm_is_offset (tree v) |
| { |
| if (TREE_CODE (v) == INTEGER_CST) |
| return true; |
| if (TREE_CODE (v) == ADDR_EXPR) |
| { |
| v = TREE_OPERAND (v, 0); |
| if (TREE_CODE (v) == VAR_DECL |
| && TREE_STATIC (v) |
| /* LLVM LOCAL begin */ |
| /* DECL_RTL is not set for LLVM */ |
| #ifndef ENABLE_LLVM |
| && MEM_P (DECL_RTL (v)) |
| #endif |
| ) |
| /* LLVM LOCAL end */ |
| { |
| note_alternative_entry_points (); |
| return true; |
| } |
| if (TREE_CODE (v) == LABEL_DECL) |
| return true; |
| return false; |
| } |
| if (TREE_CODE (v) == VAR_DECL |
| && TREE_STATIC (v) |
| /* LLVM LOCAL begin */ |
| /* DECL_RTL is not set for LLVM */ |
| #ifndef ENABLE_LLVM |
| && MEM_P (DECL_RTL (v)) |
| #endif |
| ) |
| /* LLVM LOCAL end */ |
| { |
| note_alternative_entry_points (); |
| return true; |
| } |
| if ((TREE_CODE (v) == MINUS_EXPR |
| || TREE_CODE (v) == PLUS_EXPR) |
| && iasm_is_offset (TREE_OPERAND (v, 0)) |
| && iasm_is_offset (TREE_OPERAND (v, 1))) |
| return true; |
| if (TREE_CODE (v) == NEGATE_EXPR |
| && iasm_is_offset (TREE_OPERAND (v, 0))) |
| return true; |
| |
| return false; |
| } |
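| |
| /* For instance (illustrative): 12, &static_var, a LABEL_DECL, |
| &static_var + 8 and -(4) all qualify as offsets, while a register |
| or the address of an automatic variable does not. */ |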
| |
| /* Combine two types for [] expressions. */ |
| |
| static tree |
| iasm_combine_type (tree type0, tree type1) |
| { |
| if (type0 == void_type_node |
| || type0 == NULL_TREE) |
| { |
| if (type1 == void_type_node) |
| return NULL_TREE; |
| return type1; |
| } |
| |
| if (type1 == void_type_node |
| || type1 == NULL_TREE) |
| return type0; |
| |
| if (type0 == type1) |
| return type0; |
| |
| error ("too many types in []"); |
| |
| return type0; |
| } |
| |
| /* We canonicalize the input form of bracket expressions, as the input |
| forms are less constrained than what the assembler will accept. |
| |
| TOP is the top of the canonical tree we're generating and |
| TREE_OPERAND (, 0) is the offset portion of the expression. ARGP |
| points to the current part of the tree we're walking. |
| |
| The transformations we do: |
| |
| (A+O) ==> A |
| (A-O) ==> A |
| (O+A) ==> A |
| |
| where O are offset expressions. */ |
| |
| static tree |
| iasm_canonicalize_bracket_1 (tree* argp, tree top) |
| { |
| tree arg = *argp; |
| tree offset = TREE_OPERAND (top, 0); |
| tree arg0, arg1; |
| tree rtype = NULL_TREE; |
| |
| *argp = arg = iasm_default_conv (arg); |
| |
| switch (TREE_CODE (arg)) |
| { |
| case NOP_EXPR: |
| if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE) |
| { |
| *argp = TREE_OPERAND (arg, 0); |
| return TREE_TYPE (arg); |
| } |
| break; |
| |
| case BRACKET_EXPR: |
| rtype = TREE_TYPE (arg); |
| /* fall thru */ |
| case PLUS_EXPR: |
| arg0 = TREE_OPERAND (arg, 0); |
| arg1 = TREE_OPERAND (arg, 1); |
| |
| arg0 = iasm_default_conv (arg0); |
| arg1 = iasm_default_conv (arg1); |
| |
| if (iasm_is_offset (arg0)) |
| { |
| if (offset != integer_zero_node) |
| arg0 = build2 (PLUS_EXPR, void_type_node, arg0, offset); |
| TREE_OPERAND (top, 0) = arg0; |
| |
| *argp = arg1; |
| if (arg1) |
| return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top)); |
| } |
| else if (arg1 && iasm_is_offset (arg1)) |
| { |
| if (offset != integer_zero_node) |
| arg1 = build2 (PLUS_EXPR, void_type_node, arg1, offset); |
| TREE_OPERAND (top, 0) = arg1; |
| *argp = arg0; |
| return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top)); |
| } |
| else |
| { |
| rtype = iasm_combine_type (rtype, |
| iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top)); |
| |
| if (arg1) |
| rtype = iasm_combine_type (rtype, |
| iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), top)); |
| if (TREE_OPERAND (arg, 0) == NULL_TREE) |
| { |
| if (TREE_OPERAND (arg, 1)) |
| { |
| TREE_OPERAND (arg, 0) = TREE_OPERAND (arg, 1); |
| TREE_OPERAND (arg, 1) = NULL_TREE; |
| } |
| else |
| *argp = NULL_TREE; |
| } |
| else if (TREE_OPERAND (arg, 1) == NULL_TREE && rtype == NULL_TREE) |
| *argp = TREE_OPERAND (arg, 0); |
| if (TREE_CODE (arg) == PLUS_EXPR |
| && TREE_TYPE (arg) == NULL_TREE |
| && TREE_TYPE (TREE_OPERAND (arg, 0)) |
| && TREE_TYPE (TREE_OPERAND (arg, 1)) |
| && (POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1))) |
| || POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0))))) |
| { |
| tree type = TREE_TYPE (TREE_OPERAND (arg, 1)); |
| if (INTEGRAL_TYPE_P (type)) |
| type = TREE_TYPE (TREE_OPERAND (arg, 0)); |
| TREE_TYPE (arg) = type; |
| } |
| if (TREE_CODE (arg) == PLUS_EXPR |
| && TREE_TYPE (arg) == NULL_TREE |
| && TREE_TYPE (TREE_OPERAND (arg, 0)) |
| && TREE_TYPE (TREE_OPERAND (arg, 0)) == TREE_TYPE (TREE_OPERAND (arg, 1))) |
| { |
| tree type = TREE_TYPE (TREE_OPERAND (arg, 0)); |
| TREE_TYPE (arg) = type; |
| } |
| } |
| return rtype; |
| |
| case MINUS_EXPR: |
| rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top); |
| arg0 = TREE_OPERAND (arg, 0); |
| arg1 = TREE_OPERAND (arg, 1); |
| arg1 = iasm_default_conv (arg1); |
| if (iasm_is_offset (arg1)) |
| { |
| offset = TREE_OPERAND (top, 0); |
| if (offset == integer_zero_node) |
| arg1 = fold (build1 (NEGATE_EXPR, |
| TREE_TYPE (arg1), |
| arg1)); |
| else |
| arg1 = build2 (MINUS_EXPR, void_type_node, offset, arg1); |
| TREE_OPERAND (top, 0) = arg1; |
| *argp = arg0; |
| return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top)); |
| } |
| return rtype; |
| |
| case PARM_DECL: |
| case VAR_DECL: |
| { |
| *argp = iasm_addr (arg); |
| break; |
| } |
| |
| case IDENTIFIER_NODE: |
| { |
| *argp = iasm_raise_reg (arg); |
| break; |
| } |
| |
| case MULT_EXPR: |
| if (TREE_TYPE (arg) == NULL_TREE) |
| { |
| if (TREE_CODE (TREE_OPERAND (arg, 1)) == IDENTIFIER_NODE) |
| TREE_OPERAND (arg, 1) = iasm_raise_reg (TREE_OPERAND (arg, 1)); |
| if (TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE) |
| TREE_OPERAND (arg, 0) = iasm_raise_reg (TREE_OPERAND (arg, 0)); |
| if (TREE_TYPE (TREE_OPERAND (arg, 0)) |
| && TREE_TYPE (TREE_OPERAND (arg, 1))) |
| TREE_TYPE (arg) = TREE_TYPE (TREE_OPERAND (arg, 0)); |
| } |
| break; |
| |
| default: |
| break; |
| } |
| |
| return NULL_TREE; |
| } |
| |
| /* Form an indirection for an inline asm address expression operand. |
| We give a warning when we think the optimizer might have to be used |
| to reform complex addresses, &stack_var + %eax + 4 for example, |
| after gimplification rips the address apart. */ |
| |
| static tree |
| iasm_indirect (tree addr) |
| { |
| if (TREE_CODE (addr) == ADDR_EXPR |
| && TREE_CODE (TREE_TYPE (TREE_OPERAND (addr, 0))) != ARRAY_TYPE |
| /* && TREE_CODE (TREE_OPERAND (addr, 0)) == ARRAY_REF */) |
| return TREE_OPERAND (addr, 0); |
| |
| addr = fold (build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr)); |
| |
| if (! optimize && TREE_CODE (addr) == INDIRECT_REF) |
| warning (0, "addressing mode too complex when not optimizing, will consume extra register(s)"); |
| |
| return addr; |
| } |
| |
| /* Form an address addition for an inline asm address expression. We |
| try to form ARRAY_REFs, as they will go through gimplification |
| without being ripped apart. */ |
| |
| static tree |
| iasm_add (tree addr, tree off) |
| { |
| if (integer_zerop (off)) |
| return addr; |
| |
| /* We have to convert the offset to an int type, as we rip apart |
| trees whose type has been converted to a pointer type for the |
| offset already. */ |
| return pointer_int_sum (PLUS_EXPR, addr, convert (integer_type_node, off)); |
| } |
| |
| /* We canonicalize the input form of bracket expressions, as the input |
| forms are less constrained than what the assembler will accept. */ |
| |
| static tree |
| iasm_canonicalize_bracket (tree arg) |
| { |
| tree rtype; |
| |
| gcc_assert (TREE_CODE (arg) == BRACKET_EXPR); |
| |
| /* Let the normal operand printer output this without trying to |
| decompose it into parts so that things like (%esp + 20) + 4 can |
| be output as 24(%esp) by the optimizer instead of 4(%0) and |
| burning an "R" with (%esp + 20). */ |
| if (TREE_OPERAND (arg, 1) == NULL_TREE |
| && TREE_TYPE (TREE_OPERAND (arg, 0)) |
| && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0)))) |
| { |
| if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL |
| || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL) |
| return arg; |
| return iasm_indirect (TREE_OPERAND (arg, 0)); |
| } |
| |
| /* Ensure that operand 0 is an offset. */ |
| if (TREE_OPERAND (arg, 0) |
| && iasm_is_offset (TREE_OPERAND (arg, 0))) |
| { |
| /* we win if 0 is an offset already. */ |
| } |
| else if (TREE_OPERAND (arg, 1) == NULL_TREE) |
| { |
| /* Move operand 0 to operand 1, if operand 1 is empty and operand 0 |
| isn't already an offset. */ |
| TREE_OPERAND (arg, 1) = TREE_OPERAND (arg, 0); |
| TREE_OPERAND (arg, 0) = integer_zero_node; |
| } |
| else |
| { |
| tree swp; |
| /* Just have to force it now */ |
| swp = iasm_build_bracket (TREE_OPERAND (arg, 0), TREE_OPERAND (arg, 1)); |
| TREE_OPERAND (arg, 0) = integer_zero_node; |
| TREE_OPERAND (arg, 1) = swp; |
| } |
| |
| if (TREE_OPERAND (arg, 1)) |
| { |
| rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), arg); |
| if (rtype) |
| TREE_TYPE (arg) = iasm_combine_type (TREE_TYPE (arg), rtype); |
| } |
| |
| /* For correctness, pointer types should be raised to the tree |
| level, as they denote address calculations with stack based |
| objects, and we want print_operand to print the entire address so |
| that it can combine constants and hard registers into the address. |
| Unfortunately we might have to rely upon the optimizer to reform |
| the address after the gimplification pass rips it apart. */ |
| |
| /* Handle [INTEGER_CST][ptr][op3] */ |
| if (TREE_OPERAND (arg, 1) |
| && TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST |
| && TREE_CODE (TREE_OPERAND (arg, 1)) == BRACKET_EXPR |
| && TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0)) |
| && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))) |
| && TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))) != void_type_node |
| && (TREE_TYPE (arg) == void_type_node |
| || (TREE_TYPE (arg) == get_identifier ("word") |
| && (TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0)))) |
| == HImode)))) |
| { |
| tree op3 = TREE_OPERAND (TREE_OPERAND (arg, 1), 1); |
| tree addr = iasm_add (TREE_OPERAND (TREE_OPERAND (arg, 1), 0), |
| TREE_OPERAND (arg, 0)); |
| tree type; |
| addr = iasm_indirect (addr); |
| if (op3 == NULL_TREE) |
| return addr; |
| type = TREE_TYPE (addr); |
| type = build_pointer_type (type); |
| addr = build1 (ADDR_EXPR, type, addr); |
| addr = fold (build2 (PLUS_EXPR, type, addr, op3)); |
| return iasm_indirect (addr); |
| } |
| |
| /* Handle ptr + INTEGER_CST */ |
| if (TREE_OPERAND (arg, 1) |
| && TREE_TYPE (arg) == void_type_node |
| && TREE_TYPE (TREE_OPERAND (arg, 1)) |
| && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1))) |
| && TREE_TYPE (TREE_TYPE (TREE_OPERAND (arg, 1))) != void_type_node) |
| { |
| if (TREE_CODE (TREE_OPERAND (arg, 1)) == ADDR_EXPR) |
| { |
| if (TREE_OPERAND (arg, 0) == integer_zero_node) |
| return TREE_OPERAND (TREE_OPERAND (arg, 1), 0); |
| if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST) |
| return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0))); |
| } |
| if (TREE_CODE (TREE_OPERAND (arg, 1)) == PLUS_EXPR) |
| { |
| if (TREE_OPERAND (arg, 0) == integer_zero_node) |
| return iasm_indirect (TREE_OPERAND (arg, 1)); |
| if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST) |
| return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0))); |
| } |
| } |
| return arg; |
| } |
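| |
| /* Worked example (illustrative): an operand written "[4 + ebx]" |
| arrives with the constant inside the bracket; the canonicalization |
| above leaves the displacement 4 in operand 0 and the raised |
| register variable for ebx in operand 1, so the operand printer can |
| emit it as "4(%ebx)" in AT&T syntax. */ |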
| |
| /* We canonicalize the instruction by swapping operands and rewriting |
| the opcode if the output style is in ATT syntax. */ |
| |
| tree |
| iasm_x86_canonicalize_operands (const char **opcode_p, tree iargs, void *ep) |
| { |
| iasm_md_extra_info *e = ep; |
| static char buf[40]; |
| tree args = iargs; |
| int argnum = 1; |
| const char *opcode = *opcode_p; |
| bool fp_style = false; |
| bool fpi_style = false; |
| |
| /* Don't transform if output format is the same as input format. */ |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| return iargs; |
| |
| if (strncasecmp (opcode, "f", 1) == 0) |
| fp_style = true; |
| |
| if (fp_style |
| && strncasecmp (opcode+1, "i", 1) == 0) |
| fpi_style = true; |
| |
| while (args) |
| { |
| tree arg = TREE_VALUE (args); |
| |
| /* Handle floating point stack references like st(3). */ |
| if (TREE_CODE (arg) == COMPOUND_EXPR |
| && TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE |
| && strcasecmp (IDENTIFIER_POINTER (TREE_OPERAND (arg, 0)), "%st") == 0 |
| && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST) |
| { |
| int v = tree_low_cst (TREE_OPERAND (arg, 1), 0); |
| |
| if (v < 0 || v > 7) |
| { |
| error ("unknown floating point register st(%d)", v); |
| v = 0; |
| } |
| |
| /* Rewrite %st(0) to %st. */ |
| if (v == 0) |
| TREE_VALUE (args) = TREE_OPERAND (arg, 0); |
| else |
| { |
| char buf[20]; |
| sprintf (buf, "%%st(%d)", v); |
| TREE_VALUE (args) = get_identifier (buf); |
| } |
| } |
| else if (TREE_CODE (arg) == BRACKET_EXPR) |
| TREE_VALUE (args) = arg = iasm_canonicalize_bracket (arg); |
| |
| switch (TREE_CODE (arg)) |
| { |
| case ARRAY_REF: |
| case VAR_DECL: |
| case PARM_DECL: |
| case INDIRECT_REF: |
| if (TYPE_MODE (TREE_TYPE (arg)) == QImode) |
| e->mod[argnum-1] = 'b'; |
| else if (TYPE_MODE (TREE_TYPE (arg)) == HImode) |
| e->mod[argnum-1] = fpi_style ? 's' : 'w'; |
| else if (TYPE_MODE (TREE_TYPE (arg)) == SImode) |
| e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l'); |
| else if (TYPE_MODE (TREE_TYPE (arg)) == DImode) |
| e->mod[argnum-1] = 'q'; |
| else if (TYPE_MODE (TREE_TYPE (arg)) == SFmode) |
| e->mod[argnum-1] = 's'; |
| else if (TYPE_MODE (TREE_TYPE (arg)) == DFmode) |
| e->mod[argnum-1] = 'l'; |
| else if (TYPE_MODE (TREE_TYPE (arg)) == XFmode) |
| e->mod[argnum-1] = 't'; |
| break; |
| case BRACKET_EXPR: |
| /* We use the TREE_TYPE to indicate the type of the operand; it |
| is set with code like: inc dword ptr [eax]. */ |
| if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE) |
| { |
| const char *s = IDENTIFIER_POINTER (TREE_TYPE (arg)); |
| if (strcasecmp (s, "byte") == 0) |
| e->mod[argnum-1] = 'b'; |
| else if (strcasecmp (s, "word") == 0) |
| e->mod[argnum-1] = fpi_style ? 's' : 'w'; |
| else if (strcasecmp (s, "dword") == 0) |
| e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l'); |
| else if (strcasecmp (s, "qword") == 0) |
| e->mod[argnum-1] = 'q'; |
| else if (strcasecmp (s, "real4") == 0) |
| e->mod[argnum-1] = 's'; |
| else if (strcasecmp (s, "real8") == 0) |
| e->mod[argnum-1] = 'l'; |
| else if (strcasecmp (s, "real10") == 0) |
| e->mod[argnum-1] = 't'; |
| else if (strcasecmp (s, "tbyte") == 0) |
| e->mod[argnum-1] = 't'; |
| } |
| break; |
| case LABEL_DECL: |
| e->mod[argnum-1] = 'l'; |
| break; |
| case IDENTIFIER_NODE: |
| if (IDENTIFIER_LENGTH (arg) > 2 |
| && IDENTIFIER_POINTER (arg)[0] == '%') |
| { |
| if (IDENTIFIER_POINTER (arg)[1] == 'e') |
| e->mod[argnum-1] = 'l'; |
| else if (IDENTIFIER_POINTER (arg)[2] == 'h' |
| || IDENTIFIER_POINTER (arg)[2] == 'l') |
| e->mod[argnum-1] = 'b'; |
| else if (IDENTIFIER_POINTER (arg)[2] == 'x') |
| e->mod[argnum-1] = 'w'; |
| } |
| break; |
| default: |
| break; |
| } |
| args = TREE_CHAIN (args); |
| ++argnum; |
| } |
| --argnum; |
| |
| args = iasm_x86_swap_operands (opcode, iargs); |
| if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1)) |
| e->pseudo = true; |
| |
| if (strcasecmp (opcode, "movs") == 0 |
| || strcasecmp (opcode, "scas") == 0 |
| || strcasecmp (opcode, "stos") == 0 |
| || strcasecmp (opcode, "xlat") == 0) |
| args = NULL_TREE; |
| else if (strcasecmp (opcode, "cmovpo") == 0) |
| opcode = "cmovnp"; |
| else if (strcasecmp (opcode, "cmovpe") == 0) |
| opcode = "cmovp"; |
| else if (strcasecmp (opcode, "outs") == 0 |
| && TREE_CHAIN (args)) |
| { |
| e->mod[0] = e->mod[1]; |
| } |
| else if (strcasecmp (opcode, "ins") == 0 |
| && TREE_CHAIN (args)) |
| { |
| e->mod[1] = 0; |
| } |
| /* movsx isn't part of the AT&T syntax, they spell it movs. */ |
| else if (strcasecmp (opcode, "movsx") == 0) |
| opcode = "movs"; |
| else if (strcasecmp (opcode, "pushfd") == 0) |
| *opcode_p = "pushf"; |
| else if (strcasecmp (opcode, "popfd") == 0) |
| *opcode_p = "popf"; |
| |
| /* movzx isn't part of the AT&T syntax, they spell it movz. */ |
| if (strcasecmp (opcode, "movzx") == 0) |
| { |
| /* Silly extension of the day: a zero-extended move that has the |
| same size before and after is accepted and is just a normal |
| move. */ |
| if (argnum == 2 |
| && (e->mod[0] == e->mod[1] |
| || e->mod[1] == 0)) |
| opcode = "mov"; |
| else |
| opcode = "movz"; |
| } |
| |
| if (strncasecmp (opcode, "f", 1) == 0 && |
| (!(strcasecmp (opcode, "fldcw") == 0))) |
| { |
| if (e->mod[0] == 'w') |
| e->mod[0] = 's'; |
| if (e->mod[1] == 'w') |
| e->mod[1] = 's'; |
| } |
| else if (strcasecmp (opcode, "mov") == 0) |
| { |
| /* The 32-bit integer instructions can be used on floats. */ |
| if (e->mod[0] == 's') |
| e->mod[0] = 'l'; |
| if (e->mod[1] == 's') |
| e->mod[1] = 'l'; |
| } |
| |
| if (e->pseudo) |
| e->mod[0] = e->mod[1] = 0; |
| else if (strcasecmp (opcode, "clflush") == 0 |
| || strcasecmp (opcode, "fbld") == 0 |
| || strcasecmp (opcode, "fbstp") == 0 |
| || strcasecmp (opcode, "fldt") == 0 |
| || strcasecmp (opcode, "fnstcw") == 0 |
| || strcasecmp (opcode, "fnstsw") == 0 |
| || strcasecmp (opcode, "fstcw") == 0 |
| || strcasecmp (opcode, "fstsw") == 0 |
| || strcasecmp (opcode, "fxrstor") == 0 |
| || strcasecmp (opcode, "fxsave") == 0 |
| || strcasecmp (opcode, "invlpg") == 0 |
| || strcasecmp (opcode, "jmp") == 0 |
| || strcasecmp (opcode, "call") == 0 |
| || strcasecmp (opcode, "ja") == 0 |
| || strcasecmp (opcode, "jae") == 0 |
| || strcasecmp (opcode, "jb") == 0 |
| || strcasecmp (opcode, "jbe") == 0 |
| || strcasecmp (opcode, "jc") == 0 |
| || strcasecmp (opcode, "je") == 0 |
| || strcasecmp (opcode, "jg") == 0 |
| || strcasecmp (opcode, "jge") == 0 |
| || strcasecmp (opcode, "jl") == 0 |
| || strcasecmp (opcode, "jle") == 0 |
| || strcasecmp (opcode, "jna") == 0 |
| || strcasecmp (opcode, "jnae") == 0 |
| || strcasecmp (opcode, "jnb") == 0 |
| || strcasecmp (opcode, "jnc") == 0 |
| || strcasecmp (opcode, "jne") == 0 |
| || strcasecmp (opcode, "jng") == 0 |
| || strcasecmp (opcode, "jnge") == 0 |
| || strcasecmp (opcode, "jnl") == 0 |
| || strcasecmp (opcode, "jnle") == 0 |
| || strcasecmp (opcode, "jno") == 0 |
| || strcasecmp (opcode, "jnp") == 0 |
| || strcasecmp (opcode, "jns") == 0 |
| || strcasecmp (opcode, "jnz") == 0 |
| || strcasecmp (opcode, "jo") == 0 |
| || strcasecmp (opcode, "jp") == 0 |
| || strcasecmp (opcode, "jpe") == 0 |
| || strcasecmp (opcode, "jpo") == 0 |
| || strcasecmp (opcode, "js") == 0 |
| || strcasecmp (opcode, "jz") == 0 |
| || strcasecmp (opcode, "ldmxcsr") == 0 |
| || strcasecmp (opcode, "lgdt") == 0 |
| || strcasecmp (opcode, "lidt") == 0 |
| || strcasecmp (opcode, "lldt") == 0 |
| || strcasecmp (opcode, "lmsw") == 0 |
| || strcasecmp (opcode, "ltr") == 0 |
| || strcasecmp (opcode, "movapd") == 0 |
| || strcasecmp (opcode, "movaps") == 0 |
| || strcasecmp (opcode, "movd") == 0 |
| || strcasecmp (opcode, "movhpd") == 0 |
| || strcasecmp (opcode, "movhps") == 0 |
| || strcasecmp (opcode, "movlpd") == 0 |
| || strcasecmp (opcode, "movlps") == 0 |
| || strcasecmp (opcode, "movntdq") == 0 |
| || strcasecmp (opcode, "movntpd") == 0 |
| || strcasecmp (opcode, "movntps") == 0 |
| || strcasecmp (opcode, "movntq") == 0 |
| || strcasecmp (opcode, "movq") == 0 |
| || strcasecmp (opcode, "movsd") == 0 |
| || strcasecmp (opcode, "movss") == 0 |
| || strcasecmp (opcode, "movupd") == 0 |
| || strcasecmp (opcode, "movups") == 0 |
| || strcasecmp (opcode, "out") == 0 |
| || strcasecmp (opcode, "prefetchnta") == 0 |
| || strcasecmp (opcode, "prefetcht0") == 0 |
| || strcasecmp (opcode, "prefetcht1") == 0 |
| || strcasecmp (opcode, "prefetcht2") == 0 |
| || strcasecmp (opcode, "seta") == 0 |
| || strcasecmp (opcode, "setae") == 0 |
| || strcasecmp (opcode, "setb") == 0 |
| || strcasecmp (opcode, "setbe") == 0 |
| || strcasecmp (opcode, "setc") == 0 |
| || strcasecmp (opcode, "sete") == 0 |
| || strcasecmp (opcode, "setg") == 0 |
| || strcasecmp (opcode, "setge") == 0 |
| || strcasecmp (opcode, "setl") == 0 |
| || strcasecmp (opcode, "setle") == 0 |
| || strcasecmp (opcode, "setna") == 0 |
| || strcasecmp (opcode, "setnae") == 0 |
| || strcasecmp (opcode, "setnb") == 0 |
| || strcasecmp (opcode, "setnbe") == 0 |
| || strcasecmp (opcode, "setnc") == 0 |
| || strcasecmp (opcode, "setne") == 0 |
| || strcasecmp (opcode, "setng") == 0 |
| || strcasecmp (opcode, "setnge") == 0 |
| || strcasecmp (opcode, "setnl") == 0 |
| || strcasecmp (opcode, "setnle") == 0 |
| || strcasecmp (opcode, "setno") == 0 |
| || strcasecmp (opcode, "setnp") == 0 |
| || strcasecmp (opcode, "setns") == 0 |
| || strcasecmp (opcode, "setnz") == 0 |
| || strcasecmp (opcode, "seto") == 0 |
| || strcasecmp (opcode, "setp") == 0 |
| || strcasecmp (opcode, "setpe") == 0 |
| || strcasecmp (opcode, "setpo") == 0 |
| || strcasecmp (opcode, "sets") == 0 |
| || strcasecmp (opcode, "setz") == 0 |
| || strcasecmp (opcode, "sldt") == 0 |
| || strcasecmp (opcode, "smsw") == 0 |
| || strcasecmp (opcode, "stmxcsr") == 0 |
| || strcasecmp (opcode, "str") == 0 |
| || strcasecmp (opcode, "xlat") == 0) |
| e->mod[0] = 0; |
| else if (strcasecmp (opcode, "lea") == 0 |
| || strcasecmp (opcode, "rcl") == 0 |
| || strcasecmp (opcode, "rcr") == 0 |
| || strcasecmp (opcode, "rol") == 0 |
| || strcasecmp (opcode, "ror") == 0 |
| || strcasecmp (opcode, "sal") == 0 |
| || strcasecmp (opcode, "sar") == 0 |
| || strcasecmp (opcode, "shl") == 0 |
| || strcasecmp (opcode, "shr") == 0) |
| e->mod[1] = 0; |
| |
| if ((argnum == 1 && e->mod[0]) |
| || (argnum == 2 && e->mod[0] |
| && (e->mod[0] == e->mod[1] |
| || e->mod[1] == 0))) |
| { |
| sprintf (buf, "%s%c", opcode, e->mod[0]); |
| *opcode_p = buf; |
| } |
| else if (argnum == 2 && e->mod[0] && e->mod[1]) |
| { |
| sprintf (buf, "%s%c%c", opcode, e->mod[1], e->mod[0]); |
| *opcode_p = buf; |
| } |
| |
| return args; |
| } |
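| |
| /* Two worked examples (illustrative): "mov dword ptr [eax], 1" |
| records mod[0] == 'l' from the dword ptr bracket and becomes |
| "movl" after its operands are swapped; "movzx eax, bx" records |
| mod[0] == 'l' and mod[1] == 'w', so the differing-size branch |
| above yields "movzwl". */ |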
| |
| /* Character used to separate the prefix words. */ |
| /* See radr://4141844 for the enhancement to make this uniformly ' '. */ |
| #define IASM_PREFIX_SEP '/' |
| |
| void |
| iasm_x86_print_prefix (char *buf, tree prefix_list) |
| { |
| buf += strlen (buf); |
| while (prefix_list) |
| { |
| tree prefix = TREE_VALUE (prefix_list); |
| size_t len = IDENTIFIER_LENGTH (prefix); |
| memcpy (buf, IDENTIFIER_POINTER (prefix), len); |
| buf += len; |
| buf[0] = IASM_PREFIX_SEP; |
| ++buf; |
| buf[0] = 0; |
| prefix_list = TREE_CHAIN (prefix_list); |
| } |
| } |
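| |
| /* E.g. (illustrative): a prefix list of ("rep") appends "rep/" to |
| BUF, using IASM_PREFIX_SEP, ahead of where the caller places the |
| opcode text. */ |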
| |
| /* Warn when a variable's address is used to form a memory address that |
| will need an extra register during reload. */ |
| |
| static void |
| iasm_warn_extra_reg (tree arg) |
| { |
| if (TREE_CODE (arg) == ADDR_EXPR |
| && (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL |
| || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL)) |
| warning (0, "addressing mode too complex, will consume an extra register"); |
| } |
| |
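| /* Print operand ARG (operand number ARGNUM) of an asm block into BUF, |
| handling the x86-specific forms: bracketed memory operands, address-of |
| expressions and scaled-index products. USES collects the operands |
| referenced, and EP points at the target-specific state. Return true |
| if ARG was printed here, false to let the generic printer handle it. */ |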
| bool |
| iasm_print_op (char *buf, tree arg, unsigned argnum, tree *uses, |
| bool must_be_reg, bool must_not_be_reg, void *ep) |
| { |
| iasm_md_extra_info *e = ep; |
| switch (TREE_CODE (arg)) |
| { |
| case BRACKET_EXPR: |
| { |
| tree op1 = TREE_OPERAND (arg, 0); |
| tree op2 = TREE_OPERAND (arg, 1); |
| tree op0 = NULL_TREE, op3 = NULL_TREE; |
| tree scale = NULL_TREE; |
| |
| if (op2 == NULL_TREE |
| && TREE_TYPE (op1) |
| && POINTER_TYPE_P (TREE_TYPE (op1))) |
| { |
| /* Let the normal operand printer output this without trying to |
| decompose it into parts, so that something like (%esp + 20) + 4 |
| can be output as 24(%esp) by the optimizer, instead of as 4(%0) |
| while burning an "R" register on (%esp + 20). */ |
| iasm_force_constraint ("m", e); |
| iasm_get_register_var (op1, "", buf, argnum, must_be_reg, e); |
| iasm_force_constraint (0, e); |
| break; |
| } |
| |
| if (op2 |
| && TREE_CODE (op2) == BRACKET_EXPR) |
| { |
| op3 = TREE_OPERAND (op2, 1); |
| op2 = TREE_OPERAND (op2, 0); |
| if (TREE_CODE (op2) == BRACKET_EXPR) |
| { |
| op0 = TREE_OPERAND (op2, 1); |
| op2 = TREE_OPERAND (op2, 0); |
| } |
| } |
| if (op0) |
| return false; |
| |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| strcat (buf, "["); |
| |
| if (op3 == NULL_TREE |
| && op2 && TREE_CODE (op2) == PLUS_EXPR) |
| { |
| op3 = TREE_OPERAND (op2, 0); |
| op2 = TREE_OPERAND (op2, 1); |
| } |
| if (op2 && TREE_CODE (op2) == MULT_EXPR) |
| { |
| tree t; |
| t = op3; |
| op3 = op2; |
| op2 = t; |
| } |
| |
| /* Crack out the scaling, if any. */ |
| if (ASSEMBLER_DIALECT == ASM_ATT |
| && op3 |
| && TREE_CODE (op3) == MULT_EXPR) |
| { |
| if (TREE_CODE (TREE_OPERAND (op3, 1)) == INTEGER_CST) |
| { |
| scale = TREE_OPERAND (op3, 1); |
| op3 = TREE_OPERAND (op3, 0); |
| } |
| else if (TREE_CODE (TREE_OPERAND (op3, 0)) == INTEGER_CST) |
| { |
| scale = TREE_OPERAND (op3, 0); |
| op3 = TREE_OPERAND (op3, 1); |
| } |
| } |
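| /* By this point an operand written as, say, "8[ebp+ebx*4]" has been |
| cracked into op1 = 8 (displacement), op2 = ebp (base), op3 = ebx |
| (index) and scale = 4, allowing AT&T output of the form |
| "8(%ebp,%ebx,4)". */ |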
| |
| /* A complicated expression used as a JMP or CALL target; mark it |
| as an indirect operand. */ |
| if (e->modifier && strcmp (e->modifier, "A") == 0) |
| { |
| strcat (buf, "*"); |
| e->modifier = 0; |
| } |
| e->as_immediate = true; |
| iasm_print_operand (buf, op1, argnum, uses, |
| must_be_reg, must_not_be_reg, e); |
| e->as_immediate = false; |
| |
| /* Just an immediate. */ |
| if (op2 == NULL_TREE && op3 == NULL_TREE) |
| break; |
| |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| strcat (buf, "]"); |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| strcat (buf, "["); |
| else |
| strcat (buf, "("); |
| |
| if (op2) |
| { |
| /* We know by context that this, the base, has to be an "R". */ |
| iasm_force_constraint ("R", e); |
| iasm_warn_extra_reg (op2); |
| iasm_print_operand (buf, op2, argnum, uses, |
| must_be_reg, must_not_be_reg, e); |
| iasm_force_constraint (0, e); |
| } |
| if (op3) |
| { |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| strcat (buf, "]["); |
| else |
| strcat (buf, ","); |
| |
| /* We know by context that this, the index, has to be an "l". */ |
| iasm_force_constraint ("l", e); |
| iasm_warn_extra_reg (op3); |
| iasm_print_operand (buf, op3, argnum, uses, |
| must_be_reg, must_not_be_reg, e); |
| iasm_force_constraint (0, e); |
| if (scale) |
| { |
| strcat (buf, ","); |
| e->as_immediate = true; |
| iasm_print_operand (buf, scale, argnum, uses, |
| must_be_reg, must_not_be_reg, e); |
| e->as_immediate = false; |
| } |
| } |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| strcat (buf, "]"); |
| else |
| strcat (buf, ")"); |
| } |
| break; |
| |
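| /* The address of a variable or array element that is not being |
| printed as an immediate goes through a register variable; anything |
| else prints as the underlying operand, marked as an offset when not |
| in immediate context. */ |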
| case ADDR_EXPR: |
| if ((TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF |
| || TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL) |
| && ! e->as_immediate) |
| { |
| iasm_get_register_var (arg, "", buf, argnum, must_be_reg, e); |
| break; |
| } |
| if (! e->as_immediate) |
| e->as_offset = true; |
| iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses, |
| must_be_reg, must_not_be_reg, e); |
| e->as_offset = false; |
| break; |
| |
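| /* A scaled-index product such as "ebx*4"; print the two factors |
| separated by "*". */ |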
| case MULT_EXPR: |
| iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses, |
| must_be_reg, must_not_be_reg, e); |
| strcat (buf, "*"); |
| iasm_print_operand (buf, TREE_OPERAND (arg, 1), argnum, uses, |
| must_be_reg, must_not_be_reg, e); |
| break; |
| default: |
| return false; |
| } |
| return true; |
| } |
| /* APPLE LOCAL end CW asm blocks */ |
| |
| /* Return the mangling of TYPE if it is an extended fundamental type. */ |
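| /* For example, "long double" maps to "e" under the Itanium C++ ABI, |
| so "void f (long double)" mangles as "_Z1fe" on IA-32. */ |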
| |
| static const char * |
| /* APPLE LOCAL mangle_type 7105099 */ |
| ix86_mangle_type (tree type) |
| { |
| /* APPLE LOCAL begin mangle_type 7105099 */ |
| type = TYPE_MAIN_VARIANT (type); |
| |
| if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE |
| && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) |
| return NULL; |
| |
| /* APPLE LOCAL end mangle_type 7105099 */ |
| switch (TYPE_MODE (type)) |
| { |
| case TFmode: |
| /* __float128 is "g". */ |
| return "g"; |
| case XFmode: |
| /* "long double" or __float80 is "e". */ |
| return "e"; |
| default: |
| return NULL; |
| } |
| } |
| |
| /* For 32-bit code we can save PIC register setup by using the hidden |
| function __stack_chk_fail_local instead of calling __stack_chk_fail |
| directly.  64-bit code doesn't need to set up any PIC register, so it |
| is better to call __stack_chk_fail directly. */ |
| |
| static tree |
| ix86_stack_protect_fail (void) |
| { |
| return TARGET_64BIT |
| ? default_external_stack_protect_fail () |
| : default_hidden_stack_protect_fail (); |
| } |
| |
| /* Select a format to encode pointers in exception handling data. CODE |
| is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is |
| true if the symbol may be affected by dynamic relocations. |
| |
| ??? All x86 object file formats are capable of representing this. |
| After all, the relocation needed is the same as for the call insn. |
| Whether a particular assembler allows us to enter such relocations |
| remains to be seen. */ |
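| /* For example, 32-bit PIC code encodes a global symbol as the |
| combination DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; |
| non-PIC small-model code uses DW_EH_PE_udata4, and everything else |
| falls back to DW_EH_PE_absptr. */ |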
| int |
| asm_preferred_eh_data_format (int code, int global) |
| { |
| if (flag_pic) |
| { |
| int type = DW_EH_PE_sdata8; |
| if (!TARGET_64BIT |
| || ix86_cmodel == CM_SMALL_PIC |
| || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) |
| type = DW_EH_PE_sdata4; |
| return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; |
| } |
| if (ix86_cmodel == CM_SMALL |
| || (ix86_cmodel == CM_MEDIUM && code)) |
| return DW_EH_PE_udata4; |
| return DW_EH_PE_absptr; |
| } |
| |
| |
| /* LLVM LOCAL begin */ |
| #ifdef ENABLE_LLVM |
| /* These are wrappers for type_natural_mode, examine_argument and |
| classify_argument, which are all static functions. */ |
| enum machine_mode |
| ix86_getNaturalModeForType (tree type) |
| { |
| return type_natural_mode (type); |
| } |
| |
| int |
| ix86_HowToPassArgument (enum machine_mode mode, tree type, int in_return, |
| int *int_nregs, int *sse_nregs) |
| { |
| return examine_argument (mode, type, in_return, int_nregs, sse_nregs); |
| } |
| |
| int |
| ix86_ClassifyArgument (enum machine_mode mode, tree type, |
| enum x86_64_reg_class classes[MAX_CLASSES], |
| int bit_offset) |
| { |
| return classify_argument (mode, type, classes, bit_offset); |
| } |
| #endif |
| /* LLVM LOCAL end */ |
| |
| #include "gt-i386.h" |