;; DFA-based pipeline descriptions for MIPS Technologies 24K core. ;; Contributed by Chao-ying Fu (fu@mips.com), Nigel Stephens (nigel@mips.com) ;; and David Ung (davidu@mips.com) ;; ;; The 24K is a single-issue processor with a half-clocked fpu. ;; The 24Kx is 24k with 1:1 clocked fpu. ;; ;; References: ;; “MIPS32 24K Processor Core Family Software User's Manual, Rev 3.04.” ;; ;; Copyright (C) 2005 Free Software Foundation, Inc. ;; ;; This file is part of GCC. ;; ;; GCC is free software; you can redistribute it and/or modify it ;; under the terms of the GNU General Public License as published ;; by the Free Software Foundation; either version 2, or (at your ;; option) any later version.
;; GCC is distributed in the hope that it will be useful, but WITHOUT ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ;; License for more details.
;; You should have received a copy of the GNU General Public License ;; along with GCC; see the file COPYING. If not, write to the ;; Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, ;; MA 02110-1301, USA.
;; Three automata are declared: one for the integer pipeline, one for the
;; multiply/divide unit and one for the FPU.
(define_automaton "r24k_cpu, r24k_mdu, r24k_fpu")
;; Integer execution unit.
;; r24k_iss and r24k_ixu_arith belong to the r24k_cpu automaton; every
;; reservation in this file includes r24k_iss.
(define_cpu_unit "r24k_iss"		"r24k_cpu")
(define_cpu_unit "r24k_ixu_arith"	"r24k_cpu")

;; Three units in the r24k_mdu automaton.  Most MDU reservations claim any
;; one of them; div and multi claim all three at once.
(define_cpu_unit "r24k_mul3a"	        "r24k_mdu")
(define_cpu_unit "r24k_mul3b"	        "r24k_mdu")
(define_cpu_unit "r24k_mul3c"	        "r24k_mdu")
;; --------------------------------------------------------------
;; Producers
;; --------------------------------------------------------------
;; 1. Loads: lb, lbu, lh, lhu, ll, lw, lwl, lwr, lwpc, lwxs
;; Default latency is 2 cycles; the issue slot and the integer unit are
;; both reserved for one cycle.
(define_insn_reservation "r24k_int_load" 2
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "load"))
  "r24k_iss+r24k_ixu_arith")
;; 2. Arithmetic: add, addi, addiu, addiupc, addu, and, andi, clo, clz,
;;    ext, ins, lui, movn, movz, nor, or, ori, rotr, rotrv, seb, seh, sll,
;;    sllv, slt, slti, sltiu, sltu, sra, srav, srl, srlv, sub, subu, wsbh,
;;    xor, xori
;; (movn/movz is not matched, we'll need to split condmov to
;;  differentiate between integer/float moves)
;; Default latency is 1 cycle.
(define_insn_reservation "r24k_int_arith" 1
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "arith,const,nop,shift,slt"))
  "r24k_iss+r24k_ixu_arith")
;; 3. Links: bgezal, bgezall, bltzal, bltzall, jal, jalr, jalx
;; 3a. jr/jalr consumer
;; Matches the "call" and "jump" insn types; default latency is 1 cycle.
(define_insn_reservation "r24k_int_jump" 1
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "call,jump"))
  "r24k_iss+r24k_ixu_arith")
;; 3b. branch consumer
;; Matches the "branch" insn type; default latency is 1 cycle.
(define_insn_reservation "r24k_int_branch" 1
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "branch"))
  "r24k_iss+r24k_ixu_arith")
;; 4. MDU: fully pipelined multiplier
;; mult - delivers result to hi/lo in 1 cycle (pipelined)
;; Reserves the issue slot plus any one of the three MDU units for one cycle.
(define_insn_reservation "r24k_int_mult" 1
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "imul"))
  "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
;; madd, msub - delivers result to hi/lo in 1 cycle (pipelined)
;; Same reservation as mult: issue slot plus any one MDU unit for one cycle.
(define_insn_reservation "r24k_int_madd" 1
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "imadd"))
  "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
;; mul - delivers result to gpr in 5 cycles
;; `*' binds tighter than `+', so the chosen MDU unit is held for 5
;; consecutive cycles while the issue slot is held only for the first.
(define_insn_reservation "r24k_int_mul3" 5
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "imul3"))
  "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)*5")
;; mfhi, mflo, mflhxu - deliver result to gpr in 5 cycles
;; Unlike mul, the MDU unit is reserved for a single cycle only.
(define_insn_reservation "r24k_int_mfhilo" 5
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "mfhilo"))
  "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
;; mthi, mtlo, mtlhx - deliver result to hi/lo, thence madd, handled as bypass
;; Default latency is 1 cycle.
(define_insn_reservation "r24k_int_mthilo" 1
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "mthilo"))
  "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
;; div - default to 36 cycles for 32bit operands.  Faster for 24bit, 16bit and
;; 8bit, but is tricky to identify.
;; All three MDU units are reserved together (`+', not `|') for the full
;; 36 cycles, so no other MDU reservation can overlap a divide.
(define_insn_reservation "r24k_int_div" 36
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "idiv"))
  "r24k_iss+(r24k_mul3a+r24k_mul3b+r24k_mul3c)*36")
;; 5. Cop: cfc1, di, ei, mfc0, mtc0
;; (Disabled until we add proper cop0 support)
;;(define_insn_reservation "r24k_int_cop" 3
;;  (and (eq_attr "cpu" "24k,24kx")
;;       (eq_attr "type" "cop0"))
;;  "r24k_iss+r24k_ixu_arith")
;; 6. Store
;; Only stores whose "mode" attribute is set (i.e. not "unknown") match
;; here; default latency is 1 cycle.
(define_insn_reservation "r24k_int_store" 1
  (and (eq_attr "cpu" "24k,24kx")
       (and (eq_attr "type" "store")
	    (eq_attr "mode" "!unknown")))
  "r24k_iss+r24k_ixu_arith")
;; 6.1 Special case - matches the cprestore pattern, which doesn't set the
;; mode attribute.  This keeps it from being classified as r24k_int_store
;; and then checked against store_data_bypass_p, which would fail because
;; cprestore does not have a normal SET pattern.
;; The reservation itself is the same as r24k_int_store.
(define_insn_reservation "r24k_unknown_store" 1
  (and (eq_attr "cpu" "24k,24kx")
       (and (eq_attr "type" "store")
	    (eq_attr "mode" "unknown")))
  "r24k_iss+r24k_ixu_arith")
;; 7. Multiple instructions
;; Reserves every unit at once for one cycle: the issue slot, the integer
;; unit, the FPU unit and all three MDU units.
(define_insn_reservation "r24k_int_multi" 1
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "multi"))
  "r24k_iss+r24k_ixu_arith+r24k_fpu_arith+(r24k_mul3a+r24k_mul3b+r24k_mul3c)")
;; 8. Unknowns - Currently these include blockage, consttable and alignment
;;    rtls.  They do not really affect scheduling latency, (blockage affects
;;    scheduling via log links, but not used here).
;; Latency is 0 and only the issue slot is reserved.
(define_insn_reservation "r24k_int_unknown" 0
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "unknown"))
  "r24k_iss")
;; 9. Prefetch
;; Matches both "prefetch" and "prefetchx"; default latency is 1 cycle.
(define_insn_reservation "r24k_int_prefetch" 1
  (and (eq_attr "cpu" "24k,24kx")
       (eq_attr "type" "prefetch,prefetchx"))
  "r24k_iss+r24k_ixu_arith")
;; --------------------------------------------------------------
;; Bypass to Consumer
;; --------------------------------------------------------------
;; load->next use :  2 cycles (Default)
;; load->load base:  3 cycles
;; load->store base: 3 cycles
;; load->prefetch:   3 cycles
;; The store bypass is guarded by !store_data_bypass_p, so it applies only
;; when that predicate is false for the producer/consumer pair.
(define_bypass 3 "r24k_int_load" "r24k_int_load")
(define_bypass 3 "r24k_int_load" "r24k_int_store" "!store_data_bypass_p")
(define_bypass 3 "r24k_int_load" "r24k_int_prefetch")
;; arith->next use :  1 cycle (Default)
;; arith->load base:  2 cycles
;; arith->store base: 2 cycles
;; arith->prefetch:   2 cycles
;; The store bypass is guarded by !store_data_bypass_p, so it applies only
;; when that predicate is false for the producer/consumer pair.
(define_bypass 2 "r24k_int_arith" "r24k_int_load")
(define_bypass 2 "r24k_int_arith" "r24k_int_store" "!store_data_bypass_p")
(define_bypass 2 "r24k_int_arith" "r24k_int_prefetch")
;; mul3->next use : 5 cycles (default)
;; mul3->l/s base : 6 cycles
;; mul3->prefetch : 6 cycles
;; The store bypass is guarded by !store_data_bypass_p, so it applies only
;; when that predicate is false for the producer/consumer pair.
(define_bypass 6 "r24k_int_mul3" "r24k_int_load")
(define_bypass 6 "r24k_int_mul3" "r24k_int_store" "!store_data_bypass_p")
(define_bypass 6 "r24k_int_mul3" "r24k_int_prefetch")
;; mfhilo->next use  : 5 cycles (default)
;; mfhilo->l/s base  : 6 cycles
;; mfhilo->prefetch  : 6 cycles
;; mthilo->madd/msub : 2 cycles (only for mthi/lo not mfhi/lo)
;; The store bypass is guarded by !store_data_bypass_p, so it applies only
;; when that predicate is false for the producer/consumer pair.
(define_bypass 6 "r24k_int_mfhilo" "r24k_int_load")
(define_bypass 6 "r24k_int_mfhilo" "r24k_int_store" "!store_data_bypass_p")
(define_bypass 6 "r24k_int_mfhilo" "r24k_int_prefetch")
(define_bypass 2 "r24k_int_mthilo" "r24k_int_madd")
;; cop->next use : 3 cycles (Default)
;; cop->l/s base : 4 cycles
;; (define_bypass 4 "r24k_int_cop" "r24k_int_load")
;; (define_bypass 4 "r24k_int_cop" "r24k_int_store" "!store_data_bypass_p")
;; multi->next use : 1 cycle (Default)
;; multi->l/s base : 2 cycles
;; multi->prefetch : 2 cycles
;; The store bypass is guarded by !store_data_bypass_p, so it applies only
;; when that predicate is false for the producer/consumer pair.
(define_bypass 2 "r24k_int_multi" "r24k_int_load")
(define_bypass 2 "r24k_int_multi" "r24k_int_store" "!store_data_bypass_p")
(define_bypass 2 "r24k_int_multi" "r24k_int_prefetch")
;; --------------------------------------------------------------
;; Floating Point Instructions
;; --------------------------------------------------------------
;; The single unit of the r24k_fpu automaton; used by both the 24k and the
;; 24kx FPU reservations.
(define_cpu_unit "r24k_fpu_arith" "r24k_fpu")
;; The 24k is a single issue cpu, and the fpu runs at half clock speed,
;; so each fpu instruction ties up the shared instruction scheduler for
;; 1 cycle, and the fpu scheduler for 2 cycles.
;;
;; These timings are therefore twice the values in the 24K manual,
;; which are quoted in fpu clocks.
;;
;; The 24kx is a 24k configured with 1:1 cpu and fpu, so use
;; the unscaled timings.
;; Common 24k FPU issue reservation: the issue slot for one cycle together
;; with the FPU unit for two consecutive cycles.
(define_reservation "r24k_fpu_iss" "r24k_iss+(r24k_fpu_arith*2)")
;; fadd, fabs, fneg
;; 24k only (half-clocked FPU); default latency is 8 cycles.
(define_insn_reservation "r24k_fadd" 8
  (and (eq_attr "cpu" "24k")
       (eq_attr "type" "fadd,fabs,fneg"))
  "r24k_fpu_iss")
;; fmove, fcmove
;; 24k only; matches the "fmove" and "condmove" types with a default
;; latency of 8 cycles.
(define_insn_reservation "r24k_fmove" 8
  (and (eq_attr "cpu" "24k")
       (eq_attr "type" "fmove,condmove"))
  "r24k_fpu_iss")
;; fload
;; 24k only; covers "fpload" and "fpidxload" with a default latency of
;; 6 cycles.
(define_insn_reservation "r24k_fload" 6
  (and (eq_attr "cpu" "24k")
       (eq_attr "type" "fpload,fpidxload"))
  "r24k_fpu_iss")
;; fstore
;; 24k only; default latency is 2 cycles.
(define_insn_reservation "r24k_fstore" 2
  (and (eq_attr "cpu" "24k")
       (eq_attr "type" "fpstore"))
  "r24k_fpu_iss")
;; fmul, fmadd
;; SFmode variant, 24k only; default latency is 8 cycles.
(define_insn_reservation "r24k_fmul_sf" 8
  (and (eq_attr "cpu" "24k")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "SF")))
  "r24k_fpu_iss")
;; fmul, fmadd - DFmode variant, 24k only; default latency is 10 cycles.
;; After the common issue reservation the FPU unit is held for a further
;; 2 cycles.
(define_insn_reservation "r24k_fmul_df" 10
  (and (eq_attr "cpu" "24k")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "DF")))
  "r24k_fpu_iss,(r24k_fpu_arith*2)")
;; fdiv, fsqrt, frsqrt
;; SFmode variant, 24k only; default latency is 34 cycles.  After the
;; common issue reservation the FPU unit is held for a further 26 cycles.
(define_insn_reservation "r24k_fdiv_sf" 34
  (and (eq_attr "cpu" "24k")
       (and (eq_attr "type" "fdiv,fsqrt,frsqrt")
	    (eq_attr "mode" "SF")))
  "r24k_fpu_iss,(r24k_fpu_arith*26)")
;; fdiv, fsqrt - DFmode variant, 24k only; default latency is 64 cycles.
;; DFmode frsqrt is not matched here; it has its own reservation.
;; After the common issue reservation the FPU unit is held for a further
;; 56 cycles.
(define_insn_reservation "r24k_fdiv_df" 64
  (and (eq_attr "cpu" "24k")
       (and (eq_attr "type" "fdiv,fsqrt")
	    (eq_attr "mode" "DF")))
  "r24k_fpu_iss,(r24k_fpu_arith*56)")
;; frsqrt
;; DFmode variant, 24k only; default latency is 70 cycles.  After the
;; common issue reservation the FPU unit is held for a further 60 cycles.
(define_insn_reservation "r24k_frsqrt_df" 70
  (and (eq_attr "cpu" "24k")
       (and (eq_attr "type" "frsqrt")
	    (eq_attr "mode" "DF")))
  "r24k_fpu_iss,(r24k_fpu_arith*60)")
;; fcmp
;; 24k only; default latency is 4 cycles.
(define_insn_reservation "r24k_fcmp" 4
  (and (eq_attr "cpu" "24k")
       (eq_attr "type" "fcmp"))
  "r24k_fpu_iss")
;; fcmp -> movf.fmt & movt.fmt bypass (dependency must be on the condition)
;; Lowers the fcmp latency from the default 4 cycles to 2 when the consumer
;; is r24k_fmove.  No guard function is attached to this bypass.
(define_bypass 2 "r24k_fcmp" "r24k_fmove")
;; fcvt (cvt.d.s, cvt.[sd].[wl])
;; 24k only; matches cnv_mode I2S, I2D and S2D with a default latency of
;; 8 cycles.
(define_insn_reservation "r24k_fcvt_i2f_s2d" 8
  (and (eq_attr "cpu" "24k")
       (and (eq_attr "type" "fcvt")
	    (eq_attr "cnv_mode" "I2S,I2D,S2D")))
  "r24k_fpu_iss")
;; fcvt (cvt.s.d)
;; 24k only; default latency is 12 cycles.
;; NOTE: despite the "s2d" in its name, this reservation matches cnv_mode
;; D2S (S2D is covered by r24k_fcvt_i2f_s2d).  The name is left unchanged.
(define_insn_reservation "r24k_fcvt_s2d" 12
  (and (eq_attr "cpu" "24k")
       (and (eq_attr "type" "fcvt")
	    (eq_attr "cnv_mode" "D2S")))
  "r24k_fpu_iss")
;; fcvt (cvt.[wl].[sd], etc)
;; 24k only; matches cnv_mode S2I and D2I with a default latency of
;; 10 cycles.
(define_insn_reservation "r24k_fcvt_f2i" 10
  (and (eq_attr "cpu" "24k")
       (and (eq_attr "type" "fcvt")
	    (eq_attr "cnv_mode" "S2I,D2I")))
  "r24k_fpu_iss")
;; fxfer (mfc1, mfhc1, mtc1, mthc1)
;; 24k only; default latency is 4 cycles.
(define_insn_reservation "r24k_fxfer" 4
  (and (eq_attr "cpu" "24k")
       (eq_attr "type" "xfer"))
  "r24k_fpu_iss")
;; --------------------------------------------------------------
;; Bypass to Consumer
;; --------------------------------------------------------------
;; r24k_fcvt_f2i->l/s base : 11 cycles
;; r24k_fcvt_f2i->prefetch : 11 cycles
;; One cycle more than the default r24k_fcvt_f2i latency of 10.  The store
;; bypass applies only when store_data_bypass_p is false.
(define_bypass 11 "r24k_fcvt_f2i" "r24k_int_load")
(define_bypass 11 "r24k_fcvt_f2i" "r24k_int_store" "!store_data_bypass_p")
(define_bypass 11 "r24k_fcvt_f2i" "r24k_int_prefetch")
;; r24k_fxfer->l/s base    :  5 cycles
;; r24k_fxfer->prefetch    :  5 cycles
;; One cycle more than the default r24k_fxfer latency of 4.  The store
;; bypass applies only when store_data_bypass_p is false.
(define_bypass 5 "r24k_fxfer" "r24k_int_load")
(define_bypass 5 "r24k_fxfer" "r24k_int_store" "!store_data_bypass_p")
(define_bypass 5 "r24k_fxfer" "r24k_int_prefetch")
;; --------------------------------------------------------------
;; The 24kx is a 24k configured with 1:1 cpu and fpu, so use
;; the unscaled timings.
;; --------------------------------------------------------------
;; Common 24kx FPU issue reservation: the issue slot and the FPU unit are
;; each held for a single cycle.
(define_reservation "r24kx_fpu_iss" "r24k_iss+r24k_fpu_arith")
;; fadd, fabs, fneg
;; 24kx only; default latency is 4 cycles.
(define_insn_reservation "r24kx_fadd" 4
  (and (eq_attr "cpu" "24kx")
       (eq_attr "type" "fadd,fabs,fneg"))
  "r24kx_fpu_iss")
;; fmove, fcmove
;; 24kx only; matches the "fmove" and "condmove" types with a default
;; latency of 4 cycles.
(define_insn_reservation "r24kx_fmove" 4
  (and (eq_attr "cpu" "24kx")
       (eq_attr "type" "fmove,condmove"))
  "r24kx_fpu_iss")
;; fload
;; 24kx only; covers "fpload" and "fpidxload" with a default latency of
;; 3 cycles.
(define_insn_reservation "r24kx_fload" 3
  (and (eq_attr "cpu" "24kx")
       (eq_attr "type" "fpload,fpidxload"))
  "r24kx_fpu_iss")
;; fstore
;; 24kx only; default latency is 1 cycle.
(define_insn_reservation "r24kx_fstore" 1
  (and (eq_attr "cpu" "24kx")
       (eq_attr "type" "fpstore"))
  "r24kx_fpu_iss")
;; fmul, fmadd
;; SFmode variant, 24kx only; default latency is 4 cycles.
(define_insn_reservation "r24kx_fmul_sf" 4
  (and (eq_attr "cpu" "24kx")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "SF")))
  "r24kx_fpu_iss")
;; fmul, fmadd - DFmode variant, 24kx only; default latency is 5 cycles.
;; After the common issue reservation the FPU unit is held for one further
;; cycle.
(define_insn_reservation "r24kx_fmul_df" 5
  (and (eq_attr "cpu" "24kx")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "DF")))
  "r24kx_fpu_iss,r24k_fpu_arith")
;; fdiv, fsqrt, frsqrt
;; SFmode variant, 24kx only; default latency is 17 cycles.  After the
;; common issue reservation the FPU unit is held for a further 13 cycles.
(define_insn_reservation "r24kx_fdiv_sf" 17
  (and (eq_attr "cpu" "24kx")
       (and (eq_attr "type" "fdiv,fsqrt,frsqrt")
	    (eq_attr "mode" "SF")))
  "r24kx_fpu_iss,(r24k_fpu_arith*13)")
;; fdiv, fsqrt - DFmode variant, 24kx only; default latency is 32 cycles.
;; DFmode frsqrt is not matched here; it has its own reservation.
;; After the common issue reservation the FPU unit is held for a further
;; 28 cycles.
(define_insn_reservation "r24kx_fdiv_df" 32
  (and (eq_attr "cpu" "24kx")
       (and (eq_attr "type" "fdiv,fsqrt")
	    (eq_attr "mode" "DF")))
  "r24kx_fpu_iss,(r24k_fpu_arith*28)")
;; frsqrt
;; DFmode variant, 24kx only; default latency is 35 cycles.  After the
;; common issue reservation the FPU unit is held for a further 30 cycles.
(define_insn_reservation "r24kx_frsqrt_df" 35
  (and (eq_attr "cpu" "24kx")
       (and (eq_attr "type" "frsqrt")
	    (eq_attr "mode" "DF")))
  "r24kx_fpu_iss,(r24k_fpu_arith*30)")
;; fcmp
;; 24kx only; default latency is 2 cycles.
(define_insn_reservation "r24kx_fcmp" 2
  (and (eq_attr "cpu" "24kx")
       (eq_attr "type" "fcmp"))
  "r24kx_fpu_iss")
;; fcmp -> movf.fmt & movt.fmt bypass (dependency must be on the condition)
;; Lowers the fcmp latency from the default 2 cycles to 1 when the consumer
;; is r24kx_fmove.  No guard function is attached to this bypass.
(define_bypass 1 "r24kx_fcmp" "r24kx_fmove")
;; fcvt (cvt.d.s, cvt.[sd].[wl])
;; 24kx only; matches cnv_mode I2S, I2D and S2D with a default latency of
;; 4 cycles.
(define_insn_reservation "r24kx_fcvt_i2f_s2d" 4
  (and (eq_attr "cpu" "24kx")
       (and (eq_attr "type" "fcvt")
	    (eq_attr "cnv_mode" "I2S,I2D,S2D")))
  "r24kx_fpu_iss")
;; fcvt (cvt.s.d)
;; 24kx only; default latency is 6 cycles.
;; NOTE: despite the "s2d" in its name, this reservation matches cnv_mode
;; D2S (S2D is covered by r24kx_fcvt_i2f_s2d).  The name is left unchanged.
(define_insn_reservation "r24kx_fcvt_s2d" 6
  (and (eq_attr "cpu" "24kx")
       (and (eq_attr "type" "fcvt")
	    (eq_attr "cnv_mode" "D2S")))
  "r24kx_fpu_iss")
;; fcvt (cvt.[wl].[sd], etc)
;; 24kx only; matches cnv_mode S2I and D2I with a default latency of
;; 5 cycles.
(define_insn_reservation "r24kx_fcvt_f2i" 5
  (and (eq_attr "cpu" "24kx")
       (and (eq_attr "type" "fcvt")
	    (eq_attr "cnv_mode" "S2I,D2I")))
  "r24kx_fpu_iss")
;; fxfer (mfc1, mfhc1, mtc1, mthc1)
;; 24kx only; default latency is 2 cycles.
(define_insn_reservation "r24kx_fxfer" 2
  (and (eq_attr "cpu" "24kx")
       (eq_attr "type" "xfer"))
  "r24kx_fpu_iss")
;; --------------------------------------------------------------
;; Bypass to Consumer
;; --------------------------------------------------------------
;; r24kx_fcvt_f2i->l/s base : 6 cycles
;; r24kx_fcvt_f2i->prefetch : 6 cycles
;; One cycle more than the default r24kx_fcvt_f2i latency of 5.  The store
;; bypass applies only when store_data_bypass_p is false.
(define_bypass 6 "r24kx_fcvt_f2i" "r24k_int_load")
(define_bypass 6 "r24kx_fcvt_f2i" "r24k_int_store" "!store_data_bypass_p")
(define_bypass 6 "r24kx_fcvt_f2i" "r24k_int_prefetch")
;; r24kx_fxfer->l/s base    :  3 cycles
;; r24kx_fxfer->prefetch    :  3 cycles
;; One cycle more than the default r24kx_fxfer latency of 2.  The store
;; bypass applies only when store_data_bypass_p is false.
(define_bypass 3 "r24kx_fxfer" "r24k_int_load")
(define_bypass 3 "r24kx_fxfer" "r24k_int_store" "!store_data_bypass_p")
(define_bypass 3 "r24kx_fxfer" "r24k_int_prefetch")