| //===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the resources required by some of the P9 instructions. It |
| // is part of the P9 processor model used for instruction scheduling. Not every |
| // instruction is listed here. Instructions in this file belong to itinerary |
| // classes that have instructions with different resource requirements. |
| // |
| // The makeup of the P9 CPU is modeled as follows: |
| // - Each CPU is made up of two superslices. |
| // - Each superslice is made up of two slices. Therefore, there are 4 slices |
| // for each CPU. |
| // - Up to 6 instructions can be dispatched to each CPU. Three per superslice. |
| // - Each CPU has: |
| // - One CY (Crypto) unit P9_CY_* |
| // - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_* |
| // - Two PM (Permute) units. One on each superslice. P9_PM_* |
| // - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_* |
| // - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_* |
| // - Four DP (Floating Point) units. One on each slice. P9_DP_* |
| // This also includes fixed point multiply add. |
| // - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_* |
| // - Four Load/Store Queues. P9_LS_* |
| // - Each set of instructions will require a number of these resources. |
| //===----------------------------------------------------------------------===// |
| |
| // 2 cycle ALU vector operation that occupies an entire superslice: both ALU |
| // units (the even ALUE and the odd ALUO), both pipelines (EXECE, EXECO) and |
| // all three dispatches (DISP) of the given superslice. |
| def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| VADDCUW, |
| VADDUBM, |
| VADDUDM, |
| VADDUHM, |
| VADDUWM, |
| VAND, |
| VANDC, |
| VCMPEQUB, |
| VCMPEQUD, |
| VCMPEQUH, |
| VCMPEQUW, |
| VCMPNEB, |
| VCMPNEH, |
| VCMPNEW, |
| VCMPNEZB, |
| VCMPNEZH, |
| VCMPNEZW, |
| VEQV, |
| VEXTSB2D, |
| VEXTSB2W, |
| VEXTSH2D, |
| VEXTSH2W, |
| VEXTSW2D, |
| VRLB, |
| VRLD, |
| VRLDMI, |
| VRLDNM, |
| VRLH, |
| VRLW, |
| VRLWMI, |
| VRLWNM, |
| VSRAB, |
| VSRAD, |
| VSRAH, |
| VSRAW, |
| VSRB, |
| VSRD, |
| VSRH, |
| VSRW, |
| VSLB, |
| VSLD, |
| VSLH, |
| VSLW, |
| VMRGEW, |
| VMRGOW, |
| VNAND, |
| VNEGD, |
| VNEGW, |
| VNOR, |
| VOR, |
| VORC, |
| VPOPCNTB, |
| VPOPCNTH, |
| VSEL, |
| VSUBUBM, |
| VSUBUDM, |
| VSUBUHM, |
| VSUBUWM, |
| VXOR, |
| V_SET0B, |
| V_SET0H, |
| V_SET0, |
| XVABSDP, |
| XVABSSP, |
| XVCPSGNDP, |
| XVCPSGNSP, |
| XVIEXPDP, |
| XVNABSDP, |
| XVNABSSP, |
| XVNEGDP, |
| XVNEGSP, |
| XVXEXPDP, |
| XVIEXPSP, |
| XVXEXPSP, |
| XXLAND, |
| XXLANDC, |
| XXLEQV, |
| XXLNAND, |
| XXLNOR, |
| XXLOR, |
| XXLORf, |
| XXLORC, |
| XXLXOR, |
| XXSEL, |
| XSABSQP, |
| XSCPSGNQP, |
| XSIEXPQP, |
| XSNABSQP, |
| XSNEGQP, |
| XSXEXPQP |
| )>; |
| |
| // 3 cycle ALU operation with Restricted dispatch. The operation itself runs |
| // on a single slice, but because it is Restricted it takes all 3 dispatches |
| // (DISP) of that superslice. |
| def : InstRW<[P9_ALU_3C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs FCMPUS, FCMPUD, XSTSTDCDP, XSTSTDCSP)>; |
| |
| // 3 cycle ALU operation with Standard dispatch. Only one slice is used. |
| def : InstRW<[P9_ALU_3C, IP_EXEC_1C, |
| DISP_1C, DISP_1C], |
| (instrs |
| XSMAXCDP, XSMAXDP, XSMAXJDP, |
| XSMINCDP, XSMINDP, XSMINJDP, |
| XSTDIVDP, XSTSQRTDP, |
| XSCMPEQDP, XSCMPEXPDP, XSCMPGEDP, XSCMPGTDP, XSCMPODP, XSCMPUDP, |
| XSXSIGDP, XSCVSPDPN |
| )>; |
| |
| // 2 cycle ALU operation with Standard dispatch. Only one slice is used. |
| def : InstRW<[P9_ALU_2C, IP_EXEC_1C, |
| DISP_1C, DISP_1C], |
| (instrs |
| ADDIStocHA, ADDItocL, |
| MCRF, MCRXRX, |
| SLD, SRD, SRAD, SRADI, RLDIC, |
| XSNABSDP, XSXEXPDP, XSABSDP, XSNEGDP, XSCPSGNDP |
| )>; |
| |
| // 2 cycle ALU operation with Restricted dispatch. The operation itself runs |
| // on a single slice, but because it is Restricted it takes all 3 dispatches |
| // (DISP) of that superslice. |
| def : InstRW<[P9_ALU_2C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| RLDCL, RLDCR, RLDIMI, RLDICL, RLDICR, RLDICL_32_64, |
| XSIEXPDP, |
| FMR, |
| FABSD, FABSS, FNABSD, FNABSS, |
| FNEGD, FNEGS, FCPSGND, FCPSGNS |
| )>; |
| |
| // 3 cycle ALU vector operation that occupies an entire superslice: both ALU |
| // units (the even ALUE and the odd ALUO), both pipelines (EXECE, EXECO) and |
| // all three dispatches (DISP) of the given superslice. |
| def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| VBPERMD, |
| VABSDUB, |
| VABSDUH, |
| VABSDUW, |
| VADDUBS, |
| VADDUHS, |
| VADDUWS, |
| VAVGSB, |
| VAVGSH, |
| VAVGSW, |
| VAVGUB, |
| VAVGUH, |
| VAVGUW, |
| VCMPEQFP, |
| VCMPEQFPo, |
| VCMPGEFP, |
| VCMPGEFPo, |
| VCMPBFP, |
| VCMPBFPo, |
| VCMPGTFP, |
| VCMPGTFPo, |
| VCLZB, |
| VCLZD, |
| VCLZH, |
| VCLZW, |
| VCTZB, |
| VCTZD, |
| VCTZH, |
| VCTZW, |
| VADDSBS, |
| VADDSHS, |
| VADDSWS, |
| VMINFP, |
| VMINSB, |
| VMINSD, |
| VMINSH, |
| VMINSW, |
| VMINUB, |
| VMINUD, |
| VMINUH, |
| VMINUW, |
| VMAXFP, |
| VMAXSB, |
| VMAXSD, |
| VMAXSH, |
| VMAXSW, |
| VMAXUB, |
| VMAXUD, |
| VMAXUH, |
| VMAXUW, |
| VPOPCNTW, |
| VPOPCNTD, |
| VPRTYBD, |
| VPRTYBW, |
| VSHASIGMAD, |
| VSHASIGMAW, |
| VSUBSBS, |
| VSUBSHS, |
| VSUBSWS, |
| VSUBUBS, |
| VSUBUHS, |
| VSUBUWS, |
| VSUBCUW, |
| VCMPGTSB, |
| VCMPGTSBo, |
| VCMPGTSD, |
| VCMPGTSDo, |
| VCMPGTSH, |
| VCMPGTSHo, |
| VCMPGTSW, |
| VCMPGTSWo, |
| VCMPGTUB, |
| VCMPGTUBo, |
| VCMPGTUD, |
| VCMPGTUDo, |
| VCMPGTUH, |
| VCMPGTUHo, |
| VCMPGTUW, |
| VCMPGTUWo, |
| VCMPNEBo, |
| VCMPNEHo, |
| VCMPNEWo, |
| VCMPNEZBo, |
| VCMPNEZHo, |
| VCMPNEZWo, |
| VCMPEQUBo, |
| VCMPEQUDo, |
| VCMPEQUHo, |
| VCMPEQUWo, |
| XVCMPEQDP, |
| XVCMPEQDPo, |
| XVCMPEQSP, |
| XVCMPEQSPo, |
| XVCMPGEDP, |
| XVCMPGEDPo, |
| XVCMPGESP, |
| XVCMPGESPo, |
| XVCMPGTDP, |
| XVCMPGTDPo, |
| XVCMPGTSP, |
| XVCMPGTSPo, |
| XVMAXDP, |
| XVMAXSP, |
| XVMINDP, |
| XVMINSP, |
| XVTDIVDP, |
| XVTDIVSP, |
| XVTSQRTDP, |
| XVTSQRTSP, |
| XVTSTDCDP, |
| XVTSTDCSP, |
| XVXSIGDP, |
| XVXSIGSP |
| )>; |
| |
| // 7 cycle DP vector operation that occupies an entire superslice: both DP |
| // units (the even DPE and the odd DPO), both pipelines (EXECE, EXECO) and |
| // all three dispatches (DISP) of the given superslice. |
| def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| VADDFP, |
| VCTSXS, |
| VCTSXS_0, |
| VCTUXS, |
| VCTUXS_0, |
| VEXPTEFP, |
| VLOGEFP, |
| VMADDFP, |
| VMHADDSHS, |
| VNMSUBFP, |
| VREFP, |
| VRFIM, |
| VRFIN, |
| VRFIP, |
| VRFIZ, |
| VRSQRTEFP, |
| VSUBFP, |
| XVADDDP, |
| XVADDSP, |
| XVCVDPSP, |
| XVCVDPSXDS, |
| XVCVDPSXWS, |
| XVCVDPUXDS, |
| XVCVDPUXWS, |
| XVCVHPSP, |
| XVCVSPDP, |
| XVCVSPHP, |
| XVCVSPSXDS, |
| XVCVSPSXWS, |
| XVCVSPUXDS, |
| XVCVSPUXWS, |
| XVCVSXDDP, |
| XVCVSXDSP, |
| XVCVSXWDP, |
| XVCVSXWSP, |
| XVCVUXDDP, |
| XVCVUXDSP, |
| XVCVUXWDP, |
| XVCVUXWSP, |
| XVMADDADP, |
| XVMADDASP, |
| XVMADDMDP, |
| XVMADDMSP, |
| XVMSUBADP, |
| XVMSUBASP, |
| XVMSUBMDP, |
| XVMSUBMSP, |
| XVMULDP, |
| XVMULSP, |
| XVNMADDADP, |
| XVNMADDASP, |
| XVNMADDMDP, |
| XVNMADDMSP, |
| XVNMSUBADP, |
| XVNMSUBASP, |
| XVNMSUBMDP, |
| XVNMSUBMSP, |
| XVRDPI, |
| XVRDPIC, |
| XVRDPIM, |
| XVRDPIP, |
| XVRDPIZ, |
| XVREDP, |
| XVRESP, |
| XVRSPI, |
| XVRSPIC, |
| XVRSPIM, |
| XVRSPIP, |
| XVRSPIZ, |
| XVRSQRTEDP, |
| XVRSQRTESP, |
| XVSUBDP, |
| XVSUBSP, |
| VCFSX, |
| VCFSX_0, |
| VCFUX, |
| VCFUX_0, |
| VMHRADDSHS, |
| VMLADDUHM, |
| VMSUMMBM, |
| VMSUMSHM, |
| VMSUMSHS, |
| VMSUMUBM, |
| VMSUMUHM, |
| VMSUMUHS, |
| VMULESB, |
| VMULESH, |
| VMULESW, |
| VMULEUB, |
| VMULEUH, |
| VMULEUW, |
| VMULOSB, |
| VMULOSH, |
| VMULOSW, |
| VMULOUB, |
| VMULOUH, |
| VMULOUW, |
| VMULUWM, |
| VSUM2SWS, |
| VSUM4SBS, |
| VSUM4SHS, |
| VSUM4UBS, |
| VSUMSWS |
| )>; |
| |
| // Restricted DP operation with a 7 cycle latency. Takes one DP unit, one EXEC |
| // pipeline and all three dispatches of the superslice. |
| def : InstRW<[P9_DP_7C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| FRSP, |
| FRIND, FRINS, FRIPD, FRIPS, FRIZD, FRIZS, FRIMD, FRIMS, |
| FRE, FRES, FRSQRTE, FRSQRTES, |
| FMADDS, FMADD, FMSUBS, FMSUB, |
| FNMADDS, FNMADD, FNMSUBS, FNMSUB, |
| FSELD, FSELS, |
| FADDS, FMULS, FMUL, FSUBS, |
| FCFID, FCTID, FCTIDZ, FCFIDU, FCFIDS, FCFIDUS, |
| FCTIDUZ, FCTIWUZ, FCTIW, FCTIWZ, |
| XSMADDADP, XSMADDASP, XSMADDMDP, XSMADDMSP, |
| XSMSUBADP, XSMSUBASP, XSMSUBMDP, XSMSUBMSP, |
| XSMULDP, XSMULSP, |
| XSNMADDADP, XSNMADDASP, XSNMADDMDP, XSNMADDMSP, |
| XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP |
| )>; |
| |
| // A 7 cycle Restricted DP operation paired with one 2 cycle ALU operation. |
| // Because the DP part is restricted, a full 5 dispatches are needed. |
| def : InstRW<[P9_DPOpAndALUOp_9C, |
| IP_EXEC_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs FMULo, FMADDo, FMSUBo, FNMADDo, FNMSUBo)>; |
| |
| // DP operation with a 7 cycle latency. Takes one DP unit, one EXEC pipeline |
| // and two dispatches. |
| def : InstRW<[P9_DP_7C, IP_EXEC_1C, |
| DISP_1C, DISP_1C], |
| (instrs |
| XSADDDP, XSADDSP, |
| XSCVDPHP, XSCVDPSP, |
| XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS, |
| XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS, |
| XSCVHPDP, XSCVSPDP, |
| XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP, |
| XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ, |
| XSREDP, XSRESP, |
| //XSRSP, |
| XSRSQRTEDP, XSRSQRTESP, |
| XSSUBDP, XSSUBSP, |
| XSCVDPSPN |
| )>; |
| |
| // 3 cycle PM operation. There is only one PM unit per superslice, so the whole |
| // superslice is used: both exec pipelines (EXECO, EXECE) and all three |
| // dispatches. |
| def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| VBPERMQ, |
| VCLZLSBB, |
| VCTZLSBB, |
| VEXTRACTD, |
| VEXTRACTUB, |
| VEXTRACTUH, |
| VEXTRACTUW, |
| VEXTUBLX, |
| VEXTUBRX, |
| VEXTUHLX, |
| VEXTUHRX, |
| VEXTUWLX, |
| VEXTUWRX, |
| VGBBD, |
| VINSERTB, |
| VINSERTD, |
| VINSERTH, |
| VINSERTW, |
| VMRGHB, |
| VMRGHH, |
| VMRGHW, |
| VMRGLB, |
| VMRGLH, |
| VMRGLW, |
| VPERM, |
| VPERMR, |
| VPERMXOR, |
| VPKPX, |
| VPKSDSS, |
| VPKSDUS, |
| VPKSHSS, |
| VPKSHUS, |
| VPKSWSS, |
| VPKSWUS, |
| VPKUDUM, |
| VPKUDUS, |
| VPKUHUM, |
| VPKUHUS, |
| VPKUWUM, |
| VPKUWUS, |
| VPRTYBQ, |
| VSL, |
| VSLDOI, |
| VSLO, |
| VSLV, |
| VSPLTB, |
| VSPLTBs, |
| VSPLTH, |
| VSPLTHs, |
| VSPLTISB, |
| VSPLTISH, |
| VSPLTISW, |
| VSPLTW, |
| VSR, |
| VSRO, |
| VSRV, |
| VUPKHPX, |
| VUPKHSB, |
| VUPKHSH, |
| VUPKHSW, |
| VUPKLPX, |
| VUPKLSB, |
| VUPKLSH, |
| VUPKLSW, |
| XXBRD, |
| XXBRH, |
| XXBRQ, |
| XXBRW, |
| XXEXTRACTUW, |
| XXINSERTW, |
| XXMRGHW, |
| XXMRGLW, |
| XXPERM, |
| XXPERMR, |
| XXSLDWI, |
| XXSPLTIB, |
| XXSPLTW, |
| XXSPLTWs, |
| XXPERMDI, |
| XXPERMDIs, |
| VADDCUQ, |
| VADDECUQ, |
| VADDEUQM, |
| VADDUQM, |
| VMUL10CUQ, |
| VMUL10ECUQ, |
| VMUL10EUQ, |
| VMUL10UQ, |
| VSUBCUQ, |
| VSUBECUQ, |
| VSUBEUQM, |
| VSUBUQM, |
| XSCMPEXPQP, |
| XSCMPOQP, |
| XSCMPUQP, |
| XSTSTDCQP, |
| XSXSIGQP |
| )>; |
| |
| // DFU operation with a 12 cycle latency. There is only one DFU unit per CPU, |
| // so a whole superslice is used: both exec pipelines (EXECO, EXECE) and all |
| // three dispatches. |
| def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| XSADDQP, XSADDQPO, |
| XSCVDPQP, XSCVQPDP, XSCVQPDPO, |
| XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ, |
| XSCVSDQP, XSCVUDQP, |
| XSRQPI, XSRQPXP, |
| XSSUBQP, XSSUBQPO |
| )>; |
| |
| // DFU operation with a 24 cycle latency. There is only one DFU unit per CPU, |
| // so a whole superslice is used: both exec pipelines (EXECO, EXECE) and all |
| // three dispatches. |
| def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| XSMADDQP, XSMADDQPO, |
| XSMSUBQP, XSMSUBQPO, |
| XSMULQP, XSMULQPO, |
| XSNMADDQP, XSNMADDQPO, |
| XSNMSUBQP, XSNMSUBQPO |
| )>; |
| |
| // DFU operation with a 58 cycle latency. There is only one DFU unit per CPU, |
| // so a whole superslice is used: both exec pipelines (EXECO, EXECE) and all |
| // three dispatches. |
| def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs XSDIVQP, XSDIVQPO)>; |
| |
| // DFU operation with a 76 cycle latency. There is only one DFU unit per CPU, |
| // so a whole superslice is used: both exec pipelines (EXECO, EXECE) and all |
| // three dispatches. |
| def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs XSSQRTQP, XSSQRTQPO)>; |
| |
| // Load with a 5 cycle latency; uses a single slice. |
| def : InstRW<[P9_LS_5C, IP_AGEN_1C, |
| DISP_1C, DISP_1C], |
| (instrs |
| LXSDX, LXVD2X, LXSIWZX, |
| LXV, LXVX, LXSD, |
| DFLOADf64 |
| )>; |
| |
| // Load with a 4 cycle latency; uses a single slice. |
| // NOTE(review): COPY is the only entry here and is not a load by name; |
| // confirm that modelling it with the LS unit and AGEN pipeline is intended. |
| def : InstRW<[P9_LS_4C, IP_AGEN_1C, |
| DISP_1C, DISP_1C], |
| (instrs COPY)>; |
| |
| // Restricted load with a 4 cycle latency. It uses a single slice but takes |
| // the dispatches of the whole superslice. |
| def : InstRW<[P9_LS_4C, IP_AGEN_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs LFIWZX, LFDX, LFD)>; |
| |
| // Cracked and Restricted load instruction. |
| // It is split into a Load piece followed by an ALU piece. The two cannot run |
| // at the same time, so their latencies add up to 6 cycles. |
| // Being both cracked and restricted, it needs the full 6 dispatches. |
| def : InstRW<[P9_LoadAndALUOp_6C, |
| IP_EXEC_1C, IP_AGEN_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs LFIWAX, LFSX, LFS)>; |
| |
| // Cracked load instruction. |
| // It is split into a Load piece followed by an ALU piece. The two cannot run |
| // at the same time, so their latencies add up to 7 cycles. |
| // Being a cracked instruction, it needs a full 4 dispatches. |
| def : InstRW<[P9_LoadAndALUOp_7C, |
| IP_AGEN_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs LXSSPX, LXSIWAX, LXSSP, DFLOADf32)>; |
| |
| // Cracked load that also needs the PM resource. |
| // The Load and the PM pieces cannot run at the same time, so their latencies |
| // add up to 8 cycles. |
| // The PM piece takes the full superslice: both the EXECE and EXECO pipelines |
| // plus 3 dispatches. The Load piece accounts for the remaining 2 dispatches. |
| def : InstRW<[P9_LoadAndPMOp_8C, |
| IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs LXVDSX, LXVWSX, LXVW4X)>; |
| |
| // Restricted store operation on a single slice. Because it is restricted it |
| // takes all three dispatches of the superslice. |
| def : InstRW<[P9_LS_1C, |
| IP_EXEC_1C, IP_AGEN_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| STFS, STFD, STFIWX, STFSX, STFDX, |
| STXSDX, STXSSPX, STXSIWX, |
| DFSTOREf32, DFSTOREf64 |
| )>; |
| |
| // Store operation that takes the whole superslice. |
| def : InstRW<[P9_LS_1C, |
| IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs STXVD2X, STXVW4X)>; |
| |
| |
| // DIV operation with a 16 cycle latency. There is only one DIV unit per |
| // superslice, so the whole superslice is used: both exec pipelines (EXECO, |
| // EXECE) and all three dispatches. |
| def : InstRW<[P9_DIV_16C_8, |
| IP_EXECO_1C, IP_EXECE_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs DIVW, DIVWU, MODSW)>; |
| |
| // DIV operation with a 24 cycle latency. There is only one DIV unit per |
| // superslice, so the whole superslice is used: both exec pipelines (EXECO, |
| // EXECE) and all three dispatches. |
| def : InstRW<[P9_DIV_24C_8, |
| IP_EXECO_1C, IP_EXECE_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| DIVWE, DIVD, DIVWEU, DIVDU, |
| MODSD, MODUD, MODUW |
| )>; |
| |
| // DIV operation with a 40 cycle latency. There is only one DIV unit per |
| // superslice, so the whole superslice is used: both exec pipelines (EXECO, |
| // EXECE) and all three dispatches. |
| def : InstRW<[P9_DIV_40C_8, |
| IP_EXECO_1C, IP_EXECE_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs DIVDE, DIVDEU)>; |
| |
| // Cracked instruction made of a DIV and an ALU operation. The ALU operation |
| // needs one full slice and the DIV operation needs one full superslice, as |
| // there is only one DIV per superslice. DIV plus ALU latency is 26. |
| def : InstRW<[P9_IntDivAndALUOp_26C_8, |
| IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs DIVDo, DIVDUo, DIVWEo, DIVWEUo)>; |
| |
| // Cracked instruction made of a DIV and an ALU operation. The ALU operation |
| // needs one full slice and the DIV operation needs one full superslice, as |
| // there is only one DIV per superslice. DIV plus ALU latency is 42. |
| def : InstRW<[P9_IntDivAndALUOp_42C_8, |
| IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs DIVDEo, DIVDEUo)>; |
| |
| // CR access instructions in IIC_BrMCR, IIC_BrMCRX. |
| |
| // ALU operations that are both cracked and restricted. |
| // The two ALU ops can be executed in parallel, so their latencies are not |
| // added together. Apart from that, this behaves like two instructions running |
| // side by side on two pipelines with 6 dispatches. |
| // Each ALU op takes 2 cycles. |
| def : InstRW<[P9_ALU_2C, P9_ALU_2C, |
| IP_EXEC_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs MTOCRF, MTOCRF8, MTCRF, MTCRF8)>; |
| |
| // Cracked ALU operations. |
| // Here the two ALU ops can actually be done in parallel and therefore the |
| // latencies are not added together. Otherwise this is like having two |
| // instructions running together on two pipelines and 4 dispatches. |
| // ALU ops are 3 cycles each. |
| def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| MCRFS |
| )>; |
| |
| // FP Div instructions in IIC_FPDivD and IIC_FPDivS. |
| |
| // Restricted DP instruction with a 33 cycle latency. Takes one slice and 3 |
| // dispatches. |
| def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs FDIV)>; |
| |
| // Restricted 33 cycle DP instruction, cracked with a 2 cycle ALU operation. |
| def : InstRW<[P9_DPOpAndALUOp_35C_8, |
| IP_EXEC_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs FDIVo)>; |
| |
| // DP instruction with a 33 cycle latency. Takes one slice and 2 dispatches. |
| def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, |
| DISP_1C, DISP_1C], |
| (instrs XSDIVDP)>; |
| |
| // Restricted DP instruction with a 22 cycle latency. Takes one slice and 3 |
| // dispatches. |
| def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs FDIVS)>; |
| |
| // Restricted 22 cycle DP instruction, cracked with a 2 cycle ALU operation. |
| def : InstRW<[P9_DPOpAndALUOp_24C_5, |
| IP_EXEC_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs FDIVSo)>; |
| |
| // DP instruction with a 22 cycle latency. Takes one slice and 2 dispatches. |
| def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, |
| DISP_1C, DISP_1C], |
| (instrs XSDIVSP)>; |
| |
| // DP vector instruction with a 24 cycle latency. Takes one full superslice: |
| // both the EXECE and EXECO pipelines and all 3 dispatches of the given |
| // superslice. |
| def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, |
| IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs XVDIVSP)>; |
| |
| // DP vector instruction with a 33 cycle latency. Takes one full superslice: |
| // both the EXECE and EXECO pipelines and all 3 dispatches of the given |
| // superslice. |
| def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, |
| IP_EXECE_1C, IP_EXECO_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs XVDIVDP)>; |
| |
| // Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX. |
| |
| // Instruction cracked into three pieces: one Load and two ALU operations. |
| // The Load and one of the ALU ops cannot run at the same time, so their |
| // latencies are added together for 6 cycles. The remaining ALU op is 2 cycles. |
| // The load and the ALU op that depends on it are both restricted, so together |
| // they take 6 dispatches. The last 2 dispatches come from the second ALU op. |
| // The two EXEC pipelines serve the 2 ALU ops and the AGEN serves the load. |
| def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, |
| IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs LFSU, LFSUX)>; |
| |
| // Cracked instruction consisting of a Load and an ALU op. The ALU op does not |
| // depend on the load, so both can run at the same time. The load is also |
| // restricted: 3 of the dispatches come from the restricted load and the other |
| // two from the ALU op. The AGEN pipeline belongs to the load and the EXEC |
| // pipeline is needed by the ALU op. |
| def : InstRW<[P9_LS_4C, P9_ALU_2C, |
| IP_AGEN_1C, IP_EXEC_1C, |
| DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], |
| (instrs LFDU, LFDUX)>; |
| |
| // Crypto Instructions |
| |
| // CY operation with a 6 cycle latency. There is only one CY unit per CPU, so |
| // a whole superslice is used: both exec pipelines (EXECO, EXECE) and all |
| // three dispatches. |
| def : InstRW<[P9_CY_6C, |
| IP_EXECO_1C, IP_EXECE_1C, |
| DISP_1C, DISP_1C, DISP_1C], |
| (instrs |
| VPMSUMB, VPMSUMD, VPMSUMH, VPMSUMW, |
| VCIPHER, VCIPHERLAST, VNCIPHER, VNCIPHERLAST, |
| VSBOX |
| )>; |