lib/Target/PowerPC/P9InstrResources.td - llvm - Git at Google

 //===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines the resources required by some P9 instructions. It is part
 // of the P9 processor model used for instruction scheduling. Not every
 // instruction is listed here. Instructions in this file belong to itinerary
 // classes that have instructions with different resource requirements.
 //
 // The makeup of the P9 CPU is modeled as follows:
 //   - Each CPU is made up of two superslices.
 //   - Each superslice is made up of two slices. Therefore, there are 4 slices
 //      for each CPU.
 //   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
 //   - Each CPU has:
 //     - One CY (Crypto) unit P9_CY_*
 //     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
 //     - Two PM (Permute) units. One on each superslice. P9_PM_*
 //     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
 //     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
 //     - Four DP (Floating Point) units. One on each slice. P9_DP_*
 //       This also includes fixed point multiply add.
 //     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
 //     - Four Load/Store Queues. P9_LS_*
 //   - Each set of instructions will require a number of these resources.
 //===----------------------------------------------------------------------===//

 // Two-cycle ALU vector operations that occupy an entire superslice.
 //  Each listed instruction is charged both ALU units (the even ALUE and the
 //  odd ALUO), both execution pipelines (EXECE, EXECO) and all three dispatches
 //  (DISP) of the given superslice.
 def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs
     VADDCUW,
     VADDUBM,
     VADDUDM,
     VADDUHM,
     VADDUWM,
     VAND,
     VANDC,
     VCMPEQUB,
     VCMPEQUD,
     VCMPEQUH,
     VCMPEQUW,
     VCMPNEB,
     VCMPNEH,
     VCMPNEW,
     VCMPNEZB,
     VCMPNEZH,
     VCMPNEZW,
     VEQV,
     VEXTSB2D,
     VEXTSB2W,
     VEXTSH2D,
     VEXTSH2W,
     VEXTSW2D,
     VRLB,
     VRLD,
     VRLDMI,
     VRLDNM,
     VRLH,
     VRLW,
     VRLWMI,
     VRLWNM,
     VSRAB,
     VSRAD,
     VSRAH,
     VSRAW,
     VSRB,
     VSRD,
     VSRH,
     VSRW,
     VSLB,
     VSLD,
     VSLH,
     VSLW,
     VMRGEW,
     VMRGOW,
     VNAND,
     VNEGD,
     VNEGW,
     VNOR,
     VOR,
     VORC,
     VPOPCNTB,
     VPOPCNTH,
     VSEL,
     VSUBUBM,
     VSUBUDM,
     VSUBUHM,
     VSUBUWM,
     VXOR,
     V_SET0B,
     V_SET0H,
     V_SET0,
     XVABSDP,
     XVABSSP,
     XVCPSGNDP,
     XVCPSGNSP,
     XVIEXPDP,
     XVNABSDP,
     XVNABSSP,
     XVNEGDP,
     XVNEGSP,
     XVXEXPDP,
     XVIEXPSP,
     XVXEXPSP,
     XXLAND,
     XXLANDC,
     XXLEQV,
     XXLNAND,
     XXLNOR,
     XXLOR,
     XXLORf,
     XXLORC,
     XXLXOR,
     XXSEL,
     XSABSQP,
     XSCPSGNQP,
     XSIEXPQP,
     XSNABSQP,
     XSNEGQP,
     XSXEXPQP
 )>;

 // Three-cycle ALU operation with Restricted dispatch. It executes on a single
 //  slice, but because it is Restricted it claims all 3 dispatches (DISP) of
 //  that superslice.
 def : InstRW<[P9_ALU_3C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs FCMPUS, FCMPUD, XSTSTDCDP, XSTSTDCSP)>;

 // Standard Dispatch ALU operation for 3 cycles. Only one slice is used: one
 //  ALU unit, one EXEC pipeline and two dispatches (DISP).
 def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     XSMAXCDP,
     XSMAXDP,
     XSMAXJDP,
     XSMINCDP,
     XSMINDP,
     XSMINJDP,
     XSTDIVDP,
     XSTSQRTDP,
     XSCMPEQDP,
     XSCMPEXPDP,
     XSCMPGEDP,
     XSCMPGTDP,
     XSCMPODP,
     XSCMPUDP,
     XSXSIGDP,
     XSCVSPDPN
 )>;

 // Standard Dispatch ALU operation for 2 cycles. Only one slice is used: one
 //  ALU unit, one EXEC pipeline and two dispatches (DISP).
 def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     ADDIStocHA,
     ADDItocL,
     MCRF,
     MCRXRX,
     SLD,
     SRD,
     SRAD,
     SRADI,
     RLDIC,
     XSNABSDP,
     XSXEXPDP,
     XSABSDP,
     XSNEGDP,
     XSCPSGNDP
 )>;

 // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
 //  single slice. However, since it is Restricted it requires all 3 dispatches
 //  (DISP) for that superslice.
 def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     RLDCL,
     RLDCR,
     RLDIMI,
     RLDICL,
     RLDICR,
     RLDICL_32_64,
     XSIEXPDP,
     FMR,
     FABSD,
     FABSS,
     FNABSD,
     FNABSS,
     FNEGD,
     FNEGS,
     FCPSGND,
     FCPSGNS
 )>;

 // Three-cycle ALU vector operations that occupy an entire superslice.
 //  Each listed instruction is charged both ALU units (the even ALUE and the
 //  odd ALUO), both execution pipelines (EXECE, EXECO) and all three dispatches
 //  (DISP) of the given superslice.
 def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs
     VBPERMD,
     VABSDUB,
     VABSDUH,
     VABSDUW,
     VADDUBS,
     VADDUHS,
     VADDUWS,
     VAVGSB,
     VAVGSH,
     VAVGSW,
     VAVGUB,
     VAVGUH,
     VAVGUW,
     VCMPEQFP,
     VCMPEQFPo,
     VCMPGEFP,
     VCMPGEFPo,
     VCMPBFP,
     VCMPBFPo,
     VCMPGTFP,
     VCMPGTFPo,
     VCLZB,
     VCLZD,
     VCLZH,
     VCLZW,
     VCTZB,
     VCTZD,
     VCTZH,
     VCTZW,
     VADDSBS,
     VADDSHS,
     VADDSWS,
     VMINFP,
     VMINSB,
     VMINSD,
     VMINSH,
     VMINSW,
     VMINUB,
     VMINUD,
     VMINUH,
     VMINUW,
     VMAXFP,
     VMAXSB,
     VMAXSD,
     VMAXSH,
     VMAXSW,
     VMAXUB,
     VMAXUD,
     VMAXUH,
     VMAXUW,
     VPOPCNTW,
     VPOPCNTD,
     VPRTYBD,
     VPRTYBW,
     VSHASIGMAD,
     VSHASIGMAW,
     VSUBSBS,
     VSUBSHS,
     VSUBSWS,
     VSUBUBS,
     VSUBUHS,
     VSUBUWS,
     VSUBCUW,
     VCMPGTSB,
     VCMPGTSBo,
     VCMPGTSD,
     VCMPGTSDo,
     VCMPGTSH,
     VCMPGTSHo,
     VCMPGTSW,
     VCMPGTSWo,
     VCMPGTUB,
     VCMPGTUBo,
     VCMPGTUD,
     VCMPGTUDo,
     VCMPGTUH,
     VCMPGTUHo,
     VCMPGTUW,
     VCMPGTUWo,
     VCMPNEBo,
     VCMPNEHo,
     VCMPNEWo,
     VCMPNEZBo,
     VCMPNEZHo,
     VCMPNEZWo,
     VCMPEQUBo,
     VCMPEQUDo,
     VCMPEQUHo,
     VCMPEQUWo,
     XVCMPEQDP,
     XVCMPEQDPo,
     XVCMPEQSP,
     XVCMPEQSPo,
     XVCMPGEDP,
     XVCMPGEDPo,
     XVCMPGESP,
     XVCMPGESPo,
     XVCMPGTDP,
     XVCMPGTDPo,
     XVCMPGTSP,
     XVCMPGTSPo,
     XVMAXDP,
     XVMAXSP,
     XVMINDP,
     XVMINSP,
     XVTDIVDP,
     XVTDIVSP,
     XVTSQRTDP,
     XVTSQRTSP,
     XVTSTDCDP,
     XVTSTDCSP,
     XVXSIGDP,
     XVXSIGSP
 )>;

 // Seven-cycle DP vector operations that occupy an entire superslice.
 //  Each listed instruction is charged both DP units (the even DPE and the odd
 //  DPO), both execution pipelines (EXECE, EXECO) and all three dispatches
 //  (DISP) of the given superslice.
 def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs
     VADDFP,
     VCTSXS,
     VCTSXS_0,
     VCTUXS,
     VCTUXS_0,
     VEXPTEFP,
     VLOGEFP,
     VMADDFP,
     VMHADDSHS,
     VNMSUBFP,
     VREFP,
     VRFIM,
     VRFIN,
     VRFIP,
     VRFIZ,
     VRSQRTEFP,
     VSUBFP,
     XVADDDP,
     XVADDSP,
     XVCVDPSP,
     XVCVDPSXDS,
     XVCVDPSXWS,
     XVCVDPUXDS,
     XVCVDPUXWS,
     XVCVHPSP,
     XVCVSPDP,
     XVCVSPHP,
     XVCVSPSXDS,
     XVCVSPSXWS,
     XVCVSPUXDS,
     XVCVSPUXWS,
     XVCVSXDDP,
     XVCVSXDSP,
     XVCVSXWDP,
     XVCVSXWSP,
     XVCVUXDDP,
     XVCVUXDSP,
     XVCVUXWDP,
     XVCVUXWSP,
     XVMADDADP,
     XVMADDASP,
     XVMADDMDP,
     XVMADDMSP,
     XVMSUBADP,
     XVMSUBASP,
     XVMSUBMDP,
     XVMSUBMSP,
     XVMULDP,
     XVMULSP,
     XVNMADDADP,
     XVNMADDASP,
     XVNMADDMDP,
     XVNMADDMSP,
     XVNMSUBADP,
     XVNMSUBASP,
     XVNMSUBMDP,
     XVNMSUBMSP,
     XVRDPI,
     XVRDPIC,
     XVRDPIM,
     XVRDPIP,
     XVRDPIZ,
     XVREDP,
     XVRESP,
     XVRSPI,
     XVRSPIC,
     XVRSPIM,
     XVRSPIP,
     XVRSPIZ,
     XVRSQRTEDP,
     XVRSQRTESP,
     XVSUBDP,
     XVSUBSP,
     VCFSX,
     VCFSX_0,
     VCFUX,
     VCFUX_0,
     VMHRADDSHS,
     VMLADDUHM,
     VMSUMMBM,
     VMSUMSHM,
     VMSUMSHS,
     VMSUMUBM,
     VMSUMUHM,
     VMSUMUHS,
     VMULESB,
     VMULESH,
     VMULESW,
     VMULEUB,
     VMULEUH,
     VMULEUW,
     VMULOSB,
     VMULOSH,
     VMULOSW,
     VMULOUB,
     VMULOUH,
     VMULOUW,
     VMULUWM,
     VSUM2SWS,
     VSUM4SBS,
     VSUM4SHS,
     VSUM4UBS,
     VSUMSWS
 )>;

 // Seven-cycle Restricted DP operation. Uses one DP unit and one EXEC pipeline,
 //  and, being Restricted, all three dispatch units (DISP) of the superslice.
 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     FRSP,
     FRIND,
     FRINS,
     FRIPD,
     FRIPS,
     FRIZD,
     FRIZS,
     FRIMD,
     FRIMS,
     FRE,
     FRES,
     FRSQRTE,
     FRSQRTES,
     FMADDS,
     FMADD,
     FMSUBS,
     FMSUB,
     FNMADDS,
     FNMADD,
     FNMSUBS,
     FNMSUB,
     FSELD,
     FSELS,
     FADDS,
     FMULS,
     FMUL,
     FSUBS,
     FCFID,
     FCTID,
     FCTIDZ,
     FCFIDU,
     FCFIDS,
     FCFIDUS,
     FCTIDUZ,
     FCTIWUZ,
     FCTIW,
     FCTIWZ,
     XSMADDADP,
     XSMADDASP,
     XSMADDMDP,
     XSMADDMSP,
     XSMSUBADP,
     XSMSUBASP,
     XSMSUBMDP,
     XSMSUBMSP,
     XSMULDP,
     XSMULSP,
     XSNMADDADP,
     XSNMADDASP,
     XSNMADDMDP,
     XSNMADDMSP,
     XSNMSUBADP,
     XSNMSUBASP,
     XSNMSUBMDP,
     XSNMSUBMSP
 )>;

 // A 7-cycle Restricted DP operation combined with one 2-cycle ALU operation.
 //  Two EXEC pipelines are used; since the DP part is Restricted, a full
 //  5 dispatches (DISP) are needed.
 def : InstRW<[P9_DPOpAndALUOp_9C,
               IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs FMULo, FMADDo, FMSUBo, FNMADDo, FNMSUBo)>;

 // Seven-cycle DP operation (standard dispatch). Uses one DP unit, one EXEC
 //  pipeline and two dispatch units (DISP).
 // Note: XSRSP appears in the list below only as a commented-out entry, so it
 //  is not covered by this definition.
 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     XSADDDP,
     XSADDSP,
     XSCVDPHP,
     XSCVDPSP,
     XSCVDPSXDS,
     XSCVDPSXDSs,
     XSCVDPSXWS,
     XSCVDPUXDS,
     XSCVDPUXDSs,
     XSCVDPUXWS,
     XSCVHPDP,
     XSCVSPDP,
     XSCVSXDDP,
     XSCVSXDSP,
     XSCVUXDDP,
     XSCVUXDSP,
     XSRDPI,
     XSRDPIC,
     XSRDPIM,
     XSRDPIP,
     XSRDPIZ,
     XSREDP,
     XSRESP,
     //XSRSP,
     XSRSQRTEDP,
     XSRSQRTESP,
     XSSUBDP,
     XSSUBSP,
     XSCVDPSPN
 )>;

 // Three-cycle PM (Permute) operation. There is only one PM unit per
 //  superslice, so the whole superslice is used: both exec pipelines (EXECO,
 //  EXECE) and all three dispatches (DISP).
 def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     VBPERMQ,
     VCLZLSBB,
     VCTZLSBB,
     VEXTRACTD,
     VEXTRACTUB,
     VEXTRACTUH,
     VEXTRACTUW,
     VEXTUBLX,
     VEXTUBRX,
     VEXTUHLX,
     VEXTUHRX,
     VEXTUWLX,
     VEXTUWRX,
     VGBBD,
     VINSERTB,
     VINSERTD,
     VINSERTH,
     VINSERTW,
     VMRGHB,
     VMRGHH,
     VMRGHW,
     VMRGLB,
     VMRGLH,
     VMRGLW,
     VPERM,
     VPERMR,
     VPERMXOR,
     VPKPX,
     VPKSDSS,
     VPKSDUS,
     VPKSHSS,
     VPKSHUS,
     VPKSWSS,
     VPKSWUS,
     VPKUDUM,
     VPKUDUS,
     VPKUHUM,
     VPKUHUS,
     VPKUWUM,
     VPKUWUS,
     VPRTYBQ,
     VSL,
     VSLDOI,
     VSLO,
     VSLV,
     VSPLTB,
     VSPLTBs,
     VSPLTH,
     VSPLTHs,
     VSPLTISB,
     VSPLTISH,
     VSPLTISW,
     VSPLTW,
     VSR,
     VSRO,
     VSRV,
     VUPKHPX,
     VUPKHSB,
     VUPKHSH,
     VUPKHSW,
     VUPKLPX,
     VUPKLSB,
     VUPKLSH,
     VUPKLSW,
     XXBRD,
     XXBRH,
     XXBRQ,
     XXBRW,
     XXEXTRACTUW,
     XXINSERTW,
     XXMRGHW,
     XXMRGLW,
     XXPERM,
     XXPERMR,
     XXSLDWI,
     XXSPLTIB,
     XXSPLTW,
     XXSPLTWs,
     XXPERMDI,
     XXPERMDIs,
     VADDCUQ,
     VADDECUQ,
     VADDEUQM,
     VADDUQM,
     VMUL10CUQ,
     VMUL10ECUQ,
     VMUL10EUQ,
     VMUL10UQ,
     VSUBCUQ,
     VSUBECUQ,
     VSUBEUQM,
     VSUBUQM,
     XSCMPEXPQP,
     XSCMPOQP,
     XSCMPUQP,
     XSTSTDCQP,
     XSXSIGQP
 )>;

 // 12-cycle DFU operation. There is only one DFU unit per CPU, so a whole
 //  superslice is used: both exec pipelines (EXECO, EXECE) and all three
 //  dispatches (DISP).
 def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     XSADDQP,
     XSADDQPO,
     XSCVDPQP,
     XSCVQPDP,
     XSCVQPDPO,
     XSCVQPSDZ,
     XSCVQPSWZ,
     XSCVQPUDZ,
     XSCVQPUWZ,
     XSCVSDQP,
     XSCVUDQP,
     XSRQPI,
     XSRQPXP,
     XSSUBQP,
     XSSUBQPO
 )>;

 // 24-cycle DFU operation. There is only one DFU unit per CPU, so a whole
 //  superslice is used: both exec pipelines (EXECO, EXECE) and all three
 //  dispatches (DISP).
 def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     XSMADDQP,
     XSMADDQPO,
     XSMSUBQP,
     XSMSUBQPO,
     XSMULQP,
     XSMULQPO,
     XSNMADDQP,
     XSNMADDQPO,
     XSNMSUBQP,
     XSNMSUBQPO
 )>;

 // 58-cycle DFU operation. With a single DFU unit per CPU, the instruction
 //  takes a whole superslice: both exec pipelines (EXECO, EXECE) plus all
 //  three dispatches (DISP).
 def : InstRW<[P9_DFU_58C,
               IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs XSDIVQP, XSDIVQPO)>;

 // 76-cycle DFU operation. With a single DFU unit per CPU, the instruction
 //  takes a whole superslice: both exec pipelines (EXECO, EXECE) plus all
 //  three dispatches (DISP).
 def : InstRW<[P9_DFU_76C,
               IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs XSSQRTQP, XSSQRTQPO)>;

 // 5-cycle load that uses a single slice: one LS resource, one AGEN pipeline
 //  and two dispatches (DISP).
 def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
       (instrs
     LXSDX,
     LXVD2X,
     LXSIWZX,
     LXV,
     LXVX,
     LXSD,
     DFLOADf64
 )>;

 // 4-cycle load that uses a single slice: one LS resource, one AGEN pipeline
 //  and two dispatches (DISP).
 // NOTE(review): the only instruction listed is COPY, which does not look like
 //  a memory load; confirm that modelling it with the load/store (LS) resource
 //  rather than an ALU resource is intended.
 def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
       (instrs
     COPY
 )>;

 // 4-cycle Restricted load. Only a single slice is used for execution, but the
 //  dispatch of the whole superslice (all three DISP) is taken.
 def : InstRW<[P9_LS_4C, IP_AGEN_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs LFIWZX, LFDX, LFD)>;

 // Cracked, Restricted load instruction.
 // It consists of consecutive Load and ALU pieces totaling 6 cycles; the two
 //  pieces cannot overlap, so their latencies are added.
 // Being both cracked and restricted, it needs the full 6 dispatches.
 def : InstRW<[P9_LoadAndALUOp_6C,
               IP_EXEC_1C, IP_AGEN_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs LFIWAX, LFSX, LFS)>;

 // Cracked load instruction.
 // It consists of consecutive Load and ALU pieces totaling 7 cycles; the two
 //  pieces cannot overlap, so their latencies are added.
 // Being cracked, it needs the full 4 dispatches.
 def : InstRW<[P9_LoadAndALUOp_7C,
               IP_AGEN_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs LXSSPX, LXSIWAX, LXSSP, DFLOADf32)>;

 // Cracked load that also needs the PM resource.
 // The Load and the PM cannot run at the same time, so their latencies are
 //  added for a total of 8 cycles.
 // The PM part takes the full superslice (both EXECE and EXECO pipelines and
 //  3 dispatches); the Load part accounts for the remaining 2 dispatches.
 def : InstRW<[P9_LoadAndPMOp_8C,
               IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs LXVDSX, LXVWSX, LXVW4X)>;

 // Single-slice Restricted store operation. It uses one LS resource, one EXEC
 //  pipeline and one AGEN pipeline; being Restricted, it requires all three
 //  dispatches (DISP) for the superslice.
 def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     STFS,
     STFD,
     STFIWX,
     STFSX,
     STFDX,
     STXSDX,
     STXSSPX,
     STXSIWX,
     DFSTOREf32,
     DFSTOREf64
 )>;

 // Store operation that takes the whole superslice: both exec pipelines
 //  (EXECE, EXECO), the AGEN pipeline and all three dispatches (DISP).
 def : InstRW<[P9_LS_1C,
               IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs STXVD2X, STXVW4X)>;


 // 16-cycle DIV operation. Each superslice has just one DIV unit, so the whole
 //  superslice is taken: both exec pipelines (EXECO, EXECE) and all three
 //  dispatches (DISP).
 def : InstRW<[P9_DIV_16C_8,
               IP_EXECO_1C, IP_EXECE_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs DIVW, DIVWU, MODSW)>;

 // 24-cycle DIV operation. There is only one DIV unit per superslice, so the
 //  whole superslice is used: both exec pipelines (EXECO, EXECE) and all three
 //  dispatches (DISP).
 def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs
     DIVWE,
     DIVD,
     DIVWEU,
     DIVDU,
     MODSD,
     MODUD,
     MODUW
 )>;

 // 40-cycle DIV operation. Each superslice has just one DIV unit, so the whole
 //  superslice is taken: both exec pipelines (EXECO, EXECE) and all three
 //  dispatches (DISP).
 def : InstRW<[P9_DIV_40C_8,
               IP_EXECO_1C, IP_EXECE_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs DIVDE, DIVDEU)>;

 // Cracked DIV plus ALU operation. The ALU piece needs one full slice and the
 //  DIV piece needs one full superslice, because there is only one DIV unit
 //  per superslice. The combined DIV + ALU latency is 26.
 def : InstRW<[P9_IntDivAndALUOp_26C_8,
               IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs DIVDo, DIVDUo, DIVWEo, DIVWEUo)>;

 // Cracked DIV plus ALU operation. The ALU piece needs one full slice and the
 //  DIV piece needs one full superslice, because there is only one DIV unit
 //  per superslice. The combined DIV + ALU latency is 42.
 def : InstRW<[P9_IntDivAndALUOp_42C_8,
               IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs DIVDEo, DIVDEUo)>;

 // CR access instructions in IIC_BrMCR, IIC_BrMCRX.

 // Cracked, restricted ALU operations.
 // The two ALU ops can run in parallel, so their latencies are not added
 //  together. Apart from that, this behaves like two instructions running
 //  side by side on two pipelines with 6 dispatches.
 // Each ALU op takes 2 cycles.
 def : InstRW<[P9_ALU_2C, P9_ALU_2C,
               IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs MTOCRF, MTOCRF8, MTCRF, MTCRF8)>;

 // Cracked ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
 //  latencies are not added together. Otherwise this is like having two
 //  instructions running together on two pipelines and 4 dispatches (two per
 //  ALU op, matching the four DISP_1C entries below).
 // ALU ops are 3 cycles each.
 // NOTE(review): this comment previously said "restricted" and "6 dispatches",
 //  which contradicts the four DISP_1C resources actually listed; confirm
 //  against the POWER9 documentation that MCRFS is cracked but not restricted.
 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     MCRFS
 )>;

 // FP Div instructions in IIC_FPDivD and IIC_FPDivS.

 // 33-cycle Restricted DP instruction. Occupies one slice (one EXEC pipeline)
 //  and takes 3 dispatches (DISP).
 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs FDIV)>;

 // 33-cycle Restricted DP instruction that is cracked with a 2-cycle ALU op.
 //  Uses two EXEC pipelines and five dispatches (DISP).
 def : InstRW<[P9_DPOpAndALUOp_35C_8,
               IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs FDIVo)>;

 // 33-cycle DP instruction. Occupies one slice (one EXEC pipeline) and takes
 //  2 dispatches (DISP).
 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C,
               DISP_1C, DISP_1C],
       (instrs XSDIVDP)>;

 // 22-cycle Restricted DP instruction. Occupies one slice (one EXEC pipeline)
 //  and takes 3 dispatches (DISP).
 def : InstRW<[P9_DP_22C_5, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs FDIVS)>;

 // 22-cycle Restricted DP instruction that is cracked with a 2-cycle ALU op.
 //  Uses two EXEC pipelines and five dispatches (DISP).
 def : InstRW<[P9_DPOpAndALUOp_24C_5,
               IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs FDIVSo)>;

 // 22-cycle DP instruction. Occupies one slice (one EXEC pipeline) and takes
 //  2 dispatches (DISP).
 def : InstRW<[P9_DP_22C_5, IP_EXEC_1C,
               DISP_1C, DISP_1C],
       (instrs XSDIVSP)>;

 // 24-cycle DP vector instruction that takes one full superslice: both DP
 //  units (DPE, DPO), both the EXECE and EXECO pipelines and all 3 dispatches
 //  (DISP) of the given superslice.
 def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8,
               IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs XVDIVSP)>;

 // 33-cycle DP vector instruction that takes one full superslice: both DP
 //  units (DPE, DPO), both the EXECE and EXECO pipelines and all 3 dispatches
 //  (DISP) of the given superslice.
 def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8,
               IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs XVDIVDP)>;

 // Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.

 // Instruction cracked into three pieces: one Load and two ALU operations.
 // The Load and one of the ALU ops cannot run at the same time, so their
 //  latencies are added together for 6 cycles. The remaining ALU op is 2 cycles.
 // The load and the ALU op that depends on it are both restricted and together
 //  take 6 dispatches; the second ALU op contributes the final 2 dispatches.
 // The two EXEC pipelines serve the 2 ALU ops and the AGEN serves the load.
 def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
               IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs LFSU, LFSUX)>;

 // Cracked instruction consisting of a Load and an ALU op. The ALU op does not
 //  depend on the load, so both can run at the same time. The load is also
 //  restricted: 3 of the dispatches come from the restricted load and the
 //  other two from the ALU op. The AGEN pipeline belongs to the load and the
 //  EXEC pipeline to the ALU op.
 def : InstRW<[P9_LS_4C, P9_ALU_2C,
               IP_AGEN_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs LFDU, LFDUX)>;

 // Crypto Instructions

 // 6-cycle CY (Crypto) operation. There is only one CY unit per CPU, so a whole
 //  superslice is used: both exec pipelines (EXECO, EXECE) and all three
 //  dispatches (DISP).
 def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
   VPMSUMB,
   VPMSUMD,
   VPMSUMH,
   VPMSUMW,
   VCIPHER,
   VCIPHERLAST,
   VNCIPHER,
   VNCIPHERLAST,
   VSBOX
 )>;
	//===- P9InstrResources.td - P9 Instruction Resource Defs -- tablegen --===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	//
	// This file defines the resources required by some P9 instructions. It is part
	// of the P9 processor model used for instruction scheduling. Not every
	// instruction is listed here. Instructions in this file belong to itinerary
	// classes that have instructions with different resource requirements.
	//
	// The makeup of the P9 CPU is modeled as follows:
	// - Each CPU is made up of two superslices.
	// - Each superslice is made up of two slices. Therefore, there are 4 slices
	// for each CPU.
	// - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
	// - Each CPU has:
	// - One CY (Crypto) unit P9_CY_*
	// - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
	// - Two PM (Permute) units. One on each superslice. P9_PM_*
	// - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
	// - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
	// - Four DP (Floating Point) units. One on each slice. P9_DP_*
	// This also includes fixed point multiply add.
	// - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
	// - Four Load/Store Queues. P9_LS_*
	// - Each set of instructions will require a number of these resources.
	//===----------------------------------------------------------------------===//

	// Two-cycle ALU vector operations that occupy an entire superslice.
	// Each listed instruction is charged both ALU units (the even ALUE and the
	// odd ALUO), both execution pipelines (EXECE, EXECO) and all three dispatches
	// (DISP) of the given superslice.
	def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
	DISP_1C, DISP_1C, DISP_1C],
	(instrs
	VADDCUW,
	VADDUBM,
	VADDUDM,
	VADDUHM,
	VADDUWM,
	VAND,
	VANDC,
	VCMPEQUB,
	VCMPEQUD,
	VCMPEQUH,
	VCMPEQUW,
	VCMPNEB,
	VCMPNEH,
	VCMPNEW,
	VCMPNEZB,
	VCMPNEZH,
	VCMPNEZW,
	VEQV,
	VEXTSB2D,
	VEXTSB2W,
	VEXTSH2D,
	VEXTSH2W,
	VEXTSW2D,
	VRLB,
	VRLD,
	VRLDMI,
	VRLDNM,
	VRLH,
	VRLW,
	VRLWMI,
	VRLWNM,
	VSRAB,
	VSRAD,
	VSRAH,
	VSRAW,
	VSRB,
	VSRD,
	VSRH,
	VSRW,
	VSLB,
	VSLD,
	VSLH,
	VSLW,
	VMRGEW,
	VMRGOW,
	VNAND,
	VNEGD,
	VNEGW,
	VNOR,
	VOR,
	VORC,
	VPOPCNTB,
	VPOPCNTH,
	VSEL,
	VSUBUBM,
	VSUBUDM,
	VSUBUHM,
	VSUBUWM,
	VXOR,
	V_SET0B,
	V_SET0H,
	V_SET0,
	XVABSDP,
	XVABSSP,
	XVCPSGNDP,
	XVCPSGNSP,
	XVIEXPDP,
	XVNABSDP,
	XVNABSSP,
	XVNEGDP,
	XVNEGSP,
	XVXEXPDP,
	XVIEXPSP,
	XVXEXPSP,
	XXLAND,
	XXLANDC,
	XXLEQV,
	XXLNAND,
	XXLNOR,
	XXLOR,
	XXLORf,
	XXLORC,
	XXLXOR,
	XXSEL,
	XSABSQP,
	XSCPSGNQP,
	XSIEXPQP,
	XSNABSQP,
	XSNEGQP,
	XSXEXPQP
	)>;

	// Three-cycle ALU operation with Restricted dispatch. It executes on a single
	// slice, but because it is Restricted it claims all 3 dispatches (DISP) of
	// that superslice.
	def : InstRW<[P9_ALU_3C, IP_EXEC_1C,
	DISP_1C, DISP_1C, DISP_1C],
	(instrs FCMPUS, FCMPUD, XSTSTDCDP, XSTSTDCSP)>;

	// Standard Dispatch ALU operation for 3 cycles. Only one slice is used: one
	// ALU unit, one EXEC pipeline and two dispatches (DISP).
	def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
	(instrs
	XSMAXCDP,
	XSMAXDP,
	XSMAXJDP,
	XSMINCDP,
	XSMINDP,
	XSMINJDP,
	XSTDIVDP,
	XSTSQRTDP,
	XSCMPEQDP,
	XSCMPEXPDP,
	XSCMPGEDP,
	XSCMPGTDP,
	XSCMPODP,
	XSCMPUDP,
	XSXSIGDP,
	XSCVSPDPN
	)>;

	// Standard Dispatch ALU operation for 2 cycles. Only one slice is used: one
	// ALU unit, one EXEC pipeline and two dispatches (DISP).
	def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
	(instrs
	ADDIStocHA,
	ADDItocL,
	MCRF,
	MCRXRX,
	SLD,
	SRD,
	SRAD,
	SRADI,
	RLDIC,
	XSNABSDP,
	XSXEXPDP,
	XSABSDP,
	XSNEGDP,
	XSCPSGNDP
	)>;

	// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
	// single slice. However, since it is Restricted it requires all 3 dispatches
	// (DISP) for that superslice.
	def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
	(instrs
	RLDCL,
	RLDCR,
	RLDIMI,
	RLDICL,
	RLDICR,
	RLDICL_32_64,
	XSIEXPDP,
	FMR,
	FABSD,
	FABSS,
	FNABSD,
	FNABSS,
	FNEGD,
	FNEGS,
	FCPSGND,
	FCPSGNS
	)>;

	// Three-cycle ALU vector operations that occupy an entire superslice.
	// Each listed instruction is charged both ALU units (the even ALUE and the
	// odd ALUO), both execution pipelines (EXECE, EXECO) and all three dispatches
	// (DISP) of the given superslice.
	def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
	DISP_1C, DISP_1C, DISP_1C],
	(instrs
	VBPERMD,
	VABSDUB,
	VABSDUH,
	VABSDUW,
	VADDUBS,
	VADDUHS,
	VADDUWS,
	VAVGSB,
	VAVGSH,
	VAVGSW,
	VAVGUB,
	VAVGUH,
	VAVGUW,
	VCMPEQFP,
	VCMPEQFPo,
	VCMPGEFP,
	VCMPGEFPo,
	VCMPBFP,
	VCMPBFPo,
	VCMPGTFP,
	VCMPGTFPo,
	VCLZB,
	VCLZD,
	VCLZH,
	VCLZW,
	VCTZB,
	VCTZD,
	VCTZH,
	VCTZW,
	VADDSBS,
	VADDSHS,
	VADDSWS,
	VMINFP,
	VMINSB,
	VMINSD,
	VMINSH,
	VMINSW,
	VMINUB,
	VMINUD,
	VMINUH,
	VMINUW,
	VMAXFP,
	VMAXSB,
	VMAXSD,
	VMAXSH,
	VMAXSW,
	VMAXUB,
	VMAXUD,
	VMAXUH,
	VMAXUW,
	VPOPCNTW,
	VPOPCNTD,
	VPRTYBD,
	VPRTYBW,
	VSHASIGMAD,
	VSHASIGMAW,
	VSUBSBS,
	VSUBSHS,
	VSUBSWS,
	VSUBUBS,
	VSUBUHS,
	VSUBUWS,
	VSUBCUW,
	VCMPGTSB,
	VCMPGTSBo,
	VCMPGTSD,
	VCMPGTSDo,
	VCMPGTSH,
	VCMPGTSHo,
	VCMPGTSW,
	VCMPGTSWo,
	VCMPGTUB,
	VCMPGTUBo,
	VCMPGTUD,
	VCMPGTUDo,
	VCMPGTUH,
	VCMPGTUHo,
	VCMPGTUW,
	VCMPGTUWo,
	VCMPNEBo,
	VCMPNEHo,
	VCMPNEWo,
	VCMPNEZBo,
	VCMPNEZHo,
	VCMPNEZWo,
	VCMPEQUBo,
	VCMPEQUDo,
	VCMPEQUHo,
	VCMPEQUWo,
	XVCMPEQDP,
	XVCMPEQDPo,
	XVCMPEQSP,
	XVCMPEQSPo,
	XVCMPGEDP,
	XVCMPGEDPo,
	XVCMPGESP,
	XVCMPGESPo,
	XVCMPGTDP,
	XVCMPGTDPo,
	XVCMPGTSP,
	XVCMPGTSPo,
	XVMAXDP,
	XVMAXSP,
	XVMINDP,
	XVMINSP,
	XVTDIVDP,
	XVTDIVSP,
	XVTSQRTDP,
	XVTSQRTSP,
	XVTSTDCDP,
	XVTSTDCSP,
	XVXSIGDP,
	XVXSIGSP
	)>;

	// Seven-cycle DP vector operations that occupy an entire superslice.
	// Each listed instruction is charged both DP units (the even DPE and the odd
	// DPO), both execution pipelines (EXECE, EXECO) and all three dispatches
	// (DISP) of the given superslice.
	def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
	DISP_1C, DISP_1C, DISP_1C],
	(instrs
	VADDFP,
	VCTSXS,
	VCTSXS_0,
	VCTUXS,
	VCTUXS_0,
	VEXPTEFP,
	VLOGEFP,
	VMADDFP,
	VMHADDSHS,
	VNMSUBFP,
	VREFP,
	VRFIM,
	VRFIN,
	VRFIP,
	VRFIZ,
	VRSQRTEFP,
	VSUBFP,
	XVADDDP,
	XVADDSP,
	XVCVDPSP,
	XVCVDPSXDS,
	XVCVDPSXWS,
	XVCVDPUXDS,
	XVCVDPUXWS,
	XVCVHPSP,
	XVCVSPDP,
	XVCVSPHP,
	XVCVSPSXDS,
	XVCVSPSXWS,
	XVCVSPUXDS,
	XVCVSPUXWS,
	XVCVSXDDP,
	XVCVSXDSP,
	XVCVSXWDP,
	XVCVSXWSP,
	XVCVUXDDP,
	XVCVUXDSP,
	XVCVUXWDP,
	XVCVUXWSP,
	XVMADDADP,
	XVMADDASP,
	XVMADDMDP,
	XVMADDMSP,
	XVMSUBADP,
	XVMSUBASP,
	XVMSUBMDP,
	XVMSUBMSP,
	XVMULDP,
	XVMULSP,
	XVNMADDADP,
	XVNMADDASP,
	XVNMADDMDP,
	XVNMADDMSP,
	XVNMSUBADP,
	XVNMSUBASP,
	XVNMSUBMDP,
	XVNMSUBMSP,
	XVRDPI,
	XVRDPIC,
	XVRDPIM,
	XVRDPIP,
	XVRDPIZ,
	XVREDP,
	XVRESP,
	XVRSPI,
	XVRSPIC,
	XVRSPIM,
	XVRSPIP,
	XVRSPIZ,
	XVRSQRTEDP,
	XVRSQRTESP,
	XVSUBDP,
	XVSUBSP,
	VCFSX,
	VCFSX_0,
	VCFUX,
	VCFUX_0,
	VMHRADDSHS,
	VMLADDUHM,
	VMSUMMBM,
	VMSUMSHM,
	VMSUMSHS,
	VMSUMUBM,
	VMSUMUHM,
	VMSUMUHS,
	VMULESB,
	VMULESH,
	VMULESW,
	VMULEUB,
	VMULEUH,
	VMULEUW,
	VMULOSB,
	VMULOSH,
	VMULOSW,
	VMULOUB,
	VMULOUH,
	VMULOUW,
	VMULUWM,
	VSUM2SWS,
	VSUM4SBS,
	VSUM4SHS,
	VSUM4UBS,
	VSUMSWS
	)>;

	// Seven-cycle Restricted DP operation. Uses one DP unit and one EXEC pipeline,
	// and, being Restricted, all three dispatch units (DISP) of the superslice.
	def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
	(instrs
	FRSP,
	FRIND,
	FRINS,
	FRIPD,
	FRIPS,
	FRIZD,
	FRIZS,
	FRIMD,
	FRIMS,
	FRE,
	FRES,
	FRSQRTE,
	FRSQRTES,
	FMADDS,
	FMADD,
	FMSUBS,
	FMSUB,
	FNMADDS,
	FNMADD,
	FNMSUBS,
	FNMSUB,
	FSELD,
	FSELS,
	FADDS,
	FMULS,
	FMUL,
	FSUBS,
	FCFID,
	FCTID,
	FCTIDZ,
	FCFIDU,
	FCFIDS,
	FCFIDUS,
	FCTIDUZ,
	FCTIWUZ,
	FCTIW,
	FCTIWZ,
	XSMADDADP,
	XSMADDASP,
	XSMADDMDP,
	XSMADDMSP,
	XSMSUBADP,
	XSMSUBASP,
	XSMSUBMDP,
	XSMSUBMSP,
	XSMULDP,
	XSMULSP,
	XSNMADDADP,
	XSNMADDASP,
	XSNMADDMDP,
	XSNMADDMSP,
	XSNMSUBADP,
	XSNMSUBASP,
	XSNMSUBMDP,
	XSNMSUBMSP
	)>;

	// A 7-cycle Restricted DP operation combined with one 2-cycle ALU operation.
	// Two EXEC pipelines are used; since the DP part is Restricted, a full
	// 5 dispatches (DISP) are needed.
	def : InstRW<[P9_DPOpAndALUOp_9C,
	IP_EXEC_1C, IP_EXEC_1C,
	DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	(instrs FMULo, FMADDo, FMSUBo, FNMADDo, FNMSUBo)>;

	// Seven-cycle DP operation (standard dispatch). Uses one DP unit, one EXEC
	// pipeline and two dispatch units (DISP).
	// Note: XSRSP appears in the list below only as a commented-out entry, so it
	// is not covered by this definition.
	def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
	(instrs
	XSADDDP,
	XSADDSP,
	XSCVDPHP,
	XSCVDPSP,
	XSCVDPSXDS,
	XSCVDPSXDSs,
	XSCVDPSXWS,
	XSCVDPUXDS,
	XSCVDPUXDSs,
	XSCVDPUXWS,
	XSCVHPDP,
	XSCVSPDP,
	XSCVSXDDP,
	XSCVSXDSP,
	XSCVUXDDP,
	XSCVUXDSP,
	XSRDPI,
	XSRDPIC,
	XSRDPIM,
	XSRDPIP,
	XSRDPIZ,
	XSREDP,
	XSRESP,
	//XSRSP,
	XSRSQRTEDP,
	XSRSQRTESP,
	XSSUBDP,
	XSSUBSP,
	XSCVDPSPN
	)>;

	// Three-cycle PM (Permute) operation. There is only one PM unit per
	// superslice, so the whole superslice is used: both exec pipelines (EXECO,
	// EXECE) and all three dispatches (DISP).
	def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
	(instrs
	VBPERMQ,
	VCLZLSBB,
	VCTZLSBB,
	VEXTRACTD,
	VEXTRACTUB,
	VEXTRACTUH,
	VEXTRACTUW,
	VEXTUBLX,
	VEXTUBRX,
	VEXTUHLX,
	VEXTUHRX,
	VEXTUWLX,
	VEXTUWRX,
	VGBBD,
	VINSERTB,
	VINSERTD,
	VINSERTH,
	VINSERTW,
	VMRGHB,
	VMRGHH,
	VMRGHW,
	VMRGLB,
	VMRGLH,
	VMRGLW,
	VPERM,
	VPERMR,
	VPERMXOR,
	VPKPX,
	VPKSDSS,
	VPKSDUS,
	VPKSHSS,
	VPKSHUS,
	VPKSWSS,
	VPKSWUS,
	VPKUDUM,
	VPKUDUS,
	VPKUHUM,
	VPKUHUS,
	VPKUWUM,
	VPKUWUS,
	VPRTYBQ,
	VSL,
	VSLDOI,
	VSLO,
	VSLV,
	VSPLTB,
	VSPLTBs,
	VSPLTH,
	VSPLTHs,
	VSPLTISB,
	VSPLTISH,
	VSPLTISW,
	VSPLTW,
	VSR,
	VSRO,
	VSRV,
	VUPKHPX,
	VUPKHSB,
	VUPKHSH,
	VUPKHSW,
	VUPKLPX,
	VUPKLSB,
	VUPKLSH,
	VUPKLSW,
	XXBRD,
	XXBRH,
	XXBRQ,
	XXBRW,
	XXEXTRACTUW,
	XXINSERTW,
	XXMRGHW,
	XXMRGLW,
	XXPERM,
	XXPERMR,
	XXSLDWI,
	XXSPLTIB,
	XXSPLTW,
	XXSPLTWs,
	XXPERMDI,
	XXPERMDIs,
	VADDCUQ,
	VADDECUQ,
	VADDEUQM,
	VADDUQM,
	VMUL10CUQ,
	VMUL10ECUQ,
	VMUL10EUQ,
	VMUL10UQ,
	VSUBCUQ,
	VSUBECUQ,
	VSUBEUQM,
	VSUBUQM,
	XSCMPEXPQP,
	XSCMPOQP,
	XSCMPUQP,
	XSTSTDCQP,
	XSXSIGQP
	)>;

	// DFU operation with a 12 cycle latency. The CPU has only one DFU unit, so a
	// whole superslice is claimed: both exec pipelines (EXECO, EXECE) and all
	// three dispatches.
	def : InstRW<
	    [P9_DFU_12C,
	     IP_EXECE_1C, IP_EXECO_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs
	      XSADDQP, XSADDQPO,
	      XSCVDPQP, XSCVQPDP, XSCVQPDPO,
	      XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ,
	      XSCVSDQP, XSCVUDQP,
	      XSRQPI, XSRQPXP,
	      XSSUBQP, XSSUBQPO)>;

	// DFU operation with a 24 cycle latency. The CPU has only one DFU unit, so a
	// whole superslice is claimed: both exec pipelines (EXECO, EXECE) and all
	// three dispatches.
	def : InstRW<
	    [P9_DFU_24C,
	     IP_EXECE_1C, IP_EXECO_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs
	      XSMADDQP, XSMADDQPO,
	      XSMSUBQP, XSMSUBQPO,
	      XSMULQP, XSMULQPO,
	      XSNMADDQP, XSNMADDQPO,
	      XSNMSUBQP, XSNMSUBQPO)>;

	// DFU operation with a 58 cycle latency. The CPU has only one DFU unit, so a
	// whole superslice is claimed: both exec pipelines (EXECO, EXECE) and all
	// three dispatches.
	def : InstRW<
	    [P9_DFU_58C,
	     IP_EXECE_1C, IP_EXECO_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs XSDIVQP, XSDIVQPO)>;

	// DFU operation with a 76 cycle latency. The CPU has only one DFU unit, so a
	// whole superslice is claimed: both exec pipelines (EXECO, EXECE) and all
	// three dispatches.
	def : InstRW<
	    [P9_DFU_76C,
	     IP_EXECE_1C, IP_EXECO_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs XSSQRTQP, XSSQRTQPO)>;

	// Load with a 5 cycle latency; occupies a single slice.
	def : InstRW<
	    [P9_LS_5C,
	     IP_AGEN_1C,
	     DISP_1C, DISP_1C],
	    (instrs LXSDX, LXVD2X, LXSIWZX, LXV, LXVX, LXSD, DFLOADf64)>;

	// Load with a 4 cycle latency; occupies a single slice.
	def : InstRW<
	    [P9_LS_4C,
	     IP_AGEN_1C,
	     DISP_1C, DISP_1C],
	    (instrs COPY)>;

	// Restricted load with a 4 cycle latency. It runs on a single slice but takes
	// the dispatch for the whole superslice.
	def : InstRW<
	    [P9_LS_4C,
	     IP_AGEN_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs LFIWZX, LFDX, LFD)>;

	// Restricted load instruction that is cracked into a Load piece followed by
	// an ALU piece. The two pieces cannot overlap, so their latencies add up to
	// 6 cycles. Being both cracked and restricted, it needs the full 6
	// dispatches.
	def : InstRW<
	    [P9_LoadAndALUOp_6C,
	     IP_EXEC_1C, IP_AGEN_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs LFIWAX, LFSX, LFS)>;

	// Load instruction cracked into a Load piece followed by an ALU piece. The
	// two pieces cannot overlap, so their latencies add up to 7 cycles. As a
	// cracked instruction it needs a full 4 dispatches.
	def : InstRW<
	    [P9_LoadAndALUOp_7C,
	     IP_AGEN_1C, IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs LXSSPX, LXSIWAX, LXSSP, DFLOADf32)>;

	// Cracked load that also needs the PM resource. The Load and the PM pieces
	// cannot overlap, so the latencies are added for a total of 8 cycles.
	// The PM piece takes the full superslice: both EXECE and EXECO pipelines and
	// 3 dispatches. The remaining 2 dispatches belong to the Load piece.
	def : InstRW<
	    [P9_LoadAndPMOp_8C,
	     IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs LXVDSX, LXVWSX, LXVW4X)>;

	// Restricted store operation on a single slice. Being restricted, it takes
	// all three dispatches of the superslice.
	def : InstRW<
	    [P9_LS_1C,
	     IP_EXEC_1C, IP_AGEN_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs
	      STFS, STFD,
	      STFIWX, STFSX, STFDX,
	      STXSDX, STXSSPX, STXSIWX,
	      DFSTOREf32, DFSTOREf64)>;

	// Store operation that takes up the whole superslice.
	def : InstRW<
	    [P9_LS_1C,
	     IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs STXVD2X, STXVW4X)>;


	// DIV operation with a 16 cycle latency. A superslice has only one DIV unit,
	// so the whole superslice is claimed: both exec pipelines (EXECO, EXECE) and
	// all three dispatches.
	def : InstRW<
	    [P9_DIV_16C_8,
	     IP_EXECO_1C, IP_EXECE_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs DIVW, DIVWU, MODSW)>;

	// DIV operation with a 24 cycle latency. A superslice has only one DIV unit,
	// so the whole superslice is claimed: both exec pipelines (EXECO, EXECE) and
	// all three dispatches.
	def : InstRW<
	    [P9_DIV_24C_8,
	     IP_EXECO_1C, IP_EXECE_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs DIVWE, DIVD, DIVWEU, DIVDU, MODSD, MODUD, MODUW)>;

	// DIV operation with a 40 cycle latency. A superslice has only one DIV unit,
	// so the whole superslice is claimed: both exec pipelines (EXECO, EXECE) and
	// all three dispatches.
	def : InstRW<
	    [P9_DIV_40C_8,
	     IP_EXECO_1C, IP_EXECE_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs DIVDE, DIVDEU)>;

	// Instruction cracked into a DIV piece and an ALU piece. The ALU piece needs
	// one full slice; the DIV piece needs one full superslice because each
	// superslice has only one DIV unit. DIV plus ALU latency is 26.
	def : InstRW<
	    [P9_IntDivAndALUOp_26C_8,
	     IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs DIVDo, DIVDUo, DIVWEo, DIVWEUo)>;

	// Instruction cracked into a DIV piece and an ALU piece. The ALU piece needs
	// one full slice; the DIV piece needs one full superslice because each
	// superslice has only one DIV unit. DIV plus ALU latency is 42.
	def : InstRW<
	    [P9_IntDivAndALUOp_42C_8,
	     IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs DIVDEo, DIVDEUo)>;

	// CR access instructions in IIC_BrMCR, IIC_BrMCRX.

	// ALU operations that are both cracked and restricted.
	// The two ALU pieces can run in parallel, so their latencies are not summed.
	// Apart from that, this behaves like two instructions running side by side
	// on two pipelines with 6 dispatches. Each ALU piece takes 2 cycles.
	def : InstRW<
	    [P9_ALU_2C, P9_ALU_2C,
	     IP_EXEC_1C, IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs MTOCRF, MTOCRF8, MTCRF, MTCRF8)>;

	// Cracked ALU operations.
	// The two ALU pieces can run in parallel, so their latencies are not summed.
	// Apart from that, this behaves like two instructions running side by side
	// on two pipelines with 4 dispatches. Each ALU piece takes 3 cycles.
	// NOTE(review): this comment used to say "restricted" and "6 dispatches",
	// which did not match the four DISP_1C entries below. The resource list is
	// unchanged; confirm against the POWER9 documentation whether MCRFS is
	// restricted.
	def : InstRW<
	    [P9_ALU_3C, P9_ALU_3C,
	     IP_EXEC_1C, IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs MCRFS)>;

	// FP Div instructions in IIC_FPDivD and IIC_FPDivS.

	// Restricted DP instruction with a 33 cycle latency. Occupies one slice and
	// takes 3 dispatches.
	def : InstRW<
	    [P9_DP_33C_8,
	     IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs FDIV)>;

	// Restricted 33 cycle DP instruction that is cracked with a 2 cycle ALU
	// operation.
	def : InstRW<
	    [P9_DPOpAndALUOp_35C_8,
	     IP_EXEC_1C, IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs FDIVo)>;

	// DP instruction with a 33 cycle latency. Occupies one slice and takes
	// 2 dispatches.
	def : InstRW<
	    [P9_DP_33C_8,
	     IP_EXEC_1C,
	     DISP_1C, DISP_1C],
	    (instrs XSDIVDP)>;

	// Restricted DP instruction with a 22 cycle latency. Occupies one slice and
	// takes 3 dispatches.
	def : InstRW<
	    [P9_DP_22C_5,
	     IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs FDIVS)>;

	// Restricted 22 cycle DP instruction that is cracked with a 2 cycle ALU
	// operation.
	def : InstRW<
	    [P9_DPOpAndALUOp_24C_5,
	     IP_EXEC_1C, IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs FDIVSo)>;

	// DP instruction with a 22 cycle latency. Occupies one slice and takes
	// 2 dispatches.
	def : InstRW<
	    [P9_DP_22C_5,
	     IP_EXEC_1C,
	     DISP_1C, DISP_1C],
	    (instrs XSDIVSP)>;

	// DP vector instruction with a 24 cycle latency. It occupies one full
	// superslice: both the EXECE and EXECO pipelines plus all 3 dispatches of
	// that superslice.
	def : InstRW<
	    [P9_DPE_24C_8, P9_DPO_24C_8,
	     IP_EXECE_1C, IP_EXECO_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs XVDIVSP)>;

	// DP vector instruction with a 33 cycle latency. It occupies one full
	// superslice: both the EXECE and EXECO pipelines plus all 3 dispatches of
	// that superslice.
	def : InstRW<
	    [P9_DPE_33C_8, P9_DPO_33C_8,
	     IP_EXECE_1C, IP_EXECO_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs XVDIVDP)>;

	// Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.

	// Instruction cracked into three pieces: one Load and two ALU operations.
	// The Load and one of the ALU ops cannot run at the same time, so their
	// latencies are added together for 6 cycles. The remaining ALU op takes
	// 2 cycles.
	// The load and the ALU op that depends on it are both restricted and take
	// 6 dispatches between them; the last 2 dispatches come from the second ALU
	// op. The two EXEC pipelines serve the 2 ALU ops and the AGEN serves the
	// load.
	def : InstRW<
	    [P9_LoadAndALUOp_6C, P9_ALU_2C,
	     IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs LFSU, LFSUX)>;

	// Cracked instruction consisting of a Load and an ALU op. The ALU op does not
	// depend on the load, so both can run at the same time. The load is
	// restricted and accounts for 3 dispatches; the ALU op accounts for the
	// other two. The AGEN pipeline serves the load and the EXEC pipeline serves
	// the ALU op.
	def : InstRW<
	    [P9_LS_4C, P9_ALU_2C,
	     IP_AGEN_1C, IP_EXEC_1C,
	     DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
	    (instrs LFDU, LFDUX)>;

	// Crypto Instructions

	// CY operation with a 6 cycle latency. The CPU has only one CY unit, so a
	// whole superslice is claimed: both exec pipelines (EXECO, EXECE) and all
	// three dispatches.
	def : InstRW<
	    [P9_CY_6C,
	     IP_EXECO_1C, IP_EXECE_1C,
	     DISP_1C, DISP_1C, DISP_1C],
	    (instrs
	      VPMSUMB, VPMSUMD, VPMSUMH, VPMSUMW,
	      VCIPHER, VCIPHERLAST,
	      VNCIPHER, VNCIPHERLAST,
	      VSBOX)>;