lib/Target/CellSPU/SPUMathInstr.td - llvm - Git at Google

 //===-- SPUMathInstr.td - Cell SPU math operations --------*- tablegen -*--===//
 //
 //                     Cell SPU math operations
 //
 // This target description file contains instruction sequences for various
 // math operations, such as vector multiplies, i32 multiply, etc., for the
 // SPU's i32, i16, i8 and corresponding vector types.
 //
 // Any resemblance to libsimdmath or the Cell SDK simdmath library is
 // purely and completely coincidental.
 //===----------------------------------------------------------------------===//

 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // v16i8 multiply instruction sequence:
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

 // v16i8 multiply: every 32-bit word of the result is assembled from four
 // MPYv8i16 products, one per byte lane.
 //
 //   bytes 2..3 : SELBv4i32 with the FSMBI 0x2222 mask merges the product of
 //                the halfword-shifted operands (moved back up by SHLHI 8)
 //                with the plain product; ANDv4i32 with 0x0000ffff then drops
 //                the upper halfword.
 //   bytes 0..1 : the same merge on operands shifted right by 16 (byte 1 lane)
 //                and by 24 (byte 0 lane); SHLIv4i32 by 16 moves the pair into
 //                the upper halfword before the final ORv4i32.
 //
 // The byte 0 lane needs a word shift of 24. A shift of 8 leaves byte 2 in
 // the low byte of each word, so the upper halfword would receive the byte 2
 // product a second time instead of the byte 0 product.
 // NOTE(review): this reasoning assumes ROTMAIv4i32_i32 is an arithmetic right
 // shift of each word by the immediate, MPYv8i16 multiplies the low halfwords
 // of each word, and FSMBI 0x2222 selects byte 2 of each word - confirm
 // against the instruction definitions.
 def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
           (ORv4i32
            (ANDv4i32
             (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                        (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
                                              (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
                        (FSMBIv8i16 0x2222)),
             (ILAv4i32 0x0000ffff)),
            (SHLIv4i32
             (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
                                  (ROTMAIv4i32_i32 VECREG:$rB, 16)),
                        (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 24),
                                              (ROTMAIv4i32_i32 VECREG:$rB, 24)), 8),
                        (FSMBIv8i16 0x2222)), 16))>;

 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // v8i16 multiply instruction sequence:
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

 // v8i16 multiply: SELBv8i16 merges the MPYv8i16 product with the MPYHHv8i16
 // product shifted left by 16, using the byte mask built by FSMBI 0xcccc.
 // NOTE(review): presumably the mask takes the upper halfword of every word
 // from the shifted MPYHH product and the lower halfword from MPY - confirm
 // against the instruction definitions.
 def : Pat<
   (mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
   (SELBv8i16
     (MPYv8i16 VECREG:$rA, VECREG:$rB),
     (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
     (FSMBIv8i16 0xcccc))>;

 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // v4i32, i32 multiply instruction sequence:
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

 // v4i32 multiply from three partial products: the two MPYHv4i32 terms (same
 // operands, swapped) are combined by the inner Av4i32, and the MPYUv4i32
 // term is combined with that by the outer Av4i32.
 // NOTE(review): Av4i32 is presumably the vector word add - confirm.
 def MPYv4i32 : Pat<
   (mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
   (Av4i32
     (v4i32 (Av4i32
              (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
              (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
     (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;

 // Scalar i32 multiply, same shape as the v4i32 pattern: two MPYHr32 terms
 // with swapped operands are combined by the inner Ar32, then the MPYUr32
 // term is combined with that by the outer Ar32.
 def MPYi32 : Pat<
   (mul R32C:$rA, R32C:$rB),
   (Ar32
     (Ar32
       (MPYHr32 R32C:$rA, R32C:$rB),
       (MPYHr32 R32C:$rB, R32C:$rA)),
     (MPYUr32 R32C:$rA, R32C:$rB))>;

 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // f32, v4f32 divide instruction sequence:
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

 // Step 1 of the f32 divide: reciprocal estimate of the divisor $rB
 // (FRESTf32), interpolated against $rB by FIf32.
 def Interpf32 : CodeFrag<
   (FIf32 R32FP:$rB,
          (FRESTf32 R32FP:$rB))>;
 // Step 2 of the f32 divide: first quotient estimate, FMf32 applied to the
 // dividend $rA and the interpolated reciprocal fragment.
 def DivEstf32 : CodeFrag<
   (FMf32 R32FP:$rA,
          Interpf32.Fragment)>;
 // Step 3 of the f32 divide: one Newton-Raphson refinement of the estimate.
 // FNMSf32 combines the estimate with $rB and $rA; FMAf32 folds that result
 // back into the estimate using the interpolated reciprocal.
 // NOTE(review): presumably est + (rA - est * rB) * recip - confirm the
 // FNMS/FMA operand conventions.
 def NRaphf32 : CodeFrag<
   (FMAf32
     (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
     Interpf32.Fragment,
     DivEstf32.Fragment)>;
 // Step 4 of the f32 divide: the refined quotient with an epsilon added,
 // expressed as AIf32 with immediate 1 on the Newton-Raphson fragment.
 def Epsilonf32 : CodeFrag<
   (AIf32 NRaphf32.Fragment,
          1)>;

 // f32 divide: SELBf32_cond chooses between the Newton-Raphson quotient and
 // its epsilon-adjusted variant. The condition is CGTIf32 against -1 of the
 // FNMSf32 of $rB, the epsilon-adjusted quotient and $rA.
 def : Pat<
   (fdiv R32FP:$rA, R32FP:$rB),
   (SELBf32_cond
     NRaphf32.Fragment,
     Epsilonf32.Fragment,
     (CGTIf32
       (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA),
       -1))>;

 // Step 1 of the v4f32 divide: reciprocal estimate of the divisor $rB
 // (FRESTv4f32), interpolated against $rB by FIv4f32.
 def Interpv4f32 : CodeFrag<
   (FIv4f32 (v4f32 VECREG:$rB),
            (FRESTv4f32 (v4f32 VECREG:$rB)))>;
 // Step 2 of the v4f32 divide: first quotient estimate, FMv4f32 applied to
 // the dividend $rA and the interpolated reciprocal fragment.
 def DivEstv4f32 : CodeFrag<
   (FMv4f32 (v4f32 VECREG:$rA),
            Interpv4f32.Fragment)>;
 // Step 3 of the v4f32 divide: one Newton-Raphson refinement of the estimate.
 // FNMSv4f32 combines the estimate with $rB and $rA; FMAv4f32 folds that
 // result back into the estimate using the interpolated reciprocal.
 // NOTE(review): presumably est + (rA - est * rB) * recip - confirm the
 // FNMS/FMA operand conventions.
 def NRaphv4f32 : CodeFrag<
   (FMAv4f32
     (FNMSv4f32 DivEstv4f32.Fragment,
                (v4f32 VECREG:$rB),
                (v4f32 VECREG:$rA)),
     Interpv4f32.Fragment,
     DivEstv4f32.Fragment)>;
 // Step 4 of the v4f32 divide: the refined quotient with an epsilon added,
 // expressed as AIv4f32 with immediate 1 on the Newton-Raphson fragment.
 def Epsilonv4f32 : CodeFrag<
   (AIv4f32 NRaphv4f32.Fragment,
            1)>;

 // v4f32 divide: SELBv4f32_cond chooses between the Newton-Raphson quotient
 // and its epsilon-adjusted variant. The condition is CGTIv4f32 against -1 of
 // the FNMSv4f32 of $rB, the epsilon-adjusted quotient and $rA.
 def : Pat<
   (fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
   (SELBv4f32_cond
     NRaphv4f32.Fragment,
     Epsilonv4f32.Fragment,
     (CGTIv4f32
       (FNMSv4f32 (v4f32 VECREG:$rB),
                  Epsilonv4f32.Fragment,
                  (v4f32 VECREG:$rA)),
       -1))>;
	//===-- SPUMathInstr.td - Cell SPU math operations --------*- tablegen -*--===//
	//
	// Cell SPU math operations
	//
	// This target description file contains instruction sequences for various
	// math operations, such as vector multiplies, i32 multiply, etc., for the
	// SPU's i32, i16, i8 and corresponding vector types.
	//
	// Any resemblance to libsimdmath or the Cell SDK simdmath library is
	// purely and completely coincidental.
	//===----------------------------------------------------------------------===//

	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
	// v16i8 multiply instruction sequence:
	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

	// v16i8 multiply: every 32-bit word of the result is assembled from four
	// MPYv8i16 products, one per byte lane.
	//
	//   bytes 2..3 : SELBv4i32 with the FSMBI 0x2222 mask merges the product of
	//                the halfword-shifted operands (moved back up by SHLHI 8)
	//                with the plain product; ANDv4i32 with 0x0000ffff then drops
	//                the upper halfword.
	//   bytes 0..1 : the same merge on operands shifted right by 16 (byte 1 lane)
	//                and by 24 (byte 0 lane); SHLIv4i32 by 16 moves the pair into
	//                the upper halfword before the final ORv4i32.
	//
	// The byte 0 lane needs a word shift of 24. A shift of 8 leaves byte 2 in
	// the low byte of each word, so the upper halfword would receive the byte 2
	// product a second time instead of the byte 0 product.
	// NOTE(review): this reasoning assumes ROTMAIv4i32_i32 is an arithmetic right
	// shift of each word by the immediate, MPYv8i16 multiplies the low halfwords
	// of each word, and FSMBI 0x2222 selects byte 2 of each word - confirm
	// against the instruction definitions.
	def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
	          (ORv4i32
	           (ANDv4i32
	            (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
	                       (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
	                                             (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
	                       (FSMBIv8i16 0x2222)),
	            (ILAv4i32 0x0000ffff)),
	           (SHLIv4i32
	            (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
	                                 (ROTMAIv4i32_i32 VECREG:$rB, 16)),
	                       (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 24),
	                                             (ROTMAIv4i32_i32 VECREG:$rB, 24)), 8),
	                       (FSMBIv8i16 0x2222)), 16))>;

	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
	// v8i16 multiply instruction sequence:
	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

	// v8i16 multiply: SELBv8i16 merges the MPYv8i16 product with the MPYHHv8i16
	// product shifted left by 16, using the byte mask built by FSMBI 0xcccc.
	// NOTE(review): presumably the mask takes the upper halfword of every word
	// from the shifted MPYHH product and the lower halfword from MPY - confirm
	// against the instruction definitions.
	def : Pat<
	  (mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
	  (SELBv8i16
	    (MPYv8i16 VECREG:$rA, VECREG:$rB),
	    (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
	    (FSMBIv8i16 0xcccc))>;

	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
	// v4i32, i32 multiply instruction sequence:
	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

	// v4i32 multiply from three partial products: the two MPYHv4i32 terms (same
	// operands, swapped) are combined by the inner Av4i32, and the MPYUv4i32
	// term is combined with that by the outer Av4i32.
	// NOTE(review): a record named MPYv4i32 is also defined earlier in this
	// file, and TableGen does not accept two defs with the same name - confirm
	// whether this second copy should exist.
	def MPYv4i32 : Pat<
	  (mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
	  (Av4i32
	    (v4i32 (Av4i32
	             (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
	             (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
	    (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;

	// Scalar i32 multiply: two MPYHr32 terms with swapped operands are combined
	// by the inner Ar32, then the MPYUr32 term is combined with that by the
	// outer Ar32.
	// NOTE(review): a record named MPYi32 is also defined earlier in this file,
	// and TableGen does not accept two defs with the same name - confirm
	// whether this second copy should exist.
	def MPYi32 : Pat<
	  (mul R32C:$rA, R32C:$rB),
	  (Ar32
	    (Ar32
	      (MPYHr32 R32C:$rA, R32C:$rB),
	      (MPYHr32 R32C:$rB, R32C:$rA)),
	    (MPYUr32 R32C:$rA, R32C:$rB))>;

	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
	// f32, v4f32 divide instruction sequence:
	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

	// Step 1 of the f32 divide: reciprocal estimate of the divisor $rB
	// (FRESTf32), interpolated against $rB by FIf32.
	// NOTE(review): Interpf32 is also defined earlier in this file; TableGen
	// does not accept duplicate def names - confirm this copy should exist.
	def Interpf32 : CodeFrag<
	  (FIf32 R32FP:$rB,
	         (FRESTf32 R32FP:$rB))>;
	// Step 2 of the f32 divide: first quotient estimate, FMf32 applied to the
	// dividend $rA and the interpolated reciprocal fragment.
	// NOTE(review): DivEstf32 is also defined earlier in this file; TableGen
	// does not accept duplicate def names - confirm this copy should exist.
	def DivEstf32 : CodeFrag<
	  (FMf32 R32FP:$rA,
	         Interpf32.Fragment)>;
	// Step 3 of the f32 divide: one Newton-Raphson refinement of the estimate.
	// FNMSf32 combines the estimate with $rB and $rA; FMAf32 folds that result
	// back into the estimate using the interpolated reciprocal.
	// NOTE(review): NRaphf32 is also defined earlier in this file; TableGen
	// does not accept duplicate def names - confirm this copy should exist.
	def NRaphf32 : CodeFrag<
	  (FMAf32
	    (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
	    Interpf32.Fragment,
	    DivEstf32.Fragment)>;
	// Step 4 of the f32 divide: the refined quotient with an epsilon added,
	// expressed as AIf32 with immediate 1 on the Newton-Raphson fragment.
	// NOTE(review): Epsilonf32 is also defined earlier in this file; TableGen
	// does not accept duplicate def names - confirm this copy should exist.
	def Epsilonf32 : CodeFrag<
	  (AIf32 NRaphf32.Fragment,
	         1)>;

	// f32 divide: SELBf32_cond chooses between the Newton-Raphson quotient and
	// its epsilon-adjusted variant. The condition is CGTIf32 against -1 of the
	// FNMSf32 of $rB, the epsilon-adjusted quotient and $rA.
	def : Pat<
	  (fdiv R32FP:$rA, R32FP:$rB),
	  (SELBf32_cond
	    NRaphf32.Fragment,
	    Epsilonf32.Fragment,
	    (CGTIf32
	      (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA),
	      -1))>;

	// Step 1 of the v4f32 divide: reciprocal estimate of the divisor $rB
	// (FRESTv4f32), interpolated against $rB by FIv4f32.
	// NOTE(review): Interpv4f32 is also defined earlier in this file; TableGen
	// does not accept duplicate def names - confirm this copy should exist.
	def Interpv4f32 : CodeFrag<
	  (FIv4f32 (v4f32 VECREG:$rB),
	           (FRESTv4f32 (v4f32 VECREG:$rB)))>;
	// Step 2 of the v4f32 divide: first quotient estimate, FMv4f32 applied to
	// the dividend $rA and the interpolated reciprocal fragment.
	// NOTE(review): DivEstv4f32 is also defined earlier in this file; TableGen
	// does not accept duplicate def names - confirm this copy should exist.
	def DivEstv4f32 : CodeFrag<
	  (FMv4f32 (v4f32 VECREG:$rA),
	           Interpv4f32.Fragment)>;
	// Step 3 of the v4f32 divide: one Newton-Raphson refinement of the estimate.
	// FNMSv4f32 combines the estimate with $rB and $rA; FMAv4f32 folds that
	// result back into the estimate using the interpolated reciprocal.
	// NOTE(review): NRaphv4f32 is also defined earlier in this file; TableGen
	// does not accept duplicate def names - confirm this copy should exist.
	def NRaphv4f32 : CodeFrag<
	  (FMAv4f32
	    (FNMSv4f32 DivEstv4f32.Fragment,
	               (v4f32 VECREG:$rB),
	               (v4f32 VECREG:$rA)),
	    Interpv4f32.Fragment,
	    DivEstv4f32.Fragment)>;
	// Step 4 of the v4f32 divide: the refined quotient with an epsilon added,
	// expressed as AIv4f32 with immediate 1 on the Newton-Raphson fragment.
	// NOTE(review): Epsilonv4f32 is also defined earlier in this file; TableGen
	// does not accept duplicate def names - confirm this copy should exist.
	def Epsilonv4f32 : CodeFrag<
	  (AIv4f32 NRaphv4f32.Fragment,
	           1)>;

	// v4f32 divide: SELBv4f32_cond chooses between the Newton-Raphson quotient
	// and its epsilon-adjusted variant. The condition is CGTIv4f32 against -1 of
	// the FNMSv4f32 of $rB, the epsilon-adjusted quotient and $rA.
	def : Pat<
	  (fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
	  (SELBv4f32_cond
	    NRaphv4f32.Fragment,
	    Epsilonv4f32.Fragment,
	    (CGTIv4f32
	      (FNMSv4f32 (v4f32 VECREG:$rB),
	                 Epsilonv4f32.Fragment,
	                 (v4f32 VECREG:$rA)),
	      -1))>;