mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
8b6f5df4ae
The "half vectors" are now widened to full size by the legalizer. The only exception is in parameter passing, where half vectors are expanded. This causes changes to some dejagnu tests. llvm-svn: 111360
98 lines
4.4 KiB
TableGen
98 lines
4.4 KiB
TableGen
//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
|
|
//
|
|
// Cell SPU math operations
|
|
//
|
|
// This target description file contains instruction sequences for various
|
|
// math operations, such as vector multiplies, i32 multiply, etc., for the
|
|
// SPU's i32, i16, i8 and corresponding vector types.
|
|
//
|
|
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
|
|
// purely and completely coincidental.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
// v16i8 multiply instruction sequence:
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
|
|
// Selection pattern: `mul` of two v16i8 VECREG operands.
//
// The replacement DAG is an ORv4i32 of two sub-trees that share one shape:
// a SELBv4i32 whose three operands are
//   1. an MPYv8i16 product,
//   2. a second MPYv8i16 product wrapped in SHLHIv8i16 with immediate 8,
//   3. the select mask FSMBIv8i16 0x2222.
// The first sub-tree feeds $rA/$rB in directly (ROTMAHIv8i16 by 8 for the
// second product) and is ANDed with ILAv4i32 0x0000ffff. The second sub-tree
// feeds the operands through ROTMAIv4i32_i32 (by 16, resp. by 8) and is then
// passed to SHLIv4i32 with immediate 16.
// NOTE(review): presumably the first sub-tree supplies the low 16 bits of each
// word and the second the high 16 bits -- confirm against the SPU ISA.
def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
  (ORv4i32
    // Sub-tree 1: operands as given, result masked with 0x0000ffff.
    (ANDv4i32
      (SELBv4i32
        (MPYv8i16 VECREG:$rA, VECREG:$rB),
        (SHLHIv8i16
          (MPYv8i16
            (ROTMAHIv8i16 VECREG:$rA, 8),
            (ROTMAHIv8i16 VECREG:$rB, 8)),
          8),
        (FSMBIv8i16 0x2222)),
      (ILAv4i32 0x0000ffff)),
    // Sub-tree 2: operands run through ROTMAIv4i32_i32, result shifted by 16.
    (SHLIv4i32
      (SELBv4i32
        (MPYv8i16
          (ROTMAIv4i32_i32 VECREG:$rA, 16),
          (ROTMAIv4i32_i32 VECREG:$rB, 16)),
        (SHLHIv8i16
          (MPYv8i16
            (ROTMAIv4i32_i32 VECREG:$rA, 8),
            (ROTMAIv4i32_i32 VECREG:$rB, 8)),
          8),
        (FSMBIv8i16 0x2222)),
      16))>;
|
|
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
// v8i16 multiply instruction sequence:
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
|
|
// Selection pattern: `mul` of two v8i16 VECREG operands.
//
// Emits a SELBv8i16 that chooses, under the mask FSMBIv8i16 0xcccc, between
//   * MPYv8i16 of the two operands, and
//   * MPYHHv8i16 of the two operands passed through SHLIv4i32 with
//     immediate 16.
// NOTE(review): presumably the two products cover the two halfwords of each
// word -- confirm against the SPU ISA.
def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
  (SELBv8i16
    (MPYv8i16 VECREG:$rA, VECREG:$rB),
    (SHLIv4i32
      (MPYHHv8i16 VECREG:$rA, VECREG:$rB),
      16),
    (FSMBIv8i16 0xcccc))>;
|
|
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
// v4i32, i32 multiply instruction sequence:
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
|
|
// Selection pattern: `mul` of two v4i32 VECREG operands.
//
// Three partial products are combined with two nested Av4i32 nodes:
//   inner Av4i32: MPYHv4i32($rA, $rB) and MPYHv4i32($rB, $rA)
//   outer Av4i32: the inner result and MPYUv4i32($rA, $rB)
// Every intermediate node is explicitly typed as v4i32.
// NOTE(review): presumably MPYH yields the two cross terms and MPYU the
// low-halfword product -- confirm against the SPU ISA.
def MPYv4i32:
  Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
      (Av4i32
        (v4i32
          (Av4i32
            (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
            (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
        (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
|
|
|
|
// Selection pattern: `mul` of two scalar R32C operands.
//
// Same shape as the v4i32 multiply pattern, using the r32 instruction forms:
//   inner Ar32: MPYHr32($rA, $rB) and MPYHr32($rB, $rA)
//   outer Ar32: the inner result and MPYUr32($rA, $rB)
def MPYi32:
  Pat<(mul R32C:$rA, R32C:$rB),
      (Ar32
        (Ar32
          (MPYHr32 R32C:$rA, R32C:$rB),
          (MPYHr32 R32C:$rB, R32C:$rA)),
        (MPYUr32 R32C:$rA, R32C:$rB))>;
|
|
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
// f32, v4f32 divide instruction sequence:
|
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
|
|
|
// Reciprocal estimate followed by interpolation, for the f32 divisor $rB:
// FIf32 is given $rB together with FRESTf32 of $rB. The operand name $rB is
// bound by the pattern that ultimately embeds this fragment.
def Interpf32:
  CodeFrag<(FIf32 R32FP:$rB,
                  (FRESTf32 R32FP:$rB))>;
|
|
// First estimate of the f32 quotient: FMf32 of the dividend $rA and the
// interpolated reciprocal fragment (Interpf32).
def DivEstf32:
  CodeFrag<(FMf32 R32FP:$rA,
                  Interpf32.Fragment)>;
|
|
// One Newton-Raphson refinement step for f32 division.
//
// FMAf32 takes three operands:
//   1. FNMSf32(DivEstf32, $rB, $rA)
//   2. the interpolated reciprocal (Interpf32)
//   3. the current quotient estimate (DivEstf32)
// NOTE(review): operand 1 is presumably the residual rA - est * rB -- confirm
// the FNMS operand order against the SPU ISA.
def NRaphf32:
  CodeFrag<(FMAf32
             (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
             Interpf32.Fragment,
             DivEstf32.Fragment)>;
|
|
// Epsilon-adjusted quotient: AIf32 applied to the Newton-Raphson result with
// immediate 1.
// NOTE(review): presumably an integer add on the float's bit pattern, i.e. a
// one-ulp bump -- confirm against the AIf32 definition.
def Epsilonf32:
  CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
|
|
|
|
// Selection pattern: `fdiv` of two scalar R32FP operands.
//
// SELBf32_cond picks between the Newton-Raphson quotient (NRaphf32) and the
// epsilon-adjusted quotient (Epsilonf32). The select condition is
//   CGTIf32(FNMSf32($rB, Epsilonf32, $rA), -1)
// NOTE(review): presumably this keeps the epsilon-adjusted quotient only when
// its residual against $rA is still non-negative -- confirm the SELB operand
// order and FNMS semantics against the SPU ISA.
def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
  (SELBf32_cond
    NRaphf32.Fragment,
    Epsilonf32.Fragment,
    (CGTIf32
      (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA),
      -1))>;
|
|
|
|
// Reciprocal estimate followed by interpolation, for the v4f32 divisor $rB:
// FIv4f32 is given $rB together with FRESTv4f32 of $rB. The operand name $rB
// is bound by the pattern that ultimately embeds this fragment.
def Interpv4f32:
  CodeFrag<(FIv4f32 (v4f32 VECREG:$rB),
                    (FRESTv4f32 (v4f32 VECREG:$rB)))>;
|
|
// First estimate of the v4f32 quotient: FMv4f32 of the dividend $rA and the
// interpolated reciprocal fragment (Interpv4f32).
def DivEstv4f32:
  CodeFrag<(FMv4f32 (v4f32 VECREG:$rA),
                    Interpv4f32.Fragment)>;
|
|
// One Newton-Raphson refinement step for v4f32 division.
//
// FMAv4f32 takes three operands:
//   1. FNMSv4f32(DivEstv4f32, $rB, $rA)
//   2. the interpolated reciprocal (Interpv4f32)
//   3. the current quotient estimate (DivEstv4f32)
// NOTE(review): operand 1 is presumably the residual rA - est * rB -- confirm
// the FNMS operand order against the SPU ISA.
def NRaphv4f32:
  CodeFrag<(FMAv4f32
             (FNMSv4f32 DivEstv4f32.Fragment,
                        (v4f32 VECREG:$rB),
                        (v4f32 VECREG:$rA)),
             Interpv4f32.Fragment,
             DivEstv4f32.Fragment)>;
|
|
// Epsilon-adjusted quotient: AIv4f32 applied to the Newton-Raphson result
// with immediate 1.
// NOTE(review): presumably an integer add on each lane's bit pattern, i.e. a
// one-ulp bump -- confirm against the AIv4f32 definition.
def Epsilonv4f32:
  CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
|
|
|
|
// Selection pattern: `fdiv` of two v4f32 VECREG operands.
//
// SELBv4f32_cond picks between the Newton-Raphson quotient (NRaphv4f32) and
// the epsilon-adjusted quotient (Epsilonv4f32). The select condition is
//   CGTIv4f32(FNMSv4f32($rB, Epsilonv4f32, $rA), -1)
// NOTE(review): presumably this keeps the epsilon-adjusted quotient only in
// lanes whose residual against $rA is still non-negative -- confirm the SELB
// operand order and FNMS semantics against the SPU ISA.
def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
  (SELBv4f32_cond
    NRaphv4f32.Fragment,
    Epsilonv4f32.Fragment,
    (CGTIv4f32
      (FNMSv4f32 (v4f32 VECREG:$rB),
                 Epsilonv4f32.Fragment,
                 (v4f32 VECREG:$rA)),
      -1))>;
|