1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

[PowerPC] Add new infrastructure to select load/store instructions, update P8/P9 load/store patterns.

This patch introduces a new infrastructure that is used to select the load and
store instructions in the PPC backend.

The primary motivation is that the current implementation of selecting load/stores
is dependent on the ordering of patterns in TableGen. Given this limitation, we
are not able to easily and reliably generate the P10 prefixed load and store
instructions (such as when the immediates fit within 34 bits). This
refactoring is meant to provide us with more control over the patterns/different
forms to exploit, as well as eliminating the dependency on pattern declaration order in TableGen.

The idea of this refactoring is that it introduces a set of addressing modes that
correspond to different instruction formats of a particular load and store
instruction, along with a set of common flags that describes a load/store.
Whenever a load/store instruction is being selected, we analyze the instruction
and compute a set of flags for it. The computed flags are then used to
select the most optimal load/store addressing mode.

This patch is the first of a series of patches to be committed - it contains the
initial implementation of the refactored load/store selection infrastructure and
also updates P8/P9 patterns to adopt this infrastructure. The idea is that
incremental patches will add more implementation and support, and eventually
the old implementation will be removed.

Differential Revision: https://reviews.llvm.org/D93370
This commit is contained in:
Amy Kwan 2021-04-27 22:37:02 -05:00
parent f8a477d8bf
commit f189e0c45f
9 changed files with 1144 additions and 601 deletions

View File

@ -229,6 +229,45 @@ namespace {
return false;
}
/// SelectDSForm - Returns true if address N can be represented by the
/// addressing mode of DSForm instructions (a base register, plus a signed
/// 16-bit displacement that is a multiple of 4).
/// Disp and Base are filled in by PPCTargetLowering::SelectOptimalAddrMode(),
/// which is asked to fold only 4-byte-aligned displacements.
bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
  const auto ChosenMode = PPCLowering->SelectOptimalAddrMode(
      Parent, N, Disp, Base, *CurDAG, Align(4));
  return ChosenMode == PPC::AM_DSForm;
}
/// SelectDQForm - Returns true if address N can be represented by the
/// addressing mode of DQForm instructions (a base register, plus a signed
/// 16-bit displacement that is a multiple of 16).
/// Disp and Base are filled in by PPCTargetLowering::SelectOptimalAddrMode(),
/// which is asked to fold only 16-byte-aligned displacements.
bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
  const auto ChosenMode = PPCLowering->SelectOptimalAddrMode(
      Parent, N, Disp, Base, *CurDAG, Align(16));
  return ChosenMode == PPC::AM_DQForm;
}
/// SelectDForm - Returns true if address N can be represented by
/// the addressing mode of DForm instructions (a base register, plus a
/// signed 16-bit immediate).
/// No alignment requirement is passed on (None), so any displacement
/// accepted by PPCTargetLowering::SelectOptimalAddrMode() may be folded.
bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
  const auto ChosenMode = PPCLowering->SelectOptimalAddrMode(
      Parent, N, Disp, Base, *CurDAG, None);
  return ChosenMode == PPC::AM_DForm;
}
/// SelectXForm - Returns true if address N can be represented by the
/// addressing mode of XForm instructions (an indexed [r+r] operation).
/// Disp and Base are filled in by PPCTargetLowering::SelectOptimalAddrMode();
/// no displacement alignment is requested (None).
bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
  const auto ChosenMode = PPCLowering->SelectOptimalAddrMode(
      Parent, N, Disp, Base, *CurDAG, None);
  return ChosenMode == PPC::AM_XForm;
}
/// SelectForceXForm - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
/// Parent is not consulted; the decision is delegated to
/// PPCTargetLowering::SelectForceXFormMode(), which fills in Disp and Base.
bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
                      SDValue &Base) {
  const auto ForcedMode =
      PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG);
  return ForcedMode == PPC::AM_XForm;
}
/// SelectAddrIdx - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation.
/// This is for xform instructions whose associated displacement form is D.

View File

@ -136,6 +136,10 @@ extern cl::opt<bool> ANDIGlueBug;
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
const PPCSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
// Initialize map that relates the PPC addressing modes to the computed flags
// of a load/store instruction. The map is used to determine the optimal
// addressing mode when selecting load and stores.
initializeAddrModeMap();
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64();
@ -1424,6 +1428,84 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
}
// *********************************** NOTE ************************************
// For selecting load and store instructions, the addressing modes are defined
// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
//
// The TD definitions for the addressing modes correspond to their respective
// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
// address mode flags of a particular node. Afterwards, the computed address
// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
// accordingly, based on the preferred addressing mode.
//
// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
// MemOpFlags contains all the possible flags that can be used to compute the
// optimal addressing mode for load and store instructions.
// AddrMode contains all the possible load and store addressing modes available
// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
//
// When adding new load and store instructions, it is possible that new address
// flags may need to be added into MemOpFlags, and a new addressing mode will
// need to be added to AddrMode. An entry of the new addressing mode (consisting
// of the minimal and main distinguishing address flags for the new load/store
// instructions) will need to be added into initializeAddrModeMap() below.
// Finally, when adding new addressing modes, the getAddrModeForFlags() will
// need to be updated to account for selecting the optimal addressing mode.
// *****************************************************************************
/// Initialize the map that relates the different addressing modes of the load
/// and store instructions to a set of flags. This ensures the load/store
/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
AddrModesMap[PPC::AM_DForm] = {
// LWZ, STW
PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
// LBZ, LHZ, STB, STH
PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
// LHA
PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
// LFS, LFD, STFS, STFD
PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
};
AddrModesMap[PPC::AM_DSForm] = {
// LWA
PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
// LD, STD
PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
// DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
};
AddrModesMap[PPC::AM_DQForm] = {
// LXV, STXV
PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
PPC::MOF_NotAddNorCst | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
};
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
@ -2436,6 +2518,20 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
/// Helper for the load/store address-flag computation.
/// Returns true when N is an OR whose two operands can be proven to share no
/// set bits. Such an OR produces the same value as an ADD, and since most PPC
/// load/store instructions form the effective address as a sum, the OR can
/// then be treated as an addition.
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
  if (N.getOpcode() == ISD::OR) {
    const KnownBits FirstKnown = DAG.computeKnownBits(N.getOperand(0));
    // If nothing is known to be zero in the first operand, the check below
    // would rest on the second operand alone; give up early instead.
    if (FirstKnown.Zero.getBoolValue()) {
      const KnownBits SecondKnown = DAG.computeKnownBits(N.getOperand(1));
      // Disjoint iff every bit position is known zero in at least one operand.
      const auto ZeroInEither = FirstKnown.Zero | SecondKnown.Zero;
      return ~ZeroInEither == 0;
    }
  }
  return false;
}
/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
/// be represented as an indexed [r+r] operation.
@ -16836,3 +16932,343 @@ SDValue PPCTargetLowering::combineVSelect(SDNode *N,
return SDValue();
}
/// getAddrModeForFlags - Map a set of computed address flags to the
/// instruction format that should be used to match the load/store.
///
/// \param Flags bitwise OR of PPC::MemOpFlags describing the memory access.
/// \returns PPC::AM_None when Flags is PPC::MOF_None, otherwise the first of
///          D-Form, DS-Form, DQ-Form that has a registered flag set fully
///          contained in Flags, falling back to PPC::AM_XForm.
PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
  // An empty flag set marks a node that is not handled by this mechanism.
  if (Flags == PPC::MOF_None)
    return PPC::AM_None;

  // Priority order: the unaligned D-Form first, then the aligned
  // displacement forms (DS, then DQ).
  const PPC::AddrMode DisplacementForms[] = {PPC::AM_DForm, PPC::AM_DSForm,
                                             PPC::AM_DQForm};
  for (PPC::AddrMode Candidate : DisplacementForms)
    for (unsigned Required : AddrModesMap.at(Candidate))
      if ((Flags & Required) == Required)
        return Candidate;

  // Nothing more specific applies; X-Form is the most general addressing mode.
  return PPC::AM_XForm;
}
/// Adjust the displacement-alignment flags in FlagSet according to the
/// alignment of a frame index. Used while computing the address flags for
/// load/store selection.
///
/// N is either a frame index itself or an ADD/OR whose first operand is one;
/// for anything else FlagSet is left untouched.
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
                               SelectionDAG &DAG) {
  const unsigned Opcode = N.getOpcode();
  const bool HasOffset = Opcode == ISD::ADD || Opcode == ISD::OR;
  SDValue FrameOperand = HasOffset ? N.getOperand(0) : N;
  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(FrameOperand);
  if (!FI)
    return;

  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  unsigned ObjectAlign = MFI.getObjectAlign(FI->getIndex()).value();
  const bool AlignedTo4 = (ObjectAlign % 4) == 0;
  const bool AlignedTo16 = (ObjectAlign % 16) == 0;

  // For (add $FI, $S16Imm) the flags were already derived from the immediate,
  // so the only job here is to drop any flag the frame object's own alignment
  // cannot guarantee.
  if (!AlignedTo4)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
  if (!AlignedTo16)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
  if (HasOffset)
    return;

  // A bare frame index: the flags come directly from the object alignment.
  if (AlignedTo4)
    FlagSet |= PPC::MOF_RPlusSImm16Mult4;
  if (AlignedTo16)
    FlagSet |= PPC::MOF_RPlusSImm16Mult16;
}
/// Classify how the address N is formed and record the result in FlagSet.
/// Three shapes are distinguished: a constant address, an addition (an ADD or
/// a provably disjoint OR), and everything else. The flags added here are the
/// "address computation" group of PPC::MemOpFlags.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
                                              SelectionDAG &DAG) {
  // Record whether a displacement is a multiple of 4 and/or of 16.
  auto MarkDisplacementAlignment = [&FlagSet](uint64_t Displacement) {
    if ((Displacement & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Displacement & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  };

  // Shape 1: the whole address is a constant.
  if (ConstantSDNode *AddrCst = dyn_cast<ConstantSDNode>(N)) {
    const APInt &Addr = AddrCst->getAPIntValue();
    // Any signed 32-bit constant can be formed as LIS + Disp.
    if (Addr.isSignedIntN(32)) {
      FlagSet |= PPC::MOF_AddrIsSImm32;
      MarkDisplacementAlignment(Addr.getZExtValue());
      setAlignFlagsForFI(N, FlagSet, DAG);
    }
    // Signed 34-bit constants get their own flag; anything larger is left to
    // constant materialization.
    FlagSet |= Addr.isSignedIntN(34) ? PPC::MOF_RPlusSImm34
                                     : PPC::MOF_NotAddNorCst;
    return;
  }

  // Shape 3: neither a constant nor an addition.
  if (N.getOpcode() != ISD::ADD && !provablyDisjointOr(DAG, N)) {
    setAlignFlagsForFI(N, FlagSet, DAG);
    FlagSet |= PPC::MOF_NotAddNorCst;
    return;
  }

  // Shape 2: an addition, which is one of
  //   - Register + Imm16 (possibly a multiple of 4/16)
  //   - Register + Imm34
  //   - Register + PPCISD::Lo
  //   - Register + Register
  // so it never has to be matched as Base + Zero.
  SDValue Addend = N.getOperand(1);
  ConstantSDNode *ImmNode = dyn_cast<ConstantSDNode>(Addend);
  if (!ImmNode) {
    // Only a PPCISD::Lo whose second operand is zero counts as a @lo addend.
    const bool IsLoAddend =
        Addend.getOpcode() == PPCISD::Lo &&
        !cast<ConstantSDNode>(Addend.getOperand(1))->getZExtValue();
    FlagSet |= IsLoAddend ? PPC::MOF_RPlusLo : PPC::MOF_RPlusR;
    return;
  }

  const APInt &Imm = ImmNode->getAPIntValue();
  if (Imm.isSignedIntN(16)) {
    FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
    MarkDisplacementAlignment(Imm.getZExtValue());
    setAlignFlagsForFI(N, FlagSet, DAG);
  }
  // Signed 34-bit immediates get their own flag; wider constants are treated
  // like a register operand.
  FlagSet |= Imm.isSignedIntN(34) ? PPC::MOF_RPlusSImm34 : PPC::MOF_RPlusR;
}
/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
SelectionDAG &DAG) const {
unsigned FlagSet = PPC::MOF_None;
// Compute subtarget flags.
if (!Subtarget.hasP9Vector())
FlagSet |= PPC::MOF_SubtargetBeforeP9;
else {
FlagSet |= PPC::MOF_SubtargetP9;
if (Subtarget.hasPrefixInstrs())
FlagSet |= PPC::MOF_SubtargetP10;
}
if (Subtarget.hasSPE())
FlagSet |= PPC::MOF_SubtargetSPE;
// Mark this as something we don't want to handle here if it is atomic
// or pre-increment instruction.
if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
if (LSB->isIndexed())
return PPC::MOF_None;
if (isa<AtomicSDNode>(Parent))
return PPC::MOF_None;
// Compute in-memory type flags. This is based on if there are scalars,
// floats or vectors.
const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
assert(MN && "Parent should be a MemSDNode!");
EVT MemVT = MN->getMemoryVT();
unsigned Size = MemVT.getSizeInBits();
if (MemVT.isScalarInteger()) {
assert(Size <= 64 && "Not expecting scalar integers larger than 8 bytes!");
if (Size < 32)
FlagSet |= PPC::MOF_SubWordInt;
else if (Size == 32)
FlagSet |= PPC::MOF_WordInt;
else
FlagSet |= PPC::MOF_DoubleWordInt;
} else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
if (Size == 128)
FlagSet |= PPC::MOF_Vector;
else if (Size == 256)
FlagSet |= PPC::MOF_Vector256;
else
llvm_unreachable("Not expecting illegal vectors!");
} else { // Floating point type: can be scalar, f128 or vector types.
if (Size == 32 || Size == 64)
FlagSet |= PPC::MOF_ScalarFloat;
else if (MemVT == MVT::f128 || MemVT.isVector())
FlagSet |= PPC::MOF_Vector;
else
llvm_unreachable("Not expecting illegal scalar floats!");
}
// Compute flags for address computation.
computeFlagsForAddressComputation(N, FlagSet, DAG);
// Compute type extension flags.
if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
switch (LN->getExtensionType()) {
case ISD::SEXTLOAD:
FlagSet |= PPC::MOF_SExt;
break;
case ISD::EXTLOAD:
case ISD::ZEXTLOAD:
FlagSet |= PPC::MOF_ZExt;
break;
case ISD::NON_EXTLOAD:
FlagSet |= PPC::MOF_NoExt;
break;
}
} else
FlagSet |= PPC::MOF_NoExt;
// For integers, no extension is the same as zero extension.
// We set the extension mode to zero extension so we don't have
// to add separate entries in AddrModesMap for loads and stores.
if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
FlagSet |= PPC::MOF_ZExt;
FlagSet &= ~PPC::MOF_NoExt;
}
// If we don't have prefixed instructions, 34-bit constants should be
// treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
bool IsNonP1034BitConst =
((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
FlagSet) == PPC::MOF_RPlusSImm34;
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
IsNonP1034BitConst)
FlagSet |= PPC::MOF_NotAddNorCst;
return FlagSet;
}
/// SelectForceXFormMode - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
///
/// Disp and Base receive the two register operands. When N is not split into
/// its operands, the zero register is used for Disp and N itself for Base.
/// \returns always PPC::AM_XForm.
PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
                                                      SDValue &Base,
                                                      SelectionDAG &DAG) const {
  int16_t IgnoredImm = 0;

  // A provably disjoint OR acts like an ADD; split it unless its second
  // operand is a signed 16-bit immediate.
  bool UseOperands = provablyDisjointOr(DAG, N) &&
                     !isIntS16Immediate(N.getOperand(1), IgnoredImm);

  // An ADD already matches the implicit add of the indexed form. Keep it
  // whole only when it is (value + signed 16-bit constant) and both operands
  // have a single use; materializing that constant just to serve as the index
  // register would add register pressure for no benefit.
  if (!UseOperands && N.getOpcode() == ISD::ADD) {
    const bool IsSingleUseImmAdd =
        isIntS16Immediate(N.getOperand(1), IgnoredImm) &&
        N.getOperand(1).hasOneUse() && N.getOperand(0).hasOneUse();
    UseOperands = !IsSingleUseImmAdd;
  }

  if (UseOperands) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
  } else {
    // Otherwise, pair the zero register with the address itself.
    Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                           N.getValueType());
    Base = N;
  }
  return PPC::AM_XForm;
}
/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
///
/// \param Parent the memory node whose address operand is being matched.
/// \param N      the address to decompose.
/// \param Disp   [out] displacement operand (for X-Form: the first register).
/// \param Base   [out] base operand (for X-Form: the second register).
/// \param DAG    DAG used to create any new nodes.
/// \param Align  alignment a folded immediate displacement must satisfy, or
///               None when the caller's form has no such requirement.
/// \returns the mode chosen by getAddrModeForFlags(). For PPC::AM_None, Disp
///          and Base are left untouched.
PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
                                                       SDValue N, SDValue &Disp,
                                                       SDValue &Base,
                                                       SelectionDAG &DAG,
                                                       MaybeAlign Align) const {
  SDLoc DL(Parent);

  // Compute the address flags.
  unsigned Flags = computeMOFlags(Parent, N, DAG);

  // Get the optimal address mode based on the Flags.
  PPC::AddrMode Mode = getAddrModeForFlags(Flags);

  // Set Base and Disp accordingly depending on the address mode.
  switch (Mode) {
  case PPC::AM_DForm:
  case PPC::AM_DSForm:
  case PPC::AM_DQForm: {
    // This is a register plus a 16-bit immediate. The base will be the
    // register and the displacement will be the immediate unless it
    // isn't sufficiently aligned.
    if (Flags & PPC::MOF_RPlusSImm16) {
      SDValue Op0 = N.getOperand(0);
      SDValue Op1 = N.getOperand(1);
      // MOF_RPlusSImm16 is only ever set when operand 1 is a ConstantSDNode,
      // so a checked cast<> states that invariant instead of dereferencing an
      // unchecked dyn_cast<> result.
      int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
      if (!Align || isAligned(*Align, Imm)) {
        Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
        Base = Op0;
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        }
        break;
      }
    }
    // This is a register plus the @lo relocation. The base is the register
    // and the displacement is the global address.
    else if (Flags & PPC::MOF_RPlusLo) {
      Disp = N.getOperand(1).getOperand(0); // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      break;
    }
    // This is a constant address at most 32 bits. The base will be
    // zero or load-immediate-shifted and the displacement will be
    // the low 16 bits of the address.
    else if (Flags & PPC::MOF_AddrIsSImm32) {
      // MOF_AddrIsSImm32 is only ever set when N itself is a ConstantSDNode.
      ConstantSDNode *CN = cast<ConstantSDNode>(N);
      EVT CNType = CN->getValueType(0);
      uint64_t CNImm = CN->getZExtValue();
      // If this address fits entirely in a 16-bit sext immediate field, codegen
      // this as "d, 0".
      int16_t Imm;
      if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
        Disp = DAG.getTargetConstant(Imm, DL, CNType);
        Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               CNType);
        break;
      }
      // Handle 32-bit sext immediate with LIS + Addr mode.
      if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
          (!Align || isAligned(*Align, CNImm))) {
        int32_t Addr = (int32_t)CNImm;
        // Otherwise, break this down into LIS + Disp.
        Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
        Base =
            DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
        uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
        break;
      }
    }
    // Reached when nothing above could be folded: either only
    // PPC::MOF_NotAddNorCst applies, or the immediate/constant did not meet
    // the required alignment. The load/store is non-foldable, so use the
    // whole address as the base with a zero displacement.
    Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
    } else
      Base = N;
    break;
  }
  case PPC::AM_None:
    break;
  default: { // By default, X-Form is always available to be selected.
    // When a frame index is not aligned, we also match by XForm.
    // NOTE(review): for a non-frame-index N this reads operands 0 and 1, so
    // it presumably relies on N being a two-operand add-like node whenever
    // X-Form is chosen - confirm against getAddrModeForFlags().
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
    Base = FI ? N : N.getOperand(1);
    Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                                N.getValueType())
              : N.getOperand(0);
    break;
  }
  }
  return Mode;
}

View File

@ -671,6 +671,49 @@ namespace llvm {
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
// Flags for computing the optimal addressing mode for loads and stores.
// Every enumerator other than MOF_None is a distinct single bit, so a memory
// access is described by OR-ing flags from the groups below into one unsigned
// value. The groups occupy separate bit ranges with unused bits between them.
enum MemOpFlags {
// No flags; used to mark a node that this mechanism does not handle.
MOF_None = 0,
// Extension mode for integer loads.
MOF_SExt = 1,
MOF_ZExt = 1 << 1,
MOF_NoExt = 1 << 2,
// Address computation flags.
MOF_NotAddNorCst = 1 << 5, // Not const. or sum of ptr and scalar.
MOF_RPlusSImm16 = 1 << 6, // Reg plus signed 16-bit constant.
MOF_RPlusLo = 1 << 7, // Reg plus signed 16-bit relocation
MOF_RPlusSImm16Mult4 = 1 << 8, // Reg plus 16-bit signed multiple of 4.
MOF_RPlusSImm16Mult16 = 1 << 9, // Reg plus 16-bit signed multiple of 16.
MOF_RPlusSImm34 = 1 << 10, // Reg plus 34-bit signed constant.
MOF_RPlusR = 1 << 11, // Sum of two variables.
MOF_PCRel = 1 << 12, // PC-Relative relocation.
MOF_AddrIsSImm32 = 1 << 13, // A simple 32-bit constant.
// The in-memory type.
MOF_SubWordInt = 1 << 15, // Scalar integer narrower than 32 bits.
MOF_WordInt = 1 << 16, // 32-bit scalar integer.
MOF_DoubleWordInt = 1 << 17, // Scalar integer wider than 32 bits.
MOF_ScalarFloat = 1 << 18, // Scalar single or double precision.
MOF_Vector = 1 << 19, // Vector types and quad precision scalars.
MOF_Vector256 = 1 << 20, // 256-bit vector types.
// Subtarget features.
MOF_SubtargetBeforeP9 = 1 << 22,
MOF_SubtargetP9 = 1 << 23,
MOF_SubtargetP10 = 1 << 24,
MOF_SubtargetSPE = 1 << 25
};
// The addressing modes for loads and stores.
enum AddrMode {
AM_None, // The node is not handled by the addressing-mode selection.
AM_DForm, // Base register plus a signed 16-bit immediate.
AM_DSForm, // Base register plus a signed 16-bit multiple of 4.
AM_DQForm, // Base register plus a signed 16-bit multiple of 16.
AM_XForm, // Indexed [r+r]; the fallback when no other form applies.
};
} // end namespace PPC
class PPCTargetLowering : public TargetLowering {
@ -1041,6 +1084,18 @@ namespace llvm {
unsigned JTI,
MCContext &Ctx) const override;
/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode
/// based on the flags, and set the Base and Disp based on the address mode.
/// Align, when provided, is the alignment an immediate displacement must
/// satisfy in order to be folded into Disp.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N,
SDValue &Disp, SDValue &Base,
SelectionDAG &DAG,
MaybeAlign Align) const;
/// SelectForceXFormMode - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
/// Disp and Base receive the two register operands of that operation.
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;
/// Structure that collects some common arguments that get passed around
/// between the functions for call lowering.
struct CallFlags {
@ -1083,6 +1138,10 @@ namespace llvm {
}
};
// Map from each PPC addressing mode to the list of flag sets that select it.
// A load/store matches a mode when its computed flags contain every bit of
// at least one of that mode's flag sets.
std::map<PPC::AddrMode, SmallVector<unsigned, 16>> AddrModesMap;
// Fills AddrModesMap; called from the PPCTargetLowering constructor.
void initializeAddrModeMap();
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
@ -1314,6 +1373,17 @@ namespace llvm {
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
/// getAddrModeForFlags - Based on the set of address flags, select the
/// optimal instruction format to match by. Flags is a bitwise OR of
/// PPC::MemOpFlags values.
PPC::AddrMode getAddrModeForFlags(unsigned Flags) const;
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
/// The returned flags are compared against the flag sets held in
/// AddrModesMap to determine the optimal load/store instruction format.
unsigned computeMOFlags(const SDNode *Parent, SDValue N,
SelectionDAG &DAG) const;
}; // end class PPCTargetLowering
namespace PPC {

View File

@ -1062,21 +1062,21 @@ let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
"lha $rD, $src", IIC_LdStLHA,
[(set i64:$rD, (sextloadi16 iaddr:$src))]>,
[(set i64:$rD, (sextloadi16 DForm:$src))]>,
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
"lwa $rD, $src", IIC_LdStLWA,
[(set i64:$rD,
(DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64,
(sextloadi32 DSForm:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
"lhax $rD, $src", IIC_LdStLHA,
[(set i64:$rD, (sextloadi16 xaddr:$src))]>,
[(set i64:$rD, (sextloadi16 XForm:$src))]>,
PPC970_DGroup_Cracked;
def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
"lwax $rD, $src", IIC_LdStLHA,
[(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
[(set i64:$rD, (sextloadi32 XForm:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// For fast-isel:
let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in {
@ -1117,23 +1117,23 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 iaddr:$src))]>;
[(set i64:$rD, (zextloadi8 DForm:$src))]>;
def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src),
"lhz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi16 iaddr:$src))]>;
[(set i64:$rD, (zextloadi16 DForm:$src))]>;
def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src),
"lwz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
[(set i64:$rD, (zextloadi32 DForm:$src))]>, isPPC64;
def LBZX8 : XForm_1_memOp<31, 87, (outs g8rc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 xaddr:$src))]>;
[(set i64:$rD, (zextloadi8 XForm:$src))]>;
def LHZX8 : XForm_1_memOp<31, 279, (outs g8rc:$rD), (ins memrr:$src),
"lhzx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi16 xaddr:$src))]>;
[(set i64:$rD, (zextloadi16 XForm:$src))]>;
def LWZX8 : XForm_1_memOp<31, 23, (outs g8rc:$rD), (ins memrr:$src),
"lwzx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi32 xaddr:$src))]>;
[(set i64:$rD, (zextloadi32 XForm:$src))]>;
// Update forms.
@ -1178,7 +1178,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64;
[(set i64:$rD, (load DSForm:$src))]>, isPPC64;
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@ -1201,10 +1201,10 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load xaddrX4:$src))]>, isPPC64;
[(set i64:$rD, (load XForm:$src))]>, isPPC64;
def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src),
"ldbrx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
[(set i64:$rD, (PPClbrx ForceXForm:$src, i64))]>, isPPC64;
let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in {
def LHBRX8 : XForm_1_memOp<31, 790, (outs g8rc:$rD), (ins memrr:$src),
@ -1380,38 +1380,38 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src),
"stb $rS, $src", IIC_LdStStore,
[(truncstorei8 i64:$rS, iaddr:$src)]>;
[(truncstorei8 i64:$rS, DForm:$src)]>;
def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src),
"sth $rS, $src", IIC_LdStStore,
[(truncstorei16 i64:$rS, iaddr:$src)]>;
[(truncstorei16 i64:$rS, DForm:$src)]>;
def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src),
"stw $rS, $src", IIC_LdStStore,
[(truncstorei32 i64:$rS, iaddr:$src)]>;
[(truncstorei32 i64:$rS, DForm:$src)]>;
def STBX8 : XForm_8_memOp<31, 215, (outs), (ins g8rc:$rS, memrr:$dst),
"stbx $rS, $dst", IIC_LdStStore,
[(truncstorei8 i64:$rS, xaddr:$dst)]>,
[(truncstorei8 i64:$rS, XForm:$dst)]>,
PPC970_DGroup_Cracked;
def STHX8 : XForm_8_memOp<31, 407, (outs), (ins g8rc:$rS, memrr:$dst),
"sthx $rS, $dst", IIC_LdStStore,
[(truncstorei16 i64:$rS, xaddr:$dst)]>,
[(truncstorei16 i64:$rS, XForm:$dst)]>,
PPC970_DGroup_Cracked;
def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
"stwx $rS, $dst", IIC_LdStStore,
[(truncstorei32 i64:$rS, xaddr:$dst)]>,
[(truncstorei32 i64:$rS, XForm:$dst)]>,
PPC970_DGroup_Cracked;
} // Interpretation64Bit
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
"std $rS, $dst", IIC_LdStSTD,
[(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64;
[(store i64:$rS, DSForm:$dst)]>, isPPC64;
def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
"stdx $rS, $dst", IIC_LdStSTD,
[(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
[(store i64:$rS, XForm:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
"stdbrx $rS, $dst", IIC_LdStStore,
[(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
[(PPCstbrx i64:$rS, ForceXForm:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
}
@ -1567,26 +1567,26 @@ def : Pat<(not i64:$in),
(i64not $in)>;
// Extending loads with i64 targets.
// zextloadi1, extloadi1 and extloadi8 all map to the byte loads LBZ8/LBZX8;
// extloadi16 maps to LHZ8/LHZX8 and extloadi32 to LWZ8/LWZX8.
// NOTE(review): diff markers are missing. The first ten patterns (iaddr/xaddr)
// look like the removed lines and the second ten (DForm/XForm) like their
// one-for-one replacements -- confirm against the commit.
def : Pat<(zextloadi1 iaddr:$src),
(LBZ8 iaddr:$src)>;
def : Pat<(zextloadi1 xaddr:$src),
(LBZX8 xaddr:$src)>;
def : Pat<(extloadi1 iaddr:$src),
(LBZ8 iaddr:$src)>;
def : Pat<(extloadi1 xaddr:$src),
(LBZX8 xaddr:$src)>;
def : Pat<(extloadi8 iaddr:$src),
(LBZ8 iaddr:$src)>;
def : Pat<(extloadi8 xaddr:$src),
(LBZX8 xaddr:$src)>;
def : Pat<(extloadi16 iaddr:$src),
(LHZ8 iaddr:$src)>;
def : Pat<(extloadi16 xaddr:$src),
(LHZX8 xaddr:$src)>;
def : Pat<(extloadi32 iaddr:$src),
(LWZ8 iaddr:$src)>;
def : Pat<(extloadi32 xaddr:$src),
(LWZX8 xaddr:$src)>;
def : Pat<(zextloadi1 DForm:$src),
(LBZ8 DForm:$src)>;
def : Pat<(zextloadi1 XForm:$src),
(LBZX8 XForm:$src)>;
def : Pat<(extloadi1 DForm:$src),
(LBZ8 DForm:$src)>;
def : Pat<(extloadi1 XForm:$src),
(LBZX8 XForm:$src)>;
def : Pat<(extloadi8 DForm:$src),
(LBZ8 DForm:$src)>;
def : Pat<(extloadi8 XForm:$src),
(LBZX8 XForm:$src)>;
def : Pat<(extloadi16 DForm:$src),
(LHZ8 DForm:$src)>;
def : Pat<(extloadi16 XForm:$src),
(LHZX8 XForm:$src)>;
def : Pat<(extloadi32 DForm:$src),
(LWZ8 DForm:$src)>;
def : Pat<(extloadi32 XForm:$src),
(LWZX8 XForm:$src)>;
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
@ -1640,15 +1640,6 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
// Select LDtoc for an i64 PPCtoc_entry node whose displacement operand is a
// TLS global address (tglobaltlsaddr).
def : Pat<(i64 (PPCtoc_entry tglobaltlsaddr:$disp, i64:$reg)),
(i64 (LDtoc tglobaltlsaddr:$disp, i64:$reg))>;
// Patterns to match r+r indexed loads and stores for
// addresses without at least 4-byte alignment.
// They map NonDSFormSextLoadi32, NonDSFormLoad and NonDSFormStore onto the
// indexed instructions LWAX, LDX and STDX through the xoaddr addressing mode.
// NOTE(review): the enclosing hunk header shrinks from 15 to 6 lines, so this
// whole group appears to be deleted by the change rather than rewritten --
// confirm against the commit.
def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)),
(LWAX xoaddr:$src)>;
def : Pat<(i64 (NonDSFormLoad xoaddr:$src)),
(LDX xoaddr:$src)>;
def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst),
(STDX $rS, xoaddr:$dst)>;
// 64-bit atomic loads and stores.
// atomic_load_64 selects LD when the address matches iaddrX4 and LDX when it
// matches xaddrX4. Only the load patterns are visible in this excerpt.
def : Pat<(atomic_load_64 iaddrX4:$src), (LD memrix:$src)>;
def : Pat<(atomic_load_64 xaddrX4:$src), (LDX memrr:$src)>;

View File

@ -411,46 +411,46 @@ let hasSideEffects = 1 in {
// Altivec indexed vector loads. Every def takes a memrr source operand and is
// selected from the matching int_ppc_altivec_* load intrinsic.
// NOTE(review): diff markers are missing. In each def the xoaddr pattern line
// looks like the removed line and the ForceXForm line its replacement --
// confirm against the commit.
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
[(set v16i8:$vD, (int_ppc_altivec_lvebx ForceXForm:$src))]>;
def LVEHX: XForm_1_memOp<31, 39, (outs vrrc:$vD), (ins memrr:$src),
"lvehx $vD, $src", IIC_LdStLoad,
[(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
[(set v8i16:$vD, (int_ppc_altivec_lvehx ForceXForm:$src))]>;
def LVEWX: XForm_1_memOp<31, 71, (outs vrrc:$vD), (ins memrr:$src),
"lvewx $vD, $src", IIC_LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
[(set v4i32:$vD, (int_ppc_altivec_lvewx ForceXForm:$src))]>;
def LVX : XForm_1_memOp<31, 103, (outs vrrc:$vD), (ins memrr:$src),
"lvx $vD, $src", IIC_LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
[(set v4i32:$vD, (int_ppc_altivec_lvx ForceXForm:$src))]>;
def LVXL : XForm_1_memOp<31, 359, (outs vrrc:$vD), (ins memrr:$src),
"lvxl $vD, $src", IIC_LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
[(set v4i32:$vD, (int_ppc_altivec_lvxl ForceXForm:$src))]>;
}
// lvsl / lvsr: selected from the int_ppc_altivec_lvsl and int_ppc_altivec_lvsr
// intrinsics, producing a v16i8 result from a memrr operand.
// NOTE(review): diff markers are missing. The xoaddr pattern line looks like
// the removed line and the ForceXForm line its replacement -- confirm.
def LVSL : XForm_1_memOp<31, 6, (outs vrrc:$vD), (ins memrr:$src),
"lvsl $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
[(set v16i8:$vD, (int_ppc_altivec_lvsl ForceXForm:$src))]>,
PPC970_Unit_LSU;
def LVSR : XForm_1_memOp<31, 38, (outs vrrc:$vD), (ins memrr:$src),
"lvsr $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
[(set v16i8:$vD, (int_ppc_altivec_lvsr ForceXForm:$src))]>,
PPC970_Unit_LSU;
// Altivec indexed vector stores. Every def takes the vector value in vrrc:$rS
// plus a memrr destination and is selected from the matching
// int_ppc_altivec_st* intrinsic.
// NOTE(review): diff markers are missing. In each def the xoaddr pattern line
// looks like the removed line and the ForceXForm line its replacement --
// confirm against the commit.
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { // Stores.
def STVEBX: XForm_8_memOp<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
"stvebx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
[(int_ppc_altivec_stvebx v16i8:$rS, ForceXForm:$dst)]>;
def STVEHX: XForm_8_memOp<31, 167, (outs), (ins vrrc:$rS, memrr:$dst),
"stvehx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
[(int_ppc_altivec_stvehx v8i16:$rS, ForceXForm:$dst)]>;
def STVEWX: XForm_8_memOp<31, 199, (outs), (ins vrrc:$rS, memrr:$dst),
"stvewx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
[(int_ppc_altivec_stvewx v4i32:$rS, ForceXForm:$dst)]>;
def STVX : XForm_8_memOp<31, 231, (outs), (ins vrrc:$rS, memrr:$dst),
"stvx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
[(int_ppc_altivec_stvx v4i32:$rS, ForceXForm:$dst)]>;
def STVXL : XForm_8_memOp<31, 487, (outs), (ins vrrc:$rS, memrr:$dst),
"stvxl $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
[(int_ppc_altivec_stvxl v4i32:$rS, ForceXForm:$dst)]>;
}
let PPC970_Unit = 5 in { // VALU Operations.
@ -894,11 +894,11 @@ def : Pat<(v4i32 (ssubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBSWS $vA, $vB))>;
// Unsigned saturating v4i32 subtract selects VSUBUWS.
def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>;
// Plain v4i32 loads and stores select LVX / STVX.
// NOTE(review): diff markers are missing. The xoaddr patterns look like the
// removed lines and the ForceXForm patterns their replacements -- confirm.
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
def : Pat<(v4i32 (load ForceXForm:$src)), (LVX ForceXForm:$src)>;
// Stores.
def : Pat<(store v4i32:$rS, xoaddr:$dst),
(STVX $rS, xoaddr:$dst)>;
def : Pat<(store v4i32:$rS, ForceXForm:$dst),
(STVX $rS, ForceXForm:$dst)>;
// Bit conversions.
// A v8i16 -> v16i8 bitconvert of a VRRC value reuses the source register as-is.
def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;

View File

@ -1143,6 +1143,13 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
// PC Relative Address
// Single-operand iPTR pattern matched by SelectAddrPCRel.
def pcreladdr : ComplexPattern<iPTR, 1, "SelectAddrPCRel", [], []>;
// Load and Store Instruction Selection addressing modes.
// Each mode is an iPTR ComplexPattern yielding two operands, matched by the
// C++ selector named in its string argument and carrying the SDNPWantParent
// property. The load/store patterns refer to these names (DForm, DSForm,
// DQForm, XForm, ForceXForm) in place of iaddr/xaddr/xoaddr.
def DForm : ComplexPattern<iPTR, 2, "SelectDForm", [], [SDNPWantParent]>;
def DSForm : ComplexPattern<iPTR, 2, "SelectDSForm", [], [SDNPWantParent]>;
def DQForm : ComplexPattern<iPTR, 2, "SelectDQForm", [], [SDNPWantParent]>;
def XForm : ComplexPattern<iPTR, 2, "SelectXForm", [], [SDNPWantParent]>;
def ForceXForm : ComplexPattern<iPTR, 2, "SelectForceXForm", [], [SDNPWantParent]>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
// In32BitMode holds when the subtarget reports that it is not PPC64.
def In32BitMode : Predicate<"!Subtarget->isPPC64()">;
@ -2221,25 +2228,25 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
let PPC970_Unit = 2 in {
// 32-bit GPR loads through a memri (register + immediate) operand:
// lbz = zero-extending byte, lha = sign-extending halfword,
// lhz = zero-extending halfword, lwz = plain i32 load.
// NOTE(review): diff markers are missing. In each def the iaddr pattern line
// looks like the removed line and the DForm line its replacement -- confirm.
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
[(set i32:$rD, (zextloadi8 DForm:$src))]>;
def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
"lha $rD, $src", IIC_LdStLHA,
[(set i32:$rD, (sextloadi16 iaddr:$src))]>,
[(set i32:$rD, (sextloadi16 DForm:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
"lhz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi16 iaddr:$src))]>;
[(set i32:$rD, (zextloadi16 DForm:$src))]>;
def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
"lwz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (load iaddr:$src))]>;
[(set i32:$rD, (load DForm:$src))]>;
// Floating-point loads through a memri operand, available only with HasFPU:
// lfs loads an f32 into f4rc, lfd loads an f64 into f8rc.
// NOTE(review): diff markers are missing. The iaddr pattern line looks like
// the removed line and the DForm line its replacement -- confirm.
let Predicates = [HasFPU] in {
def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
"lfs $rD, $src", IIC_LdStLFD,
[(set f32:$rD, (load iaddr:$src))]>;
[(set f32:$rD, (load DForm:$src))]>;
def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
"lfd $rD, $src", IIC_LdStLFD,
[(set f64:$rD, (load iaddr:$src))]>;
[(set f64:$rD, (load DForm:$src))]>;
}
@ -2324,17 +2331,17 @@ def LFDUX : XForm_1_memOp<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in {
// 32-bit GPR loads through a memrr (register + register) operand; these are
// the indexed counterparts of lbz/lha/lhz/lwz.
// NOTE(review): diff markers are missing. In LBZX..LWZX the xaddr pattern line
// looks like the removed line and the XForm line its replacement -- confirm.
def LBZX : XForm_1_memOp<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
[(set i32:$rD, (zextloadi8 XForm:$src))]>;
def LHAX : XForm_1_memOp<31, 343, (outs gprc:$rD), (ins memrr:$src),
"lhax $rD, $src", IIC_LdStLHA,
[(set i32:$rD, (sextloadi16 xaddr:$src))]>,
[(set i32:$rD, (sextloadi16 XForm:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1_memOp<31, 279, (outs gprc:$rD), (ins memrr:$src),
"lhzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi16 xaddr:$src))]>;
[(set i32:$rD, (zextloadi16 XForm:$src))]>;
def LWZX : XForm_1_memOp<31, 23, (outs gprc:$rD), (ins memrr:$src),
"lwzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (load xaddr:$src))]>;
[(set i32:$rD, (load XForm:$src))]>;
// LHBRX carries a single pattern line (PPClbrx with xoaddr) in this excerpt,
// i.e. it is shown without a before/after pair.
def LHBRX : XForm_1_memOp<31, 790, (outs gprc:$rD), (ins memrr:$src),
"lhbrx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
@ -2345,10 +2352,10 @@ def LWBRX : XForm_1_memOp<31, 534, (outs gprc:$rD), (ins memrr:$src),
let Predicates = [HasFPU] in {
// Indexed floating-point loads through a memrr operand: lfsx loads an f32 into
// f4rc, lfdx loads an f64 into f8rc.
// NOTE(review): diff markers are missing. The xaddr pattern line looks like
// the removed line and the XForm line its replacement -- confirm.
def LFSX : XForm_25_memOp<31, 535, (outs f4rc:$frD), (ins memrr:$src),
"lfsx $frD, $src", IIC_LdStLFD,
[(set f32:$frD, (load xaddr:$src))]>;
[(set f32:$frD, (load XForm:$src))]>;
def LFDX : XForm_25_memOp<31, 599, (outs f8rc:$frD), (ins memrr:$src),
"lfdx $frD, $src", IIC_LdStLFD,
[(set f64:$frD, (load xaddr:$src))]>;
[(set f64:$frD, (load XForm:$src))]>;
def LFIWAX : XForm_25_memOp<31, 855, (outs f8rc:$frD), (ins memrr:$src),
"lfiwax $frD, $src", IIC_LdStLFD,
@ -2372,20 +2379,20 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
// Stores through a memri (register + immediate) operand: stb/sth truncate an
// i32 to 8/16 bits, stw stores the full i32, and stfs/stfd (HasFPU only) store
// f32/f64.
// NOTE(review): diff markers are missing. In each def the iaddr pattern line
// looks like the removed line and the DForm line its replacement -- confirm.
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst),
"stb $rS, $dst", IIC_LdStStore,
[(truncstorei8 i32:$rS, iaddr:$dst)]>;
[(truncstorei8 i32:$rS, DForm:$dst)]>;
def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst),
"sth $rS, $dst", IIC_LdStStore,
[(truncstorei16 i32:$rS, iaddr:$dst)]>;
[(truncstorei16 i32:$rS, DForm:$dst)]>;
def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst),
"stw $rS, $dst", IIC_LdStStore,
[(store i32:$rS, iaddr:$dst)]>;
[(store i32:$rS, DForm:$dst)]>;
let Predicates = [HasFPU] in {
def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
"stfs $rS, $dst", IIC_LdStSTFD,
[(store f32:$rS, iaddr:$dst)]>;
[(store f32:$rS, DForm:$dst)]>;
def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
"stfd $rS, $dst", IIC_LdStSTFD,
[(store f64:$rS, iaddr:$dst)]>;
[(store f64:$rS, DForm:$dst)]>;
}
}
@ -2428,15 +2435,15 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
let PPC970_Unit = 2 in {
// Indexed stores from a 32-bit GPR through a memrr operand: stbx/sthx truncate
// an i32 to 8/16 bits, stwx stores the full i32.
// NOTE(review): diff markers are missing. In each def the xaddr pattern line
// looks like the removed line and the XForm line its replacement -- confirm.
def STBX : XForm_8_memOp<31, 215, (outs), (ins gprc:$rS, memrr:$dst),
"stbx $rS, $dst", IIC_LdStStore,
[(truncstorei8 i32:$rS, xaddr:$dst)]>,
[(truncstorei8 i32:$rS, XForm:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8_memOp<31, 407, (outs), (ins gprc:$rS, memrr:$dst),
"sthx $rS, $dst", IIC_LdStStore,
[(truncstorei16 i32:$rS, xaddr:$dst)]>,
[(truncstorei16 i32:$rS, XForm:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8_memOp<31, 151, (outs), (ins gprc:$rS, memrr:$dst),
"stwx $rS, $dst", IIC_LdStStore,
[(store i32:$rS, xaddr:$dst)]>,
[(store i32:$rS, XForm:$dst)]>,
PPC970_DGroup_Cracked;
def STHBRX: XForm_8_memOp<31, 918, (outs), (ins gprc:$rS, memrr:$dst),
@ -2455,10 +2462,10 @@ def STFIWX: XForm_28_memOp<31, 983, (outs), (ins f8rc:$frS, memrr:$dst),
// Indexed floating-point stores through a memrr operand: stfsx stores an f32
// from f4rc, stfdx stores an f64 from f8rc.
// NOTE(review): diff markers are missing. The xaddr pattern line looks like
// the removed line and the XForm line its replacement -- confirm.
def STFSX : XForm_28_memOp<31, 663, (outs), (ins f4rc:$frS, memrr:$dst),
"stfsx $frS, $dst", IIC_LdStSTFD,
[(store f32:$frS, xaddr:$dst)]>;
[(store f32:$frS, XForm:$dst)]>;
def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
"stfdx $frS, $dst", IIC_LdStSTFD,
[(store f64:$frS, xaddr:$dst)]>;
[(store f64:$frS, XForm:$dst)]>;
}
}
@ -3558,27 +3565,27 @@ def : Pat<(srl i32:$rS, i32:$rB),
// A 32-bit shift-left by a register amount selects SLW.
def : Pat<(shl i32:$rS, i32:$rB),
(SLW $rS, $rB)>;
// Extending loads with i32 results: zextloadi1, extloadi1 and extloadi8 map to
// the byte loads LBZ/LBZX; extloadi16 maps to LHZ/LHZX.
// NOTE(review): diff markers are missing. The first eight patterns
// (iaddr/xaddr) look like the removed lines and the second eight (DForm/XForm)
// like their one-for-one replacements -- confirm against the commit.
def : Pat<(i32 (zextloadi1 iaddr:$src)),
(LBZ iaddr:$src)>;
def : Pat<(i32 (zextloadi1 xaddr:$src)),
(LBZX xaddr:$src)>;
def : Pat<(i32 (extloadi1 iaddr:$src)),
(LBZ iaddr:$src)>;
def : Pat<(i32 (extloadi1 xaddr:$src)),
(LBZX xaddr:$src)>;
def : Pat<(i32 (extloadi8 iaddr:$src)),
(LBZ iaddr:$src)>;
def : Pat<(i32 (extloadi8 xaddr:$src)),
(LBZX xaddr:$src)>;
def : Pat<(i32 (extloadi16 iaddr:$src)),
(LHZ iaddr:$src)>;
def : Pat<(i32 (extloadi16 xaddr:$src)),
(LHZX xaddr:$src)>;
def : Pat<(i32 (zextloadi1 DForm:$src)),
(LBZ DForm:$src)>;
def : Pat<(i32 (zextloadi1 XForm:$src)),
(LBZX XForm:$src)>;
def : Pat<(i32 (extloadi1 DForm:$src)),
(LBZ DForm:$src)>;
def : Pat<(i32 (extloadi1 XForm:$src)),
(LBZX XForm:$src)>;
def : Pat<(i32 (extloadi8 DForm:$src)),
(LBZ DForm:$src)>;
def : Pat<(i32 (extloadi8 XForm:$src)),
(LBZX XForm:$src)>;
def : Pat<(i32 (extloadi16 DForm:$src)),
(LHZ DForm:$src)>;
def : Pat<(i32 (extloadi16 XForm:$src)),
(LHZX XForm:$src)>;
let Predicates = [HasFPU] in {
// An f32 load extended to f64 is selected as LFS/LFSX with the result copied
// into the F8RC register class.
// NOTE(review): diff markers are missing. The iaddr/xaddr patterns look like
// the removed lines and the DForm/XForm patterns their replacements -- confirm.
def : Pat<(f64 (extloadf32 iaddr:$src)),
(COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
def : Pat<(f64 (extloadf32 DForm:$src)),
(COPY_TO_REGCLASS (LFS DForm:$src), F8RC)>;
def : Pat<(f64 (extloadf32 XForm:$src)),
(COPY_TO_REGCLASS (LFSX XForm:$src), F8RC)>;
// An f32 -> f64 any_fpextend is likewise just a register-class copy to F8RC.
def : Pat<(f64 (any_fpextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;

View File

@ -2539,14 +2539,14 @@ let Predicates = [IsISA3_1] in {
// int_ppc_vsx_xvtlsbb with second operand 0: run XVTLSBB on the input copied
// to VSRC and extract the sub_eq sub-register of the result.
def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
// PPClxvrzx selects LXVRBX/LXVRHX/LXVRWX/LXVRDX according to its second
// operand (8, 16, 32 or 64), with the result copied to VRRC as v1i128.
// NOTE(review): diff markers are missing. The four xoaddr patterns look like
// the removed lines and the four ForceXForm patterns their replacements --
// confirm against the commit.
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
(v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
(v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
(v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
(v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 8)),
(v1i128 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 16)),
(v1i128 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 32)),
(v1i128 (COPY_TO_REGCLASS (LXVRWX ForceXForm:$src), VRRC))>;
def : Pat <(v1i128 (PPClxvrzx ForceXForm:$src, 64)),
(v1i128 (COPY_TO_REGCLASS (LXVRDX ForceXForm:$src), VRRC))>;
// A v1i128 rotate-left by a vector amount selects VRLQ.
def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
(v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
@ -2564,23 +2564,23 @@ let Predicates = [IsISA3_1, HasVSX] in {
// ISA 3.1, little-endian only: move element 0 of a vector to or from memory
// with the STXVR*X / LXVR*X instructions. AddedComplexity = 400 is applied to
// every pattern in the group.
// NOTE(review): diff markers are missing. Within the store group and within
// the load group, the xoaddr patterns look like the removed lines and the
// ForceXForm patterns like their replacements -- confirm against the commit.
let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
// Store element 0 of a VSX register to memory
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst),
(STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst),
(STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>;
def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst),
(STXVRWX $src, xoaddr:$dst)>;
def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst),
(STXVRWX $src, xoaddr:$dst)>;
def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst),
(STXVRDX $src, xoaddr:$dst)>;
def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst),
(STXVRDX $src, xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), ForceXForm:$dst),
(STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), ForceXForm:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), ForceXForm:$dst),
(STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), ForceXForm:$dst)>;
def : Pat<(store (i32 (extractelt v4i32:$src, 0)), ForceXForm:$dst),
(STXVRWX $src, ForceXForm:$dst)>;
def : Pat<(store (f32 (extractelt v4f32:$src, 0)), ForceXForm:$dst),
(STXVRWX $src, ForceXForm:$dst)>;
def : Pat<(store (i64 (extractelt v2i64:$src, 0)), ForceXForm:$dst),
(STXVRDX $src, ForceXForm:$dst)>;
def : Pat<(store (f64 (extractelt v2f64:$src, 0)), ForceXForm:$dst),
(STXVRDX $src, ForceXForm:$dst)>;
// Load element 0 of a VSX register from memory
// (scalar_to_vector of an extending i16/i8 load selects LXVRHX / LXVRBX).
def : Pat<(v8i16 (scalar_to_vector (i32 (extloadi16 xoaddr:$src)))),
(v8i16 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VSRC))>;
def : Pat<(v16i8 (scalar_to_vector (i32 (extloadi8 xoaddr:$src)))),
(v16i8 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VSRC))>;
def : Pat<(v8i16 (scalar_to_vector (i32 (extloadi16 ForceXForm:$src)))),
(v8i16 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VSRC))>;
def : Pat<(v16i8 (scalar_to_vector (i32 (extloadi8 ForceXForm:$src)))),
(v16i8 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VSRC))>;
}
// FIXME: The swap is overkill when the shift amount is a constant.

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,7 @@ define dso_local void @AlignDSForm() local_unnamed_addr {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r3, r2, best8x8mode@toc@ha
; CHECK-NEXT: addi r3, r3, best8x8mode@toc@l
; CHECK-NEXT: ldx r3, 0, r3
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: std r3, 0(r3)
entry:
%0 = load <4 x i16>, <4 x i16>* bitcast ([4 x i16]* @best8x8mode to <4 x i16>*), align 2