1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[ARM] Invert CSEL predicates if the opposite is a simpler constant to materialise

This moves ConstantMaterializationCost into ARMBaseInstrInfo so that it can
also be used in ISel Lowering, adding codesize values to the computed costs, to
be able to compare either approximate instruction counts or codesize costs.

It also adds a HasLowerConstantMaterializationCost, which compares the
ConstantMaterializationCost of two values, returning true if the first is
smaller either in instruction count/codesize, or falling back to the other in
the case that they are equal.

This is used in constant CSEL lowering to invert the predicate if the opposite
is easier to materialise.

Differential revision: https://reviews.llvm.org/D66701

llvm-svn: 370741
This commit is contained in:
David Green 2019-09-03 11:06:24 +00:00
parent 2cffd5437a
commit dbd4d84623
5 changed files with 79 additions and 34 deletions

View File

@@ -5354,3 +5354,50 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
return &*CmpMI;
}
unsigned llvm::ConstantMaterializationCost(unsigned Val,
                                           const ARMSubtarget *Subtarget,
                                           bool ForCodesize) {
  // Estimate what it takes to put Val into a register: each case yields an
  // approximate size in bytes when ForCodesize is set, otherwise an
  // instruction count.
  if (!Subtarget->isThumb()) {
    // ARM mode.
    if (ARM_AM::getSOImmVal(Val) != -1) // MOV with rotated immediate
      return ForCodesize ? 4 : 1;
    if (ARM_AM::getSOImmVal(~Val) != -1) // MVN of a rotated immediate
      return ForCodesize ? 4 : 1;
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
      return ForCodesize ? 4 : 1;
    if (ARM_AM::isSOImmTwoPartVal(Val)) // two data-processing instructions
      return ForCodesize ? 8 : 2;
  } else {
    // Thumb mode.
    if (Val <= 255) // 16-bit MOV
      return ForCodesize ? 2 : 1;
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                    // MOV
         ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
         ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
      return ForCodesize ? 4 : 1;
    if (Val <= 510) // MOV + ADDi8
      return ForCodesize ? 4 : 2;
    if (~Val <= 255) // MOV + MVN
      return ForCodesize ? 4 : 2;
    if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
      return ForCodesize ? 4 : 2;
  }
  if (Subtarget->useMovt()) // MOVW + MOVT pair
    return ForCodesize ? 8 : 2;
  return ForCodesize ? 8 : 3; // literal pool load
}
bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
const ARMSubtarget *Subtarget,
bool ForCodesize) {
// Check with ForCodesize
unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
if (Cost1 < Cost2)
return true;
if (Cost1 > Cost2)
return false;
// If they are equal, try with !ForCodesize
return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
}

View File

@@ -621,6 +621,20 @@ void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond);
void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond,
unsigned Inactive);
/// Returns the number of instructions required to materialize the given
/// constant in a register, or 3 if a literal pool load is needed.
/// If ForCodesize is specified, an approximate cost in bytes is returned.
unsigned ConstantMaterializationCost(unsigned Val,
const ARMSubtarget *Subtarget,
bool ForCodesize = false);
/// Returns true if Val1 has a lower Constant Materialization Cost than Val2.
/// Uses the cost from ConstantMaterializationCost, first with ForCodesize as
/// specified. If the scores are equal, return the comparison for !ForCodesize.
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
const ARMSubtarget *Subtarget,
bool ForCodesize = false);
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H

View File

@@ -250,10 +250,6 @@ private:
SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
bool is64BitVector);
/// Returns the number of instructions required to materialize the given
/// constant in a register, or 3 if a literal pool load is needed.
unsigned ConstantMaterializationCost(unsigned Val) const;
/// Checks if N is a multiplication by a constant where we can extract out a
/// power of two from the constant so that it can be used in a shift, but only
/// if it simplifies the materialization of the constant. Returns true if it
@@ -454,27 +450,6 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
(ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  // Number of instructions needed to get Val into a register; 3 means a
  // literal pool load is required.
  if (!Subtarget->isThumb()) {
    // ARM mode.
    if (ARM_AM::getSOImmVal(Val) != -1)
      return 1; // MOV with rotated immediate
    if (ARM_AM::getSOImmVal(~Val) != -1)
      return 1; // MVN of a rotated immediate
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff)
      return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val))
      return 2; // two data-processing instructions
  } else {
    // Thumb mode.
    if (Val <= 255)
      return 1; // 16-bit MOV
    bool IsT2Immediate = Subtarget->hasV6T2Ops() &&
                         (Val <= 0xffff ||                    // MOV
                          ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
                          ARM_AM::getT2SOImmVal(~Val) != -1); // MVN
    if (IsT2Immediate)
      return 1;
    if (Val <= 510)
      return 2; // MOV + ADDi8
    if (~Val <= 255)
      return 2; // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val))
      return 2; // MOV + LSL
  }
  if (Subtarget->useMovt())
    return 2; // MOVW + MOVT pair
  return 3; // literal pool load
}
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
unsigned MaxShift,
unsigned &PowerOfTwo,
@@ -504,8 +479,8 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
// Only optimise if the new cost is better
unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
unsigned OldCost = ConstantMaterializationCost(MulConstVal);
unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
return NewCost < OldCost;
}
@@ -2791,7 +2766,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
// If we can't materialize the constant we need to use a literal pool
if (ConstantMaterializationCost(Val) > 2) {
if (ConstantMaterializationCost(Val, Subtarget) > 2) {
SDValue CPIdx = CurDAG->getTargetConstantPool(
ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI->getPointerTy(CurDAG->getDataLayout()));
@@ -2932,8 +2907,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
bool PreferImmediateEncoding =
Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
if (!PreferImmediateEncoding &&
ConstantMaterializationCost(Imm) >
ConstantMaterializationCost(~Imm)) {
ConstantMaterializationCost(Imm, Subtarget) >
ConstantMaterializationCost(~Imm, Subtarget)) {
// The current immediate costs more to materialize than a negated
// immediate, so negate the immediate and use a BIC.
SDValue NewImm =

View File

@@ -4841,6 +4841,15 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
if (Opcode) {
// If one of the constants is cheaper than another, materialise the
// cheaper one and let the csel generate the other.
if (Opcode != ARMISD::CSINC &&
HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
std::swap(TrueVal, FalseVal);
std::swap(TVal, FVal);
CC = ISD::getSetCCInverse(CC, true);
}
// Attempt to use ZR checking TVal is 0, possibly inverting the condition
// to get there. CSINC is not invertible like the other two (~(~a) == a,
// -(-a) == a, but (a+1)+1 != a).

View File

@@ -42,9 +42,9 @@ entry:
define i32 @csinv_const_56(i32 %a) {
; CHECK-LABEL: csinv_const_56:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mvn r1, #5
; CHECK-NEXT: movs r1, #5
; CHECK-NEXT: cmp r0, #45
; CHECK-NEXT: csinv r0, r1, r1, gt
; CHECK-NEXT: csinv r0, r1, r1, le
; CHECK-NEXT: bx lr
entry:
%cmp = icmp sgt i32 %a, 45
@@ -93,9 +93,9 @@ entry:
define i32 @csneg_const_r(i32 %a) {
; CHECK-LABEL: csneg_const_r:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov.w r1, #-1
; CHECK-NEXT: movs r1, #1
; CHECK-NEXT: cmp r0, #45
; CHECK-NEXT: csneg r0, r1, r1, gt
; CHECK-NEXT: csneg r0, r1, r1, le
; CHECK-NEXT: bx lr
entry:
%cmp = icmp sgt i32 %a, 45