[ARM] Invert CSEL predicates if the opposite is a simpler constant to materialise

This moves ConstantMaterializationCost into ARMBaseInstrInfo so that it can also be used in ISel Lowering, adding codesize values to the computed costs, to be able to compare either approximate instruction counts or codesize costs. It also adds a HasLowerConstantMaterializationCost, which compares the ConstantMaterializationCost of two values, returning true if the first is smaller either in instruction count/codesize, or falling back to the other in the case that they are equal. This is used in constant CSEL lowering to invert the predicate if the opposite is easier to materialise. Differential revision: https://reviews.llvm.org/D66701 llvm-svn: 370741
2024-11-23 03:02:36 +01:00 · 2019-09-03 11:06:24 +00:00 · 2019-09-03 11:06:24 +00:00 · dbd4d84623
commit dbd4d84623
parent 2cffd5437a
5 changed files with 79 additions and 34 deletions
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@ -5354,3 +5354,50 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
  return &*CmpMI;
 }
 unsigned llvm::ConstantMaterializationCost(unsigned Val,
                                           const ARMSubtarget *Subtarget,
                                           bool ForCodesize) {
  if (Subtarget->isThumb()) {
    if (Val <= 255) // MOV
      return ForCodesize ? 2 : 1;
    if (Subtarget->hasV6T2Ops() && (Val <= 0xffff ||                    // MOV
                                    ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
                                    ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
      return ForCodesize ? 4 : 1;
    if (Val <= 510) // MOV + ADDi8
      return ForCodesize ? 4 : 2;
    if (~Val <= 255) // MOV + MVN
      return ForCodesize ? 4 : 2;
    if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
      return ForCodesize ? 4 : 2;
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) // MOV
      return ForCodesize ? 4 : 1;
    if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
      return ForCodesize ? 4 : 1;
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
      return ForCodesize ? 4 : 1;
    if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
      return ForCodesize ? 8 : 2;
  }
  if (Subtarget->useMovt()) // MOVW + MOVT
    return ForCodesize ? 8 : 2;
  return ForCodesize ? 8 : 3; // Literal pool load
 }
 bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
                                               const ARMSubtarget *Subtarget,
                                               bool ForCodesize) {
  // Check with ForCodesize
  unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
  unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
  if (Cost1 < Cost2)
    return true;
  if (Cost1 > Cost2)
    return false;
  // If they are equal, try with !ForCodesize
  return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
         ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
 }
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@ -621,6 +621,20 @@ void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond);
 void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond,
                              unsigned Inactive);
 /// Returns the number of instructions required to materialize the given
 /// constant in a register, or 3 if a literal pool load is needed.
 /// If ForCodesize is specified, an approximate cost in bytes is returned.
 unsigned ConstantMaterializationCost(unsigned Val,
                                     const ARMSubtarget *Subtarget,
                                     bool ForCodesize = false);
 /// Returns true if Val1 has a lower Constant Materialization Cost than Val2.
 /// Uses the cost from ConstantMaterializationCost, first with ForCodesize as
 /// specified. If the scores are equal, return the comparison for !ForCodesize.
 bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
                                         const ARMSubtarget *Subtarget,
                                         bool ForCodesize = false);
 } // end namespace llvm
 #endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@ -250,10 +250,6 @@ private:
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);
  /// Returns the number of instructions required to materialize the given
  /// constant in a register, or 3 if a literal pool load is needed.
  unsigned ConstantMaterializationCost(unsigned Val) const;
  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
@ -454,27 +450,6 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 }
 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                                   // MOV
         ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
         ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
      return 1;
    if (Val <= 510) return 2;                               // MOV + ADDi8
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  }
  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
  return 3; // Literal pool load
 }
 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
@ -504,8 +479,8 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
-  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
+  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
-  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
+  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
 }
@ -2791,7 +2766,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
  case ISD::Constant: {
    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
    // If we can't materialize the constant we need to use a literal pool
-    if (ConstantMaterializationCost(Val) > 2) {
+    if (ConstantMaterializationCost(Val, Subtarget) > 2) {
      SDValue CPIdx = CurDAG->getTargetConstantPool(
          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
          TLI->getPointerTy(CurDAG->getDataLayout()));
@ -2932,8 +2907,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
      bool PreferImmediateEncoding =
        Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
      if (!PreferImmediateEncoding &&
-          ConstantMaterializationCost(Imm) >
+          ConstantMaterializationCost(Imm, Subtarget) >
-              ConstantMaterializationCost(~Imm)) {
+              ConstantMaterializationCost(~Imm, Subtarget)) {
        // The current immediate costs more to materialize than a negated
        // immediate, so negate the immediate and use a BIC.
        SDValue NewImm =
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -4841,6 +4841,15 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
    }
    if (Opcode) {
      // If one of the constants is cheaper than another, materialise the
      // cheaper one and let the csel generate the other.
      if (Opcode != ARMISD::CSINC &&
          HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
        std::swap(TrueVal, FalseVal);
        std::swap(TVal, FVal);
        CC = ISD::getSetCCInverse(CC, true);
      }
      // Attempt to use ZR checking TVal is 0, possibly inverting the condition
      // to get there. CSINC not is invertable like the other two (~(~a) == a,
      // -(-a) == a, but (a+1)+1 != a).
--- a/test/CodeGen/Thumb2/csel.ll
+++ b/test/CodeGen/Thumb2/csel.ll
@ -42,9 +42,9 @@ entry:
 define i32 @csinv_const_56(i32 %a) {
 ; CHECK-LABEL: csinv_const_56:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    mvn r1, #5
+; CHECK-NEXT:    movs r1, #5
 ; CHECK-NEXT:    cmp r0, #45
-; CHECK-NEXT:    csinv r0, r1, r1, gt
+; CHECK-NEXT:    csinv r0, r1, r1, le
 ; CHECK-NEXT:    bx lr
 entry:
  %cmp = icmp sgt i32 %a, 45
@ -93,9 +93,9 @@ entry:
 define i32 @csneg_const_r(i32 %a) {
 ; CHECK-LABEL: csneg_const_r:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    mov.w r1, #-1
+; CHECK-NEXT:    movs r1, #1
 ; CHECK-NEXT:    cmp r0, #45
-; CHECK-NEXT:    csneg r0, r1, r1, gt
+; CHECK-NEXT:    csneg r0, r1, r1, le
 ; CHECK-NEXT:    bx lr
 entry:
  %cmp = icmp sgt i32 %a, 45