[FastISel][AArch64] Fold mul into add/sub and logical operations.

Try to fold a multiply by a power of 2 into the add/sub or logical operation as a left-shifted register operand (when possible). This is related to rdar://problem/18369687. llvm-svn: 217978
2025-01-31 20:51:52 +01:00 · 2014-09-17 19:51:38 +00:00 · 2014-09-17 19:51:38 +00:00 · df7d94ca78
commit df7d94ca78
parent 6305202d76
2 changed files with 117 additions and 19 deletions
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@ -949,8 +949,13 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
  if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);
  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);
  // Canonicalize shift immediate to the RHS.
-  if (UseAdd && isValueAvailable(LHS))
+  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl  ||
@ -980,7 +985,8 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
    return ResultReg;
  // Only extend the RHS within the instruction if there is a valid extend type.
-  if (ExtendType != AArch64_AM::InvalidShiftExtend && isValueAvailable(RHS)) {
+  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
@ -1000,8 +1006,28 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                         ExtendType, 0, SetFlags, WantResult);
  }
  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);
      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                           AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
    }
  // Check if the shift can be folded into the instruction.
-  if (isValueAvailable(RHS))
+  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
@ -1296,12 +1322,16 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);
  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);
  // Canonicalize shift immediate to the RHS.
-  if (isValueAvailable(LHS))
+  if (LHS->hasOneUse() && isValueAvailable(LHS))
-    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
+    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
-        if (SI->getOpcode() == Instruction::Shl)
+        std::swap(LHS, RHS);
          std::swap(LHS, RHS);
  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
@ -1316,19 +1346,39 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
  if (ResultReg)
    return ResultReg;
  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS))
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);
      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                              RHSIsKill, ShiftVal);
    }
  // Check if the shift can be folded into the instruction.
-  if (isValueAvailable(RHS))
+  if (RHS->hasOneUse() && isValueAvailable(RHS))
-    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
+    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
-      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
+      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
-        if (SI->getOpcode() == Instruction::Shl) {
+        uint64_t ShiftVal = C->getZExtValue();
-          uint64_t ShiftVal = C->getZExtValue();
+        unsigned RHSReg = getRegForValue(SI->getOperand(0));
-          unsigned RHSReg = getRegForValue(SI->getOperand(0));
+        if (!RHSReg)
-          if (!RHSReg)
+          return 0;
-            return 0;
+        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
-          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
+        return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
-          return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+                                RHSIsKill, ShiftVal);
-                                  RHSIsKill, ShiftVal);
+      }
        }
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
--- a/test/CodeGen/AArch64/fast-isel-logic-op.ll
+++ b/test/CodeGen/AArch64/fast-isel-logic-op.ll
@ -108,6 +108,22 @@ define i64 @and_rs_i64(i64 %a, i64 %b) {
  ret i64 %2
 }
 ; Multiply-by-4 (2^2) of %b used as the second operand of a 32-bit 'and'.
 ; The expected assembly below is an 'and' that takes w1 shifted left by 2,
 ; i.e. the multiply is folded into the logical instruction's operand.
 define i32 @and_mul_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: and_mul_i32
 ; CHECK:       and w0, w0, w1, lsl #2
  %1 = mul i32 %b, 4
  %2 = and i32 %a, %1
  ret i32 %2
 }
 ; Multiply-by-4 (2^2) of %b used as the second operand of a 64-bit 'and'.
 ; The expected assembly below is an 'and' that takes x1 shifted left by 2,
 ; i.e. the multiply is folded into the logical instruction's operand.
 define i64 @and_mul_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: and_mul_i64
 ; CHECK:       and x0, x0, x1, lsl #2
  %1 = mul i64 %b, 4
  %2 = and i64 %a, %1
  ret i64 %2
 }
 ; OR
 define zeroext i1 @or_rr_i1(i1 signext %a, i1 signext %b) {
 ; CHECK-LABEL: or_rr_i1
@ -210,6 +226,22 @@ define i64 @or_rs_i64(i64 %a, i64 %b) {
  ret i64 %2
 }
 ; Multiply-by-4 (2^2) of %b used as the second operand of a 32-bit 'or'.
 ; The expected assembly below is an 'orr' that takes w1 shifted left by 2,
 ; i.e. the multiply is folded into the logical instruction's operand.
 define i32 @or_mul_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: or_mul_i32
 ; CHECK:       orr w0, w0, w1, lsl #2
  %1 = mul i32 %b, 4
  %2 = or i32 %a, %1
  ret i32 %2
 }
 ; Multiply-by-4 (2^2) of %b used as the second operand of a 64-bit 'or'.
 ; The expected assembly below is an 'orr' that takes x1 shifted left by 2,
 ; i.e. the multiply is folded into the logical instruction's operand.
 define i64 @or_mul_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: or_mul_i64
 ; CHECK:       orr x0, x0, x1, lsl #2
  %1 = mul i64 %b, 4
  %2 = or i64 %a, %1
  ret i64 %2
 }
 ; XOR
 define zeroext i1 @xor_rr_i1(i1 signext %a, i1 signext %b) {
 ; CHECK-LABEL: xor_rr_i1
@ -312,3 +344,19 @@ define i64 @xor_rs_i64(i64 %a, i64 %b) {
  ret i64 %2
 }
 ; Multiply-by-4 (2^2) of %b used as the second operand of a 32-bit 'xor'.
 ; The expected assembly below is an 'eor' that takes w1 shifted left by 2,
 ; i.e. the multiply is folded into the logical instruction's operand.
 define i32 @xor_mul_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: xor_mul_i32
 ; CHECK:       eor w0, w0, w1, lsl #2
  %1 = mul i32 %b, 4
  %2 = xor i32 %a, %1
  ret i32 %2
 }
 ; Multiply-by-4 (2^2) of %b used as the second operand of a 64-bit 'xor'.
 ; The expected assembly below is an 'eor' that takes x1 shifted left by 2,
 ; i.e. the multiply is folded into the logical instruction's operand.
 define i64 @xor_mul_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: xor_mul_i64
 ; CHECK:       eor x0, x0, x1, lsl #2
  %1 = mul i64 %b, 4
  %2 = xor i64 %a, %1
  ret i64 %2
 }