From 8ec32a2ae04b3c41f54de59b7c38c177c2cadcba Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 29 Apr 2021 09:39:21 -0700
Subject: [PATCH] [RISCV] Teach DAG combine to fold (and (select_cc lhs, rhs,
 cc, -1, c), x) -> (select_cc lhs, rhs, cc, x, (and, x, c))

Similar for or/xor with 0 in place of -1.

This is the canonical form produced by InstCombine for something like
`c ? x & y : x;`. Since we have to use control flow to expand select,
we'll usually end up with a mv in the basic block. By folding this we
may be able to pull the and/or/xor into the block instead and avoid a
mv instruction.

The code here is based on code from ARM that uses this to create
predicated instructions. I'm doing it on SELECT_CC so it happens late,
but we could do it on select earlier, which is what ARM does. I'm not
sure if we lose any combine opportunities if we do it earlier.

I left out add and sub because this can separate sext.w from the
add/sub. It also made a conditional i64 addition/subtraction on RV32
worse. I guess both of those would be fixed by doing this earlier on
select.

The select-binop-identity.ll test has not been committed yet, but I
made the diff show the changes to it.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D101485
---
 lib/Target/RISCV/RISCVISelLowering.cpp      | 111 +++++-
 test/CodeGen/RISCV/rv32zbb-zbp.ll           | 354 +++++++++-----------
 test/CodeGen/RISCV/rv32zbs.ll               |  30 +-
 test/CodeGen/RISCV/rv32zbt.ll               | 120 +++----
 test/CodeGen/RISCV/select-binop-identity.ll |  83 ++---
 5 files changed, 364 insertions(+), 334 deletions(-)

diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 571eae6cb75..75ea3aa5d73 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -772,9 +772,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   // We can use any register for comparisons
   setHasMultipleConditionRegisters();
 
-  if (Subtarget.hasStdExtZbp()) {
-    setTargetDAGCombine(ISD::OR);
-  }
+  setTargetDAGCombine(ISD::AND);
+  setTargetDAGCombine(ISD::OR);
+  setTargetDAGCombine(ISD::XOR);
   if (Subtarget.hasStdExtV()) {
     setTargetDAGCombine(ISD::FCOPYSIGN);
     setTargetDAGCombine(ISD::MGATHER);
@@ -5213,6 +5213,99 @@ static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
                      DAG.getConstant(CombinedShAmt, DL,
                                      N->getOperand(1).getValueType()));
 }
 
+// Combine a constant select operand into its use:
+//
+// (and (select_cc lhs, rhs, cc, -1, c), x)
+//   -> (select_cc lhs, rhs, cc, x, (and x, c))  [AllOnes=1]
+// (or (select_cc lhs, rhs, cc, 0, c), x)
+//   -> (select_cc lhs, rhs, cc, x, (or x, c))  [AllOnes=0]
+// (xor (select_cc lhs, rhs, cc, 0, c), x)
+//   -> (select_cc lhs, rhs, cc, x, (xor x, c))  [AllOnes=0]
+static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+                                     SelectionDAG &DAG, bool AllOnes) {
+  EVT VT = N->getValueType(0);
+
+  if (Slct.getOpcode() != RISCVISD::SELECT_CC || !Slct.hasOneUse())
+    return SDValue();
+
+  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
+    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
+  };
+
+  bool SwapSelectOps;
+  SDValue TrueVal = Slct.getOperand(3);
+  SDValue FalseVal = Slct.getOperand(4);
+  SDValue NonConstantVal;
+  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
+    SwapSelectOps = false;
+    NonConstantVal = FalseVal;
+  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
+    SwapSelectOps = true;
+    NonConstantVal = TrueVal;
+  } else
+    return SDValue();
+
+  // Slct is now known to be the desired identity constant when CC is true.
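+  // The result is OtherOp unchanged on the identity arm, and the binop of
+  // OtherOp and the remaining select operand on the other arm.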
+  TrueVal = OtherOp;
+  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
+  // Unless SwapSelectOps says CC should be false.
+  if (SwapSelectOps)
+    std::swap(TrueVal, FalseVal);
+
+  return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
+                     {Slct.getOperand(0), Slct.getOperand(1),
+                      Slct.getOperand(2), TrueVal, FalseVal});
+}
+
+// Attempt combineSelectCCAndUse on each operand of a commutative operator N.
+static SDValue combineSelectCCAndUseCommutative(SDNode *N, SelectionDAG &DAG,
+                                                bool AllOnes) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  if (SDValue Result = combineSelectCCAndUse(N, N0, N1, DAG, AllOnes))
+    return Result;
+  if (SDValue Result = combineSelectCCAndUse(N, N1, N0, DAG, AllOnes))
+    return Result;
+  return SDValue();
+}
+
+static SDValue performANDCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const RISCVSubtarget &Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+
+  // fold (and (select_cc lhs, rhs, cc, -1, y), x) ->
+  //   (select_cc lhs, rhs, cc, x, (and x, y))
+  return combineSelectCCAndUseCommutative(N, DAG, true);
+}
+
+static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                                const RISCVSubtarget &Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  if (Subtarget.hasStdExtZbp()) {
+    if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
+      return GREV;
+    if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
+      return GORC;
+    if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
+      return SHFL;
+  }
+
+  // fold (or (select_cc lhs, rhs, cc, 0, y), x) ->
+  //   (select_cc lhs, rhs, cc, x, (or x, y))
+  return combineSelectCCAndUseCommutative(N, DAG, false);
+}
+
+static SDValue performXORCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const RISCVSubtarget &Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+
+  // fold (xor (select_cc lhs, rhs, cc, 0, y), x) ->
+  //   (select_cc lhs, rhs, cc, x, (xor x, y))
+  return combineSelectCCAndUseCommutative(N, DAG, false);
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -5431,14 +5524,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                        DAG.getConstant(~SignBit, DL, MVT::i64));
   }
+  case ISD::AND:
+    return performANDCombine(N, DCI, Subtarget);
   case ISD::OR:
-    if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
-      return GREV;
-    if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
-      return GORC;
-    if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
-      return SHFL;
-    break;
+    return performORCombine(N, DCI, Subtarget);
+  case ISD::XOR:
+    return performXORCombine(N, DCI, Subtarget);
   case RISCVISD::SELECT_CC: {
     // Transform
     SDValue LHS = N->getOperand(0);
diff --git a/test/CodeGen/RISCV/rv32zbb-zbp.ll b/test/CodeGen/RISCV/rv32zbb-zbp.ll
index 18cca70b11b..b8856124bb9 100644
--- a/test/CodeGen/RISCV/rv32zbb-zbp.ll
+++ b/test/CodeGen/RISCV/rv32zbb-zbp.ll
@@ -218,48 +218,43 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
 define i64 @rol_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: rol_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a3, a2, 63
-; RV32I-NEXT:    addi t1, a3, -32
+; RV32I-NEXT:    mv a7, a1
+; RV32I-NEXT:    andi a1, a2, 63
+; RV32I-NEXT:    addi t0, a1, -32
 ; RV32I-NEXT:    addi a6, zero, 31
-; RV32I-NEXT:    bltz t1, .LBB7_2
+; RV32I-NEXT:    bltz t0, .LBB7_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll a7, a0, t1
+; RV32I-NEXT:    sll a1, a0, t0
 ; RV32I-NEXT:    j .LBB7_3
 ; RV32I-NEXT:  .LBB7_2:
-; RV32I-NEXT:    sll a4, a1, a2
-; RV32I-NEXT:    sub a3, a6, a3
-; RV32I-NEXT:    srli a5, a0, 1
-; RV32I-NEXT:    srl a3, a5, a3
-; RV32I-NEXT:    or a7, a4, a3
+; RV32I-NEXT:    sll a3, a7, a2
+; RV32I-NEXT:    sub a1, a6, a1
+; RV32I-NEXT:    srli a4, a0, 1
+; RV32I-NEXT:    srl a1, a4, a1
+; RV32I-NEXT:    or a1, a3, a1
 ; RV32I-NEXT:  .LBB7_3:
-; RV32I-NEXT:    neg a4, a2
-; RV32I-NEXT:    andi a5, a4, 63
-; RV32I-NEXT:    addi a3, a5, -32
-; RV32I-NEXT:    bltz a3, .LBB7_7
+; RV32I-NEXT:    neg a5, a2
+; RV32I-NEXT:    andi a3, a5, 63
+; RV32I-NEXT:    addi a4, a3, -32
+; RV32I-NEXT:    bltz a4, .LBB7_5
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    mv t0, zero
-; RV32I-NEXT:    bgez a3, .LBB7_8
+; RV32I-NEXT:    srl a3, a7, a4
+; RV32I-NEXT:    bltz t0, .LBB7_6
+; RV32I-NEXT:    j .LBB7_7
 ; RV32I-NEXT:  .LBB7_5:
-; RV32I-NEXT:    srl a3, a0, a4
-; RV32I-NEXT:    sub a4, a6, a5
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    sll a1, a1, a4
-; RV32I-NEXT:    or a4, a3, a1
-; RV32I-NEXT:    or a1, a7, t0
-; RV32I-NEXT:    bgez t1, .LBB7_9
+; RV32I-NEXT:    srl a4, a7, a5
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    srl a4, a0, a5
+; RV32I-NEXT:    sub a3, a6, a3
+; RV32I-NEXT:    slli a5, a7, 1
+; RV32I-NEXT:    sll a3, a5, a3
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    bgez t0, .LBB7_7
 ; RV32I-NEXT:  .LBB7_6:
 ; RV32I-NEXT:    sll a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    ret
+; RV32I-NEXT:    or a3, a3, a0
 ; RV32I-NEXT:  .LBB7_7:
-; RV32I-NEXT:    srl t0, a1, a4
-; RV32I-NEXT:    bltz a3, .LBB7_5
-; RV32I-NEXT:  .LBB7_8:
-; RV32I-NEXT:    srl a4, a1, a3
-; RV32I-NEXT:    or a1, a7, t0
-; RV32I-NEXT:    bltz t1, .LBB7_6
-; RV32I-NEXT:  .LBB7_9:
-; RV32I-NEXT:    or a0, zero, a4
+; RV32I-NEXT:    mv a0, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32IB-LABEL: rol_i64:
@@ -299,94 +294,84 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
 ;
 ; RV32IBB-LABEL: rol_i64:
 ; RV32IBB:       # %bb.0:
-; RV32IBB-NEXT:    andi a3, a2, 63
-; RV32IBB-NEXT:    addi t1, a3, -32
+; RV32IBB-NEXT:    mv a7, a1
+; RV32IBB-NEXT:    andi a1, a2, 63
+; RV32IBB-NEXT:    addi t0, a1, -32
 ; RV32IBB-NEXT:    addi a6, zero, 31
-; RV32IBB-NEXT:    bltz t1, .LBB7_2
+; RV32IBB-NEXT:    bltz t0, .LBB7_2
 ; RV32IBB-NEXT:  # %bb.1:
-; RV32IBB-NEXT:    sll a7, a0, t1
+; RV32IBB-NEXT:    sll a1, a0, t0
 ; RV32IBB-NEXT:    j .LBB7_3
 ; RV32IBB-NEXT:  .LBB7_2:
-; RV32IBB-NEXT:    sll a4, a1, a2
-; RV32IBB-NEXT:    sub a3, a6, a3
-; RV32IBB-NEXT:    srli a5, a0, 1
-; RV32IBB-NEXT:    srl a3, a5, a3
-; RV32IBB-NEXT:    or a7, a4, a3
+; RV32IBB-NEXT:    sll a3, a7, a2
+; RV32IBB-NEXT:    sub a1, a6, a1
+; RV32IBB-NEXT:    srli a4, a0, 1
+; RV32IBB-NEXT:    srl a1, a4, a1
+; RV32IBB-NEXT:    or a1, a3, a1
 ; RV32IBB-NEXT:  .LBB7_3:
-; RV32IBB-NEXT:    neg a4, a2
-; RV32IBB-NEXT:    andi a5, a4, 63
-; RV32IBB-NEXT:    addi a3, a5, -32
-; RV32IBB-NEXT:    bltz a3, .LBB7_7
+; RV32IBB-NEXT:    neg a5, a2
+; RV32IBB-NEXT:    andi a3, a5, 63
+; RV32IBB-NEXT:    addi a4, a3, -32
+; RV32IBB-NEXT:    bltz a4, .LBB7_5
 ; RV32IBB-NEXT:  # %bb.4:
-; RV32IBB-NEXT:    mv t0, zero
-; RV32IBB-NEXT:    bgez a3, .LBB7_8
+; RV32IBB-NEXT:    srl a3, a7, a4
+; RV32IBB-NEXT:    bltz t0, .LBB7_6
+; RV32IBB-NEXT:    j .LBB7_7
 ; RV32IBB-NEXT:  .LBB7_5:
-; RV32IBB-NEXT:    srl a3, a0, a4
-; RV32IBB-NEXT:    sub a4, a6, a5
-; RV32IBB-NEXT:    slli a1, a1, 1
-; RV32IBB-NEXT:    sll a1, a1, a4
-; RV32IBB-NEXT:    or a4, a3, a1
-; RV32IBB-NEXT:    or a1, a7, t0
-; RV32IBB-NEXT:    bgez t1, .LBB7_9
+; RV32IBB-NEXT:    srl a4, a7, a5
+; RV32IBB-NEXT:    or a1, a1, a4
+; RV32IBB-NEXT:    srl a4, a0, a5
+; RV32IBB-NEXT:    sub a3, a6, a3
+; RV32IBB-NEXT:    slli a5, a7, 1
+; RV32IBB-NEXT:    sll a3, a5, a3
+; RV32IBB-NEXT:    or a3, a4, a3
+; RV32IBB-NEXT:    bgez t0, .LBB7_7
 ; RV32IBB-NEXT:  .LBB7_6:
 ; RV32IBB-NEXT:    sll a0, a0, a2
-; RV32IBB-NEXT:    or a0, a0, a4
-; RV32IBB-NEXT:    ret
+; RV32IBB-NEXT:    or a3, a3, a0
 ; RV32IBB-NEXT:  .LBB7_7:
-; RV32IBB-NEXT:    srl t0, a1, a4
-; RV32IBB-NEXT:    bltz a3, .LBB7_5
-; RV32IBB-NEXT:  .LBB7_8:
-; RV32IBB-NEXT:    srl a4, a1, a3
-; RV32IBB-NEXT:    or a1, a7, t0
-; RV32IBB-NEXT:    bltz t1, .LBB7_6
-; RV32IBB-NEXT:  .LBB7_9:
-; RV32IBB-NEXT:    or a0, zero, a4
+; RV32IBB-NEXT:    mv a0, a3
 ; RV32IBB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: rol_i64:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    andi a3, a2, 63
-; RV32IBP-NEXT:    addi t1, a3, -32
+; RV32IBP-NEXT:    mv a7, a1
+; RV32IBP-NEXT:    andi a1, a2, 63
+; RV32IBP-NEXT:    addi t0, a1, -32
 ; RV32IBP-NEXT:    addi a6, zero, 31
-; RV32IBP-NEXT:    bltz t1, .LBB7_2
+; RV32IBP-NEXT:    bltz t0, .LBB7_2
 ; RV32IBP-NEXT:  # %bb.1:
-; RV32IBP-NEXT:    sll a7, a0, t1
+; RV32IBP-NEXT:    sll a1, a0, t0
 ; RV32IBP-NEXT:    j .LBB7_3
 ; RV32IBP-NEXT:  .LBB7_2:
-; RV32IBP-NEXT:    sll a4, a1, a2
-; RV32IBP-NEXT:    sub a3, a6, a3
-; RV32IBP-NEXT:    srli a5, a0, 1
-; RV32IBP-NEXT:    srl a3, a5, a3
-; RV32IBP-NEXT:    or a7, a4, a3
+; RV32IBP-NEXT:    sll a3, a7, a2
+; RV32IBP-NEXT:    sub a1, a6, a1
+; RV32IBP-NEXT:    srli a4, a0, 1
+; RV32IBP-NEXT:    srl a1, a4, a1
+; RV32IBP-NEXT:    or a1, a3, a1
 ; RV32IBP-NEXT:  .LBB7_3:
-; RV32IBP-NEXT:    neg a4, a2
-; RV32IBP-NEXT:    andi a5, a4, 63
-; RV32IBP-NEXT:    addi a3, a5, -32
-; RV32IBP-NEXT:    bltz a3, .LBB7_7
+; RV32IBP-NEXT:    neg a5, a2
+; RV32IBP-NEXT:    andi a3, a5, 63
+; RV32IBP-NEXT:    addi a4, a3, -32
+; RV32IBP-NEXT:    bltz a4, .LBB7_5
 ; RV32IBP-NEXT:  # %bb.4:
-; RV32IBP-NEXT:    mv t0, zero
-; RV32IBP-NEXT:    bgez a3, .LBB7_8
+; RV32IBP-NEXT:    srl a3, a7, a4
+; RV32IBP-NEXT:    bltz t0, .LBB7_6
+; RV32IBP-NEXT:    j .LBB7_7
 ; RV32IBP-NEXT:  .LBB7_5:
-; RV32IBP-NEXT:    srl a3, a0, a4
-; RV32IBP-NEXT:    sub a4, a6, a5
-; RV32IBP-NEXT:    slli a1, a1, 1
-; RV32IBP-NEXT:    sll a1, a1, a4
-; RV32IBP-NEXT:    or a4, a3, a1
-; RV32IBP-NEXT:    or a1, a7, t0
-; RV32IBP-NEXT:    bgez t1, .LBB7_9
+; RV32IBP-NEXT:    srl a4, a7, a5
+; RV32IBP-NEXT:    or a1, a1, a4
+; RV32IBP-NEXT:    srl a4, a0, a5
+; RV32IBP-NEXT:    sub a3, a6, a3
+; RV32IBP-NEXT:    slli a5, a7, 1
+; RV32IBP-NEXT:    sll a3, a5, a3
+; RV32IBP-NEXT:    or a3, a4, a3
+; RV32IBP-NEXT:    bgez t0, .LBB7_7
 ; RV32IBP-NEXT:  .LBB7_6:
 ; RV32IBP-NEXT:    sll a0, a0, a2
-; RV32IBP-NEXT:    or a0, a0, a4
-; RV32IBP-NEXT:    ret
+; RV32IBP-NEXT:    or a3, a3, a0
 ; RV32IBP-NEXT:  .LBB7_7:
-; RV32IBP-NEXT:    srl t0, a1, a4
-; RV32IBP-NEXT:    bltz a3, .LBB7_5
-; RV32IBP-NEXT:  .LBB7_8:
-; RV32IBP-NEXT:    srl a4, a1, a3
-; RV32IBP-NEXT:    or a1, a7, t0
-; RV32IBP-NEXT:    bltz t1, .LBB7_6
-; RV32IBP-NEXT:  .LBB7_9:
-; RV32IBP-NEXT:    or a0, zero, a4
+; RV32IBP-NEXT:    mv a0, a3
 ; RV32IBP-NEXT:    ret
   %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
   ret i64 %or
@@ -431,48 +416,43 @@ declare i64 @llvm.fshr.i64(i64, i64, i64)
 define i64 @ror_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: ror_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a3, a2, 63
-; RV32I-NEXT:    addi t1, a3, -32
+; RV32I-NEXT:    mv t0, a0
+; RV32I-NEXT:    andi a0, a2, 63
+; RV32I-NEXT:    addi a7, a0, -32
 ; RV32I-NEXT:    addi a6, zero, 31
-; RV32I-NEXT:    bltz t1, .LBB9_2
+; RV32I-NEXT:    bltz a7, .LBB9_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    srl a7, a1, t1
+; RV32I-NEXT:    srl a0, a1, a7
 ; RV32I-NEXT:    j .LBB9_3
 ; RV32I-NEXT:  .LBB9_2:
-; RV32I-NEXT:    srl a4, a0, a2
-; RV32I-NEXT:    sub a3, a6, a3
-; RV32I-NEXT:    slli a5, a1, 1
-; RV32I-NEXT:    sll a3, a5, a3
-; RV32I-NEXT:    or a7, a4, a3
+; RV32I-NEXT:    srl a3, t0, a2
+; RV32I-NEXT:    sub a0, a6, a0
+; RV32I-NEXT:    slli a4, a1, 1
+; RV32I-NEXT:    sll a0, a4, a0
+; RV32I-NEXT:    or a0, a3, a0
 ; RV32I-NEXT:  .LBB9_3:
-; RV32I-NEXT:    neg a4, a2
-; RV32I-NEXT:    andi a5, a4, 63
-; RV32I-NEXT:    addi a3, a5, -32
-; RV32I-NEXT:    bltz a3, .LBB9_7
+; RV32I-NEXT:    neg a5, a2
+; RV32I-NEXT:    andi a4, a5, 63
+; RV32I-NEXT:    addi a3, a4, -32
+; RV32I-NEXT:    bltz a3, .LBB9_5
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    mv t0, zero
-; RV32I-NEXT:    bgez a3, .LBB9_8
+; RV32I-NEXT:    sll a3, t0, a3
+; RV32I-NEXT:    bltz a7, .LBB9_6
+; RV32I-NEXT:    j .LBB9_7
 ; RV32I-NEXT:  .LBB9_5:
-; RV32I-NEXT:    sll a3, a1, a4
-; RV32I-NEXT:    sub a4, a6, a5
-; RV32I-NEXT:    srli a0, a0, 1
-; RV32I-NEXT:    srl a0, a0, a4
-; RV32I-NEXT:    or a4, a3, a0
-; RV32I-NEXT:    or a0, a7, t0
-; RV32I-NEXT:    bgez t1, .LBB9_9
+; RV32I-NEXT:    sll a3, t0, a5
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    sll a3, a1, a5
+; RV32I-NEXT:    sub a4, a6, a4
+; RV32I-NEXT:    srli a5, t0, 1
+; RV32I-NEXT:    srl a4, a5, a4
+; RV32I-NEXT:    or a3, a3, a4
+; RV32I-NEXT:    bgez a7, .LBB9_7
 ; RV32I-NEXT:  .LBB9_6:
 ; RV32I-NEXT:    srl a1, a1, a2
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    ret
+; RV32I-NEXT:    or a3, a3, a1
 ; RV32I-NEXT:  .LBB9_7:
-; RV32I-NEXT:    sll t0, a0, a4
-; RV32I-NEXT:    bltz a3, .LBB9_5
-; RV32I-NEXT:  .LBB9_8:
-; RV32I-NEXT:    sll a4, a0, a3
-; RV32I-NEXT:    or a0, a7, t0
-; RV32I-NEXT:    bltz t1, .LBB9_6
-; RV32I-NEXT:  .LBB9_9:
-; RV32I-NEXT:    or a1, zero, a4
+; RV32I-NEXT:    mv a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32IB-LABEL: ror_i64:
@@ -512,94 +492,84 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
 ;
 ; RV32IBB-LABEL: ror_i64:
 ; RV32IBB:       # %bb.0:
-; RV32IBB-NEXT:    andi a3, a2, 63
-; RV32IBB-NEXT:    addi t1, a3, -32
+; RV32IBB-NEXT:    mv t0, a0
+; RV32IBB-NEXT:    andi a0, a2, 63
+; RV32IBB-NEXT:    addi a7, a0, -32
 ; RV32IBB-NEXT:    addi a6, zero, 31
-; RV32IBB-NEXT:    bltz t1, .LBB9_2
+; RV32IBB-NEXT:    bltz a7, .LBB9_2
 ; RV32IBB-NEXT:  # %bb.1:
-; RV32IBB-NEXT:    srl a7, a1, t1
+; RV32IBB-NEXT:    srl a0, a1, a7
 ; RV32IBB-NEXT:    j .LBB9_3
 ; RV32IBB-NEXT:  .LBB9_2:
-; RV32IBB-NEXT:    srl a4, a0, a2
-; RV32IBB-NEXT:    sub a3, a6, a3
-; RV32IBB-NEXT:    slli a5, a1, 1
-; RV32IBB-NEXT:    sll a3, a5, a3
-; RV32IBB-NEXT:    or a7, a4, a3
+; RV32IBB-NEXT:    srl a3, t0, a2
+; RV32IBB-NEXT:    sub a0, a6, a0
+; RV32IBB-NEXT:    slli a4, a1, 1
+; RV32IBB-NEXT:    sll a0, a4, a0
+; RV32IBB-NEXT:    or a0, a3, a0
 ; RV32IBB-NEXT:  .LBB9_3:
-; RV32IBB-NEXT:    neg a4, a2
-; RV32IBB-NEXT:    andi a5, a4, 63
-; RV32IBB-NEXT:    addi a3, a5, -32
-; RV32IBB-NEXT:    bltz a3, .LBB9_7
+; RV32IBB-NEXT:    neg a5, a2
+; RV32IBB-NEXT:    andi a4, a5, 63
+; RV32IBB-NEXT:    addi a3, a4, -32
+; RV32IBB-NEXT:    bltz a3, .LBB9_5
 ; RV32IBB-NEXT:  # %bb.4:
-; RV32IBB-NEXT:    mv t0, zero
-; RV32IBB-NEXT:    bgez a3, .LBB9_8
+; RV32IBB-NEXT:    sll a3, t0, a3
+; RV32IBB-NEXT:    bltz a7, .LBB9_6
+; RV32IBB-NEXT:    j .LBB9_7
 ; RV32IBB-NEXT:  .LBB9_5:
-; RV32IBB-NEXT:    sll a3, a1, a4
-; RV32IBB-NEXT:    sub a4, a6, a5
-; RV32IBB-NEXT:    srli a0, a0, 1
-; RV32IBB-NEXT:    srl a0, a0, a4
-; RV32IBB-NEXT:    or a4, a3, a0
-; RV32IBB-NEXT:    or a0, a7, t0
-; RV32IBB-NEXT:    bgez t1, .LBB9_9
+; RV32IBB-NEXT:    sll a3, t0, a5
+; RV32IBB-NEXT:    or a0, a0, a3
+; RV32IBB-NEXT:    sll a3, a1, a5
+; RV32IBB-NEXT:    sub a4, a6, a4
+; RV32IBB-NEXT:    srli a5, t0, 1
+; RV32IBB-NEXT:    srl a4, a5, a4
+; RV32IBB-NEXT:    or a3, a3, a4
+; RV32IBB-NEXT:    bgez a7, .LBB9_7
 ; RV32IBB-NEXT:  .LBB9_6:
 ; RV32IBB-NEXT:    srl a1, a1, a2
-; RV32IBB-NEXT:    or a1, a1, a4
-; RV32IBB-NEXT:    ret
+; RV32IBB-NEXT:    or a3, a3, a1
 ; RV32IBB-NEXT:  .LBB9_7:
-; RV32IBB-NEXT:    sll t0, a0, a4
-; RV32IBB-NEXT:    bltz a3, .LBB9_5
-; RV32IBB-NEXT:  .LBB9_8:
-; RV32IBB-NEXT:    sll a4, a0, a3
-; RV32IBB-NEXT:    or a0, a7, t0
-; RV32IBB-NEXT:    bltz t1, .LBB9_6
-; RV32IBB-NEXT:  .LBB9_9:
-; RV32IBB-NEXT:    or a1, zero, a4
+; RV32IBB-NEXT:    mv a1, a3
 ; RV32IBB-NEXT:    ret
 ;
 ; RV32IBP-LABEL: ror_i64:
 ; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    andi a3, a2, 63
-; RV32IBP-NEXT:    addi t1, a3, -32
+; RV32IBP-NEXT:    mv t0, a0
+; RV32IBP-NEXT:    andi a0, a2, 63
+; RV32IBP-NEXT:    addi a7, a0, -32
 ; RV32IBP-NEXT:    addi a6, zero, 31
-; RV32IBP-NEXT:    bltz t1, .LBB9_2
+; RV32IBP-NEXT:    bltz a7, .LBB9_2
 ; RV32IBP-NEXT:  # %bb.1:
-; RV32IBP-NEXT:    srl a7, a1, t1
+; RV32IBP-NEXT:    srl a0, a1, a7
 ; RV32IBP-NEXT:    j .LBB9_3
 ; RV32IBP-NEXT:  .LBB9_2:
-; RV32IBP-NEXT:    srl a4, a0, a2
-; RV32IBP-NEXT:    sub a3, a6, a3
-; RV32IBP-NEXT:    slli a5, a1, 1
-; RV32IBP-NEXT:    sll a3, a5, a3
-; RV32IBP-NEXT:    or a7, a4, a3
+; RV32IBP-NEXT:    srl a3, t0, a2
+; RV32IBP-NEXT:    sub a0, a6, a0
+; RV32IBP-NEXT:    slli a4, a1, 1
+; RV32IBP-NEXT:    sll a0, a4, a0
+; RV32IBP-NEXT:    or a0, a3, a0
 ; RV32IBP-NEXT:  .LBB9_3:
-; RV32IBP-NEXT:    neg a4, a2
-; RV32IBP-NEXT:    andi a5, a4, 63
-; RV32IBP-NEXT:    addi a3, a5, -32
-; RV32IBP-NEXT:    bltz a3, .LBB9_7
+; RV32IBP-NEXT:    neg a5, a2
+; RV32IBP-NEXT:    andi a4, a5, 63
+; RV32IBP-NEXT:    addi a3, a4, -32
+; RV32IBP-NEXT:    bltz a3, .LBB9_5
 ; RV32IBP-NEXT:  # %bb.4:
-; RV32IBP-NEXT:    mv t0, zero
-; RV32IBP-NEXT:    bgez a3, .LBB9_8
+; RV32IBP-NEXT:    sll a3, t0, a3
+; RV32IBP-NEXT:    bltz a7, .LBB9_6
+; RV32IBP-NEXT:    j .LBB9_7
 ; RV32IBP-NEXT:  .LBB9_5:
-; RV32IBP-NEXT:    sll a3, a1, a4
-; RV32IBP-NEXT:    sub a4, a6, a5
-; RV32IBP-NEXT:    srli a0, a0, 1
-; RV32IBP-NEXT:    srl a0, a0, a4
-; RV32IBP-NEXT:    or a4, a3, a0
-; RV32IBP-NEXT:    or a0, a7, t0
-; RV32IBP-NEXT:    bgez t1, .LBB9_9
+; RV32IBP-NEXT:    sll a3, t0, a5
+; RV32IBP-NEXT:    or a0, a0, a3
+; RV32IBP-NEXT:    sll a3, a1, a5
+; RV32IBP-NEXT:    sub a4, a6, a4
+; RV32IBP-NEXT:    srli a5, t0, 1
+; RV32IBP-NEXT:    srl a4, a5, a4
+; RV32IBP-NEXT:    or a3, a3, a4
+; RV32IBP-NEXT:    bgez a7, .LBB9_7
 ; RV32IBP-NEXT:  .LBB9_6:
 ; RV32IBP-NEXT:    srl a1, a1, a2
-; RV32IBP-NEXT:    or a1, a1, a4
-; RV32IBP-NEXT:    ret
+; RV32IBP-NEXT:    or a3, a3, a1
 ; RV32IBP-NEXT:  .LBB9_7:
-; RV32IBP-NEXT:    sll t0, a0, a4
-; RV32IBP-NEXT:    bltz a3, .LBB9_5
-; RV32IBP-NEXT:  .LBB9_8:
-; RV32IBP-NEXT:    sll a4, a0, a3
-; RV32IBP-NEXT:    or a0, a7, t0
-; RV32IBP-NEXT:    bltz t1, .LBB9_6
-; RV32IBP-NEXT:  .LBB9_9:
-; RV32IBP-NEXT:    or a1, zero, a4
+; RV32IBP-NEXT:    mv a1, a3
 ; RV32IBP-NEXT:    ret
   %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
   ret i64 %or
diff --git a/test/CodeGen/RISCV/rv32zbs.ll b/test/CodeGen/RISCV/rv32zbs.ll
index df832744653..049fa6a6c7e 100644
--- a/test/CodeGen/RISCV/rv32zbs.ll
+++ b/test/CodeGen/RISCV/rv32zbs.ll
@@ -63,17 +63,14 @@ define i64 @sbclr_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    addi a3, zero, 1
 ; RV32I-NEXT:    bltz a4, .LBB2_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a2, zero
-; RV32I-NEXT:    sll a4, a3, a4
-; RV32I-NEXT:    j .LBB2_3
-; RV32I-NEXT:  .LBB2_2:
-; RV32I-NEXT:    mv a4, zero
-; RV32I-NEXT:    sll a2, a3, a2
-; RV32I-NEXT:  .LBB2_3:
-; RV32I-NEXT:    not a3, a4
+; RV32I-NEXT:    sll a2, a3, a4
 ; RV32I-NEXT:    not a2, a2
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    and a1, a3, a1
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB2_2:
+; RV32I-NEXT:    sll a2, a3, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    ret
 ;
 ; RV32IB-LABEL: sbclr_i64:
@@ -96,17 +93,10 @@ define i64 @sbclr_i64(i64 %a, i64 %b) nounwind {
 ; RV32IBS-NEXT:    addi a3, a3, -32
 ; RV32IBS-NEXT:    bltz a3, .LBB2_2
 ; RV32IBS-NEXT:  # %bb.1:
-; RV32IBS-NEXT:    mv a2, zero
-; RV32IBS-NEXT:    bset a3, zero, a3
-; RV32IBS-NEXT:    j .LBB2_3
+; RV32IBS-NEXT:    bclr a1, a1, a3
+; RV32IBS-NEXT:    ret
 ; RV32IBS-NEXT:  .LBB2_2:
-; RV32IBS-NEXT:    mv a3, zero
-; RV32IBS-NEXT:    bset a2, zero, a2
-; RV32IBS-NEXT:  .LBB2_3:
-; RV32IBS-NEXT:    not a3, a3
-; RV32IBS-NEXT:    not a2, a2
-; RV32IBS-NEXT:    and a0, a2, a0
-; RV32IBS-NEXT:    and a1, a3, a1
+; RV32IBS-NEXT:    bclr a0, a0, a2
 ; RV32IBS-NEXT:    ret
   %and = and i64 %b, 63
   %shl = shl nuw i64 1, %and
diff --git a/test/CodeGen/RISCV/rv32zbt.ll b/test/CodeGen/RISCV/rv32zbt.ll
index 24b814d67a7..9dd68466e07 100644
--- a/test/CodeGen/RISCV/rv32zbt.ll
+++ b/test/CodeGen/RISCV/rv32zbt.ll
@@ -444,51 +444,45 @@ define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ; RV32I-LABEL: fshl_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a5, a4, 63
-; RV32I-NEXT:    addi t1, a5, -32
+; RV32I-NEXT:    addi a7, a5, -32
 ; RV32I-NEXT:    addi a6, zero, 31
-; RV32I-NEXT:    bltz t1, .LBB13_2
+; RV32I-NEXT:    bltz a7, .LBB13_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll a7, a0, t1
+; RV32I-NEXT:    sll a1, a0, a7
 ; RV32I-NEXT:    j .LBB13_3
 ; RV32I-NEXT:  .LBB13_2:
-; RV32I-NEXT:    sll a7, a1, a4
+; RV32I-NEXT:    sll t0, a1, a4
 ; RV32I-NEXT:    sub a5, a6, a5
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    srl a1, a1, a5
-; RV32I-NEXT:    or a7, a7, a1
+; RV32I-NEXT:    or a1, t0, a1
 ; RV32I-NEXT:  .LBB13_3:
-; RV32I-NEXT:    not a1, a4
-; RV32I-NEXT:    andi t3, a1, 63
-; RV32I-NEXT:    addi a5, t3, -32
-; RV32I-NEXT:    srli t2, a3, 1
-; RV32I-NEXT:    bltz a5, .LBB13_7
+; RV32I-NEXT:    not t2, a4
+; RV32I-NEXT:    andi t1, t2, 63
+; RV32I-NEXT:    addi a5, t1, -32
+; RV32I-NEXT:    srli t0, a3, 1
+; RV32I-NEXT:    bltz a5, .LBB13_5
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    mv t0, zero
-; RV32I-NEXT:    bgez a5, .LBB13_8
+; RV32I-NEXT:    srl a2, t0, a5
+; RV32I-NEXT:    bltz a7, .LBB13_6
+; RV32I-NEXT:    j .LBB13_7
 ; RV32I-NEXT:  .LBB13_5:
+; RV32I-NEXT:    srl a5, t0, t2
+; RV32I-NEXT:    or a1, a1, a5
 ; RV32I-NEXT:    slli a3, a3, 31
 ; RV32I-NEXT:    srli a2, a2, 1
 ; RV32I-NEXT:    or a2, a2, a3
-; RV32I-NEXT:    srl a1, a2, a1
-; RV32I-NEXT:    sub a2, a6, t3
-; RV32I-NEXT:    slli a3, t2, 1
-; RV32I-NEXT:    sll a2, a3, a2
-; RV32I-NEXT:    or a2, a1, a2
-; RV32I-NEXT:    or a1, a7, t0
-; RV32I-NEXT:    bgez t1, .LBB13_9
+; RV32I-NEXT:    srl a2, a2, t2
+; RV32I-NEXT:    sub a3, a6, t1
+; RV32I-NEXT:    slli a5, t0, 1
+; RV32I-NEXT:    sll a3, a5, a3
+; RV32I-NEXT:    or a2, a2, a3
+; RV32I-NEXT:    bgez a7, .LBB13_7
 ; RV32I-NEXT:  .LBB13_6:
 ; RV32I-NEXT:    sll a0, a0, a4
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    ret
+; RV32I-NEXT:    or a2, a2, a0
 ; RV32I-NEXT:  .LBB13_7:
-; RV32I-NEXT:    srl t0, t2, a1
-; RV32I-NEXT:    bltz a5, .LBB13_5
-; RV32I-NEXT:  .LBB13_8:
-; RV32I-NEXT:    srl a2, t2, a5
-; RV32I-NEXT:    or a1, a7, t0
-; RV32I-NEXT:    bltz t1, .LBB13_6
-; RV32I-NEXT:  .LBB13_9:
-; RV32I-NEXT:    or a0, zero, a2
+; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    ret
 ;
 ; RV32IB-LABEL: fshl_i64:
@@ -605,54 +599,48 @@ declare i64 @llvm.fshr.i64(i64, i64, i64)
 define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ; RV32I-LABEL: fshr_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a5, a4, 63
-; RV32I-NEXT:    addi t1, a5, -32
-; RV32I-NEXT:    addi a6, zero, 31
-; RV32I-NEXT:    bltz t1, .LBB15_2
+; RV32I-NEXT:    mv t0, a0
+; RV32I-NEXT:    andi a0, a4, 63
+; RV32I-NEXT:    addi a6, a0, -32
+; RV32I-NEXT:    addi a7, zero, 31
+; RV32I-NEXT:    bltz a6, .LBB15_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    srl a7, a3, t1
+; RV32I-NEXT:    srl a0, a3, a6
 ; RV32I-NEXT:    j .LBB15_3
 ; RV32I-NEXT:  .LBB15_2:
-; RV32I-NEXT:    srl a7, a2, a4
-; RV32I-NEXT:    sub a5, a6, a5
-; RV32I-NEXT:    slli a2, a3, 1
-; RV32I-NEXT:    sll a2, a2, a5
-; RV32I-NEXT:    or a7, a7, a2
+; RV32I-NEXT:    srl a2, a2, a4
+; RV32I-NEXT:    sub a0, a7, a0
+; RV32I-NEXT:    slli a5, a3, 1
+; RV32I-NEXT:    sll a0, a5, a0
+; RV32I-NEXT:    or a0, a2, a0
 ; RV32I-NEXT:  .LBB15_3:
-; RV32I-NEXT:    not a2, a4
-; RV32I-NEXT:    andi t2, a2, 63
-; RV32I-NEXT:    addi a5, t2, -32
-; RV32I-NEXT:    slli t3, a0, 1
-; RV32I-NEXT:    bltz a5, .LBB15_7
+; RV32I-NEXT:    not t2, a4
+; RV32I-NEXT:    andi a5, t2, 63
+; RV32I-NEXT:    addi a2, a5, -32
+; RV32I-NEXT:    slli t1, t0, 1
+; RV32I-NEXT:    bltz a2, .LBB15_5
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    mv t0, zero
-; RV32I-NEXT:    bgez a5, .LBB15_8
+; RV32I-NEXT:    sll a1, t1, a2
+; RV32I-NEXT:    bltz a6, .LBB15_6
+; RV32I-NEXT:    j .LBB15_7
 ; RV32I-NEXT:  .LBB15_5:
-; RV32I-NEXT:    lui a5, 524288
-; RV32I-NEXT:    addi a5, a5, -1
-; RV32I-NEXT:    and t3, a0, a5
-; RV32I-NEXT:    sub a5, a6, t2
-; RV32I-NEXT:    srl a5, t3, a5
-; RV32I-NEXT:    srli a0, a0, 31
+; RV32I-NEXT:    sll a2, t1, t2
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    lui a2, 524288
+; RV32I-NEXT:    addi a2, a2, -1
+; RV32I-NEXT:    and a2, t0, a2
+; RV32I-NEXT:    sub a5, a7, a5
+; RV32I-NEXT:    srl a2, a2, a5
+; RV32I-NEXT:    srli a5, t0, 31
 ; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    sll a0, a0, a2
-; RV32I-NEXT:    or a1, a0, a5
-; RV32I-NEXT:    or a0, t0, a7
-; RV32I-NEXT:    bgez t1, .LBB15_9
+; RV32I-NEXT:    or a1, a1, a5
+; RV32I-NEXT:    sll a1, a1, t2
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    bgez a6, .LBB15_7
 ; RV32I-NEXT:  .LBB15_6:
 ; RV32I-NEXT:    srl a2, a3, a4
 ; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    ret
 ; RV32I-NEXT:  .LBB15_7:
-; RV32I-NEXT:    sll t0, t3, a2
-; RV32I-NEXT:    bltz a5, .LBB15_5
-; RV32I-NEXT:  .LBB15_8:
-; RV32I-NEXT:    sll a1, t3, a5
-; RV32I-NEXT:    or a0, t0, a7
-; RV32I-NEXT:    bltz t1, .LBB15_6
-; RV32I-NEXT:  .LBB15_9:
-; RV32I-NEXT:    or a1, a1, zero
 ; RV32I-NEXT:    ret
 ;
 ; RV32IB-LABEL: fshr_i64:
diff --git a/test/CodeGen/RISCV/select-binop-identity.ll b/test/CodeGen/RISCV/select-binop-identity.ll
index 79a2da033da..9739583b3f2 100644
--- a/test/CodeGen/RISCV/select-binop-identity.ll
+++ b/test/CodeGen/RISCV/select-binop-identity.ll
@@ -7,27 +7,26 @@
 ; InstCombine canonicalizes (c ? x | y : x) to (x | (c ? y : 0)) similar for
 ; other binary operations using their identity value as the constant.
 
-; TODO: We can reverse this for and/or/xor. Allowing us to pull the binop into
+; We can reverse this for and/or/xor. Allowing us to pull the binop into
 ; the basic block we create when we expand select.
 
 define signext i32 @and_select_all_ones_i32(i1 zeroext %c, i32 signext %x, i32 %y) {
 ; RV32I-LABEL: and_select_all_ones_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    bnez a0, .LBB0_2
+; RV32I-NEXT:    beqz a0, .LBB0_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    addi a1, zero, -1
+; RV32I-NEXT:    and a2, a2, a1
 ; RV32I-NEXT:  .LBB0_2:
-; RV32I-NEXT:    and a0, a1, a2
+; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: and_select_all_ones_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    bnez a0, .LBB0_2
+; RV64I-NEXT:    beqz a0, .LBB0_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    addi a1, zero, -1
+; RV64I-NEXT:    and a2, a2, a1
 ; RV64I-NEXT:  .LBB0_2:
-; RV64I-NEXT:    and a0, a1, a2
-; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sext.w a0, a2
 ; RV64I-NEXT:    ret
   %a = select i1 %c, i32 %x, i32 -1
   %b = and i32 %a, %y
@@ -37,25 +36,22 @@ define signext i32 @and_select_all_ones_i32(i1 zeroext %c, i32 signext %x, i32 %
 define i64 @and_select_all_ones_i64(i1 zeroext %c, i64 %x, i64 %y) {
 ; RV32I-LABEL: and_select_all_ones_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi a6, zero, -1
-; RV32I-NEXT:    addi a5, zero, -1
 ; RV32I-NEXT:    bnez a0, .LBB1_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a6, a2
-; RV32I-NEXT:    mv a5, a1
+; RV32I-NEXT:    and a3, a3, a1
+; RV32I-NEXT:    and a4, a4, a2
 ; RV32I-NEXT:  .LBB1_2:
-; RV32I-NEXT:    and a0, a3, a5
-; RV32I-NEXT:    and a1, a4, a6
+; RV32I-NEXT:    mv a0, a3
+; RV32I-NEXT:    mv a1, a4
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: and_select_all_ones_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi a3, zero, -1
 ; RV64I-NEXT:    bnez a0, .LBB1_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    mv a3, a1
+; RV64I-NEXT:    and a2, a2, a1
 ; RV64I-NEXT:  .LBB1_2:
-; RV64I-NEXT:    and a0, a2, a3
+; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ret
   %a = select i1 %c, i64 -1, i64 %x
   %b = and i64 %y, %a
@@ -65,20 +61,20 @@ define i64 @and_select_all_ones_i64(i1 zeroext %c, i64 %x, i64 %y) {
 define signext i32 @or_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32 signext %y) {
 ; RV32I-LABEL: or_select_all_zeros_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    bnez a0, .LBB2_2
+; RV32I-NEXT:    beqz a0, .LBB2_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a1, zero
+; RV32I-NEXT:    or a2, a2, a1
 ; RV32I-NEXT:  .LBB2_2:
-; RV32I-NEXT:    or a0, a2, a1
+; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: or_select_all_zeros_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    bnez a0, .LBB2_2
+; RV64I-NEXT:    beqz a0, .LBB2_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    mv a1, zero
+; RV64I-NEXT:    or a2, a2, a1
 ; RV64I-NEXT:  .LBB2_2:
-; RV64I-NEXT:    or a0, a2, a1
+; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ret
   %a = select i1 %c, i32 %x, i32 0
   %b = or i32 %y, %a
@@ -88,25 +84,22 @@ define signext i32 @or_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32 s
 define i64 @or_select_all_zeros_i64(i1 zeroext %c, i64 %x, i64 %y) {
 ; RV32I-LABEL: or_select_all_zeros_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a6, zero
-; RV32I-NEXT:    mv a5, zero
 ; RV32I-NEXT:    bnez a0, .LBB3_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a6, a2
-; RV32I-NEXT:    mv a5, a1
+; RV32I-NEXT:    or a3, a3, a1
+; RV32I-NEXT:    or a4, a4, a2
 ; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    or a0, a5, a3
-; RV32I-NEXT:    or a1, a6, a4
+; RV32I-NEXT:    mv a0, a3
+; RV32I-NEXT:    mv a1, a4
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: or_select_all_zeros_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    mv a3, zero
 ; RV64I-NEXT:    bnez a0, .LBB3_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    mv a3, a1
+; RV64I-NEXT:    or a2, a2, a1
 ; RV64I-NEXT:  .LBB3_2:
-; RV64I-NEXT:    or a0, a3, a2
+; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ret
   %a = select i1 %c, i64 0, i64 %x
   %b = or i64 %a, %y
@@ -116,22 +109,20 @@ define i64 @or_select_all_zeros_i64(i1 zeroext %c, i64 %x, i64 %y) {
 define signext i32 @xor_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32 signext %y) {
 ; RV32I-LABEL: xor_select_all_zeros_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a3, zero
 ; RV32I-NEXT:    bnez a0, .LBB4_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:    xor a2, a2, a1
 ; RV32I-NEXT:  .LBB4_2:
-; RV32I-NEXT:    xor a0, a2, a3
+; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: xor_select_all_zeros_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    mv a3, zero
 ; RV64I-NEXT:    bnez a0, .LBB4_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    mv a3, a1
+; RV64I-NEXT:    xor a2, a2, a1
 ; RV64I-NEXT:  .LBB4_2:
-; RV64I-NEXT:    xor a0, a2, a3
+; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ret
   %a = select i1 %c, i32 0, i32 %x
   %b = xor i32 %y, %a
@@ -141,22 +132,22 @@ define signext i32 @xor_select_all_zeros_i32(i1 zeroext %c, i32 signext %x, i32
 define i64 @xor_select_all_zeros_i64(i1 zeroext %c, i64 %x, i64 %y) {
 ; RV32I-LABEL: xor_select_all_zeros_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    bnez a0, .LBB5_2
+; RV32I-NEXT:    beqz a0, .LBB5_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a2, zero
-; RV32I-NEXT:    mv a1, zero
+; RV32I-NEXT:    xor a3, a3, a1
+; RV32I-NEXT:    xor a4, a4, a2
 ; RV32I-NEXT:  .LBB5_2:
-; RV32I-NEXT:    xor a0, a1, a3
-; RV32I-NEXT:    xor a1, a2, a4
+; RV32I-NEXT:    mv a0, a3
+; RV32I-NEXT:    mv a1, a4
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: xor_select_all_zeros_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    bnez a0, .LBB5_2
+; RV64I-NEXT:    beqz a0, .LBB5_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    mv a1, zero
+; RV64I-NEXT:    xor a2, a2, a1
 ; RV64I-NEXT:  .LBB5_2:
-; RV64I-NEXT:    xor a0, a1, a2
+; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    ret
   %a = select i1 %c, i64 %x, i64 0
   %b = xor i64 %a, %y