diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index eb8e6007ab9..2c2fa62d551 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9976,6 +9976,92 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
   return false;
 }
 
+
+/// This function is called when we have proved that a SETCC node can be replaced
+/// by subtraction (and other supporting instructions) so that the result of
+/// comparison is kept in a GPR instead of CR. This function is purely for
+/// codegen purposes and has some flags to guide the codegen process.
+/// \p Size is the bit width used for the final shift (the difference is
+/// shifted right by Size - 1). \p Swap exchanges the two operands before the
+/// subtraction, and \p Complement flips the resulting bit with an XOR by 1.
+static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
+                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
+
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  // Zero extend the operands to the largest legal integer. Originally, they
+  // must be of a strictly smaller size.
+  // NOTE(review): ISD::ZERO_EXTEND is normally a one-operand node; the extra
+  // constant operand passed below looks unintended -- please confirm.
+  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
+                         DAG.getConstant(Size, DL, MVT::i32));
+  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
+                         DAG.getConstant(Size, DL, MVT::i32));
+
+  // Swap if needed. Depends on the condition code.
+  if (Swap)
+    std::swap(Op0, Op1);
+
+  // Subtract extended integers.
+  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
+
+  // Move the sign bit to the least significant position and zero out the rest.
+  // Now the least significant bit carries the result of original comparison.
+  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
+                             DAG.getConstant(Size - 1, DL, MVT::i32));
+  auto Final = Shifted;
+
+  // Complement the result if needed. Based on the condition code.
+  if (Complement)
+    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
+                        DAG.getConstant(1, DL, MVT::i64));
+
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
+}
+
+SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+
+  // Size of integers being compared has a critical role in the following
+  // analysis, so we prefer to do this when all types are legal.
+  if (!DCI.isAfterLegalizeVectorOps())
+    return SDValue();
+
+  // If all users of SETCC extend its value to a legal integer type
+  // then we replace SETCC with a subtraction
+  for (SDNode::use_iterator UI = N->use_begin(),
+       UE = N->use_end(); UI != UE; ++UI) {
+    if (UI->getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+  }
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  auto OpSize = N->getOperand(0).getValueSizeInBits();
+
+  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
+
+  if (OpSize < Size) {
+    switch (CC) {
+    default: break;
+    case ISD::SETULT:
+      return generateEquivalentSub(N, Size, false, false, DL, DAG);
+    case ISD::SETULE:
+      return generateEquivalentSub(N, Size, true, true, DL, DAG);
+    case ISD::SETUGT:
+      return generateEquivalentSub(N, Size, false, true, DL, DAG);
+    case ISD::SETUGE:
+      return generateEquivalentSub(N, Size, true, false, DL, DAG);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -10017,7 +10103,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
           !DAG.MaskedValueIsZero(N->getOperand(1),
                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
-        return SDValue();
+        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
+                                             : SDValue());
     } else {
       // This is neither a signed nor an unsigned comparison, just make sure
       // that the high bits are equal.
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 6f9cc2491c5..1723d12301d 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -977,6 +977,11 @@ namespace llvm {
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
 
+    /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
+    /// SETCC with integer subtraction when (1) there is a legal way of doing it
+    /// (2) keeping the result of comparison in GPR has performance benefit.
+    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
+
     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps, bool &UseOneConstNR,
                             bool Reciprocal) const override;
diff --git a/test/CodeGen/PowerPC/setcc-to-sub.ll b/test/CodeGen/PowerPC/setcc-to-sub.ll
new file mode 100644
index 00000000000..335bb403cd7
--- /dev/null
+++ b/test/CodeGen/PowerPC/setcc-to-sub.ll
@@ -0,0 +1,96 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 < %s | FileCheck %s
+
+%class.PB2 = type { [1 x i32], %class.PB1* }
+%class.PB1 = type { [1 x i32], i64, i64, i32 }
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test1(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ult i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test1
+; CHECK: rlwinm [[REG1:[0-9]*]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
+; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG1]], [[REG2]]
+; CHECK-NEXT: rldicl 3, [[REG3]]
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test2(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ule i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test2
+; CHECK: rlwinm [[REG1:[0-9]*]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
+; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG2]], [[REG1]]
+; CHECK-NEXT: rldicl [[REG4:[0-9]*]], [[REG3]]
+; CHECK-NEXT: xori 3, [[REG4]], 1
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test3(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ugt i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test3
+; CHECK: rlwinm [[REG1:[0-9]*]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
+; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG2]], [[REG1]]
+; CHECK-NEXT: rldicl 3, [[REG3]]
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @test4(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp uge i32 %and.i, %and.i4
+  ret i1 %cmp.i5
+
+; CHECK-LABEL: @test4
+; CHECK: rlwinm [[REG1:[0-9]*]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
+; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG1]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG4:[0-9]*]], [[REG3]]
+; CHECK-NEXT: xori 3, [[REG4]], 1
+; CHECK: blr
+
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}