From 45f5d3d85cc277553bd085ee2e3ad43a43d8c55a Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 1 Aug 2018 11:57:58 +0000 Subject: [PATCH] [SystemZ, TableGen] Fix shift count handling The DAG combiner logic to simplify AND masks in shift counts is invalid. While it is true that the SystemZ shift instructions ignore all but the low 6 bits of the shift count, it is still invalid to simplify the AND masks while the DAG still uses the standard shift operators (which are *not* defined to match the SystemZ instruction behavior). Instead, this patch performs equivalent operations during instruction selection. For completely removing the AND, this now happens via additional DAG match patterns implemented by a multi-alternative PatFrags. For simplifying a 32-bit AND to a 16-bit AND, the existing DAG patterns were already mostly OK, they just needed an output XForm to actually truncate the immediate value. Unfortunately, the latter change also exposed a bug in TableGen: it seems XForms are currently only handled correctly for direct operands of the outermost operation node. This patch also fixes that bug by simply recurring through the whole pattern. This should be NFC for all other targets. Differential Revision: https://reviews.llvm.org/D50096 llvm-svn: 338521 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 78 ---------------------- lib/Target/SystemZ/SystemZISelLowering.h | 1 - lib/Target/SystemZ/SystemZInstrInfo.td | 48 ++++++------- lib/Target/SystemZ/SystemZOperands.td | 1 + lib/Target/SystemZ/SystemZOperators.td | 10 +++ test/CodeGen/SystemZ/shift-12.ll | 12 ++++ utils/TableGen/CodeGenDAGPatterns.cpp | 44 ++++++------ 7 files changed, 67 insertions(+), 127 deletions(-) diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 1ad0e964c1e..e76fa71dacd 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -527,10 +527,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::BSWAP); - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::ROTL); // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); @@ -5524,76 +5520,6 @@ SDValue SystemZTargetLowering::combineBSWAP( return SDValue(); } -SDValue SystemZTargetLowering::combineSHIFTROT( - SDNode *N, DAGCombinerInfo &DCI) const { - - SelectionDAG &DAG = DCI.DAG; - - // Shift/rotate instructions only use the last 6 bits of the second operand - // register. If the second operand is the result of an AND with an immediate - // value that has its last 6 bits set, we can safely remove the AND operation. - // - // If the AND operation doesn't have the last 6 bits set, we can't remove it - // entirely, but we can still truncate it to a 16-bit value. This prevents - // us from ending up with a NILL with a signed operand, which will cause the - // instruction printer to abort. - SDValue N1 = N->getOperand(1); - if (N1.getOpcode() == ISD::AND) { - SDValue AndMaskOp = N1->getOperand(1); - auto *AndMask = dyn_cast(AndMaskOp); - - // The AND mask is constant - if (AndMask) { - auto AmtVal = AndMask->getZExtValue(); - - // Bottom 6 bits are set - if ((AmtVal & 0x3f) == 0x3f) { - SDValue AndOp = N1->getOperand(0); - - // This is the only use, so remove the node - if (N1.hasOneUse()) { - // Combine the AND away - DCI.CombineTo(N1.getNode(), AndOp); - - // Return N so it isn't rechecked - return SDValue(N, 0); - - // The node will be reused, so create a new node for this one use - } else { - SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N), - N->getValueType(0), N->getOperand(0), - AndOp); - DCI.AddToWorklist(Replace.getNode()); - - return Replace; - } - - // We can't remove the AND, but we can use NILL here (normally we would - // use NILF). Only keep the last 16 bits of the mask. The actual - // transformation will be handled by .td definitions. - } else if (AmtVal >> 16 != 0) { - SDValue AndOp = N1->getOperand(0); - - auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff, - SDLoc(AndMaskOp), - AndMaskOp.getValueType()); - - auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(), - AndOp, NewMask); - - SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N), - N->getValueType(0), N->getOperand(0), - NewAnd); - DCI.AddToWorklist(Replace.getNode()); - - return Replace; - } - } - } - - return SDValue(); -} - static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code // set by the CCReg instruction using the CCValid / CCMask masks, @@ -5752,10 +5678,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); case ISD::BSWAP: return combineBSWAP(N, DCI); - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: return combineSHIFTROT(N, DCI); case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 0ca93a38a01..267e31a8521 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -602,7 +602,6 @@ private: SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 9d731226995..bb5b7aae883 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1352,8 +1352,8 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), //===----------------------------------------------------------------------===// // Logical shift left. -defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; -def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; +defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shiftop, GR32>; +def SLLG : BinaryRSY<"sllg", 0xEB0D, shiftop, GR64>; def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>; // Arithmetic shift left. @@ -1364,20 +1364,20 @@ let Defs = [CC] in { } // Logical shift right. -defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; -def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; +defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, shiftop, GR32>; +def SRLG : BinaryRSY<"srlg", 0xEB0C, shiftop, GR64>; def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>; // Arithmetic shift right. let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { - defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; - def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>; + defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, shiftop, GR32>; + def SRAG : BinaryRSY<"srag", 0xEB0A, shiftop, GR64>; def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>; } // Rotate left. -def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>; -def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>; +def RLL : BinaryRSY<"rll", 0xEB1D, shiftop, GR32>; +def RLLG : BinaryRSY<"rllg", 0xEB1C, shiftop, GR64>; // Rotate second operand left and inserted selected bits into first operand. // These can act like 32-bit operands provided that the constant start and @@ -2162,29 +2162,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y), // Complexity is added so that we match this before we match NILF on the AND // operation alone. let AddedComplexity = 4 in { - def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)), - (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)), - (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)), - (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)), - (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)), - (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)), - (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)), - (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)), - (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; } // Peepholes for turning scalar operations into block operations. diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index da682cb4e5a..7bf32bf19a4 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -357,6 +357,7 @@ def imm32zx16 : Immediate; def imm32sx16trunc : Immediate; +def imm32zx16trunc : Immediate; // Full 32-bit immediates. we need both signed and unsigned versions // because the assembler is picky. E.g. AFI requires signed operands diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 3cfe23aec41..5103867e2d9 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -697,6 +697,16 @@ class storei : PatFrag<(ops node:$addr), (store (operator), node:$addr)>; +// Create a shift operator that optionally ignores an AND of the +// shift count with an immediate if the bottom 6 bits are all set. +def imm32bottom6set : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() & 0x3f) == 0x3f; +}]>; +class shiftop + : PatFrags<(ops node:$val, node:$count), + [(operator node:$val, node:$count), + (operator node:$val, (and node:$count, imm32bottom6set))]>; + // Vector representation of all-zeros and all-ones. def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; diff --git a/test/CodeGen/SystemZ/shift-12.ll b/test/CodeGen/SystemZ/shift-12.ll index 4ebc42b44a4..53d3d5362df 100644 --- a/test/CodeGen/SystemZ/shift-12.ll +++ b/test/CodeGen/SystemZ/shift-12.ll @@ -104,3 +104,15 @@ define i32 @f10(i32 %a, i32 %sh) { %reuse = add i32 %and, %shift ret i32 %reuse } + +; Test that AND is not removed for i128 (which calls __ashlti3) +define i128 @f11(i128 %a, i32 %sh) { +; CHECK-LABEL: f11: +; CHECK: risbg %r4, %r4, 57, 191, 0 +; CHECK: brasl %r14, __ashlti3@PLT + %and = and i32 %sh, 127 + %ext = zext i32 %and to i128 + %shift = shl i128 %a, %ext + ret i128 %shift +} + diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 1abe3a88bfb..f13f8391b02 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -3946,6 +3946,24 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) { return false; } +// Promote xform function to be an explicit node wherever set. +static TreePatternNodePtr PromoteXForms(TreePatternNodePtr N) { + if (Record *Xform = N->getTransformFn()) { + N->setTransformFn(nullptr); + std::vector Children; + Children.push_back(PromoteXForms(N)); + return std::make_shared(Xform, std::move(Children), + N->getNumTypes()); + } + + if (!N->isLeaf()) + for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) { + TreePatternNodePtr Child = N->getChildShared(i); + N->setChild(i, std::move(PromoteXForms(Child))); + } + return N; +} + void CodeGenDAGPatterns::ParseOnePattern(Record *TheDef, TreePattern &Pattern, TreePattern &Result, const std::vector &InstImpResults) { @@ -4011,30 +4029,8 @@ void CodeGenDAGPatterns::ParseOnePattern(Record *TheDef, Result.error("Could not infer all types in pattern result!"); } - // Promote the xform function to be an explicit node if set. - const TreePatternNodePtr &DstPattern = Result.getOnlyTree(); - std::vector ResultNodeOperands; - for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) { - TreePatternNodePtr OpNode = DstPattern->getChildShared(ii); - if (Record *Xform = OpNode->getTransformFn()) { - OpNode->setTransformFn(nullptr); - std::vector Children; - Children.push_back(OpNode); - OpNode = std::make_shared(Xform, std::move(Children), - OpNode->getNumTypes()); - } - ResultNodeOperands.push_back(OpNode); - } - - TreePatternNodePtr DstShared = - DstPattern->isLeaf() - ? DstPattern - : std::make_shared(DstPattern->getOperator(), - std::move(ResultNodeOperands), - DstPattern->getNumTypes()); - - for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i) - DstShared->setType(i, Result.getOnlyTree()->getExtType(i)); + // Promote xform function to be an explicit node wherever set. + TreePatternNodePtr DstShared = PromoteXForms(Result.getOnlyTree()); TreePattern Temp(Result.getRecord(), DstShared, false, *this); Temp.InferAllTypes();