From d3b6b8bf2bacc05759637b5302adb76aede45966 Mon Sep 17 00:00:00 2001
From: Daniel Dunbar
Date: Fri, 8 Oct 2010 02:07:32 +0000
Subject: [PATCH] Revert "Reimplement (part of) the or -> add optimization.
 Matching 'or' into 'add'", which seems to have broken just about everything.

llvm-svn: 116033
---
 lib/Target/X86/X86InstrCompiler.td | 80 +++++++--------------------
 lib/Target/X86/X86InstrInfo.cpp    | 89 ++++++++++--------------------
 lib/Target/X86/X86InstrInfo.td     | 14 +++++
 lib/Target/X86/X86MCInstLower.cpp  |  8 ---
 4 files changed, 65 insertions(+), 126 deletions(-)

diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 2e935db9f5a..b8e1b808171 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -997,63 +997,6 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
 def : Pat<(i64 (zext def32:$src)),
           (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

-//===----------------------------------------------------------------------===//
-// Pattern match OR as ADD
-//===----------------------------------------------------------------------===//
-
-// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
-// 3-addressified into an LEA instruction to avoid copies. However, we also
-// want to finally emit these instructions as an or at the end of the code
-// generator to make the generated code easier to read. To do this, we select
-// into "disjoint bits" pseudo ops.
-
-// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
-def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
-  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
-    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
-  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
-  APInt KnownZero0, KnownOne0;
-  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
-  APInt KnownZero1, KnownOne1;
-  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
-  return (~KnownZero0 & ~KnownZero1) == 0;
-}]>;
-
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
-
-let isCommutable = 1, isConvertibleToThreeAddress = 1,
-    Constraints = "$src1 = $dst" in {
-def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                   "", // orw/addw REG, REG
-                   [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
-def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                   "", // orl/addl REG, REG
-                   [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
-def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-                   "", // orq/addq REG, REG
-                   [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
-}
-
-def : Pat<(or_is_add GR16:$src1, imm:$src2),
-          (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR32:$src1, imm:$src2),
-          (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
-          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
-          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-} // AddedComplexity
-
-
 //===----------------------------------------------------------------------===//
 // Some peepholes
 //===----------------------------------------------------------------------===//
@@ -1366,8 +1309,27 @@ def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
 def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
           (SETB_C32r)>;

-
-
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before the selecting to OR
+def : Pat<(or_is_add GR16:$src1, imm:$src2),
+          (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or_is_add GR32:$src1, imm:$src2),
+          (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
+          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
+          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(or_is_add GR16:$src1, GR16:$src2),
+          (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or_is_add GR32:$src1, GR32:$src2),
+          (ADD32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
+          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
+          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(or_is_add GR64:$src1, GR64:$src2),
+          (ADD64rr GR64:$src1, GR64:$src2)>;
+} // AddedComplexity

 //===----------------------------------------------------------------------===//
 // EFLAGS-defining Patterns
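The or_is_add fragment in the hunks above keys on a carry-free identity: if no bit position can be set in both operands, then x | y and x + y produce the same value, because the addition generates no carries. Its (~KnownZero0 & ~KnownZero1) == 0 test is the compile-time, known-bits form of the runtime check (x & y) == 0. A minimal standalone sketch of that identity in plain C++ (not the LLVM API; the names and values are illustrative):

#include <cassert>
#include <cstdint>

// If the set bits of a and b are provably disjoint, OR and ADD agree:
// no bit position can generate a carry, so a | b == a + b.
static bool or_is_add(uint32_t a, uint32_t b) {
  return (a & b) == 0;
}

int main() {
  uint32_t base  = 0xFFFFFFF0; // low four bits known zero (e.g. aligned base)
  uint32_t index = 0x0000000C; // only low four bits possibly set
  assert(or_is_add(base, index));
  assert((base | index) == (base + index)); // both are 0xFFFFFFFC
  return 0;
}

This is why the optimization can select such an OR as an ADD: ADD is convertible to a three-address LEA, which an OR is not.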
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 12a5a955152..5abdce45e27 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -54,11 +54,6 @@ ReMatPICStubLoad("remat-pic-stub-load",
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
     TM(tm), RI(tm, *this) {
-  enum {
-    TB_NOT_REVERSABLE = 1U << 31,
-    TB_FLAGS = TB_NOT_REVERSABLE
-  };
-
   static const unsigned OpTbl2Addr[][2] = {
     { X86::ADC32ri,     X86::ADC32mi },
     { X86::ADC32ri8,    X86::ADC32mi8 },
@@ -69,15 +64,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::ADD16ri,     X86::ADD16mi },
     { X86::ADD16ri8,    X86::ADD16mi8 },
     { X86::ADD16rr,     X86::ADD16mr },
-    { X86::ADD16rr_DB,  X86::ADD16mr | TB_NOT_REVERSABLE },
     { X86::ADD32ri,     X86::ADD32mi },
     { X86::ADD32ri8,    X86::ADD32mi8 },
     { X86::ADD32rr,     X86::ADD32mr },
-    { X86::ADD32rr_DB,  X86::ADD32mr | TB_NOT_REVERSABLE },
     { X86::ADD64ri32,   X86::ADD64mi32 },
     { X86::ADD64ri8,    X86::ADD64mi8 },
     { X86::ADD64rr,     X86::ADD64mr },
-    { X86::ADD64rr_DB,  X86::ADD64mr | TB_NOT_REVERSABLE },
     { X86::ADD8ri,      X86::ADD8mi },
     { X86::ADD8rr,      X86::ADD8mr },
     { X86::AND16ri,     X86::AND16mi },
@@ -222,21 +214,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)

   for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
     unsigned RegOp = OpTbl2Addr[i][0];
-    unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
-    assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
-    RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
-
-    // If this is not a reversable operation (because there is a many->one)
-    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
-    if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
-      continue;
-
+    unsigned MemOp = OpTbl2Addr[i][1];
+    if (!RegOp2MemOpTable2Addr.insert(std::make_pair(RegOp,
+                                              std::make_pair(MemOp,0))).second)
+      assert(false && "Duplicated entries?");
     // Index 0, folded load and store, no alignment requirement.
     unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
-
-    assert(!MemOp2RegOpTable.count(MemOp) &&
-           "Duplicated entries in unfolding maps?");
-    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+    if (!MemOp2RegOpTable.insert(std::make_pair(MemOp,
+                                                std::make_pair(RegOp,
+                                                              AuxInfo))).second)
+      assert(false && "Duplicated entries in unfolding maps?");
   }

   // If the third value is 1, then it's folding either a load or a store.
@@ -466,11 +453,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::ADC32rr,         X86::ADC32rm, 0 },
     { X86::ADC64rr,         X86::ADC64rm, 0 },
     { X86::ADD16rr,         X86::ADD16rm, 0 },
-    { X86::ADD16rr_DB,      X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD32rr,         X86::ADD32rm, 0 },
-    { X86::ADD32rr_DB,      X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD64rr,         X86::ADD64rm, 0 },
-    { X86::ADD64rr_DB,      X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD8rr,          X86::ADD8rm, 0 },
     { X86::ADDPDrr,         X86::ADDPDrm, 16 },
     { X86::ADDPSrr,         X86::ADDPSrm, 16 },
@@ -665,23 +649,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)

   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
     unsigned RegOp = OpTbl2[i][0];
-    unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
+    unsigned MemOp = OpTbl2[i][1];
     unsigned Align = OpTbl2[i][2];
-
-    assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
-    RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
-
-
-    // If this is not a reversable operation (because there is a many->one)
-    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
-    if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
-      continue;
-
+    if (!RegOp2MemOpTable2.insert(std::make_pair(RegOp,
+                                          std::make_pair(MemOp,Align))).second)
+      assert(false && "Duplicated entries?");
     // Index 2, folded load
     unsigned AuxInfo = 2 | (1 << 4);
-    assert(!MemOp2RegOpTable.count(MemOp) &&
-           "Duplicated entries in unfolding maps?");
-    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
+    if (!MemOp2RegOpTable.insert(std::make_pair(MemOp,
+                                      std::make_pair(RegOp, AuxInfo))).second)
+      assert(false && "Duplicated entries in unfolding maps?");
   }
 }

@@ -1156,8 +1133,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   case X86::ADD16ri8:
     addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
     break;
-  case X86::ADD16rr:
-  case X86::ADD16rr_DB: {
+  case X86::ADD16rr: {
     unsigned Src2 = MI->getOperand(2).getReg();
     bool isKill2 = MI->getOperand(2).isKill();
     unsigned leaInReg2 = 0;
@@ -1370,27 +1346,18 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                               Src, isKill, -1);
     break;
   case X86::ADD64rr:
-  case X86::ADD64rr_DB:
-  case X86::ADD32rr:
-  case X86::ADD32rr_DB: {
+  case X86::ADD32rr: {
     assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-    unsigned Opc;
-    TargetRegisterClass *RC;
-    if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
-      Opc = X86::LEA64r;
-      RC = X86::GR64_NOSPRegisterClass;
-    } else {
-      Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
-      RC = X86::GR32_NOSPRegisterClass;
-    }
-
-
+    unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
+      : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
     unsigned Src2 = MI->getOperand(2).getReg();
     bool isKill2 = MI->getOperand(2).isKill();

     // LEA can't handle RSP.
     if (TargetRegisterInfo::isVirtualRegister(Src2) &&
-        !MF.getRegInfo().constrainRegClass(Src2, RC))
+        !MF.getRegInfo().constrainRegClass(Src2,
+                            MIOpc == X86::ADD64rr ? X86::GR64_NOSPRegisterClass :
+                            X86::GR32_NOSPRegisterClass))
       return 0;

     NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1401,8 +1368,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       LV->replaceKillInstruction(Src2, MI, NewMI);
     break;
   }
-  case X86::ADD16rr:
-  case X86::ADD16rr_DB: {
+  case X86::ADD16rr: {
     if (DisableLEA16)
       return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
     assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -2630,8 +2596,13 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
     OpcodeTablePtr = &RegOp2MemOpTable2;
   }

-  if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
-    return true;
+  if (OpcodeTablePtr) {
+    // Find the Opcode to fuse
+    DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+      OpcodeTablePtr->find(Opc);
+    if (I != OpcodeTablePtr->end())
+      return true;
+  }
   return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
 }
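Both restored table-initialization loops above detect duplicated entries through the map insert contract: insert() returns an (iterator, bool) pair whose bool is false when the key is already present, so a failed insert is exactly the duplicate-entry condition the asserts guard. A small sketch of the same idiom, with std::map standing in for LLVM's DenseMap and made-up opcode values:

#include <cassert>
#include <map>
#include <utility>

int main() {
  // Key: register-form opcode; value: (memory-form opcode, alignment).
  std::map<unsigned, std::pair<unsigned, unsigned>> RegOp2MemOp;

  const unsigned Table[][2] = {
    { 10, 110 },  // stand-ins for { X86::ADD32rr, X86::ADD32mr }
    { 11, 111 },  // stand-ins for { X86::ADD32ri, X86::ADD32mi }
  };

  for (const auto &Row : Table) {
    // insert() refuses to overwrite: .second is false when the key exists,
    // which is the duplicated-table-entry condition being asserted against.
    bool Inserted =
        RegOp2MemOp.insert(std::make_pair(Row[0],
                                          std::make_pair(Row[1], 0u))).second;
    assert(Inserted && "Duplicated entries?");
    (void)Inserted; // keep release (NDEBUG) builds warning-free
  }
  return 0;
}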
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 3f420f5419a..c1e15e465b9 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -544,6 +544,20 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
   return N->hasOneUse();
 }]>;

+// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero0, KnownOne0;
+  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+  APInt KnownZero1, KnownOne1;
+  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+  return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction list.
 //
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 662717bae71..29ee2bfe8bf 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -347,7 +347,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   }

   // Handle a few special cases to eliminate operand modifiers.
-ReSimplify:
   switch (OutMI.getOpcode()) {
   case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
     lower_lea64_32mem(&OutMI, 1);
@@ -434,13 +433,6 @@ ReSimplify:
     break;
   }

-  // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
-  // this with an ugly goto in case the resultant OR uses EAX and needs the
-  // short form.
-  case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
-  case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
-  case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
-
   // The assembler backend wants to see branches in their small form and relax
   // them to their large form. The JIT can only handle the large form because
   // it does not do relaxation. For now, translate the large form to the
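The deleted MCInstLower hunk rewrote the _DB pseudos to plain ORs and then jumped back to the top of the switch, so the rewritten opcode could be simplified again (e.g. picking up a shorter encoding when the OR uses EAX). A stripped-down sketch of that goto-driven re-simplification loop, using hypothetical opcodes rather than the real X86:: enum:

#include <cstdio>

// Hypothetical opcodes; the real code dispatches on the X86 instruction enum.
enum Opcode { ADD32rr_DB, OR32rr, OR32rr_SHORT, MOV32rr };

// Lower one opcode, re-entering the switch after every rewrite so the new
// opcode gets its own chance at further simplification.
static Opcode lower(Opcode Op) {
ReSimplify:
  switch (Op) {
  case ADD32rr_DB:     // "disjoint bits" pseudo: emit as a plain OR...
    Op = OR32rr;
    goto ReSimplify;   // ...then let the OR case run as well.
  case OR32rr:         // pretend a shorter encoding applies here
    Op = OR32rr_SHORT;
    goto ReSimplify;
  default:
    return Op;         // nothing more to simplify
  }
}

int main() {
  std::printf("%s\n", lower(ADD32rr_DB) == OR32rr_SHORT ? "ok" : "bug");
  return 0;
}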