Improve tail call optimized call's argument lowering. Before this

commit all arguments where moved to the stack slot where they would reside on a normal function call before the lowering to the tail call stack slot. This was done to prevent arguments overwriting each other. Now only arguments sourcing from a FORMAL_ARGUMENTS node or a CopyFromReg node with virtual register (could also be a caller's argument) are lowered indirectly. --This line, and those below, will be ignored-- M X86/X86ISelLowering.cpp M X86/README.txt llvm-svn: 45867
2024-10-20 11:33:24 +02:00 · 2008-01-11 16:49:42 +00:00 · 2008-01-11 16:49:42 +00:00 · d58f91f376
commit d58f91f376
parent d2318e2f41
2 changed files with 79 additions and 91 deletions
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@ -1330,10 +1330,11 @@ L5:

 Tail call optimization improvements: Tail call optimization currently
 pushes all arguments on the top of the stack (their normal place for
-non-tail call optimized calls) before moving them to actual stack
-slot. This is done to prevent overwriting of parameters (see example
-below) that might be used, since the arguments of the callee
-overwrites caller's arguments.
+non-tail call optimized calls) that source from the callers arguments
+or  that source from a virtual register (also possibly sourcing from
+callers arguments).
+This is done to prevent overwriting of parameters (see example
+below) that might be used later.

 example:  

@ -1352,13 +1353,6 @@ arg2 of the caller.

 Possible optimizations:

- - Only push those arguments to the top of the stack that are actual
-   parameters of the caller function and have no local value in the
-   caller.
-
-   In the above example local does not need to be pushed onto the top
-   of the stack as it is definitely not a caller's function
-   parameter.

 - Analyse the actual parameters of the callee to see which would
   overwrite a caller parameter which is used by the callee and only
@ -1380,35 +1374,6 @@ Possible optimizations:
   Here we need to push the arguments because they overwrite each
   other.

-
-   Code for lowering directly onto callers arguments:
-+  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
-+  SmallVector<SDOperand, 8> MemOpChains;
-+
-+  SDOperand FramePtr;
-+  SDOperand PtrOff;
-+  SDOperand FIN;
-+  int FI = 0;
-+  // Walk the register/memloc assignments, inserting copies/loads.
-+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-+    CCValAssign &VA = ArgLocs[i];
-+    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-+    
-+    ....
-+    
-+    if (VA.isRegLoc()) {
-+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
-+    } else {
-+      assert(VA.isMemLoc());
-+      // create frame index
-+      int32_t Offset = VA.getLocMemOffset()+FPDiff;
-+      uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
-+      FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
-+      FIN = DAG.getFrameIndex(FI, MVT::i32);
-+      // store relative to framepointer
-+      MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, NULL, 0));
-+    }
-+  }
 //===---------------------------------------------------------------------===//

 main ()
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -1007,6 +1007,45 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
  return None;
 }

+
+// IsPossiblyOverriddenArgumentOfTailCall - Check if the operand could possibly
+// be overridden when lowering the outgoing arguments in a tail call. Currently
+// the implementation of this call is very conservative and assumes all
+// arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with virtual
+// registers would be overridden by direct lowering. 
+// Possible improvement:
+// Check FORMAL_ARGUMENTS corresponding MERGE_VALUES for CopyFromReg nodes
+// indicating inreg passed arguments which also need not be lowered to a safe
+// stack slot.
+static bool IsPossiblyOverriddenArgumentOfTailCall(SDOperand Op) {
+  RegisterSDNode * OpReg = NULL;
+  if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
+      (Op.getOpcode()== ISD::CopyFromReg &&
+       (OpReg = cast<RegisterSDNode>(Op.getOperand(1))) &&
+       OpReg->getReg() >= MRegisterInfo::FirstVirtualRegister))
+    return true;
+  return false;
+}
+
+// GetMemCpyWithFlags - Create a MemCpy using function's parameter flag.
+static SDOperand 
+GetMemCpyWithFlags(SelectionDAG &DAG, unsigned Flags, SDOperand From,
+                   SDOperand To, SDOperand Chain) {
+
+  unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
+                         ISD::ParamFlags::ByValAlignOffs);
+
+  unsigned  Size = (Flags & ISD::ParamFlags::ByValSize) >>
+    ISD::ParamFlags::ByValSizeOffs;
+
+  SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
+  SDOperand  SizeNode = DAG.getConstant(Size, MVT::i32);
+  SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
+
+  return DAG.getMemcpy(Chain, To, From, SizeNode, AlignNode,
+                       AlwaysInline);
+}
+
 SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
@ -1221,18 +1260,7 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
  SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
  unsigned Flags    = cast<ConstantSDNode>(FlagsOp)->getValue();
  if (Flags & ISD::ParamFlags::ByVal) {
-    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
-                           ISD::ParamFlags::ByValAlignOffs);
-
-    unsigned  Size = (Flags & ISD::ParamFlags::ByValSize) >>
-        ISD::ParamFlags::ByValSizeOffs;
-
-    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
-    SDOperand  SizeNode = DAG.getConstant(Size, MVT::i32);
-    SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
-
-    return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
-                         AlwaysInline);
+    return GetMemCpyWithFlags(DAG, Flags, Arg, PtrOff, Chain);
  } else {
    return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
  }
@ -1306,9 +1334,9 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {

  SDOperand StackPtr;

-  // Walk the register/memloc assignments, inserting copies/loads.
-  // For tail calls, lower arguments first to the stack slot where they would 
-  // normally - in case of a normal function call - be.
+  // Walk the register/memloc assignments, inserting copies/loads.  For tail
+  // calls, lower arguments which could otherwise be possibly overwritten to the
+  // stack slot where they would go on normal function calls.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
@ -1331,12 +1359,14 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
-      assert(VA.isMemLoc());
-      if (StackPtr.Val == 0)
-        StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
-
-      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
-                                             Arg));
+      if (!IsTailCall || IsPossiblyOverriddenArgumentOfTailCall(Arg)) {
+        assert(VA.isMemLoc());
+        if (StackPtr.Val == 0)
+          StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+        
+        MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
+                                               Arg));
+      }
    }
  }
  
@ -1390,52 +1420,45 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
    InFlag = Chain.getValue(1);
  }

-  // Copy from stack slots to stack slot of a tail called function. This needs
-  // to be done because if we would lower the arguments directly to their real
-  // stack slot we might end up overwriting each other.
-  // TODO: To make this more efficient (sometimes saving a store/load) we could
-  // analyse the arguments and emit this store/load/store sequence only for
-  // arguments which would be overwritten otherwise.
+  // For tail calls lower the arguments to the 'real' stack slot.
  if (IsTailCall) {
    SmallVector<SDOperand, 8> MemOpChains2;
-    SDOperand PtrOff;
    SDOperand FIN;
    int FI = 0;
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (!VA.isRegLoc()) {
+        assert(VA.isMemLoc());
+        SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
        SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
        unsigned Flags    = cast<ConstantSDNode>(FlagsOp)->getValue();
-        
-        // Get source stack slot. 
-        SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(),
-                                           getPointerTy());
-        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        // Create frame index.
        int32_t Offset = VA.getLocMemOffset()+FPDiff;
        uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
        FIN = DAG.getFrameIndex(FI, MVT::i32);
+        SDOperand Source = Arg;
+        if (IsPossiblyOverriddenArgumentOfTailCall(Arg)){
+          // Copy from stack slots to stack slot of a tail called function. This
+          // needs to be done because if we would lower the arguments directly
+          // to their real stack slot we might end up overwriting each other.
+          // Get source stack slot.
+          Source = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
+          if (StackPtr.Val == 0)
+            StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+          Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
+          if ((Flags & ISD::ParamFlags::ByVal)==0) 
+            Source = DAG.getLoad(VA.getValVT(), Chain, Source,NULL, 0);
+        } 
+
        if (Flags & ISD::ParamFlags::ByVal) {
-          // Copy relative to framepointer.
-          unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
-                                 ISD::ParamFlags::ByValAlignOffs);
-
-          unsigned  Size = (Flags & ISD::ParamFlags::ByValSize) >>
-            ISD::ParamFlags::ByValSizeOffs;
- 
-          SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
-          SDOperand  SizeNode = DAG.getConstant(Size, MVT::i32);
-          SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);
-
-          MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode, 
-                                               AlignNode,AlwaysInline));
+            // Copy relative to framepointer.
+            MemOpChains2.
+              push_back(GetMemCpyWithFlags(DAG, Flags, Source, FIN, Chain));
        } else {
-          SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff,
-                                            NULL, 0);
-          // Store relative to framepointer.
-          MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
-        }
+            // Store relative to framepointer.
+            MemOpChains2.push_back(DAG.getStore(Chain, Source, FIN, NULL, 0));
+        }            
      }
    }