- Avoid using floating-point stores to implement memset unless the value being stored is zero.

- Do not try to infer GV alignment unless its type is sized. It's not possible to infer the alignment of a global that has an opaque type. llvm-svn: 100118
2024-11-24 11:42:57 +01:00 · 2010-04-01 18:19:11 +00:00 · 2010-04-01 18:19:11 +00:00 · 8728924812
commit 8728924812
parent 52c509b85d
7 changed files with 32 additions and 17 deletions
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -638,7 +638,7 @@ public:
  /// determining it.
  virtual EVT getOptimalMemOpType(uint64_t Size,
                                  unsigned DstAlign, unsigned SrcAlign,
-                                  SelectionDAG &DAG) const {
+                                  bool SafeToUseFP, SelectionDAG &DAG) const {
    return MVT::Other;
  }
  
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -3195,9 +3195,9 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
 /// is below the threshold. It returns the types of the sequence of
 /// memory ops to perform memset / memcpy by reference.
 static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
-                                     SDValue Dst, SDValue Src,
                                     unsigned Limit, uint64_t Size,
                                     unsigned DstAlign, unsigned SrcAlign,
+                                     bool SafeToUseFP,
                                     SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
@ -3207,7 +3207,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
  // the inferred alignment of the source. 'DstAlign', on the other hand, is the
  // specified alignment of the memory operation. If it is zero, that means
  // it's possible to change the alignment of the destination.
-  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, DAG);
+  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, SafeToUseFP, DAG);

  if (VT == MVT::Other) {
    VT = TLI.getPointerTy();
@ -3285,9 +3285,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
  std::string Str;
  bool CopyFromStr = isMemSrcFromString(Src, Str);
  bool isZeroStr = CopyFromStr && Str.empty();
-  if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
+  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
                                (DstAlignCanChange ? 0 : Align),
-                                (isZeroStr ? 0 : SrcAlign), DAG, TLI))
+                                (isZeroStr ? 0 : SrcAlign), true, DAG, TLI))
    return SDValue();

  if (DstAlignCanChange) {
@ -3369,9 +3369,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
  if (Align > SrcAlign)
    SrcAlign = Align;

-  if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
+  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
                                (DstAlignCanChange ? 0 : Align),
-                                SrcAlign, DAG, TLI))
+                                SrcAlign, true, DAG, TLI))
    return SDValue();

  if (DstAlignCanChange) {
@ -3436,9 +3436,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
    DstAlignCanChange = true;
-  if (!FindOptimalMemOpLowering(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
+  bool IsZero = isa<ConstantSDNode>(Src) &&
+    cast<ConstantSDNode>(Src)->isNullValue();
+  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
                                Size, (DstAlignCanChange ? 0 : Align), 0,
-                                DAG, TLI))
+                                IsZero, DAG, TLI))
    return SDValue();

  if (DstAlignCanChange) {
@ -6150,8 +6152,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
    unsigned Align = GV->getAlignment();
    if (!Align) {
      if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
-        const TargetData *TD = TLI.getTargetData();
-        Align = TD->getPreferredAlignment(GVar);
+        if (GV->getType()->getElementType()->isSized()) {
+          const TargetData *TD = TLI.getTargetData();
+          Align = TD->getPreferredAlignment(GVar);
+        }
      }
    }
    return MinAlign(Align, GVOffset);
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -5541,6 +5541,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {

 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
+                                           bool SafeToUseFP,
                                           SelectionDAG &DAG) const {
  if (this->PPCSubTarget.isPPC64()) {
    return MVT::i64;
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@ -349,7 +349,7 @@ namespace llvm {
    
    virtual EVT getOptimalMemOpType(uint64_t Size,
                                    unsigned DstAlign, unsigned SrcAlign,
-                                    SelectionDAG &DAG) const;
+                                    bool SafeToUseFP, SelectionDAG &DAG) const;

    /// getFunctionAlignment - Return the Log2 alignment of this function.
    virtual unsigned getFunctionAlignment(const Function *F) const;
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -1076,6 +1076,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
 EVT
 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                       unsigned DstAlign, unsigned SrcAlign,
+                                       bool SafeToUseFP,
                                       SelectionDAG &DAG) const {
  // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
  // linux.  This is because the stack realignment code can't handle certain
@ -1089,9 +1090,10 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
        Subtarget->getStackAlignment() >= 16) {
      if (Subtarget->hasSSE2())
        return MVT::v4i32;
-      if (Subtarget->hasSSE1())
+      if (SafeToUseFP && Subtarget->hasSSE1())
        return MVT::v4f32;
-    } else if (Size >= 8 &&
+    } else if (SafeToUseFP &&
+               Size >= 8 &&
               Subtarget->getStackAlignment() >= 8 &&
               Subtarget->hasSSE2())
      return MVT::f64;
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@ -425,7 +425,7 @@ namespace llvm {
    /// determining it.
    virtual EVT getOptimalMemOpType(uint64_t Size,
                                    unsigned DstAlign, unsigned SrcAlign,
-                                    SelectionDAG &DAG) const;
+                                    bool SafeToUseFP, SelectionDAG &DAG) const;

    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
    /// unaligned memory accesses. of the specified type.
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@ -4,10 +4,18 @@ target triple = "i386"

 declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind

-define fastcc void @t() nounwind {
+define fastcc void @t1() nounwind {
 entry:
-; CHECK: t:
+; CHECK: t1:
 ; CHECK: call memset
  call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
  unreachable
 }
+
+define fastcc void @t2(i8 signext %c) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: call memset
+  call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
+  unreachable
+}