From 168b2b13002fcb509fbab28fce85e90160c76273 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Tue, 1 Apr 2008 20:38:36 +0000
Subject: [PATCH] Speculatively micro-optimize memory-zeroing calls on Darwin 10.

llvm-svn: 49048
---
 lib/Target/X86/X86ISelLowering.cpp | 23 ++++++++++++++++++-----
 lib/Target/X86/X86Subtarget.cpp    | 14 ++++++++++++++
 lib/Target/X86/X86Subtarget.h      |  6 ++++++
 test/CodeGen/X86/darwin-bzero.ll   |  8 ++++++++
 4 files changed, 46 insertions(+), 5 deletions(-)
 create mode 100644 test/CodeGen/X86/darwin-bzero.ll

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 275d4a04f9a..f4ec6afd540 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4560,6 +4560,14 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
   // address value and run time information about the CPU.
   if ((Align & 3) != 0 ||
       (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
+
+    // Check to see if there is a specialized entry-point for memory zeroing.
+    // Such an entry point takes no fill-value argument, so it is only usable
+    // when the value being stored is a compile-time constant zero.
+    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    const char *bzeroEntry =
+      (V && V->getValue() == 0) ? Subtarget->getBZeroEntry() : 0;
+
     MVT::ValueType IntPtr = getPointerTy();
     const Type *IntPtrTy = getTargetData()->getIntPtrType();
     TargetLowering::ArgListTy Args;
@@ -4567,15 +4575,20 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
     Entry.Node = Op.getOperand(1);
     Entry.Ty = IntPtrTy;
     Args.push_back(Entry);
-    // Extend the unsigned i8 argument to be an int value for the call.
-    Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
-    Entry.Ty = IntPtrTy;
-    Args.push_back(Entry);
+
+    if (!bzeroEntry) {
+      // Extend the unsigned i8 argument to be an int value for the call.
+      Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
+      Entry.Ty = IntPtrTy;
+      Args.push_back(Entry);
+    }
+
     Entry.Node = Op.getOperand(3);
     Args.push_back(Entry);
+    const char *Name = bzeroEntry ? bzeroEntry : "memset";
     std::pair<SDOperand,SDOperand> CallResult =
       LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
-                  false, DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
+                  false, DAG.getExternalSymbol(Name, IntPtr), Args, DAG);
     return CallResult.second;
   }
 
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 483b8a4fd92..56bb7acdb07 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -53,6 +53,20 @@ bool X86Subtarget::GVRequiresExtraLoad(const GlobalValue* GV,
   return false;
 }
 
+/// This function returns the name of a function which has an interface
+/// like the non-standard bzero function, if such a function exists on
+/// the current subtarget and it is considered preferable over
+/// memset with zero passed as the second argument. Otherwise it
+/// returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+
+  // Darwin 10 has a __bzero entry point for this purpose.
+  if (getDarwinVers() >= 10)
+    return "__bzero";
+
+  return 0;
+}
+
 /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
 /// specified arguments. If we can't run cpuid on the host, return true.
 bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 6488d474d19..c91088dcdf6 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -171,6 +171,12 @@ public:
   bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
                            bool isDirectCall) const;
 
+  /// This function returns the name of a function which has an interface
+  /// like the non-standard bzero function, if such a function exists on
+  /// the current subtarget and it is considered preferable over
+  /// memset with zero passed as the second argument. Otherwise it
+  /// returns null.
+  const char *getBZeroEntry() const;
 };
 
 namespace X86 {
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
new file mode 100644
index 00000000000..c292140e108
--- /dev/null
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | grep __bzero
+
+declare void @llvm.memset.i32(i8*, i8, i32, i32)
+
+define void @foo(i8* %p, i32 %len) {
+  call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1);
+  ret void
+}