From bf8e0c60a61873d5b2d2b2efd1effc587480de28 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 26 Jan 2016 04:01:11 +0000 Subject: [PATCH] [WebAssembly] Optimize memcpy/memmove/memcpy calls. These calls return their first argument, but because LLVM uses an intrinsic with a void return type, they can't use the returned attribute. Generalize the store results pass to optimize these calls too. llvm-svn: 258781 --- lib/Target/WebAssembly/README.txt | 5 - .../WebAssembly/WebAssemblyPeephole.cpp | 69 +++++++++++--- .../WebAssembly/WebAssemblyStoreResults.cpp | 94 ++++++++++++++----- test/CodeGen/WebAssembly/global.ll | 4 +- test/CodeGen/WebAssembly/mem-intrinsics.ll | 60 ++++++++++++ 5 files changed, 189 insertions(+), 43 deletions(-) create mode 100644 test/CodeGen/WebAssembly/mem-intrinsics.ll diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index 45245d532c8..5415d25c3dd 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -78,11 +78,6 @@ stores. //===---------------------------------------------------------------------===// -Memset/memcpy/memmove should be marked with the "returned" attribute somehow, -even when they are translated through intrinsics. - -//===---------------------------------------------------------------------===// - Consider implementing optimizeSelect, optimizeCompareInstr, optimizeCondBranch, optimizeLoadInstr, and/or getMachineCombinerPatterns. diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index 76ef5c061d6..39a4bf9dce5 100644 --- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -15,7 +15,10 @@ #include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "wasm-peephole" @@ -28,6 +31,7 @@ class WebAssemblyPeephole final : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -44,11 +48,36 @@ FunctionPass *llvm::createWebAssemblyPeephole() { return new WebAssemblyPeephole(); } -bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { +/// If desirable, rewrite NewReg to a discard register. +static bool MaybeRewriteToDiscard(unsigned OldReg, unsigned NewReg, + MachineOperand &MO, + WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI) { bool Changed = false; + // TODO: Handle SP/physregs + if (OldReg == NewReg && TargetRegisterInfo::isVirtualRegister(NewReg)) { + Changed = true; + unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + MO.setReg(NewReg); + MO.setIsDead(); + MFI.stackifyVReg(NewReg); + MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg); + } + return Changed; +} + +bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Store Results **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo(); + const WebAssemblyTargetLowering &TLI = + *MF.getSubtarget().getTargetLowering(); + auto &LibInfo = getAnalysis().getTLI(); + bool Changed = false; for (auto &MBB : MF) for (auto &MI : MBB) @@ -69,17 +98,33 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { // can use $discard instead. MachineOperand &MO = MI.getOperand(0); unsigned OldReg = MO.getReg(); - // TODO: Handle SP/physregs - if (OldReg == - MI.getOperand(WebAssembly::StoreValueOperandNo).getReg() && - TargetRegisterInfo::isVirtualRegister( - MI.getOperand(WebAssembly::StoreValueOperandNo).getReg())) { - Changed = true; - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - MO.setReg(NewReg); - MO.setIsDead(); - MFI.stackifyVReg(NewReg); - MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg); + unsigned NewReg = + MI.getOperand(WebAssembly::StoreValueOperandNo).getReg(); + Changed |= MaybeRewriteToDiscard(OldReg, NewReg, MO, MFI, MRI); + break; + } + case WebAssembly::CALL_I32: + case WebAssembly::CALL_I64: { + MachineOperand &Op1 = MI.getOperand(1); + if (Op1.isSymbol()) { + StringRef Name(Op1.getSymbolName()); + if (Name == TLI.getLibcallName(RTLIB::MEMCPY) || + Name == TLI.getLibcallName(RTLIB::MEMMOVE) || + Name == TLI.getLibcallName(RTLIB::MEMSET)) { + LibFunc::Func Func; + if (LibInfo.getLibFunc(Name, Func)) { + if (!MI.getOperand(2).isReg()) + report_fatal_error( + "Call to builtin function with wrong signature"); + MachineOperand &MO = MI.getOperand(0); + unsigned OldReg = MO.getReg(); + unsigned NewReg = MI.getOperand(2).getReg(); + if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg)) + report_fatal_error( + "Call to builtin function with wrong signature"); + Changed |= MaybeRewriteToDiscard(OldReg, NewReg, MO, MFI, MRI); + } + } } } } diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index a32fb171555..1d7fe387c41 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -17,12 +17,18 @@ /// potentially also exposing the store to register stackifying. These both can /// reduce get_local/set_local traffic. /// +/// This pass also performs this optimization for memcpy, memmove, and memset +/// calls, since the LLVM intrinsics for these return void so they can't use the +/// returned attribute and consequently aren't handled by the OptimizeReturned +/// pass. +/// //===----------------------------------------------------------------------===// #include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -49,6 +55,7 @@ public: AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -63,6 +70,40 @@ FunctionPass *llvm::createWebAssemblyStoreResults() { return new WebAssemblyStoreResults(); } +// Replace uses of FromReg with ToReg if they are dominated by MI. +static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI, + unsigned FromReg, unsigned ToReg, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT) { + bool Changed = false; + for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { + MachineOperand &O = *I++; + MachineInstr *Where = O.getParent(); + if (Where->getOpcode() == TargetOpcode::PHI) { + // PHIs use their operands on their incoming CFG edges rather than + // in their parent blocks. Get the basic block paired with this use + // of FromReg and check that MI's block dominates it. + MachineBasicBlock *Pred = + Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB(); + if (!MDT.dominates(&MBB, Pred)) + continue; + } else { + // For a non-PHI, check that MI dominates the instruction in the + // normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + } + Changed = true; + DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from " + << MI << "\n"); + O.setReg(ToReg); + // If the store's def was previously dead, it is no longer. But the + // dead flag shouldn't be set yet. + assert(!MI.getOperand(0).isDead() && "Unexpected dead flag"); + } + return Changed; +} + bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Store Results **********\n" @@ -71,6 +112,9 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineDominatorTree &MDT = getAnalysis(); + const WebAssemblyTargetLowering &TLI = + *MF.getSubtarget().getTargetLowering(); + auto &LibInfo = getAnalysis().getTLI(); bool Changed = false; assert(MRI.isSSA() && "StoreResults depends on SSA form"); @@ -89,36 +133,38 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::STORE_F32: case WebAssembly::STORE_F64: case WebAssembly::STORE_I32: - case WebAssembly::STORE_I64: + case WebAssembly::STORE_I64: { unsigned ToReg = MI.getOperand(0).getReg(); unsigned FromReg = MI.getOperand(WebAssembly::StoreValueOperandNo).getReg(); - for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { - MachineOperand &O = *I++; - MachineInstr *Where = O.getParent(); - if (Where->getOpcode() == TargetOpcode::PHI) { - // PHIs use their operands on their incoming CFG edges rather than - // in their parent blocks. Get the basic block paired with this use - // of FromReg and check that MI's block dominates it. - MachineBasicBlock *Pred = - Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB(); - if (!MDT.dominates(&MBB, Pred)) - continue; - } else { - // For a non-PHI, check that MI dominates the instruction in the - // normal way. - if (&MI == Where || !MDT.dominates(&MI, Where)) - continue; + Changed |= ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT); + break; + } + case WebAssembly::CALL_I32: + case WebAssembly::CALL_I64: { + MachineOperand &Op1 = MI.getOperand(1); + if (Op1.isSymbol()) { + StringRef Name(Op1.getSymbolName()); + if (Name == TLI.getLibcallName(RTLIB::MEMCPY) || + Name == TLI.getLibcallName(RTLIB::MEMMOVE) || + Name == TLI.getLibcallName(RTLIB::MEMSET)) { + LibFunc::Func Func; + if (LibInfo.getLibFunc(Name, Func)) { + if (!MI.getOperand(2).isReg()) + report_fatal_error( + "Call to builtin function with wrong signature"); + unsigned FromReg = MI.getOperand(2).getReg(); + unsigned ToReg = MI.getOperand(0).getReg(); + if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg)) + report_fatal_error( + "Call to builtin function with wrong signature"); + Changed |= + ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT); + } } - Changed = true; - DEBUG(dbgs() << "Setting operand " << O << " in " << *Where - << " from " << MI << "\n"); - O.setReg(ToReg); - // If the store's def was previously dead, it is no longer. But the - // dead flag shouldn't be set yet. - assert(!MI.getOperand(0).isDead() && "Dead flag set on store result"); } } + } } return Changed; diff --git a/test/CodeGen/WebAssembly/global.ll b/test/CodeGen/WebAssembly/global.ll index 42a8f58251c..704fb3f7dfa 100644 --- a/test/CodeGen/WebAssembly/global.ll +++ b/test/CodeGen/WebAssembly/global.ll @@ -21,8 +21,8 @@ define i32 @foo() { ; CHECK-LABEL: call_memcpy: ; CHECK-NEXT: .param i32, i32, i32{{$}} ; CHECK-NEXT: .result i32{{$}} -; CHECK-NEXT: i32.call $discard=, memcpy@FUNCTION, $0, $1, $2{{$}} -; CHECK-NEXT: return $0{{$}} +; CHECK-NEXT: i32.call $push0=, memcpy@FUNCTION, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) { tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) diff --git a/test/CodeGen/WebAssembly/mem-intrinsics.ll b/test/CodeGen/WebAssembly/mem-intrinsics.ll new file mode 100644 index 00000000000..ba2f1e702af --- /dev/null +++ b/test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -0,0 +1,60 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test memcpy, memmove, and memset intrinsics. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) + +; Test that return values are optimized. + +; CHECK-LABEL: copy_yes: +; CHECK: i32.call $push0=, memcpy@FUNCTION, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i8* @copy_yes(i8* %dst, i8* %src, i32 %len) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false) + ret i8* %dst +} + +; CHECK-LABEL: copy_no: +; CHECK: i32.call $discard=, memcpy@FUNCTION, $0, $1, $2{{$}} +; CHECK-NEXT: return{{$}} +define void @copy_no(i8* %dst, i8* %src, i32 %len) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: move_yes: +; CHECK: i32.call $push0=, memmove@FUNCTION, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i8* @move_yes(i8* %dst, i8* %src, i32 %len) { + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false) + ret i8* %dst +} + +; CHECK-LABEL: move_no: +; CHECK: i32.call $discard=, memmove@FUNCTION, $0, $1, $2{{$}} +; CHECK-NEXT: return{{$}} +define void @move_no(i8* %dst, i8* %src, i32 %len) { + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: set_yes: +; CHECK: i32.call $push0=, memset@FUNCTION, $0, $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i8* @set_yes(i8* %dst, i8 %src, i32 %len) { + call void @llvm.memset.p0i8.i32(i8* %dst, i8 %src, i32 %len, i32 1, i1 false) + ret i8* %dst +} + +; CHECK-LABEL: set_no: +; CHECK: i32.call $discard=, memset@FUNCTION, $0, $1, $2{{$}} +; CHECK-NEXT: return{{$}} +define void @set_no(i8* %dst, i8 %src, i32 %len) { + call void @llvm.memset.p0i8.i32(i8* %dst, i8 %src, i32 %len, i32 1, i1 false) + ret void +}