mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[WebAssembly][CodeGen] IR support for WebAssembly local variables
This patch adds TargetStackID::WasmLocal. This stack holds locations of values that are only addressable by name -- not via a pointer to memory. For the WebAssembly target, these objects are lowered to WebAssembly local variables, which are managed by the WebAssembly run-time and are not addressable by linear memory. For the WebAssembly target, IR indicates that an AllocaInst should be put on TargetStackID::WasmLocal by putting it in the non-integral address space WASM_ADDRESS_SPACE_WASM_VAR, with value 1. SROA will mostly lift these allocations to SSA locals, but any alloca that reaches instruction selection (usually in non-optimized builds) will be assigned the new TargetStackID there. Loads and stores to those values are transformed to new WebAssemblyISD::LOCAL_GET / WebAssemblyISD::LOCAL_SET nodes, which then lower to the type-specific LOCAL_GET_I32 etc. instructions via tablegen patterns. Differential Revision: https://reviews.llvm.org/D101140
This commit is contained in:
parent
49cafe1d7b
commit
a2b88794ad
@ -348,6 +348,7 @@ struct ScalarEnumerationTraits<TargetStackID::Value> {
|
||||
IO.enumCase(ID, "default", TargetStackID::Default);
|
||||
IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
|
||||
IO.enumCase(ID, "scalable-vector", TargetStackID::ScalableVector);
|
||||
IO.enumCase(ID, "wasm-local", TargetStackID::WasmLocal);
|
||||
IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
|
||||
}
|
||||
};
|
||||
|
@ -24,12 +24,13 @@ namespace llvm {
|
||||
class RegScavenger;
|
||||
|
||||
namespace TargetStackID {
|
||||
enum Value {
|
||||
Default = 0,
|
||||
SGPRSpill = 1,
|
||||
ScalableVector = 2,
|
||||
NoAlloc = 255
|
||||
};
|
||||
enum Value {
|
||||
Default = 0,
|
||||
SGPRSpill = 1,
|
||||
ScalableVector = 2,
|
||||
WasmLocal = 3,
|
||||
NoAlloc = 255
|
||||
};
|
||||
}
|
||||
|
||||
/// Information about stack frame layout on the target. It holds the direction
|
||||
|
@ -661,6 +661,7 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
|
||||
case TargetStackID::SGPRSpill:
|
||||
return true;
|
||||
case TargetStackID::ScalableVector:
|
||||
case TargetStackID::WasmLocal:
|
||||
return false;
|
||||
}
|
||||
llvm_unreachable("Invalid TargetStackID::Value");
|
||||
|
@ -1099,6 +1099,7 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
|
||||
return true;
|
||||
case TargetStackID::NoAlloc:
|
||||
case TargetStackID::SGPRSpill:
|
||||
case TargetStackID::WasmLocal:
|
||||
return false;
|
||||
}
|
||||
llvm_unreachable("Invalid TargetStackID::Value");
|
||||
|
@ -239,8 +239,10 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
|
||||
Changed = true;
|
||||
}
|
||||
|
||||
// Start assigning local numbers after the last parameter.
|
||||
// Start assigning local numbers after the last parameter and after any
|
||||
// already-assigned locals.
|
||||
unsigned CurLocal = static_cast<unsigned>(MFI.getParams().size());
|
||||
CurLocal += static_cast<unsigned>(MFI.getLocals().size());
|
||||
|
||||
// Precompute the set of registers that are unused, so that we can insert
|
||||
// drops to their defs.
|
||||
|
@ -25,11 +25,13 @@
|
||||
#include "WebAssemblyMachineFunctionInfo.h"
|
||||
#include "WebAssemblySubtarget.h"
|
||||
#include "WebAssemblyTargetMachine.h"
|
||||
#include "llvm/CodeGen/Analysis.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
using namespace llvm;
|
||||
@ -39,6 +41,52 @@ using namespace llvm;
|
||||
// TODO: wasm64
|
||||
// TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions
|
||||
|
||||
// In an ideal world, when objects are added to the MachineFrameInfo by
|
||||
// FunctionLoweringInfo::set, we could somehow hook into target-specific code to
|
||||
// ensure they are assigned the right stack ID. However there isn't a hook that
|
||||
// runs between then and DAG building time, so instead we hoist stack
|
||||
// objects lazily when they are first used, and comprehensively after the DAG is
|
||||
// built via the PreprocessISelDAG hook, called by
|
||||
// SelectionDAGISel::runOnMachineFunction. We have to do it in two places
|
||||
// because we want to do it while building the selection DAG for uses of alloca,
|
||||
// but not all alloca instructions are used so we have to follow up afterwards.
|
||||
/// Returns the index of the first WebAssembly local backing the stack object
/// \p FrameIndex in \p MF, hoisting the object to locals on the first request.
/// Returns None when the object has no alloca, or its alloca is not in the
/// WebAssembly variable address space.
Optional<unsigned>
WebAssemblyFrameLowering::getLocalForStackObject(MachineFunction &MF,
                                                 int FrameIndex) {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  // Already hoisted by an earlier call: the object offset field holds the
  // index of its first local.
  if (FrameInfo.getStackID(FrameIndex) == TargetStackID::WasmLocal)
    return static_cast<unsigned>(FrameInfo.getObjectOffset(FrameIndex));

  // Objects that were not allocated in the WebAssembly variable address space
  // remain in linear memory.
  const AllocaInst *Alloca = FrameInfo.getObjectAllocation(FrameIndex);
  if (!Alloca)
    return None;
  if (!WebAssembly::isWasmVarAddressSpace(
          Alloca->getType()->getAddressSpace()))
    return None;

  // Otherwise the object lives on the named value stack, outside of linear
  // memory. Break the allocated type down into its component value types.
  SmallVector<EVT, 4> ComponentVTs;
  const WebAssemblyTargetLowering &Lowering =
      *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
  WebAssemblyFunctionInfo *WasmFI = MF.getInfo<WebAssemblyFunctionInfo>();
  ComputeValueVTs(Lowering, MF.getDataLayout(), Alloca->getAllocatedType(),
                  ComponentVTs);
  FrameInfo.setStackID(FrameIndex, TargetStackID::WasmLocal);

  // The SP offset field is repurposed to remember the index of the first
  // local belonging to this object: one past the current params and locals.
  const unsigned FirstLocal = static_cast<unsigned>(
      WasmFI->getParams().size() + WasmFI->getLocals().size());
  FrameInfo.setObjectOffset(FrameIndex, FirstLocal);

  // One WebAssembly local per non-aggregate component of the allocation.
  for (const EVT &VT : ComponentVTs)
    WasmFI->addLocal(VT.getSimpleVT());

  // The object size field is repurposed to remember how many locals were
  // allocated to this object.
  FrameInfo.setObjectSize(FrameIndex, ComponentVTs.size());
  return FirstLocal;
}
|
||||
|
||||
/// We need a base pointer in the case of having items on the stack that
|
||||
/// require stricter alignment than the stack pointer itself. Because we need
|
||||
/// to shift the stack pointer by some unknown amount to force the alignment,
|
||||
@ -314,6 +362,16 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL);
|
||||
}
|
||||
|
||||
/// Reports whether stack objects tagged with \p ID are supported. WasmLocal is
/// accepted here; every other ID is deferred to the TargetFrameLowering
/// implementation.
bool WebAssemblyFrameLowering::isSupportedStackID(
    TargetStackID::Value ID) const {
  // WebAssembly locals can only be accessed by name, not via an address in
  // linear memory, so they are kept on the dedicated WasmLocal stack.
  return ID == TargetStackID::WasmLocal ||
         TargetFrameLowering::isSupportedStackID(ID);
}
|
||||
|
||||
TargetFrameLowering::DwarfFrameBase
|
||||
WebAssemblyFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
|
||||
DwarfFrameBase Loc;
|
||||
|
@ -43,6 +43,7 @@ public:
|
||||
|
||||
bool hasFP(const MachineFunction &MF) const override;
|
||||
bool hasReservedCallFrame(const MachineFunction &MF) const override;
|
||||
bool isSupportedStackID(TargetStackID::Value ID) const override;
|
||||
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override;
|
||||
|
||||
bool needsPrologForEH(const MachineFunction &MF) const;
|
||||
@ -53,6 +54,11 @@ public:
|
||||
MachineBasicBlock::iterator &InsertStore,
|
||||
const DebugLoc &DL) const;
|
||||
|
||||
// Returns the index of the WebAssembly local to which the stack object
|
||||
// FrameIndex in MF should be allocated, or None.
|
||||
static Optional<unsigned> getLocalForStackObject(MachineFunction &MF,
|
||||
int FrameIndex);
|
||||
|
||||
static unsigned getSPReg(const MachineFunction &MF);
|
||||
static unsigned getFPReg(const MachineFunction &MF);
|
||||
static unsigned getOpcConst(const MachineFunction &MF);
|
||||
|
@ -17,6 +17,8 @@ HANDLE_NODETYPE(CALL)
|
||||
HANDLE_NODETYPE(RET_CALL)
|
||||
HANDLE_NODETYPE(RETURN)
|
||||
HANDLE_NODETYPE(ARGUMENT)
|
||||
HANDLE_NODETYPE(LOCAL_GET)
|
||||
HANDLE_NODETYPE(LOCAL_SET)
|
||||
// A wrapper node for TargetExternalSymbol, TargetGlobalAddress, and MCSymbol
|
||||
HANDLE_NODETYPE(Wrapper)
|
||||
// A special wrapper used in PIC code for __memory_base/__table_base relative
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
|
||||
#include "WebAssembly.h"
|
||||
#include "WebAssemblyTargetMachine.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/Function.h" // To access function attributes.
|
||||
@ -56,6 +57,8 @@ public:
|
||||
return SelectionDAGISel::runOnMachineFunction(MF);
|
||||
}
|
||||
|
||||
void PreprocessISelDAG() override;
|
||||
|
||||
void Select(SDNode *Node) override;
|
||||
|
||||
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
|
||||
@ -69,6 +72,18 @@ private:
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
void WebAssemblyDAGToDAGISel::PreprocessISelDAG() {
|
||||
// Stack objects that should be allocated to locals are hoisted to WebAssembly
|
||||
// locals when they are first used. However for those without uses, we hoist
|
||||
// them here. It would be nice if there were some hook to do this when they
|
||||
// are added to the MachineFrameInfo, but that's not the case right now.
|
||||
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
|
||||
for (int Idx = 0; Idx < FrameInfo.getObjectIndexEnd(); Idx++)
|
||||
WebAssemblyFrameLowering::getLocalForStackObject(*MF, Idx);
|
||||
|
||||
SelectionDAGISel::PreprocessISelDAG();
|
||||
}
|
||||
|
||||
void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
|
||||
// If we have a custom node, we already have selected!
|
||||
if (Node->isMachineOpcode()) {
|
||||
|
@ -1276,6 +1276,15 @@ static bool IsWebAssemblyGlobal(SDValue Op) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// If \p Op is a frame index node, returns whatever
/// WebAssemblyFrameLowering::getLocalForStackObject reports for that frame
/// index; for any other kind of node returns None.
static Optional<unsigned> IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) {
  if (const auto *FrameIdx = dyn_cast<FrameIndexSDNode>(Op))
    return WebAssemblyFrameLowering::getLocalForStackObject(
        DAG.getMachineFunction(), FrameIdx->getIndex());
  return None;
}
|
||||
|
||||
SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
@ -1295,6 +1304,17 @@ SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
|
||||
SN->getMemoryVT(), SN->getMemOperand());
|
||||
}
|
||||
|
||||
if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
|
||||
if (!Offset->isUndef())
|
||||
report_fatal_error("unexpected offset when storing to webassembly local",
|
||||
false);
|
||||
|
||||
SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
|
||||
SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
|
||||
SDValue Ops[] = {SN->getChain(), Idx, Value};
|
||||
return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
|
||||
}
|
||||
|
||||
return Op;
|
||||
}
|
||||
|
||||
@ -1316,6 +1336,20 @@ SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
|
||||
LN->getMemoryVT(), LN->getMemOperand());
|
||||
}
|
||||
|
||||
if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
|
||||
if (!Offset->isUndef())
|
||||
report_fatal_error(
|
||||
"unexpected offset when loading from webassembly local", false);
|
||||
|
||||
SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
|
||||
EVT LocalVT = LN->getValueType(0);
|
||||
SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
|
||||
{LN->getChain(), Idx});
|
||||
SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
|
||||
assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
|
||||
return Result;
|
||||
}
|
||||
|
||||
return Op;
|
||||
}
|
||||
|
||||
|
@ -72,6 +72,8 @@ def SDT_WebAssemblyCallSeqEnd :
|
||||
SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
|
||||
def SDT_WebAssemblyBrTable : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
|
||||
def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
|
||||
def SDT_WebAssemblyLocalGet : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
|
||||
def SDT_WebAssemblyLocalSet : SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>;
|
||||
def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
|
||||
def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
|
||||
SDTCisPtrTy<0>]>;
|
||||
@ -114,6 +116,12 @@ def WebAssemblyglobal_get :
|
||||
def WebAssemblyglobal_set :
|
||||
SDNode<"WebAssemblyISD::GLOBAL_SET", SDT_WebAssemblyGlobalSet,
|
||||
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
|
||||
def WebAssemblylocal_get :
|
||||
SDNode<"WebAssemblyISD::LOCAL_GET", SDT_WebAssemblyLocalGet,
|
||||
[SDNPHasChain, SDNPMayLoad]>;
|
||||
def WebAssemblylocal_set :
|
||||
SDNode<"WebAssemblyISD::LOCAL_SET", SDT_WebAssemblyLocalSet,
|
||||
[SDNPHasChain, SDNPMayStore]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// WebAssembly-specific Operands.
|
||||
@ -332,6 +340,10 @@ multiclass LOCAL<WebAssemblyRegClass rc, Operand global_op> {
|
||||
def : Pat<(WebAssemblyglobal_set
|
||||
vt:$src, (WebAssemblywrapper tglobaladdr:$addr)),
|
||||
(!cast<NI>("GLOBAL_SET_" # rc) tglobaladdr:$addr, vt:$src)>;
|
||||
def : Pat<(vt (WebAssemblylocal_get (i32 timm:$local))),
|
||||
(!cast<NI>("LOCAL_GET_" # rc) timm:$local)>;
|
||||
def : Pat<(WebAssemblylocal_set timm:$local, vt:$src),
|
||||
(!cast<NI>("LOCAL_SET_" # rc) timm:$local, vt:$src)>;
|
||||
}
|
||||
}
|
||||
defm "" : LOCAL<I32, global_op32>;
|
||||
|
22
test/CodeGen/WebAssembly/ir-locals-stackid.ll
Normal file
22
test/CodeGen/WebAssembly/ir-locals-stackid.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; RUN: llc -mtriple=wasm32-unknown-unknown -asm-verbose=false < %s | FileCheck %s --check-prefix=CHECKCG
|
||||
; RUN: llc -mtriple=wasm32-unknown-unknown -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECKISEL
|
||||
|
||||
%f32_cell = type float addrspace(1)*
|
||||
|
||||
; CHECKISEL-LABEL: name: ir_local_f32
|
||||
; CHECKISEL: stack:
|
||||
; CHECKISEL: id: 0, name: retval, type: default, offset: 1, size: 1, alignment: 4,
|
||||
; CHECKISEL-NEXT: stack-id: wasm-local
|
||||
|
||||
; CHECKCG-LABEL: ir_local_f32:
|
||||
; CHECKCG-NEXT: .functype ir_local_f32 (f32) -> (f32)
|
||||
; CHECKCG-NEXT: .local f32
|
||||
; CHECKCG-NEXT: local.get 0
|
||||
; CHECKCG-NEXT: local.set 1
|
||||
|
||||
define float @ir_local_f32(float %arg) {
|
||||
%retval = alloca float, addrspace(1)
|
||||
store float %arg, %f32_cell %retval
|
||||
%reloaded = load float, %f32_cell %retval
|
||||
ret float %reloaded
|
||||
}
|
87
test/CodeGen/WebAssembly/ir-locals.ll
Normal file
87
test/CodeGen/WebAssembly/ir-locals.ll
Normal file
@ -0,0 +1,87 @@
|
||||
; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false | FileCheck %s
|
||||
|
||||
%i32_cell = type i32 addrspace(1)*
|
||||
%i64_cell = type i64 addrspace(1)*
|
||||
%f32_cell = type float addrspace(1)*
|
||||
%f64_cell = type double addrspace(1)*
|
||||
|
||||
; We have a set of tests in which we set a local and then reload the
|
||||
; local. If the load immediately follows the set, the DAG combiner will
|
||||
; infer that the reloaded value is the same value that was set, which
|
||||
; isn't what we want to test. To inhibit this optimization, we include
|
||||
; an opaque call between the store and the load.
|
||||
declare void @inhibit_store_to_load_forwarding()
|
||||
|
||||
define i32 @ir_local_i32(i32 %arg) {
|
||||
; CHECK-LABEL: ir_local_i32:
|
||||
; CHECK-NEXT: .functype ir_local_i32 (i32) -> (i32)
|
||||
%retval = alloca i32, addrspace(1)
|
||||
; CHECK-NEXT: .local i32
|
||||
store i32 %arg, %i32_cell %retval
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: local.set 1
|
||||
call void @inhibit_store_to_load_forwarding()
|
||||
; CHECK-NEXT: call inhibit_store_to_load_forwarding
|
||||
%reloaded = load i32, %i32_cell %retval
|
||||
; CHECK-NEXT: local.get 1
|
||||
ret i32 %reloaded
|
||||
; CHECK-NEXT: end_function
|
||||
}
|
||||
|
||||
define i64 @ir_local_i64(i64 %arg) {
|
||||
; CHECK-LABEL: ir_local_i64:
|
||||
; CHECK-NEXT: .functype ir_local_i64 (i64) -> (i64)
|
||||
%retval = alloca i64, addrspace(1)
|
||||
; CHECK-NEXT: .local i64
|
||||
store i64 %arg, %i64_cell %retval
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: local.set 1
|
||||
call void @inhibit_store_to_load_forwarding()
|
||||
; CHECK-NEXT: call inhibit_store_to_load_forwarding
|
||||
%reloaded = load i64, %i64_cell %retval
|
||||
; See note in ir_local_i32.
|
||||
; CHECK-NEXT: local.get 1
|
||||
ret i64 %reloaded
|
||||
; CHECK-NEXT: end_function
|
||||
}
|
||||
|
||||
define float @ir_local_f32(float %arg) {
|
||||
; CHECK-LABEL: ir_local_f32:
|
||||
; CHECK-NEXT: .functype ir_local_f32 (f32) -> (f32)
|
||||
%retval = alloca float, addrspace(1)
|
||||
; CHECK-NEXT: .local f32
|
||||
store float %arg, %f32_cell %retval
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: local.set 1
|
||||
call void @inhibit_store_to_load_forwarding()
|
||||
; CHECK-NEXT: call inhibit_store_to_load_forwarding
|
||||
%reloaded = load float, %f32_cell %retval
|
||||
; CHECK-NEXT: local.get 1
|
||||
; CHECK-NEXT: end_function
|
||||
ret float %reloaded
|
||||
}
|
||||
|
||||
define double @ir_local_f64(double %arg) {
|
||||
; CHECK-LABEL: ir_local_f64:
|
||||
; CHECK-NEXT: .functype ir_local_f64 (f64) -> (f64)
|
||||
%retval = alloca double, addrspace(1)
|
||||
; CHECK-NEXT: .local f64
|
||||
store double %arg, %f64_cell %retval
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: local.set 1
|
||||
call void @inhibit_store_to_load_forwarding()
|
||||
; CHECK-NEXT: call inhibit_store_to_load_forwarding
|
||||
%reloaded = load double, %f64_cell %retval
|
||||
; CHECK-NEXT: local.get 1
|
||||
; CHECK-NEXT: end_function
|
||||
ret double %reloaded
|
||||
}
|
||||
|
||||
define void @ir_unreferenced_local() {
|
||||
; CHECK-LABEL: ir_unreferenced_local:
|
||||
; CHECK-NEXT: .functype ir_unreferenced_local () -> ()
|
||||
%unused = alloca i32, addrspace(1)
|
||||
; CHECK-NEXT: .local i32
|
||||
ret void
|
||||
; CHECK-NEXT: end_function
|
||||
}
|
Loading…
Reference in New Issue
Block a user