
[WebAssembly] Model the stack evaluation order more precisely.

We currently don't represent get_local and set_local explicitly; they
are just implied by virtual register use and def. This avoids a lot of
clutter, but it does complicate stackifying: get_locals read their
operands at their position in the stack evaluation order, rather than
at their parent instruction. This patch adds code to walk the stack to
determine the precise ordering, when needed.

llvm-svn: 269854
Dan Gohman 2016-05-17 22:24:18 +00:00
parent 6eb7829d59
commit df1ae0caf4
3 changed files with 73 additions and 32 deletions
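
A note on the mechanism described in the commit message: because get_local and set_local are implicit, a use reads its register at the point where its implicit get_local lands in the stack evaluation order, not at its parent instruction. Deciding whether one use happens after another therefore means following the chain of single-use stackified defs that feeds the selected use, which is what the new loop in OneUseDominatesOtherUses below does. Below is a minimal sketch of that check over an invented toy representation (ToyInst, IsStackified, SoleConsumer, and useHappensAtOrAfter are illustrative names, not LLVM's MachineInstr/MachineRegisterInfo API), offered only to make the walk easier to follow:

// Sketch only (assumed names, not LLVM's API): each instruction may define
// one virtual register; a "stackified" register lives on the wasm value
// stack and has exactly one consumer.
#include <unordered_map>

struct ToyInst {
  int DefReg = -1; // virtual register defined by this instruction, or -1
};

// Return true if Use is evaluated at or after Selected in the stack
// evaluation order, by hopping along single-use stackified defs.
bool useHappensAtOrAfter(
    const ToyInst *Use, const ToyInst *Selected,
    const std::unordered_map<int, bool> &IsStackified,
    const std::unordered_map<int, const ToyInst *> &SoleConsumer) {
  while (Use != Selected) {
    // If Use doesn't produce a stackified value, its implicit get_local is
    // not tied to Selected's operand tree; give up conservatively.
    if (Use->DefReg < 0)
      return false;
    auto It = IsStackified.find(Use->DefReg);
    if (It == IsStackified.end() || !It->second)
      return false;
    Use = SoleConsumer.at(Use->DefReg); // hop to the value's only consumer
  }
  // The real patch additionally compares operand positions once the walk
  // reaches the selected use; that detail is glossed over here.
  return true;
}

In the real pass, the two maps correspond roughly to MFI.isVRegStackified and to following MRI.use_begin on the def's register, as in the code below.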

lib/Target/WebAssembly/WebAssemblyRegStackify.cpp

@@ -163,10 +163,12 @@ static void Query(const MachineInstr *MI, AliasAnalysis &AA,
// of memoperands as having a potential unknown memory reference.
break;
default:
// Record potential stores, unless it's a call, as calls are handled
// Record volatile accesses, unless it's a call, as calls are handled
// specially below.
if (!MI->isCall())
if (!MI->isCall()) {
Write = true;
Effects = true;
}
break;
}
}
@@ -197,24 +199,15 @@ static void Query(const MachineInstr *MI, AliasAnalysis &AA,
if (MI->isCall()) {
switch (MI->getOpcode()) {
case WebAssembly::CALL_VOID:
case WebAssembly::CALL_INDIRECT_VOID:
QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
break;
case WebAssembly::CALL_I32:
case WebAssembly::CALL_I64:
case WebAssembly::CALL_F32:
case WebAssembly::CALL_F64:
case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
break;
case WebAssembly::CALL_INDIRECT_VOID:
case WebAssembly::CALL_INDIRECT_I32:
case WebAssembly::CALL_INDIRECT_I64:
case WebAssembly::CALL_INDIRECT_F32:
case WebAssembly::CALL_INDIRECT_F64:
Read = true;
Write = true;
Effects = true;
StackPointer = true;
break;
default:
llvm_unreachable("unexpected call opcode");
}
@@ -360,7 +353,8 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
const MachineBasicBlock &MBB,
const MachineRegisterInfo &MRI,
const MachineDominatorTree &MDT,
LiveIntervals &LIS) {
LiveIntervals &LIS,
WebAssemblyFunctionInfo &MFI) {
const LiveInterval &LI = LIS.getInterval(Reg);
const MachineInstr *OneUseInst = OneUse.getParent();
@@ -384,8 +378,31 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
return false;
} else {
// Test that the use is dominated by the one selected use.
if (!MDT.dominates(OneUseInst, UseInst))
return false;
while (!MDT.dominates(OneUseInst, UseInst)) {
// Actually, dominating is over-conservative. Test that the use would
// happen after the one selected use in the stack evaluation order.
//
// This is needed as a consequence of using implicit get_locals for
// uses and implicit set_locals for defs.
if (UseInst->getDesc().getNumDefs() == 0)
return false;
const MachineOperand &MO = UseInst->getOperand(0);
if (!MO.isReg())
return false;
unsigned DefReg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
!MFI.isVRegStackified(DefReg))
return false;
assert(MRI.hasOneUse(DefReg));
const MachineOperand &NewUse = *MRI.use_begin(DefReg);
const MachineInstr *NewUseInst = NewUse.getParent();
if (NewUseInst == OneUseInst) {
if (&OneUse > &NewUse)
return false;
break;
}
UseInst = NewUseInst;
}
}
}
return true;
@@ -619,6 +636,9 @@ public:
/// Test whether the given register is present on the stack, indicating an
/// operand in the tree that we haven't visited yet. Moving a definition of
/// Reg to a point in the tree after that would change its value.
///
/// This is needed as a consequence of using implicit get_locals for
/// uses and implicit set_locals for defs.
bool IsOnStack(unsigned Reg) const {
for (const RangeTy &Range : Worklist)
for (const MachineOperand &MO : Range)
@@ -763,7 +783,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
Insert = RematerializeCheapDef(Reg, Op, Def, MBB, Insert, LIS, MFI,
MRI, TII, TRI);
} else if (CanMove &&
OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS)) {
OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
MRI, TII);
} else {
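
For readers skimming the first two hunks of this file: volatile accesses are now recorded as having unknown side effects in addition to writes, and the indirect-call cases are folded into the same callee query used for direct calls rather than being handled by a separate, maximally conservative case. A rough sketch with invented stand-in names (Kind, Effects, queryCallee, query; not the pass's real types), assuming that reading of the diff:

// Sketch only: toy stand-ins for the pass's Query/QueryCallee logic.
enum class Kind { VolatileAccess, CallVoid, CallWithResult, CallIndirect };

struct Effects {
  bool Read = false, Write = false, SideEffects = false, StackPointer = false;
};

// The real QueryCallee inspects the callee operand; here we assume the worst.
static void queryCallee(Effects &E) {
  E.Read = E.Write = E.SideEffects = E.StackPointer = true;
}

static Effects query(Kind K) {
  Effects E;
  switch (K) {
  case Kind::VolatileAccess:
    E.Write = true;       // as before the patch
    E.SideEffects = true; // new: volatile accesses also count as effects
    break;
  case Kind::CallVoid:
  case Kind::CallWithResult:
  case Kind::CallIndirect:
    queryCallee(E); // indirect calls no longer get a separate ad-hoc case
    break;
  }
  return E;
}

The operand index passed to QueryCallee in the real code (0 for void calls, 1 for value-returning ones) is the detail the toy glosses over.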

test/CodeGen/WebAssembly/reg-stackify.ll

@@ -281,15 +281,36 @@ define i32 @commute() {
; an implicit get_local for the register.
; CHECK-LABEL: no_stackify_past_use:
; CHECK: i32.call $1=, callee@FUNCTION, $0
; CHECK-NEXT: i32.const $push0=, 1
; CHECK-NEXT: i32.add $push1=, $0, $pop0
; CHECK-NEXT: i32.call $push2=, callee@FUNCTION, $pop1
; CHECK-NEXT: i32.sub $push3=, $pop2, $1
; CHECK-NEXT: i32.div_s $push4=, $pop3, $1
; CHECK-NEXT: return $pop4
declare i32 @callee(i32)
define i32 @no_stackify_past_use(i32 %arg) {
%tmp1 = call i32 @callee(i32 %arg)
%tmp2 = add i32 %arg, 1
%tmp3 = call i32 @callee(i32 %tmp2)
%tmp5 = sub i32 %tmp3, %tmp1
%tmp6 = sdiv i32 %tmp5, %tmp1
ret i32 %tmp6
}
; This is the same as no_stackify_past_use, except using a commutative operator,
; so we can reorder the operands and stackify.
; CHECK-LABEL: commute_to_fix_ordering:
; CHECK: i32.call $push[[L0:.+]]=, callee@FUNCTION, $0
; CHECK: tee_local $push[[L1:.+]]=, $1=, $pop[[L0]]
; CHECK: i32.const $push0=, 1
; CHECK: i32.add $push1=, $0, $pop0
; CHECK: i32.call $push2=, callee@FUNCTION, $pop1
; CHECK: i32.add $push3=, $1, $pop2
; CHECK: i32.mul $push4=, $1, $pop3
; CHECK: i32.mul $push4=, $pop[[L1]], $pop3
; CHECK: return $pop4
declare i32 @callee(i32)
define i32 @no_stackify_past_use(i32 %arg) {
define i32 @commute_to_fix_ordering(i32 %arg) {
%tmp1 = call i32 @callee(i32 %arg)
%tmp2 = add i32 %arg, 1
%tmp3 = call i32 @callee(i32 %tmp2)

test/CodeGen/WebAssembly/userstack.ll

@@ -56,14 +56,14 @@ define void @allocarray() {
; CHECK-NEXT: i32.load $push[[L5:.+]]=, 0($pop[[L4]])
; CHECK-NEXT: i32.const $push[[L6:.+]]=, 144{{$}}
; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L5]], $pop[[L6]]
; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L7]]), $pop[[L11]]
; CHECK-NEXT: i32.store ${{.+}}=, 0($pop[[L7]]), $pop[[L11]]
%r = alloca [33 x i32]
; CHECK-NEXT: i32.const $push[[L2:.+]]=, 24
; CHECK-NEXT: i32.add $push[[L3:.+]]=, $[[SP]], $pop[[L2]]
; CHECK: i32.const $push{{.+}}=, 24
; CHECK-NEXT: i32.add $push[[L3:.+]]=, $[[SP]], $pop{{.+}}
; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}}
; CHECK-NEXT: i32.store $push[[L0:.+]]=, 0($pop[[L3]]), $pop[[L1]]{{$}}
; CHECK-NEXT: i32.store $discard=, 12($[[SP]]), $pop[[L0]]{{$}}
; CHECK-NEXT: i32.store $discard=, 12(${{.+}}), $pop[[L0]]{{$}}
%p = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 0
store i32 1, i32* %p
%p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3
@@ -111,20 +111,20 @@ define void @allocarray_inbounds() {
; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($pop[[L3]])
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32{{$}}
; CHECK-NEXT: i32.sub $push[[L10:.+]]=, $pop[[L4]], $pop[[L5]]
; CHECK-NEXT: i32.store $[[SP:.+]]=, 0($pop[[L6]]), $pop[[L10]]{{$}}
; CHECK-NEXT: i32.store ${{.+}}=, 0($pop[[L6]]), $pop[[L10]]{{$}}
%r = alloca [5 x i32]
; CHECK: i32.const $push[[L3:.+]]=, 1
; CHECK: i32.store {{.*}}=, 12($[[SP]]), $pop[[L3]]
; CHECK: i32.store {{.*}}=, 12(${{.+}}), $pop[[L3]]
%p = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 0
store i32 1, i32* %p
; This store should have both the GEP and the FI folded into it.
; CHECK-NEXT: i32.store {{.*}}=, 24($[[SP]]), $pop
; CHECK-NEXT: i32.store {{.*}}=, 24(${{.+}}), $pop
%p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3
store i32 1, i32* %p2
call void @ext_func(i64* null);
; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer
; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32
; CHECK-NEXT: i32.add $push[[L7:.+]]=, $[[SP]], $pop[[L5]]
; CHECK-NEXT: i32.add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]]
; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]]
ret void
}