1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[WebAssembly] Use named operands to identify loads and stores

Summary:
Uses the named operands tablegen feature to look up the indices of
offset, address, and p2align operands for all load and store
instructions. This replaces brittle, incorrect logic for identifying
loads and stores when eliminating frame indices, which previously
crashed on bulk-memory ops. It also cleans up the SetP2Alignment pass.

Reviewers: aheejin, dschuff

Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59007

llvm-svn: 355770
This commit is contained in:
Thomas Lively 2019-03-09 04:31:37 +00:00
parent 7ca29a3649
commit f98161801a
9 changed files with 107 additions and 135 deletions

View File

@ -331,14 +331,6 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
}
}
/// The operand number of the load or store address in load/store instructions.
static const unsigned LoadAddressOperandNo = 3;
static const unsigned StoreAddressOperandNo = 2;
/// The operand number of the load or store p2align in load/store instructions.
static const unsigned LoadP2AlignOperandNo = 1;
static const unsigned StoreP2AlignOperandNo = 0;
/// This is used to indicate block signatures.
enum class ExprType : unsigned {
Void = 0x40,

View File

@ -11,6 +11,7 @@
///
//===----------------------------------------------------------------------===//
let UseNamedOperandTable = 1 in
multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r = "",
string asmstr_s = "", bits<32> atomic_op = -1> {
@ -810,9 +811,9 @@ multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string name,
defm "" :
ATOMIC_I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
rc:$new),
rc:$new_),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
!strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"),
!strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
!strconcat(name, "\t${off}${p2align}"), atomic_op>;
}

View File

@ -27,6 +27,10 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "WebAssemblyGenInstrInfo.inc"
// defines WebAssembly::getNamedOperandIdx
#define GET_INSTRINFO_NAMED_OPS
#include "WebAssemblyGenInstrInfo.inc"
WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
: WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN,
WebAssembly::ADJCALLSTACKUP,

View File

@ -21,8 +21,17 @@
#define GET_INSTRINFO_HEADER
#include "WebAssemblyGenInstrInfo.inc"
#define GET_INSTRINFO_OPERAND_ENUM
#include "WebAssemblyGenInstrInfo.inc"
namespace llvm {
namespace WebAssembly {
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
}
class WebAssemblySubtarget;
class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo {

View File

@ -52,7 +52,7 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off),
// Defines atomic and non-atomic loads, regular and extending.
multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> {
let mayLoad = 1 in
let mayLoad = 1, UseNamedOperandTable = 1 in
defm "": I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off),
@ -294,7 +294,7 @@ def : LoadPatExternSymOffOnly<i64, extloadi32, LOAD32_U_I64>;
// Defines atomic and non-atomic stores, regular and truncating
multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> {
let mayStore = 1 in
let mayStore = 1, UseNamedOperandTable = 1 in
defm "" : I<(outs),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
(outs),

View File

@ -41,12 +41,12 @@ def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
// Load: v128.load
multiclass SIMDLoad<ValueType vec_t> {
let mayLoad = 1 in
let mayLoad = 1, UseNamedOperandTable = 1 in
defm LOAD_#vec_t :
SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$align, offset32_op:$off), [],
"v128.load\t$dst, ${off}(${addr})$align",
"v128.load\t$off$align", 0>;
SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"v128.load\t$dst, ${off}(${addr})$p2align",
"v128.load\t$off$p2align", 0>;
}
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
@ -65,12 +65,12 @@ def : LoadPatExternSymOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
// Store: v128.store
multiclass SIMDStore<ValueType vec_t> {
let mayStore = 1 in
let mayStore = 1, UseNamedOperandTable = 1 in
defm STORE_#vec_t :
SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec),
(outs), (ins P2Align:$align, offset32_op:$off), [],
"v128.store\t${off}(${addr})$align, $vec",
"v128.store\t$off$align", 1>;
SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"v128.store\t${off}(${addr})$p2align, $vec",
"v128.store\t$off$p2align", 1>;
}
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {

View File

@ -70,13 +70,16 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
// If this is the address operand of a load or store, make it relative to SP
// and fold the frame offset directly in.
if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) ||
(MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) {
assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0);
int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset;
unsigned AddrOperandNum = WebAssembly::getNamedOperandIdx(
MI.getOpcode(), WebAssembly::OpName::addr);
if (AddrOperandNum == FIOperandNum) {
unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx(
MI.getOpcode(), WebAssembly::OpName::off);
assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0);
int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset;
if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
MI.getOperand(FIOperandNum - 1).setImm(Offset);
MI.getOperand(OffsetOperandNum).setImm(Offset);
MI.getOperand(FIOperandNum)
.ChangeToRegister(FrameRegister, /*IsDef=*/false);
return;

View File

@ -13,6 +13,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyInstrInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@ -83,114 +84,11 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
for (auto &MBB : MF) {
for (auto &MI : MBB) {
switch (MI.getOpcode()) {
case WebAssembly::LOAD_I32:
case WebAssembly::LOAD_I64:
case WebAssembly::LOAD_F32:
case WebAssembly::LOAD_F64:
case WebAssembly::LOAD_v16i8:
case WebAssembly::LOAD_v8i16:
case WebAssembly::LOAD_v4i32:
case WebAssembly::LOAD_v2i64:
case WebAssembly::LOAD_v4f32:
case WebAssembly::LOAD_v2f64:
case WebAssembly::LOAD8_S_I32:
case WebAssembly::LOAD8_U_I32:
case WebAssembly::LOAD16_S_I32:
case WebAssembly::LOAD16_U_I32:
case WebAssembly::LOAD8_S_I64:
case WebAssembly::LOAD8_U_I64:
case WebAssembly::LOAD16_S_I64:
case WebAssembly::LOAD16_U_I64:
case WebAssembly::LOAD32_S_I64:
case WebAssembly::LOAD32_U_I64:
case WebAssembly::ATOMIC_LOAD_I32:
case WebAssembly::ATOMIC_LOAD8_U_I32:
case WebAssembly::ATOMIC_LOAD16_U_I32:
case WebAssembly::ATOMIC_LOAD_I64:
case WebAssembly::ATOMIC_LOAD8_U_I64:
case WebAssembly::ATOMIC_LOAD16_U_I64:
case WebAssembly::ATOMIC_LOAD32_U_I64:
case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
case WebAssembly::ATOMIC_RMW8_U_AND_I32:
case WebAssembly::ATOMIC_RMW8_U_AND_I64:
case WebAssembly::ATOMIC_RMW8_U_OR_I32:
case WebAssembly::ATOMIC_RMW8_U_OR_I64:
case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
case WebAssembly::ATOMIC_RMW16_U_AND_I32:
case WebAssembly::ATOMIC_RMW16_U_AND_I64:
case WebAssembly::ATOMIC_RMW16_U_OR_I32:
case WebAssembly::ATOMIC_RMW16_U_OR_I64:
case WebAssembly::ATOMIC_RMW16_U_XOR_I32:
case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW_ADD_I32:
case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
case WebAssembly::ATOMIC_RMW_SUB_I32:
case WebAssembly::ATOMIC_RMW32_U_SUB_I64:
case WebAssembly::ATOMIC_RMW_AND_I32:
case WebAssembly::ATOMIC_RMW32_U_AND_I64:
case WebAssembly::ATOMIC_RMW_OR_I32:
case WebAssembly::ATOMIC_RMW32_U_OR_I64:
case WebAssembly::ATOMIC_RMW_XOR_I32:
case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I32:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW_ADD_I64:
case WebAssembly::ATOMIC_RMW_SUB_I64:
case WebAssembly::ATOMIC_RMW_AND_I64:
case WebAssembly::ATOMIC_RMW_OR_I64:
case WebAssembly::ATOMIC_RMW_XOR_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I64:
case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
case WebAssembly::ATOMIC_NOTIFY:
case WebAssembly::ATOMIC_WAIT_I32:
case WebAssembly::ATOMIC_WAIT_I64:
rewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
break;
case WebAssembly::STORE_I32:
case WebAssembly::STORE_I64:
case WebAssembly::STORE_F32:
case WebAssembly::STORE_F64:
case WebAssembly::STORE_v16i8:
case WebAssembly::STORE_v8i16:
case WebAssembly::STORE_v4i32:
case WebAssembly::STORE_v2i64:
case WebAssembly::STORE_v4f32:
case WebAssembly::STORE_v2f64:
case WebAssembly::STORE8_I32:
case WebAssembly::STORE16_I32:
case WebAssembly::STORE8_I64:
case WebAssembly::STORE16_I64:
case WebAssembly::STORE32_I64:
case WebAssembly::ATOMIC_STORE_I32:
case WebAssembly::ATOMIC_STORE8_I32:
case WebAssembly::ATOMIC_STORE16_I32:
case WebAssembly::ATOMIC_STORE_I64:
case WebAssembly::ATOMIC_STORE8_I64:
case WebAssembly::ATOMIC_STORE16_I64:
case WebAssembly::ATOMIC_STORE32_I64:
rewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo);
break;
default:
break;
int16_t P2AlignOpNum = WebAssembly::getNamedOperandIdx(
MI.getOpcode(), WebAssembly::OpName::p2align);
if (P2AlignOpNum != -1) {
rewriteP2Align(MI, P2AlignOpNum);
Changed = true;
}
}
}

View File

@ -140,3 +140,68 @@ define void @memset_1024(i8* %dest, i8 %val) {
call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0)
ret void
}
; The following tests check that frame index elimination works for
; bulk memory instructions. The stack pointer is bumped by 16 instead
; of 10 because the stack pointer in WebAssembly is currently always
; 16-byte aligned, even in leaf functions, although it is not written
; back to the global in this case.
; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogInserter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.
; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
; BULK-MEM-NEXT: return
; Copies 10 bytes from a stack-allocated buffer into the caller-supplied %dst,
; so the stack object is the source operand of the copy.
define void @memcpy_alloca_src(i8* %dst) {
; 10-byte local buffer living in this function's stack frame.
%a = alloca [10 x i8]
; View the array as a plain byte pointer, as the intrinsic expects.
%p = bitcast [10 x i8]* %a to i8*
; Arguments: destination, source, byte count, is-volatile flag.
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %p, i32 10, i1 false)
ret void
}
; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
; Copies 10 bytes from the caller-supplied %src into a stack-allocated buffer,
; so the stack object is the destination operand of the copy.
define void @memcpy_alloca_dst(i8* %src) {
; 10-byte local buffer living in this function's stack frame.
%a = alloca [10 x i8]
; View the array as a plain byte pointer, as the intrinsic expects.
%p = bitcast [10 x i8]* %a to i8*
; Arguments: destination, source, byte count, is-volatile flag.
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %src, i32 10, i1 false)
ret void
}
; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 16
; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 6
; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 10
; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
; BULK-MEM-NEXT: return
; Fills a 10-byte stack-allocated buffer with the byte %val, so the stack
; object is the destination operand of the fill.
define void @memset_alloca(i8 %val) {
; 10-byte local buffer living in this function's stack frame.
%a = alloca [10 x i8]
; View the array as a plain byte pointer, as the intrinsic expects.
%p = bitcast [10 x i8]* %a to i8*
; Arguments: destination, fill byte, byte count, is-volatile flag.
call void @llvm.memset.p0i8.i32(i8* %p, i8 %val, i32 10, i1 false)
ret void
}