mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[AArch64] LSE Atomics reorg - part 1
Add memory synchronization semantics to LSE Atomics. The memory semantics feature will be added in a subsequent patch. In this patch, several corrections were added to the existing LSE Atomics implementation, based on the ARM Errata D11904 from 05/12/2017. Patch by: steleman Differential Revision: https://reviews.llvm.org/D35319 llvm-svn: 310167
This commit is contained in:
parent
cd13c2cca4
commit
cee6711d56
@ -1015,44 +1015,98 @@ def setle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
// Matches a setcc node whose condition-code operand is SETNE.
def setne
    : PatFrag<(ops node:$lhs, node:$rhs),
              (setcc node:$lhs, node:$rhs, SETNE)>;
|
||||
|
||||
// atomic_cmp_swap restricted to nodes whose memory value type is i8.
// NOTE(review): `defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>`
// further down appears to create a record with this same name - confirm that
// only one of the two definitions is meant to remain.
def atomic_cmp_swap_8
    : PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
              (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
        return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i8;
      }]>;
|
||||
// atomic_cmp_swap restricted to nodes whose memory value type is i16.
// NOTE(review): `defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>`
// further down appears to create a record with this same name - confirm that
// only one of the two definitions is meant to remain.
def atomic_cmp_swap_16
    : PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
              (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
        return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i16;
      }]>;
|
||||
// atomic_cmp_swap restricted to nodes whose memory value type is i32.
// NOTE(review): `defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>`
// further down appears to create a record with this same name - confirm that
// only one of the two definitions is meant to remain.
def atomic_cmp_swap_32
    : PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
              (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
        return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i32;
      }]>;
|
||||
// atomic_cmp_swap restricted to nodes whose memory value type is i64.
// NOTE(review): `defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>`
// further down appears to create a record with this same name - confirm that
// only one of the two definitions is meant to remain.
def atomic_cmp_swap_64
    : PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
              (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
        return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64;
      }]>;
|
||||
// For the fragment named by the instantiating defm (#NAME), defines one
// two-operand (ptr, val) PatFrag per atomic ordering. Each fragment wraps the
// #NAME fragment and only matches when the AtomicSDNode reports the ordering
// spelled in the suffix: _monotonic, _acquire, _release, _acq_rel, _seq_cst.
// NOTE(review): the `atomic_op` parameter is not referenced in the body; the
// wrapped fragment is looked up by name via !cast<SDNode>(#NAME).
multiclass binary_atomic_op_ord<SDNode atomic_op> {
  def #NAME#_monotonic
      : PatFrag<(ops node:$ptr, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Monotonic;
        }]>;

  def #NAME#_acquire
      : PatFrag<(ops node:$ptr, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Acquire;
        }]>;

  def #NAME#_release
      : PatFrag<(ops node:$ptr, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Release;
        }]>;

  def #NAME#_acq_rel
      : PatFrag<(ops node:$ptr, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::AcquireRelease;
        }]>;

  def #NAME#_seq_cst
      : PatFrag<(ops node:$ptr, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::SequentiallyConsistent;
        }]>;
}
|
||||
|
||||
// Three-operand (ptr, cmp, val) counterpart of the per-ordering fragments:
// for the fragment named by the instantiating defm (#NAME), defines one
// PatFrag per atomic ordering that only matches when the AtomicSDNode reports
// that ordering (_monotonic, _acquire, _release, _acq_rel, _seq_cst).
// NOTE(review): the `atomic_op` parameter is not referenced in the body; the
// wrapped fragment is looked up by name via !cast<SDNode>(#NAME).
multiclass ternary_atomic_op_ord<SDNode atomic_op> {
  def #NAME#_monotonic
      : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Monotonic;
        }]>;

  def #NAME#_acquire
      : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Acquire;
        }]>;

  def #NAME#_release
      : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Release;
        }]>;

  def #NAME#_acq_rel
      : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::AcquireRelease;
        }]>;

  def #NAME#_seq_cst
      : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
                (!cast<SDNode>(#NAME) node:$ptr, node:$cmp, node:$val), [{
          return cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::SequentiallyConsistent;
        }]>;
}
|
||||
|
||||
// Defines, for a two-operand atomic node `atomic_op`, one PatFrag per memory
// width (_8, _16, _32, _64) that matches only when the AtomicSDNode's memory
// value type is the corresponding integer type. Each width fragment is then
// expanded into per-ordering fragments through binary_atomic_op_ord.
multiclass binary_atomic_op<SDNode atomic_op> {
  // Each fragment has exactly one result dag followed by one predicate code
  // block; the result-dag line must not be repeated, otherwise the second
  // copy ends up inside the predicate's C++ code.
  def _8 : PatFrag<(ops node:$ptr, node:$val),
                   (atomic_op node:$ptr, node:$val), [{
    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i8;
  }]>;
  def _16 : PatFrag<(ops node:$ptr, node:$val),
                    (atomic_op node:$ptr, node:$val), [{
    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i16;
  }]>;
  def _32 : PatFrag<(ops node:$ptr, node:$val),
                    (atomic_op node:$ptr, node:$val), [{
    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i32;
  }]>;
  def _64 : PatFrag<(ops node:$ptr, node:$val),
                    (atomic_op node:$ptr, node:$val), [{
    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64;
  }]>;

  // Per-ordering variants of each width fragment defined above.
  defm NAME#_8  : binary_atomic_op_ord<atomic_op>;
  defm NAME#_16 : binary_atomic_op_ord<atomic_op>;
  defm NAME#_32 : binary_atomic_op_ord<atomic_op>;
  defm NAME#_64 : binary_atomic_op_ord<atomic_op>;
}
|
||||
|
||||
// Defines, for a three-operand atomic node `atomic_op` (ptr, cmp, val), one
// PatFrag per memory width (_8, _16, _32, _64) that matches only when the
// AtomicSDNode's memory value type is the corresponding integer type, then
// expands each width fragment into per-ordering fragments through
// ternary_atomic_op_ord.
multiclass ternary_atomic_op<SDNode atomic_op> {
  def _8
      : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
                (atomic_op node:$ptr, node:$cmp, node:$val), [{
          return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i8;
        }]>;

  def _16
      : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
                (atomic_op node:$ptr, node:$cmp, node:$val), [{
          return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i16;
        }]>;

  def _32
      : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
                (atomic_op node:$ptr, node:$cmp, node:$val), [{
          return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i32;
        }]>;

  def _64
      : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
                (atomic_op node:$ptr, node:$cmp, node:$val), [{
          return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64;
        }]>;

  // Per-ordering variants of each width fragment defined above.
  defm NAME#_8  : ternary_atomic_op_ord<atomic_op>;
  defm NAME#_16 : ternary_atomic_op_ord<atomic_op>;
  defm NAME#_32 : ternary_atomic_op_ord<atomic_op>;
  defm NAME#_64 : ternary_atomic_op_ord<atomic_op>;
}
|
||||
|
||||
// Width- and ordering-qualified fragments for atomic_load_add.
defm atomic_load_add
    : binary_atomic_op<atomic_load_add>;
|
||||
@ -1067,6 +1121,7 @@ defm atomic_load_max : binary_atomic_op<atomic_load_max>;
|
||||
// Width- and ordering-qualified fragments for the remaining two-operand
// atomics, followed by the three-operand compare-and-swap.
defm atomic_load_umin
    : binary_atomic_op<atomic_load_umin>;
defm atomic_load_umax
    : binary_atomic_op<atomic_load_umax>;
defm atomic_store
    : binary_atomic_op<atomic_store>;
defm atomic_cmp_swap
    : ternary_atomic_op<atomic_cmp_swap>;
|
||||
|
||||
def atomic_load_8 :
|
||||
PatFrag<(ops node:$ptr),
|
||||
|
@ -55,6 +55,8 @@ public:
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
bool shouldSkip(const MachineInstr &MI, const MachineFunction &MF) const;
|
||||
};
|
||||
char AArch64DeadRegisterDefinitions::ID = 0;
|
||||
} // end anonymous namespace
|
||||
@ -69,6 +71,63 @@ static bool usesFrameIndex(const MachineInstr &MI) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
AArch64DeadRegisterDefinitions::shouldSkip(const MachineInstr &MI,
|
||||
const MachineFunction &MF) const {
|
||||
if (!MF.getSubtarget<AArch64Subtarget>().hasLSE())
|
||||
return false;
|
||||
|
||||
#define CASE_AARCH64_ATOMIC_(PREFIX) \
|
||||
case AArch64::PREFIX##X: \
|
||||
case AArch64::PREFIX##W: \
|
||||
case AArch64::PREFIX##H: \
|
||||
case AArch64::PREFIX##B
|
||||
|
||||
for (const MachineMemOperand *MMO : MI.memoperands()) {
|
||||
if (MMO->isAtomic()) {
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
switch (Opcode) {
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDADDA):
|
||||
CASE_AARCH64_ATOMIC_(LDADDAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDCLRA):
|
||||
CASE_AARCH64_ATOMIC_(LDCLRAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDEORA):
|
||||
CASE_AARCH64_ATOMIC_(LDEORAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDSETA):
|
||||
CASE_AARCH64_ATOMIC_(LDSETAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDSMAXA):
|
||||
CASE_AARCH64_ATOMIC_(LDSMAXAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDSMINA):
|
||||
CASE_AARCH64_ATOMIC_(LDSMINAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDUMAXA):
|
||||
CASE_AARCH64_ATOMIC_(LDUMAXAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(LDUMINA):
|
||||
CASE_AARCH64_ATOMIC_(LDUMINAL):
|
||||
|
||||
CASE_AARCH64_ATOMIC_(SWPA):
|
||||
CASE_AARCH64_ATOMIC_(SWPAL):
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef CASE_AARCH64_ATOMIC_
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
|
||||
MachineBasicBlock &MBB) {
|
||||
const MachineFunction &MF = *MBB.getParent();
|
||||
@ -86,55 +145,12 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
|
||||
DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n");
|
||||
continue;
|
||||
}
|
||||
if (MF.getSubtarget<AArch64Subtarget>().hasLSE()) {
|
||||
// XZR/WZR for LSE can only be used when acquire semantics are not used,
|
||||
// LDOPAL WZR is an invalid opcode.
|
||||
switch (MI.getOpcode()) {
|
||||
case AArch64::CASALB:
|
||||
case AArch64::CASALH:
|
||||
case AArch64::CASALW:
|
||||
case AArch64::CASALX:
|
||||
case AArch64::SWPALB:
|
||||
case AArch64::SWPALH:
|
||||
case AArch64::SWPALW:
|
||||
case AArch64::SWPALX:
|
||||
case AArch64::LDADDALB:
|
||||
case AArch64::LDADDALH:
|
||||
case AArch64::LDADDALW:
|
||||
case AArch64::LDADDALX:
|
||||
case AArch64::LDCLRALB:
|
||||
case AArch64::LDCLRALH:
|
||||
case AArch64::LDCLRALW:
|
||||
case AArch64::LDCLRALX:
|
||||
case AArch64::LDEORALB:
|
||||
case AArch64::LDEORALH:
|
||||
case AArch64::LDEORALW:
|
||||
case AArch64::LDEORALX:
|
||||
case AArch64::LDSETALB:
|
||||
case AArch64::LDSETALH:
|
||||
case AArch64::LDSETALW:
|
||||
case AArch64::LDSETALX:
|
||||
case AArch64::LDSMINALB:
|
||||
case AArch64::LDSMINALH:
|
||||
case AArch64::LDSMINALW:
|
||||
case AArch64::LDSMINALX:
|
||||
case AArch64::LDSMAXALB:
|
||||
case AArch64::LDSMAXALH:
|
||||
case AArch64::LDSMAXALW:
|
||||
case AArch64::LDSMAXALX:
|
||||
case AArch64::LDUMINALB:
|
||||
case AArch64::LDUMINALH:
|
||||
case AArch64::LDUMINALW:
|
||||
case AArch64::LDUMINALX:
|
||||
case AArch64::LDUMAXALB:
|
||||
case AArch64::LDUMAXALH:
|
||||
case AArch64::LDUMAXALW:
|
||||
case AArch64::LDUMAXALX:
|
||||
continue;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (shouldSkip(MI, MF)) {
|
||||
DEBUG(dbgs() << " Ignoring, Atomic instruction with acquire semantics using WZR/XZR\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
const MCInstrDesc &Desc = MI.getDesc();
|
||||
for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
|
||||
MachineOperand &MO = MI.getOperand(I);
|
||||
|
@ -407,57 +407,17 @@ def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
|
||||
Sched<[WriteAtomic]>;
|
||||
|
||||
// v8.1 Atomic instructions:
//
// All selection patterns for the LSE atomics are produced by the
// *_patterns multiclasses, which emit one pattern per access size and per
// memory ordering and pick the plain, A, L or AL form of the instruction
// accordingly. Hand-written patterns that map every ordering onto the AL form
// must not be added next to these: they would sit outside the HasLSE
// predicate and compete with the ordering-aware patterns for the same nodes.
//
// atomic_load_sub and atomic_load_and have no direct LSE instruction; they are
// selected as LDADD / LDCLR on an operand first transformed by the SUB / ORN
// register-register instruction named in the third argument.
let Predicates = [HasLSE] in {
  defm : LDOPregister_patterns<"LDADD", "atomic_load_add">;
  defm : LDOPregister_patterns<"LDSET", "atomic_load_or">;
  defm : LDOPregister_patterns<"LDEOR", "atomic_load_xor">;
  defm : LDOPregister_patterns<"LDSMAX", "atomic_load_max">;
  defm : LDOPregister_patterns<"LDSMIN", "atomic_load_min">;
  defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">;
  defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">;
  defm : LDOPregister_patterns<"SWP", "atomic_swap">;
  defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
  defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
  defm : CASregister_patterns<"CAS", "atomic_cmp_swap">;
}
|
||||
|
@ -9490,6 +9490,86 @@ multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
|
||||
def X : BaseLDOPregister<op, order, "", GPR64>;
|
||||
}
|
||||
|
||||
// Emits the five per-ordering selection patterns for one LSE load-op
// instruction family (`inst`) at one access size. `op`, `size` and the
// ordering suffix are concatenated to name the source fragment; `inst`, an
// ordering infix and `suffix` are concatenated to name the instruction:
//   monotonic -> inst,  acquire -> inst#"A",  release -> inst#"L",
//   acq_rel and seq_cst -> inst#"AL".
// SrcRHS is the value operand as it appears in the matched node and DstRHS is
// what is handed to the instruction; keeping them separate lets CLR and SUB
// be covered by supplying a more complex DAG for DstRHS.
// NOTE(review): the looked-up source records are cast to SDNode although the
// *_monotonic etc. fragments in this change are declared as PatFrag - confirm
// TableGen accepts that cast.
let Predicates = [HasLSE] in
multiclass LDOPregister_patterns_ord_dag<string inst, string suffix, string op,
                                         string size, dag SrcRHS, dag DstRHS> {
  def : Pat<(!cast<SDNode>(op#"_"#size#"_monotonic") GPR64sp:$Rn, SrcRHS),
            (!cast<Instruction>(inst # suffix) DstRHS, GPR64sp:$Rn)>;

  def : Pat<(!cast<SDNode>(op#"_"#size#"_acquire") GPR64sp:$Rn, SrcRHS),
            (!cast<Instruction>(inst # "A" # suffix) DstRHS, GPR64sp:$Rn)>;

  def : Pat<(!cast<SDNode>(op#"_"#size#"_release") GPR64sp:$Rn, SrcRHS),
            (!cast<Instruction>(inst # "L" # suffix) DstRHS, GPR64sp:$Rn)>;

  def : Pat<(!cast<SDNode>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, SrcRHS),
            (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;

  def : Pat<(!cast<SDNode>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, SrcRHS),
            (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;
}
|
||||
|
||||
// Per-ordering patterns for the common case where the matched value operand
// is passed to the instruction unchanged (RHS is used as both source and
// destination DAG).
multiclass LDOPregister_patterns_ord<string inst, string suffix, string op,
                                     string size, dag RHS> {
  defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size,
                                       RHS, RHS>;
}
|
||||
|
||||
// Per-ordering patterns where the instruction operand differs from the
// matched one: LHS is the value operand of the matched node, RHS is the DAG
// handed to the instruction.
multiclass LDOPregister_patterns_ord_mod<string inst, string suffix, string op,
                                         string size, dag LHS, dag RHS> {
  defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size,
                                       LHS, RHS>;
}
|
||||
|
||||
// Instantiates the per-ordering patterns of `inst` for all four access sizes.
// The 64-bit form takes its value in a GPR64; the 32-, 16- and 8-bit forms
// all take an i32 value in a GPR32.
multiclass LDOPregister_patterns<string inst, string op> {
  defm : LDOPregister_patterns_ord<inst, "X", op, "64",
                                   (i64 GPR64:$Rm)>;
  defm : LDOPregister_patterns_ord<inst, "W", op, "32",
                                   (i32 GPR32:$Rm)>;
  defm : LDOPregister_patterns_ord<inst, "H", op, "16",
                                   (i32 GPR32:$Rm)>;
  defm : LDOPregister_patterns_ord<inst, "B", op, "8",
                                   (i32 GPR32:$Rm)>;
}
|
||||
|
||||
// Like LDOPregister_patterns, but the value operand is first run through the
// register-register instruction named by `mod` (mod#Xrr for 64-bit, mod#Wrr
// otherwise) with the zero register as its first source, and the result of
// that instruction is what `inst` receives.
multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
  defm : LDOPregister_patterns_ord_mod<
             inst, "X", op, "64",
             (i64 GPR64:$Rm),
             (i64 (!cast<Instruction>(mod#Xrr) XZR, GPR64:$Rm))>;

  defm : LDOPregister_patterns_ord_mod<
             inst, "W", op, "32",
             (i32 GPR32:$Rm),
             (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;

  defm : LDOPregister_patterns_ord_mod<
             inst, "H", op, "16",
             (i32 GPR32:$Rm),
             (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;

  defm : LDOPregister_patterns_ord_mod<
             inst, "B", op, "8",
             (i32 GPR32:$Rm),
             (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
}
|
||||
|
||||
// Emits the five per-ordering selection patterns for a compare-and-swap
// instruction family (`inst`) at one access size. The source fragment is
// named op#"_"#size#"_<ordering>"; the selected instruction is
//   monotonic -> inst,  acquire -> inst#"A",  release -> inst#"L",
//   acq_rel and seq_cst -> inst#"AL",
// each followed by `suffix`. OLD and NEW are the compare and replacement
// value operands and are passed through to the instruction unchanged.
let Predicates = [HasLSE] in
multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op,
                                        string size, dag OLD, dag NEW> {
  def : Pat<(!cast<SDNode>(op#"_"#size#"_monotonic") GPR64sp:$Rn, OLD, NEW),
            (!cast<Instruction>(inst # suffix) OLD, NEW, GPR64sp:$Rn)>;

  def : Pat<(!cast<SDNode>(op#"_"#size#"_acquire") GPR64sp:$Rn, OLD, NEW),
            (!cast<Instruction>(inst # "A" # suffix) OLD, NEW, GPR64sp:$Rn)>;

  def : Pat<(!cast<SDNode>(op#"_"#size#"_release") GPR64sp:$Rn, OLD, NEW),
            (!cast<Instruction>(inst # "L" # suffix) OLD, NEW, GPR64sp:$Rn)>;

  def : Pat<(!cast<SDNode>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, OLD, NEW),
            (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>;

  def : Pat<(!cast<SDNode>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, OLD, NEW),
            (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>;
}
|
||||
|
||||
// Thin forwarding layer: passes every argument straight on to
// CASregister_patterns_ord_dag.
multiclass CASregister_patterns_ord<string inst, string suffix, string op,
                                    string size, dag OLD, dag NEW> {
  defm : CASregister_patterns_ord_dag<inst, suffix, op, size,
                                      OLD, NEW>;
}
|
||||
|
||||
// Instantiates the per-ordering compare-and-swap patterns of `inst` for all
// four access sizes. The 64-bit form uses GPR64 operands; the 32-, 16- and
// 8-bit forms all use i32 values in GPR32.
multiclass CASregister_patterns<string inst, string op> {
  defm : CASregister_patterns_ord<
             inst, "X", op, "64",
             (i64 GPR64:$Rold), (i64 GPR64:$Rnew)>;

  defm : CASregister_patterns_ord<
             inst, "W", op, "32",
             (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;

  defm : CASregister_patterns_ord<
             inst, "H", op, "16",
             (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;

  defm : CASregister_patterns_ord<
             inst, "B", op, "8",
             (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
}
|
||||
|
||||
let Predicates = [HasLSE] in
|
||||
class BaseSTOPregister<string asm, RegisterClass OP, Register Reg,
|
||||
Instruction inst> :
|
||||
|
@ -315,6 +315,36 @@ def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
|
||||
// Write resources that occupy a load/store unit (LS0 or LS1) together with an
// integer unit (I0, I1 or I2). The variants differ only in latency and in the
// number of micro-ops.

// 8-cycle latency, 4 micro-ops.
let Latency = 8, NumMicroOps = 4 in
def THX2T99Write_8Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]>;

// 12-cycle latency, 6 micro-ops.
let Latency = 12, NumMicroOps = 6 in
def THX2T99Write_12Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]>;

// 16-cycle latency, 8 micro-ops.
let Latency = 16, NumMicroOps = 8 in
def THX2T99Write_16Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]>;

// 24-cycle latency, 12 micro-ops.
let Latency = 24, NumMicroOps = 12 in
def THX2T99Write_24Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]>;

// 32-cycle latency, 16 micro-ops.
let Latency = 32, NumMicroOps = 16 in
def THX2T99Write_32Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]>;
|
||||
|
||||
// Define commonly used read types.
|
||||
|
||||
// No forwarding is provided for these types.
|
||||
@ -1741,5 +1771,108 @@ def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
|
||||
def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
|
||||
(instregex "^ST4i(8|16|32|64)_POST$")>;
|
||||
|
||||
// V8.1a Atomics (LSE)
//
// Scheduling information for the atomic instructions. For CAS, LDADD, LDCLR,
// LDEOR, LDSET and SWP the plain form uses the 8-cycle write, the A and L
// forms the 12-cycle write, and the AL form the 16-cycle write.

// Compare and swap.
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs CASB, CASH, CASW, CASX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs CASAB, CASAH, CASAW, CASAX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs CASLB, CASLH, CASLW, CASLX)>;
def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
             (instrs CASALB, CASALH, CASALW, CASALX)>;

// LDLAR.
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;

// LDADD.
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
             (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;

// LDCLR.
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
             (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;

// LDEOR.
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
             (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;

// LDSET.
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
             (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;

// Signed / unsigned max and min.
// NOTE(review): unlike the families above, every ordering variant of
// LDSMAX / LDSMIN / LDUMAX / LDUMIN is mapped to the 8-cycle write - confirm
// this is intentional rather than a missing 12/16-cycle split.
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
                     LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
                     LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
                     LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
                     LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
                     LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
                     LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
                     LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
                     LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
                     LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
                     LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
                     LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
                     LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;

// Swap.
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs SWPB, SWPH, SWPW, SWPX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
             (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
             (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;

// STLLR.
def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
             (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
|
||||
|
||||
} // SchedModel = ThunderX2T99Model
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user