1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[Mips] Add support for min/max/umin/umax atomics

In order to properly implement these atomics we need one more register than other
binary atomics. It is used for storing the result of comparing the values, in
addition to the register that holds the actual result of the operation.

https://reviews.llvm.org/D71028
This commit is contained in:
Mirko Brkusanin 2019-12-12 11:19:41 +01:00
parent 07073d8479
commit 14b2d0ed1f
9 changed files with 5218 additions and 29 deletions

View File

@ -83,6 +83,10 @@ let usesCustomInserter = 1 in {
def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>;
def ATOMIC_SWAP_I64 : Atomic2Ops<atomic_swap_64, GPR64>;
def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
def ATOMIC_LOAD_MIN_I64 : Atomic2Ops<atomic_load_min_64, GPR64>;
def ATOMIC_LOAD_MAX_I64 : Atomic2Ops<atomic_load_max_64, GPR64>;
def ATOMIC_LOAD_UMIN_I64 : Atomic2Ops<atomic_load_umin_64, GPR64>;
def ATOMIC_LOAD_UMAX_I64 : Atomic2Ops<atomic_load_umax_64, GPR64>;
}
def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
@ -96,6 +100,11 @@ def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA<GPR64>;
def ATOMIC_LOAD_MIN_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
def ATOMIC_LOAD_MAX_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
def ATOMIC_LOAD_UMIN_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
def ATOMIC_LOAD_UMAX_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
def LOAD_ACC128 : Load<"", ACC128>;

View File

@ -308,7 +308,7 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
DebugLoc DL = I->getDebugLoc();
unsigned LL, SC;
unsigned LL, SC, SLT, SLTu, OR, MOVN, MOVZ, SELNEZ, SELEQZ;
unsigned BEQ = Mips::BEQ;
unsigned SEOp = Mips::SEH;
@ -316,15 +316,32 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
SLT = Mips::SLT_MM;
SLTu = Mips::SLTu_MM;
OR = STI->hasMips32r6() ? Mips::OR_MMR6 : Mips::OR_MM;
MOVN = Mips::MOVN_I_MM;
MOVZ = Mips::MOVZ_I_MM;
SELNEZ = STI->hasMips32r6() ? Mips::SELNEZ_MMR6 : Mips::SELNEZ;
SELEQZ = STI->hasMips32r6() ? Mips::SELEQZ_MMR6 : Mips::SELEQZ;
} else {
LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
: (ArePtrs64bit ? Mips::LL64 : Mips::LL);
SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
: (ArePtrs64bit ? Mips::SC64 : Mips::SC);
SLT = Mips::SLT;
SLTu = Mips::SLTu;
OR = Mips::OR;
MOVN = Mips::MOVN_I_I;
MOVZ = Mips::MOVZ_I_I;
SELNEZ = Mips::SELNEZ;
SELEQZ = Mips::SELEQZ;
}
bool IsSwap = false;
bool IsNand = false;
bool IsMin = false;
bool IsMax = false;
bool IsUnsigned = false;
unsigned Opcode = 0;
switch (I->getOpcode()) {
@ -370,6 +387,22 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
case Mips::ATOMIC_LOAD_XOR_I16_POSTRA:
Opcode = Mips::XOR;
break;
case Mips::ATOMIC_LOAD_UMIN_I8_POSTRA:
case Mips::ATOMIC_LOAD_UMIN_I16_POSTRA:
IsUnsigned = true;
LLVM_FALLTHROUGH;
case Mips::ATOMIC_LOAD_MIN_I8_POSTRA:
case Mips::ATOMIC_LOAD_MIN_I16_POSTRA:
IsMin = true;
break;
case Mips::ATOMIC_LOAD_UMAX_I8_POSTRA:
case Mips::ATOMIC_LOAD_UMAX_I16_POSTRA:
IsUnsigned = true;
LLVM_FALLTHROUGH;
case Mips::ATOMIC_LOAD_MAX_I8_POSTRA:
case Mips::ATOMIC_LOAD_MAX_I16_POSTRA:
IsMax = true;
break;
default:
llvm_unreachable("Unknown subword atomic pseudo for expansion!");
}
@ -415,6 +448,68 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword(
BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
.addReg(BinOpRes)
.addReg(Mask);
} else if (IsMin || IsMax) {
assert(I->getNumOperands() == 10 &&
"Atomics min|max|umin|umax use an additional register");
Register Scratch4 = I->getOperand(9).getReg();
unsigned SLTScratch4 = IsUnsigned ? SLTu : SLT;
unsigned SELIncr = IsMax ? SELNEZ : SELEQZ;
unsigned SELOldVal = IsMax ? SELEQZ : SELNEZ;
unsigned MOVIncr = IsMax ? MOVN : MOVZ;
// For little endian we need to clear the uninteresting bits.
if (STI->isLittle()) {
// and OldVal, OldVal, Mask
// and Incr, Incr, Mask
BuildMI(loopMBB, DL, TII->get(Mips::AND), OldVal)
.addReg(OldVal)
.addReg(Mask);
BuildMI(loopMBB, DL, TII->get(Mips::AND), Incr).addReg(Incr).addReg(Mask);
}
// unsigned: sltu Scratch4, oldVal, Incr
// signed: slt Scratch4, oldVal, Incr
BuildMI(loopMBB, DL, TII->get(SLTScratch4), Scratch4)
.addReg(OldVal)
.addReg(Incr);
if (STI->hasMips64r6() || STI->hasMips32r6()) {
// max: seleqz BinOpRes, OldVal, Scratch4
// selnez Scratch4, Incr, Scratch4
// or BinOpRes, BinOpRes, Scratch4
// min: selnez BinOpRes, OldVal, Scratch4
// seleqz Scratch4, Incr, Scratch4
// or BinOpRes, BinOpRes, Scratch4
BuildMI(loopMBB, DL, TII->get(SELOldVal), BinOpRes)
.addReg(OldVal)
.addReg(Scratch4);
BuildMI(loopMBB, DL, TII->get(SELIncr), Scratch4)
.addReg(Incr)
.addReg(Scratch4);
BuildMI(loopMBB, DL, TII->get(OR), BinOpRes)
.addReg(BinOpRes)
.addReg(Scratch4);
} else {
// max: move BinOpRes, OldVal
// movn BinOpRes, Incr, Scratch4, BinOpRes
// min: move BinOpRes, OldVal
// movz BinOpRes, Incr, Scratch4, BinOpRes
BuildMI(loopMBB, DL, TII->get(OR), BinOpRes)
.addReg(OldVal)
.addReg(Mips::ZERO);
BuildMI(loopMBB, DL, TII->get(MOVIncr), BinOpRes)
.addReg(Incr)
.addReg(Scratch4)
.addReg(BinOpRes);
}
// and BinOpRes, BinOpRes, Mask
BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
.addReg(BinOpRes)
.addReg(Mask);
} else if (!IsSwap) {
// <binop> binopres, oldval, incr2
// and newval, binopres, mask
@ -488,13 +583,20 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
DebugLoc DL = I->getDebugLoc();
unsigned LL, SC, ZERO, BEQ;
unsigned LL, SC, ZERO, BEQ, SLT, SLTu, OR, MOVN, MOVZ, SELNEZ, SELEQZ;
if (Size == 4) {
if (STI->inMicroMipsMode()) {
LL = STI->hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM;
SC = STI->hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM;
BEQ = STI->hasMips32r6() ? Mips::BEQC_MMR6 : Mips::BEQ_MM;
SLT = Mips::SLT_MM;
SLTu = Mips::SLTu_MM;
OR = STI->hasMips32r6() ? Mips::OR_MMR6 : Mips::OR_MM;
MOVN = Mips::MOVN_I_MM;
MOVZ = Mips::MOVZ_I_MM;
SELNEZ = STI->hasMips32r6() ? Mips::SELNEZ_MMR6 : Mips::SELNEZ;
SELEQZ = STI->hasMips32r6() ? Mips::SELEQZ_MMR6 : Mips::SELEQZ;
} else {
LL = STI->hasMips32r6()
? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
@ -503,6 +605,13 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
: (ArePtrs64bit ? Mips::SC64 : Mips::SC);
BEQ = Mips::BEQ;
SLT = Mips::SLT;
SLTu = Mips::SLTu;
OR = Mips::OR;
MOVN = Mips::MOVN_I_I;
MOVZ = Mips::MOVZ_I_I;
SELNEZ = Mips::SELNEZ;
SELEQZ = Mips::SELEQZ;
}
ZERO = Mips::ZERO;
@ -511,6 +620,13 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
ZERO = Mips::ZERO_64;
BEQ = Mips::BEQ64;
SLT = Mips::SLT64;
SLTu = Mips::SLTu64;
OR = Mips::OR64;
MOVN = Mips::MOVN_I64_I64;
MOVZ = Mips::MOVZ_I64_I64;
SELNEZ = Mips::SELNEZ64;
SELEQZ = Mips::SELEQZ64;
}
Register OldVal = I->getOperand(0).getReg();
@ -519,10 +635,15 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
Register Scratch = I->getOperand(3).getReg();
unsigned Opcode = 0;
unsigned OR = 0;
unsigned AND = 0;
unsigned NOR = 0;
bool IsOr = false;
bool IsNand = false;
bool IsMin = false;
bool IsMax = false;
bool IsUnsigned = false;
switch (I->getOpcode()) {
case Mips::ATOMIC_LOAD_ADD_I32_POSTRA:
Opcode = Mips::ADDu;
@ -545,7 +666,7 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
NOR = Mips::NOR;
break;
case Mips::ATOMIC_SWAP_I32_POSTRA:
OR = Mips::OR;
IsOr = true;
break;
case Mips::ATOMIC_LOAD_ADD_I64_POSTRA:
Opcode = Mips::DADDu;
@ -568,7 +689,23 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
NOR = Mips::NOR64;
break;
case Mips::ATOMIC_SWAP_I64_POSTRA:
OR = Mips::OR64;
IsOr = true;
break;
case Mips::ATOMIC_LOAD_UMIN_I32_POSTRA:
case Mips::ATOMIC_LOAD_UMIN_I64_POSTRA:
IsUnsigned = true;
LLVM_FALLTHROUGH;
case Mips::ATOMIC_LOAD_MIN_I32_POSTRA:
case Mips::ATOMIC_LOAD_MIN_I64_POSTRA:
IsMin = true;
break;
case Mips::ATOMIC_LOAD_UMAX_I32_POSTRA:
case Mips::ATOMIC_LOAD_UMAX_I64_POSTRA:
IsUnsigned = true;
LLVM_FALLTHROUGH;
case Mips::ATOMIC_LOAD_MAX_I32_POSTRA:
case Mips::ATOMIC_LOAD_MAX_I64_POSTRA:
IsMax = true;
break;
default:
llvm_unreachable("Unknown pseudo atomic!");
@ -592,7 +729,59 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!");
assert((OldVal != Incr) && "Clobbered the wrong reg!");
if (Opcode) {
if (IsMin || IsMax) {
assert(I->getNumOperands() == 5 &&
"Atomics min|max|umin|umax use an additional register");
Register Scratch2 = I->getOperand(4).getReg();
// On Mips64 result of slt is GPR32.
Register Scratch2_32 =
(Size == 8) ? STI->getRegisterInfo()->getSubReg(Scratch2, Mips::sub_32)
: Scratch2;
unsigned SLTScratch2 = IsUnsigned ? SLTu : SLT;
unsigned SELIncr = IsMax ? SELNEZ : SELEQZ;
unsigned SELOldVal = IsMax ? SELEQZ : SELNEZ;
unsigned MOVIncr = IsMax ? MOVN : MOVZ;
// unsigned: sltu Scratch2, oldVal, Incr
// signed: slt Scratch2, oldVal, Incr
BuildMI(loopMBB, DL, TII->get(SLTScratch2), Scratch2_32)
.addReg(OldVal)
.addReg(Incr);
if (STI->hasMips64r6() || STI->hasMips32r6()) {
// max: seleqz Scratch, OldVal, Scratch2
// selnez Scratch2, Incr, Scratch2
// or Scratch, Scratch, Scratch2
// min: selnez Scratch, OldVal, Scratch2
// seleqz Scratch2, Incr, Scratch2
// or Scratch, Scratch, Scratch2
BuildMI(loopMBB, DL, TII->get(SELOldVal), Scratch)
.addReg(OldVal)
.addReg(Scratch2);
BuildMI(loopMBB, DL, TII->get(SELIncr), Scratch2)
.addReg(Incr)
.addReg(Scratch2);
BuildMI(loopMBB, DL, TII->get(OR), Scratch)
.addReg(Scratch)
.addReg(Scratch2);
} else {
// max: move Scratch, OldVal
// movn Scratch, Incr, Scratch2, Scratch
// min: move Scratch, OldVal
// movz Scratch, Incr, Scratch2, Scratch
BuildMI(loopMBB, DL, TII->get(OR), Scratch)
.addReg(OldVal)
.addReg(ZERO);
BuildMI(loopMBB, DL, TII->get(MOVIncr), Scratch)
.addReg(Incr)
.addReg(Scratch2)
.addReg(Scratch);
}
} else if (Opcode) {
BuildMI(loopMBB, DL, TII->get(Opcode), Scratch).addReg(OldVal).addReg(Incr);
} else if (IsNand) {
assert(AND && NOR &&
@ -600,7 +789,7 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr);
BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch);
} else {
assert(OR && "Unknown instruction for atomic pseudo expansion!");
assert(IsOr && OR && "Unknown instruction for atomic pseudo expansion!");
BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO);
}
@ -650,6 +839,14 @@ bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB,
case Mips::ATOMIC_LOAD_OR_I16_POSTRA:
case Mips::ATOMIC_LOAD_XOR_I8_POSTRA:
case Mips::ATOMIC_LOAD_XOR_I16_POSTRA:
case Mips::ATOMIC_LOAD_MIN_I8_POSTRA:
case Mips::ATOMIC_LOAD_MIN_I16_POSTRA:
case Mips::ATOMIC_LOAD_MAX_I8_POSTRA:
case Mips::ATOMIC_LOAD_MAX_I16_POSTRA:
case Mips::ATOMIC_LOAD_UMIN_I8_POSTRA:
case Mips::ATOMIC_LOAD_UMIN_I16_POSTRA:
case Mips::ATOMIC_LOAD_UMAX_I8_POSTRA:
case Mips::ATOMIC_LOAD_UMAX_I16_POSTRA:
return expandAtomicBinOpSubword(MBB, MBBI, NMBB);
case Mips::ATOMIC_LOAD_ADD_I32_POSTRA:
case Mips::ATOMIC_LOAD_SUB_I32_POSTRA:
@ -658,6 +855,10 @@ bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB,
case Mips::ATOMIC_LOAD_XOR_I32_POSTRA:
case Mips::ATOMIC_LOAD_NAND_I32_POSTRA:
case Mips::ATOMIC_SWAP_I32_POSTRA:
case Mips::ATOMIC_LOAD_MIN_I32_POSTRA:
case Mips::ATOMIC_LOAD_MAX_I32_POSTRA:
case Mips::ATOMIC_LOAD_UMIN_I32_POSTRA:
case Mips::ATOMIC_LOAD_UMAX_I32_POSTRA:
return expandAtomicBinOp(MBB, MBBI, NMBB, 4);
case Mips::ATOMIC_LOAD_ADD_I64_POSTRA:
case Mips::ATOMIC_LOAD_SUB_I64_POSTRA:
@ -666,6 +867,10 @@ bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB,
case Mips::ATOMIC_LOAD_XOR_I64_POSTRA:
case Mips::ATOMIC_LOAD_NAND_I64_POSTRA:
case Mips::ATOMIC_SWAP_I64_POSTRA:
case Mips::ATOMIC_LOAD_MIN_I64_POSTRA:
case Mips::ATOMIC_LOAD_MAX_I64_POSTRA:
case Mips::ATOMIC_LOAD_UMIN_I64_POSTRA:
case Mips::ATOMIC_LOAD_UMAX_I64_POSTRA:
return expandAtomicBinOp(MBB, MBBI, NMBB, 8);
default:
return Modified;

View File

@ -1366,6 +1366,43 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitAtomicCmpSwap(MI, BB);
case Mips::ATOMIC_CMP_SWAP_I64:
return emitAtomicCmpSwap(MI, BB);
case Mips::ATOMIC_LOAD_MIN_I8:
return emitAtomicBinaryPartword(MI, BB, 1);
case Mips::ATOMIC_LOAD_MIN_I16:
return emitAtomicBinaryPartword(MI, BB, 2);
case Mips::ATOMIC_LOAD_MIN_I32:
return emitAtomicBinary(MI, BB);
case Mips::ATOMIC_LOAD_MIN_I64:
return emitAtomicBinary(MI, BB);
case Mips::ATOMIC_LOAD_MAX_I8:
return emitAtomicBinaryPartword(MI, BB, 1);
case Mips::ATOMIC_LOAD_MAX_I16:
return emitAtomicBinaryPartword(MI, BB, 2);
case Mips::ATOMIC_LOAD_MAX_I32:
return emitAtomicBinary(MI, BB);
case Mips::ATOMIC_LOAD_MAX_I64:
return emitAtomicBinary(MI, BB);
case Mips::ATOMIC_LOAD_UMIN_I8:
return emitAtomicBinaryPartword(MI, BB, 1);
case Mips::ATOMIC_LOAD_UMIN_I16:
return emitAtomicBinaryPartword(MI, BB, 2);
case Mips::ATOMIC_LOAD_UMIN_I32:
return emitAtomicBinary(MI, BB);
case Mips::ATOMIC_LOAD_UMIN_I64:
return emitAtomicBinary(MI, BB);
case Mips::ATOMIC_LOAD_UMAX_I8:
return emitAtomicBinaryPartword(MI, BB, 1);
case Mips::ATOMIC_LOAD_UMAX_I16:
return emitAtomicBinaryPartword(MI, BB, 2);
case Mips::ATOMIC_LOAD_UMAX_I32:
return emitAtomicBinary(MI, BB);
case Mips::ATOMIC_LOAD_UMAX_I64:
return emitAtomicBinary(MI, BB);
case Mips::PseudoSDIV:
case Mips::PseudoUDIV:
case Mips::DIV:
@ -1427,6 +1464,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
DebugLoc DL = MI.getDebugLoc();
unsigned AtomicOp;
bool NeedsAdditionalReg = false;
switch (MI.getOpcode()) {
case Mips::ATOMIC_LOAD_ADD_I32:
AtomicOp = Mips::ATOMIC_LOAD_ADD_I32_POSTRA;
@ -1470,6 +1508,38 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
case Mips::ATOMIC_SWAP_I64:
AtomicOp = Mips::ATOMIC_SWAP_I64_POSTRA;
break;
case Mips::ATOMIC_LOAD_MIN_I32:
AtomicOp = Mips::ATOMIC_LOAD_MIN_I32_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_MAX_I32:
AtomicOp = Mips::ATOMIC_LOAD_MAX_I32_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_UMIN_I32:
AtomicOp = Mips::ATOMIC_LOAD_UMIN_I32_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_UMAX_I32:
AtomicOp = Mips::ATOMIC_LOAD_UMAX_I32_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_MIN_I64:
AtomicOp = Mips::ATOMIC_LOAD_MIN_I64_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_MAX_I64:
AtomicOp = Mips::ATOMIC_LOAD_MAX_I64_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_UMIN_I64:
AtomicOp = Mips::ATOMIC_LOAD_UMIN_I64_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_UMAX_I64:
AtomicOp = Mips::ATOMIC_LOAD_UMAX_I64_POSTRA;
NeedsAdditionalReg = true;
break;
default:
llvm_unreachable("Unknown pseudo atomic for replacement!");
}
@ -1522,12 +1592,19 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
BuildMI(*BB, II, DL, TII->get(Mips::COPY), IncrCopy).addReg(Incr);
BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr);
BuildMI(*BB, II, DL, TII->get(AtomicOp))
.addReg(OldVal, RegState::Define | RegState::EarlyClobber)
.addReg(PtrCopy)
.addReg(IncrCopy)
.addReg(Scratch, RegState::Define | RegState::EarlyClobber |
RegState::Implicit | RegState::Dead);
MachineInstrBuilder MIB =
BuildMI(*BB, II, DL, TII->get(AtomicOp))
.addReg(OldVal, RegState::Define | RegState::EarlyClobber)
.addReg(PtrCopy)
.addReg(IncrCopy)
.addReg(Scratch, RegState::Define | RegState::EarlyClobber |
RegState::Implicit | RegState::Dead);
if (NeedsAdditionalReg) {
Register Scratch2 =
RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal));
MIB.addReg(Scratch2, RegState::Define | RegState::EarlyClobber |
RegState::Implicit | RegState::Dead);
}
MI.eraseFromParent();
@ -1595,6 +1672,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
Register Scratch3 = RegInfo.createVirtualRegister(RC);
unsigned AtomicOp = 0;
bool NeedsAdditionalReg = false;
switch (MI.getOpcode()) {
case Mips::ATOMIC_LOAD_NAND_I8:
AtomicOp = Mips::ATOMIC_LOAD_NAND_I8_POSTRA;
@ -1638,6 +1716,38 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
case Mips::ATOMIC_LOAD_XOR_I16:
AtomicOp = Mips::ATOMIC_LOAD_XOR_I16_POSTRA;
break;
case Mips::ATOMIC_LOAD_MIN_I8:
AtomicOp = Mips::ATOMIC_LOAD_MIN_I8_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_MIN_I16:
AtomicOp = Mips::ATOMIC_LOAD_MIN_I16_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_MAX_I8:
AtomicOp = Mips::ATOMIC_LOAD_MAX_I8_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_MAX_I16:
AtomicOp = Mips::ATOMIC_LOAD_MAX_I16_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_UMIN_I8:
AtomicOp = Mips::ATOMIC_LOAD_UMIN_I8_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_UMIN_I16:
AtomicOp = Mips::ATOMIC_LOAD_UMIN_I16_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_UMAX_I8:
AtomicOp = Mips::ATOMIC_LOAD_UMAX_I8_POSTRA;
NeedsAdditionalReg = true;
break;
case Mips::ATOMIC_LOAD_UMAX_I16:
AtomicOp = Mips::ATOMIC_LOAD_UMAX_I16_POSTRA;
NeedsAdditionalReg = true;
break;
default:
llvm_unreachable("Unknown subword atomic pseudo for expansion!");
}
@ -1692,19 +1802,25 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
// emitAtomicBinary. In summary, we need a scratch register which is going to
// be undef, that is unique among registers chosen for the instruction.
BuildMI(BB, DL, TII->get(AtomicOp))
.addReg(Dest, RegState::Define | RegState::EarlyClobber)
.addReg(AlignedAddr)
.addReg(Incr2)
.addReg(Mask)
.addReg(Mask2)
.addReg(ShiftAmt)
.addReg(Scratch, RegState::EarlyClobber | RegState::Define |
RegState::Dead | RegState::Implicit)
.addReg(Scratch2, RegState::EarlyClobber | RegState::Define |
RegState::Dead | RegState::Implicit)
.addReg(Scratch3, RegState::EarlyClobber | RegState::Define |
RegState::Dead | RegState::Implicit);
MachineInstrBuilder MIB =
BuildMI(BB, DL, TII->get(AtomicOp))
.addReg(Dest, RegState::Define | RegState::EarlyClobber)
.addReg(AlignedAddr)
.addReg(Incr2)
.addReg(Mask)
.addReg(Mask2)
.addReg(ShiftAmt)
.addReg(Scratch, RegState::EarlyClobber | RegState::Define |
RegState::Dead | RegState::Implicit)
.addReg(Scratch2, RegState::EarlyClobber | RegState::Define |
RegState::Dead | RegState::Implicit)
.addReg(Scratch3, RegState::EarlyClobber | RegState::Define |
RegState::Dead | RegState::Implicit);
if (NeedsAdditionalReg) {
Register Scratch4 = RegInfo.createVirtualRegister(RC);
MIB.addReg(Scratch4, RegState::EarlyClobber | RegState::Define |
RegState::Dead | RegState::Implicit);
}
MI.eraseFromParent(); // The instruction is gone now.

View File

@ -1925,6 +1925,18 @@ let usesCustomInserter = 1 in {
def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
def ATOMIC_LOAD_MIN_I8 : Atomic2Ops<atomic_load_min_8, GPR32>;
def ATOMIC_LOAD_MIN_I16 : Atomic2Ops<atomic_load_min_16, GPR32>;
def ATOMIC_LOAD_MIN_I32 : Atomic2Ops<atomic_load_min_32, GPR32>;
def ATOMIC_LOAD_MAX_I8 : Atomic2Ops<atomic_load_max_8, GPR32>;
def ATOMIC_LOAD_MAX_I16 : Atomic2Ops<atomic_load_max_16, GPR32>;
def ATOMIC_LOAD_MAX_I32 : Atomic2Ops<atomic_load_max_32, GPR32>;
def ATOMIC_LOAD_UMIN_I8 : Atomic2Ops<atomic_load_umin_8, GPR32>;
def ATOMIC_LOAD_UMIN_I16 : Atomic2Ops<atomic_load_umin_16, GPR32>;
def ATOMIC_LOAD_UMIN_I32 : Atomic2Ops<atomic_load_umin_32, GPR32>;
def ATOMIC_LOAD_UMAX_I8 : Atomic2Ops<atomic_load_umax_8, GPR32>;
def ATOMIC_LOAD_UMAX_I16 : Atomic2Ops<atomic_load_umax_16, GPR32>;
def ATOMIC_LOAD_UMAX_I32 : Atomic2Ops<atomic_load_umax_32, GPR32>;
}
def ATOMIC_LOAD_ADD_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
@ -1954,6 +1966,19 @@ def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA<GPR32>;
def ATOMIC_LOAD_MIN_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
def ATOMIC_LOAD_MIN_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
def ATOMIC_LOAD_MIN_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
def ATOMIC_LOAD_MAX_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
def ATOMIC_LOAD_MAX_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
def ATOMIC_LOAD_MAX_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
def ATOMIC_LOAD_UMIN_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
def ATOMIC_LOAD_UMIN_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
def ATOMIC_LOAD_UMIN_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
def ATOMIC_LOAD_UMAX_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
def ATOMIC_LOAD_UMAX_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
def ATOMIC_LOAD_UMAX_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
def LOAD_ACC64 : Load<"", ACC64>;

View File

@ -1615,5 +1615,6 @@ def : InstRW<[GenericWriteAtomic],
def : InstRW<[GenericWriteAtomic],
(instregex "^ATOMIC_CMP_SWAP_I(8|16|32|64)_POSTRA$")>;
def : InstRW<[GenericWriteAtomic],
(instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND)_I(8|16|32|64)_POSTRA$")>;
(instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND|MIN|MAX|UMIN|UMAX)"
"_I(8|16|32|64)_POSTRA$")>;
}

View File

@ -632,5 +632,6 @@ def : InstRW<[P5600WriteAtomic],
def : InstRW<[P5600WriteAtomic],
(instregex "^ATOMIC_CMP_SWAP_I(8|16|32|64)_POSTRA$")>;
def : InstRW<[P5600WriteAtomic],
(instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND)_I(8|16|32|64)_POSTRA$")>;
(instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND|MIN|MAX|UMIN|UMAX)"
"_I(8|16|32|64)_POSTRA$")>;
}

View File

@ -0,0 +1,158 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=mips64 -O0 -mcpu=mips64r2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=MIPS
; RUN: llc -march=mips64el -O0 -mcpu=mips64r2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=MIPS
; RUN: llc -march=mips64 -O0 -mcpu=mips64r6 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=MIPSR6
; RUN: llc -march=mips64el -O0 -mcpu=mips64r6 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=MIPSR6
define i64 @test_max(i64* nocapture %ptr, i64 signext %val) {
; MIPS-LABEL: test_max:
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: sync
; MIPS-NEXT: .LBB0_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS-NEXT: lld $2, 0($4)
; MIPS-NEXT: slt $3, $2, $5
; MIPS-NEXT: move $1, $2
; MIPS-NEXT: movn $1, $5, $3
; MIPS-NEXT: scd $1, 0($4)
; MIPS-NEXT: beqz $1, .LBB0_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
; MIPS-NEXT: sync
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
;
; MIPSR6-LABEL: test_max:
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: sync
; MIPSR6-NEXT: .LBB0_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPSR6-NEXT: lld $2, 0($4)
; MIPSR6-NEXT: slt $3, $2, $5
; MIPSR6-NEXT: seleqz $1, $2, $3
; MIPSR6-NEXT: selnez $3, $5, $3
; MIPSR6-NEXT: or $1, $1, $3
; MIPSR6-NEXT: scd $1, 0($4)
; MIPSR6-NEXT: beqzc $1, .LBB0_1
; MIPSR6-NEXT: # %bb.2: # %entry
; MIPSR6-NEXT: sync
; MIPSR6-NEXT: jrc $ra
entry:
%0 = atomicrmw max i64* %ptr, i64 %val seq_cst
ret i64 %0
}
define i64 @test_min(i64* nocapture %ptr, i64 signext %val) {
; MIPS-LABEL: test_min:
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: sync
; MIPS-NEXT: .LBB1_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS-NEXT: lld $2, 0($4)
; MIPS-NEXT: slt $3, $2, $5
; MIPS-NEXT: move $1, $2
; MIPS-NEXT: movz $1, $5, $3
; MIPS-NEXT: scd $1, 0($4)
; MIPS-NEXT: beqz $1, .LBB1_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
; MIPS-NEXT: sync
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
;
; MIPSR6-LABEL: test_min:
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: sync
; MIPSR6-NEXT: .LBB1_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPSR6-NEXT: lld $2, 0($4)
; MIPSR6-NEXT: slt $3, $2, $5
; MIPSR6-NEXT: selnez $1, $2, $3
; MIPSR6-NEXT: seleqz $3, $5, $3
; MIPSR6-NEXT: or $1, $1, $3
; MIPSR6-NEXT: scd $1, 0($4)
; MIPSR6-NEXT: beqzc $1, .LBB1_1
; MIPSR6-NEXT: # %bb.2: # %entry
; MIPSR6-NEXT: sync
; MIPSR6-NEXT: jrc $ra
entry:
%0 = atomicrmw min i64* %ptr, i64 %val seq_cst
ret i64 %0
}
define i64 @test_umax(i64* nocapture %ptr, i64 zeroext %val) {
; MIPS-LABEL: test_umax:
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: sync
; MIPS-NEXT: .LBB2_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS-NEXT: lld $2, 0($4)
; MIPS-NEXT: sltu $3, $2, $5
; MIPS-NEXT: move $1, $2
; MIPS-NEXT: movn $1, $5, $3
; MIPS-NEXT: scd $1, 0($4)
; MIPS-NEXT: beqz $1, .LBB2_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
; MIPS-NEXT: sync
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
;
; MIPSR6-LABEL: test_umax:
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: sync
; MIPSR6-NEXT: .LBB2_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPSR6-NEXT: lld $2, 0($4)
; MIPSR6-NEXT: sltu $3, $2, $5
; MIPSR6-NEXT: seleqz $1, $2, $3
; MIPSR6-NEXT: selnez $3, $5, $3
; MIPSR6-NEXT: or $1, $1, $3
; MIPSR6-NEXT: scd $1, 0($4)
; MIPSR6-NEXT: beqzc $1, .LBB2_1
; MIPSR6-NEXT: # %bb.2: # %entry
; MIPSR6-NEXT: sync
; MIPSR6-NEXT: jrc $ra
entry:
%0 = atomicrmw umax i64* %ptr, i64 %val seq_cst
ret i64 %0
}
define i64 @test_umin(i64* nocapture %ptr, i64 zeroext %val) {
; MIPS-LABEL: test_umin:
; MIPS: # %bb.0: # %entry
; MIPS-NEXT: sync
; MIPS-NEXT: .LBB3_1: # %entry
; MIPS-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS-NEXT: lld $2, 0($4)
; MIPS-NEXT: sltu $3, $2, $5
; MIPS-NEXT: move $1, $2
; MIPS-NEXT: movz $1, $5, $3
; MIPS-NEXT: scd $1, 0($4)
; MIPS-NEXT: beqz $1, .LBB3_1
; MIPS-NEXT: nop
; MIPS-NEXT: # %bb.2: # %entry
; MIPS-NEXT: sync
; MIPS-NEXT: jr $ra
; MIPS-NEXT: nop
;
; MIPSR6-LABEL: test_umin:
; MIPSR6: # %bb.0: # %entry
; MIPSR6-NEXT: sync
; MIPSR6-NEXT: .LBB3_1: # %entry
; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPSR6-NEXT: lld $2, 0($4)
; MIPSR6-NEXT: sltu $3, $2, $5
; MIPSR6-NEXT: selnez $1, $2, $3
; MIPSR6-NEXT: seleqz $3, $5, $3
; MIPSR6-NEXT: or $1, $1, $3
; MIPSR6-NEXT: scd $1, 0($4)
; MIPSR6-NEXT: beqzc $1, .LBB3_1
; MIPSR6-NEXT: # %bb.2: # %entry
; MIPSR6-NEXT: sync
; MIPSR6-NEXT: jrc $ra
entry:
%0 = atomicrmw umin i64* %ptr, i64 %val seq_cst
ret i64 %0
}

File diff suppressed because it is too large Load Diff

View File

@ -1790,7 +1790,7 @@ define i32 @AtomicSwap32(i32 signext %newval) nounwind {
; MM32-NEXT: $BB6_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: move $3, $4
; MM32-NEXT: or $3, $4, $zero
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB6_1
; MM32-NEXT: # %bb.2: # %entry