
Basic codegen for MTE stack tagging.

Implement IR intrinsics for stack tagging. Generated code is very
unoptimized for now.

Two special intrinsics, llvm.aarch64.irg.sp and llvm.aarch64.tagp, are
used to implement a tagged stack frame pointer in a virtual register.

Differential Revision: https://reviews.llvm.org/D64172

llvm-svn: 366360
Evgeniy Stepanov 2019-07-17 19:24:02 +00:00
parent d9da0d9f91
commit e010508942
22 changed files with 818 additions and 8 deletions
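
The intended usage pattern, in brief (a minimal IR sketch assembled from the tests added in this commit; the alloca and the tag offset are illustrative):

  %base = call i8* @llvm.aarch64.irg.sp(i64 0)                       ; randomly tagged frame base
  %a = alloca i8, i64 16, align 16
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)  ; %a's address, %base's tag plus 1
  call void @llvm.aarch64.settag(i8* %a_t, i64 16)                   ; colour the memory to match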


@@ -147,6 +147,14 @@ public:
    return std::make_pair(SDValue(), SDValue());
  }

  virtual SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
                                          SDValue Chain, SDValue Addr,
                                          SDValue Size,
                                          MachinePointerInfo DstPtrInfo,
                                          bool ZeroData) const {
    return SDValue();
  }

  // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
  // than FMUL and ADD is delegated to the machine combiner.
  virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const {


@@ -702,4 +702,34 @@ def int_aarch64_stg : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
    [IntrWriteMem]>;
def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
    [IntrNoMem]>;

// The following are codegen-only intrinsics for stack instrumentation.

// Generate a randomly tagged stack base pointer.
def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty],
    [IntrInaccessibleMemOnly]>;

// Transfer pointer tag with offset.
// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where
// * address is the address in ptr0
// * tag is a function of (tag in baseptr, tag_offset).
// Address bits in baseptr and tag bits in ptr0 are ignored.
// When the offset between ptr0 and baseptr is a compile-time constant, this can be emitted as
//   ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset
// It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp.
def int_aarch64_tagp : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty],
    [IntrNoMem, ImmArg<2>]>;

// Update allocation tags for the memory range to match the tag in the pointer argument.
def int_aarch64_settag : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;

// Update allocation tags for the memory range to match the tag in the pointer argument,
// and set memory contents to zero.
def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;

// Update allocation tags for a 16-aligned, 16-sized memory region, and store a pair of 8-byte values.
def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
}
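
In IR these are ordinary intrinsic calls (a small sketch based on the stg.ll and stgp.ll tests added later in this commit; the sizes and values are illustrative):

  call void @llvm.aarch64.settag(i8* %p, i64 48)        ; retag 48 bytes at %p
  call void @llvm.aarch64.settag.zero(i8* %p, i64 48)   ; retag and zero-fill
  call void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)  ; retag 16 bytes, store %a and %b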


@@ -3666,7 +3666,8 @@ bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
    const CallBase *Call) {
  return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
         Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
         Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
         Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
}

/// \p PN defines a loop-variant pointer to an object. Check if the


@@ -6805,6 +6805,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
    // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
    // delete it now.
    return;

  case Intrinsic::aarch64_settag:
  case Intrinsic::aarch64_settag_zero: {
    const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
    bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
    SDValue Val = TSI.EmitTargetCodeForSetTag(
        DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
        getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
        ZeroMemory);
    DAG.setRoot(Val);
    setValue(&I, Val);
    return;
  }
  }
}


@@ -15,6 +15,7 @@
#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
@@ -74,6 +75,9 @@ private:
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
};
} // end anonymous namespace
@@ -336,6 +340,64 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(2).getReg();
  Register AddressReg = MI.getOperand(3).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
  const unsigned OpCode =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -569,6 +631,46 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    unsigned FrameReg;
    int FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, FrameReg, /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset != 0) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop:
  case AArch64::STZGloop:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  }
  return false;
}
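
For reference, the pseudo expansions above yield sequences like these (AArch64 assembly sketches; register numbers, offsets, and labels are illustrative; compare the stacktagging.ll and stg.ll tests added later in this commit):

  // IRGstack when the tagged base is not SP-after-prologue:
  // IRG takes no immediate, so materialize the offset first.
  sub  x9, x29, #112
  irg  x9, x9

  // TAGPstack: one ADDG applies both an address offset and a tag offset.
  addg x0, x9, #16, #1

  // STGloop/STZGloop: post-indexed st2g/stz2g, 32 bytes per iteration.
  mov  x8, #256
.LBB0_1:
  st2g x9, [x9], #32
  sub  x8, x8, #32
  cbnz x8, .LBB0_1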


@@ -842,6 +842,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Set tagged base pointer to the bottom of the stack frame.
  // Ideally it should match SP value after prologue.
  AFI->setTaggedBasePointerOffset(MFI.getStackSize());

  // getStackSize() includes all the locals in its size calculation. We don't
  // include these locals when computing the stack size of a funclet, as they
  // are allocated in the parent's stack frame and accessed via the frame


@@ -157,6 +157,9 @@ public:
  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
@@ -703,7 +706,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSigned
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
  // selected here doesn't support labels/immediates, only base+offset.
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -2790,6 +2793,58 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  return true;
}

bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
  // tagp(FrameIndex, IRGstack, tag_offset):
  // since the offset between FrameIndex and IRGstack is a compile-time
  // constant, this can be lowered to a single ADDG instruction.
  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
    return false;
  }

  SDValue IRG_SP = N->getOperand(2);
  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
          Intrinsic::aarch64_irg_sp) {
    return false;
  }

  const TargetLowering *TLI = getTargetLowering();
  SDLoc DL(N);
  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
  SDValue FiOp = CurDAG->getTargetFrameIndex(
      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();

  SDNode *Out = CurDAG->getMachineNode(
      AArch64::TAGPstack, DL, MVT::i64,
      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, Out);
  return true;
}

void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
         "llvm.aarch64.tagp third argument must be an immediate");
  if (trySelectStackSlotTagP(N))
    return;
  // FIXME: above applies in any case when offset between Op1 and Op2 is a
  // compile-time constant, not just for stack allocations.

  // General case for unrelated pointers in Op1 and Op2.
  SDLoc DL(N);
  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
                                      {N->getOperand(1), N->getOperand(2)});
  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
                                      {SDValue(N1, 0), N->getOperand(2)});
  SDNode *N3 = CurDAG->getMachineNode(
      AArch64::ADDG, DL, MVT::i64,
      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, N3);
}

void AArch64DAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
@@ -3283,6 +3338,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_tagp:
      SelectTagP(Node);
      return;
    case Intrinsic::aarch64_neon_tbl2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
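
The general-case path in SelectTagP therefore lowers a tagp of two unrelated pointers to three instructions (this matches the tagp.ll test added later in this commit; registers are illustrative):

  subp x8, x0, x1      // signed address difference: ptr0 - baseptr
  add  x8, x8, x1      // ptr0's address combined with baseptr's tag
  addg x0, x8, #0, #2  // apply the constant tag offset (here #2)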


@@ -1234,6 +1234,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
  case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
  case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
  case AArch64ISD::STG: return "AArch64ISD::STG";
  case AArch64ISD::STZG: return "AArch64ISD::STZG";
  case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
  case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
  }
  return nullptr;
}


@@ -214,7 +214,13 @@ enum NodeType : unsigned {
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G
};

} // end namespace AArch64ISD


@@ -4067,12 +4067,12 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
              (outs), (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
  def PreIndex :
    BaseMemTagStore<opc1, 0b11, insn, "\t$Rt, [$Rn, $offset]!",
                    "$Rn = $wback",
                    (outs GPR64sp:$wback),
                    (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
  def PostIndex :
    BaseMemTagStore<opc1, 0b01, insn, "\t$Rt, [$Rn], $offset",
                    "$Rn = $wback",
                    (outs GPR64sp:$wback),
                    (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;


@@ -1772,6 +1772,7 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
  case AArch64::STNPWi:
  case AArch64::STNPSi:
  case AArch64::LDG:
  case AArch64::STGPi:
    return 3;
  case AArch64::ADDG:
  case AArch64::STGOffset:
@@ -2151,6 +2152,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
    MaxOffset = 4095;
    break;
  case AArch64::ADDG:
  case AArch64::TAGPstack:
    Scale = 16;
    Width = 0;
    MinOffset = 0;
@@ -2158,10 +2160,23 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
    break;
  case AArch64::LDG:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
    Scale = Width = 16;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
    Scale = 16;
    Width = 32;
    MinOffset = -256;
    MaxOffset = 255;
    break;
  case AArch64::STGPi:
    Scale = Width = 16;
    MinOffset = -64;
    MaxOffset = 63;
    break;
  }

  return true;
@@ -3257,6 +3272,8 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
  case AArch64::ST1Twov1d:
  case AArch64::ST1Threev1d:
  case AArch64::ST1Fourv1d:
  case AArch64::IRG:
  case AArch64::IRGstack:
    return AArch64FrameOffsetCannotUpdate;
  }


@@ -409,6 +409,12 @@ def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -1289,6 +1295,15 @@ defm STZG : MemTagStore<0b01, "stzg">;
defm ST2G : MemTagStore<0b10, "st2g">;
defm STZ2G : MemTagStore<0b11, "stz2g">;

def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (ST2GOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZ2GOffset $Rn, $Rm, $imm)>;

defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
@@ -1296,6 +1311,36 @@ def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;

def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
          (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;

def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
          (STGPi $Rt, $Rt2, $Rn, $imm)>;

def IRGstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
      Sched<[]>;
def TAGPstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
      Sched<[]>;

// Explicit SP in the first operand prevents ShrinkWrap optimization
// from leaving this instruction out of the stack frame. When IRGstack
// is transformed into IRG, this operand is replaced with the actual
// register / expression for the tagged base pointer of the current function.
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;

// Large STG to be expanded into a loop. $Rm is the size, $Rn is the start address.
// $Rn_wback is one past the end of the range.
let isCodeGenOnly=1, mayStore=1 in {
def STGloop
    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
      Sched<[WriteAdr, WriteST]>;
def STZGloop
    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
      Sched<[WriteAdr, WriteST]>;
}

} // Predicates = [HasMTE]
//===----------------------------------------------------------------------===//


@@ -105,6 +105,12 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
  /// ForwardedMustTailRegParms - A list of virtual and physical registers
  /// that must be forwarded to every musttail call.
  SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;

  // Offset from SP-at-entry to the tagged base pointer.
  // Tagged base pointer is set up to point to the first (lowest address) tagged
  // stack slot.
  unsigned TaggedBasePointerOffset;

public:
  AArch64FunctionInfo() = default;
@@ -224,6 +230,13 @@ public:
    return ForwardedMustTailRegParms;
  }

  unsigned getTaggedBasePointerOffset() const {
    return TaggedBasePointerOffset;
  }
  void setTaggedBasePointerOffset(unsigned Offset) {
    TaggedBasePointerOffset = Offset;
  }

private:
  // Hold the lists of LOHs.
  MILOHContainer LOHContainerSet;


@@ -468,10 +468,19 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
    return;
  }

  if (MI.getOpcode() == AArch64::TAGPstack) {
    // TAGPstack must use the virtual frame register in its 3rd operand.
    const MachineFrameInfo &MFI = MF.getFrameInfo();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    FrameReg = MI.getOperand(3).getReg();
    Offset =
        MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset();
  } else {
    Offset = TFI->resolveFrameIndexReference(
        MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
  }

  // Modify MI as necessary to handle as much of 'Offset' as possible
  if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
    return;


@@ -56,3 +56,91 @@ bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
    CodeGenOpt::Level OptLevel) const {
  return OptLevel >= CodeGenOpt::Aggressive;
}

static const int kSetTagLoopThreshold = 176;

static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
                                  SDValue Chain, SDValue Ptr, uint64_t ObjSize,
                                  const MachineMemOperand *BaseMemOperand,
                                  bool ZeroData) {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned ObjSizeScaled = ObjSize / 16;

  SDValue TagSrc = Ptr;
  if (Ptr.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Ptr)->getIndex();
    Ptr = DAG.getTargetFrameIndex(FI, MVT::i64);
    // A frame index operand may end up as [SP + offset] => it is fine to use SP
    // register as the tag source.
    TagSrc = DAG.getRegister(AArch64::SP, MVT::i64);
  }

  const unsigned OpCode1 = ZeroData ? AArch64ISD::STZG : AArch64ISD::STG;
  const unsigned OpCode2 = ZeroData ? AArch64ISD::STZ2G : AArch64ISD::ST2G;

  SmallVector<SDValue, 8> OutChains;
  unsigned OffsetScaled = 0;
  while (OffsetScaled < ObjSizeScaled) {
    if (ObjSizeScaled - OffsetScaled >= 2) {
      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
      SDValue St = DAG.getMemIntrinsicNode(
          OpCode2, dl, DAG.getVTList(MVT::Other),
          {Chain, TagSrc, AddrNode},
          MVT::v4i64,
          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16 * 2));
      OffsetScaled += 2;
      OutChains.push_back(St);
      continue;
    }

    if (ObjSizeScaled - OffsetScaled > 0) {
      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
      SDValue St = DAG.getMemIntrinsicNode(
          OpCode1, dl, DAG.getVTList(MVT::Other),
          {Chain, TagSrc, AddrNode},
          MVT::v2i64,
          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16));
      OffsetScaled += 1;
      OutChains.push_back(St);
    }
  }

  SDValue Res = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
  return Res;
}

SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr,
    SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const {
  uint64_t ObjSize = cast<ConstantSDNode>(Size)->getZExtValue();
  assert(ObjSize % 16 == 0);

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
      DstPtrInfo, MachineMemOperand::MOStore, ObjSize, 16);

  bool UseSetTagRangeLoop =
      kSetTagLoopThreshold >= 0 && (int)ObjSize >= kSetTagLoopThreshold;
  if (!UseSetTagRangeLoop)
    return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
                              ZeroData);

  if (ObjSize % 32 != 0) {
    SDNode *St1 = DAG.getMachineNode(
        ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
        {MVT::i64, MVT::Other},
        {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
    DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
    ObjSize -= 16;
    Addr = SDValue(St1, 0);
    Chain = SDValue(St1, 1);
  }

  const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
  SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
  SDNode *St = DAG.getMachineNode(
      ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
  DAG.setNodeMemRefs(cast<MachineSDNode>(St), {BaseMemOperand});
  return SDValue(St, 2);
}
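
A worked example of the size logic above (compare the stg3 and stg17 tests added later in this commit; registers and labels are illustrative). A 48-byte range is under kSetTagLoopThreshold (176) and is emitted unrolled; a 272-byte range is over the threshold and not a multiple of 32, so a single post-indexed stg peels off 16 bytes before an STGloop covers the remaining 256:

  // settag(%p, 48): unrolled
  stg  x0, [x0, #32]
  st2g x0, [x0]

  // settag(%p, 272): peel 16 bytes, then loop
  stg  x0, [x0], #16
  mov  x8, #256
.LBB1_1:
  st2g x0, [x0], #32
  sub  x8, x8, #32
  cbnz x8, .LBB1_1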


@@ -23,6 +23,10 @@ public:
                                  SDValue Chain, SDValue Dst, SDValue Src,
                                  SDValue Size, unsigned Align, bool isVolatile,
                                  MachinePointerInfo DstPtrInfo) const override;
  SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
                                  SDValue Chain, SDValue Op1, SDValue Op2,
                                  MachinePointerInfo DstPtrInfo,
                                  bool ZeroData) const override;
  bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const override;
};
}


@@ -13,6 +13,22 @@
  ret void
}

; CHECK-LABEL: define void @checkNonnullTagp(
define void @checkNonnullTagp(i8* %tag) {
; CHECK: %[[p:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %a, i8* %tag, i64 1)
; CHECK: %[[p2:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %[[p]], i8* %tag, i64 2)
; CHECK: call void @use(i8* nonnull %[[p2]])
entry:
  %a = alloca i8, align 8
  %p = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 1)
  %p2 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
  call void @use(i8* %p2)
  ret void
}

declare i8* @llvm.aarch64.irg(i8*, i64)
declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)
declare void @use(i8*)


@@ -0,0 +1,42 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define i8* @irg_imm16(i8* %p) {
entry:
; CHECK-LABEL: irg_imm16:
; CHECK: mov w[[R:[0-9]+]], #16
; CHECK: irg x0, x0, x[[R]]
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 16)
  ret i8* %q
}

define i8* @irg_imm0(i8* %p) {
entry:
; CHECK-LABEL: irg_imm0:
; CHECK: irg x0, x0{{$}}
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
  ret i8* %q
}

define i8* @irg_reg(i8* %p, i64 %ex) {
entry:
; CHECK-LABEL: irg_reg:
; CHECK: irg x0, x0, x1
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 %ex)
  ret i8* %q
}

; undef argument in irg is treated specially
define i8* @irg_sp() {
entry:
; CHECK-LABEL: irg_sp:
; CHECK: irg x0, sp{{$}}
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
  ret i8* %q
}

declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
declare i8* @llvm.aarch64.irg.sp(i64 %exclude)


@@ -0,0 +1,93 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define i8* @small_alloca() {
entry:
; CHECK-LABEL: small_alloca:
; CHECK: irg [[R:x[0-9]+]], sp{{$}}
; CHECK-NEXT: addg x0, [[R]], #0, #1
; CHECK: ret
  %a = alloca i8, align 16
  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %q, i64 1)
  ret i8* %q1
}

; Two large allocas. One's offset overflows the addg immediate.
define void @huge_allocas() {
entry:
; CHECK-LABEL: huge_allocas:
; CHECK: irg [[R:x[0-9]+]], sp{{$}}
; CHECK: add [[TMP:x[0-9]+]], [[R]], #3088
; CHECK: addg x0, [[TMP]], #1008, #1
; CHECK: addg x1, [[R]], #0, #2
; CHECK: bl use2
  %a = alloca i8, i64 4096, align 16
  %b = alloca i8, i64 4096, align 16
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 2)
  call void @use2(i8* %a_t, i8* %b_t)
  ret void
}

; Realigned stack frame. IRG uses the value of SP after realignment,
; and ADDG for the first stack allocation has offset 0.
define void @realign() {
entry:
; CHECK-LABEL: realign:
; CHECK: add x29, sp, #16
; CHECK: and sp, x{{[0-9]*}}, #0xffffffffffffffc0
; CHECK: irg [[R:x[0-9]+]], sp{{$}}
; CHECK: addg x0, [[R]], #0, #1
; CHECK: bl use
  %a = alloca i8, i64 4096, align 64
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
  call void @use(i8* %a_t)
  ret void
}

; With a dynamic alloca, IRG has to use FP with a non-zero offset.
; The ADDG offset for the single static alloca is still zero.
define void @dynamic_alloca(i64 %size) {
entry:
; CHECK-LABEL: dynamic_alloca:
; CHECK: sub [[R:x[0-9]+]], x29, #[[OFS:[0-9]+]]
; CHECK: irg [[R]], [[R]]
; CHECK: addg x1, [[R]], #0, #1
; CHECK: sub x0, x29, #[[OFS]]
; CHECK: bl use2
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a = alloca i128, i64 %size, align 16
  %b = alloca i8, i64 16, align 16
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
  call void @use2(i8* %b, i8* %b_t)
  ret void
}

; Both dynamic alloca and realigned frame.
; After the initial realignment, generate the base pointer.
; IRG uses the base pointer w/o offset.
; Offsets for tagged and untagged pointers to the same alloca match.
define void @dynamic_alloca_and_realign(i64 %size) {
entry:
; CHECK-LABEL: dynamic_alloca_and_realign:
; CHECK: and sp, x{{.*}}, #0xffffffffffffffc0
; CHECK: mov x19, sp
; CHECK: irg [[R:x[0-9]+]], x19
; CHECK: addg x1, [[R]], #[[OFS:[0-9]+]], #1
; CHECK: add x0, x19, #[[OFS]]
; CHECK: bl use2
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a = alloca i128, i64 %size, align 64
  %b = alloca i8, i64 16, align 16
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
  call void @use2(i8* %b, i8* %b_t)
  ret void
}

declare void @use(i8*)
declare void @use2(i8*, i8*)
declare i8* @llvm.aarch64.irg.sp(i64 %exclude)
declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)


@@ -0,0 +1,138 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define void @stg1(i8* %p) {
entry:
; CHECK-LABEL: stg1:
; CHECK: stg x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 16)
  ret void
}

define void @stg2(i8* %p) {
entry:
; CHECK-LABEL: stg2:
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 32)
  ret void
}

define void @stg3(i8* %p) {
entry:
; CHECK-LABEL: stg3:
; CHECK: stg x0, [x0, #32]
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 48)
  ret void
}

define void @stg4(i8* %p) {
entry:
; CHECK-LABEL: stg4:
; CHECK: st2g x0, [x0, #32]
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 64)
  ret void
}

define void @stg5(i8* %p) {
entry:
; CHECK-LABEL: stg5:
; CHECK: stg x0, [x0, #64]
; CHECK: st2g x0, [x0, #32]
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 80)
  ret void
}

define void @stg16(i8* %p) {
entry:
; CHECK-LABEL: stg16:
; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
; CHECK: st2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 256)
  ret void
}

define void @stg17(i8* %p) {
entry:
; CHECK-LABEL: stg17:
; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
; CHECK: stg x0, [x0], #16
; CHECK: st2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 272)
  ret void
}

define void @stzg3(i8* %p) {
entry:
; CHECK-LABEL: stzg3:
; CHECK: stzg x0, [x0, #32]
; CHECK: stz2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag.zero(i8* %p, i64 48)
  ret void
}

define void @stzg17(i8* %p) {
entry:
; CHECK-LABEL: stzg17:
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
; CHECK: stzg x0, [x0], #16
; CHECK: stz2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  call void @llvm.aarch64.settag.zero(i8* %p, i64 272)
  ret void
}

define void @stg_alloca1() {
entry:
; CHECK-LABEL: stg_alloca1:
; CHECK: stg sp, [sp]
; CHECK: ret
  %a = alloca i8, i32 16, align 16
  call void @llvm.aarch64.settag(i8* %a, i64 16)
  ret void
}

define void @stg_alloca5() {
entry:
; CHECK-LABEL: stg_alloca5:
; CHECK: stg sp, [sp, #64]
; CHECK: st2g sp, [sp, #32]
; CHECK: st2g sp, [sp]
; CHECK: ret
  %a = alloca i8, i32 80, align 16
  call void @llvm.aarch64.settag(i8* %a, i64 80)
  ret void
}

define void @stg_alloca17() {
entry:
; CHECK-LABEL: stg_alloca17:
; CHECK: mov [[P:x[0-9]+]], sp
; CHECK: stg [[P]], {{\[}}[[P]]{{\]}}, #16
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
; CHECK: st2g [[P]], {{\[}}[[P]]{{\]}}, #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  %a = alloca i8, i32 272, align 16
  call void @llvm.aarch64.settag(i8* %a, i64 272)
  ret void
}

declare void @llvm.aarch64.settag(i8* %p, i64 %a)
declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a)


@@ -0,0 +1,78 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define void @stgp0(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp0:
; CHECK: stgp x0, x1, [x2]
; CHECK: ret
  call void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)
  ret void
}

define void @stgp1004(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp1004:
; CHECK: add [[R:x[0-9]+]], x2, #1004
; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 1004
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp1008(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp1008:
; CHECK: stgp x0, x1, [x2, #1008]
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 1008
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp1024(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp1024:
; CHECK: add [[R:x[0-9]+]], x2, #1024
; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 1024
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp_1024(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp_1024:
; CHECK: stgp x0, x1, [x2, #-1024]
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 -1024
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp_1040(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp_1040:
; CHECK: sub [[R:x[0-9]+]], x2, #1040
; CHECK: stgp x0, x1, [x{{.*}}]
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 -1040
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp_alloca(i64 %a, i64 %b) {
entry:
; CHECK-LABEL: stgp_alloca:
; CHECK: stgp x0, x1, [sp]
; CHECK: stgp x1, x0, [sp, #16]
; CHECK: ret
  %x = alloca i8, i32 32, align 16
  call void @llvm.aarch64.stgp(i8* %x, i64 %a, i64 %b)
  %x1 = getelementptr i8, i8* %x, i32 16
  call void @llvm.aarch64.stgp(i8* %x1, i64 %b, i64 %a)
  ret void
}

declare void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)


@@ -0,0 +1,41 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define i8* @tagp2(i8* %p, i8* %tag) {
entry:
; CHECK-LABEL: tagp2:
; CHECK: subp [[R:x[0-9]+]], x0, x1
; CHECK: add [[R]], [[R]], x1
; CHECK: addg x0, [[R]], #0, #2
; CHECK: ret
  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
  ret i8* %q
}

define i8* @irg_tagp_unrelated(i8* %p, i8* %q) {
entry:
; CHECK-LABEL: irg_tagp_unrelated:
; CHECK: irg [[R0:x[0-9]+]], x0{{$}}
; CHECK: subp [[R:x[0-9]+]], [[R0]], x1
; CHECK: add [[R]], [[R0]], x1
; CHECK: addg x0, [[R]], #0, #1
; CHECK: ret
  %p1 = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p1, i8* %q, i64 1)
  ret i8* %q1
}

define i8* @tagp_alloca(i8* %tag) {
entry:
; CHECK-LABEL: tagp_alloca:
; CHECK: mov [[R0:x[0-9]+]], sp{{$}}
; CHECK: subp [[R:x[0-9]+]], [[R0]], x0{{$}}
; CHECK: add [[R]], [[R0]], x0{{$}}
; CHECK: addg x0, [[R]], #0, #3
; CHECK: ret
  %a = alloca i8, align 16
  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 3)
  ret i8* %q
}

declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)