Basic codegen for MTE stack tagging.
Implement IR intrinsics for stack tagging. Generated code is very
unoptimized for now.

Two special intrinsics, llvm.aarch64.irg.sp and llvm.aarch64.tagp, are
used to implement a tagged stack frame pointer in a virtual register.

Differential Revision: https://reviews.llvm.org/D64172

llvm-svn: 366360
parent d9da0d9f91
commit e010508942
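As a usage sketch, here is a minimal IR example of the two new intrinsics working together, mirroring the small_alloca test added in this commit (the function name @example is illustrative):

; Materialize a randomly tagged stack base with llvm.aarch64.irg.sp,
; then derive a tagged pointer to the alloca with llvm.aarch64.tagp.
define i8* @example() {
entry:
  %a = alloca i8, align 16
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
  ret i8* %a_t
}

declare i8* @llvm.aarch64.irg.sp(i64)
declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)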
@@ -147,6 +147,14 @@ public:
     return std::make_pair(SDValue(), SDValue());
   }
 
+  virtual SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                          SDValue Chain, SDValue Addr,
+                                          SDValue Size,
+                                          MachinePointerInfo DstPtrInfo,
+                                          bool ZeroData) const {
+    return SDValue();
+  }
+
   // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
   // than FMUL and ADD is delegated to the machine combiner.
   virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const {
@@ -702,4 +702,34 @@ def int_aarch64_stg : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
                       [IntrWriteMem]>;
 def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
                        [IntrNoMem]>;
+
+// The following are codegen-only intrinsics for stack instrumentation.
+
+// Generate a randomly tagged stack base pointer.
+def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty],
+                                   [IntrInaccessibleMemOnly]>;
+
+// Transfer pointer tag with offset.
+// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where
+// * address is the address in ptr0
+// * tag is a function of (tag in baseptr, tag_offset).
+// Address bits in baseptr and tag bits in ptr0 are ignored.
+// When the offset between ptr0 and baseptr is a compile-time constant, this can be emitted as
+//   ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset
+// It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp.
+def int_aarch64_tagp : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty],
+                                 [IntrNoMem, ImmArg<2>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument.
+def int_aarch64_settag : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+                                   [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument,
+// and set memory contents to zero.
+def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+                                        [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for a 16-aligned, 16-sized memory region, and store a pair of 8-byte values.
+def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+                                 [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
 }
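As a usage sketch for the tag-update intrinsics defined above (the function name @settag_example is illustrative; the lowering for these calls is added later in this patch):

; Retag a 32-byte region to match the tag in %p, then tag-and-store a
; pair of 8-byte values at %p.
define void @settag_example(i8* %p, i64 %x, i64 %y) {
entry:
  call void @llvm.aarch64.settag(i8* %p, i64 32)
  call void @llvm.aarch64.stgp(i8* %p, i64 %x, i64 %y)
  ret void
}

declare void @llvm.aarch64.settag(i8*, i64)
declare void @llvm.aarch64.stgp(i8*, i64, i64)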
@@ -3666,7 +3666,8 @@ bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
     const CallBase *Call) {
   return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
          Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
-         Call->getIntrinsicID() == Intrinsic::aarch64_irg;
+         Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
+         Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
 }
 
 /// \p PN defines a loop-variant pointer to an object. Check if the
@@ -6805,6 +6805,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
     // delete it now.
     return;
+
+  case Intrinsic::aarch64_settag:
+  case Intrinsic::aarch64_settag_zero: {
+    const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+    bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
+    SDValue Val = TSI.EmitTargetCodeForSetTag(
+        DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+        getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
+        ZeroMemory);
+    DAG.setRoot(Val);
+    setValue(&I, Val);
+    return;
+  }
   }
 }
 
@@ -15,6 +15,7 @@
 
 #include "AArch64ExpandImm.h"
 #include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
@@ -74,6 +75,9 @@ private:
   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           MachineBasicBlock::iterator &NextMBBI);
+  bool expandSetTagLoop(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI,
+                        MachineBasicBlock::iterator &NextMBBI);
 };
 
 } // end anonymous namespace
@@ -336,6 +340,64 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
   return true;
 }
 
+bool AArch64ExpandPseudo::expandSetTagLoop(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  Register SizeReg = MI.getOperand(2).getReg();
+  Register AddressReg = MI.getOperand(3).getReg();
+
+  MachineFunction *MF = MBB.getParent();
+
+  bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
+  const unsigned OpCode =
+      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
+
+  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoopBB);
+  MF->insert(++LoopBB->getIterator(), DoneBB);
+
+  BuildMI(LoopBB, DL, TII->get(OpCode))
+      .addDef(AddressReg)
+      .addReg(AddressReg)
+      .addReg(AddressReg)
+      .addImm(2)
+      .cloneMemRefs(MI)
+      .setMIFlags(MI.getFlags());
+  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
+      .addDef(SizeReg)
+      .addReg(SizeReg)
+      .addImm(16 * 2)
+      .addImm(0);
+  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
+
+  LoopBB->addSuccessor(LoopBB);
+  LoopBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+
+  MBB.addSuccessor(LoopBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  // Recompute liveness bottom up.
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *DoneBB);
+  computeAndAddLiveIns(LiveRegs, *LoopBB);
+  // Do an extra pass in the loop to get the loop carried dependencies right.
+  // FIXME: is this necessary?
+  LoopBB->clearLiveIns();
+  computeAndAddLiveIns(LiveRegs, *LoopBB);
+  DoneBB->clearLiveIns();
+  computeAndAddLiveIns(LiveRegs, *DoneBB);
+
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true. Otherwise return false.
 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -569,6 +631,46 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     MI.eraseFromParent();
     return true;
   }
+  case AArch64::IRGstack: {
+    MachineFunction &MF = *MBB.getParent();
+    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+    const AArch64FrameLowering *TFI =
+        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
+
+    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
+    // almost always point to SP-after-prologue; if not, emit a longer
+    // instruction sequence.
+    int BaseOffset = -AFI->getTaggedBasePointerOffset();
+    unsigned FrameReg;
+    int FrameRegOffset = TFI->resolveFrameOffsetReference(
+        MF, BaseOffset, false /*isFixed*/, FrameReg, /*PreferFP=*/false,
+        /*ForSimm=*/true);
+    Register SrcReg = FrameReg;
+    if (FrameRegOffset != 0) {
+      // Use output register as temporary.
+      SrcReg = MI.getOperand(0).getReg();
+      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
+                      FrameRegOffset, TII);
+    }
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
+        .add(MI.getOperand(0))
+        .addUse(SrcReg)
+        .add(MI.getOperand(2));
+    MI.eraseFromParent();
+    return true;
+  }
+  case AArch64::TAGPstack: {
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDG))
+        .add(MI.getOperand(0))
+        .add(MI.getOperand(1))
+        .add(MI.getOperand(2))
+        .add(MI.getOperand(4));
+    MI.eraseFromParent();
+    return true;
+  }
+  case AArch64::STGloop:
+  case AArch64::STZGloop:
+    return expandSetTagLoop(MBB, MBBI, NextMBBI);
   }
   return false;
 }
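To make the IRGstack expansion concrete, here is a condensed copy of the dynamic_alloca test added later in this patch (register numbers are whatever the allocator picks; the CHECK lines follow the test): with a dynamic alloca in the frame, the tagged base is not at SP, so the frame offset is first materialized into the output register and IRG then runs on that register.

; CHECK: sub [[R:x[0-9]+]], x29, #{{[0-9]+}}
; CHECK: irg [[R]], [[R]]
; CHECK: addg x1, [[R]], #0, #1
define void @dynamic_alloca(i64 %size) {
entry:
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a = alloca i128, i64 %size, align 16
  %b = alloca i8, i64 16, align 16
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
  call void @use2(i8* %b, i8* %b_t)
  ret void
}

declare void @use2(i8*, i8*)
declare i8* @llvm.aarch64.irg.sp(i64)
declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)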
@@ -842,6 +842,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     return;
 
+  // Set tagged base pointer to the bottom of the stack frame.
+  // Ideally it should match SP value after prologue.
+  AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+
   // getStackSize() includes all the locals in its size calculation. We don't
   // include these locals when computing the stack size of a funclet, as they
   // are allocated in the parent's stack frame and accessed via the frame
@@ -157,6 +157,9 @@ public:
 
   bool tryIndexedLoad(SDNode *N);
 
+  bool trySelectStackSlotTagP(SDNode *N);
+  void SelectTagP(SDNode *N);
+
   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                   unsigned SubRegIdx);
   void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
@@ -703,7 +706,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSigned
     return true;
   }
 
-  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
+  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
   // selected here doesn't support labels/immediates, only base+offset.
   if (CurDAG->isBaseWithConstantOffset(N)) {
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -2790,6 +2793,58 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   return true;
 }
 
+bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
+  // tagp(FrameIndex, IRGstack, tag_offset):
+  // since the offset between FrameIndex and IRGstack is a compile-time
+  // constant, this can be lowered to a single ADDG instruction.
+  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
+    return false;
+  }
+
+  SDValue IRG_SP = N->getOperand(2);
+  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
+      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
+          Intrinsic::aarch64_irg_sp) {
+    return false;
+  }
+
+  const TargetLowering *TLI = getTargetLowering();
+  SDLoc DL(N);
+  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
+  SDValue FiOp = CurDAG->getTargetFrameIndex(
+      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+
+  SDNode *Out = CurDAG->getMachineNode(
+      AArch64::TAGPstack, DL, MVT::i64,
+      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
+       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+  ReplaceNode(N, Out);
+  return true;
+}
+
+void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
+  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
+         "llvm.aarch64.tagp third argument must be an immediate");
+  if (trySelectStackSlotTagP(N))
+    return;
+  // FIXME: above applies in any case when offset between Op1 and Op2 is a
+  // compile-time constant, not just for stack allocations.
+
+  // General case for unrelated pointers in Op1 and Op2.
+  SDLoc DL(N);
+  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
+                                      {N->getOperand(1), N->getOperand(2)});
+  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
+                                      {SDValue(N1, 0), N->getOperand(2)});
+  SDNode *N3 = CurDAG->getMachineNode(
+      AArch64::ADDG, DL, MVT::i64,
+      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
+       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+  ReplaceNode(N, N3);
+}
+
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
@@ -3283,6 +3338,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     switch (IntNo) {
     default:
       break;
+    case Intrinsic::aarch64_tagp:
+      SelectTagP(Node);
+      return;
     case Intrinsic::aarch64_neon_tbl2:
       SelectTable(Node, 2,
                   VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
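The general-case sequence above computes the address from Op1 and the tag from Op2: SUBP yields the signed address difference Op1 - Op2, adding Op2 back gives Op1's address carrying Op2's tag, and ADDG applies the tag offset. A condensed copy of the tagp2 test added in this patch shows the result (the register is whatever the allocator picks):

; CHECK: subp [[R:x[0-9]+]], x0, x1
; CHECK: add [[R]], [[R]], x1
; CHECK: addg x0, [[R]], #0, #2
define i8* @tagp2(i8* %p, i8* %tag) {
entry:
  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
  ret i8* %q
}

declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)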
@@ -1234,6 +1234,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
   case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
   case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
+  case AArch64ISD::STG: return "AArch64ISD::STG";
+  case AArch64ISD::STZG: return "AArch64ISD::STZG";
+  case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
+  case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
   }
   return nullptr;
 }
@@ -214,7 +214,13 @@ enum NodeType : unsigned {
   LD4LANEpost,
   ST2LANEpost,
   ST3LANEpost,
-  ST4LANEpost
+  ST4LANEpost,
+
+  STG,
+  STZG,
+  ST2G,
+  STZ2G
+
 };
 
 } // end namespace AArch64ISD
@@ -4067,12 +4067,12 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
                     (outs), (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
   def PreIndex :
     BaseMemTagStore<opc1, 0b11, insn, "\t$Rt, [$Rn, $offset]!",
-                    "$Rn = $wback,@earlyclobber $wback",
+                    "$Rn = $wback",
                     (outs GPR64sp:$wback),
                     (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
   def PostIndex :
     BaseMemTagStore<opc1, 0b01, insn, "\t$Rt, [$Rn], $offset",
-                    "$Rn = $wback,@earlyclobber $wback",
+                    "$Rn = $wback",
                     (outs GPR64sp:$wback),
                     (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
@@ -1772,6 +1772,7 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
   case AArch64::STNPWi:
   case AArch64::STNPSi:
   case AArch64::LDG:
+  case AArch64::STGPi:
    return 3;
   case AArch64::ADDG:
   case AArch64::STGOffset:
@@ -2151,6 +2152,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
     MaxOffset = 4095;
     break;
   case AArch64::ADDG:
+  case AArch64::TAGPstack:
     Scale = 16;
     Width = 0;
     MinOffset = 0;
@@ -2158,10 +2160,23 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
     break;
   case AArch64::LDG:
+  case AArch64::STGOffset:
+  case AArch64::STZGOffset:
     Scale = Width = 16;
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+    Scale = 16;
+    Width = 32;
+    MinOffset = -256;
+    MaxOffset = 255;
+    break;
+  case AArch64::STGPi:
+    Scale = Width = 16;
+    MinOffset = -64;
+    MaxOffset = 63;
+    break;
   }
 
   return true;
@@ -3257,6 +3272,8 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
   case AArch64::ST1Twov1d:
   case AArch64::ST1Threev1d:
   case AArch64::ST1Fourv1d:
+  case AArch64::IRG:
+  case AArch64::IRGstack:
     return AArch64FrameOffsetCannotUpdate;
   }
 
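To make the new ADDG/TAGPstack entries concrete: Scale = 16 with MinOffset = 0 and MaxOffset = 63 bounds the scaled immediate at 63 * 16 = 1008 bytes. A condensed copy of the huge_allocas test added below shows what happens past that range; with two 4096-byte allocas, %a sits 4096 bytes above the tagged base, and 4096 = 3088 + 1008 is split into a plain add plus an in-range addg:

; CHECK: add [[TMP:x[0-9]+]], [[R:x[0-9]+]], #3088
; CHECK: addg x0, [[TMP]], #1008, #1
define void @huge_allocas() {
entry:
  %a = alloca i8, i64 4096, align 16
  %b = alloca i8, i64 4096, align 16
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 2)
  call void @use2(i8* %a_t, i8* %b_t)
  ret void
}

declare void @use2(i8*, i8*)
declare i8* @llvm.aarch64.irg.sp(i64)
declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)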
@@ -409,6 +409,12 @@ def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
 def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
 def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
 
+def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
@@ -1289,6 +1295,15 @@ defm STZG : MemTagStore<0b01, "stzg">;
 defm ST2G : MemTagStore<0b10, "st2g">;
 defm STZ2G : MemTagStore<0b11, "stz2g">;
 
+def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (STGOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (STZGOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (ST2GOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (STZ2GOffset $Rn, $Rm, $imm)>;
+
 defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
 def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
 def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
@@ -1296,6 +1311,36 @@ def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
 def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
           (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
 
+def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
+          (STGPi $Rt, $Rt2, $Rn, $imm)>;
+
+def IRGstack
+    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
+      Sched<[]>;
+def TAGPstack
+    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
+      Sched<[]>;
+
+// Explicit SP in the first operand prevents ShrinkWrap optimization
+// from leaving this instruction out of the stack frame. When IRGstack
+// is transformed into IRG, this operand is replaced with the actual
+// register / expression for the tagged base pointer of the current function.
+def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
+
+// Large STG to be expanded into a loop. $Rm is the size, $Rn is start address.
+// $Rn_wback is one past the end of the range.
+let isCodeGenOnly=1, mayStore=1 in {
+def STGloop
+    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+      Sched<[WriteAdr, WriteST]>;
+
+def STZGloop
+    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+      Sched<[WriteAdr, WriteST]>;
+}
+
 } // Predicates = [HasMTE]
 
 //===----------------------------------------------------------------------===//
@@ -105,6 +105,12 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
   SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
+
+  // Offset from SP-at-entry to the tagged base pointer.
+  // Tagged base pointer is set up to point to the first (lowest address) tagged
+  // stack slot.
+  unsigned TaggedBasePointerOffset;
+
 public:
   AArch64FunctionInfo() = default;
 
@@ -224,6 +230,13 @@ public:
     return ForwardedMustTailRegParms;
   }
 
+  unsigned getTaggedBasePointerOffset() const {
+    return TaggedBasePointerOffset;
+  }
+  void setTaggedBasePointerOffset(unsigned Offset) {
+    TaggedBasePointerOffset = Offset;
+  }
+
 private:
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;
@@ -468,10 +468,19 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     return;
   }
 
-  // Modify MI as necessary to handle as much of 'Offset' as possible
-  Offset = TFI->resolveFrameIndexReference(
-      MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+  if (MI.getOpcode() == AArch64::TAGPstack) {
+    // TAGPstack must use the virtual frame register in its 3rd operand.
+    const MachineFrameInfo &MFI = MF.getFrameInfo();
+    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+    FrameReg = MI.getOperand(3).getReg();
+    Offset =
+        MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset();
+  } else {
+    Offset = TFI->resolveFrameIndexReference(
+        MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+  }
+
+  // Modify MI as necessary to handle as much of 'Offset' as possible
   if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
     return;
 
@@ -56,3 +56,91 @@ bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
     CodeGenOpt::Level OptLevel) const {
   return OptLevel >= CodeGenOpt::Aggressive;
 }
+
+static const int kSetTagLoopThreshold = 176;
+
+static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Ptr, uint64_t ObjSize,
+                                  const MachineMemOperand *BaseMemOperand,
+                                  bool ZeroData) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned ObjSizeScaled = ObjSize / 16;
+
+  SDValue TagSrc = Ptr;
+  if (Ptr.getOpcode() == ISD::FrameIndex) {
+    int FI = cast<FrameIndexSDNode>(Ptr)->getIndex();
+    Ptr = DAG.getTargetFrameIndex(FI, MVT::i64);
+    // A frame index operand may end up as [SP + offset] => it is fine to use SP
+    // register as the tag source.
+    TagSrc = DAG.getRegister(AArch64::SP, MVT::i64);
+  }
+
+  const unsigned OpCode1 = ZeroData ? AArch64ISD::STZG : AArch64ISD::STG;
+  const unsigned OpCode2 = ZeroData ? AArch64ISD::STZ2G : AArch64ISD::ST2G;
+
+  SmallVector<SDValue, 8> OutChains;
+  unsigned OffsetScaled = 0;
+  while (OffsetScaled < ObjSizeScaled) {
+    if (ObjSizeScaled - OffsetScaled >= 2) {
+      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+      SDValue St = DAG.getMemIntrinsicNode(
+          OpCode2, dl, DAG.getVTList(MVT::Other),
+          {Chain, TagSrc, AddrNode},
+          MVT::v4i64,
+          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16 * 2));
+      OffsetScaled += 2;
+      OutChains.push_back(St);
+      continue;
+    }
+
+    if (ObjSizeScaled - OffsetScaled > 0) {
+      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+      SDValue St = DAG.getMemIntrinsicNode(
+          OpCode1, dl, DAG.getVTList(MVT::Other),
+          {Chain, TagSrc, AddrNode},
+          MVT::v2i64,
+          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16));
+      OffsetScaled += 1;
+      OutChains.push_back(St);
+    }
+  }
+
+  SDValue Res = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+  return Res;
+}
+
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr,
+    SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const {
+  uint64_t ObjSize = cast<ConstantSDNode>(Size)->getZExtValue();
+  assert(ObjSize % 16 == 0);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
+      DstPtrInfo, MachineMemOperand::MOStore, ObjSize, 16);
+
+  bool UseSetTagRangeLoop =
+      kSetTagLoopThreshold >= 0 && (int)ObjSize >= kSetTagLoopThreshold;
+  if (!UseSetTagRangeLoop)
+    return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
+                              ZeroData);
+
+  if (ObjSize % 32 != 0) {
+    SDNode *St1 = DAG.getMachineNode(
+        ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
+        {MVT::i64, MVT::Other},
+        {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
+    DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
+    ObjSize -= 16;
+    Addr = SDValue(St1, 0);
+    Chain = SDValue(St1, 1);
+  }
+
+  const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
+  SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
+  SDNode *St = DAG.getMachineNode(
+      ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
+
+  DAG.setNodeMemRefs(cast<MachineSDNode>(St), {BaseMemOperand});
+  return SDValue(St, 2);
+}
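A worked example of the size logic above, consistent with the settag.ll test added below: sizes under the 176-byte threshold are fully unrolled into ST2G/STG stores, while larger regions go through STGloop, preceded by one post-indexed STG when the size is not a multiple of 32 (the register x8 and the loop label below are illustrative):

; 48 bytes (< 176): fully unrolled, as in @stg3 below:
;   stg  x0, [x0, #32]
;   st2g x0, [x0]
; 272 bytes (>= 176): 272 = 16 + 256, so one post-indexed stg, then a
; loop of 256/32 = 8 post-indexed st2g iterations, as in @stg17 below:
;   mov  x8, #256
;   stg  x0, [x0], #16
; loop:
;   st2g x0, [x0], #32
;   sub  x8, x8, #32
;   cbnz x8, loop
define void @settag_sizes(i8* %p) {
entry:
  call void @llvm.aarch64.settag(i8* %p, i64 48)
  call void @llvm.aarch64.settag(i8* %p, i64 272)
  ret void
}

declare void @llvm.aarch64.settag(i8*, i64)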
@@ -23,6 +23,10 @@ public:
                                   SDValue Chain, SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align, bool isVolatile,
                                   MachinePointerInfo DstPtrInfo) const override;
+  SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Op1, SDValue Op2,
+                                  MachinePointerInfo DstPtrInfo,
+                                  bool ZeroData) const override;
   bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const override;
 };
 }
@@ -13,6 +13,22 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: define void @checkNonnullTagp(
+define void @checkNonnullTagp(i8* %tag) {
+; CHECK: %[[p:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %a, i8* %tag, i64 1)
+; CHECK: %[[p2:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %[[p]], i8* %tag, i64 2)
+; CHECK: call void @use(i8* nonnull %[[p2]])
+entry:
+  %a = alloca i8, align 8
+
+  %p = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 1)
+  %p2 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
+  call void @use(i8* %p2)
+
+  ret void
+}
+
 declare i8* @llvm.aarch64.irg(i8*, i64)
+declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)
 
 declare void @use(i8*)
test/CodeGen/AArch64/irg.ll (new file, 42 lines)
@@ -0,0 +1,42 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define i8* @irg_imm16(i8* %p) {
entry:
; CHECK-LABEL: irg_imm16:
; CHECK: mov w[[R:[0-9]+]], #16
; CHECK: irg x0, x0, x[[R]]
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 16)
  ret i8* %q
}

define i8* @irg_imm0(i8* %p) {
entry:
; CHECK-LABEL: irg_imm0:
; CHECK: irg x0, x0{{$}}
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
  ret i8* %q
}

define i8* @irg_reg(i8* %p, i64 %ex) {
entry:
; CHECK-LABEL: irg_reg:
; CHECK: irg x0, x0, x1
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 %ex)
  ret i8* %q
}

; undef argument in irg is treated specially
define i8* @irg_sp() {
entry:
; CHECK-LABEL: irg_sp:
; CHECK: irg x0, sp{{$}}
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
  ret i8* %q
}

declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
declare i8* @llvm.aarch64.irg.sp(i64 %exclude)
test/CodeGen/AArch64/irg_sp_tagp.ll (new file, 93 lines)
@@ -0,0 +1,93 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define i8* @small_alloca() {
entry:
; CHECK-LABEL: small_alloca:
; CHECK: irg [[R:x[0-9]+]], sp{{$}}
; CHECK-NEXT: addg x0, [[R]], #0, #1
; CHECK: ret
  %a = alloca i8, align 16
  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %q, i64 1)
  ret i8* %q1
}

; Two large allocas. One's offset overflows addg immediate.
define void @huge_allocas() {
entry:
; CHECK-LABEL: huge_allocas:
; CHECK: irg [[R:x[0-9]+]], sp{{$}}
; CHECK: add [[TMP:x[0-9]+]], [[R]], #3088
; CHECK: addg x0, [[TMP]], #1008, #1
; CHECK: addg x1, [[R]], #0, #2
; CHECK: bl use2
  %a = alloca i8, i64 4096, align 16
  %b = alloca i8, i64 4096, align 16
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 2)
  call void @use2(i8* %a_t, i8* %b_t)
  ret void
}

; Realigned stack frame. IRG uses value of SP after realignment,
; ADDG for the first stack allocation has offset 0.
define void @realign() {
entry:
; CHECK-LABEL: realign:
; CHECK: add x29, sp, #16
; CHECK: and sp, x{{[0-9]*}}, #0xffffffffffffffc0
; CHECK: irg [[R:x[0-9]+]], sp{{$}}
; CHECK: addg x0, [[R]], #0, #1
; CHECK: bl use
  %a = alloca i8, i64 4096, align 64
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
  call void @use(i8* %a_t)
  ret void
}

; With a dynamic alloca, IRG has to use FP with non-zero offset.
; ADDG offset for the single static alloca is still zero.
define void @dynamic_alloca(i64 %size) {
entry:
; CHECK-LABEL: dynamic_alloca:
; CHECK: sub [[R:x[0-9]+]], x29, #[[OFS:[0-9]+]]
; CHECK: irg [[R]], [[R]]
; CHECK: addg x1, [[R]], #0, #1
; CHECK: sub x0, x29, #[[OFS]]
; CHECK: bl use2
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a = alloca i128, i64 %size, align 16
  %b = alloca i8, i64 16, align 16
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
  call void @use2(i8* %b, i8* %b_t)
  ret void
}

; Both dynamic alloca and realigned frame.
; After initial realignment, generate the base pointer.
; IRG uses the base pointer w/o offset.
; Offsets for tagged and untagged pointers to the same alloca match.
define void @dynamic_alloca_and_realign(i64 %size) {
entryz:
; CHECK-LABEL: dynamic_alloca_and_realign:
; CHECK: and sp, x{{.*}}, #0xffffffffffffffc0
; CHECK: mov x19, sp
; CHECK: irg [[R:x[0-9]+]], x19
; CHECK: addg x1, [[R]], #[[OFS:[0-9]+]], #1
; CHECK: add x0, x19, #[[OFS]]
; CHECK: bl use2
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a = alloca i128, i64 %size, align 64
  %b = alloca i8, i64 16, align 16
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
  call void @use2(i8* %b, i8* %b_t)
  ret void
}

declare void @use(i8*)
declare void @use2(i8*, i8*)

declare i8* @llvm.aarch64.irg.sp(i64 %exclude)
declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)
test/CodeGen/AArch64/settag.ll (new file, 138 lines)
@@ -0,0 +1,138 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define void @stg1(i8* %p) {
entry:
; CHECK-LABEL: stg1:
; CHECK: stg x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 16)
  ret void
}

define void @stg2(i8* %p) {
entry:
; CHECK-LABEL: stg2:
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 32)
  ret void
}

define void @stg3(i8* %p) {
entry:
; CHECK-LABEL: stg3:
; CHECK: stg x0, [x0, #32]
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 48)
  ret void
}

define void @stg4(i8* %p) {
entry:
; CHECK-LABEL: stg4:
; CHECK: st2g x0, [x0, #32]
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 64)
  ret void
}

define void @stg5(i8* %p) {
entry:
; CHECK-LABEL: stg5:
; CHECK: stg x0, [x0, #64]
; CHECK: st2g x0, [x0, #32]
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 80)
  ret void
}

define void @stg16(i8* %p) {
entry:
; CHECK-LABEL: stg16:
; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
; CHECK: st2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 256)
  ret void
}

define void @stg17(i8* %p) {
entry:
; CHECK-LABEL: stg17:
; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
; CHECK: stg x0, [x0], #16
; CHECK: st2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 272)
  ret void
}

define void @stzg3(i8* %p) {
entry:
; CHECK-LABEL: stzg3:
; CHECK: stzg x0, [x0, #32]
; CHECK: stz2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag.zero(i8* %p, i64 48)
  ret void
}

define void @stzg17(i8* %p) {
entry:
; CHECK-LABEL: stzg17:
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
; CHECK: stzg x0, [x0], #16
; CHECK: stz2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  call void @llvm.aarch64.settag.zero(i8* %p, i64 272)
  ret void
}

define void @stg_alloca1() {
entry:
; CHECK-LABEL: stg_alloca1:
; CHECK: stg sp, [sp]
; CHECK: ret
  %a = alloca i8, i32 16, align 16
  call void @llvm.aarch64.settag(i8* %a, i64 16)
  ret void
}

define void @stg_alloca5() {
entry:
; CHECK-LABEL: stg_alloca5:
; CHECK: stg sp, [sp, #64]
; CHECK: st2g sp, [sp, #32]
; CHECK: st2g sp, [sp]
; CHECK: ret
  %a = alloca i8, i32 80, align 16
  call void @llvm.aarch64.settag(i8* %a, i64 80)
  ret void
}

define void @stg_alloca17() {
entry:
; CHECK-LABEL: stg_alloca17:
; CHECK: mov [[P:x[0-9]+]], sp
; CHECK: stg [[P]], {{\[}}[[P]]{{\]}}, #16
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
; CHECK: st2g [[P]], {{\[}}[[P]]{{\]}}, #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  %a = alloca i8, i32 272, align 16
  call void @llvm.aarch64.settag(i8* %a, i64 272)
  ret void
}

declare void @llvm.aarch64.settag(i8* %p, i64 %a)
declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a)
test/CodeGen/AArch64/stgp.ll (new file, 78 lines)
@@ -0,0 +1,78 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define void @stgp0(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp0:
; CHECK: stgp x0, x1, [x2]
; CHECK: ret
  call void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)
  ret void
}

define void @stgp1004(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp1004:
; CHECK: add [[R:x[0-9]+]], x2, #1004
; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 1004
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp1008(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp1008:
; CHECK: stgp x0, x1, [x2, #1008]
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 1008
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp1024(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp1024:
; CHECK: add [[R:x[0-9]+]], x2, #1024
; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 1024
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp_1024(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp_1024:
; CHECK: stgp x0, x1, [x2, #-1024]
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 -1024
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp_1040(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp_1040:
; CHECK: sub [[R:x[0-9]+]], x2, #1040
; CHECK: stgp x0, x1, [x{{.*}}]
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 -1040
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp_alloca(i64 %a, i64 %b) {
entry:
; CHECK-LABEL: stgp_alloca:
; CHECK: stgp x0, x1, [sp]
; CHECK: stgp x1, x0, [sp, #16]
; CHECK: ret
  %x = alloca i8, i32 32, align 16
  call void @llvm.aarch64.stgp(i8* %x, i64 %a, i64 %b)
  %x1 = getelementptr i8, i8* %x, i32 16
  call void @llvm.aarch64.stgp(i8* %x1, i64 %b, i64 %a)
  ret void
}

declare void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)
test/CodeGen/AArch64/tagp.ll (new file, 41 lines)
@@ -0,0 +1,41 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

define i8* @tagp2(i8* %p, i8* %tag) {
entry:
; CHECK-LABEL: tagp2:
; CHECK: subp [[R:x[0-9]+]], x0, x1
; CHECK: add [[R]], [[R]], x1
; CHECK: addg x0, [[R]], #0, #2
; CHECK: ret
  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
  ret i8* %q
}

define i8* @irg_tagp_unrelated(i8* %p, i8* %q) {
entry:
; CHECK-LABEL: irg_tagp_unrelated:
; CHECK: irg [[R0:x[0-9]+]], x0{{$}}
; CHECK: subp [[R:x[0-9]+]], [[R0]], x1
; CHECK: add [[R]], [[R0]], x1
; CHECK: addg x0, [[R]], #0, #1
; CHECK: ret
  %p1 = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p1, i8* %q, i64 1)
  ret i8* %q1
}

define i8* @tagp_alloca(i8* %tag) {
entry:
; CHECK-LABEL: tagp_alloca:
; CHECK: mov [[R0:x[0-9]+]], sp{{$}}
; CHECK: subp [[R:x[0-9]+]], [[R0]], x0{{$}}
; CHECK: add [[R]], [[R0]], x0{{$}}
; CHECK: addg x0, [[R]], #0, #3
; CHECK: ret
  %a = alloca i8, align 16
  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 3)
  ret i8* %q
}

declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)