
[AArch64] Extend AArch64SLSHardeningPass to harden BLR instructions.

To make sure that no barrier gets placed on the architectural execution
path, each
  BLR x<N>
instruction gets transformed to a
  BL __llvm_slsblr_thunk_x<N>
instruction, where __llvm_slsblr_thunk_x<N> is a thunk that contains:
__llvm_slsblr_thunk_x<N>:
  BR x<N>
  <speculation barrier>

Therefore, the BLR instruction is split in two: one BL and one BR.
With this transformation, no speculation barrier is inserted on the
architectural execution path; the barrier sits after the BR inside the
thunk, where it can only be reached speculatively.
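
For example, with N=8 (the case exercised by the MIR test added below),
  BLR x8
becomes
  BL __llvm_slsblr_thunk_x8
and the out-of-line thunk is (thunks always use the DSB SY; ISB barrier,
see SLSBLRThunkInserter below):
__llvm_slsblr_thunk_x8:
  BR x8
  DSB SY
  ISB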

The mitigation is off by default and can be enabled by the
harden-sls-blr subtarget feature.
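
For instance, the RUN lines in the tests added below enable it with
  llc -mattr=harden-sls-retbr,harden-sls-blr -verify-machineinstrs -mtriple=aarch64-none-linux-gnu
and the MIR test enables it per function via the
"target-features"="+harden-sls-blr" function attribute.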

As a linker is allowed to clobber X16 and X17 on function calls, the
above code transformation would not be correct when N=16 or N=17: a
linker-inserted veneer could overwrite the register before the thunk's
BR executes, as illustrated below. Therefore, when the mitigation is
enabled, generation of BLR x16 and BLR x17 is avoided.
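
A minimal sketch of the problematic (and therefore never generated)
sequence:
  BL __llvm_slsblr_thunk_x16    // a linker veneer on this call may clobber x16
__llvm_slsblr_thunk_x16:
  BR x16                        // would branch through the clobbered register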

As LLVM currently does not produce BLRA* indirect calls, this patch does
not aim to implement support for them.

Differential Revision:  https://reviews.llvm.org/D81402
Kristof Beyls 2020-06-11 09:23:15 +01:00
parent 16cd3a8a43
commit cd0b5e8976
16 changed files with 526 additions and 36 deletions

View File

@ -39,6 +39,7 @@ FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
FunctionPass *createAArch64SLSHardeningPass();
FunctionPass *createAArch64IndirectThunks();
FunctionPass *createAArch64SpeculationHardeningPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
FunctionPass *createAArch64SIMDInstrOptPass();

View File

@ -464,6 +464,9 @@ def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP",
def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr",
"HardenSlsRetBr", "true",
"Harden against straight line speculation across RET and BR instructions">;
def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr",
"HardenSlsBlr", "true",
"Harden against straight line speculation across BLR instructions">;
//===----------------------------------------------------------------------===//
// AArch64 Processors supported.

View File

@ -3270,7 +3270,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Issue the call.
MachineInstrBuilder MIB;
if (Subtarget->useSmallAddressing()) {
const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
const MCInstrDesc &II = TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : AArch64::BL);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
if (Symbol)
MIB.addSym(Symbol, 0);
@ -3303,7 +3303,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!CallReg)
return false;
const MCInstrDesc &II = TII.get(AArch64::BLR);
const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
CallReg = constrainOperandRegClass(II, CallReg, 0);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
}

View File

@ -1126,7 +1126,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
.addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)

View File

@ -6092,7 +6092,9 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
}
else if (LastInstrOpcode == AArch64::BL ||
(LastInstrOpcode == AArch64::BLR && !HasBTI)) {
((LastInstrOpcode == AArch64::BLR ||
LastInstrOpcode == AArch64::BLRNoIP) &&
!HasBTI)) {
// FIXME: Do we need to check if the code after this uses the value of LR?
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = 0;
@ -6409,7 +6411,8 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
// as a tail-call. Whitelist the call instructions we know about so we
// don't get unexpected results with call pseudo-instructions.
auto UnknownCallOutlineType = outliner::InstrType::Illegal;
if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
if (MI.getOpcode() == AArch64::BLR ||
MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
if (!Callee)
@ -6557,7 +6560,8 @@ void AArch64InstrInfo::buildOutlinedFrame(
if (Call->getOpcode() == AArch64::BL) {
TailOpcode = AArch64::TCRETURNdi;
} else {
assert(Call->getOpcode() == AArch64::BLR);
assert(Call->getOpcode() == AArch64::BLR ||
Call->getOpcode() == AArch64::BLRNoIP);
TailOpcode = AArch64::TCRETURNriALL;
}
MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
@ -6893,6 +6897,13 @@ uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::ElementSizeMask;
}
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
return AArch64::BLRNoIP;
else
return AArch64::BLR;
}
#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"

View File

@ -397,6 +397,9 @@ static inline bool isIndirectBranchOpcode(int Opc) {
return false;
}
/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);
// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bit

View File

@ -589,6 +589,8 @@ let RecomputePerFunction = 1 in {
def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
// Toggles patterns which aren't beneficial in GlobalISel when we aren't
// optimizing. This allows us to selectively use patterns without impacting
// SelectionDAG's behaviour.
@ -2020,9 +2022,19 @@ def ERET : SpecialReturn<0b0100, "eret">;
def : InstAlias<"ret", (RET LR)>;
let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
def BLR : BranchReg<0b0001, "blr", []>;
def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
Sched<[WriteBrReg]>,
PseudoInstExpansion<(BLR GPR64:$Rn)>;
} // isCall
def : Pat<(AArch64call GPR64:$Rn),
(BLR GPR64:$Rn)>,
Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
(BLRNoIP GPR64noip:$Rn)>,
Requires<[SLSBLRMitigation]>;
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

View File

@ -16,6 +16,7 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/IndirectThunks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@ -57,9 +58,9 @@ public:
private:
bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const;
void insertSpeculationBarrier(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL) const;
bool hardenBLRs(MachineBasicBlock &MBB) const;
MachineBasicBlock &ConvertBLRToBL(MachineBasicBlock &MBB,
MachineBasicBlock::iterator) const;
};
} // end anonymous namespace
@ -69,20 +70,26 @@ char AArch64SLSHardening::ID = 0;
INITIALIZE_PASS(AArch64SLSHardening, "aarch64-sls-hardening",
AARCH64_SLS_HARDENING_NAME, false, false)
void AArch64SLSHardening::insertSpeculationBarrier(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL) const {
static void insertSpeculationBarrier(const AArch64Subtarget *ST,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL,
bool AlwaysUseISBDSB = false) {
assert(MBBI != MBB.begin() &&
"Must not insert SpeculationBarrierEndBB as only instruction in MBB.");
assert(std::prev(MBBI)->isBarrier() &&
"SpeculationBarrierEndBB must only follow unconditional control flow "
"instructions.");
assert(std::prev(MBBI)->isTerminator() &&
"SpeculatoinBarrierEndBB must only follow terminators.");
if (ST->hasSB())
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SpeculationBarrierSBEndBB));
else
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SpeculationBarrierISBDSBEndBB));
"SpeculationBarrierEndBB must only follow terminators.");
const TargetInstrInfo *TII = ST->getInstrInfo();
unsigned BarrierOpc = ST->hasSB() && !AlwaysUseISBDSB
? AArch64::SpeculationBarrierSBEndBB
: AArch64::SpeculationBarrierISBDSBEndBB;
if (MBBI == MBB.end() ||
(MBBI->getOpcode() != AArch64::SpeculationBarrierSBEndBB &&
MBBI->getOpcode() != AArch64::SpeculationBarrierISBDSBEndBB))
BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc));
}
bool AArch64SLSHardening::runOnMachineFunction(MachineFunction &MF) {
@ -91,12 +98,30 @@ bool AArch64SLSHardening::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
bool Modified = false;
for (auto &MBB : MF)
for (auto &MBB : MF) {
Modified |= hardenReturnsAndBRs(MBB);
Modified |= hardenBLRs(MBB);
}
return Modified;
}
static bool isBLR(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AArch64::BLR:
case AArch64::BLRNoIP:
return true;
case AArch64::BLRAA:
case AArch64::BLRAB:
case AArch64::BLRAAZ:
case AArch64::BLRABZ:
llvm_unreachable("Currently, LLVM's code generator does not support "
"producing BLRA* instructions. Therefore, there's no "
"support in this pass for those instructions.");
}
return false;
}
bool AArch64SLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
if (!ST->hardenSlsRetBr())
return false;
@ -108,7 +133,244 @@ bool AArch64SLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
NextMBBI = std::next(MBBI);
if (MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode())) {
assert(MI.isTerminator());
insertSpeculationBarrier(MBB, std::next(MBBI), MI.getDebugLoc());
insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc());
Modified = true;
}
}
return Modified;
}
static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_";
static std::array<const char *, 29> SLSBLRThunkNames{
"__llvm_slsblr_thunk_x0", "__llvm_slsblr_thunk_x1",
"__llvm_slsblr_thunk_x2", "__llvm_slsblr_thunk_x3",
"__llvm_slsblr_thunk_x4", "__llvm_slsblr_thunk_x5",
"__llvm_slsblr_thunk_x6", "__llvm_slsblr_thunk_x7",
"__llvm_slsblr_thunk_x8", "__llvm_slsblr_thunk_x9",
"__llvm_slsblr_thunk_x10", "__llvm_slsblr_thunk_x11",
"__llvm_slsblr_thunk_x12", "__llvm_slsblr_thunk_x13",
"__llvm_slsblr_thunk_x14", "__llvm_slsblr_thunk_x15",
// X16 and X17 are deliberately missing, as the mitigation requires those
// registers not to be used in BLR. See the comment in ConvertBLRToBL for
// more details.
"__llvm_slsblr_thunk_x18", "__llvm_slsblr_thunk_x19",
"__llvm_slsblr_thunk_x20", "__llvm_slsblr_thunk_x21",
"__llvm_slsblr_thunk_x22", "__llvm_slsblr_thunk_x23",
"__llvm_slsblr_thunk_x24", "__llvm_slsblr_thunk_x25",
"__llvm_slsblr_thunk_x26", "__llvm_slsblr_thunk_x27",
"__llvm_slsblr_thunk_x28", "__llvm_slsblr_thunk_x29",
// X30 is deliberately missing, for similar reasons as X16 and X17 are
// missing.
"__llvm_slsblr_thunk_x31",
};
static std::array<unsigned, 29> SLSBLRThunkRegs{
AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
AArch64::X5, AArch64::X6, AArch64::X7, AArch64::X8, AArch64::X9,
AArch64::X10, AArch64::X11, AArch64::X12, AArch64::X13, AArch64::X14,
AArch64::X15, AArch64::X18, AArch64::X19, AArch64::X20, AArch64::X21,
AArch64::X22, AArch64::X23, AArch64::X24, AArch64::X25, AArch64::X26,
AArch64::X27, AArch64::X28, AArch64::FP, AArch64::XZR};
namespace {
struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
const char *getThunkPrefix() { return SLSBLRNamePrefix; }
bool mayUseThunk(const MachineFunction &MF) {
// FIXME: This could also check if there are any BLRs in the function
// to more accurately reflect if a thunk will be needed.
return MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr();
}
void insertThunks(MachineModuleInfo &MMI);
void populateThunk(MachineFunction &MF);
};
} // namespace
void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
// FIXME: It probably would be possible to filter which thunks to produce
// based on which registers are actually used in BLR instructions in this
// function. But would that be a worthwhile optimization?
for (StringRef Name : SLSBLRThunkNames)
createThunkFunction(MMI, Name);
}
void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
// FIXME: How to better communicate Register number, rather than through
// name and lookup table?
assert(MF.getName().startswith(getThunkPrefix()));
int Index = -1;
for (int i = 0; i < (int)SLSBLRThunkNames.size(); ++i)
if (MF.getName() == SLSBLRThunkNames[i]) {
Index = i;
break;
}
assert(Index != -1);
Register ThunkReg = SLSBLRThunkRegs[Index];
const TargetInstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
// generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
while (MF.size() > 1)
MF.erase(std::next(MF.begin()));
// These thunks need to consist of the following instructions:
// __llvm_slsblr_thunk_xN:
// BR xN
// barrierInsts
Entry->addLiveIn(ThunkReg);
BuildMI(Entry, DebugLoc(), TII->get(AArch64::BR)).addReg(ThunkReg);
// Make sure the thunks do not make use of the SB extension in case some
// function that calls the thunk has the SB extension disabled locally on
// that function, even though it is enabled for the module otherwise.
// Therefore, set AlwaysUseISBDSB to true.
insertSpeculationBarrier(&MF.getSubtarget<AArch64Subtarget>(), *Entry,
Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/);
}
MachineBasicBlock &
AArch64SLSHardening::ConvertBLRToBL(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {
// Transform a BLR to a BL as follows:
// Before:
// |-----------------------------|
// | ... |
// | instI |
// | BLR xN |
// | instJ |
// | ... |
// |-----------------------------|
//
// After:
// |-----------------------------|
// | ... |
// | instI |
// | BL __llvm_slsblr_thunk_xN |
// | instJ |
// | ... |
// |-----------------------------|
//
// __llvm_slsblr_thunk_xN:
// |-----------------------------|
// | BR xN |
// | barrierInsts |
// |-----------------------------|
//
// The __llvm_slsblr_thunk_xN thunks are created by the SLSBLRThunkInserter.
// This function merely needs to transform BLR xN into BL
// __llvm_slsblr_thunk_xN.
//
// Since linkers are allowed to clobber X16 and X17 on function calls, the
// above mitigation only works if the original BLR instruction was neither
// BLR X16 nor BLR X17. Earlier code generation must make sure that no
// BLR X16 or BLR X17 is produced when the mitigation is enabled.
MachineInstr &BLR = *MBBI;
assert(isBLR(BLR));
unsigned BLOpcode;
Register Reg;
bool RegIsKilled;
switch (BLR.getOpcode()) {
case AArch64::BLR:
case AArch64::BLRNoIP:
BLOpcode = AArch64::BL;
Reg = BLR.getOperand(0).getReg();
assert(Reg != AArch64::X16 && Reg != AArch64::X17 && Reg != AArch64::LR);
RegIsKilled = BLR.getOperand(0).isKill();
break;
case AArch64::BLRAA:
case AArch64::BLRAB:
case AArch64::BLRAAZ:
case AArch64::BLRABZ:
llvm_unreachable("BLRA instructions cannot yet be produced by LLVM, "
"therefore there is no need to support them for now.");
default:
llvm_unreachable("unhandled BLR");
}
DebugLoc DL = BLR.getDebugLoc();
// If we'd like to support also BLRAA and BLRAB instructions, we'd need
// a lot more different kind of thunks.
// For example, a
//
// BLRAA xN, xM
//
// instruction probably would need to be transformed to something like:
//
// BL __llvm_slsblraa_thunk_x<N>_x<M>
//
// __llvm_slsblraa_thunk_x<N>_x<M>:
// BRAA x<N>, x<M>
// barrierInsts
//
// Given that about 30 different values of N are possible and about 30
// different values of M are possible in the above, with the current way
// of producing indirect thunks, we'd be producing about 30 times 30, i.e.
// about 900 thunks (where most might not be actually called). This would
// multiply further by two to support both BLRAA and BLRAB variants of those
// instructions.
// If we'd want to support this, we'd probably need to look into a different
// way to produce thunk functions, based on which variants are actually
// needed, rather than producing all possible variants.
// So far, LLVM never produces BLRA* instructions, so let's leave this
// for the future, when LLVM may start producing them.
MachineFunction &MF = *MBBI->getMF();
MCContext &Context = MBB.getParent()->getContext();
MCSymbol *Sym = Context.getOrCreateSymbol("__llvm_slsblr_thunk_x" +
utostr(Reg - AArch64::X0));
MachineInstr *BL = BuildMI(MBB, MBBI, DL, TII->get(BLOpcode)).addSym(Sym);
// Now copy the implicit operands from BLR to BL and copy other necessary
// info.
// However, both BLR and BL instructions implicitly use SP and implicitly
// define LR. Blindly copying implicit operands would result in the SP and
// LR operands being present multiple times. While this may not be too much
// of an issue, let's avoid that for cleanliness by removing those implicit
// operands from the BL created above before we copy over all implicit
// operands from the BLR.
int ImpLROpIdx = -1;
int ImpSPOpIdx = -1;
for (unsigned OpIdx = BL->getNumExplicitOperands();
OpIdx < BL->getNumOperands(); OpIdx++) {
MachineOperand Op = BL->getOperand(OpIdx);
if (!Op.isReg())
continue;
if (Op.getReg() == AArch64::LR && Op.isDef())
ImpLROpIdx = OpIdx;
if (Op.getReg() == AArch64::SP && !Op.isDef())
ImpSPOpIdx = OpIdx;
}
assert(ImpLROpIdx != -1);
assert(ImpSPOpIdx != -1);
int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx);
int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx);
BL->RemoveOperand(FirstOpIdxToRemove);
BL->RemoveOperand(SecondOpIdxToRemove);
// Now copy over the implicit operands from the original BLR
BL->copyImplicitOps(MF, BLR);
MF.moveCallSiteInfo(&BLR, BL);
// Also add the register the BLR called through as an implicit use, since
// it is used by the called thunk.
BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/,
RegIsKilled /*isKill*/));
// Remove BLR instruction
MBB.erase(MBBI);
return MBB;
}
bool AArch64SLSHardening::hardenBLRs(MachineBasicBlock &MBB) const {
if (!ST->hardenSlsBlr())
return false;
bool Modified = false;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator NextMBBI;
for (; MBBI != E; MBBI = NextMBBI) {
MachineInstr &MI = *MBBI;
NextMBBI = std::next(MBBI);
if (isBLR(MI)) {
ConvertBLRToBL(MBB, MBBI);
Modified = true;
}
}
@ -118,3 +380,60 @@ bool AArch64SLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
FunctionPass *llvm::createAArch64SLSHardeningPass() {
return new AArch64SLSHardening();
}
namespace {
class AArch64IndirectThunks : public MachineFunctionPass {
public:
static char ID;
AArch64IndirectThunks() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "AArch64 Indirect Thunks"; }
bool doInitialization(Module &M) override;
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addPreserved<MachineModuleInfoWrapperPass>();
}
private:
std::tuple<SLSBLRThunkInserter> TIs;
// FIXME: When LLVM moves to C++17, these can become folds
template <typename... ThunkInserterT>
static void initTIs(Module &M,
std::tuple<ThunkInserterT...> &ThunkInserters) {
(void)std::initializer_list<int>{
(std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
}
template <typename... ThunkInserterT>
static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
std::tuple<ThunkInserterT...> &ThunkInserters) {
bool Modified = false;
(void)std::initializer_list<int>{
Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
return Modified;
}
};
} // end anonymous namespace
char AArch64IndirectThunks::ID = 0;
FunctionPass *llvm::createAArch64IndirectThunks() {
return new AArch64IndirectThunks();
}
bool AArch64IndirectThunks::doInitialization(Module &M) {
initTIs(M, TIs);
return false;
}
bool AArch64IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << getPassName() << '\n');
auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
return runTIs(MMI, MF, TIs);
}

View File

@ -211,6 +211,7 @@ protected:
bool UseEL3ForTP = false;
bool AllowTaggedGlobals = false;
bool HardenSlsRetBr = false;
bool HardenSlsBlr = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@ -365,6 +366,7 @@ public:
}
bool hardenSlsRetBr() const { return HardenSlsRetBr; }
bool hardenSlsBlr() const { return HardenSlsBlr; }
bool useEL1ForTP() const { return UseEL1ForTP; }
bool useEL2ForTP() const { return UseEL2ForTP; }

View File

@ -636,6 +636,7 @@ void AArch64PassConfig::addPreSched2() {
// info.
addPass(createAArch64SpeculationHardeningPass());
addPass(createAArch64IndirectThunks());
addPass(createAArch64SLSHardeningPass());
if (TM->getOptLevel() != CodeGenOpt::None) {

View File

@ -773,17 +773,17 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
return true;
}
static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
bool IsTailCall) {
if (!IsTailCall)
return IsIndirect ? AArch64::BLR : AArch64::BL;
return IsIndirect ? getBLRCallOpcode(CallerF) : AArch64::BL;
if (!IsIndirect)
return AArch64::TCRETURNdi;
// When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
// x16 or x17.
if (CallerF.hasFnAttribute("branch-target-enforcement"))
if (CallerF.getFunction().hasFnAttribute("branch-target-enforcement"))
return AArch64::TCRETURNriBTI;
return AArch64::TCRETURNri;
@ -819,7 +819,7 @@ bool AArch64CallLowering::lowerTailCall(
if (!IsSibCall)
CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.add(Info.Callee);
@ -979,7 +979,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), false);
unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.add(Info.Callee);

View File

@ -2890,7 +2890,7 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
MIB.buildInstr(AArch64::BLR, {}, {Load})
MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
.addDef(AArch64::X0, RegState::Implicit)
.addRegMask(TRI.getTLSCallPreservedMask());

View File

@ -55,6 +55,7 @@
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
; CHECK-NEXT: AArch64 pseudo instruction expansion pass
; CHECK-NEXT: AArch64 speculation hardening pass
; CHECK-NEXT: AArch64 Indirect Thunks
; CHECK-NEXT: AArch64 sls hardening pass
; CHECK-NEXT: Analyze Machine Code For Garbage Collection
; CHECK-NEXT: Insert fentry calls

View File

@ -178,6 +178,7 @@
; CHECK-NEXT: AArch64 pseudo instruction expansion pass
; CHECK-NEXT: AArch64 load / store optimization pass
; CHECK-NEXT: AArch64 speculation hardening pass
; CHECK-NEXT: AArch64 Indirect Thunks
; CHECK-NEXT: AArch64 sls hardening pass
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction

View File

@ -0,0 +1,58 @@
# RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \
# RUN: -start-before aarch64-sls-hardening \
# RUN: -stop-after aarch64-sls-hardening -o - %s \
# RUN: | FileCheck %s --check-prefixes=CHECK
# Check that the BLR SLS hardening transforms a BLR into a BL with operands as
# expected.
--- |
$__llvm_slsblr_thunk_x8 = comdat any
@a = dso_local local_unnamed_addr global i32 (...)* null, align 8
@b = dso_local local_unnamed_addr global i32 0, align 4
define dso_local void @fn1() local_unnamed_addr "target-features"="+harden-sls-blr" {
entry:
%0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @a to i32 ()**), align 8
%call = tail call i32 %0() nounwind
store i32 %call, i32* @b, align 4
ret void
}
; Function Attrs: naked nounwind
define linkonce_odr hidden void @__llvm_slsblr_thunk_x8() naked nounwind comdat {
entry:
ret void
}
...
---
name: fn1
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: fn1
bb.0.entry:
liveins: $lr
early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 ; :: (store 8 into %stack.0)
frame-setup CFI_INSTRUCTION def_cfa_offset 16
frame-setup CFI_INSTRUCTION offset $w30, -16
renamable $x8 = ADRP target-flags(aarch64-page) @a
renamable $x8 = LDRXui killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @a :: (dereferenceable load 8 from `i32 ()** bitcast (i32 (...)** @a to i32 ()**)`)
BLRNoIP killed renamable $x8, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
; CHECK: BL <mcsymbol __llvm_slsblr_thunk_x8>, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0, implicit killed $x8
renamable $x8 = ADRP target-flags(aarch64-page) @b
STRWui killed renamable $w0, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @b :: (store 4 into @b)
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 ; :: (load 8 from %stack.0)
RET undef $lr
...
---
name: __llvm_slsblr_thunk_x8
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x8
BR $x8
SpeculationBarrierISBDSBEndBB
...

View File

@ -1,5 +1,6 @@
; RUN: llc -mattr=harden-sls-retbr -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,ISBDSB
; RUN: llc -mattr=harden-sls-retbr -mattr=+sb -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,SB
; RUN: llc -mattr=harden-sls-retbr,harden-sls-blr -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,HARDEN,ISBDSB
; RUN: llc -mattr=harden-sls-retbr,harden-sls-blr -mattr=+sb -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,HARDEN,SB
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,NOHARDEN
; Function Attrs: norecurse nounwind readnone
@ -24,33 +25,39 @@ if.else: ; preds = %entry
; ISBDSB-NEXT: dsb sy
; ISBDSB-NEXT: isb
; SB-NEXT: {{ sb$}}
; CHECK-NEXT: .Lfunc_end
}
@__const.indirect_branch.ptr = private unnamed_addr constant [2 x i8*] [i8* blockaddress(@indirect_branch, %return), i8* blockaddress(@indirect_branch, %l2)], align 8
; Function Attrs: norecurse nounwind readnone
define dso_local i32 @indirect_branch(i32 %a, i32 %b, i32 %i) {
; CHECK-LABEL: indirect_branch:
entry:
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @__const.indirect_branch.ptr, i64 0, i64 %idxprom
%0 = load i8*, i8** %arrayidx, align 8
indirectbr i8* %0, [label %return, label %l2]
l2: ; preds = %entry
br label %return
return: ; preds = %entry, %l2
%retval.0 = phi i32 [ 1, %l2 ], [ 0, %entry ]
ret i32 %retval.0
; CHECK-LABEL: indirect_branch:
; CHECK: br x
; ISBDSB-NEXT: dsb sy
; ISBDSB-NEXT: isb
; SB-NEXT: {{ sb$}}
l2: ; preds = %entry
br label %return
; CHECK: {{ret$}}
; ISBDSB-NEXT: dsb sy
; ISBDSB-NEXT: isb
; SB-NEXT: {{ sb$}}
return: ; preds = %entry, %l2
%retval.0 = phi i32 [ 1, %l2 ], [ 0, %entry ]
ret i32 %retval.0
; CHECK: {{ret$}}
; ISBDSB-NEXT: dsb sy
; ISBDSB-NEXT: isb
; SB-NEXT: {{ sb$}}
; CHECK-NEXT: .Lfunc_end
}
; Check that RETAA and RETAB instructions are also protected as expected.
@ -61,6 +68,7 @@ entry:
; ISBDSB-NEXT: dsb sy
; ISBDSB-NEXT: isb
; SB-NEXT: {{ sb$}}
; CHECK-NEXT: .Lfunc_end
ret i32 %a
}
@ -71,6 +79,7 @@ entry:
; ISBDSB-NEXT: dsb sy
; ISBDSB-NEXT: isb
; SB-NEXT: {{ sb$}}
; CHECK-NEXT: .Lfunc_end
ret i32 %a
}
@ -102,3 +111,72 @@ d: ; preds = %asm.fallthrough, %entry
; SB-NEXT: {{ sb$}}
; CHECK-NEXT: .Lfunc_end
}
define dso_local i32 @indirect_call(
i32 (...)* nocapture %f1, i32 (...)* nocapture %f2) {
entry:
; CHECK-LABEL: indirect_call:
%callee.knr.cast = bitcast i32 (...)* %f1 to i32 ()*
%call = tail call i32 %callee.knr.cast()
; HARDEN: bl {{__llvm_slsblr_thunk_x[0-9]+$}}
%callee.knr.cast1 = bitcast i32 (...)* %f2 to i32 ()*
%call2 = tail call i32 %callee.knr.cast1()
; HARDEN: bl {{__llvm_slsblr_thunk_x[0-9]+$}}
%add = add nsw i32 %call2, %call
ret i32 %add
; CHECK: .Lfunc_end
}
; verify calling through a function pointer.
@a = dso_local local_unnamed_addr global i32 (...)* null, align 8
@b = dso_local local_unnamed_addr global i32 0, align 4
define dso_local void @indirect_call_global() local_unnamed_addr {
; CHECK-LABEL: indirect_call_global:
entry:
%0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @a to i32 ()**), align 8
%call = tail call i32 %0() nounwind
; HARDEN: bl {{__llvm_slsblr_thunk_x[0-9]+$}}
store i32 %call, i32* @b, align 4
ret void
; CHECK: .Lfunc_end
}
; Verify that neither x16 nor x17 are used when the BLR mitigation is enabled,
; as a linker is allowed to clobber x16 or x17 on calls, which would break the
; correct execution of the code sequence produced by the mitigation.
; The below test carefully increases register pressure to persuade code
; generation to produce a BLR x16. Yes, that is a bit fragile.
define i64 @check_x16(i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** nocapture readonly %fp, i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** nocapture readonly %fp2) "target-features"="+neon,+reserve-x10,+reserve-x11,+reserve-x12,+reserve-x13,+reserve-x14,+reserve-x15,+reserve-x18,+reserve-x20,+reserve-x21,+reserve-x22,+reserve-x23,+reserve-x24,+reserve-x25,+reserve-x26,+reserve-x27,+reserve-x28,+reserve-x30,+reserve-x9" {
entry:
; CHECK-LABEL: check_x16:
%0 = load i64 (i8*, i64, i64, i64, i64, i64, i64, i64)*, i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** %fp, align 8
%1 = bitcast i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** %fp2 to i8**
%2 = load i8*, i8** %1, align 8
%call = call i64 %0(i8* %2, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0)
%3 = load i64 (i8*, i64, i64, i64, i64, i64, i64, i64)*, i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** %fp2, align 8
%4 = bitcast i64 (i8*, i64, i64, i64, i64, i64, i64, i64)** %fp to i8**
%5 = load i8*, i8** %4, align 8;, !tbaa !2
%call1 = call i64 %3(i8* %5, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0)
; NOHARDEN: blr x16
; ISBDSB-NOT: bl __llvm_slsblr_thunk_x16
; SB-NOT: bl __llvm_slsblr_thunk_x16
; CHECK
%add = add nsw i64 %call1, %call
ret i64 %add
; CHECK: .Lfunc_end
}
; HARDEN-label: __llvm_slsblr_thunk_x0:
; HARDEN: br x0
; ISBDSB-NEXT: dsb sy
; ISBDSB-NEXT: isb
; SB-NEXT: dsb sy
; SB-NEXT: isb
; HARDEN-NEXT: .Lfunc_end
; HARDEN-label: __llvm_slsblr_thunk_x19:
; HARDEN: br x19
; ISBDSB-NEXT: dsb sy
; ISBDSB-NEXT: isb
; SB-NEXT: dsb sy
; SB-NEXT: isb
; HARDEN-NEXT: .Lfunc_end