//===------ BPFAbstractMemberAccess.cpp - Abstracting Member Accesses -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass abstracts struct/union member accesses in order to support
// compile-once run-everywhere (CO-RE). The CO-RE intends to compile the program
// which can run on different kernels. In particular, if bpf program tries to
// access a particular kernel data structure member, the details of the
// intermediate member access will be remembered so bpf loader can do
// necessary adjustment right before program loading.
//
// For example,
//
//   struct s {
//     int a;
//     int b;
//   };
//   struct t {
//     struct s c;
//     int d;
//   };
//   struct t e;
//
// For the member access e.c.b, the compiler will generate code
//   &e + 4
//
// The compile-once run-everywhere instead generates the following code
//   r = 4
//   &e + r
// The "4" in "r = 4" can be changed based on a particular kernel version.
// For example, on a particular kernel version, if struct s is changed to
//
//   struct s {
//     int new_field;
//     int a;
//     int b;
//   }
//
// By repeating the member access on the host, the bpf loader can
// adjust "r = 4" as "r = 8".
// // This feature relies on the following three intrinsic calls: // addr = preserve_array_access_index(base, dimension, index) // addr = preserve_union_access_index(base, di_index) // !llvm.preserve.access.index // addr = preserve_struct_access_index(base, gep_index, di_index) // !llvm.preserve.access.index // //===----------------------------------------------------------------------===// #include "BPF.h" #include "BPFCORE.h" #include "BPFTargetMachine.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include #define DEBUG_TYPE "bpf-abstract-member-access" namespace llvm { const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama"; const std::string BPFCoreSharedInfo::PatchableExtSecName = ".BPF.patchable_externs"; } // namespace llvm using namespace llvm; namespace { class BPFAbstractMemberAccess final : public ModulePass { StringRef getPassName() const override { return "BPF Abstract Member Access"; } bool runOnModule(Module &M) override; public: static char ID; BPFAbstractMemberAccess() : ModulePass(ID) {} private: enum : uint32_t { BPFPreserveArrayAI = 1, BPFPreserveUnionAI = 2, BPFPreserveStructAI = 3, }; std::map GEPGlobals; // A map to link preserve_*_access_index instrinsic calls. std::map> AIChain; // A map to hold all the base preserve_*_access_index instrinsic calls. // The base call is not an input of any other preserve_*_access_index // intrinsics. 
std::map BaseAICalls; bool doTransformation(Module &M); void traceAICall(CallInst *Call, uint32_t Kind, const MDNode *ParentMeta, uint32_t ParentAI); void traceBitCast(BitCastInst *BitCast, CallInst *Parent, uint32_t Kind, const MDNode *ParentMeta, uint32_t ParentAI); void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, uint32_t Kind, const MDNode *ParentMeta, uint32_t ParentAI); void collectAICallChains(Module &M, Function &F); bool IsPreserveDIAccessIndexCall(const CallInst *Call, uint32_t &Kind, const MDNode *&TypeMeta, uint32_t &AccessIndex); bool IsValidAIChain(const MDNode *ParentMeta, uint32_t ParentAI, const MDNode *ChildMeta); bool removePreserveAccessIndexIntrinsic(Module &M); void replaceWithGEP(std::vector &CallList, uint32_t NumOfZerosIndex, uint32_t DIIndex); Value *computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey, uint32_t Kind, MDNode *&BaseMeta); bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex); bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind); }; } // End anonymous namespace char BPFAbstractMemberAccess::ID = 0; INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE, "abstracting struct/union member accessees", false, false) ModulePass *llvm::createBPFAbstractMemberAccess() { return new BPFAbstractMemberAccess(); } bool BPFAbstractMemberAccess::runOnModule(Module &M) { LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n"); // Bail out if no debug info. 
if (empty(M.debug_compile_units())) return false; return doTransformation(M); } static bool SkipDIDerivedTag(unsigned Tag) { if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_member) return false; return true; } static DIType * stripQualifiers(DIType *Ty) { while (auto *DTy = dyn_cast(Ty)) { if (!SkipDIDerivedTag(DTy->getTag())) break; Ty = DTy->getBaseType(); } return Ty; } static const DIType * stripQualifiers(const DIType *Ty) { while (auto *DTy = dyn_cast(Ty)) { if (!SkipDIDerivedTag(DTy->getTag())) break; Ty = DTy->getBaseType(); } return Ty; } static uint32_t calcArraySize(const DICompositeType *CTy, uint32_t StartDim) { DINodeArray Elements = CTy->getElements(); uint32_t DimSize = 1; for (uint32_t I = StartDim; I < Elements.size(); ++I) { if (auto *Element = dyn_cast_or_null(Elements[I])) if (Element->getTag() == dwarf::DW_TAG_subrange_type) { const DISubrange *SR = cast(Element); auto *CI = SR->getCount().dyn_cast(); DimSize *= CI->getSExtValue(); } } return DimSize; } /// Check whether a call is a preserve_*_access_index intrinsic call or not. 
bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, uint32_t &Kind, const MDNode *&TypeMeta, uint32_t &AccessIndex) { if (!Call) return false; const auto *GV = dyn_cast(Call->getCalledValue()); if (!GV) return false; if (GV->getName().startswith("llvm.preserve.array.access.index")) { Kind = BPFPreserveArrayAI; TypeMeta = Call->getMetadata(LLVMContext::MD_preserve_access_index); if (!TypeMeta) report_fatal_error("Missing metadata for llvm.preserve.array.access.index intrinsic"); AccessIndex = cast(Call->getArgOperand(2)) ->getZExtValue(); return true; } if (GV->getName().startswith("llvm.preserve.union.access.index")) { Kind = BPFPreserveUnionAI; TypeMeta = Call->getMetadata(LLVMContext::MD_preserve_access_index); if (!TypeMeta) report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic"); AccessIndex = cast(Call->getArgOperand(1)) ->getZExtValue(); return true; } if (GV->getName().startswith("llvm.preserve.struct.access.index")) { Kind = BPFPreserveStructAI; TypeMeta = Call->getMetadata(LLVMContext::MD_preserve_access_index); if (!TypeMeta) report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic"); AccessIndex = cast(Call->getArgOperand(2)) ->getZExtValue(); return true; } return false; } void BPFAbstractMemberAccess::replaceWithGEP(std::vector &CallList, uint32_t DimensionIndex, uint32_t GEPIndex) { for (auto Call : CallList) { uint32_t Dimension = 1; if (DimensionIndex > 0) Dimension = cast(Call->getArgOperand(DimensionIndex)) ->getZExtValue(); Constant *Zero = ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0); SmallVector IdxList; for (unsigned I = 0; I < Dimension; ++I) IdxList.push_back(Zero); IdxList.push_back(Call->getArgOperand(GEPIndex)); auto *GEP = GetElementPtrInst::CreateInBounds(Call->getArgOperand(0), IdxList, "", Call); Call->replaceAllUsesWith(GEP); Call->eraseFromParent(); } } bool 
BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) { std::vector PreserveArrayIndexCalls; std::vector PreserveUnionIndexCalls; std::vector PreserveStructIndexCalls; bool Found = false; for (Function &F : M) for (auto &BB : F) for (auto &I : BB) { auto *Call = dyn_cast(&I); uint32_t Kind; const MDNode *TypeMeta; uint32_t AccessIndex; if (!IsPreserveDIAccessIndexCall(Call, Kind, TypeMeta, AccessIndex)) continue; Found = true; if (Kind == BPFPreserveArrayAI) PreserveArrayIndexCalls.push_back(Call); else if (Kind == BPFPreserveUnionAI) PreserveUnionIndexCalls.push_back(Call); else PreserveStructIndexCalls.push_back(Call); } // do the following transformation: // . addr = preserve_array_access_index(base, dimension, index) // is transformed to // addr = GEP(base, dimenion's zero's, index) // . addr = preserve_union_access_index(base, di_index) // is transformed to // addr = base, i.e., all usages of "addr" are replaced by "base". // . addr = preserve_struct_access_index(base, gep_index, di_index) // is transformed to // addr = GEP(base, 0, gep_index) replaceWithGEP(PreserveArrayIndexCalls, 1, 2); replaceWithGEP(PreserveStructIndexCalls, 0, 1); for (auto Call : PreserveUnionIndexCalls) { Call->replaceAllUsesWith(Call->getArgOperand(0)); Call->eraseFromParent(); } return Found; } /// Check whether the access index chain is valid. We check /// here because there may be type casts between two /// access indexes. We want to ensure memory access still valid. bool BPFAbstractMemberAccess::IsValidAIChain(const MDNode *ParentType, uint32_t ParentAI, const MDNode *ChildType) { const DIType *PType = stripQualifiers(cast(ParentType)); const DIType *CType = stripQualifiers(cast(ChildType)); // Child is a derived/pointer type, which is due to type casting. // Pointer type cannot be in the middle of chain. if (isa(CType)) return false; // Parent is a pointer type. 
if (const auto *PtrTy = dyn_cast(PType)) { if (PtrTy->getTag() != dwarf::DW_TAG_pointer_type) return false; return stripQualifiers(PtrTy->getBaseType()) == CType; } // Otherwise, struct/union/array types const auto *PTy = dyn_cast(PType); const auto *CTy = dyn_cast(CType); assert(PTy && CTy && "ParentType or ChildType is null or not composite"); uint32_t PTyTag = PTy->getTag(); assert(PTyTag == dwarf::DW_TAG_array_type || PTyTag == dwarf::DW_TAG_structure_type || PTyTag == dwarf::DW_TAG_union_type); uint32_t CTyTag = CTy->getTag(); assert(CTyTag == dwarf::DW_TAG_array_type || CTyTag == dwarf::DW_TAG_structure_type || CTyTag == dwarf::DW_TAG_union_type); // Multi dimensional arrays, base element should be the same if (PTyTag == dwarf::DW_TAG_array_type && PTyTag == CTyTag) return PTy->getBaseType() == CTy->getBaseType(); DIType *Ty; if (PTyTag == dwarf::DW_TAG_array_type) Ty = PTy->getBaseType(); else Ty = dyn_cast(PTy->getElements()[ParentAI]); return dyn_cast(stripQualifiers(Ty)) == CTy; } void BPFAbstractMemberAccess::traceAICall(CallInst *Call, uint32_t Kind, const MDNode *ParentMeta, uint32_t ParentAI) { for (User *U : Call->users()) { Instruction *Inst = dyn_cast(U); if (!Inst) continue; if (auto *BI = dyn_cast(Inst)) { traceBitCast(BI, Call, Kind, ParentMeta, ParentAI); } else if (auto *CI = dyn_cast(Inst)) { uint32_t CIKind; const MDNode *ChildMeta; uint32_t ChildAI; if (IsPreserveDIAccessIndexCall(CI, CIKind, ChildMeta, ChildAI) && IsValidAIChain(ParentMeta, ParentAI, ChildMeta)) { AIChain[CI] = std::make_pair(Call, Kind); traceAICall(CI, CIKind, ChildMeta, ChildAI); } else { BaseAICalls[Call] = Kind; } } else if (auto *GI = dyn_cast(Inst)) { if (GI->hasAllZeroIndices()) traceGEP(GI, Call, Kind, ParentMeta, ParentAI); else BaseAICalls[Call] = Kind; } else { BaseAICalls[Call] = Kind; } } } void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast, CallInst *Parent, uint32_t Kind, const MDNode *ParentMeta, uint32_t ParentAI) { for (User *U : 
BitCast->users()) { Instruction *Inst = dyn_cast(U); if (!Inst) continue; if (auto *BI = dyn_cast(Inst)) { traceBitCast(BI, Parent, Kind, ParentMeta, ParentAI); } else if (auto *CI = dyn_cast(Inst)) { uint32_t CIKind; const MDNode *ChildMeta; uint32_t ChildAI; if (IsPreserveDIAccessIndexCall(CI, CIKind, ChildMeta, ChildAI) && IsValidAIChain(ParentMeta, ParentAI, ChildMeta)) { AIChain[CI] = std::make_pair(Parent, Kind); traceAICall(CI, CIKind, ChildMeta, ChildAI); } else { BaseAICalls[Parent] = Kind; } } else if (auto *GI = dyn_cast(Inst)) { if (GI->hasAllZeroIndices()) traceGEP(GI, Parent, Kind, ParentMeta, ParentAI); else BaseAICalls[Parent] = Kind; } else { BaseAICalls[Parent] = Kind; } } } void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent, uint32_t Kind, const MDNode *ParentMeta, uint32_t ParentAI) { for (User *U : GEP->users()) { Instruction *Inst = dyn_cast(U); if (!Inst) continue; if (auto *BI = dyn_cast(Inst)) { traceBitCast(BI, Parent, Kind, ParentMeta, ParentAI); } else if (auto *CI = dyn_cast(Inst)) { uint32_t CIKind; const MDNode *ChildMeta; uint32_t ChildAI; if (IsPreserveDIAccessIndexCall(CI, CIKind, ChildMeta, ChildAI) && IsValidAIChain(ParentMeta, ParentAI, ChildMeta)) { AIChain[CI] = std::make_pair(Parent, Kind); traceAICall(CI, CIKind, ChildMeta, ChildAI); } else { BaseAICalls[Parent] = Kind; } } else if (auto *GI = dyn_cast(Inst)) { if (GI->hasAllZeroIndices()) traceGEP(GI, Parent, Kind, ParentMeta, ParentAI); else BaseAICalls[Parent] = Kind; } else { BaseAICalls[Parent] = Kind; } } } void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) { AIChain.clear(); BaseAICalls.clear(); for (auto &BB : F) for (auto &I : BB) { uint32_t Kind; const MDNode *TypeMeta; uint32_t AccessIndex; auto *Call = dyn_cast(&I); if (!IsPreserveDIAccessIndexCall(Call, Kind, TypeMeta, AccessIndex) || AIChain.find(Call) != AIChain.end()) continue; traceAICall(Call, Kind, TypeMeta, AccessIndex); } } /// Get access index from the 
preserve_*_access_index intrinsic calls. bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex) { const ConstantInt *CV = dyn_cast(IndexValue); if (!CV) return false; AccessIndex = CV->getValue().getZExtValue(); return true; } /// Compute the base of the whole preserve_*_access_index chains, i.e., the base /// pointer of the first preserve_*_access_index call, and construct the access /// string, which will be the name of a global variable. Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey, uint32_t Kind, MDNode *&TypeMeta) { Value *Base = nullptr; std::string TypeName; std::stack> CallStack; // Put the access chain into a stack with the top as the head of the chain. while (Call) { CallStack.push(std::make_pair(Call, Kind)); Kind = AIChain[Call].second; Call = AIChain[Call].first; } // The access offset from the base of the head of chain is also // calculated here as all debuginfo types are available. // Get type name and calculate the first index. // We only want to get type name from structure or union. // If user wants a relocation like // int *p; ... __builtin_preserve_access_index(&p[4]) ... // or // int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ... // we will skip them. uint32_t FirstIndex = 0; uint32_t AccessOffset = 0; while (CallStack.size()) { auto StackElem = CallStack.top(); Call = StackElem.first; Kind = StackElem.second; if (!Base) Base = Call->getArgOperand(0); MDNode *MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index); DIType *Ty = stripQualifiers(cast(MDN)); if (Kind == BPFPreserveUnionAI || Kind == BPFPreserveStructAI) { // struct or union type TypeName = Ty->getName(); TypeMeta = Ty; AccessOffset += FirstIndex * Ty->getSizeInBits() >> 3; break; } // Array entries will always be consumed for accumulative initial index. 
CallStack.pop(); // BPFPreserveArrayAI uint64_t AccessIndex; if (!getAccessIndex(Call->getArgOperand(2), AccessIndex)) return nullptr; DIType *BaseTy = nullptr; bool CheckElemType = false; if (const auto *CTy = dyn_cast(Ty)) { // array type assert(CTy->getTag() == dwarf::DW_TAG_array_type); FirstIndex += AccessIndex * calcArraySize(CTy, 1); BaseTy = stripQualifiers(CTy->getBaseType()); CheckElemType = CTy->getElements().size() == 1; } else { // pointer type auto *DTy = cast(Ty); assert(DTy->getTag() == dwarf::DW_TAG_pointer_type); BaseTy = stripQualifiers(DTy->getBaseType()); CTy = dyn_cast(BaseTy); if (!CTy) { CheckElemType = true; } else if (CTy->getTag() != dwarf::DW_TAG_array_type) { FirstIndex += AccessIndex; CheckElemType = true; } else { FirstIndex += AccessIndex * calcArraySize(CTy, 0); } } if (CheckElemType) { auto *CTy = dyn_cast(BaseTy); if (!CTy) return nullptr; unsigned CTag = CTy->getTag(); if (CTag != dwarf::DW_TAG_structure_type && CTag != dwarf::DW_TAG_union_type) return nullptr; else TypeName = CTy->getName(); TypeMeta = CTy; AccessOffset += FirstIndex * CTy->getSizeInBits() >> 3; break; } } assert(TypeName.size()); AccessKey += std::to_string(FirstIndex); // Traverse the rest of access chain to complete offset calculation // and access key construction. while (CallStack.size()) { auto StackElem = CallStack.top(); Call = StackElem.first; Kind = StackElem.second; CallStack.pop(); // Access Index uint64_t AccessIndex; uint32_t ArgIndex = (Kind == BPFPreserveUnionAI) ? 1 : 2; if (!getAccessIndex(Call->getArgOperand(ArgIndex), AccessIndex)) return nullptr; AccessKey += ":" + std::to_string(AccessIndex); MDNode *MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index); // At this stage, it cannot be pointer type. 
auto *CTy = cast(stripQualifiers(cast(MDN))); uint32_t Tag = CTy->getTag(); if (Tag == dwarf::DW_TAG_structure_type) { auto *MemberTy = cast(CTy->getElements()[AccessIndex]); AccessOffset += MemberTy->getOffsetInBits() >> 3; } else if (Tag == dwarf::DW_TAG_array_type) { auto *EltTy = stripQualifiers(CTy->getBaseType()); AccessOffset += AccessIndex * calcArraySize(CTy, 1) * EltTy->getSizeInBits() >> 3; } } // Access key is the type name + access string, uniquely identifying // one kernel memory access. AccessKey = TypeName + ":" + std::to_string(AccessOffset) + "$" + AccessKey; return Base; } /// Call/Kind is the base preserve_*_access_index() call. Attempts to do /// transformation to a chain of relocable GEPs. bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, uint32_t Kind) { std::string AccessKey; MDNode *TypeMeta; Value *Base = computeBaseAndAccessKey(Call, AccessKey, Kind, TypeMeta); if (!Base) return false; // Do the transformation // For any original GEP Call and Base %2 like // %4 = bitcast %struct.net_device** %dev1 to i64* // it is transformed to: // %6 = load sk_buff:50:$0:0:0:2:0 // %7 = bitcast %struct.sk_buff* %2 to i8* // %8 = getelementptr i8, i8* %7, %6 // %9 = bitcast i8* %8 to i64* // using %9 instead of %4 // The original Call inst is removed. BasicBlock *BB = Call->getParent(); GlobalVariable *GV; if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false, GlobalVariable::ExternalLinkage, NULL, AccessKey); GV->addAttribute(BPFCoreSharedInfo::AmaAttr); GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta); GEPGlobals[AccessKey] = GV; } else { GV = GEPGlobals[AccessKey]; } // Load the global variable. 
auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV); BB->getInstList().insert(Call->getIterator(), LDInst); // Generate a BitCast auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext())); BB->getInstList().insert(Call->getIterator(), BCInst); // Generate a GetElementPtr auto *GEP = GetElementPtrInst::Create(Type::getInt8Ty(BB->getContext()), BCInst, LDInst); BB->getInstList().insert(Call->getIterator(), GEP); // Generate a BitCast auto *BCInst2 = new BitCastInst(GEP, Call->getType()); BB->getInstList().insert(Call->getIterator(), BCInst2); Call->replaceAllUsesWith(BCInst2); Call->eraseFromParent(); return true; } bool BPFAbstractMemberAccess::doTransformation(Module &M) { bool Transformed = false; for (Function &F : M) { // Collect PreserveDIAccessIndex Intrinsic call chains. // The call chains will be used to generate the access // patterns similar to GEP. collectAICallChains(M, F); for (auto &C : BaseAICalls) Transformed = transformGEPChain(M, C.first, C.second) || Transformed; } return removePreserveAccessIndexIntrinsic(M) || Transformed; }