From 98a8db31ebcac70ac5a666f8df4e29cda8e969b6 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 29 Nov 2018 13:58:02 +0000 Subject: [PATCH] Revert r347596 "Support for inserting profile-directed cache prefetches" It causes asserts building BoringSSL. See https://crbug.com/91009#c3 for repro. This also reverts the follow-ups: Revert r347724 "Do not insert prefetches with unsupported memory operands." Revert r347606 "[X86] Add dependency from X86 to ProfileData after rL347596" Revert r347607 "Add new passes to X86 pipeline tests" llvm-svn: 347864 --- lib/Target/X86/CMakeLists.txt | 2 - lib/Target/X86/LLVMBuild.txt | 2 +- lib/Target/X86/X86.h | 7 - lib/Target/X86/X86DiscriminateMemOps.cpp | 130 --------- lib/Target/X86/X86InsertPrefetch.cpp | 253 ------------------ lib/Target/X86/X86TargetMachine.cpp | 2 - test/CodeGen/X86/O0-pipeline.ll | 2 - test/CodeGen/X86/O3-pipeline.ll | 2 - test/CodeGen/X86/discriminate-mem-ops.ll | 55 ---- test/CodeGen/X86/insert-prefetch-inline.afdo | 4 - test/CodeGen/X86/insert-prefetch-inline.ll | 76 ------ .../X86/insert-prefetch-invalid-instr.afdo | 2 - .../X86/insert-prefetch-invalid-instr.ll | 46 ---- test/CodeGen/X86/insert-prefetch-other.afdo | 3 - test/CodeGen/X86/insert-prefetch.afdo | 3 - test/CodeGen/X86/insert-prefetch.ll | 101 ------- 16 files changed, 1 insertion(+), 689 deletions(-) delete mode 100644 lib/Target/X86/X86DiscriminateMemOps.cpp delete mode 100644 lib/Target/X86/X86InsertPrefetch.cpp delete mode 100644 test/CodeGen/X86/discriminate-mem-ops.ll delete mode 100644 test/CodeGen/X86/insert-prefetch-inline.afdo delete mode 100644 test/CodeGen/X86/insert-prefetch-inline.ll delete mode 100644 test/CodeGen/X86/insert-prefetch-invalid-instr.afdo delete mode 100644 test/CodeGen/X86/insert-prefetch-invalid-instr.ll delete mode 100644 test/CodeGen/X86/insert-prefetch-other.afdo delete mode 100644 test/CodeGen/X86/insert-prefetch.afdo delete mode 100644 test/CodeGen/X86/insert-prefetch.ll diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 524b4ae53be..5ded1f971a0 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -30,7 +30,6 @@ set(sources X86CmovConversion.cpp X86CondBrFolding.cpp X86DomainReassignment.cpp - X86DiscriminateMemOps.cpp X86ExpandPseudo.cpp X86FastISel.cpp X86FixupBWInsts.cpp @@ -45,7 +44,6 @@ set(sources X86ISelLowering.cpp X86IndirectBranchTracking.cpp X86InterleavedAccess.cpp - X86InsertPrefetch.cpp X86InstrFMA3Info.cpp X86InstrFoldTables.cpp X86InstrInfo.cpp diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt index 055336baac1..2062163afb0 100644 --- a/lib/Target/X86/LLVMBuild.txt +++ b/lib/Target/X86/LLVMBuild.txt @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = X86CodeGen parent = X86 -required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils GlobalISel ProfileData +required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils GlobalISel add_to_library_groups = X86 diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index ab0cfeb9866..19f8e35ade0 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -122,13 +122,6 @@ FunctionPass *createX86EvexToVexInsts(); /// This pass creates the thunks for the retpoline feature. FunctionPass *createX86RetpolineThunksPass(); -/// This pass ensures instructions featuring a memory operand -/// have distinctive (with respect to eachother) -FunctionPass *createX86DiscriminateMemOpsPass(); - -/// This pass applies profiling information to insert cache prefetches. -FunctionPass *createX86InsertPrefetchPass(); - InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &, X86RegisterBankInfo &); diff --git a/lib/Target/X86/X86DiscriminateMemOps.cpp b/lib/Target/X86/X86DiscriminateMemOps.cpp deleted file mode 100644 index 5b94259246b..00000000000 --- a/lib/Target/X86/X86DiscriminateMemOps.cpp +++ /dev/null @@ -1,130 +0,0 @@ -//===- X86DiscriminateMemOps.cpp - Unique IDs for Mem Ops -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// This pass aids profile-driven cache prefetch insertion by ensuring all -/// instructions that have a memory operand are distinguishible from each other. -/// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86InstrBuilder.h" -#include "X86InstrInfo.h" -#include "X86MachineFunctionInfo.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfReader.h" -#include "llvm/Transforms/IPO/SampleProfile.h" -using namespace llvm; - -namespace { - -using Location = std::pair; - -Location diToLocation(const DILocation *Loc) { - return std::make_pair(Loc->getFilename(), Loc->getLine()); -} - -/// Ensure each instruction having a memory operand has a distinct pair. -void updateDebugInfo(MachineInstr *MI, const DILocation *Loc) { - DebugLoc DL(Loc); - MI->setDebugLoc(DL); -} - -class X86DiscriminateMemOps : public MachineFunctionPass { - bool runOnMachineFunction(MachineFunction &MF) override; - StringRef getPassName() const override { - return "X86 Discriminate Memory Operands"; - } - -public: - static char ID; - - /// Default construct and initialize the pass. - X86DiscriminateMemOps(); -}; - -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// Implementation -//===----------------------------------------------------------------------===// - -char X86DiscriminateMemOps::ID = 0; - -/// Default construct and initialize the pass. -X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {} - -bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) { - DISubprogram *FDI = MF.getFunction().getSubprogram(); - if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling()) - return false; - - // Have a default DILocation, if we find instructions with memops that don't - // have any debug info. - const DILocation *ReferenceDI = - DILocation::get(FDI->getContext(), FDI->getLine(), 0, FDI); - - DenseMap MemOpDiscriminators; - MemOpDiscriminators[diToLocation(ReferenceDI)] = 0; - - // Figure out the largest discriminator issued for each Location. When we - // issue new discriminators, we can thus avoid issuing discriminators - // belonging to instructions that don't have memops. This isn't a requirement - // for the goals of this pass, however, it avoids unnecessary ambiguity. - for (auto &MBB : MF) { - for (auto &MI : MBB) { - const auto &DI = MI.getDebugLoc(); - if (!DI) - continue; - Location Loc = diToLocation(DI); - MemOpDiscriminators[Loc] = - std::max(MemOpDiscriminators[Loc], DI->getBaseDiscriminator()); - } - } - - // Keep track of the discriminators seen at each Location. If an instruction's - // DebugInfo has a Location and discriminator we've already seen, replace its - // discriminator with a new one, to guarantee uniqueness. - DenseMap> Seen; - - bool Changed = false; - for (auto &MBB : MF) { - for (auto &MI : MBB) { - if (X86II::getMemoryOperandNo(MI.getDesc().TSFlags) < 0) - continue; - const DILocation *DI = MI.getDebugLoc(); - if (!DI) { - DI = ReferenceDI; - } - DenseSet &Set = Seen[diToLocation(DI)]; - std::pair::iterator, bool> P = - Set.insert(DI->getBaseDiscriminator()); - if (!P.second) { - DI = DI->setBaseDiscriminator(++MemOpDiscriminators[diToLocation(DI)]); - updateDebugInfo(&MI, DI); - Changed = true; - *P.first = DI->getBaseDiscriminator(); - } - - // Bump the reference DI to avoid cramming discriminators on line 0. - // FIXME(mtrofin): pin ReferenceDI on blocks or first instruction with DI - // in a block. It's more consistent than just relying on the last memop - // instruction we happened to see. - ReferenceDI = DI; - } - } - return Changed; -} - -FunctionPass *llvm::createX86DiscriminateMemOpsPass() { - return new X86DiscriminateMemOps(); -} diff --git a/lib/Target/X86/X86InsertPrefetch.cpp b/lib/Target/X86/X86InsertPrefetch.cpp deleted file mode 100644 index 30b46a09ef0..00000000000 --- a/lib/Target/X86/X86InsertPrefetch.cpp +++ /dev/null @@ -1,253 +0,0 @@ -//===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass applies cache prefetch instructions based on a profile. The pass -// assumes DiscriminateMemOps ran immediately before, to ensure debug info -// matches the one used at profile generation time. The profile is encoded in -// afdo format (text or binary). It contains prefetch hints recommendations. -// Each recommendation is made in terms of debug info locations, a type (i.e. -// nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a -// memory operand (see X86DiscriminateMemOps). The prefetch will be made for -// a location at that memory operand + the delta specified in the -// recommendation. -// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86InstrBuilder.h" -#include "X86InstrInfo.h" -#include "X86MachineFunctionInfo.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfReader.h" -#include "llvm/Transforms/IPO/SampleProfile.h" -using namespace llvm; -using namespace sampleprof; - -static cl::opt - PrefetchHintsFile("prefetch-hints-file", - cl::desc("Path to the prefetch hints profile."), - cl::Hidden); -namespace { - -class X86InsertPrefetch : public MachineFunctionPass { - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool doInitialization(Module &) override; - - bool runOnMachineFunction(MachineFunction &MF) override; - struct PrefetchInfo { - unsigned InstructionID; - int64_t Delta; - }; - typedef SmallVectorImpl Prefetches; - bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI, - Prefetches &prefetches) const; - -public: - static char ID; - X86InsertPrefetch(const std::string &PrefetchHintsFilename); - StringRef getPassName() const override { - return "X86 Insert Cache Prefetches"; - } - -private: - std::string Filename; - std::unique_ptr Reader; -}; - -using PrefetchHints = SampleRecord::CallTargetMap; - -// Return any prefetching hints for the specified MachineInstruction. The hints -// are returned as pairs (name, delta). -ErrorOr getPrefetchHints(const FunctionSamples *TopSamples, - const MachineInstr &MI) { - if (const auto &Loc = MI.getDebugLoc()) - if (const auto *Samples = TopSamples->findFunctionSamples(Loc)) - return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc), - Loc->getBaseDiscriminator()); - return std::error_code(); -} - -// The prefetch instruction can't take memory operands involving vector -// registers. -bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) { - unsigned BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg(); - unsigned IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg(); - return (BaseReg == 0 || - X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || - X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) && - (IndexReg == 0 || - X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || - X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)); -} - -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// Implementation -//===----------------------------------------------------------------------===// - -char X86InsertPrefetch::ID = 0; - -X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename) - : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {} - -/// Return true if the provided MachineInstruction has cache prefetch hints. In -/// that case, the prefetch hints are stored, in order, in the Prefetches -/// vector. -bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, - const MachineInstr &MI, - Prefetches &Prefetches) const { - assert(Prefetches.empty() && - "Expected caller passed empty PrefetchInfo vector."); - static const std::pair HintTypes[] = { - {"_nta_", X86::PREFETCHNTA}, - {"_t0_", X86::PREFETCHT0}, - {"_t1_", X86::PREFETCHT1}, - {"_t2_", X86::PREFETCHT2}, - }; - static const char *SerializedPrefetchPrefix = "__prefetch"; - - const ErrorOr T = getPrefetchHints(TopSamples, MI); - if (!T) - return false; - int16_t max_index = -1; - // Convert serialized prefetch hints into PrefetchInfo objects, and populate - // the Prefetches vector. - for (const auto &S_V : *T) { - StringRef Name = S_V.getKey(); - if (Name.consume_front(SerializedPrefetchPrefix)) { - int64_t D = static_cast(S_V.second); - unsigned IID = 0; - for (const auto &HintType : HintTypes) { - if (Name.startswith(HintType.first)) { - Name = Name.drop_front(HintType.first.size()); - IID = HintType.second; - break; - } - } - if (IID == 0) - return false; - uint8_t index = 0; - Name.consumeInteger(10, index); - - if (index >= Prefetches.size()) - Prefetches.resize(index + 1); - Prefetches[index] = {IID, D}; - max_index = std::max(max_index, static_cast(index)); - } - } - assert(max_index + 1 >= 0 && - "Possible overflow: max_index + 1 should be positive."); - assert(static_cast(max_index + 1) == Prefetches.size() && - "The number of prefetch hints received should match the number of " - "PrefetchInfo objects returned"); - return !Prefetches.empty(); -} - -bool X86InsertPrefetch::doInitialization(Module &M) { - if (Filename.empty()) - return false; - - LLVMContext &Ctx = M.getContext(); - ErrorOr> ReaderOrErr = - SampleProfileReader::create(Filename, Ctx); - if (std::error_code EC = ReaderOrErr.getError()) { - std::string Msg = "Could not open profile: " + EC.message(); - Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg, - DiagnosticSeverity::DS_Warning)); - return false; - } - Reader = std::move(ReaderOrErr.get()); - Reader->read(); - return true; -} - -void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired(); -} - -bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) { - if (!Reader) - return false; - const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction()); - if (!Samples) - return false; - - bool Changed = false; - - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - SmallVector Prefetches; - for (auto &MBB : MF) { - for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) { - auto Current = MI; - ++MI; - - int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags); - if (Offset < 0) - continue; - unsigned Bias = X86II::getOperandBias(Current->getDesc()); - int MemOpOffset = Offset + Bias; - // FIXME(mtrofin): ORE message when the recommendation cannot be taken. - if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset)) - continue; - Prefetches.clear(); - if (!findPrefetchInfo(Samples, *Current, Prefetches)) - continue; - assert(!Prefetches.empty() && - "The Prefetches vector should contain at least a value if " - "findPrefetchInfo returned true."); - for (auto &PrefInfo : Prefetches) { - unsigned PFetchInstrID = PrefInfo.InstructionID; - int64_t Delta = PrefInfo.Delta; - const MCInstrDesc &Desc = TII->get(PFetchInstrID); - MachineInstr *PFetch = - MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true); - MachineInstrBuilder MIB(MF, PFetch); - - assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 && - X86::AddrIndexReg == 2 && X86::AddrDisp == 3 && - X86::AddrSegmentReg == 4 && - "Unexpected change in X86 operand offset order."); - - // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc. - // FIXME(mtrofin): consider adding a: - // MachineInstrBuilder::set(unsigned offset, op). - MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg()) - .addImm( - Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm()) - .addReg( - Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg()) - .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() + - Delta) - .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg) - .getReg()); - - if (!Current->memoperands_empty()) { - MachineMemOperand *CurrentOp = *(Current->memoperands_begin()); - MIB.addMemOperand(MF.getMachineMemOperand( - CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize())); - } - - // Insert before Current. This is because Current may clobber some of - // the registers used to describe the input memory operand. - MBB.insert(Current, PFetch); - Changed = true; - } - } - } - return Changed; -} - -FunctionPass *llvm::createX86InsertPrefetchPass() { - return new X86InsertPrefetch(PrefetchHintsFile); -} diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index c2b5a6ebe11..3ef020744db 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -497,8 +497,6 @@ void X86PassConfig::addPreEmitPass() { addPass(createX86FixupLEAs()); addPass(createX86EvexToVexInsts()); } - addPass(createX86DiscriminateMemOpsPass()); - addPass(createX86InsertPrefetchPass()); } void X86PassConfig::addPreEmitPass2() { diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll index 67b1aceecf6..d9a093b8c59 100644 --- a/test/CodeGen/X86/O0-pipeline.ll +++ b/test/CodeGen/X86/O0-pipeline.ll @@ -58,8 +58,6 @@ ; CHECK-NEXT: Shadow Call Stack ; CHECK-NEXT: X86 Indirect Branch Tracking ; CHECK-NEXT: X86 vzeroupper inserter -; CHECK-NEXT: X86 Discriminate Memory Operands -; CHECK-NEXT: X86 Insert Cache Prefetches ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis diff --git a/test/CodeGen/X86/O3-pipeline.ll b/test/CodeGen/X86/O3-pipeline.ll index 224c435ebd1..68f16b31a16 100644 --- a/test/CodeGen/X86/O3-pipeline.ll +++ b/test/CodeGen/X86/O3-pipeline.ll @@ -159,8 +159,6 @@ ; CHECK-NEXT: X86 Atom pad short functions ; CHECK-NEXT: X86 LEA Fixup ; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible -; CHECK-NEXT: X86 Discriminate Memory Operands -; CHECK-NEXT: X86 Insert Cache Prefetches ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis diff --git a/test/CodeGen/X86/discriminate-mem-ops.ll b/test/CodeGen/X86/discriminate-mem-ops.ll deleted file mode 100644 index b77a91fafd2..00000000000 --- a/test/CodeGen/X86/discriminate-mem-ops.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc < %s | FileCheck %s -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind readonly uwtable -define i32 @sum(i32* %arr, i32 %pos1, i32 %pos2) !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !9 - %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom, !dbg !9 - %0 = load i32, i32* %arrayidx, align 4, !dbg !9, !tbaa !10 - %idxprom1 = sext i32 %pos2 to i64, !dbg !14 - %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1, !dbg !14 - %1 = load i32, i32* %arrayidx2, align 4, !dbg !14, !tbaa !10 - %add = add nsw i32 %1, %0, !dbg !15 - ret i32 %add, !dbg !16 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 322155) (llvm/trunk 322159)"} -!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 2, column: 10, scope: !7) -!10 = !{!11, !11, i64 0} -!11 = !{!"int", !12, i64 0} -!12 = !{!"omnipotent char", !13, i64 0} -!13 = !{!"Simple C++ TBAA"} -!14 = !DILocation(line: 2, column: 22, scope: !7) -!15 = !DILocation(line: 2, column: 20, scope: !7) -!16 = !DILocation(line: 2, column: 3, scope: !7) - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 2 20 discriminator 2 # test.cc:2:20 -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax -;CHECK-NEXT: .loc 1 2 3 # test.cc:2:3 diff --git a/test/CodeGen/X86/insert-prefetch-inline.afdo b/test/CodeGen/X86/insert-prefetch-inline.afdo deleted file mode 100644 index 83b30f6e210..00000000000 --- a/test/CodeGen/X86/insert-prefetch-inline.afdo +++ /dev/null @@ -1,4 +0,0 @@ -caller:0:0 - 2:sum:0 - 3: 0 __prefetch_nta_0:23456 - 3.1: 0 __prefetch_nta_0:8764 __prefetch_nta_1:64 \ No newline at end of file diff --git a/test/CodeGen/X86/insert-prefetch-inline.ll b/test/CodeGen/X86/insert-prefetch-inline.ll deleted file mode 100644 index 5f8373f9480..00000000000 --- a/test/CodeGen/X86/insert-prefetch-inline.ll +++ /dev/null @@ -1,76 +0,0 @@ -; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s -; -; Verify we can insert prefetch instructions in code belonging to inlined -; functions. -; -; ModuleID = 'test.cc' - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind readonly uwtable -define dso_local i32 @sum(i32* nocapture readonly %arr, i32 %pos1, i32 %pos2) local_unnamed_addr #0 !dbg !7 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !10 - %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom, !dbg !10 - %0 = load i32, i32* %arrayidx, align 4, !dbg !10, !tbaa !11 - %idxprom1 = sext i32 %pos2 to i64, !dbg !15 - %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1, !dbg !15 - %1 = load i32, i32* %arrayidx2, align 4, !dbg !15, !tbaa !11 - %add = add nsw i32 %1, %0, !dbg !16 - ret i32 %add, !dbg !17 -} - -; "caller" inlines "sum". The associated .afdo file references instructions -; in "caller" that came from "sum"'s inlining. -; -; Function Attrs: norecurse nounwind readonly uwtable -define dso_local i32 @caller(i32* nocapture readonly %arr) local_unnamed_addr #0 !dbg !18 { -entry: - %0 = load i32, i32* %arr, align 4, !dbg !19, !tbaa !11 - %arrayidx2.i = getelementptr inbounds i32, i32* %arr, i64 2, !dbg !21 - %1 = load i32, i32* %arrayidx2.i, align 4, !dbg !21, !tbaa !11 - %add.i = add nsw i32 %1, %0, !dbg !22 - ret i32 %add.i, !dbg !23 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 7.0.0 (trunk 324940) (llvm/trunk 324941)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 324940) (llvm/trunk 324941)"} -!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !8, file: !8, line: 3, type: !9, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DIFile(filename: "./test.h", directory: "/tmp") -!9 = !DISubroutineType(types: !2) -!10 = !DILocation(line: 6, column: 10, scope: !7) -!11 = !{!12, !12, i64 0} -!12 = !{!"int", !13, i64 0} -!13 = !{!"omnipotent char", !14, i64 0} -!14 = !{!"Simple C++ TBAA"} -!15 = !DILocation(line: 6, column: 22, scope: !7) -!16 = !DILocation(line: 6, column: 20, scope: !7) -!17 = !DILocation(line: 6, column: 3, scope: !7) -!18 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 4, type: !9, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!19 = !DILocation(line: 6, column: 10, scope: !7, inlinedAt: !20) -!20 = distinct !DILocation(line: 6, column: 10, scope: !18) -!21 = !DILocation(line: 6, column: 22, scope: !7, inlinedAt: !20) -!22 = !DILocation(line: 6, column: 20, scope: !7, inlinedAt: !20) -!23 = !DILocation(line: 6, column: 3, scope: !18) - -; CHECK-LABEL: caller: -; CHECK-LABEL: # %bb.0: -; CHECK-NEXT: .loc 1 6 22 prologue_end -; CHECK-NEXT: prefetchnta 23464(%rdi) -; CHECK-NEXT: movl 8(%rdi), %eax -; CHECK-NEXT: .loc 1 6 20 is_stmt 0 discriminator 2 -; CHECK-NEXT: prefetchnta 8764(%rdi) -; CHECK-NEXT: prefetchnta 64(%rdi) -; CHECK-NEXT: addl (%rdi), %eax diff --git a/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo b/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo deleted file mode 100644 index 6385a498b8f..00000000000 --- a/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo +++ /dev/null @@ -1,2 +0,0 @@ -main:0:0 - 6: 0 __prefetch_nta_0:42 \ No newline at end of file diff --git a/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/test/CodeGen/X86/insert-prefetch-invalid-instr.ll deleted file mode 100644 index 004fb56a56e..00000000000 --- a/test/CodeGen/X86/insert-prefetch-invalid-instr.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s -; ModuleID = 'prefetch.cc' -source_filename = "prefetch.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind uwtable -define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 { -entry: - tail call void @llvm.prefetch(i8* inttoptr (i64 291 to i8*), i32 0, i32 0, i32 1), !dbg !9 - tail call void @llvm.x86.avx512.gatherpf.dpd.512(i8 97, <8 x i32> undef, i8* null, i32 1, i32 2), !dbg !10 - ret i32 291, !dbg !11 -} - -; Function Attrs: inaccessiblemem_or_argmemonly nounwind -declare void @llvm.prefetch(i8* nocapture readonly, i32, i32, i32) #1 - -; Function Attrs: argmemonly nounwind -declare void @llvm.x86.avx512.gatherpf.dpd.512(i8, <8 x i32>, i8*, i32, i32) #2 - -attributes #0 = {"target-cpu"="x86-64" "target-features"="+avx512pf,+sse4.2,+ssse3"} -attributes #1 = { inaccessiblemem_or_argmemonly nounwind } -attributes #2 = { argmemonly nounwind } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "prefetch.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 327078) (llvm/trunk 327086)"} -!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 12, column: 3, scope: !7) -!10 = !DILocation(line: 14, column: 3, scope: !7) -!11 = !DILocation(line: 15, column: 3, scope: !7) - -;CHECK-LABEL: main: -;CHECK: # %bb.0: -;CHECK: prefetchnta 291 -;CHECK-NOT: prefetchnta 42(%rax,%ymm0) -;CHECK: vgatherpf1dpd (%rax,%ymm0) {%k1} diff --git a/test/CodeGen/X86/insert-prefetch-other.afdo b/test/CodeGen/X86/insert-prefetch-other.afdo deleted file mode 100644 index 783da34f7f8..00000000000 --- a/test/CodeGen/X86/insert-prefetch-other.afdo +++ /dev/null @@ -1,3 +0,0 @@ -sum:0:0 - 1: 0 __prefetch_t0_1:0 __prefetch_t2_0:42 - 1.1: 0 __prefetch_t1_0:18446744073709551615 diff --git a/test/CodeGen/X86/insert-prefetch.afdo b/test/CodeGen/X86/insert-prefetch.afdo deleted file mode 100644 index 96487e85eaa..00000000000 --- a/test/CodeGen/X86/insert-prefetch.afdo +++ /dev/null @@ -1,3 +0,0 @@ -sum:0:0 - 1: 0 __prefetch_nta_1:0 __prefetch_nta_0:42 - 1.1: 0 __prefetch_nta_0:18446744073709551615 diff --git a/test/CodeGen/X86/insert-prefetch.ll b/test/CodeGen/X86/insert-prefetch.ll deleted file mode 100644 index 9e77772df77..00000000000 --- a/test/CodeGen/X86/insert-prefetch.ll +++ /dev/null @@ -1,101 +0,0 @@ -; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s -; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS -; -; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling: -; int sum(int* arr, int pos1, int pos2) { -; return arr[pos1] + arr[pos2]; -; } -; -; NOTE: debug line numbers were adjusted such that the function would start -; at line 15 (an arbitrary number). The sample profile file format uses -; offsets from the start of the symbol instead of file-relative line numbers. -; The .afdo file reflects that - the instructions are offset '1'. -; -; ModuleID = 'test.cc' -source_filename = "test.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @sum(i32* %arr, i32 %pos1, i32 %pos2) !dbg !35 !prof !37 { -entry: - %idxprom = sext i32 %pos1 to i64, !dbg !38 - %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom, !dbg !38 - %0 = load i32, i32* %arrayidx, align 4, !dbg !38, !tbaa !39 - %idxprom1 = sext i32 %pos2 to i64, !dbg !43 - %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1, !dbg !43 - %1 = load i32, i32* %arrayidx2, align 4, !dbg !43, !tbaa !39 - %add = add nsw i32 %1, %0, !dbg !44 - ret i32 %add, !dbg !45 -} - -attributes #0 = { "target-cpu"="x86-64" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5, !6} -!llvm.ident = !{!33} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "test.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{i32 1, !"ProfileSummary", !7} -!7 = !{!8, !9, !10, !11, !12, !13, !14, !15} -!8 = !{!"ProfileFormat", !"SampleProfile"} -!9 = !{!"TotalCount", i64 0} -!10 = !{!"MaxCount", i64 0} -!11 = !{!"MaxInternalCount", i64 0} -!12 = !{!"MaxFunctionCount", i64 0} -!13 = !{!"NumCounts", i64 2} -!14 = !{!"NumFunctions", i64 1} -!15 = !{!"DetailedSummary", !16} -!16 = !{!17, !18, !19, !20, !21, !22, !22, !23, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32} -!17 = !{i32 10000, i64 0, i32 0} -!18 = !{i32 100000, i64 0, i32 0} -!19 = !{i32 200000, i64 0, i32 0} -!20 = !{i32 300000, i64 0, i32 0} -!21 = !{i32 400000, i64 0, i32 0} -!22 = !{i32 500000, i64 0, i32 0} -!23 = !{i32 600000, i64 0, i32 0} -!24 = !{i32 700000, i64 0, i32 0} -!25 = !{i32 800000, i64 0, i32 0} -!26 = !{i32 900000, i64 0, i32 0} -!27 = !{i32 950000, i64 0, i32 0} -!28 = !{i32 990000, i64 0, i32 0} -!29 = !{i32 999000, i64 0, i32 0} -!30 = !{i32 999900, i64 0, i32 0} -!31 = !{i32 999990, i64 0, i32 0} -!32 = !{i32 999999, i64 0, i32 0} -!33 = !{!"clang version 7.0.0 (trunk 322593) (llvm/trunk 322526)"} -!35 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 15, type: !36, isLocal: false, isDefinition: true, scopeLine: 15, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!36 = !DISubroutineType(types: !2) -!37 = !{!"function_entry_count", i64 -1} -!38 = !DILocation(line: 16, column: 10, scope: !35) -!39 = !{!40, !40, i64 0} -!40 = !{!"int", !41, i64 0} -!41 = !{!"omnipotent char", !42, i64 0} -!42 = !{!"Simple C++ TBAA"} -!43 = !DILocation(line: 16, column: 22, scope: !35) -!44 = !DILocation(line: 16, column: 20, scope: !35) -!45 = !DILocation(line: 16, column: 3, scope: !35) - -;CHECK-LABEL: sum: -;CHECK: # %bb.0: -;CHECK: prefetchnta 42(%rdi,%rax,4) -;CHECK-NEXT: prefetchnta (%rdi,%rax,4) -;CHECK-NEXT: movl (%rdi,%rax,4), %eax -;CHECK-NEXT: .loc 1 16 20 discriminator 2 # test.cc:16:20 -;CHECK-NEXT: prefetchnta -1(%rdi,%rcx,4) -;CHECK-NEXT: addl (%rdi,%rcx,4), %eax -;CHECK-NEXT: .loc 1 16 3 # test.cc:16:3 - -;OTHERS-LABEL: sum: -;OTHERS: # %bb.0: -;OTHERS: prefetcht2 42(%rdi,%rax,4) -;OTHERS-NEXT: prefetcht0 (%rdi,%rax,4) -;OTHERS-NEXT: movl (%rdi,%rax,4), %eax -;OTHERS-NEXT: .loc 1 16 20 discriminator 2 # test.cc:16:20 -;OTHERS-NEXT: prefetcht1 -1(%rdi,%rcx,4) -;OTHERS-NEXT: addl (%rdi,%rcx,4), %eax -;OTHERS-NEXT: .loc 1 16 3 # test.cc:16:3