1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 03:23:01 +02:00
llvm-mirror/lib/Target/X86/X86.h
Lama Saba cc71753981 [X86] Reduce Store Forward Block issues in HW - Recommit after fixing Bug 36346
If a load follows a store and reloads data that the store has written to memory, Intel microarchitectures can in many cases forward the data directly from the store to the load, This "store forwarding" saves cycles by enabling the load to directly obtain the data instead of accessing the data from cache or memory.
A "store forward block" occurs in cases that a store cannot be forwarded to the load. The most typical case of store forward block on Intel Core microarchiticutre that a small store cannot be forwarded to a large load.
The estimated penalty for a store forward block is ~13 cycles.

This pass tries to recognize and handle cases where "store forward block" is created by the compiler when lowering memcpy calls to a sequence
of a load and a store.

The pass currently only handles cases where memcpy is lowered to XMM/YMM registers, it tries to break the memcpy into smaller copies.
breaking the memcpy should be possible since there is no atomicity guarantee for loads and stores to XMM/YMM.

Change-Id: Ic41aa9ade6512e0478db66e07e2fde41b4fb35f9
llvm-svn: 325128
2018-02-14 14:58:53 +00:00

125 lines
4.7 KiB
C++

//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the entry points for global functions defined in the x86
// target library, as used by the LLVM JIT.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_X86_X86_H
#define LLVM_LIB_TARGET_X86_X86_H
#include "llvm/Support/CodeGen.h"
namespace llvm {
class FunctionPass;
class ImmutablePass;
class InstructionSelector;
class ModulePass;
class PassRegistry;
class X86RegisterBankInfo;
class X86Subtarget;
class X86TargetMachine;
/// This pass converts a legalized DAG into a X86-specific DAG, ready for
/// instruction scheduling.
FunctionPass *createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel);
/// This pass initializes a global base register for PIC on x86-32.
FunctionPass *createX86GlobalBaseRegPass();
/// This pass combines multiple accesses to local-dynamic TLS variables so that
/// the TLS base address for the module is only fetched once per execution path
/// through the function.
FunctionPass *createCleanupLocalDynamicTLSPass();
/// This function returns a pass which converts floating-point register
/// references and pseudo instructions into floating-point stack references and
/// physical instructions.
FunctionPass *createX86FloatingPointStackifierPass();
/// This pass inserts AVX vzeroupper instructions before each call to avoid
/// transition penalty between functions encoded with AVX and SSE.
FunctionPass *createX86IssueVZeroUpperPass();
/// This pass inserts ENDBR instructions before indirect jump/call
/// destinations as part of CET IBT mechanism.
FunctionPass *createX86IndirectBranchTrackingPass();
/// Return a pass that pads short functions with NOOPs.
/// This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
/// Return a pass that selectively replaces certain instructions (like add,
/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA
/// instructions, in order to eliminate execution delays in some processors.
FunctionPass *createX86FixupLEAs();
/// Return a pass that removes redundant LEA instructions and redundant address
/// recalculations.
FunctionPass *createX86OptimizeLEAs();
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
FunctionPass *createX86FixupSetCC();
/// Return a pass that avoids creating store forward block issues in the hardware.
FunctionPass *createX86FixupSFB();
/// Return a pass that expands WinAlloca pseudo-instructions.
FunctionPass *createX86WinAllocaExpander();
/// Return a pass that optimizes the code-size of x86 call sequences. This is
/// done by replacing esp-relative movs with pushes.
FunctionPass *createX86CallFrameOptimization();
/// Return an IR pass that inserts EH registration stack objects and explicit
/// EH state updates. This pass must run after EH preparation, which does
/// Windows-specific but architecture-neutral preparation.
FunctionPass *createX86WinEHStatePass();
/// Return a Machine IR pass that expands X86-specific pseudo
/// instructions into a sequence of actual instructions. This pass
/// must run after prologue/epilogue insertion and before lowering
/// the MachineInstr to MC.
FunctionPass *createX86ExpandPseudoPass();
/// This pass converts X86 cmov instructions into branch when profitable.
FunctionPass *createX86CmovConverterPass();
/// Return a Machine IR pass that selectively replaces
/// certain byte and word instructions by equivalent 32 bit instructions,
/// in order to eliminate partial register usage, false dependences on
/// the upper portions of registers, and to save code size.
FunctionPass *createX86FixupBWInsts();
/// Return a Machine IR pass that reassigns instruction chains from one domain
/// to another, when profitable.
FunctionPass *createX86DomainReassignmentPass();
void initializeFixupBWInstPassPass(PassRegistry &);
/// This pass replaces EVEX encoded of AVX-512 instructiosn by VEX
/// encoding when possible in order to reduce code size.
FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
FunctionPass *createX86RetpolineThunksPass();
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
void initializeEvexToVexInstPassPass(PassRegistry &);
} // End llvm namespace
#endif