InstCombine rule to fold trunc when value available

Summary:
This InstCombine rule folds away trunc operations whose value is already
available from a prior load or store. Such code can be generated when GVN
widens a load, and it can also appear directly in source code.

Reviewers: reames, majnemer, sanjoy

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D21246

llvm-svn: 273608
commit 7e0ac12473
parent 139759e7d7
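For intuition, here is a source-level sketch of the pattern the summary
describes (illustrative only, not part of the patch; "consume" mirrors the
@consume helper in the tests at the end of this diff): a narrow store followed
by a widened load whose truncated low half the new rule can fold away.

    extern void consume(char);  // hypothetical sink, like @consume in the tests

    short lowByteKnown(char *a) {
      *a = 0;  // narrow store: becomes "store i8 0, i8* %a" in IR
      // Wide read covering the stored byte; type-punning shown for brevity
      // (GVN's load widening produces the equivalent IR on its own).
      short wide = *reinterpret_cast<short *>(a);  // load i16
      consume(static_cast<char>(wide));  // trunc i16 -> i8; foldable to 0
      return wide;
    }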
@@ -61,6 +61,38 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
 /// to scan in the block, used by FindAvailableLoadedValue().
 extern cl::opt<unsigned> DefMaxInstsToScan;
 
+/// Scan the ScanBB block backwards checking to see if we have the value at
+/// the memory address \p Ptr of type \p AccessTy locally available within a
+/// small number of instructions. If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't
+/// find something that invalidates *Ptr or provides it, ScanFrom would be
+/// left at begin() and this returns null. ScanFrom could also be left
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block.
+/// If it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+///
+/// If AATags is non-null and a load or store is found, the AA tags from the
+/// load or store are recorded there. If there are no AA tags or if no access
+/// is found, it is left unmodified.
+///
+/// IsAtomicMemOp specifies the atomicity of the memory operation that accesses
+/// \p *Ptr. We verify atomicity constraints are satisfied when value forwarding
+/// from another memory operation that has value \p *Ptr available.
+///
+/// Note that we assume the \p *Ptr is accessed through a non-volatile but
+/// potentially atomic load. Any other constraints should be verified at the
+/// caller.
+Value *FindAvailableLoadedValue(Value *Ptr, Type *AccessTy, bool IsAtomicMemOp,
+                                BasicBlock *ScanBB,
+                                BasicBlock::iterator &ScanFrom,
+                                unsigned MaxInstsToScan,
+                                AliasAnalysis *AA = nullptr,
+                                AAMDNodes *AATags = nullptr,
+                                bool *IsLoadCSE = nullptr);
+
 /// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at
 /// the instruction before ScanFrom) checking to see if we have the value at
 /// the memory address *Ptr locally available within a small number of
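As a usage sketch of the new overload (a hypothetical helper, not part of the
patch; the name findNarrowedValue is invented, and the body mirrors the call
the InstCombine hunk below makes):

    #include "llvm/Analysis/Loads.h"  // FindAvailableLoadedValue, DefMaxInstsToScan
    using namespace llvm;

    // Given a wide load LI and the narrower type DestTy we want, return a
    // previously available value that a trunc of LI to DestTy could be
    // replaced with, or null if the backward scan (capped at
    // DefMaxInstsToScan instructions) finds nothing.
    static Value *findNarrowedValue(LoadInst *LI, Type *DestTy) {
      BasicBlock::iterator BBI(*LI);
      return FindAvailableLoadedValue(LI->getPointerOperand(), DestTy,
                                      LI->isAtomic(), LI->getParent(), BBI,
                                      DefMaxInstsToScan);
    }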
@@ -300,8 +300,9 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
                  "to scan backward from a given instruction, when searching for "
                  "available loaded value"));
 
-/// \brief Scan the ScanBB block backwards to see if we have the value at the
-/// memory address *Ptr locally available within a small number of instructions.
+/// Scan the ScanBB block backwards checking to see if we have the value at
+/// the memory address \p Ptr of type \p AccessTy locally available within a
+/// small number of instructions. If the value is available, return it.
 ///
 /// The scan starts from \c ScanFrom. \c MaxInstsToScan specifies the maximum
 /// instructions to scan in the block. If it is set to \c 0, it will scan the whole
@@ -318,26 +319,25 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
 ///
 /// If \c AATags is non-null and a load or store is found, the AA tags from the
-/// load or store are recorded there. If there are no AA tags or if no access is
-/// found, it is left unmodified.
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
+/// load or store are recorded there. If there are no AA tags or if no access
+/// is found, it is left unmodified.
+///
+/// IsAtomicMemOp specifies the atomicity of the memory operation that accesses
+/// \p *Ptr. We verify atomicity constraints are satisfied when value forwarding
+/// from another memory operation that has value \p *Ptr available.
+///
+/// Note that we assume the \p *Ptr is accessed through a non-volatile but
+/// potentially atomic load. Any other constraints should be verified at the
+/// caller.
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy, bool IsAtomicMemOp,
+                                      BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
                                       AliasAnalysis *AA, AAMDNodes *AATags,
                                       bool *IsLoadCSE) {
+
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
 
-  Value *Ptr = Load->getPointerOperand();
-  Type *AccessTy = Load->getType();
-
-  // We can never remove a volatile load
-  if (Load->isVolatile())
-    return nullptr;
-
-  // Anything stronger than unordered is currently unimplemented.
-  if (!Load->isUnordered())
-    return nullptr;
-
   const DataLayout &DL = ScanBB->getModule()->getDataLayout();
 
   // Try to get the store size for the type.
@@ -363,14 +363,14 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
     // If this is a load of Ptr, the loaded value is available.
     // (This is true even if the load is volatile or atomic, although
     // those cases are unlikely.)
-    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-      if (AreEquivalentAddressValues(
-              LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+      Value *LoadPtr = LI->getPointerOperand()->stripPointerCasts();
+      if (AreEquivalentAddressValues(LoadPtr, StrippedPtr) &&
           CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) {
 
         // We can value forward from an atomic to a non-atomic, but not the
         // other way around.
-        if (LI->isAtomic() < Load->isAtomic())
+        if (LI->isAtomic() < IsAtomicMemOp)
           return nullptr;
 
         if (AATags)
@@ -380,6 +380,8 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
         return LI;
       }
 
+    }
+
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
       // If this is a store through Ptr, the value is available!
@@ -391,7 +393,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
 
       // We can value forward from an atomic to a non-atomic, but not the
       // other way around.
-      if (SI->isAtomic() < Load->isAtomic())
+      if (SI->isAtomic() < IsAtomicMemOp)
        return nullptr;
 
       if (AATags)
@@ -434,4 +436,44 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
   // Got to the start of the block, we didn't find it, but are done for this
   // block.
   return nullptr;
 }
+
+/// \brief Scan the ScanBB block backwards to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+///
+/// The scan starts from \c ScanFrom. \c MaxInstsToScan specifies the maximum
+/// instructions to scan in the block. If it is set to \c 0, it will scan the whole
+/// block.
+///
+/// If the value is available, this function returns it. If not, it returns the
+/// iterator for the last validated instruction that the value would be live
+/// through. If we scanned the entire block and didn't find something that
+/// invalidates \c *Ptr or provides it, \c ScanFrom is left at the last
+/// instruction processed and this returns null.
+///
+/// You can also optionally specify an alias analysis implementation, which
+/// makes this more precise.
+///
+/// If \c AATags is non-null and a load or store is found, the AA tags from the
+/// load or store are recorded there. If there are no AA tags or if no access
+/// is found, it is left unmodified.
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
+                                      BasicBlock::iterator &ScanFrom,
+                                      unsigned MaxInstsToScan,
+                                      AliasAnalysis *AA, AAMDNodes *AATags,
+                                      bool *IsLoadCSE) {
+
+  // We can never remove a volatile load
+  if (Load->isVolatile())
+    return nullptr;
+
+  // Anything stronger than unordered is currently unimplemented.
+  if (!Load->isUnordered())
+    return nullptr;
+
+  // Return the full value of the load if available.
+  return FindAvailableLoadedValue(Load->getPointerOperand(), Load->getType(),
+                                  Load->isAtomic(), ScanBB, ScanFrom,
+                                  MaxInstsToScan, AA, AATags, IsLoadCSE);
+}
@@ -14,9 +14,10 @@
 #include "InstCombineInternal.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -576,6 +577,24 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
     return I;
 
+  // When trunc operand is a widened load, see if we can get the value from a
+  // previous store/load
+  if (auto *LI = dyn_cast<LoadInst>(Src)) {
+    BasicBlock::iterator BBI(*LI);
+
+    // Scan a few instructions up from LI and if we find a partial load/store
+    // of Type DestTy that feeds into LI, we can replace all uses of the trunc
+    // with the load/store value.
+    // This replacement can be done only in the case of non-volatile loads. If
+    // the load is atomic, its only use should be the trunc instruction. We
+    // don't want to allow other users of LI to see a value that is out of sync
+    // with the value we're folding the trunc to (in case of a race).
+    if (!LI->isVolatile() && (!LI->isAtomic() || LI->hasOneUse()))
+      if (Value *AvailableVal = FindAvailableLoadedValue(
+              LI->getPointerOperand(), DestTy, LI->isAtomic(), LI->getParent(),
+              BBI, DefMaxInstsToScan))
+        return replaceInstUsesWith(CI, AvailableVal);
+  }
   return nullptr;
 }
@@ -181,3 +181,60 @@ bb1:
 bb2:
   unreachable
 }
+
+declare void @consume(i8) readonly
+define i1 @trunc_load_store(i8* align 2 %a) {
+ store i8 0, i8 *%a, align 2
+ %bca = bitcast i8* %a to i16*
+ %wide.load = load i16, i16* %bca, align 2
+ %lowhalf.1 = trunc i16 %wide.load to i8
+ call void @consume(i8 %lowhalf.1)
+ %cmp.2 = icmp ult i16 %wide.load, 256
+ ret i1 %cmp.2
+; CHECK-LABEL: @trunc_load_store
+; CHECK-NOT: trunc
+; CHECK: call void @consume(i8 0)
+}
+
+
+; The trunc can be replaced with the load value.
+define i1 @trunc_load_load(i8* align 2 %a) {
+ %pload = load i8, i8* %a, align 2
+ %bca = bitcast i8* %a to i16*
+ %wide.load = load i16, i16* %bca, align 2
+ %lowhalf = trunc i16 %wide.load to i8
+ call void @consume(i8 %lowhalf)
+ call void @consume(i8 %pload)
+ %cmp.2 = icmp ult i16 %wide.load, 256
+ ret i1 %cmp.2
+; CHECK-LABEL: @trunc_load_load
+; CHECK-NOT: trunc
+}
+
+; trunc should not be replaced since atomic load %wide.load has more than one use.
+; different values can be seen by the uses of %wide.load in case of race.
+define i1 @trunc_atomic_loads(i8* align 2 %a) {
+ %pload = load atomic i8, i8* %a unordered, align 2
+ %bca = bitcast i8* %a to i16*
+ %wide.load = load atomic i16, i16* %bca unordered, align 2
+ %lowhalf = trunc i16 %wide.load to i8
+ call void @consume(i8 %lowhalf)
+ call void @consume(i8 %pload)
+ %cmp.2 = icmp ult i16 %wide.load, 256
+ ret i1 %cmp.2
+; CHECK-LABEL: @trunc_atomic_loads
+; CHECK: trunc
+}
+
+; trunc cannot be replaced since store size is not trunc result size
+define i1 @trunc_different_size_load(i16 * align 2 %a) {
+ store i16 0, i16 *%a, align 2
+ %bca = bitcast i16* %a to i32*
+ %wide.load = load i32, i32* %bca, align 2
+ %lowhalf = trunc i32 %wide.load to i8
+ call void @consume(i8 %lowhalf)
+ %cmp.2 = icmp ult i32 %wide.load, 256
+ ret i1 %cmp.2
+; CHECK-LABEL: @trunc_different_size_load
+; CHECK: %lowhalf = trunc i32 %wide.load to i8
+}