Mirror of https://github.com/RPCS3/llvm-mirror.git
Revert "InstCombine rule to fold truncs whose value is available"
This reverts commit r274853, which caused a failure in the ppcBE build.

llvm-svn: 274943
parent 2fc31d6ea4
commit 401d88378c
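For context, the reverted rule taught InstCombine to replace a trunc of a widened load with a value already available from an earlier store or load in the same block. A minimal before/after sketch, distilled from the @trunc_load_store test that this commit removes (illustrative only, not part of the diff itself):

; Input (from the removed @trunc_load_store test):
define i1 @trunc_load_store(i8* align 2 %a) {
  store i8 0, i8* %a, align 2
  %bca = bitcast i8* %a to i16*
  %wide.load = load i16, i16* %bca, align 2
  %lowhalf.1 = trunc i16 %wide.load to i8
  call void @consume(i8 %lowhalf.1)
  %cmp.2 = icmp ult i16 %wide.load, 256
  ret i1 %cmp.2
}

; With the reverted fold, the trunc was dropped and its use took the stored value,
; while %wide.load itself was kept for its remaining use:
;   call void @consume(i8 0)
;   %cmp.2 = icmp ult i16 %wide.load, 256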
@@ -58,44 +58,6 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
 /// to scan in the block, used by FindAvailableLoadedValue().
 extern cl::opt<unsigned> DefMaxInstsToScan;
 
-/// \brief Scan backwards to see if we have the value of type \p AccessTy
-/// at the memory address \p Ptr locally available within a
-/// small number of instructions. If the value is available, return it.
-///
-/// You can use this function to scan across multiple blocks: after you call
-/// this function, if ScanFrom points at the beginning of the block, it's safe
-/// to continue scanning the predecessors.
-/// Note that we assume the \p *Ptr is accessed through a non-volatile but
-/// potentially atomic load. Any other constraints should be verified at the
-/// caller.
-///
-/// \param Ptr The memory location whose contents we are retrieving
-/// \param AccessTy The type (and size) of the contents we need from \p Ptr
-/// \param IsAtomicMemOp specifies the atomicity of the memory operation that accesses
-/// \p *Ptr. We verify atomicity constraints are satisfied when value forwarding
-/// from another memory operation that has value \p *Ptr available.
-/// \param ScanBB The basic block to scan. FIXME: This is redundant.
-/// \param [in,out] ScanFrom The location to start scanning from. When this
-/// function returns, it points at the last instruction scanned.
-/// \param MaxInstsToScan The maximum number of instructions to scan. If this
-/// is zero, the whole block will be scanned.
-/// \param AA Optional pointer to alias analysis, to make the scan more
-/// precise.
-/// \param [out] AATags The aliasing metadata for the operation which produced
-/// the value. FIXME: This is basically useless.
-///
-/// \param [out] IsLoadCSE Whether the returned value is a load from the same
-/// location in memory, as opposed to the value operand of a store.
-///
-/// \returns The found value, or nullptr if no value is found.
-Value *FindAvailableLoadedValue(Value *Ptr, Type *AccessTy, bool IsAtomicMemOp,
-                                BasicBlock *ScanBB,
-                                BasicBlock::iterator &ScanFrom,
-                                unsigned MaxInstsToScan,
-                                AliasAnalysis *AA = nullptr,
-                                AAMDNodes *AATags = nullptr,
-                                bool *IsLoadCSE = nullptr);
-
 /// \brief Scan backwards to see if we have the value of the given load
 /// available locally within a small number of instructions.
 ///
@@ -297,17 +297,27 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
                  "to scan backward from a given instruction, when searching for "
                  "available loaded value"));
 
-Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy,
-                                      bool IsAtomicMemOp, BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
                                       AliasAnalysis *AA, AAMDNodes *AATags,
                                       bool *IsLoadCSE) {
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
 
+  Value *Ptr = Load->getPointerOperand();
+  Type *AccessTy = Load->getType();
+
+  // We can never remove a volatile load
+  if (Load->isVolatile())
+    return nullptr;
+
+  // Anything stronger than unordered is currently unimplemented.
+  if (!Load->isUnordered())
+    return nullptr;
+
   const DataLayout &DL = ScanBB->getModule()->getDataLayout();
 
   // Try to get the store size for the type.
   uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
 
@@ -338,7 +348,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy,
 
       // We can value forward from an atomic to a non-atomic, but not the
       // other way around.
-      if (LI->isAtomic() < IsAtomicMemOp)
+      if (LI->isAtomic() < Load->isAtomic())
         return nullptr;
 
       if (AATags)
@@ -359,7 +369,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy,
 
       // We can value forward from an atomic to a non-atomic, but not the
       // other way around.
-      if (SI->isAtomic() < IsAtomicMemOp)
+      if (SI->isAtomic() < Load->isAtomic())
         return nullptr;
 
       if (AATags)
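The two hunks above preserve the one-directional rule spelled out in the comment: a value produced by an atomic (unordered) operation may be forwarded to a non-atomic use, but a non-atomic value may not be forwarded to an atomic load. A small IR sketch of the two cases (my own illustration, not taken from the patch; %p, %v, %w are placeholder names):

; Forwarding allowed: the available store is atomic, the querying load is not.
store atomic i16 1, i16* %p unordered, align 2
%v = load i16, i16* %p, align 2                    ; may be replaced by 1

; Forwarding refused: the available store is non-atomic, the querying load is atomic.
store i16 1, i16* %p, align 2
%w = load atomic i16, i16* %p unordered, align 2   ; left as-is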
@@ -403,24 +413,3 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy,
   // block.
   return nullptr;
 }
-
-
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
-                                      BasicBlock::iterator &ScanFrom,
-                                      unsigned MaxInstsToScan,
-                                      AliasAnalysis *AA, AAMDNodes *AATags,
-                                      bool *IsLoadCSE) {
-
-  // We can never remove a volatile load
-  if (Load->isVolatile())
-    return nullptr;
-
-  // Anything stronger than unordered is currently unimplemented.
-  if (!Load->isUnordered())
-    return nullptr;
-
-  // Return the full value of the load if available.
-  return FindAvailableLoadedValue(Load->getPointerOperand(), Load->getType(),
-                                  Load->isAtomic(), ScanBB, ScanFrom,
-                                  MaxInstsToScan, AA, AATags, IsLoadCSE);
-}
@@ -13,10 +13,9 @@
 
 #include "InstCombineInternal.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -576,27 +575,6 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
     return I;
 
-  // When trunc operand is a widened load, see if we can get the value from a
-  // previous store/load
-  if (auto *LI = dyn_cast<LoadInst>(Src)) {
-    BasicBlock::iterator BBI(*LI);
-
-    // Scan a few instructions up from LI and if we find a partial load/store
-    // of Type DestTy that feeds into LI, we can replace all uses of the trunc
-    // with the load/store value.
-    // This replacement can be done only in the case of non-volatile loads, with
-    // ordering at most unordered. If the load is atomic, its only use should be
-    // the trunc instruction. We don't want to allow other users of LI to see a
-    // value that is out of sync with the value we're folding the trunc to (in
-    // case of a race).
-    if (LI->isUnordered() && (!LI->isAtomic() || LI->hasOneUse()))
-      if (Value *AvailableVal = FindAvailableLoadedValue(
-              LI->getPointerOperand(), DestTy, LI->isAtomic(), LI->getParent(),
-              BBI, DefMaxInstsToScan))
-        return replaceInstUsesWith(
-            CI, Builder->CreateBitOrPointerCast(AvailableVal, CI.getType(),
-                                                CI.getName() + ".cast"));
-  }
   return nullptr;
 }
 
@@ -181,133 +181,3 @@ bb1:
 bb2:
   unreachable
 }
-
-; the trunc can be replaced from value available from store
-; load feeding into trunc left as-is.
-declare void @consume(i8) readonly
-define i1 @trunc_load_store(i8* align 2 %a) {
-  store i8 0, i8 *%a, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load i16, i16* %bca, align 2
-  %lowhalf.1 = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf.1)
-  %cmp.2 = icmp ult i16 %wide.load, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_load_store
-; CHECK: %wide.load = load i16, i16* %bca, align 2
-; CHECK-NOT: trunc
-; CHECK: call void @consume(i8 0)
-}
-
-; The trunc can be replaced with the load value.
-; both loads left as-is, since they have uses.
-define i1 @trunc_load_load(i8* align 2 %a) {
-  %pload = load i8, i8* %a, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load i16, i16* %bca, align 2
-  %lowhalf = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  call void @consume(i8 %pload)
-  %cmp.2 = icmp ult i16 %wide.load, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_load_load
-; CHECK-NEXT: %pload = load i8, i8* %a, align 2
-; CHECK-NEXT: %bca = bitcast i8* %a to i16*
-; CHECK-NEXT: %wide.load = load i16, i16* %bca, align 2
-; CHECK-NEXT: call void @consume(i8 %pload)
-; CHECK-NEXT: call void @consume(i8 %pload)
-; CHECK-NEXT: %cmp.2 = icmp ult i16 %wide.load, 256
-}
-
-; Store and load to same memory location address generated through GEP.
-; trunc can be removed by using the store value.
-define void @trunc_with_gep_memaccess(i16* align 2 %p) {
-  %t0 = getelementptr i16, i16* %p, i32 1
-  store i16 2, i16* %t0
-  %t1 = getelementptr i16, i16* %p, i32 1
-  %x = load i16, i16* %t1
-  %lowhalf = trunc i16 %x to i8
-  call void @consume(i8 %lowhalf)
-  ret void
-; CHECK-LABEL: @trunc_with_gep_memaccess
-; CHECK-NOT: trunc
-; CHECK: call void @consume(i8 2)
-}
-
-; trunc should not be replaced since atomic load %wide.load has more than one use.
-; different values can be seen by the uses of %wide.load in case of race.
-define i1 @trunc_atomic_loads(i8* align 2 %a) {
-  %pload = load atomic i8, i8* %a unordered, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load atomic i16, i16* %bca unordered, align 2
-  %lowhalf = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  call void @consume(i8 %pload)
-  %cmp.2 = icmp ult i16 %wide.load, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_atomic_loads
-; CHECK: trunc
-}
-
-; trunc can be replaced since atomic load has single use.
-; atomic load is also removed since use is removed.
-define void @trunc_atomic_single_load(i8* align 2 %a) {
-  %pload = load atomic i8, i8* %a unordered, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load atomic i16, i16* %bca unordered, align 2
-  %lowhalf = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  call void @consume(i8 %pload)
-  ret void
-; CHECK-LABEL: @trunc_atomic_single_load
-; CHECK-NOT: trunc
-; CHECK-NOT: %wide.load = load atomic i16, i16* %bca unordered, align 2
-}
-
-; trunc cannot be replaced since load's atomic ordering is higher than unordered
-define void @trunc_atomic_monotonic(i8* align 2 %a) {
-  %pload = load atomic i8, i8* %a monotonic, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load atomic i16, i16* %bca monotonic, align 2
-  %lowhalf = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  call void @consume(i8 %pload)
-  ret void
-; CHECK-LABEL: @trunc_atomic_monotonic
-; CHECK: %wide.load = load atomic i16, i16* %bca monotonic, align 2
-; CHECK: trunc
-}
-
-; trunc cannot be replaced since store size (i16) is not trunc result size (i8).
-; FIXME: we could get the i8 content of trunc from the i16 store value.
-define i1 @trunc_different_size_load(i16 * align 2 %a) {
-  store i16 0, i16 *%a, align 2
-  %bca = bitcast i16* %a to i32*
-  %wide.load = load i32, i32* %bca, align 2
-  %lowhalf = trunc i32 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  %cmp.2 = icmp ult i32 %wide.load, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_different_size_load
-; CHECK: %lowhalf = trunc i32 %wide.load to i8
-}
-
-declare void @consume_f(float) readonly
-; bitcast required since trunc result type and %fload are different types.
-; so replace the trunc with bitcast.
-define i1 @trunc_avoid_bitcast(float* %b) {
-  %fload = load float, float* %b
-  %bca = bitcast float* %b to i64*
-  %iload = load i64, i64* %bca
-  %low32 = trunc i64 %iload to i32
-  call void @consume_f(float %fload)
-  %cmp.2 = icmp ult i32 %low32, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_avoid_bitcast
-; CHECK-NOT: %low32 = trunc i64 %iload to i32
-; CHECK: %low32.cast = bitcast float %fload to i32
-; CHECK: %cmp.2 = icmp ult i32 %low32.cast, 256
-}