InferAddressSpaces: Search constant expressions for addrspacecasts

These are pretty common when using local memory, and the 64-bit generic
addressing is much more expensive to compute.

llvm-svn: 301711

commit 4a1b9ae80b
parent 7cd832b016
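In LLVM IR terms, the payoff looks like the @repeated_constexpr_gep_addrspacecast test added below; here is a minimal before/after sketch assembled from that test (@lds is the test's [648 x double] local-memory array):

; Before: the address is computed through a generic (addrspace(4)) constant
; expression and only cast back to local memory at the use.
%gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
%asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc0, align 8

; After: the pass now searches the nested constant expression for the
; addrspacecast, so the access stays entirely in addrspace(3).
%gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
store double 1.0, double addrspace(3)* %gep0, align 8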
@@ -138,7 +138,7 @@ private:
   // Tries to infer the specific address space of each address expression in
   // Postorder.
-  void inferAddressSpaces(const std::vector<Value *> &Postorder,
+  void inferAddressSpaces(ArrayRef<WeakVH> Postorder,
                           ValueToAddrSpaceMapTy *InferredAddrSpace) const;
 
   bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
@@ -147,7 +147,7 @@ private:
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all flat expressions in the use-def graph of function F.
  bool
-  rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
+  rewriteWithNewAddressSpaces(ArrayRef<WeakVH> Postorder,
                               const ValueToAddrSpaceMapTy &InferredAddrSpace,
                               Function *F) const;
 
@@ -162,7 +162,7 @@ private:
     Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
     DenseSet<Value *> &Visited) const;
 
-  std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
+  std::vector<WeakVH> collectFlatAddressExpressions(Function &F) const;
 
   Value *cloneValueWithNewAddressSpace(
     Value *V, unsigned NewAddrSpace,
@@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
     Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
     DenseSet<Value *> &Visited) const {
   assert(V->getType()->isPointerTy());
 
+  // Generic addressing expressions may be hidden in nested constant
+  // expressions.
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    // TODO: Look in non-address parts, like icmp operands.
+    if (isAddressExpression(*CE) && Visited.insert(CE).second)
+      PostorderStack.push_back(std::make_pair(CE, false));
+
+    return;
+  }
+
   if (isAddressExpression(*V) &&
       V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
-    if (Visited.insert(V).second)
+    if (Visited.insert(V).second) {
       PostorderStack.push_back(std::make_pair(V, false));
+
+      Operator *Op = cast<Operator>(V);
+      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
+        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
+          if (isAddressExpression(*CE) && Visited.insert(CE).second)
+            PostorderStack.emplace_back(CE, false);
+        }
+      }
+    }
   }
 }
 
 // Returns all flat address expressions in function F. The elements are
 // ordered in postorder.
-std::vector<Value *>
+std::vector<WeakVH>
 InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
   // This function implements a non-recursive postorder traversal of a partial
   // use-def graph of function F.
@@ -332,18 +352,19 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
     }
   }
 
-  std::vector<Value *> Postorder; // The resultant postorder.
+  std::vector<WeakVH> Postorder; // The resultant postorder.
   while (!PostorderStack.empty()) {
+    Value *TopVal = PostorderStack.back().first;
     // If the operands of the expression on the top are already explored,
     // adds that expression to the resultant postorder.
     if (PostorderStack.back().second) {
-      Postorder.push_back(PostorderStack.back().first);
+      Postorder.push_back(TopVal);
       PostorderStack.pop_back();
       continue;
     }
     // Otherwise, adds its operands to the stack and explores them.
     PostorderStack.back().second = true;
-    for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
+    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
       appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                    Visited);
     }
@@ -562,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
     return false;
 
   // Collects all flat address expressions in postorder.
-  std::vector<Value *> Postorder = collectFlatAddressExpressions(F);
+  std::vector<WeakVH> Postorder = collectFlatAddressExpressions(F);
 
   // Runs a data-flow analysis to refine the address spaces of every expression
   // in Postorder.
@@ -574,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
   return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
 }
 
+// Constants need to be tracked through RAUW to handle cases with nested
+// constant expressions, so wrap values in WeakVH.
 void InferAddressSpaces::inferAddressSpaces(
-    const std::vector<Value *> &Postorder,
+    ArrayRef<WeakVH> Postorder,
     ValueToAddrSpaceMapTy *InferredAddrSpace) const {
   SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
   // Initially, all expressions are in the uninitialized address space.
@@ -787,7 +810,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
 }
 
 bool InferAddressSpaces::rewriteWithNewAddressSpaces(
-    const std::vector<Value *> &Postorder,
+    ArrayRef<WeakVH> Postorder,
     const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
   // For each address expression to be modified, creates a clone of it with its
   // pointer operands converted to the new address space. Since the pointer
@@ -818,7 +841,9 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
   SmallVector<Instruction *, 16> DeadInstructions;
 
   // Replaces the uses of the old address expressions with the new ones.
-  for (Value *V : Postorder) {
+  for (const WeakVH &WVH : Postorder) {
+    assert(WVH && "value was unexpectedly deleted");
+    Value *V = WVH;
     Value *NewV = ValueWithNewAddrSpace.lookup(V);
     if (NewV == nullptr)
       continue;
@@ -826,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
     DEBUG(dbgs() << "Replacing the uses of " << *V
                  << "\n with\n " << *NewV << '\n');
 
+    if (Constant *C = dyn_cast<Constant>(V)) {
+      Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+                                                         C->getType());
+      if (C != Replace) {
+        DEBUG(dbgs() << "Inserting replacement const cast: "
+              << Replace << ": " << *Replace << '\n');
+        C->replaceAllUsesWith(Replace);
+        V = Replace;
+      }
+    }
+
     Value::use_iterator I, E, Next;
     for (I = V->use_begin(), E = V->use_end(); I != E; ) {
       Use &U = *I;
@@ -15,9 +15,8 @@ define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
   ret void
 }
 
-; FIXME: Should be able to eliminate inner constantexpr addrspacecast.
 ; CHECK-LABEL: @constexpr_gep_addrspacecast(
-; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* addrspacecast (double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384) to double addrspace(3)*), i64 %idx0
+; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
 ; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
 define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
   %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
@@ -54,3 +53,21 @@ define amdgpu_kernel void @vector_gep(<4 x [1024 x i32] addrspace(3)*> %array) n
   store i32 99, i32 addrspace(4)* %p3
   ret void
 }
+
+; CHECK-LABEL: @repeated_constexpr_gep_addrspacecast(
+; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
+; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx1
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
+; CHECK-NEXT: ret void
+define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
+  %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
+  %asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+  store double 1.0, double addrspace(3)* %asc0, align 8
+
+  %gep1 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx1
+  %asc1 = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+  store double 1.0, double addrspace(3)* %asc1, align 8
+
+  ret void
+}
@@ -10,7 +10,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 ; CHECK: %tmp = load float*, float* addrspace(3)* getelementptr inbounds (%struct.bar, %struct.bar addrspace(3)* @var1, i64 0, i32 1), align 8
 ; CHECK: %tmp1 = load float, float* %tmp, align 4
 ; CHECK: store float %conv1, float* %tmp, align 4
-; CHECK: store i32 32, i32 addrspace(3)* addrspacecast (i32* bitcast (float** getelementptr (%struct.bar, %struct.bar* addrspacecast (%struct.bar addrspace(3)* @var1 to %struct.bar*), i64 0, i32 1) to i32*) to i32 addrspace(3)*), align 4
+; CHECK: store i32 32, i32 addrspace(3)* bitcast (float* addrspace(3)* getelementptr inbounds (%struct.bar, %struct.bar addrspace(3)* @var1, i64 0, i32 1) to i32 addrspace(3)*), align 4
 define void @bug31948(float %a, float* nocapture readnone %x, float* nocapture readnone %y) local_unnamed_addr #0 {
 entry:
   %tmp = load float*, float** getelementptr (%struct.bar, %struct.bar* addrspacecast (%struct.bar addrspace(3)* @var1 to %struct.bar*), i64 0, i32 1), align 8