mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[Split GEP] handle trunc() in separate-const-offset-from-gep pass.
Let the separate-const-offset-from-gep pass handle trunc() when it calculates the constant offset relative to the base. The pass itself may insert trunc() instructions when it canonicalises array indices to pointer-size integers, so it needs to handle trunc() in order to evaluate the offset. Differential Revision: https://reviews.llvm.org/D46732 llvm-svn: 332142
This commit is contained in:
parent
be274cfd1b
commit
3657df88ef
@ -587,6 +587,10 @@ APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
|
||||
// Trace into subexpressions for more hoisting opportunities.
|
||||
if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
|
||||
ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
|
||||
} else if (isa<TruncInst>(V)) {
|
||||
ConstantOffset =
|
||||
find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
|
||||
.trunc(BitWidth);
|
||||
} else if (isa<SExtInst>(V)) {
|
||||
ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
|
||||
ZeroExtended, NonNegative).sext(BitWidth);
|
||||
@ -651,8 +655,9 @@ ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
|
||||
}
|
||||
|
||||
if (CastInst *Cast = dyn_cast<CastInst>(U)) {
|
||||
assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
|
||||
"We only traced into two types of CastInst: sext and zext");
|
||||
assert(
|
||||
(isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
|
||||
"Only following instructions can be traced: sext, zext & trunc");
|
||||
ExtInsts.push_back(Cast);
|
||||
UserChain[ChainIndex] = nullptr;
|
||||
return distributeExtsAndCloneChain(ChainIndex - 1);
|
||||
|
@ -1,5 +1,8 @@
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX
|
||||
; RUN: opt < %s -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn | FileCheck %s --check-prefix=IR
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_20 \
|
||||
; RUN: | FileCheck %s --check-prefix=PTX
|
||||
; RUN: opt < %s -mtriple=nvptx64-nvidia-cuda -S -separate-const-offset-from-gep \
|
||||
; RUN: -reassociate-geps-verify-no-dead-code -gvn \
|
||||
; RUN: | FileCheck %s --check-prefix=IR
|
||||
|
||||
; Verifies the SeparateConstOffsetFromGEP pass.
|
||||
; The following code computes
|
||||
@ -12,9 +15,6 @@
|
||||
;
|
||||
; so the backend can emit PTX that uses fewer virtual registers.
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-unknown-unknown"
|
||||
|
||||
@array = internal addrspace(3) constant [32 x [32 x float]] zeroinitializer, align 4
|
||||
|
||||
define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
|
||||
|
@ -1,14 +1,10 @@
|
||||
; RUN: opt < %s -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -S | FileCheck %s
|
||||
; RUN: opt < %s -mtriple=nvptx64-nvidia-cuda -separate-const-offset-from-gep \
|
||||
; RUN: -reassociate-geps-verify-no-dead-code -S | FileCheck %s
|
||||
|
||||
; Several unit tests for -separate-const-offset-from-gep. The transformation
|
||||
; heavily relies on TargetTransformInfo, so we put these tests under
|
||||
; target-specific folders.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
; target triple is necessary; otherwise TargetTransformInfo rejects any
|
||||
; addressing mode.
|
||||
target triple = "nvptx64-unknown-unknown"
|
||||
|
||||
%struct.S = type { float, double }
|
||||
|
||||
@struct_array = global [1024 x %struct.S] zeroinitializer, align 16
|
||||
@ -271,9 +267,34 @@ entry:
|
||||
; CHECK-NOT: add
|
||||
%ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
|
||||
; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
|
||||
; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
|
||||
; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64
|
||||
; CHECK: bitcast
|
||||
; CHECK: getelementptr inbounds %struct2, %struct2* [[PTR]], i64 -3
|
||||
ret %struct2* %ptr2
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
|
||||
; Check that we can see through an explicit trunc() instruction.
|
||||
define %struct2* @trunk_explicit(%struct0* %ptr, i64 %idx) {
|
||||
; CHECK-LABEL: @trunk_explicit(
|
||||
entry:
|
||||
%idx0 = trunc i64 1 to i32
|
||||
%ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i32 %idx0, i32 3, i64 %idx, i32 1
|
||||
; CHECK-NOT: trunc
|
||||
; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
|
||||
; CHECK: getelementptr inbounds %struct2, %struct2* %0, i64 151
|
||||
ret %struct2* %ptr2
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
|
||||
; Check that we can deal with a trunc inserted by
|
||||
; canonicalizeArrayIndicesToPointerSize() if the size of an index is larger than
|
||||
; that of the pointer.
|
||||
define %struct2* @trunk_long_idx(%struct0* %ptr, i64 %idx) {
|
||||
; CHECK-LABEL: @trunk_long_idx(
|
||||
entry:
|
||||
%ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i65 1, i32 3, i64 %idx, i32 1
|
||||
; CHECK-NOT: trunc
|
||||
; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
|
||||
; CHECK: getelementptr inbounds %struct2, %struct2* %0, i64 151
|
||||
ret %struct2* %ptr2
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user