From 3657df88ef2e4823184c3d9977f5f7ce31f72b26 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra@google.com>
Date: Fri, 11 May 2018 21:13:19 +0000
Subject: [PATCH] [Split GEP] handle trunc() in separate-const-offset-from-gep
 pass.

Let the separate-const-offset-from-gep pass handle trunc() when it
calculates the constant offset relative to the base pointer.

The pass itself may insert trunc() instructions when it canonicalizes
array indices to pointer-size integers, so it needs to handle trunc()
in order to evaluate the offset.
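As an illustration (not part of the patch; the %base and %result names
are invented for this sketch), the new trunk_explicit test below shows
the kind of GEP the pass can now split. Before the pass, the trunc() in
the index chain hid the constant index from the offset extractor:

    %idx0 = trunc i64 1 to i32
    %result = getelementptr inbounds %struct0, %struct0* %ptr, i32 %idx0, i32 3, i64 %idx, i32 1

After the pass, the variadic part and the constant offset are separated,
so the constant part can later fold into the addressing mode:

    %base = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
    %result = getelementptr inbounds %struct2, %struct2* %base, i64 151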
Differential Revision: https://reviews.llvm.org/D46732

llvm-svn: 332142
---
 .../Scalar/SeparateConstOffsetFromGEP.cpp |  9 ++++-
 .../NVPTX/split-gep-and-gvn.ll            | 10 ++---
 .../NVPTX/split-gep.ll                    | 39 ++++++++++++++-----
 3 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index e8a8328d24c..534fdfdb5e3 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -587,6 +587,10 @@ APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
     // Trace into subexpressions for more hoisting opportunities.
     if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
       ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
+  } else if (isa<TruncInst>(V)) {
+    ConstantOffset =
+        find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
+            .trunc(BitWidth);
   } else if (isa<SExtInst>(V)) {
     ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
                           ZeroExtended, NonNegative).sext(BitWidth);
@@ -651,8 +655,9 @@ ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
   }
 
   if (CastInst *Cast = dyn_cast<CastInst>(U)) {
-    assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
-           "We only traced into two types of CastInst: sext and zext");
+    assert(
+        (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
+        "Only following instructions can be traced: sext, zext & trunc");
     ExtInsts.push_back(Cast);
     UserChain[ChainIndex] = nullptr;
     return distributeExtsAndCloneChain(ChainIndex - 1);
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 0b65035117a..4f9e0ec88ad 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX
-; RUN: opt < %s -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn | FileCheck %s --check-prefix=IR
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_20 \
+; RUN:   | FileCheck %s --check-prefix=PTX
+; RUN: opt < %s -mtriple=nvptx64-nvidia-cuda -S -separate-const-offset-from-gep \
+; RUN:   -reassociate-geps-verify-no-dead-code -gvn \
+; RUN:   | FileCheck %s --check-prefix=IR
 
 ; Verifies the SeparateConstOffsetFromGEP pass.
 ; The following code computes
@@ -12,9 +15,6 @@
 ;
 ; so the backend can emit PTX that uses fewer virtual registers.
 
-target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
-target triple = "nvptx64-unknown-unknown"
-
 @array = internal addrspace(3) constant [32 x [32 x float]] zeroinitializer, align 4
 
 define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index eeeac196374..917e0587ae9 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -1,14 +1,10 @@
-; RUN: opt < %s -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -S | FileCheck %s
+; RUN: opt < %s -mtriple=nvptx64-nvidia-cuda -separate-const-offset-from-gep \
+; RUN:   -reassociate-geps-verify-no-dead-code -S | FileCheck %s
 
 ; Several unit tests for -separate-const-offset-from-gep. The transformation
 ; heavily relies on TargetTransformInfo, so we put these tests under
 ; target-specific folders.
 
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-; target triple is necessary; otherwise TargetTransformInfo rejects any
-; addressing mode.
-target triple = "nvptx64-unknown-unknown"
-
 %struct.S = type { float, double }
 
 @struct_array = global [1024 x %struct.S] zeroinitializer, align 16
@@ -271,9 +267,34 @@ entry:
 ; CHECK-NOT: add
   %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
 ; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
-; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
-; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64
-; CHECK: bitcast
+; CHECK: getelementptr inbounds %struct2, %struct2* [[PTR]], i64 -3
+  ret %struct2* %ptr2
+; CHECK-NEXT: ret
+}
+
+; Check that we can see through an explicit trunc() instruction.
+define %struct2* @trunk_explicit(%struct0* %ptr, i64 %idx) {
+; CHECK-LABEL: @trunk_explicit(
+entry:
+  %idx0 = trunc i64 1 to i32
+  %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i32 %idx0, i32 3, i64 %idx, i32 1
+; CHECK-NOT: trunc
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
+; CHECK: getelementptr inbounds %struct2, %struct2* %0, i64 151
+  ret %struct2* %ptr2
+; CHECK-NEXT: ret
+}
+
+; Check that we can deal with trunc inserted by
+; canonicalizeArrayIndicesToPointerSize() if the size of an index is larger
+; than that of the pointer.
+define %struct2* @trunk_long_idx(%struct0* %ptr, i64 %idx) {
+; CHECK-LABEL: @trunk_long_idx(
+entry:
+  %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i65 1, i32 3, i64 %idx, i32 1
+; CHECK-NOT: trunc
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
+; CHECK: getelementptr inbounds %struct2, %struct2* %0, i64 151
   ret %struct2* %ptr2
 ; CHECK-NEXT: ret
 }