From 2673f8862f36ecb1afed51259845a608531e8689 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 17 Nov 2011 07:15:58 +0000 Subject: [PATCH] When fast iseling a GEP, accumulate the offset rather than emitting a series of ADDs. MaxOffs is used as a threshold to limit the size of the offset. Tradeoffs being: (1) If we can't materialize the large constant then we'll cause fast-isel to bail. (2) Too large of an offset can't be directly encoded in the ADD resulting in a MOV+ADD. Generally not a bad thing because otherwise we would have had ADD+ADD, but on Thumb this turns into a MOVS+MOVT+ADD. Working on a fix for that. (3) Conversely, too low of a threshold we'll miss opportunities to coalesce ADDs. rdar://10412592 llvm-svn: 144886 --- lib/CodeGen/SelectionDAG/FastISel.cpp | 46 +++++++++++---- test/CodeGen/ARM/fast-isel-GEP-coalesce.ll | 65 ++++++++++++++++++++++ 2 files changed, 100 insertions(+), 11 deletions(-) create mode 100644 test/CodeGen/ARM/fast-isel-GEP-coalesce.ll diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index f0fe179e050..cff37c2c860 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -437,6 +437,11 @@ bool FastISel::SelectGetElementPtr(const User *I) { bool NIsKill = hasTrivialKill(I->getOperand(0)); + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + // FIXME: What's a good SWAG number for MaxOffs? + uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, @@ -446,14 +451,15 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast(Idx)->getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); - // FIXME: This can be optimized by combining the add with a - // subsequent one. - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - NIsKill = true; + TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } } Ty = StTy->getElementType(Field); } else { @@ -462,14 +468,26 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast(Idx)) { if (CI->isZero()) continue; - uint64_t Offs = + // N = N + Offset + TotalOffs += TD.getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + continue; + } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; - continue; + TotalOffs = 0; } // N = N + Idx * ElementSize; @@ -494,6 +512,12 @@ bool FastISel::SelectGetElementPtr(const User *I) { return false; } } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, N); diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll new file mode 100644 index 00000000000..dbb634df0a1 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB + +%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] } +%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] } + +@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4 +@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4 +@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4 + +define i32* @t1() nounwind { +entry: +; ARM: t1 +; THUMB: t1 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4 +; ARM: add r0, r0, #124 +; THUMB: adds r0, #124 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t2() nounwind { +entry: +; ARM: t2 +; THUMB: t2 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4 +; ARM: movw r1, #1148 +; ARM: add r0, r0, r1 +; THUMB: addw r0, r0, #1148 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t3() nounwind { +entry: +; ARM: t3 +; THUMB: t3 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4 +; ARM: add r0, r0, #140 +; THUMB: adds r0, #140 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t4() nounwind { +entry: +; ARM: t4 +; THUMB: t4 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4 +; ARM-NOT: movw r{{[0-9]}}, #1060 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #36 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4 +; ARM: movw r{{[0-9]}}, #1284 +; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284 + %0 = load i32** %addr, align 4 + ret i32* %0 +}