mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
When fast-isel'ing a GEP, accumulate the constant offset rather than emitting a series of
ADDs. MaxOffs is used as a threshold to limit the size of the accumulated offset. The tradeoffs are: (1) If we can't materialize the large constant, we'll cause fast-isel to bail. (2) Too large an offset can't be directly encoded in the ADD, resulting in a MOV+ADD. That is generally not a bad thing, because otherwise we would have had ADD+ADD, but on Thumb this turns into a MOVS+MOVT+ADD. Working on a fix for that. (3) Conversely, with too low a threshold we'll miss opportunities to coalesce ADDs. rdar://10412592 llvm-svn: 144886
This commit is contained in:
parent
4d39196041
commit
2673f8862f
@ -437,6 +437,11 @@ bool FastISel::SelectGetElementPtr(const User *I) {
|
||||
|
||||
bool NIsKill = hasTrivialKill(I->getOperand(0));
|
||||
|
||||
// Keep a running tab of the total offset to coalesce multiple N = N + Offset
|
||||
// into a single N = N + TotalOffset.
|
||||
uint64_t TotalOffs = 0;
|
||||
// FIXME: What's a good SWAG number for MaxOffs?
|
||||
uint64_t MaxOffs = 2048;
|
||||
Type *Ty = I->getOperand(0)->getType();
|
||||
MVT VT = TLI.getPointerTy();
|
||||
for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
|
||||
@ -446,14 +451,15 @@ bool FastISel::SelectGetElementPtr(const User *I) {
|
||||
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
|
||||
if (Field) {
|
||||
// N = N + Offset
|
||||
uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
|
||||
// FIXME: This can be optimized by combining the add with a
|
||||
// subsequent one.
|
||||
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
|
||||
TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
|
||||
if (TotalOffs >= MaxOffs) {
|
||||
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
|
||||
if (N == 0)
|
||||
// Unhandled operand. Halt "fast" selection and bail.
|
||||
return false;
|
||||
NIsKill = true;
|
||||
TotalOffs = 0;
|
||||
}
|
||||
}
|
||||
Ty = StTy->getElementType(Field);
|
||||
} else {
|
||||
@ -462,15 +468,27 @@ bool FastISel::SelectGetElementPtr(const User *I) {
|
||||
// If this is a constant subscript, handle it quickly.
|
||||
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
|
||||
if (CI->isZero()) continue;
|
||||
uint64_t Offs =
|
||||
// N = N + Offset
|
||||
TotalOffs +=
|
||||
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
|
||||
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
|
||||
if (TotalOffs >= MaxOffs) {
|
||||
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
|
||||
if (N == 0)
|
||||
// Unhandled operand. Halt "fast" selection and bail.
|
||||
return false;
|
||||
NIsKill = true;
|
||||
TotalOffs = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (TotalOffs) {
|
||||
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
|
||||
if (N == 0)
|
||||
// Unhandled operand. Halt "fast" selection and bail.
|
||||
return false;
|
||||
NIsKill = true;
|
||||
TotalOffs = 0;
|
||||
}
|
||||
|
||||
// N = N + Idx * ElementSize;
|
||||
uint64_t ElementSize = TD.getTypeAllocSize(Ty);
|
||||
@ -494,6 +512,12 @@ bool FastISel::SelectGetElementPtr(const User *I) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (TotalOffs) {
|
||||
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
|
||||
if (N == 0)
|
||||
// Unhandled operand. Halt "fast" selection and bail.
|
||||
return false;
|
||||
}
|
||||
|
||||
// We successfully emitted code for the given LLVM Instruction.
|
||||
UpdateValueMap(I, N);
|
||||
|
65
test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
Normal file
65
test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
Normal file
@ -0,0 +1,65 @@
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
|
||||
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
|
||||
|
||||
%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
|
||||
%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
|
||||
|
||||
@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4
|
||||
@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4
|
||||
@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4
|
||||
|
||||
; t1 stores a constant GEP into @arr whose five array indices are all 1.
; With 4-byte i32 elements the per-level byte offsets are 64 + 32 + 16 + 8 + 4,
; which sum to 124; the checks below look for an add of #124 on both targets.
define i32* @t1() nounwind {
|
||||
entry:
|
||||
; ARM: t1
|
||||
; THUMB: t1
|
||||
%addr = alloca i32*, align 4
|
||||
store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
|
||||
; ARM: add r0, r0, #124
|
||||
; THUMB: adds r0, #124
|
||||
%0 = load i32** %addr, align 4
|
||||
ret i32* %0
|
||||
}
|
||||
|
||||
; t2 stores a constant GEP into @A with indices 2, 2, field 3, 1, 2, 2.
; Assuming %struct.A occupies 132 bytes with field 3 at offset 24 (i32 4-byte
; aligned on these triples -- confirm against the data layout), the offsets are
; 792 + 264 + 24 + 36 + 24 + 8 = 1148, the constant the checks below look for.
; The ARM-mode checks expect the constant in a movw followed by a register add;
; the Thumb-mode check expects an addw with the immediate.
define i32* @t2() nounwind {
|
||||
entry:
|
||||
; ARM: t2
|
||||
; THUMB: t2
|
||||
%addr = alloca i32*, align 4
|
||||
store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
|
||||
; ARM: movw r1, #1148
|
||||
; ARM: add r0, r0, r1
|
||||
; THUMB: addw r0, r0, #1148
|
||||
%0 = load i32** %addr, align 4
|
||||
ret i32* %0
|
||||
}
|
||||
|
||||
; t3 stores a constant GEP into @A with indices 0, 1, field 1, 0, 1.
; Assuming %struct.A occupies 132 bytes with field 1 at offset 4 (confirm
; against the data layout), the offsets are 132 + 4 + 0 + 4 = 140, the
; immediate the checks below look for on both targets.
define i32* @t3() nounwind {
|
||||
entry:
|
||||
; ARM: t3
|
||||
; THUMB: t3
|
||||
%addr = alloca i32*, align 4
|
||||
store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
|
||||
; ARM: add r0, r0, #140
|
||||
; THUMB: adds r0, #140
|
||||
%0 = load i32** %addr, align 4
|
||||
ret i32* %0
|
||||
}
|
||||
|
||||
; t4 stores a constant GEP into @B that walks through %struct.B and a nested
; %struct.A. The individual steps contribute 1060, 4, 132, 24, 36, 24 and 4
; bytes (the same values listed in the negative checks below), which sum to
; 1284. The negative checks guard against any of those partial offsets being
; emitted on their own; the positive checks look for the combined constant 1284.
define i32* @t4() nounwind {
|
||||
entry:
|
||||
; ARM: t4
|
||||
; THUMB: t4
|
||||
%addr = alloca i32*, align 4
|
||||
store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
|
||||
; ARM-NOT: movw r{{[0-9]}}, #1060
|
||||
; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
|
||||
; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132
|
||||
; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
|
||||
; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #36
|
||||
; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
|
||||
; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
|
||||
; ARM: movw r{{[0-9]}}, #1284
|
||||
; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284
|
||||
%0 = load i32** %addr, align 4
|
||||
ret i32* %0
|
||||
}
|
Loading…
Reference in New Issue
Block a user