1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00
Stack is formed improperly for long structures passed as byval arguments for
EABI mode.

If we took AAPCS reference, we can found the next statements:

A: "If the argument requires double-word alignment (8-byte), the NCRN (Next
Core Register Number) is rounded up to the next even register number." (5.5
Parameter Passing, Stage C, C.3).

B: "The alignment of an aggregate shall be the alignment of its most-aligned
component." (4.3 Composite Types, 4.3.1 Aggregates).

So if we have structure with doubles (9 double fields) and 3 Core unused
registers (r1, r2, r3): caller should use r2 and r3 registers only.
Currently r1,r2,r3 set is used, but it is invalid.

Callee VA routine should also use r2 and r3 regs only. All is ok here. This
behaviour is guessed by rounding up SP address with ADD+BFC operations.

Fix:
Main fix is in ARMTargetLowering::HandleByVal. If we detected AAPCS mode and
8 byte alignment, we waste odd registers then.

P.S.:
I also improved LDRB_POST_IMM regression test. Since ldrb instruction will
not generated by current regression test after this patch. 

llvm-svn: 166018
This commit is contained in:
Stepan Dyatkovskiy 2012-10-16 07:16:47 +00:00
parent 83458d4d01
commit 09c6b0a273
6 changed files with 83 additions and 25 deletions

View File

@ -1366,7 +1366,7 @@ public:
}
/// HandleByVal - Target-specific cleanup for formal ByVal parameters.
virtual void HandleByVal(CCState *, unsigned &) const {}
virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
/// CanLowerReturn - This hook should be implemented to check whether the
/// return values described by the Outs array can fit into the return

View File

@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
if (MinAlign > (int)Align)
Align = MinAlign;
MF.getFrameInfo()->ensureMaxAlignment(Align);
TM.getTargetLowering()->HandleByVal(this, Size);
TM.getTargetLowering()->HandleByVal(this, Size, Align);
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}

View File

@ -1656,22 +1656,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// and then confiscate the rest of the parameter registers to insure
/// this.
void
ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
ARMTargetLowering::HandleByVal(
CCState *State, unsigned &size, unsigned Align) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
if ((!State->isFirstByValRegValid()) &&
(ARM::R0 <= reg) && (reg <= ARM::R3)) {
State->setFirstByValReg(reg);
// At a call site, a byval parameter that is split between
// registers and memory needs its size truncated here. In a
// function prologue, such byval parameters are reassembled in
// memory, and are not truncated.
if (State->getCallOrPrologue() == Call) {
unsigned excess = 4 * (ARM::R4 - reg);
assert(size >= excess && "expected larger existing stack allocation");
size -= excess;
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
for (unsigned i = 0; i < Waste; ++i)
reg = State->AllocateReg(GPRArgRegs, 4);
}
if (reg != 0) {
State->setFirstByValReg(reg);
// At a call site, a byval parameter that is split between
// registers and memory needs its size truncated here. In a
// function prologue, such byval parameters are reassembled in
// memory, and are not truncated.
if (State->getCallOrPrologue() == Call) {
unsigned excess = 4 * (ARM::R4 - reg);
assert(size >= excess && "expected larger existing stack allocation");
size -= excess;
}
}
}
// Confiscate any remaining parameter registers to preclude their

View File

@ -480,7 +480,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
/// HandleByVal - Target-specific cleanup for ByVal support.
virtual void HandleByVal(CCState *, unsigned &) const;
virtual void HandleByVal(CCState *, unsigned &, unsigned) const;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call

View File

@ -0,0 +1,56 @@
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
; Test that we correctly use registers and align elements when using va_arg
%struct_t = type { double, double, double }
@static_val = constant %struct_t { double 1.0, double 2.0, double 3.0 }
declare void @llvm.va_start(i8*) nounwind
declare void @llvm.va_end(i8*) nounwind
; CHECK: test_byval_8_bytes_alignment:
define void @test_byval_8_bytes_alignment(i32 %i, ...) {
entry:
; CHECK: stm r0, {r1, r2, r3}
%g = alloca i8*
%g1 = bitcast i8** %g to i8*
call void @llvm.va_start(i8* %g1)
; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
; CHECK: bfc [[REG]], #0, #3
%0 = va_arg i8** %g, double
call void @llvm.va_end(i8* %g1)
ret void
}
; CHECK: main:
; CHECK: ldm r0, {r2, r3}
define i32 @main() {
entry:
call void (i32, ...)* @test_byval_8_bytes_alignment(i32 555, %struct_t* byval @static_val)
ret i32 0
}
declare void @f(double);
; CHECK: test_byval_8_bytes_alignment_fixed_arg:
; CHECK-NOT: str r1
; CHECK: str r3, [sp, #12]
; CHECK: str r2, [sp, #8]
; CHECK-NOT: str r1
define void @test_byval_8_bytes_alignment_fixed_arg(i32 %n1, %struct_t* byval %val) nounwind {
entry:
%a = getelementptr inbounds %struct_t* %val, i32 0, i32 0
%0 = load double* %a
call void (double)* @f(double %0)
ret void
}
; CHECK: main_fixed_arg:
; CHECK: ldm r0, {r2, r3}
define i32 @main_fixed_arg() {
entry:
call void (i32, %struct_t*)* @test_byval_8_bytes_alignment_fixed_arg(i32 555, %struct_t* byval @static_val)
ret i32 0
}

View File

@ -1,23 +1,16 @@
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
; RUN: llc < %s -mtriple=armv7-none-linux- | FileCheck %s
; Check that LDRB_POST_IMM instruction emitted properly.
%my_struct_t = type { double, double, double }
@main.val = private unnamed_addr constant %my_struct_t { double 1.0, double 2.0, double 3.0 }, align 8
declare void @f(i32 %n1, %my_struct_t* byval %val);
%my_struct_t = type { i8, i8, i8, i8, i8 }
@main.val = private unnamed_addr constant %my_struct_t { i8 1, i8 2, i8 3, i8 4, i8 5 }
declare void @f(i32 %n1, i32 %n2, i32 %n3, %my_struct_t* byval %val);
; CHECK: main:
define i32 @main() nounwind {
entry:
%val = alloca %my_struct_t, align 8
%0 = bitcast %my_struct_t* %val to i8*
; CHECK: ldrb {{(r[0-9]+)}}, {{(\[r[0-9]+\])}}, #1
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast (%my_struct_t* @main.val to i8*), i32 24, i32 8, i1 false)
call void @f(i32 555, %my_struct_t* byval %val)
call void @f(i32 555, i32 555, i32 555, %my_struct_t* byval @main.val)
ret i32 0
}
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind