mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
ARM: teach AAPCS-VFP to deal with Cortex-M4.
Cortex-M4 only has single-precision floating-point support, so any LLVM "double" type will already have been split into two i32s by the time arguments are assigned to registers. Fortunately, the consecutive-register framework turns out to be precisely what's needed to reconstruct the double and follow AAPCS-VFP correctly! rdar://problem/17012966 llvm-svn: 209650
This commit is contained in:
parent
f7e05eb5c0
commit
2172cefdfd
@ -7176,11 +7176,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
|||||||
}
|
}
|
||||||
if (Args[i].isNest)
|
if (Args[i].isNest)
|
||||||
Flags.setNest();
|
Flags.setNest();
|
||||||
if (NeedsRegBlock) {
|
if (NeedsRegBlock)
|
||||||
Flags.setInConsecutiveRegs();
|
Flags.setInConsecutiveRegs();
|
||||||
if (Value == NumValues - 1)
|
|
||||||
Flags.setInConsecutiveRegsLast();
|
|
||||||
}
|
|
||||||
Flags.setOrigAlign(OriginalAlignment);
|
Flags.setOrigAlign(OriginalAlignment);
|
||||||
|
|
||||||
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
|
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
|
||||||
@ -7226,6 +7223,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
|||||||
else if (j != 0)
|
else if (j != 0)
|
||||||
MyFlags.Flags.setOrigAlign(1);
|
MyFlags.Flags.setOrigAlign(1);
|
||||||
|
|
||||||
|
// Only mark the end at the last register of the last value.
|
||||||
|
if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1)
|
||||||
|
MyFlags.Flags.setInConsecutiveRegsLast();
|
||||||
|
|
||||||
CLI.Outs.push_back(MyFlags);
|
CLI.Outs.push_back(MyFlags);
|
||||||
CLI.OutVals.push_back(Parts[j]);
|
CLI.OutVals.push_back(Parts[j]);
|
||||||
}
|
}
|
||||||
@ -7412,11 +7413,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||||||
}
|
}
|
||||||
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
|
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
|
||||||
Flags.setNest();
|
Flags.setNest();
|
||||||
if (NeedsRegBlock) {
|
if (NeedsRegBlock)
|
||||||
Flags.setInConsecutiveRegs();
|
Flags.setInConsecutiveRegs();
|
||||||
if (Value == NumValues - 1)
|
|
||||||
Flags.setInConsecutiveRegsLast();
|
|
||||||
}
|
|
||||||
Flags.setOrigAlign(OriginalAlignment);
|
Flags.setOrigAlign(OriginalAlignment);
|
||||||
|
|
||||||
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
|
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
|
||||||
@ -7429,6 +7427,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||||||
// if it isn't first piece, alignment must be 1
|
// if it isn't first piece, alignment must be 1
|
||||||
else if (i > 0)
|
else if (i > 0)
|
||||||
MyFlags.Flags.setOrigAlign(1);
|
MyFlags.Flags.setOrigAlign(1);
|
||||||
|
|
||||||
|
// Only mark the end at the last register of the last value.
|
||||||
|
if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1)
|
||||||
|
MyFlags.Flags.setInConsecutiveRegsLast();
|
||||||
|
|
||||||
Ins.push_back(MyFlags);
|
Ins.push_back(MyFlags);
|
||||||
}
|
}
|
||||||
PartBase += VT.getStoreSize();
|
PartBase += VT.getStoreSize();
|
||||||
|
@ -177,9 +177,8 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
|||||||
CCValAssign::LocInfo &LocInfo,
|
CCValAssign::LocInfo &LocInfo,
|
||||||
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
|
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
|
||||||
SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
|
SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
|
||||||
|
|
||||||
// AAPCS HFAs must have 1-4 elements, all of the same type
|
// AAPCS HFAs must have 1-4 elements, all of the same type
|
||||||
assert(PendingHAMembers.size() < 4);
|
assert(PendingHAMembers.size() < 8);
|
||||||
if (PendingHAMembers.size() > 0)
|
if (PendingHAMembers.size() > 0)
|
||||||
assert(PendingHAMembers[0].getLocVT() == LocVT);
|
assert(PendingHAMembers[0].getLocVT() == LocVT);
|
||||||
|
|
||||||
@ -189,7 +188,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
|||||||
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
|
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
|
||||||
|
|
||||||
if (ArgFlags.isInConsecutiveRegsLast()) {
|
if (ArgFlags.isInConsecutiveRegsLast()) {
|
||||||
assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
|
assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 &&
|
||||||
"Homogeneous aggregates must have between 1 and 4 members");
|
"Homogeneous aggregates must have between 1 and 4 members");
|
||||||
|
|
||||||
// Try to allocate a contiguous block of registers, each of the correct
|
// Try to allocate a contiguous block of registers, each of the correct
|
||||||
@ -197,6 +196,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
|||||||
const uint16_t *RegList;
|
const uint16_t *RegList;
|
||||||
unsigned NumRegs;
|
unsigned NumRegs;
|
||||||
switch (LocVT.SimpleTy) {
|
switch (LocVT.SimpleTy) {
|
||||||
|
case MVT::i32:
|
||||||
case MVT::f32:
|
case MVT::f32:
|
||||||
RegList = SRegList;
|
RegList = SRegList;
|
||||||
NumRegs = 16;
|
NumRegs = 16;
|
||||||
@ -235,11 +235,20 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
|||||||
State.AllocateReg(SRegList[regNo]);
|
State.AllocateReg(SRegList[regNo]);
|
||||||
|
|
||||||
unsigned Size = LocVT.getSizeInBits() / 8;
|
unsigned Size = LocVT.getSizeInBits() / 8;
|
||||||
unsigned Align = LocVT.SimpleTy == MVT::v2f64 ? 8 : Size;
|
unsigned Align = Size;
|
||||||
|
|
||||||
|
if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) {
|
||||||
|
// Vectors are always aligned to 8 bytes. If we've seen an i32 here
|
||||||
|
// it's because it's been split from a larger type, also with align 8.
|
||||||
|
Align = 8;
|
||||||
|
}
|
||||||
|
|
||||||
for (auto It : PendingHAMembers) {
|
for (auto It : PendingHAMembers) {
|
||||||
It.convertToMem(State.AllocateStack(Size, Align));
|
It.convertToMem(State.AllocateStack(Size, Align));
|
||||||
State.addLoc(It);
|
State.addLoc(It);
|
||||||
|
|
||||||
|
// Only the first member needs to be aligned.
|
||||||
|
Align = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// All pending members have now been allocated
|
// All pending members have now been allocated
|
||||||
|
@ -10778,14 +10778,13 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
|
|||||||
/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
|
/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
|
||||||
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
|
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
|
||||||
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
|
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
|
||||||
if (getEffectiveCallingConv(CallConv, isVarArg) ==
|
if (getEffectiveCallingConv(CallConv, isVarArg) !=
|
||||||
CallingConv::ARM_AAPCS_VFP) {
|
CallingConv::ARM_AAPCS_VFP)
|
||||||
HABaseType Base = HA_UNKNOWN;
|
|
||||||
uint64_t Members = 0;
|
|
||||||
bool result = isHomogeneousAggregate(Ty, Base, Members);
|
|
||||||
DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
|
|
||||||
return result;
|
|
||||||
} else {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
|
HABaseType Base = HA_UNKNOWN;
|
||||||
|
uint64_t Members = 0;
|
||||||
|
bool result = isHomogeneousAggregate(Ty, Base, Members);
|
||||||
|
DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
111
test/CodeGen/ARM/aapcs-hfa-code.ll
Normal file
111
test/CodeGen/ARM/aapcs-hfa-code.ll
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -o - | FileCheck %s
|
||||||
|
; RUN: llc < %s -mtriple=thumbv7em-none-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK-M4F
|
||||||
|
|
||||||
|
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc void @test_1float({ float } %a) {
|
||||||
|
call arm_aapcs_vfpcc void @test_1float({ float } { float 1.0 })
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_1float:
|
||||||
|
; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00
|
||||||
|
; CHECK: bl test_1float
|
||||||
|
|
||||||
|
; CHECK-M4F-LABEL: test_1float:
|
||||||
|
; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00
|
||||||
|
; CHECK-M4F: bl test_1float
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc void @test_2float({ float, float } %a) {
|
||||||
|
call arm_aapcs_vfpcc void @test_2float({ float, float } { float 1.0, float 2.0 })
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_2float:
|
||||||
|
; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00
|
||||||
|
; CHECK-DAG: vmov.f32 s1, #2.{{0+}}e+00
|
||||||
|
; CHECK: bl test_2float
|
||||||
|
|
||||||
|
; CHECK-M4F-LABEL: test_2float:
|
||||||
|
; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00
|
||||||
|
; CHECK-M4F-DAG: vmov.f32 s1, #2.{{0+}}e+00
|
||||||
|
; CHECK-M4F: bl test_2float
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc void @test_3float({ float, float, float } %a) {
|
||||||
|
call arm_aapcs_vfpcc void @test_3float({ float, float, float } { float 1.0, float 2.0, float 3.0 })
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_3float:
|
||||||
|
; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00
|
||||||
|
; CHECK-DAG: vmov.f32 s1, #2.{{0+}}e+00
|
||||||
|
; CHECK-DAG: vmov.f32 s2, #3.{{0+}}e+00
|
||||||
|
; CHECK: bl test_3float
|
||||||
|
|
||||||
|
; CHECK-M4F-LABEL: test_3float:
|
||||||
|
; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00
|
||||||
|
; CHECK-M4F-DAG: vmov.f32 s1, #2.{{0+}}e+00
|
||||||
|
; CHECK-M4F-DAG: vmov.f32 s2, #3.{{0+}}e+00
|
||||||
|
; CHECK-M4F: bl test_3float
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc void @test_1double({ double } %a) {
|
||||||
|
; CHECK-LABEL: test_1double:
|
||||||
|
; CHECK-DAG: vmov.f64 d0, #1.{{0+}}e+00
|
||||||
|
; CHECK: bl test_1double
|
||||||
|
|
||||||
|
; CHECK-M4F-LABEL: test_1double:
|
||||||
|
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
|
||||||
|
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||||
|
; CHECK-M4F: movt [[ONEHI]], #16368
|
||||||
|
; CHECK-M4F-DAG: vmov s0, [[ONELO]]
|
||||||
|
; CHECK-M4F-DAG: vmov s1, [[ONEHI]]
|
||||||
|
; CHECK-M4F: bl test_1double
|
||||||
|
|
||||||
|
call arm_aapcs_vfpcc void @test_1double({ double } { double 1.0 })
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Final double argument might be put in s15 & [sp] if we're careless. It should
|
||||||
|
; go all on the stack.
|
||||||
|
define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3 x float], double %a) {
|
||||||
|
; CHECK-LABEL: test_1double_nosplit:
|
||||||
|
; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
|
||||||
|
; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0
|
||||||
|
; CHECK-DAG: movt [[ONEHI]], #16368
|
||||||
|
; CHECK: strd [[ONELO]], [[ONEHI]], [sp]
|
||||||
|
; CHECK: bl test_1double_nosplit
|
||||||
|
|
||||||
|
; CHECK-M4F-LABEL: test_1double_nosplit:
|
||||||
|
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||||
|
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
|
||||||
|
; CHECK-M4F: movt [[ONEHI]], #16368
|
||||||
|
; CHECK-M4F-DAG: str [[ONELO]], [sp]
|
||||||
|
; CHECK-M4F-DAG: str [[ONEHI]], [sp, #4]
|
||||||
|
; CHECK-M4F: bl test_1double_nosplit
|
||||||
|
call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Final double argument might go at [sp, #4] if we're careless. Should go at
|
||||||
|
; [sp, #8] to preserve alignment.
|
||||||
|
define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double], float, double) {
|
||||||
|
call arm_aapcs_vfpcc void @test_1double_misaligned([4 x double] undef, [4 x double] undef, float undef, double 1.0)
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_1double_misaligned:
|
||||||
|
; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
|
||||||
|
; CHECK-DAG: mov r[[BASE:[0-9]+]], sp
|
||||||
|
; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0
|
||||||
|
; CHECK-DAG: movt [[ONEHI]], #16368
|
||||||
|
; CHECK-DAG: str [[ONELO]], [r[[BASE]], #8]!
|
||||||
|
; CHECK-DAG: str [[ONEHI]], [r[[BASE]], #4]
|
||||||
|
|
||||||
|
; CHECK-M4F-LABEL: test_1double_misaligned:
|
||||||
|
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||||
|
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
|
||||||
|
; CHECK-M4F: movt [[ONEHI]], #16368
|
||||||
|
; CHECK-M4F-DAG: str [[ONELO]], [sp, #8]
|
||||||
|
; CHECK-M4F-DAG: str [[ONEHI]], [sp, #12]
|
||||||
|
; CHECK-M4F: bl test_1double_misaligned
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user