mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
2bad53a983
Summary: SPE passes doubles the same way as soft-float: in register pairs, as i32 types. This is all handled by the target-independent layer. However, this is not optimal when splitting or reforming the doubles, because the value is pushed to the stack and loaded back from it on either side. For instance, to pass a double argument to a function, assuming the double value is in r5, the sequence currently looks like this: `evstdd 5, X(1); lwz 3, X(1); lwz 4, X+4(1)`. Likewise, to form a double into r5 from args in r3 and r4: `stw 3, X(1); stw 4, X+4(1); evldd 5, X(1)`. This optimizes the fence to use SPE instructions. Now, to pass a double to a function: `mr 4, 5; evmergehi 3, 5, 5`. And to form a double into r5 from args in r3 and r4: `evmergelo 5, 3, 4`. This is comparable to the way that gcc generates the double splits. This also fixes a bug with expanding builtins to libcalls, where the LowerCallTo() code path was generating intermediate illegal type nodes. Reviewers: nemanjai, hfinkel, joerg. Subscribers: kbarton, jfb, jsji, llvm-commits. Differential Revision: https://reviews.llvm.org/D54583 llvm-svn: 363526
163 lines
6.2 KiB
C++
163 lines
6.2 KiB
C++
//===-- PPCCallingConv.cpp - ------------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "PPCRegisterInfo.h"
|
|
#include "PPCCallingConv.h"
|
|
#include "PPCSubtarget.h"
|
|
#include "PPCCCState.h"
|
|
using namespace llvm;
|
|
|
|
inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
|
|
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
|
|
CCState &) {
|
|
llvm_unreachable("The AnyReg calling convention is only supported by the " \
|
|
"stackmap and patchpoint intrinsics.");
|
|
// gracefully fallback to PPC C calling convention on Release builds.
|
|
return false;
|
|
}
|
|
|
|
static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
|
CCValAssign::LocInfo &LocInfo,
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
CCState &State) {
|
|
return true;
|
|
}
|
|
|
|
static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
|
|
MVT &LocVT,
|
|
CCValAssign::LocInfo &LocInfo,
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
CCState &State) {
|
|
static const MCPhysReg ArgRegs[] = {
|
|
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
|
|
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
|
|
};
|
|
const unsigned NumArgRegs = array_lengthof(ArgRegs);
|
|
|
|
unsigned RegNum = State.getFirstUnallocated(ArgRegs);
|
|
|
|
// Skip one register if the first unallocated register has an even register
|
|
// number and there are still argument registers available which have not been
|
|
// allocated yet. RegNum is actually an index into ArgRegs, which means we
|
|
// need to skip a register if RegNum is odd.
|
|
if (RegNum != NumArgRegs && RegNum % 2 == 1) {
|
|
State.AllocateReg(ArgRegs[RegNum]);
|
|
}
|
|
|
|
// Always return false here, as this function only makes sure that the first
|
|
// unallocated register has an odd register number and does not actually
|
|
// allocate a register for the current argument.
|
|
return false;
|
|
}
|
|
|
|
static bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(
|
|
unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
|
|
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
|
|
static const MCPhysReg ArgRegs[] = {
|
|
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
|
|
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
|
|
};
|
|
const unsigned NumArgRegs = array_lengthof(ArgRegs);
|
|
|
|
unsigned RegNum = State.getFirstUnallocated(ArgRegs);
|
|
int RegsLeft = NumArgRegs - RegNum;
|
|
|
|
// Skip if there is not enough registers left for long double type (4 gpr regs
|
|
// in soft float mode) and put long double argument on the stack.
|
|
if (RegNum != NumArgRegs && RegsLeft < 4) {
|
|
for (int i = 0; i < RegsLeft; i++) {
|
|
State.AllocateReg(ArgRegs[RegNum + i]);
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
|
|
MVT &LocVT,
|
|
CCValAssign::LocInfo &LocInfo,
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
CCState &State) {
|
|
static const MCPhysReg ArgRegs[] = {
|
|
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
|
|
PPC::F8
|
|
};
|
|
|
|
const unsigned NumArgRegs = array_lengthof(ArgRegs);
|
|
|
|
unsigned RegNum = State.getFirstUnallocated(ArgRegs);
|
|
|
|
// If there is only one Floating-point register left we need to put both f64
|
|
// values of a split ppc_fp128 value on the stack.
|
|
if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
|
|
State.AllocateReg(ArgRegs[RegNum]);
|
|
}
|
|
|
|
// Always return false here, as this function only makes sure that the two f64
|
|
// values a ppc_fp128 value is split into are both passed in registers or both
|
|
// passed on the stack and does not actually allocate a register for the
|
|
// current argument.
|
|
return false;
|
|
}
|
|
|
|
// Split F64 arguments into two 32-bit consecutive registers.
|
|
static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT,
|
|
MVT &LocVT,
|
|
CCValAssign::LocInfo &LocInfo,
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
CCState &State) {
|
|
static const MCPhysReg HiRegList[] = { PPC::R3, PPC::R5, PPC::R7, PPC::R9 };
|
|
static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 };
|
|
|
|
// Try to get the first register.
|
|
unsigned Reg = State.AllocateReg(HiRegList);
|
|
if (!Reg)
|
|
return false;
|
|
|
|
unsigned i;
|
|
for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
|
|
if (HiRegList[i] == Reg)
|
|
break;
|
|
|
|
unsigned T = State.AllocateReg(LoRegList[i]);
|
|
(void)T;
|
|
assert(T == LoRegList[i] && "Could not allocate register");
|
|
|
|
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
|
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
|
|
LocVT, LocInfo));
|
|
return true;
|
|
}
|
|
|
|
// Same as above, but for return values, so only allocate for R3 and R4
|
|
static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT,
|
|
MVT &LocVT,
|
|
CCValAssign::LocInfo &LocInfo,
|
|
ISD::ArgFlagsTy &ArgFlags,
|
|
CCState &State) {
|
|
static const MCPhysReg HiRegList[] = { PPC::R3 };
|
|
static const MCPhysReg LoRegList[] = { PPC::R4 };
|
|
|
|
// Try to get the first register.
|
|
unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
|
|
if (!Reg)
|
|
return false;
|
|
|
|
unsigned i;
|
|
for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
|
|
if (HiRegList[i] == Reg)
|
|
break;
|
|
|
|
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
|
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
|
|
LocVT, LocInfo));
|
|
return true;
|
|
}
|
|
|
|
#include "PPCGenCallingConv.inc"
|