
[FastISel][AArch64] Implement the FastLowerArguments hook.

This implements basic argument lowering for AArch64 in FastISel. It only
handles a small subset of the C calling convention. It supports simple
arguments that can be passed in GPR and FPR registers.

This should cover most of the trivial cases without falling back to
SelectionDAG.
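
As an illustration only (this snippet is hypothetical and not part of the patch), a function such as the following has nothing but simple scalar arguments that fit into the first eight GPRs and FPRs, so FastISel can now lower its incoming arguments itself; the unused %c and %d are simply skipped:

define i64 @example(i32 %a, i64 %b, float %c, double %d) {
  %a.ext = zext i32 %a to i64   ; %a arrives in w0 and %b in x1 under the AAPCS
  %sum = add i64 %a.ext, %b
  ret i64 %sum
}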

This fixes <rdar://problem/17890986>.

llvm-svn: 214846
Juergen Ributzka 2014-08-05 05:43:48 +00:00
parent 2c9a6f5e83
commit ec5a9526be
2 changed files with 252 additions and 3 deletions


@@ -94,6 +94,7 @@ class AArch64FastISel : public FastISel {
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool FastLowerArguments() override;
  bool FastLowerCall(CallLoweringInfo &CLI) override;
  bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
@@ -1313,6 +1314,108 @@ bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
  return true;
}

bool AArch64FastISel::FastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C)
    return false;

  // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
  // FPR each.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  unsigned Idx = 0;
  for (auto const &Arg : F->args()) {
    // The first argument is at index 1.
    ++Idx;
    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      ++GPRCnt;
      break;
    case MVT::f16:
    case MVT::f32:
    case MVT::f64:
      ++FPRCnt;
      break;
    }

    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }

  static const MCPhysReg Registers[5][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 }
  };

  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(Arg.getType());
    unsigned SrcReg;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type.");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16: VT = MVT::i32; // fall-through
    case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break;
    case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break;
    case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break;
    case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break;
    case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break;
    }

    // Skip unused arguments.
    if (Arg.use_empty()) {
      UpdateValueMap(&Arg, 0);
      continue;
    }

    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(DstReg, getKillRegState(true));
    UpdateValueMap(&Arg, ResultReg);
  }
  return true;
}

bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {


@@ -1,6 +1,6 @@
-; RUN: llc -O0 -fast-isel-abort -code-model=small -mtriple=arm64-apple-darwin < %s | FileCheck %s
-; RUN: llc -O0 -fast-isel-abort -code-model=large -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE
-; RUN: llc -O0 -fast-isel-abort -code-model=small -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=large -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
define void @call0() nounwind {
entry:
@@ -105,3 +105,149 @@ entry:
  ret void
}

define zeroext i1 @call_arguments1(i1 %a1, i1 %a2, i1 %a3, i1 %a4, i1 %a5, i1 %a6, i1 %a7, i1 %a8) {
; CHECK-LABEL: call_arguments1
; CHECK: and {{w[0-9]+}}, w0, w1
; CHECK-NEXT: and {{w[0-9]+}}, w2, w3
; CHECK-NEXT: and {{w[0-9]+}}, w4, w5
; CHECK-NEXT: and {{w[0-9]+}}, w6, w7
%1 = and i1 %a1, %a2
%2 = and i1 %a3, %a4
%3 = and i1 %a5, %a6
%4 = and i1 %a7, %a8
%5 = and i1 %1, %2
%6 = and i1 %3, %4
%7 = and i1 %5, %6
ret i1 %7
}
define i32 @call_arguments2(i8 zeroext %a1, i8 zeroext %a2, i8 zeroext %a3, i8 zeroext %a4, i8 signext %a5, i8 signext %a6, i8 signext %a7, i8 signext %a8) {
; CHECK-LABEL: call_arguments2
; CHECK: add {{w[0-9]+}}, w0, w1
; CHECK-NEXT: add {{w[0-9]+}}, w2, w3
; CHECK-NEXT: add {{w[0-9]+}}, w4, w5
; CHECK-NEXT: add {{w[0-9]+}}, w6, w7
%a1z = zext i8 %a1 to i32
%a2z = zext i8 %a2 to i32
%a3z = zext i8 %a3 to i32
%a4z = zext i8 %a4 to i32
%a5s = sext i8 %a5 to i32
%a6s = sext i8 %a6 to i32
%a7s = sext i8 %a7 to i32
%a8s = sext i8 %a8 to i32
%1 = add i32 %a1z, %a2z
%2 = add i32 %a3z, %a4z
%3 = add i32 %a5s, %a6s
%4 = add i32 %a7s, %a8s
%5 = add i32 %1, %2
%6 = add i32 %3, %4
%7 = add i32 %5, %6
ret i32 %7
}
define i32 @call_arguments3(i16 zeroext %a1, i16 zeroext %a2, i16 zeroext %a3, i16 zeroext %a4, i16 signext %a5, i16 signext %a6, i16 signext %a7, i16 signext %a8) {
; CHECK-LABEL: call_arguments3
; CHECK: add {{w[0-9]+}}, w0, w1
; CHECK-NEXT: add {{w[0-9]+}}, w2, w3
; CHECK-NEXT: add {{w[0-9]+}}, w4, w5
; CHECK-NEXT: add {{w[0-9]+}}, w6, w7
%a1z = zext i16 %a1 to i32
%a2z = zext i16 %a2 to i32
%a3z = zext i16 %a3 to i32
%a4z = zext i16 %a4 to i32
%a5s = sext i16 %a5 to i32
%a6s = sext i16 %a6 to i32
%a7s = sext i16 %a7 to i32
%a8s = sext i16 %a8 to i32
%1 = add i32 %a1z, %a2z
%2 = add i32 %a3z, %a4z
%3 = add i32 %a5s, %a6s
%4 = add i32 %a7s, %a8s
%5 = add i32 %1, %2
%6 = add i32 %3, %4
%7 = add i32 %5, %6
ret i32 %7
}
define i32 @call_arguments4(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
; CHECK-LABEL: call_arguments4
; CHECK: add {{w[0-9]+}}, w0, w1
; CHECK-NEXT: add {{w[0-9]+}}, w2, w3
; CHECK-NEXT: add {{w[0-9]+}}, w4, w5
; CHECK-NEXT: add {{w[0-9]+}}, w6, w7
%1 = add i32 %a1, %a2
%2 = add i32 %a3, %a4
%3 = add i32 %a5, %a6
%4 = add i32 %a7, %a8
%5 = add i32 %1, %2
%6 = add i32 %3, %4
%7 = add i32 %5, %6
ret i32 %7
}
define i64 @call_arguments5(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8) {
; CHECK-LABEL: call_arguments5
; CHECK: add {{x[0-9]+}}, x0, x1
; CHECK-NEXT: add {{x[0-9]+}}, x2, x3
; CHECK-NEXT: add {{x[0-9]+}}, x4, x5
; CHECK-NEXT: add {{x[0-9]+}}, x6, x7
%1 = add i64 %a1, %a2
%2 = add i64 %a3, %a4
%3 = add i64 %a5, %a6
%4 = add i64 %a7, %a8
%5 = add i64 %1, %2
%6 = add i64 %3, %4
%7 = add i64 %5, %6
ret i64 %7
}
define float @call_arguments6(float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8) {
; CHECK-LABEL: call_arguments6
; CHECK: fadd {{s[0-9]+}}, s0, s1
; CHECK-NEXT: fadd {{s[0-9]+}}, s2, s3
; CHECK-NEXT: fadd {{s[0-9]+}}, s4, s5
; CHECK-NEXT: fadd {{s[0-9]+}}, s6, s7
%1 = fadd float %a1, %a2
%2 = fadd float %a3, %a4
%3 = fadd float %a5, %a6
%4 = fadd float %a7, %a8
%5 = fadd float %1, %2
%6 = fadd float %3, %4
%7 = fadd float %5, %6
ret float %7
}
define double @call_arguments7(double %a1, double %a2, double %a3, double %a4, double %a5, double %a6, double %a7, double %a8) {
; CHECK-LABEL: call_arguments7
; CHECK: fadd {{d[0-9]+}}, d0, d1
; CHECK-NEXT: fadd {{d[0-9]+}}, d2, d3
; CHECK-NEXT: fadd {{d[0-9]+}}, d4, d5
; CHECK-NEXT: fadd {{d[0-9]+}}, d6, d7
%1 = fadd double %a1, %a2
%2 = fadd double %a3, %a4
%3 = fadd double %a5, %a6
%4 = fadd double %a7, %a8
%5 = fadd double %1, %2
%6 = fadd double %3, %4
%7 = fadd double %5, %6
ret double %7
}
define i64 @call_arguments8(i32 %a1, i64 %a2, i32 %a3, i64 %a4) {
; CHECK-LABEL: call_arguments8
; CHECK: ubfx [[REG1:x[0-9]+]], {{x[0-9]+}}, #0, #32
; CHECK: ubfx [[REG2:x[0-9]+]], {{x[0-9]+}}, #0, #32
; CHECK: add {{x[0-9]+}}, [[REG1]], x1
; CHECK-NEXT: add {{x[0-9]+}}, [[REG2]], x3
%aa1 = zext i32 %a1 to i64
%aa3 = zext i32 %a3 to i64
%1 = add i64 %aa1, %a2
%2 = add i64 %aa3, %a4
%3 = add i64 %1, %2
ret i64 %3
}
define void @call_arguments9(i8 %a1, i16 %a2, i32 %a3, i64 %a4, float %a5, double %a6, i64 %a7, double %a8) {
; CHECK-LABEL: call_arguments9
ret void
}
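
By contrast (again only an illustrative snippet, not part of the committed test), any argument that is not a simple scalar, for example a vector, struct, or array type, or a vararg signature, makes FastLowerArguments return false, so argument lowering for such a function still falls back to SelectionDAG:

define <4 x i32> @fallback(<4 x i32> %v) {
  ; vector argument types are rejected by FastLowerArguments,
  ; so SelectionDAG handles this function's arguments
  ret <4 x i32> %v
}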