
[FastISel][AArch64] Implement the FastLowerArguments hook.

This implements basic argument lowering for AArch64 in FastISel. It only
handles a small subset of the C calling convention. It supports simple
arguments that can be passed in GPR and FPR registers.

This should cover most of the trivial cases without falling back to
SelectionDAG.
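
As an illustration only (this snippet is hypothetical and not part of the patch), a function such as the following has nothing but simple scalar arguments that fit into the first eight GPRs and FPRs, so FastISel can now lower its incoming arguments itself; the unused %c and %d are simply skipped:

define i64 @example(i32 %a, i64 %b, float %c, double %d) {
  %a.ext = zext i32 %a to i64   ; %a arrives in w0 and %b in x1 under the AAPCS
  %sum = add i64 %a.ext, %b
  ret i64 %sum
}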

This fixes <rdar://problem/17890986>.

llvm-svn: 214846
Juergen Ributzka 2014-08-05 05:43:48 +00:00
parent 2c9a6f5e83
commit ec5a9526be
2 changed files with 252 additions and 3 deletions


@@ -94,6 +94,7 @@ class AArch64FastISel : public FastISel {
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool FastLowerArguments() override;
  bool FastLowerCall(CallLoweringInfo &CLI) override;
  bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
@@ -1313,6 +1314,108 @@ bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
  return true;
}

bool AArch64FastISel::FastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C)
    return false;

  // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
  // FPR each.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  unsigned Idx = 0;
  for (auto const &Arg : F->args()) {
    // The first argument is at index 1.
    ++Idx;
    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
    default: return false;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      ++GPRCnt;
      break;
    case MVT::f16:
    case MVT::f32:
    case MVT::f64:
      ++FPRCnt;
      break;
    }

    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }

  static const MCPhysReg Registers[5][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 }
  };

  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(Arg.getType());
    unsigned SrcReg;
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type.");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16: VT = MVT::i32; // fall-through
    case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break;
    case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break;
    case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break;
    case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break;
    case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break;
    }

    // Skip unused arguments.
    if (Arg.use_empty()) {
      UpdateValueMap(&Arg, 0);
      continue;
    }

    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
      .addReg(DstReg, getKillRegState(true));
    UpdateValueMap(&Arg, ResultReg);
  }
  return true;
}

bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {


@@ -1,6 +1,6 @@
-; RUN: llc -O0 -fast-isel-abort -code-model=small -mtriple=arm64-apple-darwin < %s | FileCheck %s
-; RUN: llc -O0 -fast-isel-abort -code-model=large -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE
-; RUN: llc -O0 -fast-isel-abort -code-model=small -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=large -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
define void @call0() nounwind {
entry:
@@ -105,3 +105,149 @@ entry:
  ret void
}

define zeroext i1 @call_arguments1(i1 %a1, i1 %a2, i1 %a3, i1 %a4, i1 %a5, i1 %a6, i1 %a7, i1 %a8) {
; CHECK-LABEL: call_arguments1
; CHECK: and {{w[0-9]+}}, w0, w1
; CHECK-NEXT: and {{w[0-9]+}}, w2, w3
; CHECK-NEXT: and {{w[0-9]+}}, w4, w5
; CHECK-NEXT: and {{w[0-9]+}}, w6, w7
%1 = and i1 %a1, %a2
%2 = and i1 %a3, %a4
%3 = and i1 %a5, %a6
%4 = and i1 %a7, %a8
%5 = and i1 %1, %2
%6 = and i1 %3, %4
%7 = and i1 %5, %6
ret i1 %7
}
define i32 @call_arguments2(i8 zeroext %a1, i8 zeroext %a2, i8 zeroext %a3, i8 zeroext %a4, i8 signext %a5, i8 signext %a6, i8 signext %a7, i8 signext %a8) {
; CHECK-LABEL: call_arguments2
; CHECK: add {{w[0-9]+}}, w0, w1
; CHECK-NEXT: add {{w[0-9]+}}, w2, w3
; CHECK-NEXT: add {{w[0-9]+}}, w4, w5
; CHECK-NEXT: add {{w[0-9]+}}, w6, w7
%a1z = zext i8 %a1 to i32
%a2z = zext i8 %a2 to i32
%a3z = zext i8 %a3 to i32
%a4z = zext i8 %a4 to i32
%a5s = sext i8 %a5 to i32
%a6s = sext i8 %a6 to i32
%a7s = sext i8 %a7 to i32
%a8s = sext i8 %a8 to i32
%1 = add i32 %a1z, %a2z
%2 = add i32 %a3z, %a4z
%3 = add i32 %a5s, %a6s
%4 = add i32 %a7s, %a8s
%5 = add i32 %1, %2
%6 = add i32 %3, %4
%7 = add i32 %5, %6
ret i32 %7
}
define i32 @call_arguments3(i16 zeroext %a1, i16 zeroext %a2, i16 zeroext %a3, i16 zeroext %a4, i16 signext %a5, i16 signext %a6, i16 signext %a7, i16 signext %a8) {
; CHECK-LABEL: call_arguments3
; CHECK: add {{w[0-9]+}}, w0, w1
; CHECK-NEXT: add {{w[0-9]+}}, w2, w3
; CHECK-NEXT: add {{w[0-9]+}}, w4, w5
; CHECK-NEXT: add {{w[0-9]+}}, w6, w7
%a1z = zext i16 %a1 to i32
%a2z = zext i16 %a2 to i32
%a3z = zext i16 %a3 to i32
%a4z = zext i16 %a4 to i32
%a5s = sext i16 %a5 to i32
%a6s = sext i16 %a6 to i32
%a7s = sext i16 %a7 to i32
%a8s = sext i16 %a8 to i32
%1 = add i32 %a1z, %a2z
%2 = add i32 %a3z, %a4z
%3 = add i32 %a5s, %a6s
%4 = add i32 %a7s, %a8s
%5 = add i32 %1, %2
%6 = add i32 %3, %4
%7 = add i32 %5, %6
ret i32 %7
}
define i32 @call_arguments4(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
; CHECK-LABEL: call_arguments4
; CHECK: add {{w[0-9]+}}, w0, w1
; CHECK-NEXT: add {{w[0-9]+}}, w2, w3
; CHECK-NEXT: add {{w[0-9]+}}, w4, w5
; CHECK-NEXT: add {{w[0-9]+}}, w6, w7
%1 = add i32 %a1, %a2
%2 = add i32 %a3, %a4
%3 = add i32 %a5, %a6
%4 = add i32 %a7, %a8
%5 = add i32 %1, %2
%6 = add i32 %3, %4
%7 = add i32 %5, %6
ret i32 %7
}
define i64 @call_arguments5(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8) {
; CHECK-LABEL: call_arguments5
; CHECK: add {{x[0-9]+}}, x0, x1
; CHECK-NEXT: add {{x[0-9]+}}, x2, x3
; CHECK-NEXT: add {{x[0-9]+}}, x4, x5
; CHECK-NEXT: add {{x[0-9]+}}, x6, x7
%1 = add i64 %a1, %a2
%2 = add i64 %a3, %a4
%3 = add i64 %a5, %a6
%4 = add i64 %a7, %a8
%5 = add i64 %1, %2
%6 = add i64 %3, %4
%7 = add i64 %5, %6
ret i64 %7
}
define float @call_arguments6(float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8) {
; CHECK-LABEL: call_arguments6
; CHECK: fadd {{s[0-9]+}}, s0, s1
; CHECK-NEXT: fadd {{s[0-9]+}}, s2, s3
; CHECK-NEXT: fadd {{s[0-9]+}}, s4, s5
; CHECK-NEXT: fadd {{s[0-9]+}}, s6, s7
%1 = fadd float %a1, %a2
%2 = fadd float %a3, %a4
%3 = fadd float %a5, %a6
%4 = fadd float %a7, %a8
%5 = fadd float %1, %2
%6 = fadd float %3, %4
%7 = fadd float %5, %6
ret float %7
}
define double @call_arguments7(double %a1, double %a2, double %a3, double %a4, double %a5, double %a6, double %a7, double %a8) {
; CHECK-LABEL: call_arguments7
; CHECK: fadd {{d[0-9]+}}, d0, d1
; CHECK-NEXT: fadd {{d[0-9]+}}, d2, d3
; CHECK-NEXT: fadd {{d[0-9]+}}, d4, d5
; CHECK-NEXT: fadd {{d[0-9]+}}, d6, d7
%1 = fadd double %a1, %a2
%2 = fadd double %a3, %a4
%3 = fadd double %a5, %a6
%4 = fadd double %a7, %a8
%5 = fadd double %1, %2
%6 = fadd double %3, %4
%7 = fadd double %5, %6
ret double %7
}
define i64 @call_arguments8(i32 %a1, i64 %a2, i32 %a3, i64 %a4) {
; CHECK-LABEL: call_arguments8
; CHECK: ubfx [[REG1:x[0-9]+]], {{x[0-9]+}}, #0, #32
; CHECK: ubfx [[REG2:x[0-9]+]], {{x[0-9]+}}, #0, #32
; CHECK: add {{x[0-9]+}}, [[REG1]], x1
; CHECK-NEXT: add {{x[0-9]+}}, [[REG2]], x3
%aa1 = zext i32 %a1 to i64
%aa3 = zext i32 %a3 to i64
%1 = add i64 %aa1, %a2
%2 = add i64 %aa3, %a4
%3 = add i64 %1, %2
ret i64 %3
}
define void @call_arguments9(i8 %a1, i16 %a2, i32 %a3, i64 %a4, float %a5, double %a6, i64 %a7, double %a8) {
; CHECK-LABEL: call_arguments9
ret void
}
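
By contrast (again only an illustrative snippet, not part of the committed test), any argument that is not a simple scalar, for example a vector, struct, or array type, or a vararg signature, makes FastLowerArguments return false, so argument lowering for such a function still falls back to SelectionDAG:

define <4 x i32> @fallback(<4 x i32> %v) {
  ; vector argument types are rejected by FastLowerArguments,
  ; so SelectionDAG handles this function's arguments
  ret <4 x i32> %v
}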