AArch64: support i128 (& larger) returns in GlobalISel

2025-01-31 12:41:49 +01:00 · 2021-07-26 13:53:34 +01:00 · 2021-07-26 13:53:34 +01:00 · c8cc09ffa5
commit c8cc09ffa5
parent 845ad210b0
4 changed files with 65 additions and 37 deletions
--- a/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@ -362,11 +362,6 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
-      if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
-        LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
-        return false;
-      }
-
      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
@ -375,7 +370,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
      // when widened using ANYEXT. We need to do it explicitly here.
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
-      } else {
+      } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
+                 1) {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
--- a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@ -1,5 +1,3 @@
-; RUN: not --crash llc -O0 -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR
-; RUN: llc -O0 -global-isel -global-isel-abort=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=FALLBACK
 ; RUN: llc -O0 -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o %t.out 2> %t.err
 ; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-OUT < %t.out
 ; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-ERR < %t.err
@ -15,23 +13,6 @@ target triple = "aarch64--"

 ; BIG-ENDIAN: unable to translate in big endian mode

-; We use __fixunstfti as the common denominator for __fixunstfti on Linux and
-; ___fixunstfti on iOS
-; ERROR: unable to translate instruction: ret
-; FALLBACK: ldr q0,
-; FALLBACK-NEXT: bl __fixunstfti
-;
-; FALLBACK-WITH-REPORT-ERR: unable to translate instruction: ret
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ABIi128
-; FALLBACK-WITH-REPORT-OUT-LABEL: ABIi128:
-; FALLBACK-WITH-REPORT-OUT: ldr q0,
-; FALLBACK-WITH-REPORT-OUT-NEXT: bl __fixunstfti
-define i128 @ABIi128(i128 %arg1) {
-  %farg1 =       bitcast i128 %arg1 to fp128
-  %res = fptoui fp128 %farg1 to i128
-  ret i128 %res
-}
-
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3:_(<3 x s32>), %4:_(p0) :: (store (<3 x s32>) into %ir.addr + 16, align 16, basealign 32) (in function: odd_vector)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector
 ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector:
@ -41,15 +22,6 @@ define void @odd_vector(<7 x i32>* %addr) {
  ret void
 }

-  ; AArch64 was asserting instead of returning an invalid mapping for unknown
-  ; sizes.
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: ret: '  ret i128 undef' (in function: sequence_sizes)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for sequence_sizes
-; FALLBACK-WITH-REPORT-LABEL: sequence_sizes:
-define i128 @sequence_sizes([8 x i8] %in) {
-  ret i128 undef
-}
-
 ; Make sure we don't mess up metadata arguments.
 declare void @llvm.write_register.i64(metadata, i64)

--- a/test/CodeGen/AArch64/GlobalISel/translate-ret.ll
+++ b/test/CodeGen/AArch64/GlobalISel/translate-ret.ll
@ -0,0 +1,59 @@
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -global-isel -global-isel-abort=1 -stop-after=irtranslator | FileCheck %s
+
+define i128 @func_i128(i128* %ptr) {
+; CHECK-LABEL: name: func_i128
+; CHECK: [[PTR:%.*]]:_(p0) = COPY $x0
+; CHECK: [[VAL:%.*]]:_(s128) = G_LOAD [[PTR]]
+; CHECK: [[LO:%.*]]:_(s64), [[HI:%.*]]:_(s64) = G_UNMERGE_VALUES [[VAL]]
+; CHECK: $x0 = COPY [[LO]]
+; CHECK: $x1 = COPY [[HI]]
+; CHECK: RET_ReallyLR
+
+  %val = load i128, i128* %ptr
+  ret i128 %val
+}
+
+define <8 x float> @func_v8f32(<8 x float>* %ptr) {
+; CHECK-LABEL: name: func_v8f32
+; CHECK: [[PTR:%.*]]:_(p0) = COPY $x0
+; CHECK: [[VAL:%.*]]:_(<8 x s32>) = G_LOAD [[PTR]]
+; CHECK: [[LO:%.*]]:_(<4 x s32>), [[HI:%.*]]:_(<4 x s32>) = G_UNMERGE_VALUES [[VAL]]
+; CHECK: $q0 = COPY [[LO]]
+; CHECK: $q1 = COPY [[HI]]
+; CHECK: RET_ReallyLR
+
+  %val = load <8 x float>, <8 x float>* %ptr
+  ret <8 x float> %val
+}
+
+; Returning one element per register in s0-s5 looks a bit weird, but it matches what SDAG does.
+define <6 x float> @func_v6f32(<6 x float>* %ptr) {
+; CHECK-LABEL: name: func_v6f32
+; CHECK: [[PTR:%.*]]:_(p0) = COPY $x0
+; CHECK: [[VAL:%.*]]:_(<6 x s32>) = G_LOAD [[PTR]]
+; CHECK: [[V1:%.*]]:_(s32), [[V2:%.*]]:_(s32), [[V3:%.*]]:_(s32), [[V4:%.*]]:_(s32), [[V5:%.*]]:_(s32), [[V6:%.*]]:_(s32) = G_UNMERGE_VALUES [[VAL]]
+; CHECK: $s0 = COPY [[V1]]
+; CHECK: $s1 = COPY [[V2]]
+; CHECK: $s2 = COPY [[V3]]
+; CHECK: $s3 = COPY [[V4]]
+; CHECK: $s4 = COPY [[V5]]
+; CHECK: $s5 = COPY [[V6]]
+; CHECK: RET_ReallyLR
+
+  %val = load <6 x float>, <6 x float>* %ptr
+  ret <6 x float> %val
+}
+
+define i128 @ABIi128(i128 %arg1) {
+; CHECK-LABEL: name: ABIi128
+; CHECK: [[LO:%.*]]:_(s64) = COPY $x0
+; CHECK: [[HI:%.*]]:_(s64) = COPY $x1
+; CHECK: [[IN:%.*]]:_(s128) = G_MERGE_VALUES [[LO]](s64), [[HI]](s64)
+; CHECK: [[IN_FP:%.*]]:_(s128) = G_FPTOUI [[IN]](s128)
+; CHECK: [[LO_OUT:%.*]]:_(s64), [[HI_OUT:%.*]]:_(s64) = G_UNMERGE_VALUES [[IN_FP]](s128)
+; CHECK: $x0 = COPY [[LO_OUT]]
+; CHECK: $x1 = COPY [[HI_OUT]]
+  %farg1 =       bitcast i128 %arg1 to fp128
+  %res = fptoui fp128 %farg1 to i128
+  ret i128 %res
+}
--- a/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/test/CodeGen/AArch64/arm64-subvector-extend.ll
@ -259,9 +259,10 @@ define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
 ; CHECK-NEXT:   .cfi_startproc
 ; CHECK-NEXT:    fmov    x8, d0
 ; CHECK-NEXT:    asr x1, x8, #63
-; CHECK-NEXT:    mov.d   v0[1], x1
-; CHECK-NEXT:    fmov    x0, d0
-; CHECK-NEXT:    ret
+  ; X0 & X1 are the real return registers; SDAG also writes v0 for unknown reasons, so that instruction is optional below.
+; CHECK:    {{(mov.d   v0[1], x1)?}}
+; CHECK:    fmov    x0, d0
+; CHECK:    ret
 ;
  %res = sext <1 x i64> %arg to <1 x i128>
  ret <1 x i128> %res