[ARM64-BE] Make big endian (scalar) argument passing work correctly.

This completes the port of r204814 (cpirker "AArch64_BE function argument passing for ARM ABI") from AArch64 to ARM64 and, along the way, fixes a number of issues found during later development. The biggest of these was that the alignment fixup logic had not been replicated in all the places it should have been.

llvm-svn: 208192
2024-11-24 03:33:20 +01:00 · 2014-05-07 11:28:36 +00:00 · 2014-05-07 11:28:36 +00:00 · c6eeb59eb7
commit c6eeb59eb7
parent 328bcb73da
6 changed files with 67 additions and 15 deletions
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@ -1678,8 +1678,10 @@ SDValue ARM64TargetLowering::LowerFormalArguments(
      int Size = Ins[i].Flags.getByValSize();
      unsigned NumRegs = (Size + 7) / 8;

+      // FIXME: This works on big-endian for composite byvals, which are the common
+      // case. It should work for fundamental types too.
      unsigned FrameIdx =
-          MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
+        MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
      SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
      InVals.push_back(FrameIdxN);

@ -1737,13 +1739,33 @@ SDValue ARM64TargetLowering::LowerFormalArguments(
      assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
      unsigned ArgOffset = VA.getLocMemOffset();
      unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
-      int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
+
+      uint32_t BEAlign = 0;
+      if (ArgSize < 8 && !Subtarget->isLittleEndian())
+        BEAlign = 8 - ArgSize;
+
+      int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
-                                   MachinePointerInfo::getFixedStack(FI), false,
-                                   false, false, 0));
+      SDValue ArgValue;
+
+      // If the loc type and val type are not the same, create an anyext load.
+      if (VA.getLocVT().getSizeInBits() != VA.getValVT().getSizeInBits()) {
+        // We should only get here if this is a pure integer.
+        assert(!VA.getValVT().isVector() && VA.getValVT().isInteger() &&
+               "Only integer extension supported!");
+        ArgValue = DAG.getExtLoad(ISD::EXTLOAD, DL, VA.getValVT(), Chain, FIN,
+                                  MachinePointerInfo::getFixedStack(FI),
+                                  VA.getLocVT(),
+                                  false, false, false, 0);
+      } else {
+        ArgValue = DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
+                               MachinePointerInfo::getFixedStack(FI), false,
+                               false, false, 0);
+      }
+
+      InVals.push_back(ArgValue);
    }
  }

@ -2089,8 +2111,18 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI,
      // There's no reason we can't support stack args w/ tailcall, but
      // we currently don't, so assert if we see one.
      assert(!IsTailCall && "stack argument with tail call!?");
+
+      // FIXME: This works on big-endian for composite byvals, which are the common
+      // case. It should work for fundamental types too.
+      uint32_t BEAlign = 0;
+      if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+        unsigned OpSize = (VA.getLocVT().getSizeInBits() + 7) / 8;
+        if (OpSize < 8)
+          BEAlign = 8 - OpSize;
+      }
+
      unsigned LocMemOffset = VA.getLocMemOffset();
-      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset + BEAlign);
      PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);

      if (Outs[i].Flags.isByVal()) {
--- a/test/CodeGen/AArch64/adc.ll
+++ b/test/CodeGen/AArch64/adc.ll
@ -1,6 +1,7 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s

 define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
 ; CHECK-LABEL: test_simple:
--- a/test/CodeGen/AArch64/func-argpassing.ll
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@ -1,9 +1,12 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-AARCH64 --check-prefix=CHECK-LE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE-AARCH64 --check-prefix=CHECK-BE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM64-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s

 %myStruct = type { i64 , i8, i32 }

@ -152,7 +155,7 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var
    %retval = load volatile i32* %stacked
    ret i32 %retval
 ; CHECK-LE: ldr w0, [sp, #16]
-; CHECK-BE: ldr w0, [sp, #20]
+; CHECK-BE-AARCH64: ldr w0, [sp, #20]
 }

 define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
@ -162,8 +165,10 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
    store float %var8, float* @varfloat
    ; Beware as above: the offset would be different on big-endian
    ; machines if the first ldr were changed to use s-registers.
-; CHECK: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
-; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
+; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK-AARCH64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
+; CHECK-AARCH64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]

    ret void
 }
@ -188,7 +193,7 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
    ; Nothing local on stack in current codegen, so first stack is 16 away
 ; CHECK-LE: add     x[[REG:[0-9]+]], sp, #16
 ; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8]
-; CHECK-BE: ldr {{x[0-9]+}}, [sp, #24]
+; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24]

    ; Important point is that we address sp+24 for second dword
 ; CHECK-AARCH64: ldr     {{x[0-9]+}}, [sp, #16]
@ -205,3 +210,14 @@ define i32 @test_extern() {
 ; CHECK: bl memcpy
  ret i32 0
 }
+
+
+; A sub-i32 stack argument must be loaded on big endian with ldr{h,b}, not just
+; implicitly extended to a 32-bit load.
+define i16 @stacked_i16(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+                        i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+                        i16 %stack1) {
+; CHECK-LABEL: stacked_i16
+; CHECK-ARM64-BE: ldrh
+  ret i16 %stack1
+}
--- a/test/CodeGen/AArch64/func-calls.ll
+++ b/test/CodeGen/AArch64/func-calls.ll
@ -2,9 +2,11 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-BE --check-prefix=CHECK-NOFP %s
+
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s

 %myStruct = type { i64 , i8, i32 }

@ -149,9 +151,9 @@ define void @check_i128_align() {

  call void @check_i128_regalign(i32 0, i128 42)
 ; CHECK-NOT: mov x1
-; CHECK-LE: movz x2, #42
+; CHECK-LE: movz x2, #{{0x2a|42}}
 ; CHECK-LE: mov x3, xzr
-; CHECK-BE: movz x3, #42
+; CHECK-BE: movz {{x|w}}3, #{{0x2a|42}}
 ; CHECK-BE: mov x2, xzr
 ; CHECK: bl check_i128_regalign

--- a/test/CodeGen/AArch64/mul-lohi.ll
+++ b/test/CodeGen/AArch64/mul-lohi.ll
@ -1,6 +1,7 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
 ; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
 ; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s

 define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
 ; CHECK-LABEL: test_128bitmul:
--- a/test/CodeGen/ARM64/aapcs.ll
+++ b/test/CodeGen/ARM64/aapcs.ll
@ -21,7 +21,7 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,

  %ext_bool = zext i1 %bool to i64
  store volatile i64 %ext_bool, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp]
+; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
 ; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
 ; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]

@ -37,7 +37,7 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,

  %ext_int = zext i32 %int to i64
  store volatile i64 %ext_int, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp, #24]
+; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24]
 ; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]

  store volatile i64 %long, i64* @var64, align 8