diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index e2331e83b38..f5df1c5e292 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -352,7 +352,6 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { /// pointer register. bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); - const AArch64FunctionInfo *AFI = MF.getInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); // Win64 EH requires a frame pointer if funclets are present, as the locals // are accessed off the frame pointer in both the parent function and the @@ -377,14 +376,6 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { if (!MFI.isMaxCallFrameSizeComputed() || MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) return true; - // If there are both SVE and non-SVE objects on the stack, make the frame - // pointer available since it may be more performant to use it. - uint64_t CalleeStackSize = AFI->isCalleeSavedStackSizeComputed() - ? AFI->getCalleeSavedStackSize() - : 0; - uint64_t NonSVEStackSize = MFI.getStackSize() - CalleeStackSize; - if (AFI->getStackSizeSVE() && NonSVEStackSize) - return true; return false; } @@ -1980,6 +1971,10 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame() && !isSVE) { + // We shouldn't prefer using the FP when there is an SVE area + // in between the FP and the non-SVE locals/spills. + PreferFP &= !SVEStackSize; + // Note: Keeping the following as multiple 'if' statements rather than // merging to a single expression for readability. // @@ -2000,10 +1995,6 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( bool FPOffsetFits = !ForSimm || FPOffset >= -256; PreferFP |= Offset > -FPOffset; - // The FP offset will not fit if there is an SVE area in the way. - if (SVEStackSize && FPOffset < 0) - FPOffsetFits = false; - if (MFI.hasVarSizedObjects()) { // If we have variable sized objects, we can use either FP or BP, as the // SP offset is unknown. We can use the base pointer if we have one and diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 0298d9e5c35..f60e2b6c316 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -248,10 +248,6 @@ public: return getCalleeSavedStackSize(); } - bool isCalleeSavedStackSizeComputed() const { - return HasCalleeSavedStackSize; - } - unsigned getCalleeSavedStackSize() const { assert(HasCalleeSavedStackSize && "CalleeSavedStackSize has not been calculated"); diff --git a/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir b/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir index 3e65e40449e..75917ef32ae 100644 --- a/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir +++ b/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir @@ -12,16 +12,16 @@ # CHECK1: : DW_OP_breg31 WSP+16) # CHECK1: DW_AT_type {{.*}}ty32 # -# CHECK2: : DW_OP_breg29 W29+0, DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus) +# CHECK2: : DW_OP_breg31 WSP+16, DW_OP_lit16, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus) # CHECK2: DW_AT_type {{.*}}svint32_t # -# CHECK3: : DW_OP_breg29 W29+0, DW_OP_lit16, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus) +# CHECK3: : DW_OP_breg31 WSP+16, DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus) # CHECK3: DW_AT_type {{.*}}svint32_t # -# CHECK4: : DW_OP_breg29 W29+0, DW_OP_lit17, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus) +# CHECK4: : DW_OP_breg31 WSP+16, DW_OP_lit7, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus) # CHECK4: DW_AT_type {{.*}}svbool_t # -# CHECK5: : DW_OP_breg29 W29+0, DW_OP_lit18, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus) +# CHECK5: : DW_OP_breg31 WSP+16, DW_OP_lit6, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus) # CHECK5: DW_AT_type {{.*}}svbool_t --- | diff --git a/test/CodeGen/AArch64/framelayout-sve.mir b/test/CodeGen/AArch64/framelayout-sve.mir index 3418228c350..b3d17cf17bb 100644 --- a/test/CodeGen/AArch64/framelayout-sve.mir +++ b/test/CodeGen/AArch64/framelayout-sve.mir @@ -57,7 +57,6 @@ # CHECK: bb.0.entry: # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 -# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-COUNT-2: frame-setup CFI_INSTRUCTION @@ -68,9 +67,11 @@ # CHECK-NEXT: RET_ReallyLR # ASM-LABEL: test_allocate_sve: -# ASM: .cfi_offset w29, -16 +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG +# ASM-NEXT: .cfi_offset w29, -16 # -# UNWINDINFO: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: test_allocate_sve stack: - { id: 0, stack-id: scalable-vector, size: 18, alignment: 2 } @@ -95,7 +96,6 @@ body: | # CHECK: bb.0.entry: # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -32 # CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2 -# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-COUNT-4: frame-setup CFI_INSTRUCTION @@ -109,11 +109,13 @@ body: | # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_allocate_sve_gpr_callee_saves: -# ASM: .cfi_offset w20, -8 +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG +# ASM-NEXT: .cfi_offset w20, -8 # ASM-NEXT: .cfi_offset w21, -16 # ASM-NEXT: .cfi_offset w29, -32 # -# UNWINDINFO: DW_CFA_offset: reg20 -8 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg20 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 name: test_allocate_sve_gpr_callee_saves @@ -182,14 +184,16 @@ body: | # CHECK: bb.0.entry: # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 -# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-COUNT-2: frame-setup CFI_INSTRUCTION -# CHECK-NEXT: STR_ZXI $z0, $fp, -1 -# CHECK-NEXT: STR_ZXI $z1, $fp, -2 -# CHECK-NEXT: STR_PXI $p0, $fp, -17 +# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16 +# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 2 +# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16 +# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], 1 +# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16 +# CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 7 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 @@ -197,9 +201,11 @@ body: | # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_address_sve: -# ASM: .cfi_offset w29, -16 +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG +# ASM-NEXT: .cfi_offset w29, -16 # -# UNWINDINFO: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: test_address_sve frameInfo: @@ -292,12 +298,12 @@ body: | # CHECK: bb.0.entry: # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 -# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 # CHECK-COUNT-2: frame-setup CFI_INSTRUCTION -# CHECK-NEXT: $x0 = LDRXui $fp, 2 +# CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1 +# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 @@ -305,9 +311,11 @@ body: | # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: test_stack_arg_sve: -# ASM: .cfi_offset w29, -16 +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG +# ASM-NEXT: .cfi_offset w29, -16 # -# UNWINDINFO: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: test_stack_arg_sve fixedStack: @@ -452,9 +460,11 @@ body: | # CHECK: RET_ReallyLR # # ASM-LABEL: save_restore_pregs_sve: -# ASM: .cfi_offset w29, -16 +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 8 * VG +# ASM-NEXT: .cfi_offset w29, -16 # -# UNWINDINFO: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: save_restore_pregs_sve stack: - { id: 0, stack-id: default, size: 32, alignment: 16 } @@ -470,7 +480,6 @@ body: | ... # CHECK-LABEL: name: save_restore_zregs_sve # CHECK: $sp = frame-setup STRXpre killed $fp, $sp, -16 -# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: frame-setup STR_ZXI killed $z10, $sp, 0 # CHECK-NEXT: frame-setup STR_ZXI killed $z9, $sp, 1 @@ -487,11 +496,13 @@ body: | # CHECK-NEXT: RET_ReallyLR # # ASM-LABEL: save_restore_zregs_sve: -# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 24 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG # ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG -# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 @@ -547,7 +558,8 @@ body: | # CHECK: RET_ReallyLR # # ASM-LABEL: save_restore_sve: -# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG +# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG # ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG # ASM-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG @@ -560,7 +572,8 @@ body: | # ASM-NEXT: .cfi_offset w21, -24 # ASM-NEXT: .cfi_offset w29, -32 # -# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus # UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus diff --git a/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/test/CodeGen/AArch64/sve-calling-convention-mixed.ll index f6bcb7e2e2d..806dd7e57de 100644 --- a/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -41,18 +41,15 @@ define float @foo2(double* %x0, double* %x1) nounwind { ; CHECK-LABEL: foo2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: sub sp, sp, #16 // =16 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: addvl x8, x29, #-4 +; CHECK-NEXT: add x8, sp, #16 // =16 +; CHECK-NEXT: add x9, sp, #16 // =16 ; CHECK-NEXT: fmov s0, #1.00000000 -; CHECK-NEXT: st1d { z16.d }, p0, [x29, #-4, mul vl] -; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] ; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w2, #2 ; CHECK-NEXT: mov w3, #3 @@ -60,8 +57,12 @@ define float @foo2(double* %x0, double* %x1) nounwind { ; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w6, #6 ; CHECK-NEXT: mov w7, #7 -; CHECK-NEXT: str x8, [sp, #-16]! ; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: st1d { z16.d }, p0, [x9] +; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee2 ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: add sp, sp, #16 // =16