From fdd124178e9bf5d0a33119700f6cf0f4501f6b38 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 18 Jul 2013 04:28:21 +0000 Subject: [PATCH] PPC: Support dynamic allocas with large alignment Support for dynamic stack alignments in the PPC backend has been unfinished, in part because it depends on dynamic stack realignment (which I only just recently implemented fully). Now we can also support dynamic allocas with an alignment higher than the default target stack alignment (16 bytes). In order to round up the requested size to the maximum requested alignment, we need an additional register to hold the rounded-up size. We're already using one scavenged register to hold the previous stack-pointer value (which needs to be stored with the signal-safe stdux update), and so when we have dynamic allocas and a large alignment, we allocate two emergency spill slots for the scavenger. llvm-svn: 186562 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 6 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 74 ++++++++++++++-------- test/CodeGen/PowerPC/dyn-alloca-aligned.ll | 39 ++++++++++++ 3 files changed, 92 insertions(+), 27 deletions(-) create mode 100644 test/CodeGen/PowerPC/dyn-alloca-aligned.ll diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index d846365112d..24d3a0b951a 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1237,8 +1237,12 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, RC->getAlignment(), false)); + // Might we have over-aligned allocas? + bool HasAlVars = MFI->hasVarSizedObjects() && + MFI->getMaxAlignment() > getStackAlignment(); + // These kinds of spills might need two registers. 
- if (spillsCR(MF) || spillsVRSAVE(MF)) + if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index fdc604a8457..b762a57b9c9 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -269,8 +269,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Get stack alignments. unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); - if (MaxAlign > TargetAlign) - report_fatal_error("Dynamic alloca with large aligns not supported"); + assert((maxCallFrameSize & (MaxAlign-1)) == 0 && + "Maximum call-frame size not sufficiently aligned"); // Determine the previous frame's address. If FrameSize can't be // represented as 16 bits or we need special alignment, then we load the @@ -295,40 +295,62 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { .addImm(0) .addReg(PPC::R1); } - + + bool KillNegSizeReg = MI.getOperand(1).isKill(); + unsigned NegSizeReg = MI.getOperand(1).getReg(); + // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { + if (MaxAlign > TargetAlign) { + unsigned UnalNegSizeReg = NegSizeReg; + NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC); + + // Unfortunately, there is no andi, only andi., and we can't insert that + // here because we might clobber cr0 while it is live. 
+ BuildMI(MBB, II, dl, TII.get(PPC::LI8), NegSizeReg) + .addImm(~(MaxAlign-1)); + + unsigned NegSizeReg1 = NegSizeReg; + NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC); + BuildMI(MBB, II, dl, TII.get(PPC::AND8), NegSizeReg) + .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg)) + .addReg(NegSizeReg1, RegState::Kill); + KillNegSizeReg = true; + } + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(Reg, RegState::Kill) .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - if (!MI.getOperand(1).isKill()) - BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) - .addReg(PPC::X1) - .addImm(maxCallFrameSize); - else - // Implicitly kill the register. - BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) - .addReg(PPC::X1) - .addImm(maxCallFrameSize) - .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); + .addReg(NegSizeReg, getKillRegState(KillNegSizeReg)); + BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) + .addReg(PPC::X1) + .addImm(maxCallFrameSize); } else { + if (MaxAlign > TargetAlign) { + unsigned UnalNegSizeReg = NegSizeReg; + NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC); + + // Unfortunately, there is no andi, only andi., and we can't insert that + // here because we might clobber cr0 while it is live. 
+ BuildMI(MBB, II, dl, TII.get(PPC::LI), NegSizeReg) + .addImm(~(MaxAlign-1)); + + unsigned NegSizeReg1 = NegSizeReg; + NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC); + BuildMI(MBB, II, dl, TII.get(PPC::AND), NegSizeReg) + .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg)) + .addReg(NegSizeReg1, RegState::Kill); + KillNegSizeReg = true; + } + BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(Reg, RegState::Kill) .addReg(PPC::R1) - .addReg(MI.getOperand(1).getReg()); - - if (!MI.getOperand(1).isKill()) - BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) - .addReg(PPC::R1) - .addImm(maxCallFrameSize); - else - // Implicitly kill the register. - BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) - .addReg(PPC::R1) - .addImm(maxCallFrameSize) - .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); + .addReg(NegSizeReg, getKillRegState(KillNegSizeReg)); + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) + .addReg(PPC::R1) + .addImm(maxCallFrameSize); } // Discard the DYNALLOC instruction. 
diff --git a/test/CodeGen/PowerPC/dyn-alloca-aligned.ll b/test/CodeGen/PowerPC/dyn-alloca-aligned.ll new file mode 100644 index 00000000000..a18ada73ff1 --- /dev/null +++ b/test/CodeGen/PowerPC/dyn-alloca-aligned.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s = type { i32, i32 } + +declare void @bar(i32*, i32*) #0 + +define void @goo(%struct.s* byval nocapture readonly %a, i32 signext %n) #0 { +entry: + %0 = zext i32 %n to i64 + %vla = alloca i32, i64 %0, align 128 + %vla1 = alloca i32, i64 %0, align 128 + %a2 = getelementptr inbounds %struct.s* %a, i64 0, i32 0 + %1 = load i32* %a2, align 4, !tbaa !0 + store i32 %1, i32* %vla1, align 128, !tbaa !0 + %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1 + %2 = load i32* %b, align 4, !tbaa !0 + %arrayidx3 = getelementptr inbounds i32* %vla1, i64 1 + store i32 %2, i32* %arrayidx3, align 4, !tbaa !0 + call void @bar(i32* %vla1, i32* %vla) #0 + ret void + +; CHECK-LABEL: @goo + +; CHECK-DAG: li [[REG1:[0-9]+]], -128 +; CHECK-DAG: neg [[REG2:[0-9]+]], +; CHECK: and [[REG1]], [[REG2]], [[REG1]] +; CHECK: stdux {{[0-9]+}}, 1, [[REG1]] + +; CHECK: blr + +} + +attributes #0 = { nounwind } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"}