mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
Fix PR4789. Teach eliminateFrameIndex how to handle VLDRQ and VSTRQ which cannot fold any immediate offset.
llvm-svn: 80191
This commit is contained in:
parent
28c0eed122
commit
984f8efcaa
@ -893,9 +893,9 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
|
||||
}
|
||||
}
|
||||
|
||||
int llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned FrameReg, int Offset,
|
||||
const ARMBaseInstrInfo &TII) {
|
||||
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned FrameReg, int &Offset,
|
||||
const ARMBaseInstrInfo &TII) {
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
const TargetInstrDesc &Desc = MI.getDesc();
|
||||
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
|
||||
@ -912,7 +912,8 @@ int llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
MI.setDesc(TII.get(ARM::MOVr));
|
||||
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
|
||||
MI.RemoveOperand(FrameRegIdx+1);
|
||||
return 0;
|
||||
Offset = 0;
|
||||
return true;
|
||||
} else if (Offset < 0) {
|
||||
Offset = -Offset;
|
||||
isSub = true;
|
||||
@ -924,7 +925,8 @@ int llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
// Replace the FrameIndex with sp / fp
|
||||
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
|
||||
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
|
||||
return 0;
|
||||
Offset = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Otherwise, pull as much of the immediate into this ADDri/SUBri
|
||||
@ -962,7 +964,8 @@ int llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
break;
|
||||
}
|
||||
case ARMII::AddrMode4:
|
||||
break;
|
||||
// Can't fold any offset even if it's zero.
|
||||
return false;
|
||||
case ARMII::AddrMode5: {
|
||||
ImmIdx = FrameRegIdx+1;
|
||||
InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
|
||||
@ -996,7 +999,8 @@ int llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
if (isSub)
|
||||
ImmedOffset |= 1 << NumBits;
|
||||
ImmOp.ChangeToImmediate(ImmedOffset);
|
||||
return 0;
|
||||
Offset = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Otherwise, it didn't fit. Pull in what we can to simplify the immed.
|
||||
@ -1008,5 +1012,6 @@ int llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
}
|
||||
}
|
||||
|
||||
return (isSub) ? -Offset : Offset;
|
||||
Offset = (isSub) ? -Offset : Offset;
|
||||
return Offset == 0;
|
||||
}
|
||||
|
@ -317,15 +317,16 @@ void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
|
||||
|
||||
|
||||
/// rewriteARMFrameIndex / rewriteT2FrameIndex -
|
||||
/// Rewrite MI to access 'Offset' bytes from the FP. Return the offset that
|
||||
/// could not be handled directly in MI.
|
||||
int rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned FrameReg, int Offset,
|
||||
const ARMBaseInstrInfo &TII);
|
||||
/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
|
||||
/// offset could not be handled directly in MI, and return the left-over
|
||||
/// portion by reference.
|
||||
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned FrameReg, int &Offset,
|
||||
const ARMBaseInstrInfo &TII);
|
||||
|
||||
int rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned FrameReg, int Offset,
|
||||
const ARMBaseInstrInfo &TII);
|
||||
bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned FrameReg, int &Offset,
|
||||
const ARMBaseInstrInfo &TII);
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
|
@ -1047,19 +1047,24 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
}
|
||||
|
||||
// modify MI as necessary to handle as much of 'Offset' as possible
|
||||
bool Done = false;
|
||||
if (!AFI->isThumbFunction())
|
||||
Offset = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
|
||||
Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
|
||||
else {
|
||||
assert(AFI->isThumb2Function());
|
||||
Offset = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
|
||||
Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
|
||||
}
|
||||
if (Offset == 0)
|
||||
if (Done)
|
||||
return;
|
||||
|
||||
const TargetInstrDesc &Desc = MI.getDesc();
|
||||
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
|
||||
|
||||
// If we get here, the immediate doesn't fit into the instruction. We folded
|
||||
// as much as possible above, handle the rest, providing a register that is
|
||||
// SP+LargeImm.
|
||||
assert(Offset && "This code isn't needed if offset already handled!");
|
||||
assert((Offset || AddrMode == ARMII::AddrMode4) &&
|
||||
"This code isn't needed if offset already handled!");
|
||||
|
||||
// Insert a set of r12 with the full address: r12 = sp + offset
|
||||
// If the offset we have is too large to fit into the instruction, we need
|
||||
@ -1073,15 +1078,20 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
ARMCC::CondCodes Pred = (PIdx == -1)
|
||||
? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
|
||||
unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
|
||||
if (!AFI->isThumbFunction())
|
||||
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
|
||||
Offset, Pred, PredReg, TII);
|
||||
if (Offset == 0)
|
||||
// Must be addrmode4.
|
||||
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
|
||||
else {
|
||||
assert(AFI->isThumb2Function());
|
||||
emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
|
||||
Offset, Pred, PredReg, TII);
|
||||
if (!AFI->isThumbFunction())
|
||||
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
|
||||
Offset, Pred, PredReg, TII);
|
||||
else {
|
||||
assert(AFI->isThumb2Function());
|
||||
emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
|
||||
Offset, Pred, PredReg, TII);
|
||||
}
|
||||
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
|
||||
}
|
||||
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
|
||||
}
|
||||
|
||||
/// Move iterator past the next bunch of callee save load / store ops for
|
||||
|
@ -319,9 +319,9 @@ immediateOffsetOpcode(unsigned opcode)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned FrameReg, int Offset,
|
||||
const ARMBaseInstrInfo &TII) {
|
||||
bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned FrameReg, int &Offset,
|
||||
const ARMBaseInstrInfo &TII) {
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
const TargetInstrDesc &Desc = MI.getDesc();
|
||||
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
|
||||
@ -340,7 +340,8 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
MI.setDesc(TII.get(ARM::tMOVgpr2gpr));
|
||||
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
|
||||
MI.RemoveOperand(FrameRegIdx+1);
|
||||
return 0;
|
||||
Offset = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Offset < 0) {
|
||||
@ -355,7 +356,8 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
if (ARM_AM::getT2SOImmVal(Offset) != -1) {
|
||||
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
|
||||
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
|
||||
return 0;
|
||||
Offset = 0;
|
||||
return true;
|
||||
}
|
||||
// Another common case: imm12.
|
||||
if (Offset < 4096) {
|
||||
@ -365,7 +367,8 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
MI.setDesc(TII.get(NewOpc));
|
||||
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
|
||||
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
|
||||
return 0;
|
||||
Offset = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Otherwise, extract 8 adjacent bits from the immediate into this
|
||||
@ -387,7 +390,7 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
|
||||
if (OffsetReg != 0) {
|
||||
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
|
||||
return Offset;
|
||||
return Offset == 0;
|
||||
}
|
||||
|
||||
MI.RemoveOperand(FrameRegIdx+1);
|
||||
@ -413,11 +416,14 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
NumBits = 12;
|
||||
}
|
||||
} else {
|
||||
// VFP address modes.
|
||||
assert(AddrMode == ARMII::AddrMode5);
|
||||
int InstrOffs=ARM_AM::getAM5Offset(MI.getOperand(FrameRegIdx+1).getImm());
|
||||
if (ARM_AM::getAM5Op(MI.getOperand(FrameRegIdx+1).getImm()) ==ARM_AM::sub)
|
||||
InstrOffs *= -1;
|
||||
// VFP and NEON address modes.
|
||||
int InstrOffs = 0;
|
||||
if (AddrMode == ARMII::AddrMode5) {
|
||||
const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1);
|
||||
InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
|
||||
if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
|
||||
InstrOffs *= -1;
|
||||
}
|
||||
NumBits = 8;
|
||||
Scale = 4;
|
||||
Offset += InstrOffs * 4;
|
||||
@ -448,7 +454,8 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
ImmedOffset = -ImmedOffset;
|
||||
}
|
||||
ImmOp.ChangeToImmediate(ImmedOffset);
|
||||
return 0;
|
||||
Offset = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Otherwise, offset doesn't fit. Pull in what we can to simplify
|
||||
@ -468,5 +475,6 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
Offset &= ~(Mask*Scale);
|
||||
}
|
||||
|
||||
return (isSub) ? -Offset : Offset;
|
||||
Offset = (isSub) ? -Offset : Offset;
|
||||
return Offset == 0;
|
||||
}
|
||||
|
57
test/CodeGen/ARM/spill-q.ll
Normal file
57
test/CodeGen/ARM/spill-q.ll
Normal file
@ -0,0 +1,57 @@
|
||||
; RUN: llvm-as < %s | llc -mtriple=armv7-elf -mattr=+neon | FileCheck %s
|
||||
; PR4789
|
||||
|
||||
%bar = type { float, float, float }
|
||||
%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
|
||||
%foo = type { <4 x float> }
|
||||
%quux = type { i32 (...)**, %baz*, i32 }
|
||||
%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
|
||||
|
||||
declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
|
||||
|
||||
define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
|
||||
; CHECK: aaa:
|
||||
; CHECK: vstmia sp
|
||||
; CHECK: vldmia sp
|
||||
entry:
|
||||
%0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
|
||||
store float 6.300000e+01, float* undef, align 4
|
||||
%1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
|
||||
store float 0.000000e+00, float* undef, align 4
|
||||
%2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
|
||||
%val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb193, %entry
|
||||
%besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
|
||||
%part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
|
||||
%3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2]
|
||||
%4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1]
|
||||
%5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
|
||||
%6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
|
||||
%7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1]
|
||||
%8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
|
||||
%9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
|
||||
%10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
|
||||
%11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1]
|
||||
%12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
|
||||
%13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1]
|
||||
%14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1]
|
||||
%15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1]
|
||||
%16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
|
||||
%17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1]
|
||||
%18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
|
||||
%19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
|
||||
%20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
|
||||
%21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
|
||||
%22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
|
||||
br i1 undef, label %bb193, label %bb186
|
||||
|
||||
bb186: ; preds = %bb4
|
||||
br label %bb193
|
||||
|
||||
bb193: ; preds = %bb186, %bb4
|
||||
%besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
|
||||
%23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
|
||||
br label %bb4
|
||||
}
|
57
test/CodeGen/Thumb2/thumb2-spill-q.ll
Normal file
57
test/CodeGen/Thumb2/thumb2-spill-q.ll
Normal file
@ -0,0 +1,57 @@
|
||||
; RUN: llvm-as < %s | llc -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s
|
||||
; PR4789
|
||||
|
||||
%bar = type { float, float, float }
|
||||
%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
|
||||
%foo = type { <4 x float> }
|
||||
%quux = type { i32 (...)**, %baz*, i32 }
|
||||
%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
|
||||
|
||||
declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
|
||||
|
||||
define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
|
||||
; CHECK: aaa:
|
||||
; CHECK: vstmia sp
|
||||
; CHECK: vldmia sp
|
||||
entry:
|
||||
%0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
|
||||
store float 6.300000e+01, float* undef, align 4
|
||||
%1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
|
||||
store float 0.000000e+00, float* undef, align 4
|
||||
%2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
|
||||
%val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb193, %entry
|
||||
%besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
|
||||
%part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
|
||||
%3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2]
|
||||
%4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1]
|
||||
%5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
|
||||
%6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
|
||||
%7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1]
|
||||
%8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
|
||||
%9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
|
||||
%10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
|
||||
%11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1]
|
||||
%12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
|
||||
%13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1]
|
||||
%14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1]
|
||||
%15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1]
|
||||
%16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
|
||||
%17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1]
|
||||
%18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
|
||||
%19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
|
||||
%20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
|
||||
%21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
|
||||
%22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
|
||||
br i1 undef, label %bb193, label %bb186
|
||||
|
||||
bb186: ; preds = %bb4
|
||||
br label %bb193
|
||||
|
||||
bb193: ; preds = %bb186, %bb4
|
||||
%besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
|
||||
%23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
|
||||
br label %bb4
|
||||
}
|
Loading…
Reference in New Issue
Block a user