diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 1b843c42813..672163f569a 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -886,10 +886,13 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm, return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0; case ARMII::AddrModeT2_i7s4: return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0; + case ARMII::AddrMode2: case ARMII::AddrModeT2_i8: return std::abs(Imm) < (((1 << 8) * 1) - 1); case ARMII::AddrModeT2_i12: return Imm >= 0 && Imm < (((1 << 12) * 1) - 1); + case ARMII::AddrModeT2_i8s4: + return std::abs(Imm) < (((1 << 8) * 4) - 1) && Imm % 4 == 0; default: llvm_unreachable("Unhandled Addressing mode"); } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 5fe61809f31..bf6f7752b40 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1502,12 +1502,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); } else { MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI); - if (Offset == Bytes) { - NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); - } else if (!isAM5 && Offset == -Bytes) { - NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); - } else + if (MergeInstr == MBB.end()) return false; + + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); + if ((isAM5 && Offset != Bytes) || + (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) { + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); + if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII)) + return false; + } } LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); MBB.erase(MergeInstr); @@ -1546,7 +1550,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { (void)MIB; LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); } else { - int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift); auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) @@ -1576,7 +1580,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { // the vestigal zero-reg offset register. When that's fixed, this clause // can be removed entirely. if (isAM2 && NewOpc == ARM::STR_POST_IMM) { - int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift); // STR_PRE, STR_POST auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) @@ -1629,13 +1633,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); unsigned NewOpc; - if (Offset == 8 || Offset == -8) { + if (Offset != 0) { NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; } else { MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI); - if (Offset == 8 || Offset == -8) { - NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; - } else + if (MergeInstr == MBB.end()) + return false; + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; + if (!isLegalAddressImm(NewOpc, Offset, TII)) return false; } LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); diff --git a/test/CodeGen/ARM/arm-shrink-wrapping.ll b/test/CodeGen/ARM/arm-shrink-wrapping.ll index b5c63af5a34..ac0e5b76b6b 100644 --- a/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ b/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -1190,11 +1190,10 @@ define i32 @callVariadicFunc(i32 %cond, i32 %N) "frame-pointer"="all" { ; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.then ; THUMB-ENABLE-NEXT: push {r7, lr} ; THUMB-ENABLE-NEXT: mov r7, sp -; THUMB-ENABLE-NEXT: sub sp, #12 +; THUMB-ENABLE-NEXT: strd r1, r1, [sp, #-12]! ; THUMB-ENABLE-NEXT: mov r0, r1 ; THUMB-ENABLE-NEXT: mov r2, r1 ; THUMB-ENABLE-NEXT: mov r3, r1 -; THUMB-ENABLE-NEXT: strd r1, r1, [sp] ; THUMB-ENABLE-NEXT: str r1, [sp, #8] ; THUMB-ENABLE-NEXT: bl _someVariadicFunc ; THUMB-ENABLE-NEXT: lsls r0, r0, #3 diff --git a/test/CodeGen/Thumb2/mve-float32regloops.ll b/test/CodeGen/Thumb2/mve-float32regloops.ll index 7e4603e4b4c..44a152b32c0 100644 --- a/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1715,7 +1715,7 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(%struct.arm_biquad_casd_ ; CHECK-NEXT: vmov r3, s10 ; CHECK-NEXT: vldrw.u32 q3, [r11, #48] ; CHECK-NEXT: vfma.f32 q1, q0, r3 -; CHECK-NEXT: ldr r3, [r1] +; CHECK-NEXT: ldr r3, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q7, r6 ; CHECK-NEXT: vldrw.u32 q6, [r11, #64] ; CHECK-NEXT: vfma.f32 q1, q3, r3 @@ -1725,7 +1725,6 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(%struct.arm_biquad_casd_ ; CHECK-NEXT: vfma.f32 q1, q5, r0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #64] @ 16-byte Reload ; CHECK-NEXT: vfma.f32 q1, q4, r7 -; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: vfma.f32 q1, q0, r9 ; CHECK-NEXT: vmov.f32 s2, s8 ; CHECK-NEXT: vstrb.8 q1, [r5], #16