diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index acd71bce015..25237bf50dd 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -678,14 +678,14 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) && "Expected promotable zero stores."); - MachineBasicBlock::iterator NextI = I; - ++NextI; + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); // If NextI is the second of the two instructions to be merged, we need // to skip one further. Either way we merge will invalidate the iterator, // and we don't need to scan the new instruction, as it's a pairwise // instruction, which we're not considering for further action anyway. if (NextI == MergeMI) - ++NextI; + NextI = next_nodbg(NextI, E); unsigned Opc = I->getOpcode(); bool IsScaled = !TII->isUnscaledLdSt(Opc); @@ -748,18 +748,17 @@ static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function<bool(MachineInstr &, bool)> &Fn) { auto MBB = MI.getParent(); - for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(), - E = MBB->rend(); - I != E; I++) { + for (MachineInstr &I : + instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) { if (!Limit) return false; --Limit; - bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) { + bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) { return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() && TRI->regsOverlap(MOP.getReg(), DefReg); }); - if (!Fn(*I, isDef)) + if (!Fn(I, isDef)) return false; if (isDef) break; @@ -783,14 +782,14 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, const LdStPairFlags 
&Flags) { - MachineBasicBlock::iterator NextI = I; - ++NextI; + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); // If NextI is the second of the two instructions to be merged, we need // to skip one further. Either way we merge will invalidate the iterator, // and we don't need to scan the new instruction, as it's a pairwise // instruction, which we're not considering for further action anyway. if (NextI == Paired) - ++NextI; + NextI = next_nodbg(NextI, E); int SExtIdx = Flags.getSExtIdx(); unsigned Opc = @@ -1009,8 +1008,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, MachineBasicBlock::iterator StoreI) { - MachineBasicBlock::iterator NextI = LoadI; - ++NextI; + MachineBasicBlock::iterator NextI = + next_nodbg(LoadI, LoadI->getParent()->end()); int LoadSize = TII->getMemScale(*LoadI); int StoreSize = TII->getMemScale(*StoreI); @@ -1188,7 +1187,7 @@ bool AArch64LoadStoreOpt::findMatchingStore( unsigned Count = 0; do { - --MBBI; + MBBI = prev_nodbg(MBBI, B); MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there @@ -1440,7 +1439,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator MBBI = I; MachineBasicBlock::iterator MBBIWithRenameReg; MachineInstr &FirstMI = *I; - ++MBBI; + MBBI = next_nodbg(MBBI, E); bool MayLoad = FirstMI.mayLoad(); bool IsUnscaled = TII->isUnscaledLdSt(FirstMI); @@ -1468,7 +1467,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Remember any instructions that read/write memory between FirstMI and MI. 
SmallVector<MachineInstr *, 4> MemInsns; - for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { + for (unsigned Count = 0; MBBI != E && Count < Limit; + MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; UsedInBetween.accumulate(MI); @@ -1637,12 +1637,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, assert((Update->getOpcode() == AArch64::ADDXri || Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); // Return the instruction following the merged instruction, which is // the instruction following our unmerged load. Unless that's the add/sub // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; + if (NextI == Update) + NextI = next_nodbg(NextI, E); int Value = Update->getOperand(2).getImm(); assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && @@ -1780,7 +1781,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( // insn (inclusive) and the second insn. 
ModifiedRegUnits.clear(); UsedRegUnits.clear(); - ++MBBI; + MBBI = next_nodbg(MBBI, E); // We can't post-increment the stack pointer if any instruction between // the memory access (I) and the increment (MBBI) can access the memory @@ -1796,7 +1797,8 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( return E; } - for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { + for (unsigned Count = 0; MBBI != E && Count < Limit; + MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there @@ -1854,7 +1856,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( UsedRegUnits.clear(); unsigned Count = 0; do { - --MBBI; + MBBI = prev_nodbg(MBBI, B); MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there diff --git a/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir b/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir new file mode 100644 index 00000000000..ce2174a5857 --- /dev/null +++ b/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir @@ -0,0 +1,377 @@ +# Strip out debug info, then run ldst-opt with limit=1. +# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass mir-strip-debug,aarch64-ldst-opt -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s +# +# Run ldst-opt with limit=1, then strip out debug info. +# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt,mir-strip-debug -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s +--- + +### STG and its offset limits + +# CHECK-LABEL: name: test_STG_post +# CHECK: STGPostIndex $x0, $x0, 7 +name: test_STG_post +body: | + bb.0.entry: + liveins: $x0 + + STGOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 112, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... 
+ +# CHECK-LABEL: name: test_STG_post_same_reg +# CHECK: STGPostIndex $x1, $x0, 7 +name: test_STG_post_same_reg +body: | + bb.0.entry: + liveins: $x0, $x1 + + STGOffset $x1, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 112, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STG_post_unaligned +# CHECK: STGOffset $x0, $x0, 0 +# CHECK-NEXT: ADDXri $x0, 8, 0 +name: test_STG_post_unaligned +body: | + bb.0.entry: + liveins: $x0 + + STGOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 8, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STG_post2 +# CHECK: STGPostIndex $x0, $x0, -256 +name: test_STG_post2 +body: | + bb.0.entry: + liveins: $x0 + + STGOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = SUBXri $x0, 4096, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STG_post3 +# CHECK: STGOffset $x0, $x0, 0 +# CHECK-NEXT: SUBXri $x0, 4112, 0 +name: test_STG_post3 +body: | + bb.0.entry: + liveins: $x0 + + STGOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = SUBXri $x0, 4112, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STG_post4 +# CHECK: STGPostIndex $x0, $x0, 255 +name: test_STG_post4 +body: | + bb.0.entry: + liveins: $x0 + + STGOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 4080, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STG_post5 +# CHECK: STGOffset $x0, $x0, 0 +# CHECK-NEXT: ADDXri $x0, 4096, 0 +name: test_STG_post5 +body: | + bb.0.entry: + liveins: $x0 + + STGOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 4096, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +### The rest of ST*G variants. 
+ +# CHECK-LABEL: name: test_STZG_post +# CHECK: STZGPostIndex $x0, $x0, 7 +name: test_STZG_post +body: | + bb.0.entry: + liveins: $x0 + + STZGOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 112, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_ST2G_post +# CHECK: ST2GPostIndex $x0, $x0, 7 +name: test_ST2G_post +body: | + bb.0.entry: + liveins: $x0 + + ST2GOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 112, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STZ2G_post +# CHECK: STZ2GPostIndex $x0, $x0, 7 +name: test_STZ2G_post +body: | + bb.0.entry: + liveins: $x0 + + STZ2GOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 112, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +### STGP and its offset limits + +# CHECK-LABEL: name: test_STGP_post +# CHECK: STGPpost $x1, $x2, $x0, 7 +name: test_STGP_post +body: | + bb.0.entry: + liveins: $x0, $x1, $x2 + + STGPi $x1, $x2, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 112, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STGP_post2 +# CHECK: STGPpost $x1, $x2, $x0, -64 +name: test_STGP_post2 +body: | + bb.0.entry: + liveins: $x0, $x1, $x2 + + STGPi $x1, $x2, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = SUBXri $x0, 1024, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STGP_post3 +# CHECK: STGPi $x1, $x2, $x0, 0 +# CHECK-NEXT: SUBXri $x0, 1040, 0 +name: test_STGP_post3 +body: | + bb.0.entry: + liveins: $x0, $x1, $x2 + + STGPi $x1, $x2, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = SUBXri $x0, 1040, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... 
+ +# CHECK-LABEL: name: test_STGP_post4 +# CHECK: STGPpost $x1, $x2, $x0, 63 +name: test_STGP_post4 +body: | + bb.0.entry: + liveins: $x0, $x1, $x2 + + STGPi $x1, $x2, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 1008, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STGP_post5 +# CHECK: STGPi $x1, $x2, $x0, 0 +# CHECK-NEXT: ADDXri $x0, 1024, 0 +name: test_STGP_post5 +body: | + bb.0.entry: + liveins: $x0, $x1, $x2 + + STGPi $x1, $x2, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 1024, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +### Pre-indexed forms + +# CHECK-LABEL: name: test_STG_pre +# CHECK: STGPreIndex $x0, $x0, 10 +name: test_STG_pre +body: | + bb.0.entry: + liveins: $x0 + + STGOffset $x0, $x0, 10 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 160, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STGP_pre +# CHECK: STGPpre $x1, $x2, $x0, 10 +name: test_STGP_pre +body: | + bb.0.entry: + liveins: $x0, $x1, $x2 + + STGPi $x1, $x2, $x0, 10 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 160, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +### Pre-indexed forms with add/sub coming before the store. + +# CHECK-LABEL: name: test_STG_pre_back +# CHECK: STGPreIndex $x0, $x0, 2 +name: test_STG_pre_back +body: | + bb.0.entry: + liveins: $x0 + + $x0 = ADDXri $x0, 32, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + STGOffset $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STGP_pre_back +# CHECK: STGPpre $x1, $x2, $x0, -3 +name: test_STGP_pre_back +body: | + bb.0.entry: + liveins: $x0, $x1, $x2 + + $x0 = SUBXri $x0, 48, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + STGPi $x1, $x2, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... 
+ +### STGP with source register == address register + +# CHECK-LABEL: name: test_STGP_post_same_reg +# CHECK: STGPpost $x0, $x0, $x0, 7 +name: test_STGP_post_same_reg +body: | + bb.0.entry: + liveins: $x0 + + STGPi $x0, $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 112, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# CHECK-LABEL: name: test_STGP_pre_same_reg +# CHECK: STGPpre $x0, $x0, $x0, 7 +name: test_STGP_pre_same_reg +body: | + bb.0.entry: + liveins: $x0 + + STGPi $x0, $x0, $x0, 7 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + $x0 = ADDXri $x0, 112, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... + +# This case can not be merged because the source register is always read before writeback. +# CHECK-LABEL: name: test_STGP_pre_back_same_reg +# CHECK: SUBXri $x0, 48, 0 +# CHECK-NEXT: STGPi $x0, $x0, $x0, 0 +name: test_STGP_pre_back_same_reg +body: | + bb.0.entry: + liveins: $x0 + + $x0 = SUBXri $x0, 48, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + STGPi $x0, $x0, $x0, 0 + DBG_VALUE $x0, 0 + DBG_VALUE $x0, 0 + RET_ReallyLR implicit $x0 +... diff --git a/test/CodeGen/AArch64/ldst-opt-mte.mir b/test/CodeGen/AArch64/ldst-opt-mte.mir index b44258abd88..fd09af8a85f 100644 --- a/test/CodeGen/AArch64/ldst-opt-mte.mir +++ b/test/CodeGen/AArch64/ldst-opt-mte.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -debugify-and-strip-all-safe -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s --- ### STG and its offset limits