
[AArch64] Enable clustering memory accesses to fixed stack objects

Summary:
r347747 added support for clustering mem ops with FI base operands,
including support for fixed stack objects in shouldClusterFI, but
apparently this was never tested.

This patch fixes shouldClusterFI to work with scaled as well as
unscaled load/store instructions, and fixes the ordering of memory ops
in MemOpInfo::operator< to ensure that memory addresses always
increase, regardless of which direction the stack grows.
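
For context, the ordering fix amounts to sorting FI-based memory operands by
(frame index, offset, node number) with a plain increasing offset comparison.
A minimal standalone sketch of that sort key (hypothetical MemOpKey type, not
the actual MemOpInfo struct in MachineScheduler.cpp):

#include <cstdint>
#include <tuple>

// Hypothetical, trimmed-down illustration of the new ordering: for
// frame-index bases the sort key is (frame index, byte offset, node number),
// with offsets compared in plain increasing order regardless of which way
// the stack grows.
struct MemOpKey {
  int FrameIndex;    // BaseOp->getIndex()
  int64_t Offset;    // byte offset from the frame-index base
  unsigned NodeNum;  // SU->NodeNum, a deterministic tie-breaker
  bool operator<(const MemOpKey &RHS) const {
    return std::tie(FrameIndex, Offset, NodeNum) <
           std::tie(RHS.FrameIndex, RHS.Offset, RHS.NodeNum);
  }
};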

Subscribers: MatzeB, kristof.beyls, hiraditya, javed.absar, arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71334
Jay Foad 2019-12-11 10:29:23 +00:00
parent 4659cb8849
commit 84a719b414
8 changed files with 143 additions and 145 deletions


@@ -1498,7 +1498,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
                  : BaseOp->getIndex() < RHS.BaseOp->getIndex();
       if (Offset != RHS.Offset)
-        return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
+        return Offset < RHS.Offset;
       return SU->NodeNum < RHS.SU->NodeNum;
     }


@@ -2230,54 +2230,82 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   return true;
 }
 
-static unsigned getOffsetStride(unsigned Opc) {
+// Scaling factor for unscaled load or store.
+int AArch64InstrInfo::getMemScale(unsigned Opc) {
   switch (Opc) {
   default:
-    return 0;
-  case AArch64::LDURQi:
-  case AArch64::STURQi:
-    return 16;
-  case AArch64::LDURXi:
-  case AArch64::LDURDi:
-  case AArch64::STURXi:
-  case AArch64::STURDi:
-    return 8;
-  case AArch64::LDURWi:
+    llvm_unreachable("Opcode has unknown scale!");
+  case AArch64::LDRBBui:
+  case AArch64::LDURBBi:
+  case AArch64::LDRSBWui:
+  case AArch64::LDURSBWi:
+  case AArch64::STRBBui:
+  case AArch64::STURBBi:
+    return 1;
+  case AArch64::LDRHHui:
+  case AArch64::LDURHHi:
+  case AArch64::LDRSHWui:
+  case AArch64::LDURSHWi:
+  case AArch64::STRHHui:
+  case AArch64::STURHHi:
+    return 2;
+  case AArch64::LDRSui:
   case AArch64::LDURSi:
+  case AArch64::LDRSWui:
   case AArch64::LDURSWi:
-  case AArch64::STURWi:
+  case AArch64::LDRWui:
+  case AArch64::LDURWi:
+  case AArch64::STRSui:
   case AArch64::STURSi:
+  case AArch64::STRWui:
+  case AArch64::STURWi:
+  case AArch64::LDPSi:
+  case AArch64::LDPSWi:
+  case AArch64::LDPWi:
+  case AArch64::STPSi:
+  case AArch64::STPWi:
     return 4;
+  case AArch64::LDRDui:
+  case AArch64::LDURDi:
+  case AArch64::LDRXui:
+  case AArch64::LDURXi:
+  case AArch64::STRDui:
+  case AArch64::STURDi:
+  case AArch64::STRXui:
+  case AArch64::STURXi:
+  case AArch64::LDPDi:
+  case AArch64::LDPXi:
+  case AArch64::STPDi:
+  case AArch64::STPXi:
+    return 8;
+  case AArch64::LDRQui:
+  case AArch64::LDURQi:
+  case AArch64::STRQui:
+  case AArch64::STURQi:
+  case AArch64::LDPQi:
+  case AArch64::STPQi:
+  case AArch64::STGOffset:
+  case AArch64::STZGOffset:
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+  case AArch64::STGPi:
+    return 16;
   }
 }
 
 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
 // scaled.
 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
-  unsigned OffsetStride = getOffsetStride(Opc);
-  if (OffsetStride == 0)
-    return false;
+  int Scale = AArch64InstrInfo::getMemScale(Opc);
   // If the byte-offset isn't a multiple of the stride, we can't scale this
   // offset.
-  if (Offset % OffsetStride != 0)
+  if (Offset % Scale != 0)
     return false;
   // Convert the byte-offset used by unscaled into an "element" offset used
   // by the scaled pair load/store instructions.
-  Offset /= OffsetStride;
-  return true;
-}
-
-// Unscale the scaled offsets. Returns false if the scaled offset can't be
-// unscaled.
-static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
-  unsigned OffsetStride = getOffsetStride(Opc);
-  if (OffsetStride == 0)
-    return false;
-  // Convert the "element" offset used by scaled pair load/store instructions
-  // into the byte-offset used by unscaled.
-  Offset *= OffsetStride;
+  Offset /= Scale;
   return true;
 }
@@ -2308,15 +2336,17 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
   int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
   int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
   assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
-  // Get the byte-offset from the object offset.
-  if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
+  // Convert to scaled object offsets.
+  int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
+  if (ObjectOffset1 % Scale1 != 0)
     return false;
+  ObjectOffset1 /= Scale1;
+  int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
+  if (ObjectOffset2 % Scale2 != 0)
+    return false;
+  ObjectOffset2 /= Scale2;
   ObjectOffset1 += Offset1;
   ObjectOffset2 += Offset2;
-  // Get the "element" index in the object.
-  if (!scaleOffset(Opcode1, ObjectOffset1) ||
-      !scaleOffset(Opcode2, ObjectOffset2))
-    return false;
   return ObjectOffset1 + 1 == ObjectOffset2;
 }
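
As a concrete check of the new shouldClusterFI arithmetic, here is a small
standalone sketch (hypothetical wouldCluster helper, not the LLVM API) that
reproduces the scaled-object-offset computation with the numbers from the
merge_fixedstack MIR test added below: a 16-byte fixed object at offset -16,
accessed by two scaled 8-byte stores with immediate offsets 0 and 1.

#include <cassert>
#include <cstdint>

// Hypothetical standalone version of the adjacency check: both object
// offsets are converted to "element" units of each access's scale before
// the +1 comparison.
static bool wouldCluster(int64_t ObjectOffset1, int Scale1, int64_t Offset1,
                         int64_t ObjectOffset2, int Scale2, int64_t Offset2) {
  if (ObjectOffset1 % Scale1 != 0 || ObjectOffset2 % Scale2 != 0)
    return false;
  ObjectOffset1 = ObjectOffset1 / Scale1 + Offset1;
  ObjectOffset2 = ObjectOffset2 / Scale2 + Offset2;
  return ObjectOffset1 + 1 == ObjectOffset2;
}

int main() {
  // merge_fixedstack below: 16-byte fixed object at offset -16, two STRXui
  // (scale 8) with immediate offsets 0 and 1: -16/8 + 0 = -2 and
  // -16/8 + 1 = -1, which are adjacent elements, so the accesses cluster.
  assert(wouldCluster(-16, 8, 0, -16, 8, 1));
  return 0;
}
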
@@ -2376,7 +2406,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
   // The caller should already have ordered First/SecondLdSt by offset.
   // Note: except for non-equal frame index bases
   if (BaseOp1.isFI()) {
-    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
+    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
           "Caller should have ordered offsets.");
 
     const MachineFrameInfo &MFI =
@@ -2385,8 +2415,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
                            BaseOp2.getIndex(), Offset2, SecondOpc);
   }
 
-  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
-         "Caller should have ordered offsets.");
+  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
 
   return Offset1 + 1 == Offset2;
 }


@@ -89,6 +89,12 @@ public:
   /// if there is a corresponding unscaled variant available.
   static Optional<unsigned> getUnscaledLdSt(unsigned Opc);
 
+  /// Scaling factor for (scaled or unscaled) load or store.
+  static int getMemScale(unsigned Opc);
+  static int getMemScale(const MachineInstr &MI) {
+    return getMemScale(MI.getOpcode());
+  }
+
   /// Returns the index for the immediate for a given instruction.
   static unsigned getLoadStoreImmIdx(unsigned Opc);


@@ -230,69 +230,6 @@ static bool isTagStore(const MachineInstr &MI) {
   }
 }
 
-// Scaling factor for unscaled load or store.
-static int getMemScale(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    llvm_unreachable("Opcode has unknown scale!");
-  case AArch64::LDRBBui:
-  case AArch64::LDURBBi:
-  case AArch64::LDRSBWui:
-  case AArch64::LDURSBWi:
-  case AArch64::STRBBui:
-  case AArch64::STURBBi:
-    return 1;
-  case AArch64::LDRHHui:
-  case AArch64::LDURHHi:
-  case AArch64::LDRSHWui:
-  case AArch64::LDURSHWi:
-  case AArch64::STRHHui:
-  case AArch64::STURHHi:
-    return 2;
-  case AArch64::LDRSui:
-  case AArch64::LDURSi:
-  case AArch64::LDRSWui:
-  case AArch64::LDURSWi:
-  case AArch64::LDRWui:
-  case AArch64::LDURWi:
-  case AArch64::STRSui:
-  case AArch64::STURSi:
-  case AArch64::STRWui:
-  case AArch64::STURWi:
-  case AArch64::LDPSi:
-  case AArch64::LDPSWi:
-  case AArch64::LDPWi:
-  case AArch64::STPSi:
-  case AArch64::STPWi:
-    return 4;
-  case AArch64::LDRDui:
-  case AArch64::LDURDi:
-  case AArch64::LDRXui:
-  case AArch64::LDURXi:
-  case AArch64::STRDui:
-  case AArch64::STURDi:
-  case AArch64::STRXui:
-  case AArch64::STURXi:
-  case AArch64::LDPDi:
-  case AArch64::LDPXi:
-  case AArch64::STPDi:
-  case AArch64::STPXi:
-    return 8;
-  case AArch64::LDRQui:
-  case AArch64::LDURQi:
-  case AArch64::STRQui:
-  case AArch64::STURQi:
-  case AArch64::LDPQi:
-  case AArch64::STPQi:
-  case AArch64::STGOffset:
-  case AArch64::STZGOffset:
-  case AArch64::ST2GOffset:
-  case AArch64::STZ2GOffset:
-  case AArch64::STGPi:
-    return 16;
-  }
-}
-
 static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                          bool *IsValidLdStrOpc = nullptr) {
   if (IsValidLdStrOpc)
@@ -603,7 +540,7 @@ static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
   // ST*G and all paired ldst have the same scale in pre/post-indexed variants
   // as in the "unsigned offset" variant.
   // All other pre/post indexed ldst instructions are unscaled.
-  Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1;
+  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
 
   if (IsPaired) {
     MinOffset = -64;
@@ -635,8 +572,8 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                   MachineInstr &StoreInst,
                                   const AArch64InstrInfo *TII) {
   assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
-  int LoadSize = getMemScale(LoadInst);
-  int StoreSize = getMemScale(StoreInst);
+  int LoadSize = TII->getMemScale(LoadInst);
+  int StoreSize = TII->getMemScale(StoreInst);
   int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
                              ? getLdStOffsetOp(StoreInst).getImm()
                              : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
@@ -746,7 +683,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
   unsigned Opc = I->getOpcode();
   bool IsScaled = !TII->isUnscaledLdSt(Opc);
-  int OffsetStride = IsScaled ? 1 : getMemScale(*I);
+  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
 
   bool MergeForward = Flags.getMergeForward();
 
   // Insert our new paired instruction after whichever of the paired
@@ -853,7 +790,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
   bool IsUnscaled = TII->isUnscaledLdSt(Opc);
-  int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;
+  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
 
   bool MergeForward = Flags.getMergeForward();
@@ -938,11 +875,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   // We're trying to pair instructions that differ in how they are scaled. If
   // I is scaled then scale the offset of Paired accordingly. Otherwise, do
   // the opposite (i.e., make Paired's offset unscaled).
-  int MemSize = getMemScale(*Paired);
+  int MemSize = TII->getMemScale(*Paired);
   if (PairedIsUnscaled) {
     // If the unscaled offset isn't a multiple of the MemSize, we can't
     // pair the operations together.
-    assert(!(PairedOffset % getMemScale(*Paired)) &&
+    assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
            "Offset should be a multiple of the stride!");
     PairedOffset /= MemSize;
   } else {
@@ -967,9 +904,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
   // Scale the immediate offset, if necessary.
   if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
-    assert(!(OffsetImm % getMemScale(*RtMI)) &&
+    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
            "Unscaled offset cannot be scaled.");
-    OffsetImm /= getMemScale(*RtMI);
+    OffsetImm /= TII->getMemScale(*RtMI);
   }
 
   // Construct the new instruction.
@@ -1069,8 +1006,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
   MachineBasicBlock::iterator NextI = LoadI;
   ++NextI;
 
-  int LoadSize = getMemScale(*LoadI);
-  int StoreSize = getMemScale(*StoreI);
+  int LoadSize = TII->getMemScale(*LoadI);
+  int StoreSize = TII->getMemScale(*StoreI);
   Register LdRt = getLdStRegOp(*LoadI).getReg();
   const MachineOperand &StMO = getLdStRegOp(*StoreI);
   Register StRt = getLdStRegOp(*StoreI).getReg();
@@ -1489,7 +1426,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   Register Reg = getLdStRegOp(FirstMI).getReg();
   Register BaseReg = getLdStBaseOp(FirstMI).getReg();
   int Offset = getLdStOffsetOp(FirstMI).getImm();
-  int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
+  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
   bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
 
   Optional<bool> MaybeCanRename = None;
@@ -1534,7 +1471,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
       // We're trying to pair instructions that differ in how they are scaled.
       // If FirstMI is scaled then scale the offset of MI accordingly.
       // Otherwise, do the opposite (i.e., make MI's offset unscaled).
-      int MemSize = getMemScale(MI);
+      int MemSize = TII->getMemScale(MI);
       if (MIIsUnscaled) {
         // If the unscaled offset isn't a multiple of the MemSize, we can't
        // pair the operations together: bail and keep looking.
@@ -1792,7 +1729,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
   MachineBasicBlock::iterator MBBI = I;
   Register BaseReg = getLdStBaseOp(MemMI).getReg();
-  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
+  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
 
   // Scan forward looking for post-index opportunities. Updating instructions
   // can't be formed if the memory instruction doesn't have the offset we're
@@ -1963,7 +1900,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   // with Offset-1)
   bool IsUnscaled = TII->isUnscaledLdSt(MI);
   int Offset = getLdStOffsetOp(MI).getImm();
-  int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
+  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
   // Allow one more for offset.
   if (Offset > 0)
     Offset -= OffsetStride;
@@ -2029,7 +1966,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
   // The immediate in the load/store is scaled by the size of the memory
   // operation. The immediate in the add we're looking for,
   // however, is not, so adjust here.
-  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
 
   // Look forward to try to find a pre-index instruction. For example,
   // ldr x1, [x0, #64]


@@ -113,9 +113,9 @@ define void @bzero_20_stack() {
 define void @bzero_26_stack() {
 ; CHECK-LABEL: bzero_26_stack:
-; CHECK: stp xzr, xzr, [sp, #8]
-; CHECK-NEXT: str xzr, [sp]
+; CHECK: stp xzr, xzr, [sp]
 ; CHECK-NEXT: strh wzr, [sp, #24]
+; CHECK-NEXT: str xzr, [sp, #16]
 ; CHECK-NEXT: bl something
   %buf = alloca [26 x i8], align 1
   %cast = bitcast [26 x i8]* %buf to i8*
@@ -259,9 +259,9 @@ define void @memset_12_stack() {
 define void @memset_16_stack() {
 ; CHECK-LABEL: memset_16_stack:
 ; CHECK: mov x8, #-6148914691236517206
-; CHECK-NEXT: str x8, [sp, #-32]!
 ; CHECK-NEXT: mov x0, sp
 ; CHECK-NEXT: stp x8, x30, [sp, #8]
+; CHECK-NEXT: str x8, [sp]
 ; CHECK-NEXT: bl something
   %buf = alloca [16 x i8], align 1
   %cast = bitcast [16 x i8]* %buf to i8*


@@ -1,11 +1,10 @@
 #RUN: llc -mtriple=aarch64-- -mcpu=cyclone -run-pass machine-scheduler -o - %s | FileCheck %s
-...
 ---
 name: merge_stack
 # CHECK-LABEL: name: merge_stack
 tracksRegLiveness: true
 stack:
-  - { id: 0, size: 64, alignment: 8 }
+  - { id: 0, size: 16, alignment: 8 }
 body: |
   bb.0:
     liveins: $w0, $w1
@@ -25,3 +24,30 @@ body: |
     ; CHECK-NEXT: STRXui
     ; CHECK-NEXT: STRXui
     ; CHECK-NEXT: RET
+...
+---
+name: merge_fixedstack
+# CHECK-LABEL: name: merge_fixedstack
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, size: 16, alignment: 8, offset: -16 }
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = COPY $w1
+    undef %3.sub_32:gpr64 = ORRWrs $wzr, %0, 0
+    STRXui %3, %fixed-stack.0, 0 :: (store 8)
+    undef %5.sub_32:gpr64 = ORRWrs $wzr, %1, 0
+    STRXui %5, %fixed-stack.0, 1 :: (store 8)
+    RET_ReallyLR
+
+    ; CHECK: COPY
+    ; CHECK-NEXT: COPY
+    ; CHECK-NEXT: ORRWrs
+    ; CHECK-NEXT: ORRWrs
+    ; CHECK-NEXT: STRXui
+    ; CHECK-NEXT: STRXui
+    ; CHECK-NEXT: RET
+...


@@ -4,20 +4,20 @@
 define void @foo(i32 %In1, <2 x i128> %In2, <2 x i128> %In3, <2 x i128> *%Out) {
 ; CHECK-LABEL: foo:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w9, w0, #0x1
+; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    fmov s0, wzr
-; CHECK-NEXT:    ldp x10, x8, [sp, #8]
-; CHECK-NEXT:    fmov s1, w9
-; CHECK-NEXT:    ldr x9, [sp]
+; CHECK-NEXT:    ldp x10, x9, [sp, #8]
+; CHECK-NEXT:    fmov s1, w8
+; CHECK-NEXT:    ldr x8, [sp]
 ; CHECK-NEXT:    cmeq v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    fmov w11, s0
 ; CHECK-NEXT:    tst w11, #0x1
 ; CHECK-NEXT:    csel x11, x2, x6, ne
 ; CHECK-NEXT:    csel x12, x3, x7, ne
-; CHECK-NEXT:    csel x9, x4, x9, ne
+; CHECK-NEXT:    csel x8, x4, x8, ne
 ; CHECK-NEXT:    csel x10, x5, x10, ne
-; CHECK-NEXT:    stp x9, x10, [x8, #16]
-; CHECK-NEXT:    stp x11, x12, [x8]
+; CHECK-NEXT:    stp x8, x10, [x9, #16]
+; CHECK-NEXT:    stp x11, x12, [x9]
 ; CHECK-NEXT:    ret
   %cond = and i32 %In1, 1
   %cbool = icmp eq i32 %cond, 0
@@ -31,25 +31,25 @@ define void @foo(i32 %In1, <2 x i128> %In2, <2 x i128> %In3, <2 x i128> *%Out) {
 define void @bar(i32 %In1, <2 x i96> %In2, <2 x i96> %In3, <2 x i96> *%Out) {
 ; CHECK-LABEL: bar:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w10, w0, #0x1
+; CHECK-NEXT:    and w9, w0, #0x1
 ; CHECK-NEXT:    fmov s0, wzr
-; CHECK-NEXT:    fmov s1, w10
+; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    cmeq v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ldp x11, x8, [sp, #8]
-; CHECK-NEXT:    ldr x9, [sp]
+; CHECK-NEXT:    ldr x10, [sp]
 ; CHECK-NEXT:    dup v1.4s, v0.s[0]
-; CHECK-NEXT:    mov x10, v1.d[1]
-; CHECK-NEXT:    lsr x10, x10, #32
-; CHECK-NEXT:    tst w10, #0x1
-; CHECK-NEXT:    fmov w10, s0
+; CHECK-NEXT:    mov x9, v1.d[1]
+; CHECK-NEXT:    lsr x9, x9, #32
+; CHECK-NEXT:    tst w9, #0x1
+; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    csel x11, x5, x11, ne
-; CHECK-NEXT:    csel x9, x4, x9, ne
-; CHECK-NEXT:    tst w10, #0x1
-; CHECK-NEXT:    csel x10, x3, x7, ne
+; CHECK-NEXT:    csel x10, x4, x10, ne
+; CHECK-NEXT:    tst w9, #0x1
+; CHECK-NEXT:    csel x9, x3, x7, ne
 ; CHECK-NEXT:    csel x12, x2, x6, ne
-; CHECK-NEXT:    stur x9, [x8, #12]
+; CHECK-NEXT:    stur x10, [x8, #12]
 ; CHECK-NEXT:    str x12, [x8]
-; CHECK-NEXT:    str w10, [x8, #8]
+; CHECK-NEXT:    str w9, [x8, #8]
 ; CHECK-NEXT:    str w11, [x8, #20]
 ; CHECK-NEXT:    ret
   %cond = and i32 %In1, 1


@@ -39,8 +39,8 @@ declare void @callee2(i8*, i8*, i8*, i8*, i8*,
 ; Make sure that there is an dependence edge between fi#-2 and fi#-4.
 ; Without this edge the scheduler would be free to move the store accross the load.
 
-; COMMON: SU({{.*}}): [[VRB]]:gpr64 = LDRXui %fixed-stack.2
-; COMMON-NOT: SU
+; COMMON: {{^SU(.*)}}: [[VRB]]:gpr64 = LDRXui %fixed-stack.2
+; COMMON-NOT: {{^SU(.*)}}:
 ; COMMON: Successors:
 ; COMMON: SU([[DEPSTOREB:.*]]): Ord Latency=0
 ; COMMON: SU([[DEPSTOREA:.*]]): Ord Latency=0