mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[PowerPC] Implement instruction clustering for stores
On Power10, it's profitable to schedule some stores with adjacent target addresses together. This patch implements this feature. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D86754
This commit is contained in:
parent
60f196bf71
commit
2bb8ef68b6
@ -174,6 +174,9 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
|
||||
"HasAddisLoadFusion", "true",
|
||||
"Power8 Addis-Load fusion",
|
||||
[FeatureFusion]>;
|
||||
def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true",
|
||||
"Target supports store clustering",
|
||||
[FeatureFusion]>;
|
||||
def FeatureUnalignedFloats :
|
||||
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
|
||||
"true", "CPU does not trap on unaligned FP access">;
|
||||
@ -345,10 +348,12 @@ def ProcessorFeatures {
|
||||
// Power10
|
||||
// For P10 CPU we assume that all of the existing features from Power9
|
||||
// still exist with the exception of those we know are Power9 specific.
|
||||
list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion];
|
||||
list<SubtargetFeature> P10AdditionalFeatures =
|
||||
[DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
|
||||
FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA,
|
||||
FeaturePairedVectorMemops];
|
||||
!listconcat(FusionFeatures, [
|
||||
DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
|
||||
FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA,
|
||||
FeaturePairedVectorMemops]);
|
||||
list<SubtargetFeature> P10SpecificFeatures = [];
|
||||
list<SubtargetFeature> P10InheritableFeatures =
|
||||
!listconcat(P9InheritableFeatures, P10AdditionalFeatures);
|
||||
|
@ -2222,6 +2222,112 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
|
||||
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
|
||||
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
const MachineOperand *BaseOp;
|
||||
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
|
||||
return false;
|
||||
BaseOps.push_back(BaseOp);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool isLdStSafeToCluster(const MachineInstr &LdSt,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
// If this is a volatile load/store, don't mess with it.
|
||||
if (LdSt.hasOrderedMemoryRef())
|
||||
return false;
|
||||
|
||||
if (LdSt.getOperand(2).isFI())
|
||||
return true;
|
||||
|
||||
assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
|
||||
// Can't cluster if the instruction modifies the base register
|
||||
// or it is update form. e.g. ld r2,3(r2)
|
||||
if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Only cluster instruction pair that have the same opcode, and they are
|
||||
// clusterable according to PowerPC specification.
|
||||
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
|
||||
const PPCSubtarget &Subtarget) {
|
||||
switch (FirstOpc) {
|
||||
default:
|
||||
return false;
|
||||
case PPC::STD:
|
||||
case PPC::STFD:
|
||||
case PPC::STXSD:
|
||||
case PPC::DFSTOREf64:
|
||||
return FirstOpc == SecondOpc;
|
||||
// PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
|
||||
// 32bit and 64bit instruction selection. They are clusterable pair though
|
||||
// they are different opcode.
|
||||
case PPC::STW:
|
||||
case PPC::STW8:
|
||||
return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
|
||||
}
|
||||
}
|
||||
|
||||
/// Decides whether the two memory operations owning \p BaseOps1 and
/// \p BaseOps2 should be scheduled adjacent to each other.
///
/// The pair is accepted only when all of the following hold:
///  - \p NumLoads does not exceed two;
///  - both base operands are the same register or the same frame index;
///  - the opcodes form a clusterable pair (isClusterableLdStOpcPair);
///  - both instructions are safe to cluster (isLdStSafeToCluster);
///  - both accesses have the same width and the second one starts exactly
///    where the first one ends.
///
/// \param BaseOps1 Base operand of the first memory op (exactly one entry).
/// \param BaseOps2 Base operand of the second memory op (exactly one entry).
/// \param NumLoads Number of memory ops already clustered, as supplied by
///                 the caller.
/// \param NumBytes Not used by this implementation.
/// \returns true when the two operations should be clustered.
bool PPCInstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1,
    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
    unsigned NumBytes) const {

  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
  const MachineOperand &BaseOp1 = *BaseOps1.front();
  const MachineOperand &BaseOp2 = *BaseOps2.front();
  // Parenthesise the disjunction so the message attaches to the whole
  // condition instead of relying on && binding tighter than ||.
  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");

  // NumLoads counts the memory ops that have been clustered so far. Stop
  // growing the cluster once more than two have been collected.
  if (NumLoads > 2)
    return false;

  // Cluster the load/store only when they have the same base
  // register or FI.
  if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
      (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
      (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
    return false;

  // Check if the load/store are clusterable according to the PowerPC
  // specification.
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // Cluster the load/store only when their opcodes form a clusterable pair
  // according to the PowerPC specification.
  if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
    return false;

  // Neither instruction may have an ordered (e.g. volatile) memory reference
  // or modify its own base register.
  if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
      !isLdStSafeToCluster(SecondLdSt, TRI))
    return false;

  int64_t Offset1 = 0, Offset2 = 0;
  unsigned Width1 = 0, Width2 = 0;
  const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
  // Both accesses must be analysable and must have the same width.
  if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
      !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
      Width1 != Width2)
    return false;

  assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
         "getMemOperandWithOffsetWidth return incorrect base op");
  // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  // Adjacent means the second access begins right after the first one ends.
  return Offset1 + Width1 == Offset2;
}
|
||||
|
||||
/// GetInstSize - Return the number of bytes of code the specified
|
||||
/// instruction may be. This returns the maximum number of bytes.
|
||||
///
|
||||
@ -4664,7 +4770,8 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth(
|
||||
return false;
|
||||
|
||||
// Handle only loads/stores with base register followed by immediate offset.
|
||||
if (LdSt.getNumExplicitOperands() != 3)
|
||||
if (!LdSt.getOperand(1).isImm() ||
|
||||
(!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
|
||||
return false;
|
||||
if (!LdSt.getOperand(1).isImm() ||
|
||||
(!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
|
||||
|
@ -494,6 +494,19 @@ public:
|
||||
int64_t &Offset, unsigned &Width,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
/// Get the base operand and byte offset of an instruction that reads/writes
|
||||
/// memory.
|
||||
bool getMemOperandsWithOffsetWidth(
|
||||
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
|
||||
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
|
||||
const TargetRegisterInfo *TRI) const override;
|
||||
|
||||
/// Returns true if the two given memory operations should be scheduled
|
||||
/// adjacent.
|
||||
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
|
||||
ArrayRef<const MachineOperand *> BaseOps2,
|
||||
unsigned NumLoads, unsigned NumBytes) const override;
|
||||
|
||||
/// Return true if two MIs access different memory addresses and false
|
||||
/// otherwise
|
||||
bool
|
||||
|
@ -108,6 +108,7 @@ void PPCSubtarget::initializeEnvironment() {
|
||||
HasHTM = false;
|
||||
HasFloat128 = false;
|
||||
HasFusion = false;
|
||||
HasStoreFusion = false;
|
||||
HasAddiLoadFusion = false;
|
||||
HasAddisLoadFusion = false;
|
||||
IsISA3_0 = false;
|
||||
|
@ -137,6 +137,7 @@ protected:
|
||||
bool HasHTM;
|
||||
bool HasFloat128;
|
||||
bool HasFusion;
|
||||
bool HasStoreFusion;
|
||||
bool HasAddiLoadFusion;
|
||||
bool HasAddisLoadFusion;
|
||||
bool IsISA3_0;
|
||||
@ -308,6 +309,7 @@ public:
|
||||
bool isISA3_1() const { return IsISA3_1; }
|
||||
bool useLongCalls() const { return UseLongCalls; }
|
||||
bool hasFusion() const { return HasFusion; }
|
||||
bool hasStoreFusion() const { return HasStoreFusion; }
|
||||
bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
|
||||
bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
|
||||
bool needsSwapsForVSXMemOps() const {
|
||||
|
@ -271,6 +271,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
|
||||
std::make_unique<GenericScheduler>(C));
|
||||
// add DAG Mutations here.
|
||||
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
|
||||
if (ST.hasStoreFusion())
|
||||
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
|
||||
if (ST.hasFusion())
|
||||
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
|
||||
|
||||
@ -285,6 +287,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
|
||||
std::make_unique<PPCPostRASchedStrategy>(C) :
|
||||
std::make_unique<PostGenericScheduler>(C), true);
|
||||
// add DAG Mutations here.
|
||||
if (ST.hasStoreFusion())
|
||||
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
|
||||
if (ST.hasFusion())
|
||||
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
|
||||
return DAG;
|
||||
|
268
test/CodeGen/PowerPC/fusion-load-store.ll
Normal file
268
test/CodeGen/PowerPC/fusion-load-store.ll
Normal file
@ -0,0 +1,268 @@
|
||||
; Test if several consecutive loads/stores can be clustered (fused) by the scheduler. The
|
||||
; scheduler will print "Cluster ld/st SU(x) - SU(y)" if SU(x) and SU(y) are fused.
|
||||
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \
|
||||
; RUN: -mattr=-paired-vector-memops,-pcrelative-memops -verify-misched \
|
||||
; RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s
|
||||
|
||||
define i64 @store_i64(i64* nocapture %P, i64 %v) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i64:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
|
||||
; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24
|
||||
; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16
|
||||
; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8
|
||||
; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i64:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
|
||||
; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 16
|
||||
; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 8
|
||||
; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 24
|
||||
; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32
|
||||
%arrayidx = getelementptr inbounds i64, i64* %P, i64 3
|
||||
store i64 %v, i64* %arrayidx
|
||||
%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
|
||||
store i64 %v, i64* %arrayidx1
|
||||
%arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
|
||||
store i64 %v, i64* %arrayidx2
|
||||
%arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
|
||||
store i64 %v, i64* %arrayidx3
|
||||
ret i64 %v
|
||||
}
|
||||
|
||||
define i32 @store_i32(i32* nocapture %P, i32 %v) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i32:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
|
||||
; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, 52
|
||||
; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, 48
|
||||
; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, 44
|
||||
; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, 56
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i32:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
|
||||
; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], 48
|
||||
; CHECK: SU([[SU1]]): STW renamable $r[[REG]], 44
|
||||
; CHECK: SU([[SU2]]): STW renamable $r[[REG]], 52
|
||||
; CHECK: SU([[SU3]]): STW renamable $r[[REG]], 56
|
||||
%arrayidx = getelementptr inbounds i32, i32* %P, i32 13
|
||||
store i32 %v, i32* %arrayidx
|
||||
%arrayidx1 = getelementptr inbounds i32, i32* %P, i32 12
|
||||
store i32 %v, i32* %arrayidx1
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %P, i32 11
|
||||
store i32 %v, i32* %arrayidx2
|
||||
%arrayidx3 = getelementptr inbounds i32, i32* %P, i32 14
|
||||
store i32 %v, i32* %arrayidx3
|
||||
ret i32 %v
|
||||
}
|
||||
|
||||
define void @store_i64_neg(i64* nocapture %P, i64 %v) #0 {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i64_neg:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
|
||||
; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, -24
|
||||
; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, -8
|
||||
; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, -16
|
||||
; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, -32
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i64_neg:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
|
||||
; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], -8
|
||||
; CHECK: SU([[SU1]]): STD renamable $x[[REG]], -16
|
||||
; CHECK: SU([[SU2]]): STD renamable $x[[REG]], -24
|
||||
; CHECK: SU([[SU3]]): STD renamable $x[[REG]], -32
|
||||
%arrayidx = getelementptr inbounds i64, i64* %P, i64 -3
|
||||
store i64 %v, i64* %arrayidx
|
||||
%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1
|
||||
store i64 %v, i64* %arrayidx1
|
||||
%arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2
|
||||
store i64 %v, i64* %arrayidx2
|
||||
%arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4
|
||||
store i64 %v, i64* %arrayidx3
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_i32_neg(i32* nocapture %P, i32 %v) #0 {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i32_neg:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
|
||||
; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, -12
|
||||
; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, -4
|
||||
; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, -8
|
||||
; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, -16
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i32_neg:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
|
||||
; CHECK:SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], -4
|
||||
; CHECK:SU([[SU1]]): STW renamable $r[[REG]], -8
|
||||
; CHECK:SU([[SU2]]): STW renamable $r[[REG]], -12
|
||||
; CHECK:SU([[SU3]]): STW renamable $r[[REG]], -16
|
||||
%arrayidx = getelementptr inbounds i32, i32* %P, i32 -3
|
||||
store i32 %v, i32* %arrayidx
|
||||
%arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1
|
||||
store i32 %v, i32* %arrayidx1
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2
|
||||
store i32 %v, i32* %arrayidx2
|
||||
%arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4
|
||||
store i32 %v, i32* %arrayidx3
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_double(double* nocapture %P, double %v) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_double:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]])
|
||||
; CHECK: SU([[SU2]]): DFSTOREf64 %[[REG:[0-9]+]]:vsfrc, 24
|
||||
; CHECK: SU([[SU3]]): DFSTOREf64 %[[REG]]:vsfrc, 8
|
||||
; CHECK: SU([[SU4]]): DFSTOREf64 %[[REG]]:vsfrc, 16
|
||||
; CHECK: SU([[SU5]]): DFSTOREf64 %[[REG]]:vsfrc, 32
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_double:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]])
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
|
||||
; CHECK: SU([[SU0]]): STFD renamable $f[[REG:[0-9]+]], 8
|
||||
; CHECK: SU([[SU1]]): STFD renamable $f[[REG]], 16
|
||||
; CHECK: SU([[SU2]]): STFD renamable $f[[REG]], 24
|
||||
; CHECK: SU([[SU3]]): STFD renamable $f[[REG]], 32
|
||||
%arrayidx = getelementptr inbounds double, double* %P, i64 3
|
||||
store double %v, double* %arrayidx
|
||||
%arrayidx1 = getelementptr inbounds double, double* %P, i64 1
|
||||
store double %v, double* %arrayidx1
|
||||
%arrayidx2 = getelementptr inbounds double, double* %P, i64 2
|
||||
store double %v, double* %arrayidx2
|
||||
%arrayidx3 = getelementptr inbounds double, double* %P, i64 4
|
||||
store double %v, double* %arrayidx3
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_float(float* nocapture %P, float %v) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_float:%bb.0
|
||||
; CHECK-NOT: Cluster ld/st
|
||||
; CHECK-NOT: Cluster ld/st
|
||||
; CHECK: SU([[SU2]]): DFSTOREf32 %[[REG:[0-9]+]]:vssrc, 12
|
||||
; CHECK: SU([[SU3]]): DFSTOREf32 %[[REG]]:vssrc, 4
|
||||
; CHECK: SU([[SU4]]): DFSTOREf32 %[[REG]]:vssrc, 8
|
||||
; CHECK: SU([[SU5]]): DFSTOREf32 %[[REG]]:vssrc, 16
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_float:%bb.0
|
||||
; CHECK-NOT: Cluster ld/st
|
||||
; CHECK-NOT: Cluster ld/st
|
||||
; CHECK: SU([[SU0]]): STFS renamable $f[[REG:[0-9]+]], 12
|
||||
; CHECK: SU([[SU1]]): STFS renamable $f[[REG]], 4
|
||||
; CHECK: SU([[SU2]]): STFS renamable $f[[REG]], 8
|
||||
; CHECK: SU([[SU3]]): STFS renamable $f[[REG]], 16
|
||||
%arrayidx = getelementptr inbounds float, float* %P, i64 3
|
||||
store float %v, float* %arrayidx
|
||||
%arrayidx1 = getelementptr inbounds float, float* %P, i64 1
|
||||
store float %v, float* %arrayidx1
|
||||
%arrayidx2 = getelementptr inbounds float, float* %P, i64 2
|
||||
store float %v, float* %arrayidx2
|
||||
%arrayidx3 = getelementptr inbounds float, float* %P, i64 4
|
||||
store float %v, float* %arrayidx3
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cannot cluster (fuse) the stores when they are volatile
|
||||
define i64 @store_volatile(i64* nocapture %P, i64 %v) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_volatile:%bb.0
|
||||
; CHECK-NOT: Cluster ld/st
|
||||
; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24
|
||||
; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16
|
||||
; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8
|
||||
; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_volatile:%bb.0
|
||||
; CHECK-NOT: Cluster ld/st
|
||||
; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 24
|
||||
; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 16
|
||||
; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 8
|
||||
; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32
|
||||
%arrayidx = getelementptr inbounds i64, i64* %P, i64 3
|
||||
store volatile i64 %v, i64* %arrayidx
|
||||
%arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
|
||||
store volatile i64 %v, i64* %arrayidx1
|
||||
%arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
|
||||
store volatile i64 %v, i64* %arrayidx2
|
||||
%arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
|
||||
store volatile i64 %v, i64* %arrayidx3
|
||||
ret i64 %v
|
||||
}
|
||||
|
||||
@p = common local_unnamed_addr global [100 x i32] zeroinitializer, align 4
|
||||
|
||||
define void @store_i32_stw_stw8(i32 signext %m, i32 signext %n) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i32_stw_stw8:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU8:[0-9]+]])
|
||||
; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 24
|
||||
; CHECK: SU([[SU8]]): STW %{{[0-9]+}}:gprc, 20
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i32_stw_stw8:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU6:[0-9]+]])
|
||||
; CHECK: SU([[SU5]]): STW8 renamable $x{{[0-9]+}}, 24
|
||||
; CHECK: SU([[SU6]]): STW renamable $r{{[0-9]+}}, 20
|
||||
store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4
|
||||
store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4
|
||||
%add = add nsw i32 %n, %m
|
||||
store i32 %add, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 5), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_i32_stw8(i32 signext %m, i32 signext %n) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i32_stw8:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]])
|
||||
; CHECK: SU([[SU4]]): STW8 %{{[0-9]+}}:g8rc, 24
|
||||
; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 28
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_i32_stw8:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]])
|
||||
; CHECK: SU([[SU3]]): STW8 renamable $x{{[0-9]+}}, 24
|
||||
; CHECK: SU([[SU4]]): STW8 renamable $x{{[0-9]+}}, 28
|
||||
store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4
|
||||
store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @bar(i64*)
|
||||
|
||||
define void @store_frame_index(i32 %a, i32 %b) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: store_frame_index:%bb.0
|
||||
; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]])
|
||||
; CHECK: SU([[SU2]]): STD %{{[0-9]+}}:g8rc, 0, %stack.0.buf
|
||||
; CHECK: SU([[SU3]]): STD %{{[0-9]+}}:g8rc, 8, %stack.0.buf
|
||||
%buf = alloca [8 x i64], align 8
|
||||
%0 = bitcast [8 x i64]* %buf to i8*
|
||||
%conv = zext i32 %a to i64
|
||||
%arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 0
|
||||
store i64 %conv, i64* %arrayidx, align 8
|
||||
%conv1 = zext i32 %b to i64
|
||||
%arrayidx2 = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 1
|
||||
store i64 %conv1, i64* %arrayidx2, align 8
|
||||
call void @bar(i64* nonnull %arrayidx)
|
||||
ret void
|
||||
}
|
@ -104,6 +104,7 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3
|
||||
; CHECK-P9-NOT: .localentry
|
||||
; CHECK-ALL: # %bb.0: # %entry
|
||||
; CHECK-S-NEXT: std r29, -24(r1) # 8-byte Folded Spill
|
||||
; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
||||
; CHECK-S-NEXT: add r11, r4, r3
|
||||
; CHECK-S-NEXT: sub r29, r8, r9
|
||||
; CHECK-S-NEXT: add r9, r10, r9
|
||||
@ -119,7 +120,6 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3
|
||||
; CHECK-S-NEXT: mullw r3, r3, r7
|
||||
; CHECK-S-NEXT: sub r2, r6, r7
|
||||
; CHECK-S-NEXT: mullw r3, r3, r8
|
||||
; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
||||
; CHECK-S-NEXT: add r30, r8, r7
|
||||
; CHECK-S-NEXT: mullw r3, r3, r2
|
||||
; CHECK-S-NEXT: mullw r3, r3, r30
|
||||
|
Loading…
Reference in New Issue
Block a user