mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[CodeGen] Add dependency printer
Add SDep printer to make debugging sessions more productive. Differential revision: https://reviews.llvm.org/D35144 llvm-svn: 307799
This commit is contained in:
parent
228039ae12
commit
296d928945
@ -235,6 +235,9 @@ class TargetRegisterInfo;
|
||||
"SDep::Output edge cannot use the zero register!");
|
||||
Contents.Reg = Reg;
|
||||
}
|
||||
|
||||
raw_ostream &print(raw_ostream &O,
|
||||
const TargetRegisterInfo *TRI = nullptr) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@ -458,7 +461,10 @@ class TargetRegisterInfo;
|
||||
|
||||
void dump(const ScheduleDAG *G) const;
|
||||
void dumpAll(const ScheduleDAG *G) const;
|
||||
void print(raw_ostream &O, const ScheduleDAG *G) const;
|
||||
raw_ostream &print(raw_ostream &O,
|
||||
const SUnit *N = nullptr,
|
||||
const SUnit *X = nullptr) const;
|
||||
raw_ostream &print(raw_ostream &O, const ScheduleDAG *G) const;
|
||||
|
||||
private:
|
||||
void ComputeDepth();
|
||||
|
@ -67,6 +67,41 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
|
||||
return &TII->get(Node->getMachineOpcode());
|
||||
}
|
||||
|
||||
/// Write a one-line textual description of this dependence edge to \p OS.
///
/// The output starts with a four-character tag for the dependence kind
/// ("Data", "Anti", "Out " or "Ord "), followed by " Latency=<n>". Data edges
/// additionally print " Reg=<reg>" when \p TRI is non-null and the edge is an
/// assigned-register dependence; Order edges additionally print the ordering
/// sub-kind stored in Contents.OrdKind.
///
/// \param OS  Stream that receives the text.
/// \param TRI Optional register info; when null, no register is printed.
/// \returns \p OS, so calls can be chained.
LLVM_DUMP_METHOD
raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
  switch (getKind()) {
  case Data:
    OS << "Data" << " Latency=" << getLatency();
    // The register is only printed when the caller supplied register info.
    if (TRI && isAssignedRegDep())
      OS << " Reg=" << PrintReg(getReg(), TRI);
    break;
  case Anti:
    OS << "Anti" << " Latency=" << getLatency();
    break;
  case Output:
    OS << "Out " << " Latency=" << getLatency();
    break;
  case Order: {
    OS << "Ord " << " Latency=" << getLatency();
    // Pick the suffix describing the ordering sub-kind; both memory-alias
    // flavours share the same " Memory" label.
    const char *OrdSuffix = nullptr;
    switch (Contents.OrdKind) {
    case Barrier:
      OrdSuffix = " Barrier";
      break;
    case MayAliasMem:
    case MustAliasMem:
      OrdSuffix = " Memory";
      break;
    case Artificial:
      OrdSuffix = " Artificial";
      break;
    case Weak:
      OrdSuffix = " Weak";
      break;
    case Cluster:
      OrdSuffix = " Cluster";
      break;
    }
    if (OrdSuffix)
      OS << OrdSuffix;
    break;
  }
  }

  return OS;
}
|
||||
|
||||
bool SUnit::addPred(const SDep &D, bool Required) {
|
||||
// If this node already has this dependence, don't add a redundant one.
|
||||
for (SDep &PredDep : Preds) {
|
||||
@ -302,16 +337,24 @@ void SUnit::biasCriticalPath() {
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
LLVM_DUMP_METHOD
|
||||
void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const {
|
||||
if (this == &DAG->ExitSU)
|
||||
OS << "ExitSU";
|
||||
else if (this == &DAG->EntrySU)
|
||||
/// Write a short label identifying this scheduling unit to \p OS.
///
/// Prints "EntrySU" when this unit is \p Entry, "ExitSU" when it is \p Exit
/// (Entry is checked first), and "SU(<NodeNum>)" otherwise.
///
/// \returns \p OS, so calls can be chained.
raw_ostream &SUnit::print(raw_ostream &OS,
                          const SUnit *Entry, const SUnit *Exit) const {
  if (this == Entry)
    return OS << "EntrySU";
  if (this == Exit)
    return OS << "ExitSU";
  return OS << "SU(" << NodeNum << ")";
}
|
||||
|
||||
LLVM_DUMP_METHOD void SUnit::dump(const ScheduleDAG *G) const {
|
||||
/// Convenience overload: print this unit's label using the entry and exit
/// units of the DAG \p G as the special-cased nodes.
///
/// \p G is dereferenced unconditionally, so it must not be null.
/// \returns \p OS, so calls can be chained.
LLVM_DUMP_METHOD
raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const {
  const SUnit *EntryNode = &G->EntrySU;
  const SUnit *ExitNode = &G->ExitSU;
  return print(OS, EntryNode, ExitNode);
}
|
||||
|
||||
LLVM_DUMP_METHOD
|
||||
void SUnit::dump(const ScheduleDAG *G) const {
|
||||
print(dbgs(), G);
|
||||
dbgs() << ": ";
|
||||
G->dumpNode(this);
|
||||
@ -333,40 +376,18 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
|
||||
|
||||
if (Preds.size() != 0) {
|
||||
dbgs() << " Predecessors:\n";
|
||||
for (const SDep &SuccDep : Preds) {
|
||||
for (const SDep &Dep : Preds) {
|
||||
dbgs() << " ";
|
||||
switch (SuccDep.getKind()) {
|
||||
case SDep::Data: dbgs() << "data "; break;
|
||||
case SDep::Anti: dbgs() << "anti "; break;
|
||||
case SDep::Output: dbgs() << "out "; break;
|
||||
case SDep::Order: dbgs() << "ord "; break;
|
||||
}
|
||||
SuccDep.getSUnit()->print(dbgs(), G);
|
||||
if (SuccDep.isArtificial())
|
||||
dbgs() << " *";
|
||||
dbgs() << ": Latency=" << SuccDep.getLatency();
|
||||
if (SuccDep.isAssignedRegDep())
|
||||
dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI);
|
||||
dbgs() << "\n";
|
||||
Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
|
||||
Dep.print(dbgs(), G->TRI); dbgs() << '\n';
|
||||
}
|
||||
}
|
||||
if (Succs.size() != 0) {
|
||||
dbgs() << " Successors:\n";
|
||||
for (const SDep &SuccDep : Succs) {
|
||||
for (const SDep &Dep : Succs) {
|
||||
dbgs() << " ";
|
||||
switch (SuccDep.getKind()) {
|
||||
case SDep::Data: dbgs() << "data "; break;
|
||||
case SDep::Anti: dbgs() << "anti "; break;
|
||||
case SDep::Output: dbgs() << "out "; break;
|
||||
case SDep::Order: dbgs() << "ord "; break;
|
||||
}
|
||||
SuccDep.getSUnit()->print(dbgs(), G);
|
||||
if (SuccDep.isArtificial())
|
||||
dbgs() << " *";
|
||||
dbgs() << ": Latency=" << SuccDep.getLatency();
|
||||
if (SuccDep.isAssignedRegDep())
|
||||
dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI);
|
||||
dbgs() << "\n";
|
||||
Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
|
||||
Dep.print(dbgs(), G->TRI); dbgs() << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -13,9 +13,9 @@
|
||||
; CHECK: SU(2): STRWui %WZR
|
||||
; CHECK: SU(3): %X21<def>, %X20<def> = LDPXi %SP
|
||||
; CHECK: Predecessors:
|
||||
; CHECK-NEXT: out SU(0)
|
||||
; CHECK-NEXT: out SU(0)
|
||||
; CHECK-NEXT: ord SU(0)
|
||||
; CHECK-NEXT: SU(0): Out
|
||||
; CHECK-NEXT: SU(0): Out
|
||||
; CHECK-NEXT: SU(0): Ord
|
||||
; CHECK-NEXT: Successors:
|
||||
define void @test1() {
|
||||
entry:
|
||||
|
@ -8,8 +8,8 @@
|
||||
; CHECK: shiftable
|
||||
; CHECK: SU(2): %vreg2<def> = SUBXri %vreg1, 20, 0
|
||||
; CHECK: Successors:
|
||||
; CHECK-NEXT: data SU(4): Latency=1 Reg=%vreg2
|
||||
; CHECK-NEXT: data SU(3): Latency=2 Reg=%vreg2
|
||||
; CHECK-NEXT: SU(4): Data Latency=1 Reg=%vreg2
|
||||
; CHECK-NEXT: SU(3): Data Latency=2 Reg=%vreg2
|
||||
; CHECK: ********** INTERVALS **********
|
||||
define i64 @shiftable(i64 %A, i64 %B) {
|
||||
%tmp0 = sub i64 %B, 20
|
||||
|
@ -7,11 +7,11 @@
|
||||
; CHECK: misched_bug:BB#0 entry
|
||||
; CHECK: SU(2): %vreg2<def> = LDRWui %vreg0, 1; mem:LD4[%ptr1_plus1] GPR32:%vreg2 GPR64common:%vreg0
|
||||
; CHECK: Successors:
|
||||
; CHECK-NEXT: data SU(5): Latency=4 Reg=%vreg2
|
||||
; CHECK-NEXT: ord SU(4): Latency=0
|
||||
; CHECK-NEXT: SU(5): Data Latency=4 Reg=%vreg2
|
||||
; CHECK-NEXT: SU(4): Ord Latency=0
|
||||
; CHECK: SU(3): STRWui %WZR, %vreg0, 0; mem:ST4[%ptr1] GPR64common:%vreg0
|
||||
; CHECK: Successors:
|
||||
; CHECK: ord SU(4): Latency=0
|
||||
; CHECK: SU(4): Ord Latency=0
|
||||
; CHECK: SU(4): STRWui %WZR, %vreg1, 0; mem:ST4[%ptr2] GPR64common:%vreg1
|
||||
; CHECK: SU(5): %W0<def> = COPY %vreg2; GPR32:%vreg2
|
||||
; CHECK: ** ScheduleDAGMI::schedule picking next node
|
||||
|
@ -37,8 +37,8 @@ declare void @callee2(i8*, i8*, i8*, i8*, i8*,
|
||||
; CHECK: SU({{.*}}): [[VRB]]<def> = LDRXui <fi#-2>
|
||||
; CHECK-NOT: SU
|
||||
; CHECK: Successors:
|
||||
; CHECK: ord SU([[DEPSTOREB:.*]]): Latency=0
|
||||
; CHECK: ord SU([[DEPSTOREA:.*]]): Latency=0
|
||||
; CHECK: SU([[DEPSTOREB:.*]]): Ord Latency=0
|
||||
; CHECK: SU([[DEPSTOREA:.*]]): Ord Latency=0
|
||||
|
||||
; CHECK: SU([[DEPSTOREA]]): STRXui %vreg{{.*}}, <fi#-4>
|
||||
; CHECK: SU([[DEPSTOREB]]): STRXui %vreg{{.*}}, <fi#-3>
|
||||
|
@ -6,23 +6,23 @@
|
||||
|
||||
; CHECK: ** List Scheduling
|
||||
; CHECK: SU(2){{.*}}STR{{.*}}Volatile
|
||||
; CHECK-NOT: ord SU
|
||||
; CHECK: ord SU(3): Latency=1
|
||||
; CHECK-NOT: ord SU
|
||||
; CHECK-NOT: SU({{.*}}): Ord
|
||||
; CHECK: SU(3): Ord Latency=1
|
||||
; CHECK-NOT: SU({{.*}}): Ord
|
||||
; CHECK: SU(3){{.*}}LDR{{.*}}Volatile
|
||||
; CHECK-NOT: ord SU
|
||||
; CHECK: ord SU(2): Latency=1
|
||||
; CHECK-NOT: ord SU
|
||||
; CHECK-NOT: SU({{.*}}): Ord
|
||||
; CHECK: SU(2): Ord Latency=1
|
||||
; CHECK-NOT: SU({{.*}}): Ord
|
||||
; CHECK: Successors:
|
||||
; CHECK: ** List Scheduling
|
||||
; CHECK: SU(2){{.*}}STR{{.*}}
|
||||
; CHECK-NOT: ord SU
|
||||
; CHECK: ord SU(3): Latency=1
|
||||
; CHECK-NOT: ord SU
|
||||
; CHECK-NOT: SU({{.*}}): Ord
|
||||
; CHECK: SU(3): Ord Latency=1
|
||||
; CHECK-NOT: SU({{.*}}): Ord
|
||||
; CHECK: SU(3){{.*}}LDR{{.*}}
|
||||
; CHECK-NOT: ord SU
|
||||
; CHECK: ord SU(2): Latency=1
|
||||
; CHECK-NOT: ord SU
|
||||
; CHECK-NOT: SU({{.*}}): Ord
|
||||
; CHECK: SU(2): Ord Latency=1
|
||||
; CHECK-NOT: SU({{.*}}): Ord
|
||||
; CHECK: Successors:
|
||||
define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
|
||||
entry:
|
||||
|
@ -13,13 +13,13 @@
|
||||
; CHECK: rdefs left
|
||||
; CHECK-NEXT: Latency : 4
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; CHECK-SAME: Latency=1
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=3
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=3
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=4
|
||||
define i32 @bar(i32 %a1, i32 %b1, i32 %c1) minsize optsize {
|
||||
%1 = load i32, i32* @a, align 4
|
||||
|
@ -8,9 +8,9 @@
|
||||
; CHECK: rdefs left
|
||||
; CHECK-NEXT: Latency : 3
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; CHECK-SAME: Latency=3
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=3
|
||||
|
||||
define i32 @foo(i32* %a) nounwind optsize {
|
||||
|
@ -10,7 +10,7 @@
|
||||
; CHECK: rdefs left
|
||||
; CHECK-NEXT: Latency : 2
|
||||
; CHECK: Successors
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; CHECK-SAME: Latency=1
|
||||
|
||||
define i32 @bar(i32 %v0, i32 %v1, i32 %v2, i32* %addr) {
|
||||
|
@ -11,7 +11,7 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
|
||||
; > VMULS common latency = 5
|
||||
; CHECK: Latency : 5
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMULS read-advanced latency to VMLAS = 0
|
||||
; CHECK-SAME: Latency=0
|
||||
|
||||
@ -20,7 +20,7 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
|
||||
; > VMLAS common latency = 9
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLAS read-advanced latency to the next VMLAS = 4
|
||||
; CHECK-SAME: Latency=4
|
||||
|
||||
@ -28,7 +28,7 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
|
||||
; CHECK-FAST: VFMAS
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLAS not-optimized latency to VMOVRS = 9
|
||||
; CHECK-SAME: Latency=9
|
||||
|
||||
@ -50,7 +50,7 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
|
||||
; > VMULfd common latency = 5
|
||||
; CHECK: Latency : 5
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; VMULfd read-advanced latency to VMLAfd = 0
|
||||
; CHECK-SAME: Latency=0
|
||||
|
||||
@ -59,7 +59,7 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
|
||||
; > VMLAfd common latency = 9
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLAfd read-advanced latency to the next VMLAfd = 4
|
||||
; CHECK-SAME: Latency=4
|
||||
|
||||
@ -67,7 +67,7 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
|
||||
; CHECK-FAST: VFMAfd
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLAfd not-optimized latency to VMOVRRD = 9
|
||||
; CHECK-SAME: Latency=9
|
||||
|
||||
@ -88,7 +88,7 @@ define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float
|
||||
; > VMULS common latency = 5
|
||||
; CHECK: Latency : 5
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMULS read-advanced latency to VMLSS = 0
|
||||
; CHECK-SAME: Latency=0
|
||||
|
||||
@ -97,7 +97,7 @@ define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float
|
||||
; > VMLSS common latency = 9
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLSS read-advanced latency to the next VMLSS = 4
|
||||
; CHECK-SAME: Latency=4
|
||||
|
||||
@ -105,7 +105,7 @@ define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float
|
||||
; CHECK-FAST: VFMSS
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLSS not-optimized latency to VMOVRS = 9
|
||||
; CHECK-SAME: Latency=9
|
||||
|
||||
@ -127,7 +127,7 @@ define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
|
||||
; > VMULfd common latency = 5
|
||||
; CHECK: Latency : 5
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; VMULfd read-advanced latency to VMLSfd = 0
|
||||
; CHECK-SAME: Latency=0
|
||||
|
||||
@ -136,7 +136,7 @@ define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
|
||||
; > VMLSfd common latency = 9
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLSfd read-advanced latency to the next VMLSfd = 4
|
||||
; CHECK-SAME: Latency=4
|
||||
|
||||
@ -144,7 +144,7 @@ define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
|
||||
; CHECK-FAST: VFMSfd
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLSfd not-optimized latency to VMOVRRD = 9
|
||||
; CHECK-SAME: Latency=9
|
||||
|
||||
@ -165,7 +165,7 @@ define float @Test5(float %f1, float %f2, float %f3) {
|
||||
; CHECK-FAST: VFNMS
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLAS not-optimized latency to VMOVRS = 9
|
||||
; CHECK-SAME: Latency=9
|
||||
|
||||
@ -184,7 +184,7 @@ define float @Test6(float %f1, float %f2, float %f3) {
|
||||
; CHECK-FAST: VFNMA
|
||||
; CHECK: Latency : 9
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; > VMLAS not-optimized latency to VMOVRS = 9
|
||||
; CHECK-SAME: Latency=9
|
||||
|
||||
|
@ -13,15 +13,15 @@
|
||||
; CHECK: rdefs left
|
||||
; CHECK-NEXT: Latency : 6
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; CHECK-SAME: Latency=1
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=1
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=5
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=5
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=6
|
||||
define i32 @bar(i32* %iptr) minsize optsize {
|
||||
%1 = load double, double* @a, align 8
|
||||
|
@ -8,11 +8,11 @@
|
||||
; CHECK: rdefs left
|
||||
; CHECK-NEXT: Latency : 6
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; CHECK-SAME: Latency=5
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=5
|
||||
; CHECK-NEXT: data
|
||||
; CHECK-NEXT: Data
|
||||
; CHECK-SAME: Latency=6
|
||||
|
||||
define double @foo(double* %a) nounwind optsize {
|
||||
|
@ -9,7 +9,7 @@
|
||||
; CHECK: rdefs left
|
||||
; CHECK-NEXT: Latency : 4
|
||||
; CHECK: Successors:
|
||||
; CHECK: data
|
||||
; CHECK: Data
|
||||
; CHECK-SAME: Latency=1
|
||||
|
||||
@a = global double 0.0, align 4
|
||||
|
Loading…
Reference in New Issue
Block a user