mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[HazardRec] Allow inserting multiple wait-states simultaneously
If a target can encode multiple wait-states into a single noop, allow emitting such instructions directly. Reviewed By: rampitec, dmgreen. Differential Revision: https://reviews.llvm.org/D89753
This commit is contained in:
parent
0252848f5a
commit
31d1e01743
@ -114,6 +114,14 @@ public:
|
|||||||
// Default implementation: count it as a cycle.
|
// Default implementation: count it as a cycle.
|
||||||
AdvanceCycle();
|
AdvanceCycle();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// EmitNoops - This callback is invoked when noops were added to the
|
||||||
|
/// instruction stream.
|
||||||
|
virtual void EmitNoops(unsigned Quantity) {
|
||||||
|
// Default implementation: call EmitNoop() once for each of the Quantity noops.
|
||||||
|
for (unsigned i = 0; i < Quantity; ++i)
|
||||||
|
EmitNoop();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
@ -1343,6 +1343,11 @@ public:
|
|||||||
virtual void insertNoop(MachineBasicBlock &MBB,
|
virtual void insertNoop(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator MI) const;
|
MachineBasicBlock::iterator MI) const;
|
||||||
|
|
||||||
|
/// Insert noops into the instruction stream at the specified point.
|
||||||
|
virtual void insertNoops(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI,
|
||||||
|
unsigned Quantity) const;
|
||||||
|
|
||||||
/// Return the noop instruction to use for a noop.
|
/// Return the noop instruction to use for a noop.
|
||||||
virtual void getNoop(MCInst &NopInst) const;
|
virtual void getNoop(MCInst &NopInst) const;
|
||||||
|
|
||||||
|
@ -82,11 +82,9 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
|
|||||||
for (MachineInstr &MI : MBB) {
|
for (MachineInstr &MI : MBB) {
|
||||||
// If we need to emit noops prior to this instruction, then do so.
|
// If we need to emit noops prior to this instruction, then do so.
|
||||||
unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
|
unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
|
||||||
for (unsigned i = 0; i != NumPreNoops; ++i) {
|
HazardRec->EmitNoops(NumPreNoops);
|
||||||
HazardRec->EmitNoop();
|
TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops);
|
||||||
TII->insertNoop(MBB, MachineBasicBlock::iterator(MI));
|
NumNoops += NumPreNoops;
|
||||||
++NumNoops;
|
|
||||||
}
|
|
||||||
|
|
||||||
HazardRec->EmitInstruction(&MI);
|
HazardRec->EmitInstruction(&MI);
|
||||||
if (HazardRec->atIssueLimit()) {
|
if (HazardRec->atIssueLimit()) {
|
||||||
|
@ -69,6 +69,15 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
|||||||
llvm_unreachable("Target didn't implement insertNoop!");
|
llvm_unreachable("Target didn't implement insertNoop!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// insertNoops - Insert noops into the instruction stream at the specified
|
||||||
|
/// point.
|
||||||
|
void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI,
|
||||||
|
unsigned Quantity) const {
|
||||||
|
for (unsigned i = 0; i < Quantity; ++i)
|
||||||
|
insertNoop(MBB, MI);
|
||||||
|
}
|
||||||
|
|
||||||
static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
|
static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
|
||||||
return strncmp(Str, MAI.getCommentString().data(),
|
return strncmp(Str, MAI.getCommentString().data(),
|
||||||
MAI.getCommentString().size()) == 0;
|
MAI.getCommentString().size()) == 0;
|
||||||
|
@ -1047,9 +1047,6 @@ void GCNPassConfig::addPreEmitPass() {
|
|||||||
//
|
//
|
||||||
// Here we add a stand-alone hazard recognizer pass which can handle all
|
// Here we add a stand-alone hazard recognizer pass which can handle all
|
||||||
// cases.
|
// cases.
|
||||||
//
|
|
||||||
// FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
|
|
||||||
// be better for it to emit S_NOP <N> when possible.
|
|
||||||
addPass(&PostRAHazardRecognizerID);
|
addPass(&PostRAHazardRecognizerID);
|
||||||
addPass(&BranchRelaxationPassID);
|
addPass(&BranchRelaxationPassID);
|
||||||
}
|
}
|
||||||
|
@ -1533,25 +1533,24 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|||||||
.addMemOperand(MMO);
|
.addMemOperand(MMO);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI,
|
|
||||||
int Count) const {
|
|
||||||
DebugLoc DL = MBB.findDebugLoc(MI);
|
|
||||||
while (Count > 0) {
|
|
||||||
int Arg;
|
|
||||||
if (Count >= 8)
|
|
||||||
Arg = 7;
|
|
||||||
else
|
|
||||||
Arg = Count - 1;
|
|
||||||
Count -= 8;
|
|
||||||
BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
|
|
||||||
.addImm(Arg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator MI) const {
|
MachineBasicBlock::iterator MI) const {
|
||||||
insertWaitStates(MBB, MI, 1);
|
insertNoops(MBB, MI, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SIInstrInfo::insertNoops(MachineBasicBlock &MBB,
|
||||||
|
MachineBasicBlock::iterator MI,
|
||||||
|
unsigned Quantity) const {
|
||||||
|
DebugLoc DL = MBB.findDebugLoc(MI);
|
||||||
|
while (Quantity > 0) {
|
||||||
|
unsigned Arg;
|
||||||
|
if (Quantity >= 8)
|
||||||
|
Arg = 7;
|
||||||
|
else
|
||||||
|
Arg = Quantity - 1;
|
||||||
|
Quantity -= Arg + 1;
|
||||||
|
BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
|
void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
|
||||||
|
@ -898,12 +898,12 @@ public:
|
|||||||
/// VALU if necessary. If present, \p MDT is updated.
|
/// VALU if necessary. If present, \p MDT is updated.
|
||||||
void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
|
void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
|
||||||
|
|
||||||
void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
|
|
||||||
int Count) const;
|
|
||||||
|
|
||||||
void insertNoop(MachineBasicBlock &MBB,
|
void insertNoop(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator MI) const override;
|
MachineBasicBlock::iterator MI) const override;
|
||||||
|
|
||||||
|
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||||
|
unsigned Quantity) const override;
|
||||||
|
|
||||||
void insertReturn(MachineBasicBlock &MBB) const;
|
void insertReturn(MachineBasicBlock &MBB) const;
|
||||||
/// Return the number of wait states that result from executing this
|
/// Return the number of wait states that result from executing this
|
||||||
/// instruction.
|
/// instruction.
|
||||||
|
@ -2,9 +2,7 @@
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FCMPSWAP
|
# GCN: FLAT_ATOMIC_FCMPSWAP
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fcmpswap_to_s_denorm_mode
|
name: flat_atomic_fcmpswap_to_s_denorm_mode
|
||||||
@ -16,9 +14,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
|
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
|
name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
|
||||||
@ -30,9 +26,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FMAX
|
# GCN: FLAT_ATOMIC_FMAX
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fmax_to_s_denorm_mode
|
name: flat_atomic_fmax_to_s_denorm_mode
|
||||||
@ -44,9 +38,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FMAX_X2
|
# GCN: FLAT_ATOMIC_FMAX_X2
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fmax_x2_to_s_denorm_mode
|
name: flat_atomic_fmax_x2_to_s_denorm_mode
|
||||||
@ -58,9 +50,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FMIN
|
# GCN: FLAT_ATOMIC_FMIN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fmin_to_s_denorm_mode
|
name: flat_atomic_fmin_to_s_denorm_mode
|
||||||
@ -72,9 +62,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FMIN_X2
|
# GCN: FLAT_ATOMIC_FMIN_X2
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fmin_x2_to_s_denorm_mode
|
name: flat_atomic_fmin_x2_to_s_denorm_mode
|
||||||
@ -86,9 +74,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN
|
# GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
||||||
@ -100,9 +86,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FMAX_RTN
|
# GCN: FLAT_ATOMIC_FMAX_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fmax_rtn_to_s_denorm_mode
|
name: flat_atomic_fmax_rtn_to_s_denorm_mode
|
||||||
@ -114,9 +98,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FMAX_X2_RTN
|
# GCN: FLAT_ATOMIC_FMAX_X2_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
|
name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
|
||||||
@ -128,9 +110,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FMIN_RTN
|
# GCN: FLAT_ATOMIC_FMIN_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fmin_rtn_to_s_denorm_mode
|
name: flat_atomic_fmin_rtn_to_s_denorm_mode
|
||||||
@ -142,9 +122,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FMIN_X2_RTN
|
# GCN: FLAT_ATOMIC_FMIN_X2_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
|
name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
|
||||||
@ -156,9 +134,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
|
||||||
# GCN: FLAT_ATOMIC_FCMPSWAP_RTN
|
# GCN: FLAT_ATOMIC_FCMPSWAP_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
|
name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
|
||||||
@ -170,9 +146,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP
|
# GCN: GLOBAL_ATOMIC_FCMPSWAP
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fcmpswap_to_s_denorm_mode
|
name: global_atomic_fcmpswap_to_s_denorm_mode
|
||||||
@ -184,9 +158,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2
|
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fcmpswap_x2_to_s_denorm_mode
|
name: global_atomic_fcmpswap_x2_to_s_denorm_mode
|
||||||
@ -198,9 +170,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMAX
|
# GCN: GLOBAL_ATOMIC_FMAX
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmax_to_s_denorm_mode
|
name: global_atomic_fmax_to_s_denorm_mode
|
||||||
@ -212,9 +182,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMAX_X2
|
# GCN: GLOBAL_ATOMIC_FMAX_X2
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmax_x2_to_s_denorm_mode
|
name: global_atomic_fmax_x2_to_s_denorm_mode
|
||||||
@ -226,9 +194,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMIN
|
# GCN: GLOBAL_ATOMIC_FMIN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmin_to_s_denorm_mode
|
name: global_atomic_fmin_to_s_denorm_mode
|
||||||
@ -240,9 +206,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMIN_X2
|
# GCN: GLOBAL_ATOMIC_FMIN_X2
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmin_x2_to_s_denorm_mode
|
name: global_atomic_fmin_x2_to_s_denorm_mode
|
||||||
@ -254,9 +218,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN
|
# GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
|
name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
|
||||||
@ -268,9 +230,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN
|
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
||||||
@ -282,9 +242,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMAX_RTN
|
# GCN: GLOBAL_ATOMIC_FMAX_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmax_rtn_to_s_denorm_mode
|
name: global_atomic_fmax_rtn_to_s_denorm_mode
|
||||||
@ -296,9 +254,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
|
# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
|
name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
|
||||||
@ -310,9 +266,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMIN_RTN
|
# GCN: GLOBAL_ATOMIC_FMIN_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmin_rtn_to_s_denorm_mode
|
name: global_atomic_fmin_rtn_to_s_denorm_mode
|
||||||
@ -324,9 +278,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
|
# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
|
name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
|
||||||
@ -338,9 +290,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR
|
# GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
|
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
|
||||||
@ -352,9 +302,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN
|
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
|
||||||
@ -366,9 +314,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN
|
# GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
|
||||||
@ -380,9 +326,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
|
# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
|
||||||
@ -394,9 +338,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN
|
# GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
|
||||||
@ -408,9 +350,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
|
# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
|
||||||
# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
|
# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_DENORM_MODE
|
# GCN-NEXT: S_DENORM_MODE
|
||||||
---
|
---
|
||||||
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
|
||||||
|
@ -612,8 +612,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(
|
|||||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
|
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
|
||||||
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
||||||
; SI-NEXT: s_nop 0
|
; SI-NEXT: s_nop 1
|
||||||
; SI-NEXT: s_nop 0
|
|
||||||
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
|
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
|
||||||
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
|
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
|
||||||
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
|
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
|
||||||
@ -740,8 +739,7 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs
|
|||||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
|
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
|
||||||
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
||||||
; SI-NEXT: s_nop 0
|
; SI-NEXT: s_nop 1
|
||||||
; SI-NEXT: s_nop 0
|
|
||||||
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
|
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
|
||||||
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
|
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
|
||||||
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
|
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
|
||||||
@ -1842,8 +1840,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
|
|||||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v7, v9
|
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v7, v9
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v3, v13
|
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v3, v13
|
||||||
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
||||||
; SI-NEXT: s_nop 0
|
; SI-NEXT: s_nop 1
|
||||||
; SI-NEXT: s_nop 0
|
|
||||||
; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15]
|
; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15]
|
||||||
; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3]
|
; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3]
|
||||||
; SI-NEXT: v_bfe_u32 v10, v9, 20, 11
|
; SI-NEXT: v_bfe_u32 v10, v9, 20, 11
|
||||||
@ -1876,8 +1873,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
|
|||||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
|
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v11
|
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v11
|
||||||
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
||||||
; SI-NEXT: s_nop 0
|
; SI-NEXT: s_nop 1
|
||||||
; SI-NEXT: s_nop 0
|
|
||||||
; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13]
|
; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13]
|
||||||
; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1]
|
; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1]
|
||||||
; SI-NEXT: v_bfe_u32 v8, v7, 20, 11
|
; SI-NEXT: v_bfe_u32 v8, v7, 20, 11
|
||||||
|
@ -40,10 +40,7 @@ body: |
|
|||||||
# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr
|
# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr
|
||||||
# GCN: S_LOAD_DWORDX2_IMM
|
# GCN: S_LOAD_DWORDX2_IMM
|
||||||
# GCN-NEXT: }
|
# GCN-NEXT: }
|
||||||
# GCN-NEXT: S_NOP
|
# GCN-NEXT: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN: BUFFER_LOAD_DWORD_OFFEN
|
# GCN: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_hazard_ignore_bundle_instr
|
name: vmem_vcc_hazard_ignore_bundle_instr
|
||||||
@ -63,11 +60,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle
|
# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle
|
||||||
# GCN: bb.2:
|
# GCN: bb.2:
|
||||||
# GCN-NEXT: S_NOP
|
# GCN-NEXT: S_NOP 4
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_min_of_two_after_bundle
|
name: vmem_vcc_min_of_two_after_bundle
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
|
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,SICI
|
||||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
|
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,SICI
|
||||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
|
||||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
|
||||||
|
|
||||||
@ -24,26 +24,17 @@
|
|||||||
|
|
||||||
# GCN-LABEL: bb.1:
|
# GCN-LABEL: bb.1:
|
||||||
# GCN: V_CMP_EQ_I32
|
# GCN: V_CMP_EQ_I32
|
||||||
# GCN: S_NOP
|
# GCN: S_NOP 3
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: V_DIV_FMAS_F32
|
# GCN: V_DIV_FMAS_F32
|
||||||
|
|
||||||
# GCN-LABEL: bb.2:
|
# GCN-LABEL: bb.2:
|
||||||
# GCN: V_CMP_EQ_I32
|
# GCN: V_CMP_EQ_I32
|
||||||
# GCN: S_NOP
|
# GCN: S_NOP 3
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: V_DIV_FMAS_F32
|
# GCN: V_DIV_FMAS_F32
|
||||||
|
|
||||||
# GCN-LABEL: bb.3:
|
# GCN-LABEL: bb.3:
|
||||||
# GCN: V_DIV_SCALE_F32
|
# GCN: V_DIV_SCALE_F32
|
||||||
# GCN: S_NOP
|
# GCN: S_NOP 3
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: V_DIV_FMAS_F32
|
# GCN: V_DIV_FMAS_F32
|
||||||
name: div_fmas
|
name: div_fmas
|
||||||
|
|
||||||
@ -76,14 +67,12 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: bb.0:
|
# GCN-LABEL: bb.0:
|
||||||
# GCN: S_SETREG
|
# GCN: S_SETREG
|
||||||
# GCN: S_NOP 0
|
# GCN: S_NOP 1
|
||||||
# GCN: S_NOP 0
|
|
||||||
# GCN: S_GETREG
|
# GCN: S_GETREG
|
||||||
|
|
||||||
# GCN-LABEL: bb.1:
|
# GCN-LABEL: bb.1:
|
||||||
# GCN: S_SETREG_IMM32
|
# GCN: S_SETREG_IMM32
|
||||||
# GCN: S_NOP 0
|
# GCN: S_NOP 1
|
||||||
# GCN: S_NOP 0
|
|
||||||
# GCN: S_GETREG
|
# GCN: S_GETREG
|
||||||
|
|
||||||
# GCN-LABEL: bb.2:
|
# GCN-LABEL: bb.2:
|
||||||
@ -126,15 +115,15 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: bb.0:
|
# GCN-LABEL: bb.0:
|
||||||
# GCN: S_SETREG
|
# GCN: S_SETREG
|
||||||
# GCN: S_NOP 0
|
# SICI: S_NOP 0
|
||||||
# VI: S_NOP 0
|
# VI: S_NOP 1
|
||||||
# GCN-NEXT: S_SETREG
|
# GCN: S_SETREG
|
||||||
|
|
||||||
# GCN-LABEL: bb.1:
|
# GCN-LABEL: bb.1:
|
||||||
# GCN: S_SETREG
|
# GCN: S_SETREG
|
||||||
# GCN: S_NOP 0
|
# SICI: S_NOP 0
|
||||||
# VI: S_NOP 0
|
# VI: S_NOP 1
|
||||||
# GCN-NEXT: S_SETREG
|
# GCN: S_SETREG
|
||||||
|
|
||||||
# GCN-LABEL: bb.2:
|
# GCN-LABEL: bb.2:
|
||||||
# GCN: S_SETREG
|
# GCN: S_SETREG
|
||||||
@ -239,34 +228,22 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: bb.0:
|
# GCN-LABEL: bb.0:
|
||||||
# GCN: V_ADD_CO_U32
|
# GCN: V_ADD_CO_U32
|
||||||
# GCN: S_NOP
|
# GCN: S_NOP 3
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: V_READLANE_B32
|
# GCN: V_READLANE_B32
|
||||||
|
|
||||||
# GCN-LABEL: bb.1:
|
# GCN-LABEL: bb.1:
|
||||||
# GCN: V_ADD_CO_U32
|
# GCN: V_ADD_CO_U32
|
||||||
# GCN: S_NOP
|
# GCN: S_NOP 3
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: V_WRITELANE_B32
|
# GCN: V_WRITELANE_B32
|
||||||
|
|
||||||
# GCN-LABEL: bb.2:
|
# GCN-LABEL: bb.2:
|
||||||
# GCN: V_ADD_CO_U32
|
# GCN: V_ADD_CO_U32
|
||||||
# GCN: S_NOP
|
# GCN: S_NOP 3
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: V_READLANE_B32
|
# GCN: V_READLANE_B32
|
||||||
|
|
||||||
# GCN-LABEL: bb.3:
|
# GCN-LABEL: bb.3:
|
||||||
# GCN: V_ADD_CO_U32
|
# GCN: V_ADD_CO_U32
|
||||||
# GCN: S_NOP
|
# GCN: S_NOP 3
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: S_NOP
|
|
||||||
# GCN: V_WRITELANE_B32
|
# GCN: V_WRITELANE_B32
|
||||||
|
|
||||||
name: readwrite_lane
|
name: readwrite_lane
|
||||||
@ -429,17 +406,12 @@ body: |
|
|||||||
|
|
||||||
# VI-LABEL: bb.0:
|
# VI-LABEL: bb.0:
|
||||||
# VI: V_MOV_B32_e32
|
# VI: V_MOV_B32_e32
|
||||||
# VI-NEXT: S_NOP 0
|
# VI-NEXT: S_NOP 1
|
||||||
# VI-NEXT: S_NOP 0
|
|
||||||
# VI-NEXT: V_MOV_B32_dpp
|
# VI-NEXT: V_MOV_B32_dpp
|
||||||
|
|
||||||
# VI-LABEL: bb.1:
|
# VI-LABEL: bb.1:
|
||||||
# VI: V_CMPX_EQ_I32_e32
|
# VI: V_CMPX_EQ_I32_e32
|
||||||
# VI-NEXT: S_NOP 0
|
# VI-NEXT: S_NOP 4
|
||||||
# VI-NEXT: S_NOP 0
|
|
||||||
# VI-NEXT: S_NOP 0
|
|
||||||
# VI-NEXT: S_NOP 0
|
|
||||||
# VI-NEXT: S_NOP 0
|
|
||||||
# VI-NEXT: V_MOV_B32_dpp
|
# VI-NEXT: V_MOV_B32_dpp
|
||||||
|
|
||||||
name: dpp
|
name: dpp
|
||||||
|
@ -7,9 +7,7 @@
|
|||||||
; VI-LABEL: {{^}}dpp_test:
|
; VI-LABEL: {{^}}dpp_test:
|
||||||
; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
|
; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
|
||||||
; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}}
|
; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}}
|
||||||
; PREGFX10-OPT: s_nop 1
|
; PREGFX10: s_nop 1
|
||||||
; PREGFX10-NOOPT: s_nop 0
|
|
||||||
; PREGFX10-NOOPT: s_nop 0
|
|
||||||
; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
|
; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
|
||||||
; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
|
; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
|
||||||
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
|
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
|
||||||
@ -21,14 +19,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
|
|||||||
; VI-LABEL: {{^}}dpp_wait_states:
|
; VI-LABEL: {{^}}dpp_wait_states:
|
||||||
; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
|
; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
|
||||||
; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
|
; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
|
||||||
; PREGFX10-OPT: s_nop 1
|
; PREGFX10: s_nop 1
|
||||||
; PREGFX10-NOOPT: s_nop 0
|
|
||||||
; PREGFX10-NOOPT: s_nop 0
|
|
||||||
; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||||
; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:
|
; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:
|
||||||
; PREGFX10-OPT: s_nop 1
|
; PREGFX10: s_nop 1
|
||||||
; PREGFX10-NOOPT: s_nop 0
|
|
||||||
; PREGFX10-NOOPT: s_nop 0
|
|
||||||
; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||||
; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||||
define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
|
define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
|
||||||
@ -44,13 +38,10 @@ define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
|
|||||||
; PREGFX10-OPT: s_mov_b32
|
; PREGFX10-OPT: s_mov_b32
|
||||||
; PREGFX10-NOOPT: s_waitcnt
|
; PREGFX10-NOOPT: s_waitcnt
|
||||||
; PREGFX10-NOOPT: v_mov_b32_e32
|
; PREGFX10-NOOPT: v_mov_b32_e32
|
||||||
; PREGFX10-NOOPT-NEXT: s_nop 0
|
|
||||||
; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||||
; PREGFX10-OPT: s_nop 1
|
; PREGFX10: s_nop 1
|
||||||
; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||||
; PREGFX10-OPT: s_nop 1
|
; PREGFX10: s_nop 1
|
||||||
; PREGFX10-NOOPT: s_nop 0
|
|
||||||
; PREGFX10-NOOPT: s_nop 0
|
|
||||||
; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||||
define amdgpu_kernel void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) {
|
define amdgpu_kernel void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) {
|
||||||
%cmp = fcmp oeq float %cond, 0.0
|
%cmp = fcmp oeq float %cond, 0.0
|
||||||
|
@ -5,9 +5,7 @@
|
|||||||
; GCN-LABEL: {{^}}dpp_test:
|
; GCN-LABEL: {{^}}dpp_test:
|
||||||
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
|
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
|
||||||
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
|
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
|
||||||
; GFX8-OPT: s_nop 1
|
; GFX8: s_nop 1
|
||||||
; GFX8-NOOPT: s_nop 0
|
|
||||||
; GFX8-NOOPT-NEXT: s_nop 0
|
|
||||||
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
|
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
|
||||||
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
|
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
|
||||||
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) #0
|
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) #0
|
||||||
@ -18,9 +16,7 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2)
|
|||||||
; GCN-LABEL: {{^}}dpp_test_bc:
|
; GCN-LABEL: {{^}}dpp_test_bc:
|
||||||
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
|
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
|
||||||
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
|
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
|
||||||
; GFX8-OPT: s_nop 1
|
; GFX8: s_nop 1
|
||||||
; GFX8-NOOPT: s_nop 0
|
|
||||||
; GFX8-NOOPT-NEXT: s_nop 0
|
|
||||||
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0{{$}}
|
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0{{$}}
|
||||||
define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
|
define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
|
||||||
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 1) #0
|
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 1) #0
|
||||||
@ -34,8 +30,9 @@ define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in
|
|||||||
; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||||
; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
|
; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||||
; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
|
; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
|
||||||
; GFX8: s_nop 0
|
; GFX8-NOOPT: s_nop 1
|
||||||
; GFX8-NEXT: s_nop 0
|
; GFX8-OPT: s_nop 0
|
||||||
|
; GFX8-OPT-NEXT: s_nop 0
|
||||||
; GFX8-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
|
; GFX8-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
|
||||||
@0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4
|
@0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4
|
||||||
define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr {
|
define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr {
|
||||||
|
@ -3,8 +3,7 @@
|
|||||||
# GCN-LABEL: name: valu_write_vgpr_mfma_read
|
# GCN-LABEL: name: valu_write_vgpr_mfma_read
|
||||||
# GCN: V_MOV_B32
|
# GCN: V_MOV_B32
|
||||||
# GCN: V_MOV_B32
|
# GCN: V_MOV_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_MFMA
|
# GCN-NEXT: V_MFMA
|
||||||
name: valu_write_vgpr_mfma_read
|
name: valu_write_vgpr_mfma_read
|
||||||
body: |
|
body: |
|
||||||
@ -17,8 +16,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read
|
# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read
|
||||||
# GCN: V_MOV_B32
|
# GCN: V_MOV_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||||
name: valu_write_vgpr_accvgpr_write_read
|
name: valu_write_vgpr_accvgpr_write_read
|
||||||
body: |
|
body: |
|
||||||
@ -41,8 +39,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap
|
# GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap
|
||||||
# GCN: V_MFMA
|
# GCN: V_MFMA
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_MFMA
|
# GCN-NEXT: V_MFMA
|
||||||
name: mfma_write_agpr_mfma_read_overlap
|
name: mfma_write_agpr_mfma_read_overlap
|
||||||
body: |
|
body: |
|
||||||
@ -54,8 +51,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_write_agpr_mfma_read_partial
|
# GCN-LABEL: name: mfma_write_agpr_mfma_read_partial
|
||||||
# GCN: V_MFMA
|
# GCN: V_MFMA
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_MFMA
|
# GCN-NEXT: V_MFMA
|
||||||
name: mfma_write_agpr_mfma_read_partial
|
name: mfma_write_agpr_mfma_read_partial
|
||||||
body: |
|
body: |
|
||||||
@ -67,10 +63,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap
|
# GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap
|
||||||
# GCN: V_MFMA
|
# GCN: V_MFMA
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_MFMA
|
# GCN-NEXT: V_MFMA
|
||||||
name: mfma_write_agpr_mfma_srca_read_overlap
|
name: mfma_write_agpr_mfma_srca_read_overlap
|
||||||
body: |
|
body: |
|
||||||
@ -82,10 +75,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap
|
# GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap
|
||||||
# GCN: V_MFMA
|
# GCN: V_MFMA
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_MFMA
|
# GCN-NEXT: V_MFMA
|
||||||
name: mfma_write_agpr_mfma_srcb_read_overlap
|
name: mfma_write_agpr_mfma_srcb_read_overlap
|
||||||
body: |
|
body: |
|
||||||
@ -97,10 +87,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read
|
# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read
|
||||||
# GCN: V_MFMA_F32_4X4X1F32
|
# GCN: V_MFMA_F32_4X4X1F32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||||
name: mfma_4x4_write_agpr_accvgpr_read
|
name: mfma_4x4_write_agpr_accvgpr_read
|
||||||
body: |
|
body: |
|
||||||
@ -112,16 +99,8 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read
|
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read
|
||||||
# GCN: V_MFMA_F32_16X16X1F32
|
# GCN: V_MFMA_F32_16X16X1F32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 7
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||||
name: mfma_16x16_write_agpr_accvgpr_read
|
name: mfma_16x16_write_agpr_accvgpr_read
|
||||||
body: |
|
body: |
|
||||||
@ -133,24 +112,9 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read
|
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read
|
||||||
# GCN: V_MFMA_F32_32X32X2F32
|
# GCN: V_MFMA_F32_32X32X2F32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 7
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 7
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||||
name: mfma_32x32_write_agpr_accvgpr_read
|
name: mfma_32x32_write_agpr_accvgpr_read
|
||||||
body: |
|
body: |
|
||||||
@ -174,13 +138,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write
|
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write
|
||||||
# GCN: V_MFMA_F32_16X16X1F32
|
# GCN: V_MFMA_F32_16X16X1F32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 6
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||||
name: mfma_16x16_write_agpr_accvgpr_write
|
name: mfma_16x16_write_agpr_accvgpr_write
|
||||||
body: |
|
body: |
|
||||||
@ -192,21 +150,8 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write
|
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write
|
||||||
# GCN: V_MFMA_F32_32X32X2F32
|
# GCN: V_MFMA_F32_32X32X2F32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 7
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 6
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||||
name: mfma_32x32_write_agpr_accvgpr_write
|
name: mfma_32x32_write_agpr_accvgpr_write
|
||||||
body: |
|
body: |
|
||||||
@ -229,11 +174,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write
|
# GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write
|
||||||
# GCN: V_MFMA_F32_16X16X1F32
|
# GCN: V_MFMA_F32_16X16X1F32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 4
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||||
name: mfma_16x16_read_srcc_accvgpr_write
|
name: mfma_16x16_read_srcc_accvgpr_write
|
||||||
body: |
|
body: |
|
||||||
@ -245,19 +186,8 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write
|
# GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write
|
||||||
# GCN: V_MFMA_F32_32X32X2F32
|
# GCN: V_MFMA_F32_32X32X2F32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 7
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 4
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||||
name: mfma_32x32_read_srcc_accvgpr_write
|
name: mfma_32x32_read_srcc_accvgpr_write
|
||||||
body: |
|
body: |
|
||||||
@ -280,8 +210,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read
|
# GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read
|
||||||
# GCN: V_ACCVGPR_READ_B32
|
# GCN: V_ACCVGPR_READ_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_MFMA
|
# GCN-NEXT: V_MFMA
|
||||||
name: accvgpr_read_write_vgpr_mfma_read
|
name: accvgpr_read_write_vgpr_mfma_read
|
||||||
body: |
|
body: |
|
||||||
@ -293,8 +222,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read
|
# GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read
|
||||||
# GCN: V_ACCVGPR_READ_B32
|
# GCN: V_ACCVGPR_READ_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||||
name: accvgpr_read_write_vgpr_accvgpr_write_read
|
name: accvgpr_read_write_vgpr_accvgpr_write_read
|
||||||
body: |
|
body: |
|
||||||
@ -318,9 +246,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca
|
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca
|
||||||
# GCN: V_ACCVGPR_WRITE_B32
|
# GCN: V_ACCVGPR_WRITE_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_MFMA
|
# GCN-NEXT: V_MFMA
|
||||||
name: accvgpr_write_agpr_mfma_read_srca
|
name: accvgpr_write_agpr_mfma_read_srca
|
||||||
body: |
|
body: |
|
||||||
@ -332,9 +258,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb
|
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb
|
||||||
# GCN: V_ACCVGPR_WRITE_B32
|
# GCN: V_ACCVGPR_WRITE_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_MFMA
|
# GCN-NEXT: V_MFMA
|
||||||
name: accvgpr_write_agpr_mfma_read_srcb
|
name: accvgpr_write_agpr_mfma_read_srcb
|
||||||
body: |
|
body: |
|
||||||
@ -346,9 +270,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read
|
# GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read
|
||||||
# GCN: V_ACCVGPR_WRITE_B32
|
# GCN: V_ACCVGPR_WRITE_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||||
name: accvgpr_write_agpr_accvgpr_read
|
name: accvgpr_write_agpr_accvgpr_read
|
||||||
body: |
|
body: |
|
||||||
@ -360,10 +282,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: vcmpx_write_exec_mfma
|
# GCN-LABEL: name: vcmpx_write_exec_mfma
|
||||||
# GCN: V_CMPX_EQ_I32_e32
|
# GCN: V_CMPX_EQ_I32_e32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_MFMA
|
# GCN-NEXT: V_MFMA
|
||||||
name: vcmpx_write_exec_mfma
|
name: vcmpx_write_exec_mfma
|
||||||
body: |
|
body: |
|
||||||
@ -375,10 +294,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: vcmpx_write_exec_accvgpr_write
|
# GCN-LABEL: name: vcmpx_write_exec_accvgpr_write
|
||||||
# GCN: V_CMPX_EQ_I32_e32
|
# GCN: V_CMPX_EQ_I32_e32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||||
name: vcmpx_write_exec_accvgpr_write
|
name: vcmpx_write_exec_accvgpr_write
|
||||||
body: |
|
body: |
|
||||||
@ -390,8 +306,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_load
|
# GCN-LABEL: name: accvgpr_read_write_vgpr_load
|
||||||
# GCN: V_ACCVGPR_READ_B32
|
# GCN: V_ACCVGPR_READ_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||||
name: accvgpr_read_write_vgpr_load
|
name: accvgpr_read_write_vgpr_load
|
||||||
body: |
|
body: |
|
||||||
@ -403,8 +318,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_ds_permute
|
# GCN-LABEL: name: accvgpr_read_write_vgpr_ds_permute
|
||||||
# GCN: V_ACCVGPR_READ_B32
|
# GCN: V_ACCVGPR_READ_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: DS_PERMUTE_B32
|
# GCN-NEXT: DS_PERMUTE_B32
|
||||||
name: accvgpr_read_write_vgpr_ds_permute
|
name: accvgpr_read_write_vgpr_ds_permute
|
||||||
body: |
|
body: |
|
||||||
@ -416,8 +330,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_flat_load
|
# GCN-LABEL: name: accvgpr_read_write_vgpr_flat_load
|
||||||
# GCN: V_ACCVGPR_READ_B32
|
# GCN: V_ACCVGPR_READ_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||||
name: accvgpr_read_write_vgpr_flat_load
|
name: accvgpr_read_write_vgpr_flat_load
|
||||||
body: |
|
body: |
|
||||||
@ -429,8 +342,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_buffer_store
|
# GCN-LABEL: name: accvgpr_read_write_vgpr_buffer_store
|
||||||
# GCN: V_ACCVGPR_READ_B32
|
# GCN: V_ACCVGPR_READ_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: BUFFER_STORE_DWORD_OFFSET
|
# GCN-NEXT: BUFFER_STORE_DWORD_OFFSET
|
||||||
name: accvgpr_read_write_vgpr_buffer_store
|
name: accvgpr_read_write_vgpr_buffer_store
|
||||||
body: |
|
body: |
|
||||||
@ -442,8 +354,7 @@ body: |
|
|||||||
|
|
||||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_store
|
# GCN-LABEL: name: accvgpr_read_write_vgpr_store
|
||||||
# GCN: V_ACCVGPR_READ_B32
|
# GCN: V_ACCVGPR_READ_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: DS_WRITE_B32
|
# GCN-NEXT: DS_WRITE_B32
|
||||||
name: accvgpr_read_write_vgpr_store
|
name: accvgpr_read_write_vgpr_store
|
||||||
body: |
|
body: |
|
||||||
@ -497,8 +408,7 @@ body: |
|
|||||||
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
|
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
|
||||||
# GCN: V_MOV_B32
|
# GCN: V_MOV_B32
|
||||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||||
# GCN-NEXT: S_NOP 0
|
# GCN-NEXT: S_NOP 1
|
||||||
# GCN-NEXT: S_NOP 0
|
|
||||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||||
name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
|
name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
|
||||||
body: |
|
body: |
|
||||||
|
@ -2,11 +2,7 @@
|
|||||||
|
|
||||||
# GCN-LABEL: name: vmem_vcc_fallthrough
|
# GCN-LABEL: name: vmem_vcc_fallthrough
|
||||||
# GCN: bb.1:
|
# GCN: bb.1:
|
||||||
# GCN-NEXT: S_NOP
|
# GCN-NEXT: S_NOP 4
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_fallthrough
|
name: vmem_vcc_fallthrough
|
||||||
@ -23,10 +19,7 @@ body: |
|
|||||||
...
|
...
|
||||||
# GCN-LABEL: name: vmem_vcc_branch_to_next
|
# GCN-LABEL: name: vmem_vcc_branch_to_next
|
||||||
# GCN: bb.1:
|
# GCN: bb.1:
|
||||||
# GCN-NEXT: S_NOP
|
# GCN-NEXT: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_branch_to_next
|
name: vmem_vcc_branch_to_next
|
||||||
@ -82,10 +75,7 @@ body: |
|
|||||||
...
|
...
|
||||||
# GCN-LABEL: name: vmem_vcc_branch_around
|
# GCN-LABEL: name: vmem_vcc_branch_around
|
||||||
# GCN: bb.2:
|
# GCN: bb.2:
|
||||||
# GCN-NEXT: S_NOP
|
# GCN-NEXT: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_branch_around
|
name: vmem_vcc_branch_around
|
||||||
@ -110,10 +100,7 @@ body: |
|
|||||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||||
...
|
...
|
||||||
# GCN-LABEL: name: vmem_vcc_branch_backedge
|
# GCN-LABEL: name: vmem_vcc_branch_backedge
|
||||||
# GCN: S_NOP
|
# GCN: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_branch_backedge
|
name: vmem_vcc_branch_backedge
|
||||||
@ -132,11 +119,7 @@ body: |
|
|||||||
...
|
...
|
||||||
# GCN-LABEL: name: vmem_vcc_min_of_two
|
# GCN-LABEL: name: vmem_vcc_min_of_two
|
||||||
# GCN: bb.2:
|
# GCN: bb.2:
|
||||||
# GCN-NEXT: S_NOP
|
# GCN-NEXT: S_NOP 4
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_min_of_two
|
name: vmem_vcc_min_of_two
|
||||||
@ -159,10 +142,7 @@ body: |
|
|||||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||||
...
|
...
|
||||||
# GCN-LABEL: name: vmem_vcc_self_loop
|
# GCN-LABEL: name: vmem_vcc_self_loop
|
||||||
# GCN: S_NOP
|
# GCN: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_self_loop
|
name: vmem_vcc_self_loop
|
||||||
@ -179,10 +159,7 @@ body: |
|
|||||||
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop1
|
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop1
|
||||||
# GCN: bb.1:
|
# GCN: bb.1:
|
||||||
# GCN: $sgpr0 = S_MOV_B32 0
|
# GCN: $sgpr0 = S_MOV_B32 0
|
||||||
# GCN-NEXT: S_NOP
|
# GCN-NEXT: S_NOP 3
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_min_of_two_self_loop1
|
name: vmem_vcc_min_of_two_self_loop1
|
||||||
@ -205,9 +182,7 @@ body: |
|
|||||||
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop2
|
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop2
|
||||||
# GCN: bb.1:
|
# GCN: bb.1:
|
||||||
# GCN: $sgpr0 = S_MOV_B32 0
|
# GCN: $sgpr0 = S_MOV_B32 0
|
||||||
# GCN-NEXT: S_NOP
|
# GCN-NEXT: S_NOP 2
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: S_NOP
|
|
||||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||||
---
|
---
|
||||||
name: vmem_vcc_min_of_two_self_loop2
|
name: vmem_vcc_min_of_two_self_loop2
|
||||||
|
Loading…
Reference in New Issue
Block a user