Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-11-23 11:13:28 +01:00)
[AMDGPU] Skip additional folding on the same operand.
Reviewers: rampitec, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69355
commit b532a94abc
parent 87e90e0fd2
@@ -312,6 +312,19 @@ static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
   return false;
 }
 
+static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
+                                MachineInstr *MI, unsigned OpNo,
+                                MachineOperand *FoldOp, bool Commuted = false,
+                                int ShrinkOp = -1) {
+  // Skip additional folding on the same operand.
+  for (FoldCandidate &Fold : FoldList)
+    if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)
+      return;
+  LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
+                    << " operand " << OpNo << "\n  " << *MI << '\n');
+  FoldList.push_back(FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
+}
+
 static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
                              MachineInstr *MI, unsigned OpNo,
                              MachineOperand *OpToFold,
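The helper added above is a plain linear scan: before queueing a new fold it walks the pending list and silently drops the request if some candidate already targets the same use instruction and operand index. A minimal standalone sketch of that pattern, where Instr, FoldEntry, and appendFold are simplified stand-ins for the real MachineInstr, FoldCandidate, and appendFoldCandidate:

#include <cstdio>
#include <vector>

struct Instr { int Id; };   // simplified stand-in for MachineInstr
struct FoldEntry {          // simplified stand-in for FoldCandidate
  Instr *UseMI;             // instruction whose operand would be folded
  unsigned UseOpNo;         // index of that operand
};

// Append a candidate unless one for the same (UseMI, UseOpNo) pair is
// already pending -- the duplicate check this patch introduces.
static void appendFold(std::vector<FoldEntry> &List, Instr *MI, unsigned OpNo) {
  for (const FoldEntry &F : List)
    if (F.UseMI == MI && F.UseOpNo == OpNo)
      return;               // skip additional folding on the same operand
  List.push_back({MI, OpNo});
}

int main() {
  Instr Xor{0};
  std::vector<FoldEntry> List;
  appendFold(List, &Xor, 1);  // queued
  appendFold(List, &Xor, 1);  // duplicate, dropped
  std::printf("%zu candidate(s)\n", List.size());  // prints: 1 candidate(s)
}

With the helper in place, every push_back onto the fold list below is routed through it, so no caller can queue two folds for the same operand.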
@@ -344,7 +357,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
     // Special case for s_setreg_b32
     if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
       MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
-      FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+      appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
       return true;
     }
 
@@ -403,8 +416,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
       unsigned MaybeCommutedOpc = MI->getOpcode();
       int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
 
-      FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
-                                       Op32));
+      appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
       return true;
     }
 
@@ -412,11 +424,11 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
       return false;
     }
 
-    FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
+    appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
     return true;
   }
 
-  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+  appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
   return true;
 }
 
@@ -494,7 +506,7 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
   if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
     return false;
 
-  FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
+  appendFoldCandidate(FoldList, UseMI, UseOpIdx, Op);
   return true;
 }
 
@@ -1398,5 +1410,5 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
       foldInstOperand(MI, OpToFold);
     }
   }
-  return false;
+  return true;
 }
@@ -22,3 +22,21 @@ body: |
     %9:vgpr_32 = COPY %8
     %10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
 ...
+
+---
+# GCN-LABEL: name: no_extra_fold_on_same_opnd
+# The first XOR needs commuting to fold that immediate operand.
+# GCN: V_XOR_B32_e32 {{.*}} 0, %1
+# GCN: V_XOR_B32_e32 %2, %4.sub0
+name: no_extra_fold_on_same_opnd
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE killed %0, %subreg.sub0, killed %3, %subreg.sub1
+    %5:vgpr_32 = V_XOR_B32_e32 %1, %4.sub1, implicit $exec
+    %6:vgpr_32 = V_XOR_B32_e32 %2, %4.sub0, implicit $exec
+...
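In this MIR test, %4.sub1 carries the zero materialized by V_MOV_B32_e32, and a VOP2 instruction like V_XOR_B32_e32 only accepts an immediate in its first source, so the first XOR must be commuted before the fold; the GCN lines then pin down exactly one fold, leaving the second XOR untouched. The file's RUN line sits outside this hunk; assuming the usual header for an SIFoldOperands MIR test, it would read roughly:

# RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s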
@@ -124,6 +124,30 @@ define amdgpu_kernel void @no_fold_tied_subregister() {
   ret void
 }
 
+; There should be exactly one fold on the same operand.
+; CHECK-LABEL: {{^}}no_extra_fold_on_same_opnd
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @no_extra_fold_on_same_opnd() {
+entry:
+  %s0 = load i32, i32 addrspace(5)* undef, align 4
+  %s0.i64 = zext i32 %s0 to i64
+  br label %for.body.i.i
+
+for.body.i.i:
+  %s1 = load i32, i32 addrspace(1)* undef, align 8
+  %s1.i64 = sext i32 %s1 to i64
+  %xor = xor i64 %s1.i64, %s0.i64
+  %flag = icmp ult i64 %xor, 8
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  unreachable
+
+if.else:
+  unreachable
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }
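The IR variant exercises the same scenario through full codegen: the CHECK lines require exactly one v_xor_b32 with the literal 0 folded in, followed by a plain register-register v_xor_b32. Assuming a standard llc RUN line (also not shown in this hunk), it would be along the lines of:

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s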