1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[AMDGPU] Skip additional folding on the same operand.

Reviewers: rampitec, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69355
This commit is contained in:
Michael Liao 2019-10-23 15:19:06 -04:00
parent 87e90e0fd2
commit b532a94abc
3 changed files with 61 additions and 7 deletions

View File

@ -312,6 +312,19 @@ static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
return false;
}
/// Queue a fold of \p FoldOp into operand \p OpNo of \p MI.
///
/// The request is dropped when \p FoldList already contains a candidate for
/// the same (instruction, operand index) pair, so only the first candidate
/// recorded for an operand is kept.
///
/// \param FoldList  Candidates collected so far; appended to on success.
/// \param MI        Instruction whose operand would be replaced.
/// \param OpNo      Index of the operand in \p MI to replace.
/// \param FoldOp    Operand to fold into \p MI.
/// \param Commuted  Passed through to the FoldCandidate; also selects the
///                  "commuted" / "normal" wording of the debug message.
/// \param ShrinkOp  Passed through to the FoldCandidate unchanged.
static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                MachineInstr *MI, unsigned OpNo,
                                MachineOperand *FoldOp, bool Commuted = false,
                                int ShrinkOp = -1) {
  // An operand that already has a pending fold must not receive another one.
  for (const FoldCandidate &Existing : FoldList) {
    const bool TargetsSameOperand =
        Existing.UseMI == MI && Existing.UseOpNo == OpNo;
    if (TargetsSameOperand)
      return;
  }

  LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
                    << " operand " << OpNo << "\n " << *MI << '\n');

  // Build the candidate directly inside the list.
  FoldList.emplace_back(MI, OpNo, FoldOp, Commuted, ShrinkOp);
}
static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold,
@ -344,7 +357,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
// Special case for s_setreg_b32
if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
return true;
}
@ -403,8 +416,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
unsigned MaybeCommutedOpc = MI->getOpcode();
int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
Op32));
appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
return true;
}
@ -412,11 +424,11 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
return false;
}
FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
return true;
}
FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
return true;
}
@ -494,7 +506,7 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
return false;
FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
appendFoldCandidate(FoldList, UseMI, UseOpIdx, Op);
return true;
}
@ -1398,5 +1410,5 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
foldInstOperand(MI, OpToFold);
}
}
return false;
return true;
}

View File

@ -22,3 +22,21 @@ body: |
%9:vgpr_32 = COPY %8
%10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
...
---
# GCN-LABEL: name: no_extra_fold_on_same_opnd
# The first XOR needs commuting to fold that immediate operand.
#
# Input shape: %3 is a V_MOV_B32 of the constant 0 and becomes the sub1 half
# of the REG_SEQUENCE %4, while the sub0 half is the IMPLICIT_DEF %0.
# The first XOR reads %4.sub1 as its second source; the expected output has
# the constant 0 as the first source and %1 as the second.
# The second XOR reads %4.sub0 and is expected to be left unchanged.
# GCN: V_XOR_B32_e32 {{.*}} 0, %1
# GCN: V_XOR_B32_e32 %2, %4.sub0
name: no_extra_fold_on_same_opnd
tracksRegLiveness: true
body: |
bb.0:
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%4:vreg_64 = REG_SEQUENCE killed %0, %subreg.sub0, killed %3, %subreg.sub1
%5:vgpr_32 = V_XOR_B32_e32 %1, %4.sub1, implicit $exec
%6:vgpr_32 = V_XOR_B32_e32 %2, %4.sub0, implicit $exec
...

View File

@ -124,6 +124,30 @@ define amdgpu_kernel void @no_fold_tied_subregister() {
ret void
}
; There should be exactly one fold on the same operand.
;
; The function XORs a zero-extended i32 with a sign-extended i32 as i64
; values. The expected output contains two 32-bit v_xor instructions: the
; first takes the literal 0 as its first source operand, the second takes
; only registers.
; NOTE(review): presumably the literal 0 is the high half of the
; zero-extended value folded into the high-half xor -- confirm against the
; generated code.
; CHECK-LABEL: {{^}}no_extra_fold_on_same_opnd
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @no_extra_fold_on_same_opnd() {
entry:
; First xor input: a 32-bit load widened with zext.
%s0 = load i32, i32 addrspace(5)* undef, align 4
%s0.i64= zext i32 %s0 to i64
br label %for.body.i.i
for.body.i.i:
; Second xor input: a 32-bit load widened with sext, in a separate block
; from the zext above.
%s1 = load i32, i32 addrspace(1)* undef, align 8
%s1.i64 = sext i32 %s1 to i64
%xor = xor i64 %s1.i64, %s0.i64
; The unsigned compare and branch are the only users of the xor result.
%flag = icmp ult i64 %xor, 8
br i1 %flag, label %if.then, label %if.else
if.then:
unreachable
if.else:
unreachable
}
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }