mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 19:12:56 +02:00
[AMDGPU] Preserve operand order in SIFoldOperands
SIFoldOperands can commute operands even if no folding was done. This change is to preserve the IR if no folding was done. Differential Revision: https://reviews.llvm.org/D33802 llvm-svn: 304625
This commit is contained in:
parent
53336baf28
commit
4633c07b46
@ -35,9 +35,12 @@ struct FoldCandidate {
|
||||
};
|
||||
unsigned char UseOpNo;
|
||||
MachineOperand::MachineOperandType Kind;
|
||||
bool Commuted;
|
||||
|
||||
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
|
||||
UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
|
||||
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
|
||||
bool Commuted_ = false) :
|
||||
UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
|
||||
Commuted(Commuted_) {
|
||||
if (FoldOp->isImm()) {
|
||||
ImmToFold = FoldOp->getImm();
|
||||
} else if (FoldOp->isFI()) {
|
||||
@ -59,6 +62,10 @@ struct FoldCandidate {
|
||||
bool isReg() const {
|
||||
return Kind == MachineOperand::MO_Register;
|
||||
}
|
||||
|
||||
bool isCommuted() const {
|
||||
return Commuted;
|
||||
}
|
||||
};
|
||||
|
||||
class SIFoldOperands : public MachineFunctionPass {
|
||||
@ -237,10 +244,15 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
|
||||
!TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
|
||||
return false;
|
||||
|
||||
if (!TII->isOperandLegal(*MI, OpNo, OpToFold))
|
||||
if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
|
||||
TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
|
||||
return false;
|
||||
}
|
||||
|
||||
FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
|
||||
return true;
|
||||
}
|
||||
|
||||
FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
|
||||
return true;
|
||||
}
|
||||
@ -699,6 +711,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
|
||||
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
|
||||
static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
|
||||
tryFoldInst(TII, Fold.UseMI);
|
||||
} else if (Fold.isCommuted()) {
|
||||
// Restoring instruction's original operand order if fold has failed.
|
||||
TII->commuteInstruction(*Fold.UseMI, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ define amdgpu_kernel void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspa
|
||||
; FIXME: Why isn't this being folded as a constant?
|
||||
; GCN-LABEL: {{^}}commute_ne_litk_i32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
|
||||
; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}}
|
||||
; GCN: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, [[K]]
|
||||
define amdgpu_kernel void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
@ -99,11 +99,9 @@ define amdgpu_kernel void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrsp
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm
|
||||
|
||||
; GCN-LABEL: {{^}}commute_ule_64_i32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}}
|
||||
; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
|
||||
; GCN: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, [[K]]
|
||||
define amdgpu_kernel void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
@ -702,7 +700,7 @@ define amdgpu_kernel void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double ad
|
||||
; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
|
||||
; GCN: v_cmp_eq_u32_e32 vcc, [[FI]], v{{[0-9]+}}
|
||||
; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]]
|
||||
define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 {
|
||||
entry:
|
||||
%stack0 = alloca i32
|
||||
|
@ -421,11 +421,10 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspac
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr:
|
||||
; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
|
||||
; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7
|
||||
; GCN: flat_load_dword [[IDX:v[0-9]+]]
|
||||
; GCN: flat_load_dword [[VEC:v[0-9]+]]
|
||||
; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7
|
||||
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7
|
||||
|
||||
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
|
||||
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
|
||||
@ -449,11 +448,10 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspac
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_insertelement_v2f16_dynamic_vgpr:
|
||||
; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
|
||||
; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234
|
||||
; GCN: flat_load_dword [[IDX:v[0-9]+]]
|
||||
; GCN: flat_load_dword [[VEC:v[0-9]+]]
|
||||
; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234
|
||||
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234
|
||||
|
||||
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
|
||||
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
|
||||
|
@ -85,9 +85,9 @@ define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i1
|
||||
|
||||
; FIXME: Need to handle non-uniform case for function below (load without gep).
|
||||
; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64:
|
||||
; VI: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
|
||||
; VI: flat_load_ushort [[A:v[0-9]+]]
|
||||
; VI: flat_load_ushort [[B:v[0-9]+]]
|
||||
; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
|
||||
; VI-DAG: v_subrev_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
|
||||
; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
|
||||
|
Loading…
Reference in New Issue
Block a user