mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
GlobalISel: Use extension instead of merge with undef in common case
This fixes not respecting signext/zeroext in these cases. In the anyext case, this avoids a larger merge with undef and should be a better canonical form. This should also handle the case where a merge is still needed, but I'm not aware of a case where that can happen. In a future change this will also allow AMDGPU to drop some custom code without introducing regressions.
This commit is contained in:
parent
b1584af557
commit
57c0d2e25b
@ -484,24 +484,32 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
|
||||
LLT DstTy = MRI.getType(DstRegs[0]);
|
||||
LLT LCMTy = getLCMType(SrcTy, PartTy);
|
||||
|
||||
const unsigned LCMSize = LCMTy.getSizeInBits();
|
||||
const unsigned DstSize = DstTy.getSizeInBits();
|
||||
const unsigned SrcSize = SrcTy.getSizeInBits();
|
||||
unsigned CoveringSize = LCMTy.getSizeInBits();
|
||||
|
||||
Register UnmergeSrc = SrcReg;
|
||||
if (LCMSize != SrcSize) {
|
||||
// Widen to the common type.
|
||||
Register Undef = B.buildUndef(SrcTy).getReg(0);
|
||||
SmallVector<Register, 8> MergeParts(1, SrcReg);
|
||||
for (unsigned Size = SrcSize; Size != LCMSize; Size += SrcSize)
|
||||
MergeParts.push_back(Undef);
|
||||
|
||||
UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
|
||||
if (CoveringSize != SrcSize) {
|
||||
// For scalars, it's common to be able to use a simple extension.
|
||||
if (SrcTy.isScalar() && DstTy.isScalar()) {
|
||||
CoveringSize = alignTo(SrcSize, DstSize);
|
||||
LLT CoverTy = LLT::scalar(CoveringSize);
|
||||
UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0);
|
||||
} else {
|
||||
// Widen to the common type.
|
||||
// FIXME: This should respect the extend type
|
||||
Register Undef = B.buildUndef(SrcTy).getReg(0);
|
||||
SmallVector<Register, 8> MergeParts(1, SrcReg);
|
||||
for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize)
|
||||
MergeParts.push_back(Undef);
|
||||
UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Unmerge to the original registers and pad with dead defs.
|
||||
SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
|
||||
for (unsigned Size = DstSize * DstRegs.size(); Size != LCMSize;
|
||||
for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize;
|
||||
Size += DstSize) {
|
||||
UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
|
||||
}
|
||||
|
@ -1121,9 +1121,8 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
|
||||
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
|
||||
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
|
||||
; CHECK: $vgpr0 = COPY [[UV]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[UV1]](s32)
|
||||
; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
|
||||
@ -1181,9 +1180,8 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
|
||||
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
|
||||
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96)
|
||||
; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
|
||||
; CHECK: $vgpr0 = COPY [[UV]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[UV1]](s32)
|
||||
; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
|
||||
@ -1241,9 +1239,8 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
|
||||
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
|
||||
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
|
||||
; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96)
|
||||
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
|
||||
; CHECK: $vgpr0 = COPY [[UV]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[UV1]](s32)
|
||||
; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
|
||||
|
Loading…
Reference in New Issue
Block a user