mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
AMDGPU/GlobalISel: Fix some crashes in g_unmerge_values/g_merge_values
This was crashing in the predicate function assuming the value is a vector. Copy more of what AArch64 uses. This probably needs more refinement later, but I don't exactly understand what it means in some cases, particularly since any legalization for these seems to be missing. llvm-svn: 351693
This commit is contained in:
parent
af0f1330ee
commit
3d37d26708
@ -35,6 +35,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
|
||||
const LLT S16 = LLT::scalar(16);
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
const LLT S64 = LLT::scalar(64);
|
||||
const LLT S256 = LLT::scalar(256);
|
||||
const LLT S512 = LLT::scalar(512);
|
||||
|
||||
const LLT V2S16 = LLT::vector(2, 16);
|
||||
@ -298,25 +299,85 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
|
||||
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
|
||||
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
|
||||
|
||||
auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
|
||||
const LLT &Ty = Query.Types[TypeIdx];
|
||||
if (Ty.isVector()) {
|
||||
const LLT &EltTy = Ty.getElementType();
|
||||
if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
|
||||
return true;
|
||||
if (!isPowerOf2_32(EltTy.getSizeInBits()))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
auto scalarize =
|
||||
[=](const LegalityQuery &Query, unsigned TypeIdx) {
|
||||
const LLT &Ty = Query.Types[TypeIdx];
|
||||
return std::make_pair(TypeIdx, Ty.getElementType());
|
||||
};
|
||||
|
||||
getActionDefinitionsBuilder(Op)
|
||||
// Break up vectors with weird elements into scalars
|
||||
.fewerElementsIf(
|
||||
[=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
|
||||
[=](const LegalityQuery &Query) { return scalarize(Query, 0); })
|
||||
.fewerElementsIf(
|
||||
[=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
|
||||
[=](const LegalityQuery &Query) { return scalarize(Query, 1); })
|
||||
.clampScalar(BigTyIdx, S32, S512)
|
||||
.widenScalarIf(
|
||||
[=](const LegalityQuery &Query) {
|
||||
const LLT &Ty = Query.Types[BigTyIdx];
|
||||
return !isPowerOf2_32(Ty.getSizeInBits()) &&
|
||||
Ty.getSizeInBits() % 16 != 0;
|
||||
},
|
||||
[=](const LegalityQuery &Query) {
|
||||
// Pick the next power of 2, or a multiple of 64 over 128.
|
||||
// Whichever is smaller.
|
||||
const LLT &Ty = Query.Types[BigTyIdx];
|
||||
unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
|
||||
if (NewSizeInBits >= 256) {
|
||||
unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
|
||||
if (RoundedTo < NewSizeInBits)
|
||||
NewSizeInBits = RoundedTo;
|
||||
}
|
||||
return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
|
||||
})
|
||||
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
|
||||
// Clamp the little scalar to s16-s256 and make it a power of 2. It's not
|
||||
// worth considering the multiples of 64 since 2*192 and 2*384 are not
|
||||
// valid.
|
||||
.clampScalar(LitTyIdx, S16, S256)
|
||||
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
|
||||
.legalIf([=](const LegalityQuery &Query) {
|
||||
const LLT &BigTy = Query.Types[BigTyIdx];
|
||||
const LLT &LitTy = Query.Types[LitTyIdx];
|
||||
return BigTy.getSizeInBits() % 32 == 0 &&
|
||||
LitTy.getSizeInBits() % 32 == 0 &&
|
||||
|
||||
if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
|
||||
return false;
|
||||
if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
|
||||
return false;
|
||||
|
||||
return BigTy.getSizeInBits() % 16 == 0 &&
|
||||
LitTy.getSizeInBits() % 16 == 0 &&
|
||||
BigTy.getSizeInBits() <= 512;
|
||||
})
|
||||
// Any vectors left are the wrong size. Scalarize them.
|
||||
.fewerElementsIf([](const LegalityQuery &Query) { return true; },
|
||||
[](const LegalityQuery &Query) {
|
||||
return std::make_pair(
|
||||
0, Query.Types[0].getElementType());
|
||||
})
|
||||
.fewerElementsIf([](const LegalityQuery &Query) { return true; },
|
||||
[](const LegalityQuery &Query) {
|
||||
return std::make_pair(
|
||||
1, Query.Types[1].getElementType());
|
||||
});
|
||||
.fewerElementsIf([](const LegalityQuery &Query) {
|
||||
return Query.Types[0].isVector();
|
||||
},
|
||||
[](const LegalityQuery &Query) {
|
||||
return std::make_pair(
|
||||
0, Query.Types[0].getElementType());
|
||||
})
|
||||
.fewerElementsIf([](const LegalityQuery &Query) {
|
||||
return Query.Types[1].isVector();
|
||||
},
|
||||
[](const LegalityQuery &Query) {
|
||||
return std::make_pair(
|
||||
1, Query.Types[1].getElementType());
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,14 @@
|
||||
# RUN: not llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s 2>&1 | FileCheck %s
|
||||
|
||||
# CHECK: LLVM ERROR: unable to legalize instruction: %1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0:_(<2 x s1>) (in function: test_unmerge_v2s1)
|
||||
|
||||
---
|
||||
name: test_unmerge_v2s1
|
||||
body: |
|
||||
bb.0:
|
||||
%0:_(<2 x s1>) = G_IMPLICIT_DEF
|
||||
%1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0
|
||||
S_NOP 0, implicit %1
|
||||
S_NOP 0, implicit %2
|
||||
...
|
||||
|
@ -2,17 +2,76 @@
|
||||
# RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: test_unmerge_s64_s32
|
||||
name: test_unmerge_s32_s64
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: test_unmerge_s64_s32
|
||||
; CHECK-LABEL: name: test_unmerge_s32_s64
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
|
||||
; CHECK: $vgpr0 = COPY [[UV]](s32)
|
||||
; CHECK: $vgpr2 = COPY [[UV1]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[UV1]](s32)
|
||||
%0:_(s64) = G_CONSTANT i64 0
|
||||
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64)
|
||||
$vgpr0 = COPY %1(s32)
|
||||
$vgpr2 = COPY %2(s32)
|
||||
$vgpr1 = COPY %2(s32)
|
||||
...
|
||||
|
||||
---
|
||||
name: test_unmerge_s32_v2s32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
; CHECK-LABEL: name: test_unmerge_s32_v2s32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; CHECK: $vgpr0 = COPY [[UV]](s32)
|
||||
; CHECK: $vgpr21 = COPY [[UV1]](s32)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0
|
||||
$vgpr0 = COPY %1
|
||||
$vgpr21= COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: test_unmerge_s16_v2s16
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; CHECK-LABEL: name: test_unmerge_s16_v2s16
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
|
||||
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
|
||||
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0
|
||||
%3:_(s32) = G_ANYEXT %1
|
||||
%4:_(s32) = G_ANYEXT %2
|
||||
$vgpr0 = COPY %3
|
||||
$vgpr1 = COPY %4
|
||||
...
|
||||
|
||||
---
|
||||
name: test_unmerge_s16_v3s16
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: test_unmerge_s16_v3s16
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
|
||||
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
|
||||
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
|
||||
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
|
||||
; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32)
|
||||
%0:_(<3 x s16>) = G_IMPLICIT_DEF
|
||||
%1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
|
||||
%4:_(s32) = G_ANYEXT %1
|
||||
%5:_(s32) = G_ANYEXT %2
|
||||
%6:_(s32) = G_ANYEXT %3
|
||||
$vgpr0 = COPY %4
|
||||
$vgpr1 = COPY %5
|
||||
$vgpr2 = COPY %6
|
||||
...
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user