1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

AMDGPU/GlobalISel: Fix some crashes in g_unmerge_values/g_merge_values

This was crashing in the predicate function assuming the value
is a vector.

Copy more of what AArch64 uses. This probably needs more refinement
later, but I don't exactly understand what it means in some cases,
particularly since any legalization for these seems to be missing.

llvm-svn: 351693
This commit is contained in:
Matt Arsenault 2019-01-20 18:40:36 +00:00
parent af0f1330ee
commit 3d37d26708
3 changed files with 150 additions and 16 deletions

View File

@ -35,6 +35,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT S256 = LLT::scalar(256);
const LLT S512 = LLT::scalar(512);
const LLT V2S16 = LLT::vector(2, 16);
@ -298,25 +299,85 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
// Predicate helper: reports whether the type at TypeIdx is a vector whose
// element size is unusable, i.e. smaller than 8 bits, larger than 64 bits,
// or not a power of two. Non-vector types are never reported, so the
// element type is only queried when the type really is a vector.
auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
  const LLT &QueriedTy = Query.Types[TypeIdx];
  if (!QueriedTy.isVector())
    return false;

  const unsigned EltBits = QueriedTy.getElementType().getSizeInBits();
  const bool SizeInRange = EltBits >= 8 && EltBits <= 64;
  return !SizeInRange || !isPowerOf2_32(EltBits);
};
// Mutation helper: pairs TypeIdx with the element type of the type found at
// that index in the query. It calls getElementType() unconditionally, so it
// is expected to be used only for indices that hold a vector type.
auto scalarize = [=](const LegalityQuery &Query, unsigned TypeIdx) {
  const LLT ElementTy = Query.Types[TypeIdx].getElementType();
  return std::make_pair(TypeIdx, ElementTy);
};
getActionDefinitionsBuilder(Op)
// Break up vectors with weird elements into scalars
.fewerElementsIf(
[=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
[=](const LegalityQuery &Query) { return scalarize(Query, 0); })
.fewerElementsIf(
[=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
[=](const LegalityQuery &Query) { return scalarize(Query, 1); })
.clampScalar(BigTyIdx, S32, S512)
.widenScalarIf(
[=](const LegalityQuery &Query) {
const LLT &Ty = Query.Types[BigTyIdx];
return !isPowerOf2_32(Ty.getSizeInBits()) &&
Ty.getSizeInBits() % 16 != 0;
},
[=](const LegalityQuery &Query) {
// Pick the next power of 2, or a multiple of 64 over 128.
// Whichever is smaller.
const LLT &Ty = Query.Types[BigTyIdx];
unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
if (NewSizeInBits >= 256) {
unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
if (RoundedTo < NewSizeInBits)
NewSizeInBits = RoundedTo;
}
return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
})
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
// Clamp the little scalar to s16-s256 and make it a power of 2. It's not
// worth considering the multiples of 64 since 2*192 and 2*384 are not
// valid.
.clampScalar(LitTyIdx, S16, S256)
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
.legalIf([=](const LegalityQuery &Query) {
const LLT &BigTy = Query.Types[BigTyIdx];
const LLT &LitTy = Query.Types[LitTyIdx];
return BigTy.getSizeInBits() % 32 == 0 &&
LitTy.getSizeInBits() % 32 == 0 &&
if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
return false;
if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
return false;
return BigTy.getSizeInBits() % 16 == 0 &&
LitTy.getSizeInBits() % 16 == 0 &&
BigTy.getSizeInBits() <= 512;
})
// Any vectors left are the wrong size. Scalarize them.
.fewerElementsIf([](const LegalityQuery &Query) { return true; },
[](const LegalityQuery &Query) {
return std::make_pair(
0, Query.Types[0].getElementType());
})
.fewerElementsIf([](const LegalityQuery &Query) { return true; },
[](const LegalityQuery &Query) {
return std::make_pair(
1, Query.Types[1].getElementType());
});
.fewerElementsIf([](const LegalityQuery &Query) {
return Query.Types[0].isVector();
},
[](const LegalityQuery &Query) {
return std::make_pair(
0, Query.Types[0].getElementType());
})
.fewerElementsIf([](const LegalityQuery &Query) {
return Query.Types[1].isVector();
},
[](const LegalityQuery &Query) {
return std::make_pair(
1, Query.Types[1].getElementType());
});
}

View File

@ -0,0 +1,14 @@
# RUN: not llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s 2>&1 | FileCheck %s
# CHECK: LLVM ERROR: unable to legalize instruction: %1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0:_(<2 x s1>) (in function: test_unmerge_v2s1)
---
name: test_unmerge_v2s1
body: |
bb.0:
%0:_(<2 x s1>) = G_IMPLICIT_DEF
%1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0
S_NOP 0, implicit %1
S_NOP 0, implicit %2
...

View File

@ -2,17 +2,76 @@
# RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s | FileCheck %s
---
name: test_unmerge_s64_s32
name: test_unmerge_s32_s64
body: |
bb.0:
; CHECK-LABEL: name: test_unmerge_s64_s32
; CHECK-LABEL: name: test_unmerge_s32_s64
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; CHECK: $vgpr0 = COPY [[UV]](s32)
; CHECK: $vgpr2 = COPY [[UV1]](s32)
; CHECK: $vgpr1 = COPY [[UV1]](s32)
%0:_(s64) = G_CONSTANT i64 0
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64)
$vgpr0 = COPY %1(s32)
$vgpr2 = COPY %2(s32)
$vgpr1 = COPY %2(s32)
...
---
name: test_unmerge_s32_v2s32
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: test_unmerge_s32_v2s32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK: $vgpr0 = COPY [[UV]](s32)
; CHECK: $vgpr21 = COPY [[UV1]](s32)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0
$vgpr0 = COPY %1
$vgpr21= COPY %2
...
---
name: test_unmerge_s16_v2s16
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: test_unmerge_s16_v2s16
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0
%3:_(s32) = G_ANYEXT %1
%4:_(s32) = G_ANYEXT %2
$vgpr0 = COPY %3
$vgpr1 = COPY %4
...
---
name: test_unmerge_s16_v3s16
body: |
bb.0:
; CHECK-LABEL: name: test_unmerge_s16_v3s16
; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
%4:_(s32) = G_ANYEXT %1
%5:_(s32) = G_ANYEXT %2
%6:_(s32) = G_ANYEXT %3
$vgpr0 = COPY %4
$vgpr1 = COPY %5
$vgpr2 = COPY %6
...