1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

AMDGPU/GlobalISel: Make some G_MERGE_VALUEs legal

llvm-svn: 327267
This commit is contained in:
Matt Arsenault 2018-03-12 13:35:43 +00:00
parent 1e2851aabe
commit def6781bbe
3 changed files with 174 additions and 0 deletions

View File

@ -119,5 +119,32 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST,
(Ty1.getSizeInBits() % 32 == 0);
});
// Merge/Unmerge
//
// Both opcodes relate one wide ("big") type to several narrow ("little")
// ones; they differ only in which type index holds which. For
// G_MERGE_VALUES the big type is type index 0 and the little type is type
// index 1; for G_UNMERGE_VALUES the two are swapped.
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
  const bool IsMerge = Op == G_MERGE_VALUES;
  const unsigned BigTyIdx = IsMerge ? 0 : 1;
  const unsigned LitTyIdx = IsMerge ? 1 : 0;

  getActionDefinitionsBuilder(Op)
      // Legal when both types are a whole number of 32-bit units and the
      // big type is no wider than 512 bits.
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &BigTy = Query.Types[BigTyIdx];
        const LLT &LitTy = Query.Types[LitTyIdx];
        return BigTy.getSizeInBits() % 32 == 0 &&
               LitTy.getSizeInBits() % 32 == 0 &&
               BigTy.getSizeInBits() <= 512;
      })
      // Any vectors left are the wrong size. Scalarize them.
      //
      // The predicates must reject non-vector types: the mutations below
      // call LLT::getElementType(), which is only valid on a vector. A
      // scalar query that is not legal is therefore left unmatched by these
      // rules instead of reaching getElementType().
      .fewerElementsIf(
          [](const LegalityQuery &Query) {
            return Query.Types[0].isVector();
          },
          [](const LegalityQuery &Query) {
            return std::make_pair(0, Query.Types[0].getElementType());
          })
      .fewerElementsIf(
          [](const LegalityQuery &Query) {
            return Query.Types[1].isVector();
          },
          [](const LegalityQuery &Query) {
            return std::make_pair(1, Query.Types[1].getElementType());
          });
}
computeTables();
}

View File

@ -0,0 +1,114 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -global-isel -o - %s | FileCheck %s
---
# Merges two s32 constants into a single s64 scalar. The CHECK lines expect
# the G_MERGE_VALUES to come out of the legalizer unchanged.
name: test_merge_s32_s32_s64
body: |
bb.0:
; CHECK-LABEL: name: test_merge_s32_s32_s64
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(s32) = G_CONSTANT i32 0
%1:_(s32) = G_CONSTANT i32 1
%2:_(s64) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
$vgpr0_vgpr1 = COPY %2(s64)
...
---
# Merges two s32 constants into a <2 x s32> vector. The CHECK lines expect
# the G_MERGE_VALUES to come out of the legalizer unchanged.
name: test_merge_s32_s32_v2s32
body: |
bb.0:
; CHECK-LABEL: name: test_merge_s32_s32_v2s32
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[MV:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](<2 x s32>)
%0:_(s32) = G_CONSTANT i32 0
%1:_(s32) = G_CONSTANT i32 1
%2:_(<2 x s32>) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
$vgpr0_vgpr1 = COPY %2(<2 x s32>)
...
---
# Merges three s32 constants into a <3 x s32> vector (96 bits, an odd element
# count). The CHECK lines expect the G_MERGE_VALUES to come out of the
# legalizer unchanged.
name: test_merge_s32_s32_s32_v3s32
body: |
bb.0:
; CHECK-LABEL: name: test_merge_s32_s32_s32_v3s32
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK: [[MV:%[0-9]+]]:_(<3 x s32>) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](<3 x s32>)
%0:_(s32) = G_CONSTANT i32 0
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_CONSTANT i32 2
%3:_(<3 x s32>) = G_MERGE_VALUES %0:_(s32), %1:_(s32), %2:_(s32)
$vgpr0_vgpr1_vgpr2 = COPY %3(<3 x s32>)
...
---
# Merges two s64 constants into a single s128 scalar, as the test name says.
# The result type is a 128-bit scalar rather than a vector so that the
# wide-scalar case is exercised; the CHECK lines expect the G_MERGE_VALUES to
# come out of the legalizer unchanged.
name: test_merge_s64_s64_s128
body: |
bb.0:
; CHECK-LABEL: name: test_merge_s64_s64_s128
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C1]](s64)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
%0:_(s64) = G_CONSTANT i64 0
%1:_(s64) = G_CONSTANT i64 1
%2:_(s128) = G_MERGE_VALUES %0(s64), %1(s64)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(s128)
...
---
# Merges four s64 constants into a <4 x s64> vector (256 bits). The CHECK
# lines expect the G_MERGE_VALUES to come out of the legalizer unchanged.
name: test_merge_s64_s64_s64_s64_v4s64
body: |
bb.0:
; CHECK-LABEL: name: test_merge_s64_s64_s64_s64_v4s64
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK: [[MV:%[0-9]+]]:_(<4 x s64>) = G_MERGE_VALUES [[C]](s64), [[C1]](s64), [[C2]](s64), [[C3]](s64)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](<4 x s64>)
%0:_(s64) = G_CONSTANT i64 0
%1:_(s64) = G_CONSTANT i64 1
%2:_(s64) = G_CONSTANT i64 2
%3:_(s64) = G_CONSTANT i64 3
%4:_(<4 x s64>) = G_MERGE_VALUES %0(s64), %1(s64), %2(s64), %3(s64)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4(<4 x s64>)
...
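# FIXME: This merge should be split up by the legalizer. <17 x s32> is 544
# bits wide, which presumably is too wide to keep as a single merge; the test
# stays commented out until that case is handled.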
# ---
# name: test_merge_17_x_i32
# body: |
# bb.0:
# %0:_(s32) = G_CONSTANT i32 0
# %1:_(s32) = G_CONSTANT i32 1
# %2:_(s32) = G_CONSTANT i32 2
# %3:_(s32) = G_CONSTANT i32 3
# %4:_(s32) = G_CONSTANT i32 4
# %5:_(s32) = G_CONSTANT i32 5
# %6:_(s32) = G_CONSTANT i32 6
# %7:_(s32) = G_CONSTANT i32 7
# %8:_(s32) = G_CONSTANT i32 8
# %9:_(s32) = G_CONSTANT i32 9
# %10:_(s32) = G_CONSTANT i32 10
# %11:_(s32) = G_CONSTANT i32 11
# %12:_(s32) = G_CONSTANT i32 12
# %13:_(s32) = G_CONSTANT i32 13
# %14:_(s32) = G_CONSTANT i32 14
# %15:_(s32) = G_CONSTANT i32 15
# %16:_(s32) = G_CONSTANT i32 16
# %17:_(<17 x s32>) = G_MERGE_VALUES %0:_(s32), %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32)
# S_ENDPGM implicit %17(<17 x s32>)
# ...

View File

@ -0,0 +1,33 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -global-isel -o - %s | FileCheck %s
---
# Splits an s64 constant into two s32 values. The CHECK lines expect the
# G_UNMERGE_VALUES to come out of the legalizer unchanged.
name: test_unmerge_s64_s32
body: |
bb.0:
; CHECK-LABEL: name: test_unmerge_s64_s32
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; CHECK: $vgpr0 = COPY [[UV]](s32)
; CHECK: $vgpr2 = COPY [[UV1]](s32)
%0:_(s64) = G_CONSTANT i64 0
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64)
$vgpr0 = COPY %1(s32)
$vgpr2 = COPY %2(s32)
...
---
# Splits a <2 x s32> vector, copied in from $vgpr0_vgpr1, into its two s32
# elements. The CHECK lines expect the G_UNMERGE_VALUES to come out of the
# legalizer unchanged.
name: test_unmerge_v2s32_s32
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: test_unmerge_v2s32_s32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK: $vgpr0 = COPY [[UV]](s32)
; CHECK: $vgpr2 = COPY [[UV1]](s32)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(<2 x s32>)
$vgpr0 = COPY %1(s32)
$vgpr2 = COPY %2(s32)
...