diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 366a2eb887e..5c1359e3161 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -119,5 +119,32 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST, (Ty1.getSizeInBits() % 32 == 0); }); + // Merge/Unmerge + for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { + unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; + unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; + + getActionDefinitionsBuilder(Op) + .legalIf([=](const LegalityQuery &Query) { + const LLT &BigTy = Query.Types[BigTyIdx]; + const LLT &LitTy = Query.Types[LitTyIdx]; + return BigTy.getSizeInBits() % 32 == 0 && + LitTy.getSizeInBits() % 32 == 0 && + BigTy.getSizeInBits() <= 512; + }) + // Any vectors left are the wrong size. Scalarize them. + .fewerElementsIf([](const LegalityQuery &Query) { return true; }, + [](const LegalityQuery &Query) { + return std::make_pair( + 0, Query.Types[0].getElementType()); + }) + .fewerElementsIf([](const LegalityQuery &Query) { return true; }, + [](const LegalityQuery &Query) { + return std::make_pair( + 1, Query.Types[1].getElementType()); + }); + + } + computeTables(); } diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir new file mode 100644 index 00000000000..ff565938c9e --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -global-isel -o - %s | FileCheck %s + +--- +name: test_merge_s32_s32_s64 +body: | + bb.0: + ; CHECK-LABEL: name: test_merge_s32_s32_s64 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; CHECK: $vgpr0_vgpr1 = 
COPY [[MV]](s64) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s64) = G_MERGE_VALUES %0:_(s32), %1:_(s32) + $vgpr0_vgpr1 = COPY %2(s64) +... + +--- +name: test_merge_s32_s32_v2s32 +body: | + bb.0: + ; CHECK-LABEL: name: test_merge_s32_s32_v2s32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[MV:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](<2 x s32>) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<2 x s32>) = G_MERGE_VALUES %0:_(s32), %1:_(s32) + $vgpr0_vgpr1 = COPY %2(<2 x s32>) +... + +--- +name: test_merge_s32_s32_s32_v3s32 +body: | + bb.0: + ; CHECK-LABEL: name: test_merge_s32_s32_s32_v3s32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[MV:%[0-9]+]]:_(<3 x s32>) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](<3 x s32>) + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 2 + %3:_(<3 x s32>) = G_MERGE_VALUES %0:_(s32), %1:_(s32), %2:_(s32) + $vgpr0_vgpr1_vgpr2 = COPY %3(<3 x s32>) +... + +--- +name: test_merge_s64_s64_s128 +body: | + bb.0: + ; CHECK-LABEL: name: test_merge_s64_s64_s128 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[MV:%[0-9]+]]:_(<2 x s64>) = G_MERGE_VALUES [[C]](s64), [[C1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](<2 x s64>) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(<2 x s64>) = G_MERGE_VALUES %0(s64), %1(s64) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2(<2 x s64>) +... 
+ +--- +name: test_merge_s64_s64_s64_s64_v4s64 +body: | + bb.0: + ; CHECK-LABEL: name: test_merge_s64_s64_s64_s64_v4s64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK: [[MV:%[0-9]+]]:_(<4 x s64>) = G_MERGE_VALUES [[C]](s64), [[C1]](s64), [[C2]](s64), [[C3]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV]](<4 x s64>) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s64) = G_CONSTANT i64 2 + %3:_(s64) = G_CONSTANT i64 3 + %4:_(<4 x s64>) = G_MERGE_VALUES %0(s64), %1(s64), %2(s64), %3(s64) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4(<4 x s64>) +... + +# FIXME: Should be split up +# --- +# name: test_merge_17_x_i32 +# body: | +# bb.0: +# %0:_(s32) = G_CONSTANT i32 0 +# %1:_(s32) = G_CONSTANT i32 1 +# %2:_(s32) = G_CONSTANT i32 2 +# %3:_(s32) = G_CONSTANT i32 3 + +# %4:_(s32) = G_CONSTANT i32 4 +# %5:_(s32) = G_CONSTANT i32 5 +# %6:_(s32) = G_CONSTANT i32 6 +# %7:_(s32) = G_CONSTANT i32 7 + +# %8:_(s32) = G_CONSTANT i32 8 +# %9:_(s32) = G_CONSTANT i32 9 +# %10:_(s32) = G_CONSTANT i32 10 +# %11:_(s32) = G_CONSTANT i32 11 + +# %12:_(s32) = G_CONSTANT i32 12 +# %13:_(s32) = G_CONSTANT i32 13 +# %14:_(s32) = G_CONSTANT i32 14 +# %15:_(s32) = G_CONSTANT i32 15 + +# %16:_(s32) = G_CONSTANT i32 16 + +# %17:_(<17 x s32>) = G_MERGE_VALUES %0:_(s32), %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32) +# S_ENDPGM implicit %17(<17 x s32>) +# ... 
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir new file mode 100644 index 00000000000..b49037ebb4e --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -0,0 +1,33 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -global-isel -o - %s | FileCheck %s + +--- +name: test_unmerge_s64_s32 +body: | + bb.0: + ; CHECK-LABEL: name: test_unmerge_s64_s32 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr2 = COPY [[UV1]](s32) + %0:_(s64) = G_CONSTANT i64 0 + %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64) + $vgpr0 = COPY %1(s32) + $vgpr2 = COPY %2(s32) +... + +--- +name: test_unmerge_v2s32_s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_unmerge_v2s32_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr2 = COPY [[UV1]](s32) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(<2 x s32>) + $vgpr0 = COPY %1(s32) + $vgpr2 = COPY %2(s32) +...