1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

AMDGPU/GlobalISel: Extend load/store workaround to i128 vectors

This commit is contained in:
Matt Arsenault 2020-06-15 13:42:47 -04:00
parent b474372d7b
commit 539e48655f
5 changed files with 147 additions and 57 deletions

View File

@ -307,7 +307,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
return false;
if (!Ty.isVector())
return true;
return Ty.getElementType().getSizeInBits() == 16;
unsigned EltSize = Ty.getElementType().getSizeInBits();
return EltSize != 32 && EltSize != 64;
}
static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query,

View File

@ -10377,24 +10377,29 @@ body: |
; CI-LABEL: name: test_load_constant_v2s128_align32
; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; CI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; VI-LABEL: name: test_load_constant_v2s128_align32
; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-LABEL: name: test_load_constant_v2s128_align32
; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; CI-MESA-LABEL: name: test_load_constant_v2s128_align32
; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-MESA-LABEL: name: test_load_constant_v2s128_align32
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 4)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@ -13259,3 +13264,39 @@ body: |
%1:_(s512) = G_LOAD %0 :: (load 64, align 32, addrspace 4)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
...
---
name: test_load_constant_v4s128_align32
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CI-LABEL: name: test_load_constant_v4s128_align32
; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
; CI: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
; VI-LABEL: name: test_load_constant_v4s128_align32
; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
; VI: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
; GFX9-LABEL: name: test_load_constant_v4s128_align32
; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
; CI-MESA-LABEL: name: test_load_constant_v4s128_align32
; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
; CI-MESA: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
; GFX9-MESA-LABEL: name: test_load_constant_v4s128_align32
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<4 x s128>) = G_LOAD %0 :: (load 64, align 32, addrspace 4)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
...

View File

@ -10477,53 +10477,48 @@ body: |
; CI-LABEL: name: test_load_flat_v2s128_align32
; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
; CI: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
; CI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; VI-LABEL: name: test_load_flat_v2s128_align32
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
; VI: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-LABEL: name: test_load_flat_v2s128_align32
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
; GFX9: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; CI-MESA-LABEL: name: test_load_flat_v2s128_align32
; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-MESA-LABEL: name: test_load_flat_v2s128_align32
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 0)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1

View File

@ -9916,28 +9916,34 @@ body: |
; SI-LABEL: name: test_load_global_v2s128_align32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; SI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; CI-HSA-LABEL: name: test_load_global_v2s128_align32
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; CI-MESA-LABEL: name: test_load_global_v2s128_align32
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; VI-LABEL: name: test_load_global_v2s128_align32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-HSA-LABEL: name: test_load_global_v2s128_align32
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-MESA-LABEL: name: test_load_global_v2s128_align32
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 1)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1

View File

@ -6770,45 +6770,49 @@ body: |
...
---
name: test_store_global_v8s32_align32
name: test_store_global_v2s128_align32
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; SI-LABEL: name: test_store_global_v8s32_align32
; SI-LABEL: name: test_store_global_v2s128_align32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
; SI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
; CI-LABEL: name: test_store_global_v8s32_align32
; CI-LABEL: name: test_store_global_v2s128_align32
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
; CI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
; VI-LABEL: name: test_store_global_v8s32_align32
; VI-LABEL: name: test_store_global_v2s128_align32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
; VI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_v8s32_align32
; GFX9-LABEL: name: test_store_global_v2s128_align32
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
%1:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
G_STORE %1, %0 :: (store 32, align 32, addrspace 1)
...
@ -7472,6 +7476,49 @@ body: |
G_STORE %1, %0 :: (store 32, align 32, addrspace 1)
...
---
name: test_store_global_v8s32_align32
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; SI-LABEL: name: test_store_global_v8s32_align32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
; CI-LABEL: name: test_store_global_v8s32_align32
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
; VI-LABEL: name: test_store_global_v8s32_align32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
; GFX9-LABEL: name: test_store_global_v8s32_align32
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
G_STORE %1, %0 :: (store 32, align 32, addrspace 1)
...
---
name: test_store_global_v9s32_align1
body: |