mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 18:42:46 +02:00
AMDGPU: Fix introducing stack access on unaligned v16i8
llvm-svn: 280298
This commit is contained in:
parent
21cdd84c8c
commit
a193e467ba
@ -2012,6 +2012,9 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
|
||||
// problems during legalization, the emitted instructions to pack and unpack
|
||||
// the bytes again are not eliminated in the case of an unaligned copy.
|
||||
if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
|
||||
if (VT.isVector())
|
||||
return scalarizeVectorLoad(LN, DAG);
|
||||
|
||||
SDValue Ops[2];
|
||||
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
|
||||
return DAG.getMergeValues(Ops, SDLoc(N));
|
||||
@ -2060,8 +2063,12 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
|
||||
// order problems during legalization, the emitted instructions to pack and
|
||||
// unpack the bytes again are not eliminated in the case of an unaligned
|
||||
// copy.
|
||||
if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast))
|
||||
if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
|
||||
if (VT.isVector())
|
||||
return scalarizeVectorStore(SN, DAG);
|
||||
|
||||
return expandUnalignedStore(SN, DAG);
|
||||
}
|
||||
|
||||
if (!IsFast)
|
||||
return SDValue();
|
||||
|
@ -1,15 +1,13 @@
|
||||
; XFAIL: *
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s
|
||||
; XUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
|
||||
;
|
||||
; EG-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
|
||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
|
||||
; EG: MEM_{{.*}} MSKOR [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
|
||||
; EG-NOT: BFE
|
||||
; EG: ADD_INT
|
||||
; EG: LSHL
|
||||
; EG: ASHR [[RES]]
|
||||
; EG: ASHR
|
||||
; EG: LSHL
|
||||
; EG: ASHR [[RES]]
|
||||
; EG: ASHR
|
||||
; EG: LSHR {{\*?}} [[ADDR]]
|
||||
|
||||
; Works with the align 2 removed
|
||||
|
@ -552,4 +552,53 @@ define void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspac
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}local_load_align1_v16i8:
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
; SI: ds_read_u8
|
||||
|
||||
; SI: ScratchSize: 0{{$}}
|
||||
define void @local_load_align1_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(3)* %in) #0 {
|
||||
%ld = load <16 x i8>, <16 x i8> addrspace(3)* %in, align 1
|
||||
store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}local_store_align1_v16i8:
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
; SI: ds_write_b8
|
||||
|
||||
; SI: ScratchSize: 0{{$}}
|
||||
define void @local_store_align1_v16i8(<16 x i8> addrspace(3)* %out) #0 {
|
||||
store <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* %out, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
Loading…
Reference in New Issue
Block a user