
R600/SI: Make more store operations legal

v2i32, i32, trunc i32 to i16, and trunc i32 to i8 stores are legal for
all address spaces.  We had marked them as custom in order to lower
them for the private address space, but this is no longer necessary.

This enables lowering of misaligned stores of these types in the
DAGLegalizer.

llvm-svn: 228189
Tom Stellard 2015-02-04 20:49:51 +00:00
parent 5ea00f06af
commit e523dc6621
3 changed files with 46 additions and 23 deletions
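
The core idea is that a store marked Legal needs no target code at all: when SelectionDAG legalization meets a misaligned store of a legal type, it can expand it into byte-sized stores on its own, whereas a Custom store is always routed through the target's LowerSTORE. Below is a minimal, self-contained C++ sketch of that dispatch; LegalizeAction, StoreOp, expandUnalignedStore, and the hard-coded i32 width are illustrative stand-ins, not LLVM's real types or signatures.

// A minimal, self-contained sketch of the legal-vs-custom distinction this
// commit relies on. All names here are illustrative stand-ins.
#include <cstdint>
#include <cstdio>

enum class LegalizeAction { Legal, Custom };

// Stand-in for an i32 store node: a value, an address, and its alignment.
struct StoreOp {
  uint32_t Value;
  uint8_t *Addr;
  unsigned Align; // in bytes
};

// What the generic DAG legalizer conceptually does with a misaligned store
// of a legal type: split it into single-byte stores, which the SI backend
// then selects as ds_write_b8 / buffer_store_byte.
void expandUnalignedStore(const StoreOp &S) {
  for (unsigned I = 0; I != 4; ++I)
    S.Addr[I] = uint8_t(S.Value >> (8 * I)); // little-endian byte split
}

void legalizeStore(const StoreOp &S, LegalizeAction Action) {
  if (Action == LegalizeAction::Custom) {
    std::puts("routed to the target's LowerSTORE"); // pre-commit behavior
    return;
  }
  if (S.Align < 4) { // misaligned i32 store: generic expansion kicks in
    expandUnalignedStore(S);
    std::puts("expanded to 4 byte stores");
    return;
  }
  std::puts("kept as one aligned dword store");
}

int main() {
  uint8_t Buf[8] = {};
  StoreOp S{0xAABBCCDD, Buf + 1, 1}; // align 1, like the tests in this commit
  legalizeStore(S, LegalizeAction::Legal);
}

In the sketch, the align-1 store prints "expanded to 4 byte stores", which is exactly the ds_write_b8 / buffer_store_byte pattern the updated tests below check for.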


@@ -159,9 +159,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
   // Custom lowering of vector stores is required for local address space
   // stores.
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
-  // XXX: Native v2i32 local address space stores are possible, but not
-  // currently implemented.
-  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
 
   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
   setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);


@@ -90,8 +90,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
 
   setOperationAction(ISD::STORE, MVT::i1, Custom);
-  setOperationAction(ISD::STORE, MVT::i32, Custom);
-  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
 
   setOperationAction(ISD::SELECT, MVT::i64, Custom);
@@ -159,8 +157,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
   for (MVT VT : MVT::fp_valuetypes())
     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
 
-  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
-  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
   setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
@@ -1150,11 +1146,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Store->getMemoryVT();
 
   // These stores are legal.
-  if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
-      VT.isVector() && VT.getVectorNumElements() == 2 &&
-      VT.getVectorElementType() == MVT::i32)
-    return SDValue();
-
   if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
     if (VT.isVector() && VT.getVectorNumElements() > 4)
       return ScalarizeVectorStore(Op, DAG);
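
For private-address-space vectors with more than 4 elements, LowerSTORE still falls back to ScalarizeVectorStore, as the remaining context above shows. As a rough model only (the helper below is hypothetical, not LLVM's implementation), scalarizing replays the one vector store as one element store per lane:

// Hypothetical model of scalarizing a vector store: one wide store becomes
// Vec.size() element stores at increasing offsets.
#include <cstdint>
#include <cstring>
#include <vector>

void scalarizeVectorStore(const std::vector<uint32_t> &Vec, uint8_t *Addr) {
  for (size_t I = 0; I != Vec.size(); ++I)
    std::memcpy(Addr + I * sizeof(uint32_t), &Vec[I], sizeof(uint32_t));
}

int main() {
  uint8_t Buf[32] = {};
  scalarizeVectorStore({1, 2, 3, 4, 5, 6, 7, 8}, Buf); // e.g. a v8i32 store
}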


@@ -6,7 +6,10 @@
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
-; SI: ds_write_b32
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
 %v = load i32 addrspace(3)* %p, align 1
@@ -19,7 +22,10 @@ define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
-; SI: buffer_store_dword
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
 define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
 %v = load i32 addrspace(1)* %p, align 1
 store i32 %v, i32 addrspace(1)* %r, align 1
@@ -35,8 +41,14 @@ define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
-; SI: ds_write_b32
-; SI: ds_write_b32
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
 %v = load i64 addrspace(3)* %p, align 1
@@ -53,8 +65,14 @@ define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: buffer_store_dword
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
 define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
 %v = load i64 addrspace(1)* %p, align 1
 store i64 %v, i64 addrspace(1)* %r, align 1
@@ -82,10 +100,25 @@ define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(
 ; SI: ds_read_u8
 ; SI: ds_read_u8
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
 %v = load <4 x i32> addrspace(3)* %p, align 1
@@ -149,7 +182,7 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture
   ret void
 }
 
-; SI: {{^}}load_lds_i64_align_1
+; SI-LABEL: {{^}}load_lds_i64_align_1:
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
@@ -158,7 +191,9 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
 define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
 %val = load i64 addrspace(3)* %in, align 1
 store i64 %val, i64 addrspace(1)* %out, align 8