1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

R600/SI: Match i16 immediate offset of LDS instructions.

llvm-svn: 204272
This commit is contained in:
Matt Arsenault 2014-03-19 22:19:49 +00:00
parent 194b9e9539
commit 38344ebbaf
6 changed files with 114 additions and 31 deletions

View File

@ -99,10 +99,18 @@ def as_i32imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
}]>;
def IMM8bit : PatLeaf <(imm),
[{return isUInt<8>(N->getZExtValue());}]
>;
def IMM12bit : PatLeaf <(imm),
[{return isUInt<12>(N->getZExtValue());}]
>;
def IMM16bit : PatLeaf <(imm),
[{return isUInt<16>(N->getZExtValue());}]
>;
def mubuf_vaddr_offset : PatFrag<
(ops node:$ptr, node:$offset, node:$imm_offset),
(add (add node:$ptr, node:$offset), node:$imm_offset)
@ -387,6 +395,7 @@ class DS_1A <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> :
DS <op, outs, ins, asm, pat> {
bits<16> offset;
// Single load interpret the 2 i8imm operands as a single i16 offset.
let offset0 = offset{7-0};
let offset1 = offset{15-8};
}

View File

@ -1915,29 +1915,39 @@ def : Pat <
/********** Load/Store Patterns **********/
/********** ======================= **********/
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag i32:$src0),
(vt (inst 0, $src0, 0))
>;
multiclass DSReadPat <DS inst, ValueType vt, PatFrag frag> {
def : Pat <
(vt (frag (add i32:$ptr, (i32 IMM16bit:$offset)))),
(inst (i1 0), $ptr, (as_i16imm $offset))
>;
def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
def : DSReadPat <DS_READ_B32, i32, local_load>;
def : Pat <
(local_load i32:$src0),
(i32 (DS_READ_B32 0, $src0, 0))
>;
def : Pat <
(frag i32:$src0),
(vt (inst 0, $src0, 0))
>;
}
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag i32:$src1, i32:$src0),
(inst 0, $src0, $src1, 0)
>;
defm : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
defm : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
defm : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
defm : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
defm : DSReadPat <DS_READ_B32, i32, local_load>;
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
def : DSWritePat <DS_WRITE_B32, i32, local_store>;
multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
def : Pat <
(frag vt:$value, (add i32:$ptr, (i32 IMM16bit:$offset))),
(inst (i1 0), $ptr, $value, (as_i16imm $offset))
>;
def : Pat <
(frag i32:$src1, i32:$src0),
(inst 0, $src0, $src1, 0)
>;
}
defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
defm : DSWritePat <DS_WRITE_B32, i32, local_store>;
def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
(DS_ADD_U32_RTN 0, $ptr, $val, 0)>;

View File

@ -11,7 +11,7 @@
; CHECK-LABEL: @local_address_load
; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
; CHECK: DS_READ_B32 [[PTR]]
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]]
define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = load i32 addrspace(3)* %in
@ -32,9 +32,8 @@ entry:
}
; CHECK-LABEL: @local_address_gep_const_offset
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]]
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 4,
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 1
@ -43,6 +42,19 @@ entry:
ret void
}
; Offset too large, can't fold into 16-bit immediate offset.
; CHECK-LABEL: @local_address_gep_large_const_offset
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_READ_B32 [[VPTR]]
define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 16385
%1 = load i32 addrspace(3)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
; CHECK-LABEL: @null_32bit_lds_ptr:
; CHECK: V_CMP_NE_I32
; CHECK-NOT: V_CMP_NE_I32
@ -86,3 +98,41 @@ define void @global_ptr() nounwind {
store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
ret void
}
; CHECK-LABEL: @local_address_store
; CHECK: DS_WRITE_B32
define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
store i32 %val, i32 addrspace(3)* %out
ret void
}
; CHECK-LABEL: @local_address_gep_store
; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]],
; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}},
define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
%gep = getelementptr i32 addrspace(3)* %out, i32 %offset
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}
; CHECK-LABEL: @local_address_gep_const_offset_store
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 4
define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 1
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}
; Offset too large, can't fold into 16-bit immediate offset.
; CHECK-LABEL: @local_address_gep_large_const_offset_store
; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 16385
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}

View File

@ -4,11 +4,14 @@
%struct.foo = type { [3 x float], [3 x float] }
; FIXME: Extra V_MOV from SGPR to VGPR for second read. The address is
; already in a VGPR after the first read.
; CHECK-LABEL: @do_as_ptr_calcs:
; CHECK: S_ADD_I32 {{s[0-9]+}},
; CHECK: S_ADD_I32 [[SREG1:s[0-9]+]],
; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; CHECK: DS_READ_B32 [[VREG1]],
; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 20
; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 12
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0

View File

@ -2,12 +2,22 @@
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: @use_gep_address_space:
; CHECK: S_ADD_I32
; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 64
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
}
define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: @use_gep_address_space_large_offset:
; CHECK: S_ADD_I32
; CHECK: DS_WRITE_B32
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
store i32 99, i32 addrspace(3)* %p
ret void
}
define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: @gep_as_vector_v4:
; CHECK: S_ADD_I32

View File

@ -24,11 +24,12 @@
; EG-CHECK: GROUP_BARRIER
; EG-CHECK-NEXT: ALU clause
; Make sure the lds reads are using different addresses.
; Make sure the lds reads are using different addresses, at different
; constant offsets.
; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]]
; SI-CHECK-NOT: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]]
; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 16
; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0,
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry: