mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
R600/SI: Fix bad code with unaligned byte vector loads
Don't do the v4i8 -> v4f32 combine if the load will need to be expanded due to alignment. This avoids emitting extra instructions that repack the loaded bytes into the single register that the v_cvt_f32_ubyteN instructions read. llvm-svn: 225926
This commit is contained in:
parent
22a9f67443
commit
424a0025ca
@ -302,7 +302,7 @@ bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||
bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||
unsigned AddrSpace,
|
||||
unsigned Align,
|
||||
bool *IsFast) const {
|
||||
@ -1167,7 +1167,7 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) {
|
||||
DAGCombinerInfo &DCI) const {
|
||||
EVT VT = N->getValueType(0);
|
||||
EVT ScalarVT = VT.getScalarType();
|
||||
if (ScalarVT != MVT::f32)
|
||||
@ -1215,8 +1215,21 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
|
||||
EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT);
|
||||
EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT);
|
||||
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts);
|
||||
|
||||
LoadSDNode *Load = cast<LoadSDNode>(Src);
|
||||
|
||||
unsigned AS = Load->getAddressSpace();
|
||||
unsigned Align = Load->getAlignment();
|
||||
Type *Ty = LoadVT.getTypeForEVT(*DAG.getContext());
|
||||
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
|
||||
|
||||
// Don't try to replace the load if we have to expand it due to alignment
|
||||
// problems. Otherwise we will end up scalarizing the load, and trying to
|
||||
// repack into the vector for no real reason.
|
||||
if (Align < ABIAlignment &&
|
||||
!allowsMisalignedMemoryAccesses(LoadVT, AS, Align, nullptr)) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT,
|
||||
Load->getChain(),
|
||||
Load->getBasePtr(),
|
||||
|
@ -50,8 +50,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
||||
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
|
||||
MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
|
||||
|
||||
static SDValue performUCharToFloatCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI);
|
||||
SDValue performUCharToFloatCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const;
|
||||
SDValue performSHLPtrCombine(SDNode *N,
|
||||
unsigned AS,
|
||||
DAGCombinerInfo &DCI) const;
|
||||
|
@ -36,7 +36,7 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8>
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
|
||||
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
|
||||
%load = load <3 x i8> addrspace(1)* %in, align 1
|
||||
%load = load <3 x i8> addrspace(1)* %in, align 4
|
||||
%cvt = uitofp <3 x i8> %load to <3 x float>
|
||||
store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
|
||||
ret void
|
||||
@ -66,23 +66,13 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8>
|
||||
; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
|
||||
; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
|
||||
; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
|
||||
; SI-NOT: v_lshlrev_b32
|
||||
; SI-NOT: v_or_b32
|
||||
|
||||
; SI: v_lshlrev_b32
|
||||
; SI: v_or_b32
|
||||
; SI: v_lshlrev_b32
|
||||
; SI: v_or_b32
|
||||
; SI: v_lshlrev_b32
|
||||
; SI: v_or_b32
|
||||
|
||||
; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG0]]
|
||||
; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
|
||||
; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
|
||||
; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG3]]
|
||||
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32
|
||||
; SI-DAG: v_cvt_f32_ubyte1_e32
|
||||
; SI-DAG: v_cvt_f32_ubyte2_e32
|
||||
; SI-DAG: v_cvt_f32_ubyte3_e32
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG0]]
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG3]]
|
||||
|
||||
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
|
||||
define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
|
||||
|
Loading…
Reference in New Issue
Block a user