1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 03:53:04 +02:00

AMDGPU/SI: Fix LowerParameter() for i16 arguments

Summary:
If we are loading an i16 value from a 32-bit memory location, then
we need to be able to truncate the loaded value to i16.

Reviewers: arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D25198

llvm-svn: 284397
This commit is contained in:
Tom Stellard 2016-10-17 16:21:45 +00:00
parent f0b7ab4378
commit a4414c957a
2 changed files with 20 additions and 16 deletions

View File

@@ -587,23 +587,31 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
unsigned Offset, bool Signed) const {
const DataLayout &DL = DAG.getDataLayout();
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
SDValue PtrOffset = DAG.getUNDEF(PtrVT);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned Align = DL.getABITypeAlignment(Ty);
ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
if (MemVT.isFloatingPoint())
ExtTy = ISD::EXTLOAD;
SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset,
PtrInfo, MemVT, Align,
MachineMemOperand::MONonTemporal |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
MachineMemOperand::MONonTemporal |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
SDValue Val;
if (MemVT.isFloatingPoint())
Val = DAG.getNode(ISD::FP_EXTEND, SL, VT, Load);
else if (Signed)
Val = DAG.getSExtOrTrunc(Load, SL, VT);
else
Val = DAG.getZExtOrTrunc(Load, SL, VT);
SDValue Ops[] = {
Val,
Load.getValue(1)
};
return DAG.getMergeValues(Ops, SL);
}
SDValue SITargetLowering::LowerFormalArguments(

View File

@@ -149,12 +149,8 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
ret void
}
; FIXME: Should be able to merge this
; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
; GCN-NOAA: buffer_store_dword v
; GCN-NOAA: buffer_store_dword v
; GCN-NOAA: buffer_store_dword v
; GCN-NOAA: buffer_store_dword v
; GCN-NOAA: buffer_store_dwordx4 v
; GCN-AA: buffer_store_dwordx2
; GCN-AA: buffer_store_dword v