mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[NVPTX] Fixed lowering of unaligned loads/stores of f16 scalars and vectors.
Differential Revision: https://reviews.llvm.org/D30672
llvm-svn: 297198
This commit is contained in:
parent
2d32893f55
commit
5d21b52b47
@@ -2071,8 +2071,21 @@ SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   if (Op.getValueType() == MVT::i1)
     return LowerLOADi1(Op, DAG);
-  else
-    return SDValue();
+
+  // v2f16 is legal, so we can't rely on legalizer to handle unaligned
+  // loads and have to handle it here.
+  if (Op.getValueType() == MVT::v2f16) {
+    LoadSDNode *Load = cast<LoadSDNode>(Op);
+    EVT MemVT = Load->getMemoryVT();
+    if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+                            Load->getAddressSpace(), Load->getAlignment())) {
+      SDValue Ops[2];
+      std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
+      return DAG.getMergeValues(Ops, SDLoc(Op));
+    }
+  }
+
+  return SDValue();
 }
 
 // v = ld i1* addr
@@ -2098,16 +2111,23 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
 }
 
 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
-  EVT ValVT = Op.getOperand(1).getValueType();
-  switch (ValVT.getSimpleVT().SimpleTy) {
-  case MVT::i1:
+  StoreSDNode *Store = cast<StoreSDNode>(Op);
+  EVT VT = Store->getMemoryVT();
+
+  if (VT == MVT::i1)
     return LowerSTOREi1(Op, DAG);
-  default:
-    if (ValVT.isVector())
-      return LowerSTOREVector(Op, DAG);
-    else
-      return SDValue();
-  }
+
+  // v2f16 is legal, so we can't rely on legalizer to handle unaligned
+  // stores and have to handle it here.
+  if (VT == MVT::v2f16 &&
+      !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+                          Store->getAddressSpace(), Store->getAlignment()))
+    return expandUnalignedStore(Store, DAG);
+
+  if (VT.isVector())
+    return LowerSTOREVector(Op, DAG);
+
+  return SDValue();
 }
 
 SDValue
@@ -161,6 +161,20 @@ define half @test_load(half* %a) #0 {
   ret half %r
 }
 
+; CHECK-LABEL: .visible .func test_halfp0a1(
+; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
+; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
+; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
+; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
+; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
+; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
+; CHECK: ret
+define void @test_halfp0a1(half * noalias readonly %from, half * %to) {
+  %1 = load half, half * %from , align 1
+  store half %1, half * %to , align 1
+  ret void
+}
+
 declare half @test_callee(half %a, half %b) #0
 
 ; CHECK-LABEL: test_call(
@@ -41,6 +41,64 @@ define <4 x float> @t4(i8* %p1) {
   ret <4 x float> %r
 }
 
+; CHECK-LABEL: .visible .func test_v1halfp0a1(
+; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v1halfp0a1_param_0];
+; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v1halfp0a1_param_1];
+; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
+; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
+; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
+; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
+; CHECK: ret
+define void @test_v1halfp0a1(<1 x half> * noalias readonly %from, <1 x half> * %to) {
+  %1 = load <1 x half>, <1 x half> * %from , align 1
+  store <1 x half> %1, <1 x half> * %to , align 1
+  ret void
+}
+
+; CHECK-LABEL: .visible .func test_v2halfp0a1(
+; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v2halfp0a1_param_0];
+; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v2halfp0a1_param_1];
+; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
+; CHECK-DAG: st.u8 [%[[TO]]],
+; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
+; CHECK-DAG: st.u8 [%[[TO]]+1],
+; CHECK-DAG: ld.u8 [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2]
+; CHECK-DAG: st.u8 [%[[TO]]+2],
+; CHECK-DAG: ld.u8 [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3]
+; CHECK-DAG: st.u8 [%[[TO]]+3],
+; CHECK: ret
+define void @test_v2halfp0a1(<2 x half> * noalias readonly %from, <2 x half> * %to) {
+  %1 = load <2 x half>, <2 x half> * %from , align 1
+  store <2 x half> %1, <2 x half> * %to , align 1
+  ret void
+}
+
+; CHECK-LABEL: .visible .func test_v4halfp0a1(
+; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v4halfp0a1_param_0];
+; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v4halfp0a1_param_1];
+; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
+; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
+; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
+; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
+; CHECK-DAG: ld.u8 [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2]
+; CHECK-DAG: st.u8 [%[[TO]]+2], [[B2]]
+; CHECK-DAG: ld.u8 [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3]
+; CHECK-DAG: st.u8 [%[[TO]]+3], [[B3]]
+; CHECK-DAG: ld.u8 [[B4:%r[sd]?[0-9]+]], [%[[FROM]]+4]
+; CHECK-DAG: st.u8 [%[[TO]]+4], [[B4]]
+; CHECK-DAG: ld.u8 [[B5:%r[sd]?[0-9]+]], [%[[FROM]]+5]
+; CHECK-DAG: st.u8 [%[[TO]]+5], [[B5]]
+; CHECK-DAG: ld.u8 [[B6:%r[sd]?[0-9]+]], [%[[FROM]]+6]
+; CHECK-DAG: st.u8 [%[[TO]]+6], [[B6]]
+; CHECK-DAG: ld.u8 [[B7:%r[sd]?[0-9]+]], [%[[FROM]]+7]
+; CHECK-DAG: st.u8 [%[[TO]]+7], [[B7]]
+; CHECK: ret
+define void @test_v4halfp0a1(<4 x half> * noalias readonly %from, <4 x half> * %to) {
+  %1 = load <4 x half>, <4 x half> * %from , align 1
+  store <4 x half> %1, <4 x half> * %to , align 1
+  ret void
+}
+
 
 ; CHECK-LABEL: s1
 define void @s1(<4 x float>* %p1, <4 x float> %v) {
Loading…
Reference in New Issue
Block a user