1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[Hexagon] Remove custom lowering of loads of v4i16

The target-independent lowering works fine, except concatenating 32-bit
words. Add a pattern to generate A2_combinew instead of 64-bit asl/or.

llvm-svn: 308186
This commit is contained in:
Krzysztof Parzyszek 2017-07-17 15:45:45 +00:00
parent 9af064f149
commit afff3fc916
5 changed files with 30 additions and 83 deletions

View File

@ -1364,79 +1364,6 @@ HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
// Handle only specific vector loads.
SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
SDValue Chain = LoadNode->getChain();
SDValue Ptr = Op.getOperand(1);
SDValue LoweredLoad;
SDValue Result;
SDValue Base = LoadNode->getBasePtr();
ISD::LoadExtType Ext = LoadNode->getExtensionType();
unsigned Alignment = LoadNode->getAlignment();
SDValue LoadChain;
if(Ext == ISD::NON_EXTLOAD)
Ext = ISD::ZEXTLOAD;
if (VT == MVT::v4i16) {
if (Alignment == 2) {
SDValue Loads[4];
// Base load.
Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base,
LoadNode->getPointerInfo(), MVT::i16, Alignment,
LoadNode->getMemOperand()->getFlags());
// Base+2 load.
SDValue Increment = DAG.getConstant(2, DL, MVT::i32);
Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
LoadNode->getPointerInfo(), MVT::i16, Alignment,
LoadNode->getMemOperand()->getFlags());
// SHL 16, then OR base and base+2.
SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32);
SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount);
SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]);
// Base + 4.
Increment = DAG.getConstant(4, DL, MVT::i32);
Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
LoadNode->getPointerInfo(), MVT::i16, Alignment,
LoadNode->getMemOperand()->getFlags());
// Base + 6.
Increment = DAG.getConstant(6, DL, MVT::i32);
Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
LoadNode->getPointerInfo(), MVT::i16, Alignment,
LoadNode->getMemOperand()->getFlags());
// SHL 16, then OR base+4 and base+6.
Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount);
SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]);
// Combine to i64. This could be optimised out later if we can
// affect reg allocation of this code.
Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2);
LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
Loads[0].getValue(1), Loads[1].getValue(1),
Loads[2].getValue(1), Loads[3].getValue(1));
} else {
// Perform default type expansion.
Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
LoadNode->getAlignment(),
LoadNode->getMemOperand()->getFlags());
LoadChain = Result.getValue(1);
}
} else
llvm_unreachable("Custom lowering unsupported load");
Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
// Since we pretend to lower a load, we need the original chain
// info attached to the result.
SDValue Ops[] = { Result, LoadChain };
return DAG.getMergeValues(Ops, DL);
}
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
@ -1961,18 +1888,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Handling of vector operations.
//
// Custom lower v4i16 load only. Let v4i16 store to be
// promoted for now.
promoteLdStType(MVT::v4i8, MVT::i32);
promoteLdStType(MVT::v2i16, MVT::i32);
promoteLdStType(MVT::v8i8, MVT::i64);
promoteLdStType(MVT::v4i16, MVT::i64);
promoteLdStType(MVT::v2i32, MVT::i64);
setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
setOperationAction(ISD::STORE, MVT::v4i16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64);
AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64);
// Set the action for vector operations to "expand", then override it with
// either "custom" or "legal" for specific cases.
static const unsigned VectExpOps[] = {
@ -2970,8 +2891,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
// Custom lower some vector loads.
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);

View File

@ -165,7 +165,6 @@ namespace HexagonISD {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
bool CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,

View File

@ -2250,6 +2250,12 @@ def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>;
def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>;
def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>;
// Prefer this pattern to S2_asl_i_p_or for the special case of joining
// two 32-bit words into a 64-bit word.
let AddedComplexity = 200 in
def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)),
(A2_combinew I32:$a, I32:$b)>;
def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)),
(i64 (zext (i32 (and I32:$a, (i32 65535)))))),
(shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))),

View File

@ -0,0 +1,23 @@
; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
; CHECK-LABEL: danny:
; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0)
; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r0+#2)
; CHECK: [[T0]] |= asl([[T1]],#16)
; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r0+#4)
; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r0+#6)
; CHECK: [[T2]] |= asl([[T3]],#16)
; CHECK: combine([[T2]],[[T0]])
define <4 x i16> @danny(<4 x i16>* %p) {
%t0 = load <4 x i16>, <4 x i16>* %p, align 2
ret <4 x i16> %t0
}
; CHECK-LABEL: sammy:
; CHECK-DAG: [[T0:r[0-9]+]] = memw(r0+#0)
; CHECK-DAG: [[T1:r[0-9]+]] = memw(r0+#4)
; CHECK: combine([[T1]],[[T0]])
define <4 x i16> @sammy(<4 x i16>* %p) {
%t0 = load <4 x i16>, <4 x i16>* %p, align 4
ret <4 x i16> %t0
}