From c6f27be4ce6b8367c09cfecdee15ca71401b23b7 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Mon, 10 May 2021 15:26:57 -0500
Subject: [PATCH] [Hexagon] Handle loads and stores of scalar predicate vectors

Handle v2i1, v4i1, and v8i1.
---
 lib/Target/Hexagon/HexagonISelLowering.cpp |  49 +++-
 test/CodeGen/Hexagon/isel-memory-vNi1.ll   | 276 +++++++++++++++++++++
 2 files changed, 321 insertions(+), 4 deletions(-)
 create mode 100644 test/CodeGen/Hexagon/isel-memory-vNi1.ll

diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1d722a722dc..8530f5d5140 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1726,6 +1726,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STORE, VT, Custom);
   }
 
+  // Custom-lower load/stores of boolean vectors.
+  for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
+    setOperationAction(ISD::LOAD, VT, Custom);
+    setOperationAction(ISD::STORE, VT, Custom);
+  }
+
   for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
                  MVT::v2i32}) {
     setCondCodeAction(ISD::SETNE, VT, Expand);
@@ -2878,27 +2884,62 @@ HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
 
 SDValue
 HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
+  MVT Ty = ty(Op);
+  const SDLoc &dl(Op);
+  // Lower loads of scalar predicate vectors (v2i1, v4i1, v8i1) to loads of i1
+  // followed by a TYPECAST.
   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+  bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
+  if (DoCast) {
+    SDValue NL = DAG.getLoad(
+        LN->getAddressingMode(), LN->getExtensionType(), MVT::i1, dl,
+        LN->getChain(), LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
+        /*MemoryVT*/ MVT::i1, LN->getAlign(), LN->getMemOperand()->getFlags(),
+        LN->getAAInfo(), LN->getRanges());
+    LN = cast<LoadSDNode>(NL.getNode());
+  }
+
   unsigned ClaimAlign = LN->getAlignment();
-  validateConstPtrAlignment(LN->getBasePtr(), SDLoc(Op), ClaimAlign);
+  validateConstPtrAlignment(LN->getBasePtr(), dl, ClaimAlign);
   // Call LowerUnalignedLoad for all loads, it recognizes loads that
   // don't need extra aligning.
-  return LowerUnalignedLoad(Op, DAG);
+  SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
+  if (DoCast) {
+    SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, Ty, LU);
+    SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
+    return DAG.getMergeValues({TC, Ch}, dl);
+  }
+  return LU;
 }
 
 SDValue
 HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
+  const SDLoc &dl(Op);
   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+  SDValue Val = SN->getValue();
+  MVT Ty = ty(Val);
+
+  bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
+  if (DoCast) {
+    SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, Val);
+    SDValue NS = DAG.getStore(SN->getChain(), dl, TC, SN->getBasePtr(),
+                              SN->getMemOperand());
+    if (SN->isIndexed()) {
+      NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
+                               SN->getAddressingMode());
+    }
+    SN = cast<StoreSDNode>(NS.getNode());
+  }
+
   unsigned ClaimAlign = SN->getAlignment();
   SDValue Ptr = SN->getBasePtr();
-  const SDLoc &dl(Op);
   validateConstPtrAlignment(Ptr, dl, ClaimAlign);
 
   MVT StoreTy = SN->getMemoryVT().getSimpleVT();
   unsigned NeedAlign = Subtarget.getTypeAlignment(StoreTy);
   if (ClaimAlign < NeedAlign)
     return expandUnalignedStore(SN, DAG);
-  return Op;
+  return SDValue(SN, 0);
 }
 
 SDValue
diff --git a/test/CodeGen/Hexagon/isel-memory-vNi1.ll b/test/CodeGen/Hexagon/isel-memory-vNi1.ll
new file mode 100644
index 00000000000..a438b6cc46f
--- /dev/null
+++ b/test/CodeGen/Hexagon/isel-memory-vNi1.ll
@@ -0,0 +1,276 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+define i64 @f0(<8 x i1>* %a0, <8 x i8> %a1) #0 {
+; CHECK-LABEL: f0:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = memub(r0+#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = combine(#0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = r0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vmux(p0,r3:2,r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = load <8 x i1>, <8 x i1>* %a0, align 1
+  %v1 = select <8 x i1> %v0, <8 x i8> %a1, <8 x i8> zeroinitializer
+  %v2 = bitcast <8 x i8> %v1 to i64
+  ret i64 %v2
+}
+
+define i32 @f1(<4 x i1>* %a0, <4 x i8> %a1) #0 {
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = memub(r0+#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = vsxtbh(r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vsxtbh(r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = r0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = vmux(p0,r5:4,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = vtrunehb(r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = load <4 x i1>, <4 x i1>* %a0, align 1
+  %v1 = select <4 x i1> %v0, <4 x i8> %a1, <4 x i8> zeroinitializer
+  %v2 = bitcast <4 x i8> %v1 to i32
+  ret i32 %v2
+}
+
+define i16 @f2(<2 x i1>* %a0, <2 x i8> %a1) #0 {
+; CHECK-LABEL: f2:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = memub(r0+#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p1 = tstbit(r0,#4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = r0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = mux(p1,r3,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = mux(p0,r2,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = insert(r1,#24,#8)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = load <2 x i1>, <2 x i1>* %a0, align 1
+  %v1 = select <2 x i1> %v0, <2 x i8> %a1, <2 x i8> zeroinitializer
+  %v2 = bitcast <2 x i8> %v1 to i16
+  ret i16 %v2
+}
+
+define i8 @f3(<1 x i1>* %a0, <1 x i8> %a1) #0 {
+; CHECK-LABEL: f3:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = memub(r0+#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = r0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = mux(p0,r1,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = load <1 x i1>, <1 x i1>* %a0, align 1
+  %v1 = select <1 x i1> %v0, <1 x i8> %a1, <1 x i8> zeroinitializer
+  %v2 = bitcast <1 x i8> %v1 to i8
+  ret i8 %v2
+}
+
+define void @f4(<8 x i1>* %a0, i64 %a1) #0 {
+; CHECK-LABEL: f4:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = combine(#0,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = vcmpb.eq(r3:2,r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = mux(p0,#0,#1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memb(r0+#0) = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = bitcast i64 %a1 to <8 x i8>
+  %v1 = icmp ne <8 x i8> %v0, zeroinitializer
+  store <8 x i1> %v1, <8 x i1>* %a0, align 1
+  ret void
+}
+
+define void @f5(<4 x i1>* %a0, i32 %a1) #0 {
+; CHECK-LABEL: f5:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5:4 = vsxtbh(r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vsxtbh(r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = vcmph.eq(r5:4,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = mux(p0,#0,#1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memb(r0+#0) = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = bitcast i32 %a1 to <4 x i8>
+  %v1 = icmp ne <4 x i8> %v0, zeroinitializer
+  store <4 x i1> %v1, <4 x i1>* %a0, align 1
+  ret void
+}
+
+define void @f6(<2 x i1>* %a0, i16 %a1) #0 {
+; CHECK-LABEL: f6:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = extractu(r1,#8,#8)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = #255
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p1 = !bitsclr(r1,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r2,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) r2 = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = mux(p1,#8,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = mux(p1,#2,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = setbit(r1,#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r6 = setbit(r3,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) r2 = #128
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 = mux(p0,#0,#32)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p1) r5 = add(r1,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = setbit(r2,#6)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p1) r6 = add(r3,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 = setbit(r4,#4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 = or(r6,r5)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) r2 = add(r1,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) r4 = add(r3,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 |= or(r4,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = r5
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = mux(p0,#1,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memb(r0+#0) = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = bitcast i16 %a1 to <2 x i8>
+  %v1 = icmp ne <2 x i8> %v0, zeroinitializer
+  store <2 x i1> %v1, <2 x i1>* %a0, align 1
+  ret void
+}
+
+define void @f7(<1 x i1>* %a0, i8 %a1) #0 {
+; CHECK-LABEL: f7:
+; CHECK: // %bb.0: // %b0
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = #255
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = !bitsclr(r1,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = mux(p0,#1,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memb(r0+#0) = r1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+b0:
+  %v0 = bitcast i8 %a1 to <1 x i8>
+  %v1 = icmp ne <1 x i8> %v0, zeroinitializer
+  store <1 x i1> %v1, <1 x i1>* %a0, align 1
+  ret void
+}
+
+attributes #0 = { nounwind "target-features"="-packets" }
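
Editor's note (not part of the patch): the change lowers a vNi1 load to an i1 (byte) load followed by a HexagonISD::TYPECAST back to the predicate-vector type, and a vNi1 store to a TYPECAST to i1 followed by a byte store. The added test exercises loads and stores in separate functions; the sketch below is a hypothetical single function, in the same style as the test, that runs both paths in one round trip. The name @copy_v4i1 and its arguments are illustrative and do not appear in the patch; it is meant to be fed to the same RUN line, llc -march=hexagon.

define void @copy_v4i1(<4 x i1>* %src, <4 x i1>* %dst) #0 {
b0:
  ; Load of <4 x i1> goes through the new LowerLoad path (i1 load + TYPECAST).
  %v0 = load <4 x i1>, <4 x i1>* %src, align 1
  ; Store of <4 x i1> goes through the new LowerStore path (TYPECAST + i1 store).
  store <4 x i1> %v0, <4 x i1>* %dst, align 1
  ret void
}

attributes #0 = { nounwind "target-features"="-packets" }

With the patch applied, the load side should show up as a byte load (memub) transferred into a predicate register, and the store side as a byte store (memb), matching the CHECK lines in the new test file.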