mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
Revert "[llvm][sve] Lowering for VLS truncating stores" because it
causes a seg fault (see https://reviews.llvm.org/D104471). This reverts commit c305557acdaad453e32309d575fe9c6c7090c099.
This commit is contained in:
parent
e10d9fcec5
commit
f2ec69fb59
@ -1271,14 +1271,6 @@ public:
|
||||
getTruncStoreAction(ValVT, MemVT) == Custom);
|
||||
}
|
||||
|
||||
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
|
||||
bool LegalOnly) const {
|
||||
if (LegalOnly)
|
||||
return isTruncStoreLegal(ValVT, MemVT);
|
||||
|
||||
return isTruncStoreLegalOrCustom(ValVT, MemVT);
|
||||
}
|
||||
|
||||
/// Return how the indexed load should be treated: either it is legal, needs
|
||||
/// to be promoted to a larger size, needs to be expanded to some other code
|
||||
/// sequence, or the target has a custom expander for it.
|
||||
|
@ -18097,11 +18097,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
|
||||
|
||||
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
|
||||
// truncating store. We can do this even if this is already a truncstore.
|
||||
if ((Value.getOpcode() == ISD::FP_ROUND ||
|
||||
Value.getOpcode() == ISD::TRUNCATE) &&
|
||||
Value.getNode()->hasOneUse() && ST->isUnindexed() &&
|
||||
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
|
||||
ST->getMemoryVT(), LegalOperations)) {
|
||||
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
|
||||
&& Value.getNode()->hasOneUse() && ST->isUnindexed() &&
|
||||
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
|
||||
ST->getMemoryVT())) {
|
||||
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
|
||||
Ptr, ST->getMemoryVT(), ST->getMemOperand());
|
||||
}
|
||||
|
@ -1241,13 +1241,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
}
|
||||
}
|
||||
|
||||
// SVE supports truncating stores of 64 and 128-bit vectors
|
||||
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
|
||||
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
|
||||
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
|
||||
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
|
||||
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
|
||||
|
||||
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
|
||||
MVT::nxv4f32, MVT::nxv2f64}) {
|
||||
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
|
||||
@ -1494,16 +1487,6 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
|
||||
setCondCodeAction(ISD::SETUNE, VT, Expand);
|
||||
}
|
||||
|
||||
// Mark integer truncating stores as having custom lowering
|
||||
if (VT.isInteger()) {
|
||||
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
|
||||
while (InnerVT != VT) {
|
||||
setTruncStoreAction(VT, InnerVT, Custom);
|
||||
InnerVT = InnerVT.changeVectorElementType(
|
||||
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
|
||||
}
|
||||
}
|
||||
|
||||
// Lower fixed length vector operations to scalable equivalents.
|
||||
setOperationAction(ISD::ABS, VT, Custom);
|
||||
setOperationAction(ISD::ADD, VT, Custom);
|
||||
@ -4547,7 +4530,7 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
|
||||
EVT MemVT = StoreNode->getMemoryVT();
|
||||
|
||||
if (VT.isVector()) {
|
||||
if (useSVEForFixedLengthVectorVT(VT, true))
|
||||
if (useSVEForFixedLengthVectorVT(VT))
|
||||
return LowerFixedLengthVectorStoreToSVE(Op, DAG);
|
||||
|
||||
unsigned AS = StoreNode->getAddressSpace();
|
||||
@ -4559,8 +4542,7 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
|
||||
return scalarizeVectorStore(StoreNode, DAG);
|
||||
}
|
||||
|
||||
if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
|
||||
MemVT == MVT::v4i8) {
|
||||
if (StoreNode->isTruncatingStore()) {
|
||||
return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
|
||||
}
|
||||
// 256 bit non-temporal stores can be lowered to STNP. Do this as part of
|
||||
@ -15140,29 +15122,6 @@ static bool performTBISimplification(SDValue Addr,
|
||||
return false;
|
||||
}
|
||||
|
||||
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
|
||||
assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
|
||||
"Expected STORE dag node in input!");
|
||||
|
||||
if (auto Store = dyn_cast<StoreSDNode>(N)) {
|
||||
if (!Store->isTruncatingStore())
|
||||
return SDValue();
|
||||
SDValue Ext = Store->getValue();
|
||||
auto ExtOpCode = Ext.getOpcode();
|
||||
if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
|
||||
ExtOpCode != ISD::ANY_EXTEND)
|
||||
return SDValue();
|
||||
SDValue Orig = Ext->getOperand(0);
|
||||
if (Store->getMemoryVT() != Orig->getValueType(0))
|
||||
return SDValue();
|
||||
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
|
||||
Store->getBasePtr(), Store->getPointerInfo(),
|
||||
Store->getAlign());
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue performSTORECombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
SelectionDAG &DAG,
|
||||
@ -15174,9 +15133,6 @@ static SDValue performSTORECombine(SDNode *N,
|
||||
performTBISimplification(N->getOperand(2), DCI, DAG))
|
||||
return SDValue(N, 0);
|
||||
|
||||
if (SDValue Store = foldTruncStoreOfExt(DAG, N))
|
||||
return Store;
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -54,15 +54,6 @@ public:
|
||||
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
|
||||
bool *IsFast = nullptr) const override;
|
||||
|
||||
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
|
||||
bool LegalOperations) const override {
|
||||
// R600 has "custom" lowering for truncating stores despite not supporting
|
||||
// those instructions. If we allow that custom lowering in the DAG combiner
|
||||
// then all truncates are merged into truncating stores, giving worse code
|
||||
// generation. This hook prevents the DAG combiner performing that combine.
|
||||
return isTruncStoreLegal(ValVT, MemVT);
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned Gen;
|
||||
/// Each OpenCL kernel has nine implicit parameters that are stored in the
|
||||
|
@ -36,7 +36,10 @@ define void @masked_gather_v2i8(<2 x i8>* %a, <2 x i8*>* %b) #0 {
|
||||
; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0
|
||||
; CHECK-NEXT: ld1sb { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d]
|
||||
; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d
|
||||
; CHECK-NEXT: st1b { z[[XTN]].s }, [[PG0]], [x0]
|
||||
; CHECK-NEXT: mov [[RES_HI:w[0-9]+]], v[[XTN]].s[1]
|
||||
; CHECK-NEXT: fmov [[RES_LO:w[0-9]+]], s[[XTN]]
|
||||
; CHECK-NEXT: strb [[RES_LO]], [x0]
|
||||
; CHECK-NEXT: strb [[RES_HI]], [x0, #1]
|
||||
; CHECK-NEXT: ret
|
||||
%cval = load <2 x i8>, <2 x i8>* %a
|
||||
%ptrs = load <2 x i8*>, <2 x i8*>* %b
|
||||
@ -58,7 +61,8 @@ define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) #0 {
|
||||
; CHECK-NEXT: ld1sb { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
|
||||
; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s
|
||||
; CHECK-NEXT: uzp1 z[[UZP2:[0-9]+]].h, [[UZP1]].h, [[UZP1]].h
|
||||
; CHECK-NEXT: st1b { z[[UZP2]].h }, [[PG0]], [x0]
|
||||
; CHECK-NEXT: uzp1 v[[UZP3:[0-9]+]].8b, v[[UZP2]].8b, v[[UZP2]].8b
|
||||
; CHECK-NEXT: str s[[UZP3]], [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%cval = load <4 x i8>, <4 x i8>* %a
|
||||
%ptrs = load <4 x i8*>, <4 x i8*>* %b
|
||||
@ -174,7 +178,10 @@ define void @masked_gather_v2i16(<2 x i16>* %a, <2 x i16*>* %b) #0 {
|
||||
; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0
|
||||
; CHECK-NEXT: ld1sh { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d]
|
||||
; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d
|
||||
; CHECK-NEXT: st1h { z[[RES]].s }, [[PG0]], [x0]
|
||||
; CHECK-NEXT: mov [[RES_HI:w[0-9]+]], v[[XTN]].s[1]
|
||||
; CHECK-NEXT: fmov [[RES_LO:w[0-9]+]], s[[XTN]]
|
||||
; CHECK-NEXT: strh [[RES_LO]], [x0]
|
||||
; CHECK-NEXT: strh [[RES_HI]], [x0, #2]
|
||||
; CHECK-NEXT: ret
|
||||
%cval = load <2 x i16>, <2 x i16>* %a
|
||||
%ptrs = load <2 x i16*>, <2 x i16*>* %b
|
||||
|
@ -1,218 +0,0 @@
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_EQ_256
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_2048,VBITS_GE_1024,VBITS_GE_512
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
; Don't use SVE when its registers are no bigger than NEON.
|
||||
; NO_SVE-NOT: ptrue
|
||||
|
||||
define void @store_trunc_v2i64i8(<2 x i64>* %ap, <2 x i8>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v2i64i8
|
||||
; CHECK: ldr q[[Q0:[0-9]+]], [x0]
|
||||
; CHECK: ptrue p[[P0:[0-9]+]].d, vl2
|
||||
; CHECK-NEXT: st1b { z[[Q0]].d }, p[[P0]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x i64>, <2 x i64>* %ap
|
||||
%val = trunc <2 x i64> %a to <2 x i8>
|
||||
store <2 x i8> %val, <2 x i8>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_trunc_v4i64i8(<4 x i64>* %ap, <4 x i8>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v4i64i8
|
||||
; CHECK: ptrue p[[P0:[0-9]+]].d, vl4
|
||||
; CHECK-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
||||
; CHECK-NEXT: st1b { z[[Q0]].d }, p[[P0]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x i64>, <4 x i64>* %ap
|
||||
%val = trunc <4 x i64> %a to <4 x i8>
|
||||
store <4 x i8> %val, <4 x i8>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i8>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v8i64i8:
|
||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
|
||||
; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: st1b { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
|
||||
; Ensure sensible type legalisation
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
|
||||
; VBITS_EQ_256-DAG: ld1d { [[Z0:z[0-9]+]].d }, [[PG]]/z, [x8]
|
||||
; VBITS_EQ_256-DAG: ld1d { [[Z1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG]].s, vl4
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z0]].s, [[Z0]].s, [[Z0]].s
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z1]].s, [[Z1]].s, [[Z1]].s
|
||||
; VBITS_EQ_256-DAG: splice [[Z1]].s, [[PG]], [[Z1]].s, [[Z0]].s
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG]].s, vl8
|
||||
; VBITS_EQ_256-DAG: st1b { [[Z1]].s }, [[PG]], [x1]
|
||||
; VBITS_EQ_256-DAG: ret
|
||||
%a = load <8 x i64>, <8 x i64>* %ap
|
||||
%val = trunc <8 x i64> %a to <8 x i8>
|
||||
store <8 x i8> %val, <8 x i8>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_trunc_v16i64i8(<16 x i64>* %ap, <16 x i8>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v16i64i8:
|
||||
; VBITS_GE_1024: ptrue p[[P0:[0-9]+]].d, vl16
|
||||
; VBITS_GE_1024-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: st1b { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
%a = load <16 x i64>, <16 x i64>* %ap
|
||||
%val = trunc <16 x i64> %a to <16 x i8>
|
||||
store <16 x i8> %val, <16 x i8>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_trunc_v32i64i8(<32 x i64>* %ap, <32 x i8>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v32i64i8:
|
||||
; VBITS_GE_2048: ptrue p[[P0:[0-9]+]].d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: st1b { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
%a = load <32 x i64>, <32 x i64>* %ap
|
||||
%val = trunc <32 x i64> %a to <32 x i8>
|
||||
store <32 x i8> %val, <32 x i8>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i16>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v8i64i16:
|
||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
|
||||
; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: st1h { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
|
||||
; Ensure sensible type legalisation.
|
||||
; Currently does not use the truncating store
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
|
||||
; VBITS_EQ_256-DAG: ld1d { [[Z0:z[0-9]+]].d }, [[PG]]/z, [x8]
|
||||
; VBITS_EQ_256-DAG: ld1d { [[Z1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z0]].s, [[Z0]].s, [[Z0]].s
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z1]].s, [[Z1]].s, [[Z1]].s
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
|
||||
; VBITS_EQ_256-DAG: mov v[[V0:[0-9]+]].d[1], v{{[0-9]+}}.d[0]
|
||||
; VBITS_EQ_256-DAG: str q[[V0]], [x1]
|
||||
; VBITS_EQ_256-DAG: ret
|
||||
%a = load <8 x i64>, <8 x i64>* %ap
|
||||
%val = trunc <8 x i64> %a to <8 x i16>
|
||||
store <8 x i16> %val, <8 x i16>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i32>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v8i64i32:
|
||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
|
||||
; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: st1w { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
|
||||
; Ensure sensible type legalisation
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
|
||||
; VBITS_EQ_256-DAG: ld1d { [[Z0:z[0-9]+]].d }, [[PG]]/z, [x8]
|
||||
; VBITS_EQ_256-DAG: ld1d { [[Z1:z[0-9]+]].d }, [[PG]]/z, [x0]
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG]].s, vl4
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z0]].s, [[Z0]].s, [[Z0]].s
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z1]].s, [[Z1]].s, [[Z1]].s
|
||||
; VBITS_EQ_256-DAG: splice [[Z1]].s, [[PG]], [[Z1]].s, [[Z0]].s
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG]].s, vl8
|
||||
; VBITS_EQ_256-DAG: st1w { [[Z1]].s }, [[PG]], [x1]
|
||||
; VBITS_EQ_256-DAG: ret
|
||||
%a = load <8 x i64>, <8 x i64>* %ap
|
||||
%val = trunc <8 x i64> %a to <8 x i32>
|
||||
store <8 x i32> %val, <8 x i32>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i8>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v16i32i8:
|
||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].s, vl16
|
||||
; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: st1b { [[Z0]].s }, p[[P0]], [x{{[0-9]+}}]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
|
||||
; Ensure sensible type legalisation.
|
||||
; Currently does not use the truncating store
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
|
||||
; VBITS_EQ_256-DAG: ld1w { [[Z0:z[0-9]+]].s }, [[PG]]/z, [x8]
|
||||
; VBITS_EQ_256-DAG: ld1w { [[Z1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z1]].b, [[Z1]].b, [[Z1]].b
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z0]].b, [[Z0]].b, [[Z0]].b
|
||||
; VBITS_EQ_256-DAG: mov v[[V0:[0-9]+]].d[1], v{{[0-9]+}}.d[0]
|
||||
; VBITS_EQ_256-DAG: str q[[V0]], [x1]
|
||||
; VBITS_EQ_256-DAG: ret
|
||||
%a = load <16 x i32>, <16 x i32>* %ap
|
||||
%val = trunc <16 x i32> %a to <16 x i8>
|
||||
store <16 x i8> %val, <16 x i8>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i16>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v16i32i16:
|
||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].s, vl16
|
||||
; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: st1h { [[Z0]].s }, p[[P0]], [x{{[0-9]+}}]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
|
||||
; Ensure sensible type legalisation
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
|
||||
; VBITS_EQ_256-DAG: ld1w { [[Z0:z[0-9]+]].s }, [[PG]]/z, [x8]
|
||||
; VBITS_EQ_256-DAG: ld1w { [[Z1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG]].h, vl8
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
|
||||
; VBITS_EQ_256-DAG: splice [[Z1]].h, [[PG]], [[Z1]].h, [[Z0]].h
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG]].h, vl16
|
||||
; VBITS_EQ_256-DAG: st1h { [[Z1]].h }, [[PG]], [x1]
|
||||
; VBITS_EQ_256-DAG: ret
|
||||
%a = load <16 x i32>, <16 x i32>* %ap
|
||||
%val = trunc <16 x i32> %a to <16 x i16>
|
||||
store <16 x i16> %val, <16 x i16>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i8>* %dest) #0 {
|
||||
; CHECK-LABEL: store_trunc_v32i16i8:
|
||||
; VBITS_GE_512: ptrue p[[P0:[0-9]+]].h, vl32
|
||||
; VBITS_GE_512-NEXT: ld1h { [[Z0:z[0-9]+]].h }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: st1b { [[Z0]].h }, p[[P0]], [x{{[0-9]+}}]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
|
||||
; Ensure sensible type legalisation
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
|
||||
; VBITS_EQ_256-DAG: ld1h { [[Z0:z[0-9]+]].h }, [[PG]]/z, [x8]
|
||||
; VBITS_EQ_256-DAG: ld1h { [[Z1:z[0-9]+]].h }, [[PG]]/z, [x0]
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG]].b, vl16
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z0]].b, [[Z0]].b, [[Z0]].b
|
||||
; VBITS_EQ_256-DAG: uzp1 [[Z1]].b, [[Z1]].b, [[Z1]].b
|
||||
; VBITS_EQ_256-DAG: splice [[Z1]].b, [[PG]], [[Z1]].b, [[Z0]].b
|
||||
; VBITS_EQ_256-DAG: ptrue [[PG]].b, vl32
|
||||
; VBITS_EQ_256-DAG: st1b { [[Z1]].b }, [[PG]], [x1]
|
||||
; VBITS_EQ_256-DAG: ret
|
||||
%a = load <32 x i16>, <32 x i16>* %ap
|
||||
%val = trunc <32 x i16> %a to <32 x i8>
|
||||
store <32 x i8> %val, <32 x i8>* %dest
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
@ -255,9 +255,10 @@ define dso_local void @g2(%struct.S1* %a) {
|
||||
; N32-NEXT: .cfi_offset 31, -8
|
||||
; N32-NEXT: .cfi_offset 16, -16
|
||||
; N32-NEXT: move $5, $4
|
||||
; N32-NEXT: lui $1, 1
|
||||
; N32-NEXT: addu $1, $sp, $1
|
||||
; N32-NEXT: sw $4, -4($1)
|
||||
; N32-NEXT: sll $1, $5, 0
|
||||
; N32-NEXT: lui $2, 1
|
||||
; N32-NEXT: addu $2, $sp, $2
|
||||
; N32-NEXT: sw $1, -4($2)
|
||||
; N32-NEXT: addiu $16, $sp, 8
|
||||
; N32-NEXT: ori $6, $zero, 65520
|
||||
; N32-NEXT: jal memcpy
|
||||
@ -388,8 +389,10 @@ define dso_local i32 @g3(%struct.S1* %a, %struct.S1* %b) #0 {
|
||||
; N32-NEXT: .cfi_def_cfa_offset 16
|
||||
; N32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
|
||||
; N32-NEXT: .cfi_offset 31, -8
|
||||
; N32-NEXT: sw $5, 0($sp)
|
||||
; N32-NEXT: sw $4, 4($sp)
|
||||
; N32-NEXT: sll $1, $5, 0
|
||||
; N32-NEXT: sw $1, 0($sp)
|
||||
; N32-NEXT: sll $1, $4, 0
|
||||
; N32-NEXT: sw $1, 4($sp)
|
||||
; N32-NEXT: jal memcpy
|
||||
; N32-NEXT: ori $6, $zero, 65520
|
||||
; N32-NEXT: addiu $2, $zero, 4
|
||||
|
@ -554,8 +554,10 @@ define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) {
|
||||
; MIPS64R5: # %bb.0:
|
||||
; MIPS64R5-NEXT: daddiu $sp, $sp, -16
|
||||
; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
|
||||
; MIPS64R5-NEXT: sw $5, 8($sp)
|
||||
; MIPS64R5-NEXT: sw $4, 12($sp)
|
||||
; MIPS64R5-NEXT: sll $1, $5, 0
|
||||
; MIPS64R5-NEXT: sw $1, 8($sp)
|
||||
; MIPS64R5-NEXT: sll $1, $4, 0
|
||||
; MIPS64R5-NEXT: sw $1, 12($sp)
|
||||
; MIPS64R5-NEXT: lbu $1, 9($sp)
|
||||
; MIPS64R5-NEXT: lbu $2, 8($sp)
|
||||
; MIPS64R5-NEXT: insert.w $w0[0], $2
|
||||
@ -1261,8 +1263,10 @@ define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) {
|
||||
; MIPS64R5: # %bb.0:
|
||||
; MIPS64R5-NEXT: daddiu $sp, $sp, -16
|
||||
; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
|
||||
; MIPS64R5-NEXT: sw $5, 8($sp)
|
||||
; MIPS64R5-NEXT: sw $4, 12($sp)
|
||||
; MIPS64R5-NEXT: sll $1, $5, 0
|
||||
; MIPS64R5-NEXT: sw $1, 8($sp)
|
||||
; MIPS64R5-NEXT: sll $1, $4, 0
|
||||
; MIPS64R5-NEXT: sw $1, 12($sp)
|
||||
; MIPS64R5-NEXT: lh $1, 10($sp)
|
||||
; MIPS64R5-NEXT: lh $2, 8($sp)
|
||||
; MIPS64R5-NEXT: insert.d $w0[0], $2
|
||||
|
@ -285,57 +285,65 @@ define void @f3(i32 %a) {
|
||||
;
|
||||
; MIPS4-LABEL: f3:
|
||||
; MIPS4: # %bb.0:
|
||||
; MIPS4-NEXT: lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
|
||||
; MIPS4-NEXT: sll $1, $4, 0 # <MCInst #{{[0-9]+}} SLL
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Imm:0>>
|
||||
; MIPS4-NEXT: lui $2, %highest(c) # <MCInst #{{[0-9]+}} LUi64
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Expr:(%highest(c))>>
|
||||
; MIPS4-NEXT: daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
|
||||
; MIPS4-NEXT: daddiu $2, $2, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Expr:(%higher(c))>>
|
||||
; MIPS4-NEXT: dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
|
||||
; MIPS4-NEXT: dsll $2, $2, 16 # <MCInst #{{[0-9]+}} DSLL
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Imm:16>>
|
||||
; MIPS4-NEXT: daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
|
||||
; MIPS4-NEXT: daddiu $2, $2, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Expr:(%hi(c))>>
|
||||
; MIPS4-NEXT: dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
|
||||
; MIPS4-NEXT: dsll $2, $2, 16 # <MCInst #{{[0-9]+}} DSLL
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Imm:16>>
|
||||
; MIPS4-NEXT: jr $ra # <MCInst #{{[0-9]+}} JR
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>>
|
||||
; MIPS4-NEXT: sw $4, %lo(c)($1) # <MCInst #{{[0-9]+}} SW
|
||||
; MIPS4-NEXT: sw $1, %lo(c)($2) # <MCInst #{{[0-9]+}} SW
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS4-NEXT: # <MCOperand Expr:(%lo(c))>>
|
||||
;
|
||||
; MIPS64R6-LABEL: f3:
|
||||
; MIPS64R6: # %bb.0:
|
||||
; MIPS64R6-NEXT: lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
|
||||
; MIPS64R6-NEXT: sll $1, $4, 0 # <MCInst #{{[0-9]+}} SLL
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Imm:0>>
|
||||
; MIPS64R6-NEXT: lui $2, %highest(c) # <MCInst #{{[0-9]+}} LUi64
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Expr:(%highest(c))>>
|
||||
; MIPS64R6-NEXT: daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
|
||||
; MIPS64R6-NEXT: daddiu $2, $2, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Expr:(%higher(c))>>
|
||||
; MIPS64R6-NEXT: dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
|
||||
; MIPS64R6-NEXT: dsll $2, $2, 16 # <MCInst #{{[0-9]+}} DSLL
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Imm:16>>
|
||||
; MIPS64R6-NEXT: daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
|
||||
; MIPS64R6-NEXT: daddiu $2, $2, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Expr:(%hi(c))>>
|
||||
; MIPS64R6-NEXT: dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
|
||||
; MIPS64R6-NEXT: dsll $2, $2, 16 # <MCInst #{{[0-9]+}} DSLL
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Imm:16>>
|
||||
; MIPS64R6-NEXT: jr $ra # <MCInst #{{[0-9]+}} JALR64
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>>
|
||||
; MIPS64R6-NEXT: sw $4, %lo(c)($1) # <MCInst #{{[0-9]+}} SW
|
||||
; MIPS64R6-NEXT: sw $1, %lo(c)($2) # <MCInst #{{[0-9]+}} SW
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Reg:{{[0-9]+}}>
|
||||
; MIPS64R6-NEXT: # <MCOperand Expr:(%lo(c))>>
|
||||
|
Loading…
Reference in New Issue
Block a user