From 19ac7a1fe68d3348a7488b0d82319bb95d4f0aa2 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 8 Jan 2019 13:30:27 +0000 Subject: [PATCH] AArch64: avoid splitting vector truncating stores. We have code to split vector splats (of zero and non-zero) for performance reasons, but it ignores the fact that a store might be truncating. Actually, truncating stores are formed for vNi8 and vNi16 types. Since the truncation is from a legal type, the size of the store is always <= 64-bits and so they don't actually benefit from being split up anyway, so this patch just disables that transformation. llvm-svn: 350620 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 11 +++++++++++ test/CodeGen/AArch64/ldst-opt.ll | 16 ++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 623815e29eb..c7f46a251bf 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10053,6 +10053,7 @@ static SDValue performExtendCombine(SDNode *N, static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts) { + assert(!St.isTruncatingStore() && "cannot split truncating vector store"); unsigned OrigAlignment = St.getAlignment(); unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8; @@ -10127,6 +10128,11 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) { if (!StVal.hasOneUse()) return SDValue(); + // If the store is truncating then it's going down to i16 or smaller, which + // means it can be implemented in a single store anyway. + if (St.isTruncatingStore()) + return SDValue(); + // If the immediate offset of the address operand is too large for the stp // instruction, then bail out. 
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) { @@ -10177,6 +10183,11 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) { if (NumVecElts != 4 && NumVecElts != 2) return SDValue(); + // If the store is truncating then it's going down to i16 or smaller, which + // means it can be implemented in a single store anyway. + if (St.isTruncatingStore()) + return SDValue(); + // Check that this is a splat. // Make sure that each of the relevant vector element locations are inserted // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32. diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 7f6cba2133f..fe55806e56f 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -1681,3 +1681,19 @@ entry: %add = add i64 %ld, 1 ret i64 %add } + +; CHECK-LABEL: trunc_splat_zero: +; CHECK-DAG: strh wzr, [x0] +define void @trunc_splat_zero(<2 x i8>* %ptr) { + store <2 x i8> zeroinitializer, <2 x i8>* %ptr, align 2 + ret void +} + +; CHECK-LABEL: trunc_splat: +; CHECK: mov [[VAL:w[0-9]+]], #42 +; CHECK: movk [[VAL]], #42, lsl #16 +; CHECK: str [[VAL]], [x0] +define void @trunc_splat(<2 x i16>* %ptr) { + store <2 x i16> <i16 42, i16 42>, <2 x i16>* %ptr, align 4 + ret void +}