1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[X86] Fold a VTRUNCS/VTRUNCUS+store into a saturating truncating store.

We already did this for VTRUNCUS, but only for one specific combination
of types (v16i16 -> v16i8). This change extends the fold to VTRUNCS and
to any type combination for which a truncating store is legal.

llvm-svn: 374615
This commit is contained in:
Craig Topper 2019-10-12 00:01:08 +00:00
parent 890a39900c
commit 5dd46d3111
2 changed files with 18 additions and 26 deletions

View File

@ -40332,11 +40332,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);
EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
unsigned Alignment = St->getAlignment();
SDValue StoredVal = St->getOperand(1);
SDValue StoredVal = St->getValue();
EVT VT = StoredVal.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Convert a store of vXi1 into a store of iX and a bitcast.
@ -40453,17 +40453,15 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
MVT::v16i8, St->getMemOperand());
}
// Try to fold a vpmovuswb 256->128 into a truncating store.
// FIXME: Generalize this to other types.
// FIXME: Do the same for signed saturation.
if (!St->isTruncatingStore() && VT == MVT::v16i8 &&
St->getValue().getOpcode() == X86ISD::VTRUNCUS &&
St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
TLI.isTruncStoreLegal(MVT::v16i16, MVT::v16i8) &&
St->getValue().hasOneUse()) {
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
dl, St->getValue().getOperand(0), St->getBasePtr(),
MVT::v16i8, St->getMemOperand(), DAG);
// Try to fold a VTRUNCUS or VTRUNCS into a truncating store.
if (!St->isTruncatingStore() && StoredVal.hasOneUse() &&
(StoredVal.getOpcode() == X86ISD::VTRUNCUS ||
StoredVal.getOpcode() == X86ISD::VTRUNCS) &&
TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
return EmitTruncSStore(IsSigned, St->getChain(),
dl, StoredVal.getOperand(0), St->getBasePtr(),
VT, St->getMemOperand(), DAG);
}
// Optimize trunc store (of multiple scalars) to shuffle and store.

View File

@ -690,10 +690,8 @@ define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
; ALL-LABEL: usat_trunc_db_1024_mem:
; ALL: ## %bb.0:
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
; ALL-NEXT: vmovdqu %xmm0, (%rdi)
; ALL-NEXT: vpmovusdb %zmm1, 16(%rdi)
; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@ -957,12 +955,10 @@ define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
; ALL-LABEL: smax_usat_trunc_db_1024_mem:
; ALL: ## %bb.0:
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
; ALL-NEXT: vmovdqu %xmm0, (%rdi)
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
; ALL-NEXT: vpmovusdb %zmm1, 16(%rdi)
; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@ -1048,10 +1044,8 @@ define void @negative_test2_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>*
define void @ssat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
; ALL-LABEL: ssat_trunc_db_1024_mem:
; ALL: ## %bb.0:
; ALL-NEXT: vpmovsdb %zmm0, %xmm0
; ALL-NEXT: vpmovsdb %zmm1, %xmm1
; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
; ALL-NEXT: vmovdqu %xmm0, (%rdi)
; ALL-NEXT: vpmovsdb %zmm1, 16(%rdi)
; ALL-NEXT: vpmovsdb %zmm0, (%rdi)
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
%x1 = icmp sgt <32 x i32> %i, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32