mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[X86] Fold a VTRUNCS/VTRUNCUS+store into a saturating truncating store.
We already performed this fold for VTRUNCUS, but only for one specific combination of types (a v16i16 source truncated to v16i8). This change extends the fold to VTRUNCS as well, and handles any pair of types for which a truncating store is legal.
llvm-svn: 374615
This commit is contained in:
parent
890a39900c
commit
5dd46d3111
@ -40332,11 +40332,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget &Subtarget) {
|
||||
StoreSDNode *St = cast<StoreSDNode>(N);
|
||||
EVT VT = St->getValue().getValueType();
|
||||
EVT StVT = St->getMemoryVT();
|
||||
SDLoc dl(St);
|
||||
unsigned Alignment = St->getAlignment();
|
||||
SDValue StoredVal = St->getOperand(1);
|
||||
SDValue StoredVal = St->getValue();
|
||||
EVT VT = StoredVal.getValueType();
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
|
||||
// Convert a store of vXi1 into a store of iX and a bitcast.
|
||||
@ -40453,17 +40453,15 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
||||
MVT::v16i8, St->getMemOperand());
|
||||
}
|
||||
|
||||
// Try to fold a vpmovuswb 256->128 into a truncating store.
|
||||
// FIXME: Generalize this to other types.
|
||||
// FIXME: Do the same for signed saturation.
|
||||
if (!St->isTruncatingStore() && VT == MVT::v16i8 &&
|
||||
St->getValue().getOpcode() == X86ISD::VTRUNCUS &&
|
||||
St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
|
||||
TLI.isTruncStoreLegal(MVT::v16i16, MVT::v16i8) &&
|
||||
St->getValue().hasOneUse()) {
|
||||
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
|
||||
dl, St->getValue().getOperand(0), St->getBasePtr(),
|
||||
MVT::v16i8, St->getMemOperand(), DAG);
|
||||
// Try to fold a VTRUNCUS or VTRUNCS into a truncating store.
|
||||
if (!St->isTruncatingStore() && StoredVal.hasOneUse() &&
|
||||
(StoredVal.getOpcode() == X86ISD::VTRUNCUS ||
|
||||
StoredVal.getOpcode() == X86ISD::VTRUNCS) &&
|
||||
TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
|
||||
bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
|
||||
return EmitTruncSStore(IsSigned, St->getChain(),
|
||||
dl, StoredVal.getOperand(0), St->getBasePtr(),
|
||||
VT, St->getMemOperand(), DAG);
|
||||
}
|
||||
|
||||
// Optimize trunc store (of multiple scalars) to shuffle and store.
|
||||
|
@ -690,10 +690,8 @@ define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
|
||||
define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
|
||||
; ALL-LABEL: usat_trunc_db_1024_mem:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
|
||||
; ALL-NEXT: vmovdqu %xmm0, (%rdi)
|
||||
; ALL-NEXT: vpmovusdb %zmm1, 16(%rdi)
|
||||
; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
|
||||
; ALL-NEXT: vzeroupper
|
||||
; ALL-NEXT: retq
|
||||
%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
|
||||
@ -957,12 +955,10 @@ define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
|
||||
; ALL-LABEL: smax_usat_trunc_db_1024_mem:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovusdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
|
||||
; ALL-NEXT: vmovdqu %xmm0, (%rdi)
|
||||
; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
|
||||
; ALL-NEXT: vpmovusdb %zmm1, 16(%rdi)
|
||||
; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
|
||||
; ALL-NEXT: vzeroupper
|
||||
; ALL-NEXT: retq
|
||||
%x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -1048,10 +1044,8 @@ define void @negative_test2_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>*
|
||||
define void @ssat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
|
||||
; ALL-LABEL: ssat_trunc_db_1024_mem:
|
||||
; ALL: ## %bb.0:
|
||||
; ALL-NEXT: vpmovsdb %zmm0, %xmm0
|
||||
; ALL-NEXT: vpmovsdb %zmm1, %xmm1
|
||||
; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
|
||||
; ALL-NEXT: vmovdqu %xmm0, (%rdi)
|
||||
; ALL-NEXT: vpmovsdb %zmm1, 16(%rdi)
|
||||
; ALL-NEXT: vpmovsdb %zmm0, (%rdi)
|
||||
; ALL-NEXT: vzeroupper
|
||||
; ALL-NEXT: retq
|
||||
%x1 = icmp sgt <32 x i32> %i, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32
|
||||
|
Loading…
Reference in New Issue
Block a user