1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

AVX1 : Enable vector masked_load/store to AVX1.

Use AVX1 FP instructions (vmaskmovps/pd) in place of the AVX2 int instructions (vpmaskmovd/q).

Differential Revision: http://reviews.llvm.org/D16528

llvm-svn: 258675
This commit is contained in:
Igor Breger 2016-01-25 10:17:11 +00:00
parent e13e338f45
commit 66fa90c341
4 changed files with 868 additions and 743 deletions

View File

@ -8703,116 +8703,47 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskstore_q,
int_x86_avx2_maskstore_q_256>, VEX_W;
def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
(VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>;
def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
(VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
(VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>;
def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
(VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>;
def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
(VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask),
(bc_v8f32 (v8i32 immAllZerosV)))),
(VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))),
(VBLENDVPSYrr VR256:$src0, (VMASKMOVPSYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
(VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))),
(VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))),
(VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
(VMASKMOVPSrm VR128:$mask, addr:$ptr)>;
def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask),
(bc_v4f32 (v4i32 immAllZerosV)))),
(VMASKMOVPSrm VR128:$mask, addr:$ptr)>;
def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src0))),
(VBLENDVPSrr VR128:$src0, (VMASKMOVPSrm VR128:$mask, addr:$ptr),
VR128:$mask)>;
def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
(VPMASKMOVDrm VR128:$mask, addr:$ptr)>;
def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 immAllZerosV))),
(VPMASKMOVDrm VR128:$mask, addr:$ptr)>;
def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0))),
(VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr),
VR128:$mask)>;
def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
(VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
(VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
(VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
(v4f64 immAllZerosV))),
(VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))),
(VBLENDVPDYrr VR256:$src0, (VMASKMOVPDYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
(VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
(bc_v4i64 (v8i32 immAllZerosV)))),
(VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))),
(VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
(VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>;
def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
(VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>;
def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
(VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
(v2f64 immAllZerosV))),
(VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src0))),
(VBLENDVPDrr VR128:$src0, (VMASKMOVPDrm VR128:$mask, addr:$ptr),
VR128:$mask)>;
def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
(VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
(bc_v2i64 (v4i32 immAllZerosV)))),
(VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src0))),
(VBLENDVPDrr VR128:$src0, (VPMASKMOVQrm VR128:$mask, addr:$ptr),
VR128:$mask)>;
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
ValueType MaskVT, string BlendStr, ValueType ZeroVT> {
// masked store
def: Pat<(X86mstore addr:$ptr, (MaskVT RC:$mask), (VT RC:$src)),
(!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
// masked load
def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
(VT (bitconvert (ZeroVT immAllZerosV))))),
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))),
(!cast<Instruction>(BlendStr#"rr")
RC:$src0,
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr),
RC:$mask)>;
}
let Predicates = [HasAVX] in {
defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>;
defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64, "VBLENDVPD", v4i32>;
}
let Predicates = [HasAVX1Only] in {
// zero vector created as v8f32 (based on X86TargetLowering::LowerBUILD_VECTOR)
defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8f32>;
defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8f32>;
// i32/i64 load/store are not supported; use the ps/pd versions instead.
defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>;
defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64, "VBLENDVPDY", v8f32>;
defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
}
let Predicates = [HasAVX2] in {
// zero vector created as v8i32 (based on X86TargetLowering::LowerBUILD_VECTOR)
defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8i32>;
defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8i32>;
defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>;
defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>;
defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
}
//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//

View File

@ -1438,7 +1438,7 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
int DataWidth = isa<PointerType>(ScalarTy) ?
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
return (DataWidth >= 32 && ST->hasAVX2());
return (DataWidth >= 32 && ST->hasAVX());
}
bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,7 @@
; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
;AVX1-NOT: llvm.masked
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc_linux"
@ -18,12 +16,12 @@ target triple = "x86_64-pc_linux"
; }
;}
;AVX2-LABEL: @foo1
;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
;AVX2: call <8 x i32> @llvm.masked.load.v8i32
;AVX2: add nsw <8 x i32>
;AVX2: call void @llvm.masked.store.v8i32
;AVX2: ret void
;AVX-LABEL: @foo1
;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
;AVX: call <8 x i32> @llvm.masked.load.v8i32
;AVX: add nsw <8 x i32>
;AVX: call void @llvm.masked.store.v8i32
;AVX: ret void
;AVX512-LABEL: @foo1
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
@ -102,12 +100,12 @@ for.end: ; preds = %for.cond
; }
;}
;AVX2-LABEL: @foo2
;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
;AVX2: call <8 x float> @llvm.masked.load.v8f32
;AVX2: fadd <8 x float>
;AVX2: call void @llvm.masked.store.v8f32
;AVX2: ret void
;AVX-LABEL: @foo2
;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
;AVX: call <8 x float> @llvm.masked.load.v8f32
;AVX: fadd <8 x float>
;AVX: call void @llvm.masked.store.v8f32
;AVX: ret void
;AVX512-LABEL: @foo2
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
@ -187,13 +185,13 @@ for.end: ; preds = %for.cond
; }
;}
;AVX2-LABEL: @foo3
;AVX2: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
;AVX2: call <4 x double> @llvm.masked.load.v4f64
;AVX2: sitofp <4 x i32> %wide.load to <4 x double>
;AVX2: fadd <4 x double>
;AVX2: call void @llvm.masked.store.v4f64
;AVX2: ret void
;AVX-LABEL: @foo3
;AVX: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
;AVX: call <4 x double> @llvm.masked.load.v4f64
;AVX: sitofp <4 x i32> %wide.load to <4 x double>
;AVX: fadd <4 x double>
;AVX: call void @llvm.masked.store.v4f64
;AVX: ret void
;AVX512-LABEL: @foo3
;AVX512: icmp slt <8 x i32> %wide.load, <i32 100, i32 100,
@ -275,9 +273,9 @@ for.end: ; preds = %for.cond
; }
;}
;AVX2-LABEL: @foo4
;AVX2-NOT: llvm.masked
;AVX2: ret void
;AVX-LABEL: @foo4
;AVX-NOT: llvm.masked
;AVX: ret void
;AVX512-LABEL: @foo4
;AVX512-NOT: llvm.masked
@ -349,10 +347,10 @@ for.end: ; preds = %for.cond
; The loop here should not be vectorized due to trapping
; constant expression
;AVX2-LABEL: @foo5
;AVX2-NOT: llvm.masked
;AVX2: store i32 sdiv
;AVX2: ret void
;AVX-LABEL: @foo5
;AVX-NOT: llvm.masked
;AVX: store i32 sdiv
;AVX: ret void
;AVX512-LABEL: @foo5
;AVX512-NOT: llvm.masked