Mirror of https://github.com/RPCS3/llvm-mirror.git
Synced 2024-11-25 12:12:47 +01:00
AVX1: Enable vector masked_load/store on AVX1.

Use the AVX1 FP instructions (vmaskmovps/pd) in place of the AVX2 integer instructions (vpmaskmovd/q).

Differential Revision: http://reviews.llvm.org/D16528
llvm-svn: 258675
This commit is contained in:
parent e13e338f45
commit 66fa90c341
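For context, a minimal LLVM IR sketch (hypothetical function, not part of this commit) of the kind of operation this enables: on an AVX1-only target, a 256-bit masked load like the one below can now be selected as vmaskmovps instead of being scalarized.

; Hypothetical example: after this patch, an AVX1-only CPU (e.g. corei7-avx)
; can lower this intrinsic with vmaskmovps rather than scalarizing it.
define <8 x float> @masked_load_f32(<8 x float>* %ptr, <8 x i1> %mask) {
  %res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %ptr, i32 4,
                                                  <8 x i1> %mask,
                                                  <8 x float> undef)
  ret <8 x float> %res
}
declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)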
@@ -8703,116 +8703,47 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                 int_x86_avx2_maskstore_q,
                                 int_x86_avx2_maskstore_q_256>, VEX_W;

-def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
-         (VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-
-def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
-         (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-
-def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
-         (VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>;
-
-def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
-         (VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>;
-
-def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
-         (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask),
-                             (bc_v8f32 (v8i32 immAllZerosV)))),
-         (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))),
-         (VBLENDVPSYrr VR256:$src0, (VMASKMOVPSYrm VR256:$mask, addr:$ptr),
-                       VR256:$mask)>;
-
-def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
-         (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))),
-         (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))),
-         (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
-                       VR256:$mask)>;
-
-def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
-         (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;
-
-def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask),
-                             (bc_v4f32 (v4i32 immAllZerosV)))),
-         (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;
-
-def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src0))),
-         (VBLENDVPSrr VR128:$src0, (VMASKMOVPSrm VR128:$mask, addr:$ptr),
-                      VR128:$mask)>;
-
-def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
-         (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;
-
-def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 immAllZerosV))),
-         (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;
-
-def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0))),
-         (VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr),
-                      VR128:$mask)>;
-
-def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
-         (VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-
-def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
-         (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-
-def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
-         (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
-                             (v4f64 immAllZerosV))),
-         (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))),
-         (VBLENDVPDYrr VR256:$src0, (VMASKMOVPDYrm VR256:$mask, addr:$ptr),
-                       VR256:$mask)>;
-
-def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
-         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
-                             (bc_v4i64 (v8i32 immAllZerosV)))),
-         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
-
-def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))),
-         (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
-                       VR256:$mask)>;
-
-def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
-         (VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>;
-
-def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
-         (VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>;
-
-def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
-         (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
-
-def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
-                             (v2f64 immAllZerosV))),
-         (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
-
-def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src0))),
-         (VBLENDVPDrr VR128:$src0, (VMASKMOVPDrm VR128:$mask, addr:$ptr),
-                      VR128:$mask)>;
-
-def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
-         (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
-
-def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
-                             (bc_v2i64 (v4i32 immAllZerosV)))),
-         (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
-
-def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src0))),
-         (VBLENDVPDrr VR128:$src0, (VPMASKMOVQrm VR128:$mask, addr:$ptr),
-                      VR128:$mask)>;
+multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
+                            ValueType MaskVT, string BlendStr, ValueType ZeroVT> {
+  // masked store
+  def: Pat<(X86mstore addr:$ptr, (MaskVT RC:$mask), (VT RC:$src)),
+           (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
+  // masked load
+  def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
+           (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
+  def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
+                            (VT (bitconvert (ZeroVT immAllZerosV))))),
+           (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
+  def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))),
+           (!cast<Instruction>(BlendStr#"rr")
+              RC:$src0,
+              (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr),
+              RC:$mask)>;
+}
+
+let Predicates = [HasAVX] in {
+  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>;
+  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64, "VBLENDVPD", v4i32>;
+}
+
+let Predicates = [HasAVX1Only] in {
+  // Zero vector is created as v8f32 (based on X86TargetLowering::LowerBUILD_VECTOR).
+  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8f32>;
+  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8f32>;
+  // i32/i64 masked load/store is not supported; use the ps/pd versions instead.
+  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>;
+  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64, "VBLENDVPDY", v8f32>;
+  defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
+  defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
+}
+
+let Predicates = [HasAVX2] in {
+  // Zero vector is created as v8i32 (based on X86TargetLowering::LowerBUILD_VECTOR).
+  defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8i32>;
+  defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8i32>;
+
+  defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>;
+  defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>;
+  defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
+  defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
+}

 //===----------------------------------------------------------------------===//
 // Variable Bit Shifts
 //
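The last pattern in the multiclass above covers loads with a real pass-through value: lanes the mask disables must come from $src0, so the selected code is a masked move followed by a variable blend (vmaskmovps + vblendvps, or the pd forms). A hedged IR sketch of that case, with hypothetical names:

; Hypothetical pass-through case: disabled lanes take their value from
; %src0, so the backend pairs the masked load with a variable blend.
define <8 x float> @masked_load_passthru(<8 x float>* %ptr, <8 x i1> %mask,
                                         <8 x float> %src0) {
  %res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %ptr, i32 4,
                                                  <8 x i1> %mask,
                                                  <8 x float> %src0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)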
@@ -1438,7 +1438,7 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
   int DataWidth = isa<PointerType>(ScalarTy) ?
     DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();

-  return (DataWidth >= 32 && ST->hasAVX2());
+  return (DataWidth >= 32 && ST->hasAVX());
 }

 bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
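The hunk above widens the legality check from AVX2 to any AVX target; the DataWidth >= 32 guard is unchanged, since vmaskmovps/pd only handle 32- and 64-bit elements. A hedged sketch of the distinction (declarations only, illustrative):

; 32-bit elements: DataWidth is 32, so isLegalMaskedLoad now returns true
; on any AVX target and the vectorizer may emit this intrinsic.
declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)

; 8-bit elements: DataWidth is 8 (< 32), so masked loads of this type stay
; illegal and would be scalarized instead.
declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)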
File diff suppressed because it is too large.
@@ -1,9 +1,7 @@
-; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
-; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
+; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1
+; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2
 ; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512

-;AVX1-NOT: llvm.masked
-
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc_linux"

@@ -18,12 +16,12 @@ target triple = "x86_64-pc_linux"
 ; }
 ;}

-;AVX2-LABEL: @foo1
-;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
-;AVX2: call <8 x i32> @llvm.masked.load.v8i32
-;AVX2: add nsw <8 x i32>
-;AVX2: call void @llvm.masked.store.v8i32
-;AVX2: ret void
+;AVX-LABEL: @foo1
+;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX: call <8 x i32> @llvm.masked.load.v8i32
+;AVX: add nsw <8 x i32>
+;AVX: call void @llvm.masked.store.v8i32
+;AVX: ret void

 ;AVX512-LABEL: @foo1
 ;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
@@ -102,12 +100,12 @@ for.end: ; preds = %for.cond
 ; }
 ;}

-;AVX2-LABEL: @foo2
-;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
-;AVX2: call <8 x float> @llvm.masked.load.v8f32
-;AVX2: fadd <8 x float>
-;AVX2: call void @llvm.masked.store.v8f32
-;AVX2: ret void
+;AVX-LABEL: @foo2
+;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX: call <8 x float> @llvm.masked.load.v8f32
+;AVX: fadd <8 x float>
+;AVX: call void @llvm.masked.store.v8f32
+;AVX: ret void

 ;AVX512-LABEL: @foo2
 ;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
@@ -187,13 +185,13 @@ for.end: ; preds = %for.cond
 ; }
 ;}

-;AVX2-LABEL: @foo3
-;AVX2: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
-;AVX2: call <4 x double> @llvm.masked.load.v4f64
-;AVX2: sitofp <4 x i32> %wide.load to <4 x double>
-;AVX2: fadd <4 x double>
-;AVX2: call void @llvm.masked.store.v4f64
-;AVX2: ret void
+;AVX-LABEL: @foo3
+;AVX: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
+;AVX: call <4 x double> @llvm.masked.load.v4f64
+;AVX: sitofp <4 x i32> %wide.load to <4 x double>
+;AVX: fadd <4 x double>
+;AVX: call void @llvm.masked.store.v4f64
+;AVX: ret void

 ;AVX512-LABEL: @foo3
 ;AVX512: icmp slt <8 x i32> %wide.load, <i32 100, i32 100,
@@ -275,9 +273,9 @@ for.end: ; preds = %for.cond
 ; }
 ;}

-;AVX2-LABEL: @foo4
-;AVX2-NOT: llvm.masked
-;AVX2: ret void
+;AVX-LABEL: @foo4
+;AVX-NOT: llvm.masked
+;AVX: ret void

 ;AVX512-LABEL: @foo4
 ;AVX512-NOT: llvm.masked
@@ -349,10 +347,10 @@ for.end: ; preds = %for.cond

 ; The loop here should not be vectorized due to trapping
 ; constant expression
-;AVX2-LABEL: @foo5
-;AVX2-NOT: llvm.masked
-;AVX2: store i32 sdiv
-;AVX2: ret void
+;AVX-LABEL: @foo5
+;AVX-NOT: llvm.masked
+;AVX: store i32 sdiv
+;AVX: ret void

 ;AVX512-LABEL: @foo5
 ;AVX512-NOT: llvm.masked
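For context on the foo5 checks above, a minimal sketch (hypothetical, not taken from the test file) of a conditional store whose value is a trapping constant expression; because the sdiv may trap, it cannot be speculated behind a mask, so no llvm.masked.store is formed:

; Hypothetical reduction of the foo5 pattern: the sdiv constant expression
; below may trap (its divisor is not known to be non-zero), so the
; conditional store cannot be turned into llvm.masked.store.
@g = external global i32

define void @trapping_store(i32* %p, i1 %cond) {
entry:
  br i1 %cond, label %then, label %done
then:
  store i32 sdiv (i32 1, i32 ptrtoint (i32* @g to i32)), i32* %p
  br label %done
done:
  ret void
}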