diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8f24f98be68..789d91175d3 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -399,6 +399,27 @@ namespace {
         return isInt<Width>(CN->getSExtValue());
       return isSExtAbsoluteSymbolRef(Width, N);
     }
+
+    // Indicates we should prefer to use a non-temporal load for this load.
+    bool useNonTemporalLoad(LoadSDNode *N) const {
+      if (!N->isNonTemporal())
+        return false;
+
+      unsigned StoreSize = N->getMemoryVT().getStoreSize();
+
+      if (N->getAlignment() < StoreSize)
+        return false;
+
+      switch (StoreSize) {
+      default: llvm_unreachable("Unsupported store size");
+      case 16:
+        return Subtarget->hasSSE41();
+      case 32:
+        return Subtarget->hasAVX2();
+      case 64:
+        return Subtarget->hasAVX512();
+      }
+    }
   };
 }
 
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 7f7a1a1ba30..e49fb62311d 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -647,35 +647,26 @@ def sdmem : Operand<v2f64> {
 
 // Vector load wrappers to prevent folding of non-temporal aligned loads on
 // supporting targets.
-def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return !Subtarget->hasSSE41() || !cast<LoadSDNode>(N)->isNonTemporal() ||
-         cast<LoadSDNode>(N)->getAlignment() < 16;
-}]>;
-def vec256load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return !Subtarget->hasAVX2() || !cast<LoadSDNode>(N)->isNonTemporal() ||
-         cast<LoadSDNode>(N)->getAlignment() < 32;
-}]>;
-def vec512load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return !Subtarget->hasAVX512() || !cast<LoadSDNode>(N)->isNonTemporal() ||
-         cast<LoadSDNode>(N)->getAlignment() < 64;
+def vecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return !useNonTemporalLoad(cast<LoadSDNode>(N));
 }]>;
 
 // 128-bit load pattern fragments
 // NOTE: all 128-bit integer vector loads are promoted to v2i64
-def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vec128load node:$ptr))>;
-def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vec128load node:$ptr))>;
-def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vec128load node:$ptr))>;
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vecload node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vecload node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vecload node:$ptr))>;
 
 // 256-bit load pattern fragments
 // NOTE: all 256-bit integer vector loads are promoted to v4i64
-def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vec256load node:$ptr))>;
-def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vec256load node:$ptr))>;
-def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vec256load node:$ptr))>;
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vecload node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vecload node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vecload node:$ptr))>;
 
 // 512-bit load pattern fragments
-def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vec512load node:$ptr))>;
-def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vec512load node:$ptr))>;
-def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vec512load node:$ptr))>;
+def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vecload node:$ptr))>;
+def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vecload node:$ptr))>;
+def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vecload node:$ptr))>;
 
 // 128-/256-/512-bit extload pattern fragments
 def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
@@ -685,63 +676,53 @@ def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
 // Like 'store', but always requires vector size alignment.
 def alignedstore : PatFrag<(ops node:$val, node:$ptr),
                            (store node:$val, node:$ptr), [{
-  StoreSDNode *St = cast<StoreSDNode>(N);
+  auto *St = cast<StoreSDNode>(N);
   return St->getAlignment() >= St->getMemoryVT().getStoreSize();
 }]>;
 
 // Like 'load', but always requires 128-bit vector alignment.
-def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 16 &&
-         (!Subtarget->hasSSE41() || !cast<LoadSDNode>(N)->isNonTemporal());
-}]>;
-
-// Like 'load', but always requires 256-bit vector alignment.
-def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 32 &&
-         (!Subtarget->hasAVX2() || !cast<LoadSDNode>(N)->isNonTemporal());
-}]>;
-
-// Like 'load', but always requires 512-bit vector alignment.
-def alignedload512 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 64 &&
-         (!Subtarget->hasAVX512() || !cast<LoadSDNode>(N)->isNonTemporal());
+def alignedvecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  auto *Ld = cast<LoadSDNode>(N);
+  return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize() &&
+         !useNonTemporalLoad(cast<LoadSDNode>(N));
 }]>;
 
 // 128-bit aligned load pattern fragments
 // NOTE: all 128-bit integer vector loads are promoted to v2i64
 def alignedloadv4f32 : PatFrag<(ops node:$ptr),
-                               (v4f32 (alignedload node:$ptr))>;
+                               (v4f32 (alignedvecload node:$ptr))>;
 def alignedloadv2f64 : PatFrag<(ops node:$ptr),
-                               (v2f64 (alignedload node:$ptr))>;
+                               (v2f64 (alignedvecload node:$ptr))>;
 def alignedloadv2i64 : PatFrag<(ops node:$ptr),
-                               (v2i64 (alignedload node:$ptr))>;
+                               (v2i64 (alignedvecload node:$ptr))>;
 
 // 256-bit aligned load pattern fragments
 // NOTE: all 256-bit integer vector loads are promoted to v4i64
 def alignedloadv8f32 : PatFrag<(ops node:$ptr),
-                               (v8f32 (alignedload256 node:$ptr))>;
+                               (v8f32 (alignedvecload node:$ptr))>;
 def alignedloadv4f64 : PatFrag<(ops node:$ptr),
-                               (v4f64 (alignedload256 node:$ptr))>;
+                               (v4f64 (alignedvecload node:$ptr))>;
def alignedloadv4i64 : PatFrag<(ops node:$ptr),
-                               (v4i64 (alignedload256 node:$ptr))>;
+                               (v4i64 (alignedvecload node:$ptr))>;
 
 // 512-bit aligned load pattern fragments
 def alignedloadv16f32 : PatFrag<(ops node:$ptr),
-                                (v16f32 (alignedload512 node:$ptr))>;
+                                (v16f32 (alignedvecload node:$ptr))>;
 def alignedloadv8f64 : PatFrag<(ops node:$ptr),
-                                (v8f64 (alignedload512 node:$ptr))>;
+                                (v8f64 (alignedvecload node:$ptr))>;
 def alignedloadv8i64 : PatFrag<(ops node:$ptr),
-                                (v8i64 (alignedload512 node:$ptr))>;
+                                (v8i64 (alignedvecload node:$ptr))>;
 
-// Like 'vec128load', but uses special alignment checks suitable for use in
+// Like 'vecload', but uses special alignment checks suitable for use in
 // memory operands in most SSE instructions, which are required to
 // be naturally aligned on some targets but not on others. If the subtarget
 // allows unaligned accesses, match any load, though this may require
 // setting a feature bit in the processor (on startup, for example).
 // Opteron 10h and later implement such a feature.
-def memop : PatFrag<(ops node:$ptr), (vec128load node:$ptr), [{
+def memop : PatFrag<(ops node:$ptr), (vecload node:$ptr), [{
+  auto *Ld = cast<LoadSDNode>(N);
   return Subtarget->hasSSEUnalignedMem() ||
-         cast<LoadSDNode>(N)->getAlignment() >= 16;
+         Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
 }]>;
 
 // 128-bit memop pattern fragments
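
For reference, the feature checks in useNonTemporalLoad line up with the ISA levels at which a non-temporal vector load instruction exists: MOVNTDQA (SSE4.1) for 16-byte, VMOVNTDQA (AVX2) for 32-byte, and 512-bit VMOVNTDQA (AVX-512) for 64-byte accesses. Below is a minimal, illustrative sketch (not part of the patch) of the kind of source the vecload/alignedvecload predicates protect; the function name is hypothetical, the intrinsics are the usual immintrin.h ones, and it assumes compilation with AVX2 enabled (e.g. -mavx2):

    #include <immintrin.h>

    // Hypothetical example: 'src' is 32-byte aligned and read with a stream
    // (non-temporal) load. With AVX2 the load should stay a standalone
    // VMOVNTDQA; folding it into the VPADDD as a plain memory operand would
    // silently drop the non-temporal hint, which is what the refactored
    // predicates prevent.
    __m256i accumulate_stream(__m256i *src, __m256i acc) {
      __m256i v = _mm256_stream_load_si256(src); // non-temporal 32-byte load
      return _mm256_add_epi32(acc, v);           // load must not be folded here
    }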