mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
Added Skylake client to X86 targets and features
Changes in X86.td: I set the features of Intel processors in incremental form: IVB = SNB + X, HSW = IVB + X, and so on. I added the Skylake client processor and defined its features. FeatureADX was missing on KNL. Added some new features to the appropriate processors: SMAP, IFMA, PREFETCHWT1, VMFUNC and others. Differential Revision: http://reviews.llvm.org/D16357 llvm-svn: 258659
This commit is contained in:
parent
01175f32ea
commit
832e2d5858
@ -805,25 +805,34 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
|
||||
Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1);
|
||||
|
||||
Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
|
||||
Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
|
||||
Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
|
||||
Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1);
|
||||
Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
|
||||
Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
|
||||
Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
|
||||
Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
|
||||
Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
|
||||
Features["smap"] = HasLeaf7 && ((EBX >> 20) & 1);
|
||||
Features["pcommit"] = HasLeaf7 && ((EBX >> 22) & 1);
|
||||
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
|
||||
Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
|
||||
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
|
||||
// Enable protection keys
|
||||
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
|
||||
|
||||
// AVX512 is only supported if the OS supports the context save for it.
|
||||
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
|
||||
Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
|
||||
Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
|
||||
Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
|
||||
Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
|
||||
Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
|
||||
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
|
||||
Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
|
||||
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
|
||||
|
||||
Features["prefetchwt1"] = HasLeaf7 && (ECX & 1);
|
||||
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
|
||||
// Enable protection keys
|
||||
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
|
||||
|
||||
bool HasLeafD = MaxLevel >= 0xd &&
|
||||
!GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
|
||||
|
@ -125,6 +125,9 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
|
||||
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
|
||||
"Enable AVX-512 PreFetch Instructions",
|
||||
[FeatureAVX512]>;
|
||||
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1",
|
||||
"true",
|
||||
"Prefetch with Intent to Write and T1 Hint">;
|
||||
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
|
||||
"Enable AVX-512 Doubleword and Quadword Instructions",
|
||||
[FeatureAVX512]>;
|
||||
@ -137,6 +140,9 @@ def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
|
||||
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
|
||||
"Enable AVX-512 Vector Bit Manipulation Instructions",
|
||||
[FeatureAVX512]>;
|
||||
def FeatureIFMA : SubtargetFeature<"ifma", "HasIFMA", "true",
|
||||
"Enable AVX-512 Integer Fused Multiple-Add",
|
||||
[FeatureAVX512]>;
|
||||
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
|
||||
"Enable protection keys">;
|
||||
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
|
||||
@ -202,6 +208,20 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
|
||||
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
|
||||
"PadShortFunctions", "true",
|
||||
"Pad short functions">;
|
||||
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasInvPCId", "true",
|
||||
"Invalidate Process-Context Identifier">;
|
||||
def FeatureVMFUNC : SubtargetFeature<"vmfunc", "HasVMFUNC", "true",
|
||||
"VM Functions">;
|
||||
def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
|
||||
"Supervisor Mode Access Protection">;
|
||||
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
|
||||
"Enable Software Guard Extensions">;
|
||||
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
|
||||
"Flush A Cache Line Optimized">;
|
||||
def FeaturePCOMMIT : SubtargetFeature<"pcommit", "HasPCOMMIT", "true",
|
||||
"Enable Persistent Commit">;
|
||||
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
|
||||
"Cache Line Write Back">;
|
||||
// TODO: This feature ought to be renamed.
|
||||
// What it really refers to are CPUs for which certain instructions
|
||||
// (which ones besides the example below?) are microcoded.
|
||||
@ -365,13 +385,12 @@ def : WestmereProc<"westmere">;
|
||||
|
||||
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
|
||||
// rather than a superset.
|
||||
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
||||
def ProcIntelSNB : SubtargetFeature<"snb", "X86ProcFamily", "IntelSNB",
|
||||
" Intel SandyBridge Processor", [
|
||||
FeatureMMX,
|
||||
FeatureAVX,
|
||||
FeatureFXSR,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureSlowBTMem,
|
||||
FeatureSlowUAMem32,
|
||||
FeaturePOPCNT,
|
||||
FeatureAES,
|
||||
FeaturePCLMUL,
|
||||
@ -379,187 +398,125 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
||||
FeatureXSAVEOPT,
|
||||
FeatureLAHFSAHF
|
||||
]>;
|
||||
|
||||
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
||||
ProcIntelSNB,
|
||||
FeatureSlowBTMem,
|
||||
FeatureSlowUAMem32
|
||||
]>;
|
||||
def : SandyBridgeProc<"sandybridge">;
|
||||
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
|
||||
|
||||
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
||||
FeatureMMX,
|
||||
FeatureAVX,
|
||||
FeatureFXSR,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureSlowBTMem,
|
||||
FeatureSlowUAMem32,
|
||||
FeaturePOPCNT,
|
||||
FeatureAES,
|
||||
FeaturePCLMUL,
|
||||
FeatureXSAVE,
|
||||
FeatureXSAVEOPT,
|
||||
def ProcIntelIVB : SubtargetFeature<"ivb", "X86ProcFamily", "IntelIVB",
|
||||
" Intel IvyBridge Processor", [
|
||||
ProcIntelSNB,
|
||||
FeatureRDRAND,
|
||||
FeatureF16C,
|
||||
FeatureFSGSBase,
|
||||
FeatureLAHFSAHF
|
||||
FeatureFSGSBase
|
||||
]>;
|
||||
|
||||
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
||||
ProcIntelIVB,
|
||||
FeatureSlowBTMem,
|
||||
FeatureSlowUAMem32
|
||||
]>;
|
||||
def : IvyBridgeProc<"ivybridge">;
|
||||
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
|
||||
|
||||
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
||||
FeatureMMX,
|
||||
def ProcIntelHSW : SubtargetFeature<"hsw", "X86ProcFamily", "IntelHSW",
|
||||
" Intel Haswell Processor", [
|
||||
ProcIntelIVB,
|
||||
FeatureAVX2,
|
||||
FeatureFXSR,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureSlowBTMem,
|
||||
FeaturePOPCNT,
|
||||
FeatureAES,
|
||||
FeaturePCLMUL,
|
||||
FeatureRDRAND,
|
||||
FeatureXSAVE,
|
||||
FeatureXSAVEOPT,
|
||||
FeatureF16C,
|
||||
FeatureFSGSBase,
|
||||
FeatureMOVBE,
|
||||
FeatureLZCNT,
|
||||
FeatureBMI,
|
||||
FeatureBMI2,
|
||||
FeatureFMA,
|
||||
FeatureLZCNT,
|
||||
FeatureMOVBE,
|
||||
FeatureINVPCID,
|
||||
FeatureVMFUNC,
|
||||
FeatureRTM,
|
||||
FeatureHLE,
|
||||
FeatureSlowIncDec,
|
||||
FeatureLAHFSAHF
|
||||
FeatureSlowIncDec
|
||||
]>;
|
||||
|
||||
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||
[ProcIntelHSW]>;
|
||||
def : HaswellProc<"haswell">;
|
||||
def : HaswellProc<"core-avx2">; // Legacy alias.
|
||||
|
||||
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
||||
FeatureMMX,
|
||||
FeatureAVX2,
|
||||
FeatureFXSR,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureSlowBTMem,
|
||||
FeaturePOPCNT,
|
||||
FeatureAES,
|
||||
FeaturePCLMUL,
|
||||
FeatureXSAVE,
|
||||
FeatureXSAVEOPT,
|
||||
FeatureRDRAND,
|
||||
FeatureF16C,
|
||||
FeatureFSGSBase,
|
||||
FeatureMOVBE,
|
||||
FeatureLZCNT,
|
||||
FeatureBMI,
|
||||
FeatureBMI2,
|
||||
FeatureFMA,
|
||||
FeatureRTM,
|
||||
FeatureHLE,
|
||||
def ProcIntelBDW : SubtargetFeature<"bdw", "X86ProcFamily", "IntelBDW",
|
||||
" Intel Broadwell Processor", [
|
||||
ProcIntelHSW,
|
||||
FeatureADX,
|
||||
FeatureRDSEED,
|
||||
FeatureSlowIncDec,
|
||||
FeatureLAHFSAHF
|
||||
FeatureSMAP
|
||||
]>;
|
||||
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||
[ProcIntelBDW]>;
|
||||
def : BroadwellProc<"broadwell">;
|
||||
|
||||
def ProcIntelSKL : SubtargetFeature<"skl", "X86ProcFamily", "IntelSKL",
|
||||
" Intel Skylake Client Processor", [
|
||||
ProcIntelBDW,
|
||||
FeatureMPX,
|
||||
FeatureXSAVEC,
|
||||
FeatureXSAVES,
|
||||
FeatureSGX,
|
||||
FeatureCLFLUSHOPT
|
||||
]>;
|
||||
|
||||
// FIXME: define SKL model
|
||||
class SkylakeClientProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||
[ProcIntelSKL]>;
|
||||
def : SkylakeClientProc<"skl">;
|
||||
|
||||
// FIXME: define KNL model
|
||||
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
||||
FeatureMMX,
|
||||
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,[
|
||||
ProcIntelIVB,
|
||||
FeatureAVX512,
|
||||
FeatureFXSR,
|
||||
FeatureERI,
|
||||
FeatureCDI,
|
||||
FeaturePFI,
|
||||
FeatureCMPXCHG16B,
|
||||
FeaturePOPCNT,
|
||||
FeatureAES,
|
||||
FeaturePCLMUL,
|
||||
FeatureXSAVE,
|
||||
FeatureXSAVEOPT,
|
||||
FeatureRDRAND,
|
||||
FeatureF16C,
|
||||
FeatureFSGSBase,
|
||||
FeaturePREFETCHWT1,
|
||||
FeatureADX,
|
||||
FeatureRDSEED,
|
||||
FeatureMOVBE,
|
||||
FeatureLZCNT,
|
||||
FeatureBMI,
|
||||
FeatureBMI2,
|
||||
FeatureFMA,
|
||||
FeatureRTM,
|
||||
FeatureHLE,
|
||||
FeatureSlowIncDec,
|
||||
FeatureMPX,
|
||||
FeatureLAHFSAHF
|
||||
FeatureFMA
|
||||
]>;
|
||||
def : KnightsLandingProc<"knl">;
|
||||
|
||||
// FIXME: define SKX model
|
||||
class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
||||
FeatureMMX,
|
||||
def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily", "IntelSKX",
|
||||
" Intel Skylake Server Processor", [
|
||||
ProcIntelSKL,
|
||||
FeatureAVX512,
|
||||
FeatureFXSR,
|
||||
FeatureCDI,
|
||||
FeatureDQI,
|
||||
FeatureBWI,
|
||||
FeatureVLX,
|
||||
FeaturePKU,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureSlowBTMem,
|
||||
FeaturePOPCNT,
|
||||
FeatureAES,
|
||||
FeaturePCLMUL,
|
||||
FeatureXSAVE,
|
||||
FeatureXSAVEOPT,
|
||||
FeatureRDRAND,
|
||||
FeatureF16C,
|
||||
FeatureFSGSBase,
|
||||
FeatureMOVBE,
|
||||
FeatureLZCNT,
|
||||
FeatureBMI,
|
||||
FeatureBMI2,
|
||||
FeatureFMA,
|
||||
FeatureRTM,
|
||||
FeatureHLE,
|
||||
FeatureADX,
|
||||
FeatureRDSEED,
|
||||
FeatureSlowIncDec,
|
||||
FeatureMPX,
|
||||
FeatureXSAVEC,
|
||||
FeatureXSAVES,
|
||||
FeatureLAHFSAHF
|
||||
FeaturePCOMMIT,
|
||||
FeatureCLWB
|
||||
]>;
|
||||
def : SkylakeProc<"skylake">;
|
||||
def : SkylakeProc<"skx">; // Legacy alias.
|
||||
|
||||
class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
||||
FeatureMMX,
|
||||
FeatureAVX512,
|
||||
FeatureFXSR,
|
||||
FeatureCDI,
|
||||
FeatureDQI,
|
||||
FeatureBWI,
|
||||
FeatureVLX,
|
||||
FeaturePKU,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureSlowBTMem,
|
||||
FeaturePOPCNT,
|
||||
FeatureAES,
|
||||
FeaturePCLMUL,
|
||||
FeatureXSAVE,
|
||||
FeatureXSAVEOPT,
|
||||
FeatureRDRAND,
|
||||
FeatureF16C,
|
||||
FeatureFSGSBase,
|
||||
FeatureMOVBE,
|
||||
FeatureLZCNT,
|
||||
FeatureBMI,
|
||||
FeatureBMI2,
|
||||
// FIXME: define SKX model
|
||||
class SkylakeServerProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||
[ ProcIntelSKX]>;
|
||||
def : SkylakeServerProc<"skylake">;
|
||||
def : SkylakeServerProc<"skx">; // Legacy alias.
|
||||
|
||||
def ProcIntelCNL : SubtargetFeature<"cnl", "X86ProcFamily", "IntelCNL",
|
||||
" Intel Cannonlake Processor", [
|
||||
ProcIntelSKX,
|
||||
FeatureVBMI,
|
||||
FeatureFMA,
|
||||
FeatureRTM,
|
||||
FeatureHLE,
|
||||
FeatureADX,
|
||||
FeatureRDSEED,
|
||||
FeatureSlowIncDec,
|
||||
FeatureMPX,
|
||||
FeatureXSAVEC,
|
||||
FeatureXSAVES,
|
||||
FeatureLAHFSAHF
|
||||
FeatureIFMA,
|
||||
FeatureSHA
|
||||
]>;
|
||||
|
||||
class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||
[ ProcIntelCNL ]>;
|
||||
def : CannonlakeProc<"cannonlake">;
|
||||
def : CannonlakeProc<"cnl">;
|
||||
|
||||
|
@ -797,6 +797,8 @@ def HasBMI : Predicate<"Subtarget->hasBMI()">;
|
||||
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
|
||||
def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
|
||||
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
|
||||
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
|
||||
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
|
||||
def HasRTM : Predicate<"Subtarget->hasRTM()">;
|
||||
def HasHLE : Predicate<"Subtarget->hasHLE()">;
|
||||
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
|
||||
|
@ -262,6 +262,7 @@ void X86Subtarget::initializeEnvironment() {
|
||||
HasBMI = false;
|
||||
HasBMI2 = false;
|
||||
HasVBMI = false;
|
||||
HasIFMA = false;
|
||||
HasRTM = false;
|
||||
HasHLE = false;
|
||||
HasERI = false;
|
||||
|
@ -55,7 +55,8 @@ protected:
|
||||
};
|
||||
|
||||
enum X86ProcFamilyEnum {
|
||||
Others, IntelAtom, IntelSLM
|
||||
Others, IntelAtom, IntelSLM, IntelSNB, IntelIVB, IntelHSW, IntelBDW,
|
||||
IntelKNL, IntelSKL, IntelSKX, IntelCNL
|
||||
};
|
||||
|
||||
/// X86 processor family: Intel Atom, and others
|
||||
@ -137,6 +138,9 @@ protected:
|
||||
/// Processor has VBMI instructions.
|
||||
bool HasVBMI;
|
||||
|
||||
/// Processor has Integer Fused Multiply Add
|
||||
bool HasIFMA;
|
||||
|
||||
/// Processor has RTM instructions.
|
||||
bool HasRTM;
|
||||
|
||||
@ -158,6 +162,9 @@ protected:
|
||||
/// Processor has LAHF/SAHF instructions.
|
||||
bool HasLAHFSAHF;
|
||||
|
||||
/// Processor has Prefetch with intent to Write instruction
|
||||
bool HasPFPREFETCHWT1;
|
||||
|
||||
/// True if BT (bit test) of memory instructions are slow.
|
||||
bool IsBTMemSlow;
|
||||
|
||||
@ -229,9 +236,30 @@ protected:
|
||||
/// Processor has PKU extensions
|
||||
bool HasPKU;
|
||||
|
||||
/// Processot supports MPX - Memory Protection Extensions
|
||||
/// Processor supports MPX - Memory Protection Extensions
|
||||
bool HasMPX;
|
||||
|
||||
/// Processor supports Invalidate Process-Context Identifier
|
||||
bool HasInvPCId;
|
||||
|
||||
/// Processor has VM Functions
|
||||
bool HasVMFUNC;
|
||||
|
||||
/// Processor has Supervisor Mode Access Protection
|
||||
bool HasSMAP;
|
||||
|
||||
/// Processor has Software Guard Extensions
|
||||
bool HasSGX;
|
||||
|
||||
/// Processor supports Flush Cache Line instruction
|
||||
bool HasCLFLUSHOPT;
|
||||
|
||||
/// Processor has Persistent Commit feature
|
||||
bool HasPCOMMIT;
|
||||
|
||||
/// Processor supports Cache Line Write Back instruction
|
||||
bool HasCLWB;
|
||||
|
||||
/// Use software floating point for code generation.
|
||||
bool UseSoftFloat;
|
||||
|
||||
@ -378,6 +406,7 @@ public:
|
||||
bool hasBMI() const { return HasBMI; }
|
||||
bool hasBMI2() const { return HasBMI2; }
|
||||
bool hasVBMI() const { return HasVBMI; }
|
||||
bool hasIFMA() const { return HasIFMA; }
|
||||
bool hasRTM() const { return HasRTM; }
|
||||
bool hasHLE() const { return HasHLE; }
|
||||
bool hasADX() const { return HasADX; }
|
||||
|
@ -214,31 +214,31 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
|
||||
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: addl (%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $68, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
|
||||
@ -303,31 +303,31 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
||||
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $68, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
|
||||
@ -390,31 +390,31 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
|
||||
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
|
||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||
; AVX512F-32-NEXT: addl (%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $68, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
|
||||
@ -479,31 +479,31 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
|
||||
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
|
||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512F-32-NEXT: addl $68, %esp
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
|
||||
@ -2879,6 +2879,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16>
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_w_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
@ -2899,6 +2909,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i8 %x1, <
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||
@ -2919,6 +2939,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16>
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
@ -2939,6 +2969,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16>
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_w_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
@ -2959,6 +2999,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i8 %x1, <
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_wi_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||
@ -2979,6 +3029,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i8 %x1,
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm2 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||
@ -2992,13 +3052,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i8, <32 x i16>
|
||||
define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovd %esi, %k1
|
||||
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: kmovd %esi, %k1
|
||||
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||
@ -3019,6 +3089,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16>
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
@ -3039,6 +3119,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16>
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_w_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
@ -3059,6 +3149,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i8 %x1, <
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_wi_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||
@ -3079,6 +3179,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16>
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
@ -3152,13 +3262,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i3
|
||||
define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
|
||||
@ -3172,13 +3292,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i3
|
||||
define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
|
||||
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
|
||||
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
|
||||
@ -3192,13 +3322,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>,
|
||||
define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
|
Loading…
Reference in New Issue
Block a user