From 832e2d585871dc7c614d21702e1ec0ecc0d13f60 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 24 Jan 2016 10:41:28 +0000 Subject: [PATCH] Added Skylake client to X86 targets and features Changes in X86.td: I set features of Intel processors in incremental form: IVB = SNB + X HSW = IVB + X .. I added Skylake client processor and defined it's features FeatureADX was missing on KNL Added some new features to appropriate processors SMAP, IFMA, PREFETCHWT1, VMFUNC and others Differential Revision: http://reviews.llvm.org/D16357 llvm-svn: 258659 --- lib/Support/Host.cpp | 15 +- lib/Target/X86/X86.td | 233 +++++++++------------- lib/Target/X86/X86InstrInfo.td | 2 + lib/Target/X86/X86Subtarget.cpp | 1 + lib/Target/X86/X86Subtarget.h | 33 +++- test/CodeGen/X86/avx512bw-intrinsics.ll | 252 ++++++++++++++++++------ 6 files changed, 337 insertions(+), 199 deletions(-) diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 599089a316d..ffad1d79ad0 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -805,25 +805,34 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1); Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); + Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1); Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); + Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); + Features["smap"] = HasLeaf7 && ((EBX >> 20) & 1); + Features["pcommit"] = HasLeaf7 && ((EBX >> 22) & 1); + Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); + Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); - // Enable protection keys - Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); // AVX512 is only supported if the OS supports the context save for it. Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; + Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; - Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; + + Features["prefetchwt1"] = HasLeaf7 && (ECX & 1); + Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; + // Enable protection keys + Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); bool HasLeafD = MaxLevel >= 0xd && !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 06b8b466674..d324e53ea14 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -125,6 +125,9 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; +def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1", + "true", + "Prefetch with Intent to Write and T1 Hint">; def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", "Enable AVX-512 Doubleword and Quadword Instructions", [FeatureAVX512]>; @@ -137,6 +140,9 @@ def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true", "Enable AVX-512 Vector Bit Manipulation Instructions", [FeatureAVX512]>; +def FeatureIFMA : SubtargetFeature<"ifma", "HasIFMA", "true", + "Enable AVX-512 Integer Fused Multiple-Add", + [FeatureAVX512]>; def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", "Enable protection keys">; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", @@ -202,6 +208,20 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw", def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; +def FeatureINVPCID : SubtargetFeature<"invpcid", "HasInvPCId", "true", + "Invalidate Process-Context Identifier">; +def FeatureVMFUNC : SubtargetFeature<"vmfunc", "HasVMFUNC", "true", + "VM Functions">; +def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true", + "Supervisor Mode Access Protection">; +def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", + "Enable Software Guard Extensions">; +def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true", + "Flush A Cache Line Optimized">; +def FeaturePCOMMIT : SubtargetFeature<"pcommit", "HasPCOMMIT", "true", + "Enable Persistent Commit">; +def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", + "Cache Line Write Back">; // TODO: This feature ought to be renamed. // What it really refers to are CPUs for which certain instructions // (which ones besides the example below?) are microcoded. @@ -365,13 +385,12 @@ def : WestmereProc<"westmere">; // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. -class SandyBridgeProc : ProcessorModel : ProcessorModel; + +class SandyBridgeProc : ProcessorModel; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. -class IvyBridgeProc : ProcessorModel; + +class IvyBridgeProc : ProcessorModel; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. -class HaswellProc : ProcessorModel; + +class HaswellProc : ProcessorModel; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. -class BroadwellProc : ProcessorModel; +class BroadwellProc : ProcessorModel; def : BroadwellProc<"broadwell">; +def ProcIntelSKL : SubtargetFeature<"skl", "X86ProcFamily", "IntelSKL", + " Intel Skylake Client Processor", [ + ProcIntelBDW, + FeatureMPX, + FeatureXSAVEC, + FeatureXSAVES, + FeatureSGX, + FeatureCLFLUSHOPT +]>; + +// FIXME: define SKL model +class SkylakeClientProc : ProcessorModel; +def : SkylakeClientProc<"skl">; + // FIXME: define KNL model -class KnightsLandingProc : ProcessorModel : ProcessorModel; def : KnightsLandingProc<"knl">; -// FIXME: define SKX model -class SkylakeProc : ProcessorModel; -def : SkylakeProc<"skylake">; -def : SkylakeProc<"skx">; // Legacy alias. -class CannonlakeProc : ProcessorModel : ProcessorModel; +def : SkylakeServerProc<"skylake">; +def : SkylakeServerProc<"skx">; // Legacy alias. + +def ProcIntelCNL : SubtargetFeature<"cnl", "X86ProcFamily", "IntelCNL", + " Intel Cannonlake Processor", [ + ProcIntelSKX, FeatureVBMI, - FeatureFMA, - FeatureRTM, - FeatureHLE, - FeatureADX, - FeatureRDSEED, - FeatureSlowIncDec, - FeatureMPX, - FeatureXSAVEC, - FeatureXSAVES, - FeatureLAHFSAHF + FeatureIFMA, + FeatureSHA ]>; + +class CannonlakeProc : ProcessorModel; def : CannonlakeProc<"cannonlake">; def : CannonlakeProc<"cnl">; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 2c0be6f6ca1..e2624e5d9a3 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -797,6 +797,8 @@ def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def HasVBMI : Predicate<"Subtarget->hasVBMI()">, AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">; +def HasIFMA : Predicate<"Subtarget->hasIFMA()">, + AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">; def HasRTM : Predicate<"Subtarget->hasRTM()">; def HasHLE : Predicate<"Subtarget->hasHLE()">; def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 739de59a6f7..cca25ec7563 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -262,6 +262,7 @@ void X86Subtarget::initializeEnvironment() { HasBMI = false; HasBMI2 = false; HasVBMI = false; + HasIFMA = false; HasRTM = false; HasHLE = false; HasERI = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index c1adb4469fb..d355ca310ea 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -55,7 +55,8 @@ protected: }; enum X86ProcFamilyEnum { - Others, IntelAtom, IntelSLM + Others, IntelAtom, IntelSLM, IntelSNB, IntelIVB, IntelHSW, IntelBDW, + IntelKNL, IntelSKL, IntelSKX, IntelCNL }; /// X86 processor family: Intel Atom, and others @@ -137,6 +138,9 @@ protected: /// Processor has VBMI instructions. bool HasVBMI; + /// Processor has Integer Fused Multiply Add + bool HasIFMA; + /// Processor has RTM instructions. bool HasRTM; @@ -158,6 +162,9 @@ protected: /// Processor has LAHF/SAHF instructions. bool HasLAHFSAHF; + /// Processor has Prefetch with intent to Write instruction + bool HasPFPREFETCHWT1; + /// True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -229,9 +236,30 @@ protected: /// Processor has PKU extenstions bool HasPKU; - /// Processot supports MPX - Memory Protection Extensions + /// Processor supports MPX - Memory Protection Extensions bool HasMPX; + /// Processor supports Invalidate Process-Context Identifier + bool HasInvPCId; + + /// Processor has VM Functions + bool HasVMFUNC; + + /// Processor has Supervisor Mode Access Protection + bool HasSMAP; + + /// Processor has Software Guard Extensions + bool HasSGX; + + /// Processor supports Flush Cache Line instruction + bool HasCLFLUSHOPT; + + /// Processor has Persistent Commit feature + bool HasPCOMMIT; + + /// Processor supports Cache Line Write Back instruction + bool HasCLWB; + /// Use software floating point for code generation. bool UseSoftFloat; @@ -378,6 +406,7 @@ public: bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } bool hasVBMI() const { return HasVBMI; } + bool hasIFMA() const { return HasIFMA; } bool hasRTM() const { return HasRTM; } bool hasHLE() const { return HasHLE; } bool hasADX() const { return HasADX; } diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 3e7c0d949f2..c0125f3005b 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -214,31 +214,31 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: addl (%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: retl %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) @@ -303,31 +303,31 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: retl %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) @@ -390,31 +390,31 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: addl (%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: retl %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) @@ -479,31 +479,31 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: retl %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) @@ -2879,6 +2879,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) @@ -2899,6 +2909,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i8 %x1, < ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_wi_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z} +; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) @@ -2919,6 +2939,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) @@ -2939,6 +2969,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) @@ -2959,6 +2999,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i8 %x1, < ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_wi_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z} +; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) @@ -2979,6 +3029,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i8 %x1, ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufh_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm2 {%k1} {z} +; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) @@ -2992,13 +3052,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i8, <32 x i16> define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufl_w_512: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %esi, %k1 -; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1} -; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z} -; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: retq +; AVX512BW-NEXT: kmovd %esi, %k1 +; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1} +; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z} +; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufl_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z} +; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) @@ -3019,6 +3089,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) @@ -3039,6 +3119,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) @@ -3059,6 +3149,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i8 %x1, < ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_wi_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z} +; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) @@ -3079,6 +3179,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) @@ -3152,13 +3262,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i3 define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} -; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z} -; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm0 -; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: retq +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} +; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z} +; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z} +; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) @@ -3172,13 +3292,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i3 define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} -; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z} -; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 -; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: retq +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} +; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z} +; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z} +; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) @@ -3192,13 +3322,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z} -; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: retq +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 +; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)