mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86] Remove FMA3Info DenseMap. Break into sorted tables that we can binary search.
I separated out the rounding and broadcast groups into their own tables because it made the ordering in the main table easier. Further splitting of the tables might make it possible to directly index using bits from the TSFlags, but its probably not worth it right now. llvm-svn: 336075
This commit is contained in:
parent
032393eea9
commit
8838161306
@ -580,7 +580,7 @@ namespace X86II {
|
||||
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
|
||||
// specified machine instruction.
|
||||
//
|
||||
inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
|
||||
inline uint8_t getBaseOpcodeFor(uint64_t TSFlags) {
|
||||
return TSFlags >> X86II::OpcodeShift;
|
||||
}
|
||||
|
||||
|
@ -21,239 +21,148 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define FMA3BASE(X132, X213, X231, Attrs) \
|
||||
{ { X132, X213, X231 }, Attrs },
|
||||
#define FMA3GROUP(Name, Suf, Attrs) \
|
||||
{ { X86::Name##132##Suf, X86::Name##213##Suf, X86::Name##231##Suf }, Attrs },
|
||||
|
||||
#define FMA3RA(R132, R213, R231, Attrs) \
|
||||
FMA3BASE(X86::R132, X86::R213, X86::R231, Attrs)
|
||||
#define FMA3GROUP_MASKED(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
|
||||
FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
|
||||
|
||||
#define FMA3R(R132, R213, R231) \
|
||||
FMA3RA(R132, R213, R231, 0)
|
||||
#define FMA3GROUP_PACKED_WIDTHS(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, Suf##Ym, Attrs) \
|
||||
FMA3GROUP(Name, Suf##Yr, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Zr, Attrs) \
|
||||
FMA3GROUP(Name, Suf##m, Attrs) \
|
||||
FMA3GROUP(Name, Suf##r, Attrs)
|
||||
|
||||
#define FMA3MA(M132, M213, M231, Attrs) \
|
||||
FMA3BASE(X86::M132, X86::M213, X86::M231, Attrs)
|
||||
#define FMA3GROUP_PACKED(Name, Attrs) \
|
||||
FMA3GROUP_PACKED_WIDTHS(Name, PD, Attrs) \
|
||||
FMA3GROUP_PACKED_WIDTHS(Name, PS, Attrs)
|
||||
|
||||
#define FMA3M(M132, M213, M231) \
|
||||
FMA3MA(M132, M213, M231, 0)
|
||||
#define FMA3GROUP_SCALAR_WIDTHS(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, Suf##Zm, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
|
||||
FMA3GROUP(Name, Suf##Zr, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
|
||||
FMA3GROUP(Name, Suf##m, Attrs) \
|
||||
FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
|
||||
FMA3GROUP(Name, Suf##r, Attrs) \
|
||||
FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
|
||||
|
||||
#define FMA3RMA(R132, R213, R231, M132, M213, M231, Attrs) \
|
||||
FMA3RA(R132, R213, R231, Attrs) \
|
||||
FMA3MA(M132, M213, M231, Attrs)
|
||||
#define FMA3GROUP_SCALAR(Name, Attrs) \
|
||||
FMA3GROUP_SCALAR_WIDTHS(Name, SD, Attrs) \
|
||||
FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs) \
|
||||
|
||||
#define FMA3RM(R132, R213, R231, M132, M213, M231) \
|
||||
FMA3RMA(R132, R213, R231, M132, M213, M231, 0)
|
||||
|
||||
#define FMA3_AVX2_VECTOR_GROUP(Name) \
|
||||
FMA3RM(Name##132PSr, Name##213PSr, Name##231PSr, \
|
||||
Name##132PSm, Name##213PSm, Name##231PSm) \
|
||||
FMA3RM(Name##132PDr, Name##213PDr, Name##231PDr, \
|
||||
Name##132PDm, Name##213PDm, Name##231PDm) \
|
||||
FMA3RM(Name##132PSYr, Name##213PSYr, Name##231PSYr, \
|
||||
Name##132PSYm, Name##213PSYm, Name##231PSYm) \
|
||||
FMA3RM(Name##132PDYr, Name##213PDYr, Name##231PDYr, \
|
||||
Name##132PDYm, Name##213PDYm, Name##231PDYm)
|
||||
|
||||
#define FMA3_AVX2_SCALAR_GROUP(Name) \
|
||||
FMA3RM(Name##132SSr, Name##213SSr, Name##231SSr, \
|
||||
Name##132SSm, Name##213SSm, Name##231SSm) \
|
||||
FMA3RM(Name##132SDr, Name##213SDr, Name##231SDr, \
|
||||
Name##132SDm, Name##213SDm, Name##231SDm) \
|
||||
FMA3RMA(Name##132SSr_Int, Name##213SSr_Int, Name##231SSr_Int, \
|
||||
Name##132SSm_Int, Name##213SSm_Int, Name##231SSm_Int, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic) \
|
||||
FMA3RMA(Name##132SDr_Int, Name##213SDr_Int, Name##231SDr_Int, \
|
||||
Name##132SDm_Int, Name##213SDm_Int, Name##231SDm_Int, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic)
|
||||
|
||||
#define FMA3_AVX2_FULL_GROUP(Name) \
|
||||
FMA3_AVX2_VECTOR_GROUP(Name) \
|
||||
FMA3_AVX2_SCALAR_GROUP(Name)
|
||||
|
||||
#define FMA3_AVX512_VECTOR_GROUP(Name) \
|
||||
FMA3RM(Name##132PSZ128r, Name##213PSZ128r, Name##231PSZ128r, \
|
||||
Name##132PSZ128m, Name##213PSZ128m, Name##231PSZ128m) \
|
||||
FMA3RM(Name##132PDZ128r, Name##213PDZ128r, Name##231PDZ128r, \
|
||||
Name##132PDZ128m, Name##213PDZ128m, Name##231PDZ128m) \
|
||||
FMA3RM(Name##132PSZ256r, Name##213PSZ256r, Name##231PSZ256r, \
|
||||
Name##132PSZ256m, Name##213PSZ256m, Name##231PSZ256m) \
|
||||
FMA3RM(Name##132PDZ256r, Name##213PDZ256r, Name##231PDZ256r, \
|
||||
Name##132PDZ256m, Name##213PDZ256m, Name##231PDZ256m) \
|
||||
FMA3RM(Name##132PSZr, Name##213PSZr, Name##231PSZr, \
|
||||
Name##132PSZm, Name##213PSZm, Name##231PSZm) \
|
||||
FMA3RM(Name##132PDZr, Name##213PDZr, Name##231PDZr, \
|
||||
Name##132PDZm, Name##213PDZm, Name##231PDZm) \
|
||||
FMA3RMA(Name##132PSZ128rk, Name##213PSZ128rk, Name##231PSZ128rk, \
|
||||
Name##132PSZ128mk, Name##213PSZ128mk, Name##231PSZ128mk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RMA(Name##132PDZ128rk, Name##213PDZ128rk, Name##231PDZ128rk, \
|
||||
Name##132PDZ128mk, Name##213PDZ128mk, Name##231PDZ128mk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RMA(Name##132PSZ256rk, Name##213PSZ256rk, Name##231PSZ256rk, \
|
||||
Name##132PSZ256mk, Name##213PSZ256mk, Name##231PSZ256mk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RMA(Name##132PDZ256rk, Name##213PDZ256rk, Name##231PDZ256rk, \
|
||||
Name##132PDZ256mk, Name##213PDZ256mk, Name##231PDZ256mk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RMA(Name##132PSZrk, Name##213PSZrk, Name##231PSZrk, \
|
||||
Name##132PSZmk, Name##213PSZmk, Name##231PSZmk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RMA(Name##132PDZrk, Name##213PDZrk, Name##231PDZrk, \
|
||||
Name##132PDZmk, Name##213PDZmk, Name##231PDZmk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RMA(Name##132PSZ128rkz, Name##213PSZ128rkz, Name##231PSZ128rkz, \
|
||||
Name##132PSZ128mkz, Name##213PSZ128mkz, Name##231PSZ128mkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3RMA(Name##132PDZ128rkz, Name##213PDZ128rkz, Name##231PDZ128rkz, \
|
||||
Name##132PDZ128mkz, Name##213PDZ128mkz, Name##231PDZ128mkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3RMA(Name##132PSZ256rkz, Name##213PSZ256rkz, Name##231PSZ256rkz, \
|
||||
Name##132PSZ256mkz, Name##213PSZ256mkz, Name##231PSZ256mkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3RMA(Name##132PDZ256rkz, Name##213PDZ256rkz, Name##231PDZ256rkz, \
|
||||
Name##132PDZ256mkz, Name##213PDZ256mkz, Name##231PDZ256mkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3RMA(Name##132PSZrkz, Name##213PSZrkz, Name##231PSZrkz, \
|
||||
Name##132PSZmkz, Name##213PSZmkz, Name##231PSZmkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3RMA(Name##132PDZrkz, Name##213PDZrkz, Name##231PDZrkz, \
|
||||
Name##132PDZmkz, Name##213PDZmkz, Name##231PDZmkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3R(Name##132PSZrb, Name##213PSZrb, Name##231PSZrb) \
|
||||
FMA3R(Name##132PDZrb, Name##213PDZrb, Name##231PDZrb) \
|
||||
FMA3RA(Name##132PSZrbk, Name##213PSZrbk, Name##231PSZrbk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RA(Name##132PDZrbk, Name##213PDZrbk, Name##231PDZrbk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RA(Name##132PSZrbkz, Name##213PSZrbkz, Name##231PSZrbkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3RA(Name##132PDZrbkz, Name##213PDZrbkz, Name##231PDZrbkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3M(Name##132PSZ128mb, Name##213PSZ128mb, Name##231PSZ128mb) \
|
||||
FMA3M(Name##132PDZ128mb, Name##213PDZ128mb, Name##231PDZ128mb) \
|
||||
FMA3M(Name##132PSZ256mb, Name##213PSZ256mb, Name##231PSZ256mb) \
|
||||
FMA3M(Name##132PDZ256mb, Name##213PDZ256mb, Name##231PDZ256mb) \
|
||||
FMA3M(Name##132PSZmb, Name##213PSZmb, Name##231PSZmb) \
|
||||
FMA3M(Name##132PDZmb, Name##213PDZmb, Name##231PDZmb) \
|
||||
FMA3MA(Name##132PSZ128mbk, Name##213PSZ128mbk, Name##231PSZ128mbk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3MA(Name##132PDZ128mbk, Name##213PDZ128mbk, Name##231PDZ128mbk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3MA(Name##132PSZ256mbk, Name##213PSZ256mbk, Name##231PSZ256mbk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3MA(Name##132PDZ256mbk, Name##213PDZ256mbk, Name##231PDZ256mbk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3MA(Name##132PSZmbk, Name##213PSZmbk, Name##231PSZmbk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3MA(Name##132PDZmbk, Name##213PDZmbk, Name##231PDZmbk, \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3MA(Name##132PSZ128mbkz, Name##213PSZ128mbkz, Name##231PSZ128mbkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3MA(Name##132PDZ128mbkz, Name##213PDZ128mbkz, Name##231PDZ128mbkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3MA(Name##132PSZ256mbkz, Name##213PSZ256mbkz, Name##231PSZ256mbkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3MA(Name##132PDZ256mbkz, Name##213PDZ256mbkz, Name##231PDZ256mbkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3MA(Name##132PSZmbkz, Name##213PSZmbkz, Name##231PSZmbkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3MA(Name##132PDZmbkz, Name##213PDZmbkz, Name##231PDZmbkz, \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked)
|
||||
|
||||
#define FMA3_AVX512_SCALAR_GROUP(Name) \
|
||||
FMA3RM(Name##132SSZr, Name##213SSZr, Name##231SSZr, \
|
||||
Name##132SSZm, Name##213SSZm, Name##231SSZm) \
|
||||
FMA3RM(Name##132SDZr, Name##213SDZr, Name##231SDZr, \
|
||||
Name##132SDZm, Name##213SDZm, Name##231SDZm) \
|
||||
FMA3RMA(Name##132SSZr_Int, Name##213SSZr_Int, Name##231SSZr_Int, \
|
||||
Name##132SSZm_Int, Name##213SSZm_Int, Name##231SSZm_Int, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic) \
|
||||
FMA3RMA(Name##132SDZr_Int, Name##213SDZr_Int, Name##231SDZr_Int, \
|
||||
Name##132SDZm_Int, Name##213SDZm_Int, Name##231SDZm_Int, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic) \
|
||||
FMA3RMA(Name##132SSZr_Intk, Name##213SSZr_Intk, Name##231SSZr_Intk, \
|
||||
Name##132SSZm_Intk, Name##213SSZm_Intk, Name##231SSZm_Intk, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic | \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RMA(Name##132SDZr_Intk, Name##213SDZr_Intk, Name##231SDZr_Intk, \
|
||||
Name##132SDZm_Intk, Name##213SDZm_Intk, Name##231SDZm_Intk, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic | \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RMA(Name##132SSZr_Intkz, Name##213SSZr_Intkz, Name##231SSZr_Intkz, \
|
||||
Name##132SSZm_Intkz, Name##213SSZm_Intkz, Name##231SSZm_Intkz, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic | \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3RMA(Name##132SDZr_Intkz, Name##213SDZr_Intkz, Name##231SDZr_Intkz, \
|
||||
Name##132SDZm_Intkz, Name##213SDZm_Intkz, Name##231SDZm_Intkz, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic | \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3RA(Name##132SSZrb_Int, Name##213SSZrb_Int, Name##231SSZrb_Int, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic) \
|
||||
FMA3RA(Name##132SDZrb_Int, Name##213SDZrb_Int, Name##231SDZrb_Int, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic) \
|
||||
FMA3RA(Name##132SSZrb_Intk, Name##213SSZrb_Intk, Name##231SSZrb_Intk, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic | \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RA(Name##132SDZrb_Intk, Name##213SDZrb_Intk, Name##231SDZrb_Intk, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic | \
|
||||
X86InstrFMA3Group::X86FMA3KMergeMasked) \
|
||||
FMA3RA(Name##132SSZrb_Intkz, Name##213SSZrb_Intkz, Name##231SSZrb_Intkz, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic | \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked) \
|
||||
FMA3RA(Name##132SDZrb_Intkz, Name##213SDZrb_Intkz, Name##231SDZrb_Intkz, \
|
||||
X86InstrFMA3Group::X86FMA3Intrinsic | \
|
||||
X86InstrFMA3Group::X86FMA3KZeroMasked)
|
||||
|
||||
#define FMA3_AVX512_FULL_GROUP(Name) \
|
||||
FMA3_AVX512_VECTOR_GROUP(Name) \
|
||||
FMA3_AVX512_SCALAR_GROUP(Name)
|
||||
#define FMA3GROUP_FULL(Name, Attrs) \
|
||||
FMA3GROUP_PACKED(Name, Attrs) \
|
||||
FMA3GROUP_SCALAR(Name, Attrs)
|
||||
|
||||
static const X86InstrFMA3Group Groups[] = {
|
||||
FMA3_AVX2_FULL_GROUP(VFMADD)
|
||||
FMA3_AVX2_FULL_GROUP(VFMSUB)
|
||||
FMA3_AVX2_FULL_GROUP(VFNMADD)
|
||||
FMA3_AVX2_FULL_GROUP(VFNMSUB)
|
||||
|
||||
FMA3_AVX2_VECTOR_GROUP(VFMADDSUB)
|
||||
FMA3_AVX2_VECTOR_GROUP(VFMSUBADD)
|
||||
|
||||
FMA3_AVX512_FULL_GROUP(VFMADD)
|
||||
FMA3_AVX512_FULL_GROUP(VFMSUB)
|
||||
FMA3_AVX512_FULL_GROUP(VFNMADD)
|
||||
FMA3_AVX512_FULL_GROUP(VFNMSUB)
|
||||
|
||||
FMA3_AVX512_VECTOR_GROUP(VFMADDSUB)
|
||||
FMA3_AVX512_VECTOR_GROUP(VFMSUBADD)
|
||||
FMA3GROUP_FULL(VFMADD, 0)
|
||||
FMA3GROUP_PACKED(VFMADDSUB, 0)
|
||||
FMA3GROUP_FULL(VFMSUB, 0)
|
||||
FMA3GROUP_PACKED(VFMSUBADD, 0)
|
||||
FMA3GROUP_FULL(VFNMADD, 0)
|
||||
FMA3GROUP_FULL(VFNMSUB, 0)
|
||||
};
|
||||
|
||||
namespace {
|
||||
#define FMA3GROUP_PACKED_AVX512_WIDTHS(Name, Type, Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Type##Z128##Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)
|
||||
|
||||
struct X86InstrFMA3Info {
|
||||
/// A map that is used to find the group of FMA opcodes using any FMA opcode
|
||||
/// from the group.
|
||||
DenseMap<unsigned, const X86InstrFMA3Group *> OpcodeToGroup;
|
||||
#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
|
||||
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
|
||||
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
|
||||
|
||||
/// Constructor. Just creates an object of the class.
|
||||
X86InstrFMA3Info() {
|
||||
for (const X86InstrFMA3Group &G : Groups) {
|
||||
OpcodeToGroup[G.Opcodes[0]] = &G;
|
||||
OpcodeToGroup[G.Opcodes[1]] = &G;
|
||||
OpcodeToGroup[G.Opcodes[2]] = &G;
|
||||
}
|
||||
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
|
||||
|
||||
#define FMA3GROUP_SCALAR_AVX512(Name, Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, SDZ##Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, SSZ##Suf, Attrs)
|
||||
|
||||
static const X86InstrFMA3Group BroadcastGroups[] = {
|
||||
FMA3GROUP_PACKED_AVX512(VFMADD, mb, 0)
|
||||
FMA3GROUP_PACKED_AVX512(VFMADDSUB, mb, 0)
|
||||
FMA3GROUP_PACKED_AVX512(VFMSUB, mb, 0)
|
||||
FMA3GROUP_PACKED_AVX512(VFMSUBADD, mb, 0)
|
||||
FMA3GROUP_PACKED_AVX512(VFNMADD, mb, 0)
|
||||
FMA3GROUP_PACKED_AVX512(VFNMSUB, mb, 0)
|
||||
};
|
||||
|
||||
static const X86InstrFMA3Group RoundGroups[] = {
|
||||
FMA3GROUP_PACKED_AVX512_ROUND(VFMADD, rb, 0)
|
||||
FMA3GROUP_SCALAR_AVX512(VFMADD, rb_Int, X86InstrFMA3Group::Intrinsic)
|
||||
FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB, rb, 0)
|
||||
FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB, rb, 0)
|
||||
FMA3GROUP_SCALAR_AVX512(VFMSUB, rb_Int, X86InstrFMA3Group::Intrinsic)
|
||||
FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD, rb, 0)
|
||||
FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD, rb, 0)
|
||||
FMA3GROUP_SCALAR_AVX512(VFNMADD, rb_Int, X86InstrFMA3Group::Intrinsic)
|
||||
FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB, rb, 0)
|
||||
FMA3GROUP_SCALAR_AVX512(VFNMSUB, rb_Int, X86InstrFMA3Group::Intrinsic)
|
||||
};
|
||||
|
||||
static void verifyTables() {
|
||||
#ifndef NDEBUG
|
||||
static std::atomic<bool> TableChecked(false);
|
||||
if (!TableChecked.load(std::memory_order_relaxed)) {
|
||||
assert(std::is_sorted(std::begin(Groups), std::end(Groups)) &&
|
||||
std::is_sorted(std::begin(RoundGroups), std::end(RoundGroups)) &&
|
||||
std::is_sorted(std::begin(BroadcastGroups),
|
||||
std::end(BroadcastGroups)) &&
|
||||
"FMA3 tables not sorted!");
|
||||
TableChecked.store(true, std::memory_order_relaxed);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static ManagedStatic<X86InstrFMA3Info> X86FMA3InfoObj;
|
||||
|
||||
/// Returns a reference to a group of FMA3 opcodes to where the given
|
||||
/// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
|
||||
/// and not included into any FMA3 group, then nullptr is returned.
|
||||
const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode) {
|
||||
auto &Map = X86FMA3InfoObj->OpcodeToGroup;
|
||||
auto I = Map.find(Opcode);
|
||||
if (I != Map.end())
|
||||
return I->second;
|
||||
const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
|
||||
|
||||
return nullptr;
|
||||
// FMA3 instructions have a well defined encoding pattern we can exploit.
|
||||
uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
|
||||
bool IsFMA3 = ((TSFlags & X86II::EncodingMask) == X86II::VEX ||
|
||||
(TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
|
||||
(TSFlags & X86II::OpMapMask) == X86II::T8 &&
|
||||
(TSFlags & X86II::OpPrefixMask) == X86II::PD &&
|
||||
((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
|
||||
(BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
|
||||
(BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
|
||||
if (!IsFMA3)
|
||||
return nullptr;
|
||||
|
||||
verifyTables();
|
||||
|
||||
ArrayRef<X86InstrFMA3Group> Table;
|
||||
if (TSFlags & X86II::EVEX_RC)
|
||||
Table = makeArrayRef(RoundGroups);
|
||||
else if (TSFlags & X86II::EVEX_B)
|
||||
Table = makeArrayRef(BroadcastGroups);
|
||||
else
|
||||
Table = makeArrayRef(Groups);
|
||||
|
||||
// FMA 132 instructions have an opcode of 0x96-0x9F
|
||||
// FMA 213 instructions have an opcode of 0xA6-0xAF
|
||||
// FMA 231 instructions have an opcode of 0xB6-0xBF
|
||||
unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
|
||||
|
||||
auto I = std::lower_bound(Table.begin(), Table.end(), Opcode,
|
||||
[FormIndex](const X86InstrFMA3Group &Group,
|
||||
unsigned Opcode) {
|
||||
return Group.Opcodes[FormIndex] < Opcode;
|
||||
});
|
||||
assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
|
||||
"Couldn't find FMA3 opcode!");
|
||||
return I;
|
||||
}
|
||||
|
@ -43,17 +43,17 @@ struct X86InstrFMA3Group {
|
||||
enum : uint16_t {
|
||||
/// This bit must be set in the 'Attributes' field of FMA group if such
|
||||
/// group of FMA opcodes consists of FMA intrinsic opcodes.
|
||||
X86FMA3Intrinsic = 0x1,
|
||||
Intrinsic = 0x1,
|
||||
|
||||
/// This bit must be set in the 'Attributes' field of FMA group if such
|
||||
/// group of FMA opcodes consists of AVX512 opcodes accepting a k-mask and
|
||||
/// passing the elements from the 1st operand to the result of the operation
|
||||
/// when the correpondings bits in the k-mask are unset.
|
||||
X86FMA3KMergeMasked = 0x2,
|
||||
KMergeMasked = 0x2,
|
||||
|
||||
/// This bit must be set in the 'Attributes' field of FMA group if such
|
||||
/// group of FMA opcodes consists of AVX512 opcodes accepting a k-zeromask.
|
||||
X86FMA3KZeroMasked = 0x4,
|
||||
KZeroMasked = 0x4,
|
||||
};
|
||||
|
||||
/// Returns the 132 form of FMA opcode.
|
||||
@ -72,26 +72,30 @@ struct X86InstrFMA3Group {
|
||||
}
|
||||
|
||||
/// Returns true iff the group of FMA opcodes holds intrinsic opcodes.
|
||||
bool isIntrinsic() const { return (Attributes & X86FMA3Intrinsic) != 0; }
|
||||
bool isIntrinsic() const { return (Attributes & Intrinsic) != 0; }
|
||||
|
||||
/// Returns true iff the group of FMA opcodes holds k-merge-masked opcodes.
|
||||
bool isKMergeMasked() const {
|
||||
return (Attributes & X86FMA3KMergeMasked) != 0;
|
||||
return (Attributes & KMergeMasked) != 0;
|
||||
}
|
||||
|
||||
/// Returns true iff the group of FMA opcodes holds k-zero-masked opcodes.
|
||||
bool isKZeroMasked() const { return (Attributes & X86FMA3KZeroMasked) != 0; }
|
||||
bool isKZeroMasked() const { return (Attributes &KZeroMasked) != 0; }
|
||||
|
||||
/// Returns true iff the group of FMA opcodes holds any of k-masked opcodes.
|
||||
bool isKMasked() const {
|
||||
return (Attributes & (X86FMA3KMergeMasked | X86FMA3KZeroMasked)) != 0;
|
||||
return (Attributes & (KMergeMasked | KZeroMasked)) != 0;
|
||||
}
|
||||
|
||||
bool operator<(const X86InstrFMA3Group &RHS) const {
|
||||
return Opcodes[0] < RHS.Opcodes[0];
|
||||
}
|
||||
};
|
||||
|
||||
/// Returns a reference to a group of FMA3 opcodes to where the given
|
||||
/// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
|
||||
/// and not included into any FMA3 group, then nullptr is returned.
|
||||
const X86InstrFMA3Group *getFMA3Group(unsigned Opcode);
|
||||
const X86InstrFMA3Group *getFMA3Group(unsigned Opcode, uint64_t TSFlags);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
|
@ -1803,7 +1803,8 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
|
||||
OpIdx1, OpIdx2);
|
||||
}
|
||||
|
||||
const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode());
|
||||
const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
|
||||
MI.getDesc().TSFlags);
|
||||
if (FMA3Group) {
|
||||
unsigned Opc =
|
||||
getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
|
||||
@ -2039,7 +2040,8 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
|
||||
}
|
||||
|
||||
default:
|
||||
const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode());
|
||||
const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
|
||||
MI.getDesc().TSFlags);
|
||||
if (FMA3Group)
|
||||
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
|
||||
FMA3Group->isIntrinsic());
|
||||
|
Loading…
x
Reference in New Issue
Block a user