
[X86] Add more instructions to the memory folding tables using the autogenerated table as a guide.

I think this covers most of the unmasked vector instructions. We're still missing a lot of the masked instructions.

There are some test changes here because of the new folding support. I don't think these particular cases should be folded, because folding creates a dependency on an undef register. I think the checks introduced in r334175 don't handle stack folding; they only block the peephole pass.

llvm-svn: 334800
Craig Topper 2018-06-15 05:49:19 +00:00
parent 9e40efa572
commit 5526c28acc
6 changed files with 237 additions and 35 deletions
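
For orientation before the diff: each table entry below pairs the register form of an X86 instruction with its memory form, plus TB_* flags describing the fold (TB_FOLDED_LOAD/TB_FOLDED_STORE mark load/store folds, TB_ALIGN_* encodes an alignment requirement, and TB_NO_REVERSE marks folds that cannot be undone, typically because the memory form reads fewer bytes than a full register load). Below is a minimal, self-contained sketch of how such a table is consulted; it is a stand-in, not LLVM's actual API — the opcode values, flag bit positions, and lookup helper are all hypothetical, and only the entry shape mirrors the real tables.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <iterator>

// Illustrative flag bits; the real assignments live in X86InstrInfo.h.
enum : uint16_t {
  TB_FOLDED_LOAD  = 1 << 0, // memory form reads its operand from memory
  TB_FOLDED_STORE = 1 << 1, // memory form writes its result to memory
  TB_NO_REVERSE   = 1 << 2, // the fold cannot be unfolded back
};

struct MemoryFoldTableEntry {
  uint16_t RegOp; // opcode of the register form
  uint16_t MemOp; // opcode of the equivalent memory form
  uint16_t Flags; // TB_* flags
};

// Hypothetical opcode numbers standing in for X86::* enumerators.
enum : uint16_t { PTWRITEr = 10, PTWRITEm = 11, VPROLDZri = 20, VPROLDZmi = 21 };

// Kept sorted by RegOp so lookups can binary-search.
static const MemoryFoldTableEntry FoldTable0[] = {
    {PTWRITEr,  PTWRITEm,  TB_FOLDED_LOAD},
    {VPROLDZri, VPROLDZmi, 0},
};

// Find the memory form that a load can be folded into, e.g. when the
// register allocator spills the operand to a stack slot.
static const MemoryFoldTableEntry *lookupFold(uint16_t RegOp) {
  const auto *It = std::lower_bound(
      std::begin(FoldTable0), std::end(FoldTable0), RegOp,
      [](const MemoryFoldTableEntry &E, uint16_t Op) { return E.RegOp < Op; });
  return (It != std::end(FoldTable0) && It->RegOp == RegOp) ? It : nullptr;
}

int main() {
  if (const MemoryFoldTableEntry *E = lookupFold(PTWRITEr))
    std::printf("PTWRITEr -> mem opcode %u (flags 0x%x)\n",
                (unsigned)E->MemOp, (unsigned)E->Flags);
  return 0;
}

TB_NO_REVERSE shows up below on entries such as VCVTPS2PDZ128rr, where the memory form loads fewer bytes than the register form reads, so unfolding would widen the load.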


@@ -351,8 +351,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
{ X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
{ X86::CALL16r, X86::CALL16m, TB_FOLDED_LOAD },
{ X86::CALL16r_NT, X86::CALL16m_NT, TB_FOLDED_LOAD },
{ X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
{ X86::CALL32r_NT, X86::CALL32m_NT, TB_FOLDED_LOAD },
{ X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
{ X86::CALL64r_NT, X86::CALL64m_NT, TB_FOLDED_LOAD },
{ X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
{ X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
{ X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
@@ -379,8 +382,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
{ X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
{ X86::JMP16r, X86::JMP16m, TB_FOLDED_LOAD },
{ X86::JMP16r_NT, X86::JMP16m_NT, TB_FOLDED_LOAD },
{ X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
{ X86::JMP32r_NT, X86::JMP32m_NT, TB_FOLDED_LOAD },
{ X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
{ X86::JMP64r_NT, X86::JMP64m_NT, TB_FOLDED_LOAD },
{ X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
{ X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
{ X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
@@ -406,6 +412,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
{ X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
{ X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
{ X86::PTWRITE64r, X86::PTWRITE64m, TB_FOLDED_LOAD },
{ X86::PTWRITEr, X86::PTWRITEm, TB_FOLDED_LOAD },
{ X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD },
{ X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD },
{ X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD },
@@ -935,6 +943,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::TZMSK64rr, X86::TZMSK64rm, 0 },
// AVX-512 foldable instructions
{ X86::VBROADCASTF32X2Zr, X86::VBROADCASTF32X2Zm, TB_NO_REVERSE },
{ X86::VBROADCASTI32X2Zr, X86::VBROADCASTI32X2Zm, TB_NO_REVERSE },
{ X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
{ X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
{ X86::VCOMISDZrr, X86::VCOMISDZrm, 0 },
@@ -942,8 +952,65 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCOMISSZrr, X86::VCOMISSZrm, 0 },
{ X86::VCOMISSZrr_Int, X86::VCOMISSZrm_Int, TB_NO_REVERSE },
{ X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrm, 0 },
{ X86::VCVTDQ2PSZrr, X86::VCVTDQ2PSZrm, 0 },
{ X86::VCVTPD2DQZrr, X86::VCVTPD2DQZrm, 0 },
{ X86::VCVTPD2PSZrr, X86::VCVTPD2PSZrm, 0 },
{ X86::VCVTPD2QQZrr, X86::VCVTPD2QQZrm, 0 },
{ X86::VCVTPD2UDQZrr, X86::VCVTPD2UDQZrm, 0 },
{ X86::VCVTPD2UQQZrr, X86::VCVTPD2UQQZrm, 0 },
{ X86::VCVTPS2DQZrr, X86::VCVTPS2DQZrm, 0 },
{ X86::VCVTPS2PDZrr, X86::VCVTPS2PDZrm, 0 },
{ X86::VCVTPS2QQZrr, X86::VCVTPS2QQZrm, 0 },
{ X86::VCVTPS2UDQZrr, X86::VCVTPS2UDQZrm, 0 },
{ X86::VCVTPS2UQQZrr, X86::VCVTPS2UQQZrm, 0 },
{ X86::VCVTQQ2PDZrr, X86::VCVTQQ2PDZrm, 0 },
{ X86::VCVTQQ2PSZrr, X86::VCVTQQ2PSZrm, 0 },
{ X86::VCVTSD2SI64Zrr_Int, X86::VCVTSD2SI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTSD2SIZrr_Int, X86::VCVTSD2SIZrm_Int, TB_NO_REVERSE },
{ X86::VCVTSD2USI64Zrr_Int, X86::VCVTSD2USI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTSD2USIZrr_Int, X86::VCVTSD2USIZrm_Int, TB_NO_REVERSE },
{ X86::VCVTSS2SI64Zrr_Int, X86::VCVTSS2SI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTSS2SIZrr_Int, X86::VCVTSS2SIZrm_Int, TB_NO_REVERSE },
{ X86::VCVTSS2USI64Zrr_Int, X86::VCVTSS2USI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTSS2USIZrr_Int, X86::VCVTSS2USIZrm_Int, TB_NO_REVERSE },
{ X86::VCVTTPD2DQZrr, X86::VCVTTPD2DQZrm, 0 },
{ X86::VCVTTPD2QQZrr, X86::VCVTTPD2QQZrm, 0 },
{ X86::VCVTTPD2UDQZrr, X86::VCVTTPD2UDQZrm, 0 },
{ X86::VCVTTPD2UQQZrr, X86::VCVTTPD2UQQZrm, 0 },
{ X86::VCVTTPS2DQZrr, X86::VCVTTPS2DQZrm, 0 },
{ X86::VCVTTPS2QQZrr, X86::VCVTTPS2QQZrm, 0 },
{ X86::VCVTTPS2UDQZrr, X86::VCVTTPS2UDQZrm, 0 },
{ X86::VCVTTPS2UQQZrr, X86::VCVTTPS2UQQZrm, 0 },
{ X86::VCVTTSD2SI64Zrr, X86::VCVTTSD2SI64Zrm, 0 },
{ X86::VCVTTSD2SI64Zrr_Int, X86::VCVTTSD2SI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTTSD2SIZrr, X86::VCVTTSD2SIZrm, 0 },
{ X86::VCVTTSD2SIZrr_Int, X86::VCVTTSD2SIZrm_Int, TB_NO_REVERSE },
{ X86::VCVTTSD2USI64Zrr, X86::VCVTTSD2USI64Zrm, 0 },
{ X86::VCVTTSD2USI64Zrr_Int, X86::VCVTTSD2USI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTTSD2USIZrr, X86::VCVTTSD2USIZrm, 0 },
{ X86::VCVTTSD2USIZrr_Int, X86::VCVTTSD2USIZrm_Int, TB_NO_REVERSE },
{ X86::VCVTTSS2SI64Zrr, X86::VCVTTSS2SI64Zrm, 0 },
{ X86::VCVTTSS2SI64Zrr_Int, X86::VCVTTSS2SI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTTSS2SIZrr, X86::VCVTTSS2SIZrm, 0 },
{ X86::VCVTTSS2SIZrr_Int, X86::VCVTTSS2SIZrm_Int, TB_NO_REVERSE },
{ X86::VCVTTSS2USI64Zrr, X86::VCVTTSS2USI64Zrm, 0 },
{ X86::VCVTTSS2USI64Zrr_Int, X86::VCVTTSS2USI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTTSS2USIZrr, X86::VCVTTSS2USIZrm, 0 },
{ X86::VCVTTSS2USIZrr_Int, X86::VCVTTSS2USIZrm_Int, TB_NO_REVERSE },
{ X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrm, 0 },
{ X86::VCVTUDQ2PSZrr, X86::VCVTUDQ2PSZrm, 0 },
{ X86::VCVTUQQ2PDZrr, X86::VCVTUQQ2PDZrm, 0 },
{ X86::VCVTUQQ2PSZrr, X86::VCVTUQQ2PSZrm, 0 },
{ X86::VEXP2PDZr, X86::VEXP2PDZm, 0 },
{ X86::VEXP2PSZr, X86::VEXP2PSZm, 0 },
{ X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrm, 0 },
{ X86::VFPCLASSPSZrr, X86::VFPCLASSPSZrm, 0 },
{ X86::VFPCLASSSDrr, X86::VFPCLASSSDrm, TB_NO_REVERSE },
{ X86::VFPCLASSSSrr, X86::VFPCLASSSSrm, TB_NO_REVERSE },
{ X86::VGETEXPPDZr, X86::VGETEXPPDZm, 0 },
{ X86::VGETEXPPSZr, X86::VGETEXPPSZm, 0 },
{ X86::VGETMANTPDZrri, X86::VGETMANTPDZrmi, 0 },
{ X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 },
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
{ X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
{ X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
@@ -994,6 +1061,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPOPCNTDZrr, X86::VPOPCNTDZrm, 0 },
{ X86::VPOPCNTQZrr, X86::VPOPCNTQZrm, 0 },
{ X86::VPOPCNTWZrr, X86::VPOPCNTWZrm, 0 },
{ X86::VPROLDZri, X86::VPROLDZmi, 0 },
{ X86::VPROLQZri, X86::VPROLQZmi, 0 },
{ X86::VPRORDZri, X86::VPRORDZmi, 0 },
{ X86::VPRORQZri, X86::VPRORQZmi, 0 },
{ X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 },
{ X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 },
{ X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
@@ -1012,6 +1083,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VRCP14PSZr, X86::VRCP14PSZm, 0 },
{ X86::VRCP28PDZr, X86::VRCP28PDZm, 0 },
{ X86::VRCP28PSZr, X86::VRCP28PSZm, 0 },
{ X86::VREDUCEPDZrri, X86::VREDUCEPDZrmi, 0 },
{ X86::VREDUCEPSZrri, X86::VREDUCEPSZrmi, 0 },
{ X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmi, 0 },
{ X86::VRNDSCALEPSZrri, X86::VRNDSCALEPSZrmi, 0 },
{ X86::VRSQRT14PDZr, X86::VRSQRT14PDZm, 0 },
{ X86::VRSQRT14PSZr, X86::VRSQRT14PSZm, 0 },
{ X86::VRSQRT28PDZr, X86::VRSQRT28PDZm, 0 },
@@ -1024,11 +1099,42 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE },
// AVX-512 foldable instructions (256-bit versions)
{ X86::VBROADCASTF32X2Z256r, X86::VBROADCASTF32X2Z256m, TB_NO_REVERSE },
{ X86::VBROADCASTI32X2Z256r, X86::VBROADCASTI32X2Z256m, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
{ X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0 },
{ X86::VCVTDQ2PSZ256rr, X86::VCVTDQ2PSZ256rm, 0 },
{ X86::VCVTPD2DQZ256rr, X86::VCVTPD2DQZ256rm, 0 },
{ X86::VCVTPD2PSZ256rr, X86::VCVTPD2PSZ256rm, 0 },
{ X86::VCVTPD2QQZ256rr, X86::VCVTPD2QQZ256rm, 0 },
{ X86::VCVTPD2UDQZ256rr, X86::VCVTPD2UDQZ256rm, 0 },
{ X86::VCVTPD2UQQZ256rr, X86::VCVTPD2UQQZ256rm, 0 },
{ X86::VCVTPS2DQZ256rr, X86::VCVTPS2DQZ256rm, 0 },
{ X86::VCVTPS2PDZ256rr, X86::VCVTPS2PDZ256rm, 0 },
{ X86::VCVTPS2QQZ256rr, X86::VCVTPS2QQZ256rm, 0 },
{ X86::VCVTPS2UDQZ256rr, X86::VCVTPS2UDQZ256rm, 0 },
{ X86::VCVTPS2UQQZ256rr, X86::VCVTPS2UQQZ256rm, 0 },
{ X86::VCVTQQ2PDZ256rr, X86::VCVTQQ2PDZ256rm, 0 },
{ X86::VCVTQQ2PSZ256rr, X86::VCVTQQ2PSZ256rm, 0 },
{ X86::VCVTTPD2DQZ256rr, X86::VCVTTPD2DQZ256rm, 0 },
{ X86::VCVTTPD2QQZ256rr, X86::VCVTTPD2QQZ256rm, 0 },
{ X86::VCVTTPD2UDQZ256rr, X86::VCVTTPD2UDQZ256rm, 0 },
{ X86::VCVTTPD2UQQZ256rr, X86::VCVTTPD2UQQZ256rm, 0 },
{ X86::VCVTTPS2DQZ256rr, X86::VCVTTPS2DQZ256rm, 0 },
{ X86::VCVTTPS2QQZ256rr, X86::VCVTTPS2QQZ256rm, 0 },
{ X86::VCVTTPS2UDQZ256rr, X86::VCVTTPS2UDQZ256rm, 0 },
{ X86::VCVTTPS2UQQZ256rr, X86::VCVTTPS2UQQZ256rm, 0 },
{ X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rm, 0 },
{ X86::VCVTUDQ2PSZ256rr, X86::VCVTUDQ2PSZ256rm, 0 },
{ X86::VCVTUQQ2PDZ256rr, X86::VCVTUQQ2PDZ256rm, 0 },
{ X86::VCVTUQQ2PSZ256rr, X86::VCVTUQQ2PSZ256rm, 0 },
{ X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rm, 0 },
{ X86::VFPCLASSPSZ256rr, X86::VFPCLASSPSZ256rm, 0 },
{ X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256m, 0 },
{ X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256m, 0 },
{ X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmi, 0 },
{ X86::VGETMANTPSZ256rri, X86::VGETMANTPSZ256rmi, 0 },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
{ X86::VMOVDDUPZ256rr, X86::VMOVDDUPZ256rm, 0 },
@@ -1074,6 +1180,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPOPCNTDZ256rr, X86::VPOPCNTDZ256rm, 0 },
{ X86::VPOPCNTQZ256rr, X86::VPOPCNTQZ256rm, 0 },
{ X86::VPOPCNTWZ256rr, X86::VPOPCNTWZ256rm, 0 },
{ X86::VPROLDZ256ri, X86::VPROLDZ256mi, 0 },
{ X86::VPROLQZ256ri, X86::VPROLQZ256mi, 0 },
{ X86::VPRORDZ256ri, X86::VPRORDZ256mi, 0 },
{ X86::VPRORQZ256ri, X86::VPRORQZ256mi, 0 },
{ X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 },
{ X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 },
{ X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
@@ -1090,19 +1200,53 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 },
{ X86::VRCP14PDZ256r, X86::VRCP14PDZ256m, 0 },
{ X86::VRCP14PSZ256r, X86::VRCP14PSZ256m, 0 },
{ X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmi, 0 },
{ X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmi, 0 },
{ X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmi, 0 },
{ X86::VRNDSCALEPSZ256rri, X86::VRNDSCALEPSZ256rmi, 0 },
{ X86::VRSQRT14PDZ256r, X86::VRSQRT14PDZ256m, 0 },
{ X86::VRSQRT14PSZ256r, X86::VRSQRT14PSZ256m, 0 },
{ X86::VSQRTPDZ256r, X86::VSQRTPDZ256m, 0 },
{ X86::VSQRTPSZ256r, X86::VSQRTPSZ256m, 0 },
// AVX-512 foldable instructions (128-bit versions)
{ X86::VBROADCASTI32X2Z128r, X86::VBROADCASTI32X2Z128m, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
{ X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE },
{ X86::VCVTDQ2PSZ128rr, X86::VCVTDQ2PSZ128rm, 0 },
{ X86::VCVTPD2DQZ128rr, X86::VCVTPD2DQZ128rm, 0 },
{ X86::VCVTPD2PSZ128rr, X86::VCVTPD2PSZ128rm, 0 },
{ X86::VCVTPD2QQZ128rr, X86::VCVTPD2QQZ128rm, 0 },
{ X86::VCVTPD2UDQZ128rr, X86::VCVTPD2UDQZ128rm, 0 },
{ X86::VCVTPD2UQQZ128rr, X86::VCVTPD2UQQZ128rm, 0 },
{ X86::VCVTPS2DQZ128rr, X86::VCVTPS2DQZ128rm, 0 },
{ X86::VCVTPS2PDZ128rr, X86::VCVTPS2PDZ128rm, TB_NO_REVERSE },
{ X86::VCVTPS2QQZ128rr, X86::VCVTPS2QQZ128rm, TB_NO_REVERSE },
{ X86::VCVTPS2UDQZ128rr, X86::VCVTPS2UDQZ128rm, 0 },
{ X86::VCVTPS2UQQZ128rr, X86::VCVTPS2UQQZ128rm, TB_NO_REVERSE },
{ X86::VCVTQQ2PDZ128rr, X86::VCVTQQ2PDZ128rm, 0 },
{ X86::VCVTQQ2PSZ128rr, X86::VCVTQQ2PSZ128rm, 0 },
{ X86::VCVTTPD2DQZ128rr, X86::VCVTTPD2DQZ128rm, 0 },
{ X86::VCVTTPD2QQZ128rr, X86::VCVTTPD2QQZ128rm, 0 },
{ X86::VCVTTPD2UDQZ128rr, X86::VCVTTPD2UDQZ128rm, 0 },
{ X86::VCVTTPD2UQQZ128rr, X86::VCVTTPD2UQQZ128rm, 0 },
{ X86::VCVTTPS2DQZ128rr, X86::VCVTTPS2DQZ128rm, 0 },
{ X86::VCVTTPS2QQZ128rr, X86::VCVTTPS2QQZ128rm, TB_NO_REVERSE },
{ X86::VCVTTPS2UDQZ128rr, X86::VCVTTPS2UDQZ128rm, 0 },
{ X86::VCVTTPS2UQQZ128rr, X86::VCVTTPS2UQQZ128rm, TB_NO_REVERSE },
{ X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rm, TB_NO_REVERSE },
{ X86::VCVTUDQ2PSZ128rr, X86::VCVTUDQ2PSZ128rm, 0 },
{ X86::VCVTUQQ2PDZ128rr, X86::VCVTUQQ2PDZ128rm, 0 },
{ X86::VCVTUQQ2PSZ128rr, X86::VCVTUQQ2PSZ128rm, 0 },
{ X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rm, 0 },
{ X86::VFPCLASSPSZ128rr, X86::VFPCLASSPSZ128rm, 0 },
{ X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128m, 0 },
{ X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128m, 0 },
{ X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmi, 0 },
{ X86::VGETMANTPSZ128rri, X86::VGETMANTPSZ128rmi, 0 },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
{ X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rm, 0 },
{ X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rm, TB_NO_REVERSE },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
{ X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
{ X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
@@ -1143,6 +1287,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPOPCNTDZ128rr, X86::VPOPCNTDZ128rm, 0 },
{ X86::VPOPCNTQZ128rr, X86::VPOPCNTQZ128rm, 0 },
{ X86::VPOPCNTWZ128rr, X86::VPOPCNTWZ128rm, 0 },
{ X86::VPROLDZ128ri, X86::VPROLDZ128mi, 0 },
{ X86::VPROLQZ128ri, X86::VPROLQZ128mi, 0 },
{ X86::VPRORDZ128ri, X86::VPRORDZ128mi, 0 },
{ X86::VPRORQZ128ri, X86::VPRORQZ128mi, 0 },
{ X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 },
{ X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 },
{ X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 },
@@ -1159,6 +1307,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 },
{ X86::VRCP14PDZ128r, X86::VRCP14PDZ128m, 0 },
{ X86::VRCP14PSZ128r, X86::VRCP14PSZ128m, 0 },
{ X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmi, 0 },
{ X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmi, 0 },
{ X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmi, 0 },
{ X86::VRNDSCALEPSZ128rri, X86::VRNDSCALEPSZ128rmi, 0 },
{ X86::VRSQRT14PDZ128r, X86::VRSQRT14PDZ128m, 0 },
{ X86::VRSQRT14PSZ128r, X86::VRSQRT14PSZ128m, 0 },
{ X86::VSQRTPDZ128r, X86::VSQRTPDZ128m, 0 },
@@ -1577,6 +1729,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCMPSDrr_Int, X86::VCMPSDrm_Int, TB_NO_REVERSE },
{ X86::VCMPSSrr, X86::VCMPSSrm, 0 },
{ X86::VCMPSSrr_Int, X86::VCMPSSrm_Int, TB_NO_REVERSE },
{ X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
{ X86::VCVTSD2SSrr_Int, X86::VCVTSD2SSrm_Int, TB_NO_REVERSE },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::VCVTSS2SDrr_Int, X86::VCVTSS2SDrm_Int, TB_NO_REVERSE },
{ X86::VDIVPDrr, X86::VDIVPDrm, 0 },
{ X86::VDIVPSrr, X86::VDIVPSrm, 0 },
{ X86::VDIVSDrr, X86::VDIVSDrm, 0 },
@@ -2003,18 +2159,45 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDNPSZrr, X86::VANDNPSZrm, 0 },
{ X86::VANDPDZrr, X86::VANDPDZrm, 0 },
{ X86::VANDPSZrr, X86::VANDPSZrm, 0 },
{ X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 },
{ X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 },
{ X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
{ X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 },
{ X86::VCMPSDZrr, X86::VCMPSDZrm, 0 },
{ X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
{ X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
{ X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
{ X86::VCVTSD2SSZrr, X86::VCVTSD2SSZrm, 0 },
{ X86::VCVTSD2SSZrr_Int, X86::VCVTSD2SSZrm_Int, TB_NO_REVERSE },
{ X86::VCVTSI2SDZrr, X86::VCVTSI2SDZrm, 0 },
{ X86::VCVTSI2SDZrr_Int, X86::VCVTSI2SDZrm_Int, 0 },
{ X86::VCVTSI2SSZrr, X86::VCVTSI2SSZrm, 0 },
{ X86::VCVTSI2SSZrr_Int, X86::VCVTSI2SSZrm_Int, 0 },
{ X86::VCVTSI642SDZrr, X86::VCVTSI642SDZrm, 0 },
{ X86::VCVTSI642SDZrr_Int,X86::VCVTSI642SDZrm_Int, 0 },
{ X86::VCVTSI642SSZrr, X86::VCVTSI642SSZrm, 0 },
{ X86::VCVTSI642SSZrr_Int,X86::VCVTSI642SSZrm_Int, 0 },
{ X86::VCVTSS2SDZrr, X86::VCVTSS2SDZrm, 0 },
{ X86::VCVTSS2SDZrr_Int, X86::VCVTSS2SDZrm_Int, TB_NO_REVERSE },
{ X86::VCVTUSI2SDZrr, X86::VCVTUSI2SDZrm, 0 },
{ X86::VCVTUSI2SDZrr_Int, X86::VCVTUSI2SDZrm_Int, 0 },
{ X86::VCVTUSI2SSZrr, X86::VCVTUSI2SSZrm, 0 },
{ X86::VCVTUSI2SSZrr_Int, X86::VCVTUSI2SSZrm_Int, 0 },
{ X86::VCVTUSI642SDZrr, X86::VCVTUSI642SDZrm, 0 },
{ X86::VCVTUSI642SDZrr_Int,X86::VCVTUSI642SDZrm_Int,0 },
{ X86::VCVTUSI642SSZrr, X86::VCVTUSI642SSZrm, 0 },
{ X86::VCVTUSI642SSZrr_Int,X86::VCVTUSI642SSZrm_Int,0 },
{ X86::VDBPSADBWZrri, X86::VDBPSADBWZrmi, 0 },
{ X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
{ X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
{ X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
{ X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE },
{ X86::VDIVSSZrr, X86::VDIVSSZrm, 0 },
{ X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE },
{ X86::VGETEXPSDZr, X86::VGETEXPSDZm, TB_NO_REVERSE },
{ X86::VGETEXPSSZr, X86::VGETEXPSSZm, TB_NO_REVERSE },
{ X86::VGETMANTSDZrri, X86::VGETMANTSDZrmi, TB_NO_REVERSE },
{ X86::VGETMANTSSZrri, X86::VGETMANTSSZrmi, TB_NO_REVERSE },
{ X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrm, 0 },
{ X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrm, 0 },
{ X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrm, 0 },
@@ -2071,6 +2254,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDQZrr, X86::VPANDQZrm, 0 },
{ X86::VPAVGBZrr, X86::VPAVGBZrm, 0 },
{ X86::VPAVGWZrr, X86::VPAVGWZrm, 0 },
{ X86::VPBLENDMBZrr, X86::VPBLENDMBZrm, 0 },
{ X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
{ X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 },
{ X86::VPBLENDMWZrr, X86::VPBLENDMWZrm, 0 },
{ X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
{ X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
{ X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
@@ -2247,6 +2434,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 },
{ X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 },
{ X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 },
{ X86::VBLENDMPDZ128rr, X86::VBLENDMPDZ128rm, 0 },
{ X86::VBLENDMPDZ256rr, X86::VBLENDMPDZ256rm, 0 },
{ X86::VBLENDMPSZ128rr, X86::VBLENDMPSZ128rm, 0 },
{ X86::VBLENDMPSZ256rr, X86::VBLENDMPSZ256rm, 0 },
{ X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 },
{ X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
{ X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
@@ -2255,6 +2446,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 },
{ X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 },
{ X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 },
{ X86::VDBPSADBWZ128rri, X86::VDBPSADBWZ128rmi, 0 },
{ X86::VDBPSADBWZ256rri, X86::VDBPSADBWZ256rmi, 0 },
{ X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rm, 0 },
{ X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rm, 0 },
{ X86::VINSERTI32x4Z256rr,X86::VINSERTI32x4Z256rm, 0 },
@@ -2321,6 +2514,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 },
{ X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 },
{ X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 },
{ X86::VPBLENDMBZ128rr, X86::VPBLENDMBZ128rm, 0 },
{ X86::VPBLENDMBZ256rr, X86::VPBLENDMBZ256rm, 0 },
{ X86::VPBLENDMDZ128rr, X86::VPBLENDMDZ128rm, 0 },
{ X86::VPBLENDMDZ256rr, X86::VPBLENDMDZ256rm, 0 },
{ X86::VPBLENDMQZ128rr, X86::VPBLENDMQZ128rm, 0 },
{ X86::VPBLENDMQZ256rr, X86::VPBLENDMQZ256rm, 0 },
{ X86::VPBLENDMWZ128rr, X86::VPBLENDMWZ128rm, 0 },
{ X86::VPBLENDMWZ256rr, X86::VPBLENDMWZ256rm, 0 },
{ X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
{ X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
{ X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
@@ -2837,6 +3038,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 },
{ X86::VPMADD52HUQZr, X86::VPMADD52HUQZm, 0 },
{ X86::VPMADD52LUQZr, X86::VPMADD52LUQZm, 0 },
{ X86::VPSHLDVDZr, X86::VPSHLDVDZm, 0 },
{ X86::VPSHLDVQZr, X86::VPSHLDVQZm, 0 },
{ X86::VPSHLDVWZr, X86::VPSHLDVWZm, 0 },
{ X86::VPSHRDVDZr, X86::VPSHRDVDZm, 0 },
{ X86::VPSHRDVQZr, X86::VPSHRDVQZm, 0 },
{ X86::VPSHRDVWZr, X86::VPSHRDVWZm, 0 },
{ X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 },
{ X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 },
@@ -2861,6 +3068,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 },
{ X86::VPMADD52HUQZ256r, X86::VPMADD52HUQZ256m, 0 },
{ X86::VPMADD52LUQZ256r, X86::VPMADD52LUQZ256m, 0 },
{ X86::VPSHLDVDZ256r, X86::VPSHLDVDZ256m, 0 },
{ X86::VPSHLDVQZ256r, X86::VPSHLDVQZ256m, 0 },
{ X86::VPSHLDVWZ256r, X86::VPSHLDVWZ256m, 0 },
{ X86::VPSHRDVDZ256r, X86::VPSHRDVDZ256m, 0 },
{ X86::VPSHRDVQZ256r, X86::VPSHRDVQZ256m, 0 },
{ X86::VPSHRDVWZ256r, X86::VPSHRDVWZ256m, 0 },
{ X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 },
{ X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 },
@@ -2885,6 +3098,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 },
{ X86::VPMADD52HUQZ128r, X86::VPMADD52HUQZ128m, 0 },
{ X86::VPMADD52LUQZ128r, X86::VPMADD52LUQZ128m, 0 },
{ X86::VPSHLDVDZ128r, X86::VPSHLDVDZ128m, 0 },
{ X86::VPSHLDVQZ128r, X86::VPSHLDVQZ128m, 0 },
{ X86::VPSHLDVWZ128r, X86::VPSHLDVWZ128m, 0 },
{ X86::VPSHRDVDZ128r, X86::VPSHRDVDZ128m, 0 },
{ X86::VPSHRDVQZ128r, X86::VPSHRDVQZ128m, 0 },
{ X86::VPSHRDVWZ128r, X86::VPSHRDVWZ128m, 0 },
{ X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 },
{ X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 },


@@ -1798,8 +1798,7 @@ entry:
define <2 x double> @test_mm_cvtu32_sd(<2 x double> %__A, i32 %__B) {
; X86-LABEL: test_mm_cvtu32_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtusi2sdl %eax, %xmm0, %xmm0
; X86-NEXT: vcvtusi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test_mm_cvtu32_sd:
@@ -1836,8 +1835,7 @@ entry:
define <4 x float> @test_mm_cvtu32_ss(<4 x float> %__A, i32 %__B) {
; X86-LABEL: test_mm_cvtu32_ss:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtusi2ssl %eax, %xmm0, %xmm0
; X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test_mm_cvtu32_ss:


@@ -217,6 +217,7 @@ top:
; Make sure we are making a smart choice regarding undef registers and
; hiding the false dependency behind a true dependency
; TODO: We shouldn't be folding the load here.
define double @truedeps(float %arg) {
top:
tail call void asm sideeffect "", "~{xmm6},~{dirflag},~{fpsr},~{flags}"()
@@ -227,8 +228,8 @@ top:
%tmp1 = fpext float %arg to double
ret double %tmp1
;AVX-LABEL:@truedeps
;AVX-NOT: vxorps
;AVX: vcvtss2sd [[XMM0:%xmm[0-9]+]], [[XMM0]], {{%xmm[0-9]+}}
;AVX: vxorps [[XMM6:%xmm6]], [[XMM6]], [[XMM6]]
;AVX: vcvtss2sd {{.*}}, [[XMM6]], {{%xmm[0-9]+}}
}
; Make sure we are making a smart choice regarding undef registers and
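
The truedeps test above captures the second concern from the commit message: once the load is folded, the scalar convert still reads its merge destination, and if that register's last write is undef the convert inherits a false dependency on whatever wrote it. Below is a toy sketch of that fold-or-not policy decision, with hypothetical names and types — the real check added in r334175 lives in the peephole pass and does not cover stack folding.

#include <cstdio>

// Toy model of the decision. PartialDestWrite marks instructions like
// VCVTSS2SD whose destination's upper lanes are merged from a source
// register rather than fully written.
struct InstDesc {
  const char *Name;
  bool PartialDestWrite;
};

// Folding the load is fine unless the merge source is undef: then the
// folded form manufactures a dependency on that register's last writer.
static bool shouldFoldLoad(const InstDesc &I, bool MergeSrcIsUndef) {
  return !(I.PartialDestWrite && MergeSrcIsUndef);
}

int main() {
  const InstDesc Cvt = {"VCVTSS2SDrr", true};
  std::printf("%s: fold with undef merge source? %s\n", Cvt.Name,
              shouldFoldLoad(Cvt, true) ? "yes" : "no");
  return 0;
}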


@@ -78,8 +78,7 @@ define double @single_to_double_rm_optsize(float* %x) optsize {
;
; AVX-LABEL: single_to_double_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%0 = load float, float* %x, align 4
@@ -113,8 +112,7 @@ define float @double_to_single_rm_optsize(double* %x) optsize {
;
; AVX-LABEL: double_to_single_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%0 = load double, double* %x, align 8


@@ -690,16 +690,10 @@ define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind {
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: test_mm_cvtsi32_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: test_mm_cvtsi32_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
; X86-AVX512-NEXT: retl
; X86-AVX-LABEL: test_mm_cvtsi32_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_cvtsi32_ss:
; X64-SSE: # %bb.0:


@@ -1405,8 +1405,7 @@ define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
; X86-AVX-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovaps (%eax), %xmm1
; X86-AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
@@ -1416,8 +1415,7 @@ define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
;
; X64-AVX-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rdi), %xmm1
; X64-AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT: retq
%a1 = load <2 x double>, <2 x double>* %p1
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
@@ -1445,16 +1443,10 @@ define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: test_mm_cvtsi32_sd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: test_mm_cvtsi32_sd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
; X86-AVX512-NEXT: retl
; X86-AVX-LABEL: test_mm_cvtsi32_sd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_cvtsi32_sd:
; X64-SSE: # %bb.0: