
[X86] Add MOVSDrr->MOVLPDrm entry to load folding table. Add custom handling to turn UNPCKLPDrr->MOVHPDrm when the load is under-aligned.

If the load is aligned, we can turn UNPCKLPDrr into UNPCKLPDrm.

llvm-svn: 365287
Craig Topper 2019-07-08 02:10:20 +00:00
parent d42ee3cbb2
commit d6555d6106
3 changed files with 23 additions and 9 deletions
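
The rewrites below are legal because both memory forms read only the low 8 bytes that the register form actually uses: MOVLPD loads 8 bytes into the low lane and MOVHPD loads 8 bytes into the high lane, neither with an alignment requirement, whereas the folded UNPCKLPDrm form would need a 16-byte aligned load. A minimal scalar model of that equivalence (plain C++, not LLVM code; the struct and function names here are made up for this sketch):

#include <cassert>

// Two-lane double vector, modeled as a plain struct.
struct V2d { double lo, hi; };

// unpcklpd dst, src        -> { dst.lo, src.lo }
static V2d unpcklpd(V2d dst, V2d src) { return {dst.lo, src.lo}; }

// movhpd dst, qword [mem]  -> { dst.lo, mem64 }; reads only 8 bytes,
// with no alignment requirement on mem.
static V2d movhpd(V2d dst, const double *mem) { return {dst.lo, *mem}; }

// movsd dst, src           -> { src.lo, dst.hi }
// movlpd dst, qword [mem]  -> { mem64, dst.hi }; same idea for the low lane.
static V2d movlpd(V2d dst, const double *mem) { return {*mem, dst.hi}; }

int main() {
  double buf[2] = {1.5, 2.5};        // may be under-aligned in real code
  V2d a = {3.5, 4.5};
  V2d wholeLoad = {buf[0], buf[1]};  // what a folded 16-byte load would read

  V2d viaUnpck  = unpcklpd(a, wholeLoad);
  V2d viaMovhpd = movhpd(a, buf);    // reads only buf[0]
  assert(viaUnpck.lo == viaMovhpd.lo && viaUnpck.hi == viaMovhpd.hi);

  V2d viaMovlpd = movlpd(a, buf);    // matches movsd(a, wholeLoad)
  assert(viaMovlpd.lo == buf[0] && viaMovlpd.hi == a.hi);
  return 0;
}
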


@@ -1365,6 +1365,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, TB_NO_REVERSE },
{ X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
{ X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE },
{ X86::MOVSDrr, X86::MOVLPDrm, TB_NO_REVERSE },
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
@@ -1979,6 +1980,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMOVDQU8Zrrkz, X86::VMOVDQU8Zrmkz, TB_NO_REVERSE },
{ X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE },
{ X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE },
{ X86::VMOVSDZrr, X86::VMOVLPDZ128rm, TB_NO_REVERSE },
{ X86::VMOVSDrr, X86::VMOVLPDrm, TB_NO_REVERSE },
{ X86::VMOVSHDUPZ128rrkz, X86::VMOVSHDUPZ128rmkz, 0 },
{ X86::VMOVSHDUPZ256rrkz, X86::VMOVSHDUPZ256rmkz, 0 },
{ X86::VMOVSHDUPZrrkz, X86::VMOVSHDUPZrmkz, 0 },
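
Each row above pairs a register-form opcode with its memory-form equivalent plus flags, such as an alignment requirement on the folded load (TB_ALIGN_16) or a marker that the entry only applies in the register-to-memory direction (TB_NO_REVERSE). A rough sketch of how such a table might be consulted (simplified; the opcode values, flag encoding, and lookup function below are invented for illustration and are not LLVM's actual fold-table API):

#include <cstdint>
#include <cstdio>

// Simplified stand-ins for the opcode enum and fold-table flags.
enum Opcode : uint16_t { MOVSDrr, MOVLPDrm, UNPCKLPDrr, UNPCKLPDrm };
enum : uint16_t { TB_ALIGN_16 = 1, TB_NO_REVERSE = 2 };

struct FoldEntry { uint16_t RegOp, MemOp, Flags; };

static const FoldEntry FoldTable[] = {
    {MOVSDrr,    MOVLPDrm,   TB_NO_REVERSE},
    {UNPCKLPDrr, UNPCKLPDrm, TB_ALIGN_16},
};

// Return the memory-form opcode for RegOp, or -1 if no entry applies,
// e.g. because the load is not aligned enough for a TB_ALIGN_16 entry.
static int foldedOpcode(uint16_t RegOp, unsigned LoadAlign) {
  for (const FoldEntry &E : FoldTable) {
    if (E.RegOp != RegOp)
      continue;
    if ((E.Flags & TB_ALIGN_16) && LoadAlign < 16)
      return -1;  // table fold rejected; a custom path may still apply
    return E.MemOp;
  }
  return -1;
}

int main() {
  std::printf("%d\n", foldedOpcode(UNPCKLPDrr, 16)); // folds to UNPCKLPDrm
  std::printf("%d\n", foldedOpcode(UNPCKLPDrr, 8));  // -1: left to the custom
                                                     // MOVHPD path below
  return 0;
}

The UNPCKLPDrr->MOVHPDrm rewrite cannot go in this table because UNPCKLPDrr already maps to UNPCKLPDrm, which is why the next file handles it as a custom fold.
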


@@ -4603,7 +4603,22 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
}
}
break;
};
case X86::UNPCKLPDrr:
// If we won't be able to fold this to the memory form of UNPCKL, use
// MOVHPD instead. Done as custom because we can't have this in the load
// table twice.
if (OpNum == 2) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
if ((Size == 0 || Size >= 16) && RCSize >= 16 && Align < 16) {
MachineInstr *NewMI =
FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this, 8);
return NewMI;
}
}
break;
}
return nullptr;
}


@@ -2441,8 +2441,7 @@ define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) {
define <4 x float> @shuffle_mem_v4f32_0145(<4 x float> %a, <4 x float>* %pb) {
; SSE-LABEL: shuffle_mem_v4f32_0145:
; SSE: # %bb.0:
; SSE-NEXT: movups (%rdi), %xmm1
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_mem_v4f32_0145:
@@ -2457,20 +2456,17 @@ define <4 x float> @shuffle_mem_v4f32_0145(<4 x float> %a, <4 x float>* %pb) {
define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
; SSE2-LABEL: shuffle_mem_v4f32_4523:
; SSE2: # %bb.0:
; SSE2-NEXT: movupd (%rdi), %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_mem_v4f32_4523:
; SSE3: # %bb.0:
; SSE3-NEXT: movupd (%rdi), %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_mem_v4f32_4523:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movupd (%rdi), %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_mem_v4f32_4523: