1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[X86][SSE] Added missing PACKSS/PACKUS intrinsic schedules

Improves Atom scheduler test coverage (to make it easier to upgrade the Atom schedules for PR32431).

Checked against Agner Fog's instruction tables that these actually match the UNPACK schedules, but it is better to give PACK its own separate itinerary class.

llvm-svn: 309701
This commit is contained in:
Simon Pilgrim 2017-08-01 16:47:48 +00:00
parent da3779ac8d
commit b385b3e7e3
4 changed files with 16 additions and 26 deletions

View File

@ -4249,8 +4249,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
Sched<[WriteShuffle]>;
(OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))],
IIC_SSE_PACK>, Sched<[WriteShuffle]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@ -4259,8 +4259,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(OutVT (OpNode (ArgVT VR128:$src1),
(bitconvert (ld_frag addr:$src2)))))]>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
(bitconvert (ld_frag addr:$src2)))))],
IIC_SSE_PACK>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
@ -4292,8 +4292,8 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
Sched<[WriteShuffle]>;
(OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))],
IIC_SSE_PACK>, Sched<[WriteShuffle]>;
def rm : SS48I<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@ -4302,8 +4302,8 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(OutVT (OpNode (ArgVT VR128:$src1),
(bitconvert (ld_frag addr:$src2)))))]>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
(bitconvert (ld_frag addr:$src2)))))],
IIC_SSE_PACK>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,

View File

@ -299,6 +299,7 @@ def IIC_SSE_SHUFP : InstrItinClass;
def IIC_SSE_PSHUF_RI : InstrItinClass;
def IIC_SSE_PSHUF_MI : InstrItinClass;
def IIC_SSE_PACK : InstrItinClass;
def IIC_SSE_UNPCK : InstrItinClass;
def IIC_SSE_MOVMSK : InstrItinClass;

View File

@ -212,6 +212,7 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_PACK, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<70, [Port0, Port1]>] >,

View File

@ -2816,12 +2816,8 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; ATOM-LABEL: test_packssdw:
; ATOM: # BB#0:
; ATOM-NEXT: packssdw %xmm1, %xmm0
; ATOM-NEXT: packssdw (%rdi), %xmm0
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
; ATOM-NEXT: packssdw (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
@ -2874,12 +2870,8 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; ATOM-LABEL: test_packsswb:
; ATOM: # BB#0:
; ATOM-NEXT: packsswb %xmm1, %xmm0
; ATOM-NEXT: packsswb (%rdi), %xmm0
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
; ATOM-NEXT: packsswb (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
@ -2932,12 +2924,8 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; ATOM-LABEL: test_packuswb:
; ATOM: # BB#0:
; ATOM-NEXT: packuswb %xmm1, %xmm0
; ATOM-NEXT: packuswb (%rdi), %xmm0
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
; ATOM-NEXT: packuswb (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]