mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[X86][SSE] Added missing PACKSS/PACKUS intrinsic schedules
Improves Atom scheduler test coverage (to make it easier to upgrade them for PR32431). Checked against Agner's tables that these actually match the UNPACK schedules, but it is better to include a separate class.
llvm-svn: 309701
This commit is contained in:
parent
da3779ac8d
commit
b385b3e7e3
@ -4249,8 +4249,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst,
|
||||
(OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
(OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))],
|
||||
IIC_SSE_PACK>, Sched<[WriteShuffle]>;
|
||||
def rm : PDI<opc, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
!if(Is2Addr,
|
||||
@ -4259,8 +4259,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst,
|
||||
(OutVT (OpNode (ArgVT VR128:$src1),
|
||||
(bitconvert (ld_frag addr:$src2)))))]>,
|
||||
Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
(bitconvert (ld_frag addr:$src2)))))],
|
||||
IIC_SSE_PACK>, Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
|
||||
@ -4292,8 +4292,8 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst,
|
||||
(OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
|
||||
Sched<[WriteShuffle]>;
|
||||
(OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))],
|
||||
IIC_SSE_PACK>, Sched<[WriteShuffle]>;
|
||||
def rm : SS48I<opc, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
!if(Is2Addr,
|
||||
@ -4302,8 +4302,8 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst,
|
||||
(OutVT (OpNode (ArgVT VR128:$src1),
|
||||
(bitconvert (ld_frag addr:$src2)))))]>,
|
||||
Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
(bitconvert (ld_frag addr:$src2)))))],
|
||||
IIC_SSE_PACK>, Sched<[WriteShuffleLd, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
|
||||
|
@ -299,6 +299,7 @@ def IIC_SSE_SHUFP : InstrItinClass;
|
||||
def IIC_SSE_PSHUF_RI : InstrItinClass;
|
||||
def IIC_SSE_PSHUF_MI : InstrItinClass;
|
||||
|
||||
def IIC_SSE_PACK : InstrItinClass;
|
||||
def IIC_SSE_UNPCK : InstrItinClass;
|
||||
|
||||
def IIC_SSE_MOVMSK : InstrItinClass;
|
||||
|
@ -212,6 +212,7 @@ def AtomItineraries : ProcessorItineraries<
|
||||
InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >,
|
||||
InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >,
|
||||
|
||||
InstrItinData<IIC_SSE_PACK, [InstrStage<1, [Port0]>] >,
|
||||
InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
|
||||
|
||||
InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<70, [Port0, Port1]>] >,
|
||||
|
@ -2816,12 +2816,8 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||
;
|
||||
; ATOM-LABEL: test_packssdw:
|
||||
; ATOM: # BB#0:
|
||||
; ATOM-NEXT: packssdw %xmm1, %xmm0
|
||||
; ATOM-NEXT: packssdw (%rdi), %xmm0
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
|
||||
; ATOM-NEXT: packssdw (%rdi), %xmm0 # sched: [1:1.00]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
@ -2874,12 +2870,8 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||
;
|
||||
; ATOM-LABEL: test_packsswb:
|
||||
; ATOM: # BB#0:
|
||||
; ATOM-NEXT: packsswb %xmm1, %xmm0
|
||||
; ATOM-NEXT: packsswb (%rdi), %xmm0
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
|
||||
; ATOM-NEXT: packsswb (%rdi), %xmm0 # sched: [1:1.00]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
@ -2932,12 +2924,8 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||
;
|
||||
; ATOM-LABEL: test_packuswb:
|
||||
; ATOM: # BB#0:
|
||||
; ATOM-NEXT: packuswb %xmm1, %xmm0
|
||||
; ATOM-NEXT: packuswb (%rdi), %xmm0
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
|
||||
; ATOM-NEXT: packuswb (%rdi), %xmm0 # sched: [1:1.00]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
|
Loading…
Reference in New Issue
Block a user