1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[X86] Emit PACKUS directly from the v16i8 LowerMULH code instead of using a shuffle.

llvm-svn: 347967
This commit is contained in:
Craig Topper 2018-11-30 08:32:05 +00:00
parent 44b82964e7
commit af3d0bf577
3 changed files with 3 additions and 10 deletions

View File

@@ -23766,12 +23766,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);
// Bitcast back to VT and then pack all the even elements from Lo and Hi.
// Shuffle lowering should turn this into PACKUS.
RLo = DAG.getBitcast(VT, RLo);
RHi = DAG.getBitcast(VT, RHi);
return DAG.getVectorShuffle(VT, dl, RLo, RHi,
{ 0, 2, 4, 6, 8, 10, 12, 14,
16, 18, 20, 22, 24, 26, 28, 30});
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {

View File

@@ -3146,9 +3146,8 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; XOP-NEXT: vpmovsxbw %xmm1, %xmm1
; XOP-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
; XOP-NEXT: vpsrlw $8, %xmm1, %xmm1
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm2[1,3,5,7,9,11,13,15],xmm1[1,3,5,7,9,11,13,15]
; XOP-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm1
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0

View File

@@ -719,9 +719,8 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; XOP-NEXT: vmovd %eax, %xmm1
; XOP-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; XOP-NEXT: vpmullw %xmm1, %xmm2, %xmm1
; XOP-NEXT: vpsrlw $8, %xmm1, %xmm1
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15],xmm2[1,3,5,7,9,11,13,15]
; XOP-NEXT: movl $249, %eax
; XOP-NEXT: vmovd %eax, %xmm2
; XOP-NEXT: vpshlb %xmm2, %xmm1, %xmm1