[X86] Recognise ROTLI/ROTRI rotations as faux shuffles
Allows us to combine rotations with shuffles. One of many things necessary to fix PR44379 (lowering shuffles to rotations).

commit d1b038d961 (parent f6fd87fc2b)
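For context on why this works at all: rotating a lane by a whole number of bytes merely permutes that lane's bytes, so a whole-byte VROTLI/VROTRI carries exactly the same information as a byte shuffle mask. A minimal standalone sketch of the equivalence (illustrative C++, not part of the commit; assumes a little-endian host and a rotate amount that is a nonzero multiple of 8):

#include <cassert>
#include <cstdint>
#include <cstring>

// Rotate a 64-bit lane right by Bits (0 < Bits < 64).
static uint64_t RotateRight(uint64_t V, unsigned Bits) {
  return (V >> Bits) | (V << (64 - Bits));
}

// The same rotate expressed as a byte permutation of the lane's
// little-endian representation: result byte k is source byte (k + Bits/8) % 8.
static uint64_t RotateRightAsByteShuffle(uint64_t V, unsigned Bits) {
  uint8_t Src[8], Dst[8];
  std::memcpy(Src, &V, 8); // little-endian host assumed
  unsigned Offset = Bits / 8;
  for (unsigned K = 0; K != 8; ++K)
    Dst[K] = Src[(K + Offset) % 8];
  uint64_t R;
  std::memcpy(&R, Dst, 8);
  return R;
}

int main() {
  uint64_t V = 0x0123456789ABCDEFULL;
  // Both forms yield 0x456789ABCDEF0123 for a 48-bit (6-byte) rotate.
  assert(RotateRight(V, 48) == RotateRightAsByteShuffle(V, 48));
}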
@@ -7443,6 +7443,24 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
     }
     return true;
   }
+  case X86ISD::VROTLI:
+  case X86ISD::VROTRI: {
+    // We can only decode 'whole byte' bit rotates as shuffles.
+    uint64_t RotateVal = N.getConstantOperandAPInt(1).urem(NumBitsPerElt);
+    if ((RotateVal % 8) != 0)
+      return false;
+    Ops.push_back(N.getOperand(0));
+    int NumBytesPerElt = NumBitsPerElt / 8;
+    int Offset = RotateVal / 8;
+    Offset = (X86ISD::VROTLI == Opcode ? NumBytesPerElt - Offset : Offset);
+    for (int i = 0; i != NumElts; ++i) {
+      int BaseIdx = i * NumBytesPerElt;
+      for (int j = 0; j != NumBytesPerElt; ++j) {
+        Mask.push_back(BaseIdx + ((Offset + j) % NumBytesPerElt));
+      }
+    }
+    return true;
+  }
   case X86ISD::VBROADCAST: {
     SDValue Src = N.getOperand(0);
     MVT SrcVT = Src.getSimpleValueType();
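The mask construction in the new case can be sanity-checked in isolation. The RotateMask helper below is a hypothetical standalone mirror of the loop above (same variable names; the Ops/Mask plumbing of getFauxShuffleMask is elided), printing the byte masks for the element types and rotate amounts used in the tests that follow:

#include <cstdio>
#include <vector>

static std::vector<int> RotateMask(bool IsLeft, unsigned RotateVal,
                                   int NumElts, int NumBitsPerElt) {
  std::vector<int> Mask;
  if ((RotateVal % 8) != 0)
    return Mask; // only whole-byte rotates decode as shuffles
  int NumBytesPerElt = NumBitsPerElt / 8;
  int Offset = RotateVal / 8;
  // Mask entry k names the source byte for output byte k, so a left rotate
  // by N bytes reads the same bytes as a right rotate by (EltBytes - N).
  if (IsLeft)
    Offset = NumBytesPerElt - Offset;
  for (int i = 0; i != NumElts; ++i)
    for (int j = 0; j != NumBytesPerElt; ++j)
      Mask.push_back(i * NumBytesPerElt + ((Offset + j) % NumBytesPerElt));
  return Mask;
}

int main() {
  // v2i64 rotated right by 48 bits -> 6 7 0 1 2 3 4 5 14 15 8 9 10 11 12 13
  for (int M : RotateMask(/*IsLeft=*/false, 48, 2, 64))
    std::printf("%d ", M);
  std::printf("\n");
  // v4i32 rotated left by 8 bits -> 3 0 1 2 7 4 5 6 11 8 9 10 15 12 13 14
  for (int M : RotateMask(/*IsLeft=*/true, 8, 4, 32))
    std::printf("%d ", M);
  std::printf("\n");
}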
@@ -79,8 +79,7 @@ define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_unpcklwd(<16 x i16> %a
 define <16 x i8> @combine_shuffle_vrotri_v2i64(<2 x i64> %a0) {
 ; CHECK-LABEL: combine_shuffle_vrotri_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vprorq $48, %xmm0, %xmm0
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[13,12,11,10,9,8,15,14,5,4,3,2,1,0,7,6]
 ; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> <i64 48, i64 48>)
   %2 = bitcast <2 x i64> %1 to <16 x i8>
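The updated CHECK line is the result of folding the test's byte-reverse shuffle through the rotate's byte mask. A small sketch of that composition (again assuming the convention that mask entry i names the source byte feeding output byte i):

#include <cstdio>

int main() {
  // vprorq $48 on v2i64 as a byte shuffle (see the RotateMask sketch above).
  int Rotate[16] = {6, 7, 0, 1, 2, 3, 4, 5, 14, 15, 8, 9, 10, 11, 12, 13};
  // The test's byte reverse: xmm0[15,14,...,0].
  int Reverse[16];
  for (int i = 0; i != 16; ++i)
    Reverse[i] = 15 - i;
  // Fold the shuffle-of-shuffle into one mask; this prints
  // 13 12 11 10 9 8 15 14 5 4 3 2 1 0 7 6, matching the new CHECK line.
  for (int i = 0; i != 16; ++i)
    std::printf("%d ", Rotate[Reverse[i]]);
  std::printf("\n");
}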
@@ -92,8 +91,7 @@ declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
 define <16 x i8> @combine_shuffle_vrotli_v4i32(<4 x i32> %a0) {
 ; CHECK-LABEL: combine_shuffle_vrotli_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vprold $8, %xmm0, %xmm0
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3]
 ; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> %a0, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
   %2 = bitcast <4 x i32> %1 to <16 x i8>
@@ -255,8 +255,7 @@ define <4 x i32> @combine_vpperm_10zz32BA(<4 x i32> %a0, <4 x i32> %a1) {
 define <16 x i8> @combine_shuffle_proti_v2i64(<2 x i64> %a0) {
 ; CHECK-LABEL: combine_shuffle_proti_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vprotq $16, %xmm0, %xmm0
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[13,12,11,10,9,8,15,14,5,4,3,2,1,0,7,6]
 ; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a0, <2 x i64> %a0, <2 x i64> <i64 48, i64 48>)
   %2 = bitcast <2 x i64> %1 to <16 x i8>
@@ -268,8 +267,7 @@ declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
 define <16 x i8> @combine_shuffle_proti_v4i32(<4 x i32> %a0) {
 ; CHECK-LABEL: combine_shuffle_proti_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vprotd $8, %xmm0, %xmm0
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3]
 ; CHECK-NEXT:    ret{{[l|q]}}
   %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> %a0, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
   %2 = bitcast <4 x i32> %1 to <16 x i8>
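These last two tests are the XOP variants: vprotq and vprotd are XOP's immediate rotate instructions, but once the rotate is decoded as a faux shuffle the mask composition is identical to the AVX-512 vprorq/vprold cases above, which is why the resulting vpshufb masks match.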