mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[X86][AVX] Attempt to lower v16i32/v16f32 shuffles with lowerShuffleAsRepeatedMaskAndLanePermute
Avoids prematurely creating variable-mask VPERMPS/VPERMD shuffles. Fixes PR46249.
This commit is contained in:
parent
d1c97bc19b
commit
1b7526b7e8
@ -17151,6 +17151,12 @@ static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
|
||||
}
|
||||
|
||||
// Try to create an in-lane repeating shuffle mask and then shuffle the
|
||||
// results into the target lanes.
|
||||
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
|
||||
DL, MVT::v16f32, V1, V2, Mask, Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
// If we have a single input shuffle with different shuffle patterns in the
|
||||
// 128-bit lanes and don't lane cross, use variable mask VPERMILPS.
|
||||
if (V2.isUndef() &&
|
||||
@ -17288,6 +17294,13 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
CastV1, CastV2, DAG);
|
||||
return DAG.getBitcast(MVT::v16i32, ShufPS);
|
||||
}
|
||||
|
||||
// Try to create an in-lane repeating shuffle mask and then shuffle the
|
||||
// results into the target lanes.
|
||||
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
|
||||
DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
// If we have AVX512F support, we can use VEXPAND.
|
||||
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2,
|
||||
DAG, Subtarget))
|
||||
@ -17296,6 +17309,7 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
|
||||
Zeroable, Subtarget, DAG))
|
||||
return Blend;
|
||||
|
||||
return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
|
||||
}
|
||||
|
||||
|
@ -264,8 +264,8 @@ define <16 x float> @shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08
|
||||
define <16 x i32> @shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04(<16 x i32> %a) {
|
||||
; ALL-LABEL: shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} zmm1 = [11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4]
|
||||
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
|
||||
; ALL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
|
||||
; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3]
|
||||
; ALL-NEXT: retq
|
||||
%1 = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
|
||||
ret <16 x i32> %1
|
||||
@ -274,8 +274,8 @@ define <16 x i32> @shuffle_v16i32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_0
|
||||
define <16 x float> @shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04(<16 x float> %a) {
|
||||
; ALL-LABEL: shuffle_v16f32_0b_0a_09_08_0f_0e_0d_0c_03_02_01_00_07_06_05_04:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} zmm1 = [11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4]
|
||||
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
|
||||
; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
|
||||
; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3]
|
||||
; ALL-NEXT: retq
|
||||
%1 = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
|
||||
ret <16 x float> %1
|
||||
|
Loading…
Reference in New Issue
Block a user