[X86] Bring consistent naming to the SSE/AVX and AVX512 PALIGNR instructions. Then add shuffle decode printing for the EVEX forms, which is made easier by having the naming structure more similar to other instructions.
llvm-svn: 272249
This commit is contained in:
parent 830df4dc0b
commit a336600f12
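
For orientation: the rename brings the SSE/AVX PALIGNR definitions in line with the rri/rmi operand-suffix scheme used by most other X86 instruction definitions, which the pre-existing EVEX names already followed. A summary of the mapping, compiled from the hunks below:

// Old name           New name        Form
// PALIGNR128rr   ->  PALIGNRrri      SSSE3, reg/reg/imm
// PALIGNR128rm   ->  PALIGNRrmi      SSSE3, reg/mem/imm
// VPALIGNR128rr  ->  VPALIGNRrri     AVX (VEX.128)
// VPALIGNR128rm  ->  VPALIGNRrmi     AVX (VEX.128)
// VPALIGNR256rr  ->  VPALIGNRYrri    AVX2 (VEX.256)
// VPALIGNR256rm  ->  VPALIGNRYrmi    AVX2 (VEX.256)
// EVEX (unchanged): VPALIGNRZ128rri, VPALIGNRZ256rri, VPALIGNRZrri, ...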
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -342,14 +342,10 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
                       ShuffleMask);
     break;
 
-  case X86::PALIGNR128rr:
-  case X86::VPALIGNR128rr:
-  case X86::VPALIGNR256rr:
+  CASE_SHUF(PALIGNR, rri)
     Src1Name = getRegName(MI->getOperand(2).getReg());
     // FALL THROUGH.
-  case X86::PALIGNR128rm:
-  case X86::VPALIGNR128rm:
-  case X86::VPALIGNR256rm:
+  CASE_SHUF(PALIGNR, rmi)
     Src2Name = getRegName(MI->getOperand(1).getReg());
     DestName = getRegName(MI->getOperand(0).getReg());
     if (MI->getOperand(NumOperands - 1).isImm())
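
The CASE_SHUF macro is what the consistent naming buys: one case-label macro can now cover the SSE, VEX, and EVEX opcodes of a shuffle at once. Below is a simplified sketch of how such a macro can expand; the actual CASE_SHUF in X86InstComments.cpp also pulls in the masked EVEX variants (k/kz suffixes) through helper macros:

// Sketch only: fans a shuffle mnemonic out across its encodings. This
// works only because every form now shares the same rri/rmi suffix.
#define CASE_SHUF(Inst, suf)      \
  case X86::Inst##suf:            /* SSE:      PALIGNRrri      */ \
  case X86::V##Inst##suf:         /* VEX.128:  VPALIGNRrri     */ \
  case X86::V##Inst##Y##suf:      /* VEX.256:  VPALIGNRYrri    */ \
  case X86::V##Inst##Z128##suf:   /* EVEX.128: VPALIGNRZ128rri */ \
  case X86::V##Inst##Z256##suf:   /* EVEX.256: VPALIGNRZ256rri */ \
  case X86::V##Inst##Z##suf:      /* EVEX.512: VPALIGNRZrri    */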
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -7224,7 +7224,7 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>,
 defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>,
                 EVEX_CD8<64, CD8VF>, VEX_W;
 
-multiclass avx512_vpalign_lowering<X86VectorVTInfo _ , list<Predicate> p>{
+multiclass avx512_vpalignr_lowering<X86VectorVTInfo _ , list<Predicate> p>{
   let Predicates = p in
     def NAME#_.VTName#rri:
         Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
@@ -7232,18 +7232,18 @@ multiclass avx512_vpalign_lowering<X86VectorVTInfo _ , list<Predicate> p>{
              _.RC:$src1, _.RC:$src2, imm:$imm)>;
 }
 
-multiclass avx512_vpalign_lowering_common<AVX512VLVectorVTInfo _>:
-      avx512_vpalign_lowering<_.info512, [HasBWI]>,
-      avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>,
-      avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>;
+multiclass avx512_vpalignr_lowering_common<AVX512VLVectorVTInfo _>:
+      avx512_vpalignr_lowering<_.info512, [HasBWI]>,
+      avx512_vpalignr_lowering<_.info128, [HasBWI, HasVLX]>,
+      avx512_vpalignr_lowering<_.info256, [HasBWI, HasVLX]>;
 
-defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
+defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
                                         avx512vl_i8_info, avx512vl_i8_info>,
-              avx512_vpalign_lowering_common<avx512vl_i16_info>,
-              avx512_vpalign_lowering_common<avx512vl_i32_info>,
-              avx512_vpalign_lowering_common<avx512vl_f32_info>,
-              avx512_vpalign_lowering_common<avx512vl_i64_info>,
-              avx512_vpalign_lowering_common<avx512vl_f64_info>,
+              avx512_vpalignr_lowering_common<avx512vl_i16_info>,
+              avx512_vpalignr_lowering_common<avx512vl_i32_info>,
+              avx512_vpalignr_lowering_common<avx512vl_f32_info>,
+              avx512_vpalignr_lowering_common<avx512vl_i64_info>,
+              avx512_vpalignr_lowering_common<avx512vl_f64_info>,
               EVEX_CD8<8, CD8VF>;
 
 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1029,7 +1029,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::PADDUSBrr,       X86::PADDUSBrm,       TB_ALIGN_16 },
     { X86::PADDUSWrr,       X86::PADDUSWrm,       TB_ALIGN_16 },
     { X86::PADDWrr,         X86::PADDWrm,         TB_ALIGN_16 },
-    { X86::PALIGNR128rr,    X86::PALIGNR128rm,    TB_ALIGN_16 },
+    { X86::PALIGNRrri,      X86::PALIGNRrmi,      TB_ALIGN_16 },
     { X86::PANDNrr,         X86::PANDNrm,         TB_ALIGN_16 },
     { X86::PANDrr,          X86::PANDrm,          TB_ALIGN_16 },
     { X86::PAVGBrr,         X86::PAVGBrm,         TB_ALIGN_16 },
@@ -1326,7 +1326,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPADDUSBrr,      X86::VPADDUSBrm,      0 },
     { X86::VPADDUSWrr,      X86::VPADDUSWrm,      0 },
     { X86::VPADDWrr,        X86::VPADDWrm,        0 },
-    { X86::VPALIGNR128rr,   X86::VPALIGNR128rm,   0 },
+    { X86::VPALIGNRrri,     X86::VPALIGNRrmi,     0 },
     { X86::VPANDNrr,        X86::VPANDNrm,        0 },
     { X86::VPANDrr,         X86::VPANDrm,         0 },
     { X86::VPAVGBrr,        X86::VPAVGBrm,        0 },
@@ -1482,7 +1482,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
    { X86::VPADDUSBYrr,      X86::VPADDUSBYrm,     0 },
    { X86::VPADDUSWYrr,      X86::VPADDUSWYrm,     0 },
    { X86::VPADDWYrr,        X86::VPADDWYrm,       0 },
-   { X86::VPALIGNR256rr,    X86::VPALIGNR256rm,   0 },
+   { X86::VPALIGNRYrri,     X86::VPALIGNRYrmi,    0 },
    { X86::VPANDNYrr,        X86::VPANDNYrm,       0 },
    { X86::VPANDYrr,         X86::VPANDYrm,        0 },
    { X86::VPAVGBYrr,        X86::VPAVGBYrm,       0 },
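
These tables drive register-to-memory operand folding: each entry pairs a register-form opcode with its memory form plus flags such as TB_ALIGN_16, which records that the folded load must be 16-byte aligned (the VEX entries use 0 because AVX allows unaligned memory operands). A minimal sketch of the lookup, assuming a flat table; the struct mirrors the shape of the entries above, but the helper is hypothetical, not the real X86InstrInfo API:

#include <cstdint>

struct MemoryFoldTableEntry {
  uint16_t RegOp;  // register form, e.g. X86::PALIGNRrri
  uint16_t MemOp;  // memory form,   e.g. X86::PALIGNRrmi
  uint16_t Flags;  // e.g. TB_ALIGN_16 for SSE ops needing aligned loads
};

// Hypothetical helper: find the memory form for a register opcode.
const MemoryFoldTableEntry *lookupFoldEntry(const MemoryFoldTableEntry *Table,
                                            unsigned Size, unsigned RegOp) {
  for (unsigned I = 0; I != Size; ++I)
    if (Table[I].RegOp == RegOp)
      return &Table[I];
  return nullptr; // not foldable
}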
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5674,7 +5674,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
 
 multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
   let hasSideEffects = 0 in {
-  def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+  def rri : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2, u8imm:$src3),
       !if(Is2Addr,
         !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -5682,7 +5682,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
       [], IIC_SSE_PALIGNRR>, Sched<[WriteShuffle]>;
   let mayLoad = 1 in
-  def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+  def rmi : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
       !if(Is2Addr,
         !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -5694,13 +5694,13 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
 
 multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
   let hasSideEffects = 0 in {
-  def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
+  def Yrri : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
       (ins VR256:$src1, VR256:$src2, u8imm:$src3),
       !strconcat(asm,
         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
       []>, Sched<[WriteShuffle]>;
   let mayLoad = 1 in
-  def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
+  def Yrmi : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2, u8imm:$src3),
       !strconcat(asm,
         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -5709,43 +5709,43 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
 }
 
 let Predicates = [HasAVX] in
-  defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
+  defm VPALIGNR : ssse3_palignr<"vpalignr", 0>, VEX_4V;
 let Predicates = [HasAVX2] in
-  defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
+  defm VPALIGNR : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
 let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
-  defm PALIGN : ssse3_palignr<"palignr">;
+  defm PALIGNR : ssse3_palignr<"palignr">;
 
 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
 def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
+          (VPALIGNRYrri VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
+          (VPALIGNRYrri VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
+          (VPALIGNRYrri VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
+          (VPALIGNRYrri VR256:$src1, VR256:$src2, imm:$imm)>;
 }
 
 let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
 def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
+          (VPALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
+          (VPALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
+          (VPALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
+          (VPALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
 }
 
 let Predicates = [UseSSSE3] in {
 def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
+          (PALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
+          (PALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
+          (PALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
+          (PALIGNRrri VR128:$src1, VR128:$src2, imm:$imm)>;
 }
 
 //===---------------------------------------------------------------------===//
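
All of these patterns select the same byte-wise instruction because the X86PAlignr node's immediate is already a byte count, whatever the element type of the shuffle that produced it. A tiny sketch of the scaling the shuffle lowering performs when it recognizes an element-wise rotation; the helper names are ours, and it assumes a single-source rotate for simplicity:

#include <vector>

// Returns the rotate amount in elements if Mask is a single-source
// rotation (e.g. {1,2,3,0} -> 1), or -1 otherwise.
int matchSingleSourceRotate(const std::vector<int> &Mask) {
  int N = static_cast<int>(Mask.size());
  int Rot = -1;
  for (int I = 0; I != N; ++I) {
    if (Mask[I] < 0)
      continue;                    // undef lanes match any rotation
    int R = (Mask[I] - I + N) % N; // rotation implied by this lane
    if (Rot == -1)
      Rot = R;
    else if (Rot != R)
      return -1;                   // inconsistent -> not a rotation
  }
  return Rot;
}

// PALIGNR's immediate is in bytes, so scale the element rotation:
int palignrImmediate(int EltRotation, int EltSizeInBits) {
  return EltRotation * (EltSizeInBits / 8); // v4i32 rotate-by-1 -> $4
}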
--- a/test/CodeGen/X86/sad.ll
+++ b/test/CodeGen/X86/sad.ll
@@ -105,7 +105,7 @@ define i32 @sad_16i8() nounwind {
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpalignr $4, %zmm0, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm1 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,20,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,36,37,38,39,40,41,42,43,44,45,46,47,32,33,34,35,52,53,54,55,56,57,58,59,60,61,62,63,48,49,50,51]
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovd %xmm0, %eax
 ; AVX512BW-NEXT:    retq
@@ -356,7 +356,7 @@ define i32 @sad_32i8() nounwind {
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpalignr $4, %zmm0, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm1 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,20,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,36,37,38,39,40,41,42,43,44,45,46,47,32,33,34,35,52,53,54,55,56,57,58,59,60,61,62,63,48,49,50,51]
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovd %xmm0, %eax
 ; AVX512BW-NEXT:    retq
@@ -841,7 +841,7 @@ define i32 @sad_avx64i8() nounwind {
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpalignr $4, %zmm0, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm1 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,20,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,36,37,38,39,40,41,42,43,44,45,46,47,32,33,34,35,52,53,54,55,56,57,58,59,60,61,62,63,48,49,50,51]
 ; AVX512BW-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovd %xmm0, %eax
 ; AVX512BW-NEXT:    retq
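
The {{.*#+}} comments these tests now check come from PALIGNR shuffle-mask decoding. A minimal sketch modeled on LLVM's DecodePALIGNRMask, simplified to byte elements with no masking: within each 128-bit lane, result byte i reads byte i+Imm of the concatenation src2:src1, and indices that run off the lane are redirected into the second source operand:

#include <vector>

std::vector<int> decodePALIGNRMask(unsigned NumElts, unsigned Imm) {
  std::vector<int> ShuffleMask;
  const unsigned NumLaneElts = 16; // bytes per 128-bit lane
  for (unsigned L = 0; L != NumElts; L += NumLaneElts)
    for (unsigned I = 0; I != NumLaneElts; ++I) {
      unsigned Base = I + Imm;
      // Ran off the end of this lane: take the byte from the second
      // source, whose elements the printer numbers NumElts..2*NumElts-1.
      if (Base >= NumLaneElts)
        Base += NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + L);
    }
  return ShuffleMask;
}

With NumElts = 64 and Imm = 4 this yields exactly the per-lane pattern printed above: bytes 4..15 of each lane followed by bytes 0..3 of the matching lane of the other operand, which here is again zmm0, so the comment reads as a per-lane rotation of one register.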
--- a/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -13,7 +13,7 @@ define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_
 ;
 ; AVX512BW-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vpalignr $2, %zmm0, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17,34,35,36,37,38,39,40,41,42,43,44,45,46,47,32,33,50,51,52,53,54,55,56,57,58,59,60,61,62,63,48,49]
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
@@ -33,7 +33,7 @@ define <64 x i8> @shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_
 ;
 ; AVX512BW-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vpalignr $15, %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
--- a/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -41,7 +41,7 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) {
 ; VL_BW_DQ-NEXT:    movb $1, %al
 ; VL_BW_DQ-NEXT:    kmovb %eax, %k0
 ; VL_BW_DQ-NEXT:    vpmovm2q %k0, %xmm1
-; VL_BW_DQ-NEXT:    vpalignr $8, %xmm0, %xmm1, %xmm0
+; VL_BW_DQ-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
 ; VL_BW_DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
 ; VL_BW_DQ-NEXT:    vptestmq %xmm0, %xmm0, %k0
 ; VL_BW_DQ-NEXT:    vpmovm2q %k0, %xmm0
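
As a standalone sanity check on printed masks like the one just above, here is a small reference model of 128-bit PALIGNR semantics (our sketch, independent of LLVM): concatenate src1 (high) with src2 (low), shift right by Imm bytes, and keep the low 16 bytes.

#include <array>
#include <cstdint>
#include <cstdio>

std::array<uint8_t, 16> palignr128(const std::array<uint8_t, 16> &Src1,
                                   const std::array<uint8_t, 16> &Src2,
                                   unsigned Imm) {
  std::array<uint8_t, 16> Res{};
  for (unsigned I = 0; I != 16; ++I) {
    unsigned Idx = I + Imm;        // index into the 32-byte src1:src2
    if (Idx < 16)
      Res[I] = Src2[Idx];          // low bytes come from src2
    else if (Idx < 32)
      Res[I] = Src1[Idx - 16];     // then bytes spill in from src1
    // Idx >= 32 shifts in zeros, matching the hardware behavior.
  }
  return Res;
}

int main() {
  std::array<uint8_t, 16> Src1{}, Src2{};
  for (uint8_t I = 0; I != 16; ++I) {
    Src1[I] = 100 + I; // stand-in for %xmm1
    Src2[I] = I;       // stand-in for %xmm0
  }
  // Imm = 8 reproduces the decode printed in the hunk above:
  //   xmm0 = xmm0[8..15], xmm1[0..7]
  for (uint8_t V : palignr128(Src1, Src2, 8))
    std::printf("%u ", V); // prints 8..15 then 100..107
  std::printf("\n");
}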