1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

More AVX2 instructions and their intrinsics.

llvm-svn: 143895
This commit is contained in:
Craig Topper 2011-11-06 23:04:08 +00:00
parent c597902ecc
commit 01b852b95a
8 changed files with 152 additions and 19 deletions

View File

@ -1112,7 +1112,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_vperm2f128_ps_256 : def int_x86_avx_vperm2f128_ps_256 :
GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">, GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx_vperm2f128_si_256 : def int_x86_avx_vperm2f128_si_256 :
@ -1716,6 +1716,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
} }
// Vector permutation
// AVX2 permute intrinsics. Each record ties a GCC builtin name to an
// "llvm.x86.avx2.*" intrinsic; all are IntrNoMem (they do not access memory).
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// permd: <8 x i32> result from two <8 x i32> operands.
def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
// permq: <4 x i64> result from one <4 x i64> operand plus an i8 control value.
def int_x86_avx2_permq : GCCBuiltin<"__builtin_ia32_permdi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
// permps: <8 x float> result from two <8 x float> operands.
def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
// permpd: <4 x double> result from one <4 x double> operand plus an i8
// control value.
def int_x86_avx2_permpd : GCCBuiltin<"__builtin_ia32_permdf256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem]>;
// vperm2i128: <4 x i64> result from two <4 x i64> operands plus an i8
// control value.
def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Misc. // Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">, def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,

View File

@ -773,17 +773,20 @@ static int getID(struct InternalInstruction* insn) {
if (insn->rexPrefix & 0x08) if (insn->rexPrefix & 0x08)
attrMask |= ATTR_REXW; attrMask |= ATTR_REXW;
if (getIDWithAttrMask(&instructionID, insn, attrMask)) if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1; return -1;
/* The following clauses compensate for limitations of the tables. */ /* The following clauses compensate for limitations of the tables. */
if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) { if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
!(attrMask & ATTR_OPSIZE)) {
/* /*
* Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
* has precedence since there are no L-bit with W-bit entries in the tables. * has precedence since there are no L-bit with W-bit entries in the tables.
* So if the L-bit isn't significant we should use the W-bit instead. * So if the L-bit isn't significant we should use the W-bit instead.
* We only need to do this if the instruction doesn't specify OpSize since
* there is a VEX_L_W_OPSIZE table.
*/ */
const struct InstructionSpecifier *spec; const struct InstructionSpecifier *spec;

View File

@ -111,7 +111,8 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \
ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize")
#define ENUM_ENTRY(n, r, d) n, #define ENUM_ENTRY(n, r, d) n,

View File

@ -460,7 +460,7 @@ class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern> list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize, : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
Requires<[HasAVX2]>; Requires<[HasAVX2]>;
class AVX2Ii8<bits<8> o, Format F, dag outs, dag ins, string asm, class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern> list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
Requires<[HasAVX2]>; Requires<[HasAVX2]>;

View File

@ -7310,14 +7310,17 @@ def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
// //
let neverHasSideEffects = 1 in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3), (ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V; []>, VEX_4V;
let mayLoad = 1 in
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3), (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V; []>, VEX_4V;
}
def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
@ -7402,18 +7405,18 @@ defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
// AVX2 Instructions // AVX2 Instructions
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// AVX2I_binop_rmi_int - AVX2 binary operator with 8-bit immediate /// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
multiclass AVX2I_binop_rmi_int<bits<8> opc, string OpcodeStr, multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
X86MemOperand x86memop> { X86MemOperand x86memop> {
let isCommutable = 1 in let isCommutable = 1 in
def rri : AVX2Ii8<opc, MRMSrcReg, (outs RC:$dst), def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u32u8imm:$src3), (ins RC:$src1, RC:$src2, u32u8imm:$src3),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
VEX_4V; VEX_4V;
def rmi : AVX2Ii8<opc, MRMSrcMem, (outs RC:$dst), def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u32u8imm:$src3), (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@ -7424,10 +7427,10 @@ multiclass AVX2I_binop_rmi_int<bits<8> opc, string OpcodeStr,
} }
let isCommutable = 0 in { let isCommutable = 0 in {
defm VPBLENDD : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
VR128, memopv16i8, i128mem>; VR128, memopv16i8, i128mem>;
defm VPBLENDDY : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
VR256, memopv32i8, i256mem>; VR256, memopv32i8, i256mem>;
} }
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -7465,3 +7468,62 @@ defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
int_x86_avx2_pbroadcastq_128, int_x86_avx2_pbroadcastq_128,
int_x86_avx2_pbroadcastq_256>; int_x86_avx2_pbroadcastq_256>;
//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//
/// avx2_perm - 256-bit permute taking two vector sources and no immediate.
/// Defines two VEX_4V-encoded instructions for opcode `opc`:
///   Yrr - both sources in VR256 registers.
///   Yrm - second source read from 256-bit memory through `mem_frag`.
/// Both forms are selected from the intrinsic `Int`.
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
Intrinsic Int> {
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (Int VR256:$src1, VR256:$src2))]>, VEX_4V;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (Int VR256:$src1, (mem_frag addr:$src2)))]>,
VEX_4V;
}
// Instantiations: VPERMD (opcode 0x36, v8i32) and VPERMPS (opcode 0x16, v8f32).
defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>;
defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
/// avx2_perm_imm - 256-bit permute of a single vector source controlled by an
/// 8-bit immediate. Defines two VEX-encoded instructions for opcode `opc`:
///   Yrr - source in a VR256 register.
///   Yrm - source read from 256-bit memory through `mem_frag`.
/// Both forms are selected from the intrinsic `Int`, whose last operand must
/// match `imm`.
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
Intrinsic Int> {
def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX;
def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (Int (mem_frag addr:$src1), imm:$src2))]>,
VEX;
}
// Instantiations: VPERMQ (opcode 0x00, v4i64) and VPERMPD (opcode 0x01, v4f64).
// Both additionally carry VEX_W.
defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>,
VEX_W;
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
VEX_W;
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
// VPERM2I128 is an AVX2 instruction, so both forms use the AVX2AIi8 format
// class (which carries Requires<[HasAVX2]>), matching the other immediate
// AVX2 permutes in this file, rather than the AVXAIi8 class used by the
// floating-point VPERM2F128.
//   rr - both sources in VR256 registers, 8-bit immediate control.
//   rm - second source loaded from 256-bit integer memory (memopv4i64).
// Both are VEX_4V-encoded and selected from int_x86_avx2_vperm2i128.
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst,
(int_x86_avx2_vperm2i128 VR256:$src1, VR256:$src2, imm:$src3))]>,
VEX_4V;
// The memory operand is i256mem (not f256mem) because the data is integer,
// consistent with the other integer AVX2 definitions above.
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst,
(int_x86_avx2_vperm2i128 VR256:$src1, (memopv4i64 addr:$src2),
imm:$src3))]>,
VEX_4V;

View File

@ -846,3 +846,43 @@ define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
ret <4 x i64> %res ret <4 x i64> %res
} }
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
; Codegen tests for the AVX2 permute intrinsics. Each function calls one
; intrinsic and FileCheck expects the matching mnemonic in the output.

; llvm.x86.avx2.permd on two <8 x i32> arguments -> expects "vpermd".
define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpermd
%res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
; llvm.x86.avx2.permps on two <8 x float> arguments -> expects "vpermps".
define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vpermps
%res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
; llvm.x86.avx2.permq with constant control byte 7 -> expects "vpermq".
define <4 x i64> @test_x86_avx2_permq(<4 x i64> %a0) {
; CHECK: vpermq
%res = call <4 x i64> @llvm.x86.avx2.permq(<4 x i64> %a0, i8 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.permq(<4 x i64>, i8) nounwind readonly
; llvm.x86.avx2.permpd with constant control byte 7 -> expects "vpermpd".
define <4 x double> @test_x86_avx2_permpd(<4 x double> %a0) {
; CHECK: vpermpd
%res = call <4 x double> @llvm.x86.avx2.permpd(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.permpd(<4 x double>, i8) nounwind readonly
; llvm.x86.avx2.vperm2i128 with constant control byte 1 -> expects "vperm2i128".
define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK: vperm2i128
%res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly

View File

@ -95,7 +95,10 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_VEX_L: case IC_VEX_L:
case IC_VEX_L_XS: case IC_VEX_L_XS:
case IC_VEX_L_XD: case IC_VEX_L_XD:
return false;
case IC_VEX_L_OPSIZE: case IC_VEX_L_OPSIZE:
return inheritsFrom(child, IC_VEX_L_W_OPSIZE);
case IC_VEX_L_W_OPSIZE:
return false; return false;
default: default:
llvm_unreachable("Unknown instruction class"); llvm_unreachable("Unknown instruction class");
@ -494,7 +497,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
for (index = 0; index < 256; ++index) { for (index = 0; index < 256; ++index) {
o.indent(i * 2); o.indent(i * 2);
if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE)) if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
o << "IC_VEX_L_W_OPSIZE";
else if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE))
o << "IC_VEX_L_OPSIZE"; o << "IC_VEX_L_OPSIZE";
else if ((index & ATTR_VEXL) && (index & ATTR_XD)) else if ((index & ATTR_VEXL) && (index & ATTR_XD))
o << "IC_VEX_L_XD"; o << "IC_VEX_L_XD";

View File

@ -285,9 +285,12 @@ InstructionContext RecognizableInstr::insnContext() const {
InstructionContext insnContext; InstructionContext insnContext;
if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix|| HasVEXPrefix) { if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix|| HasVEXPrefix) {
if (HasVEX_LPrefix && HasVEX_WPrefix) if (HasVEX_LPrefix && HasVEX_WPrefix) {
llvm_unreachable("Don't support VEX.L and VEX.W together"); if (HasOpSizePrefix)
else if (HasOpSizePrefix && HasVEX_LPrefix) insnContext = IC_VEX_L_W_OPSIZE;
else
llvm_unreachable("Don't support VEX.L and VEX.W together");
} else if (HasOpSizePrefix && HasVEX_LPrefix)
insnContext = IC_VEX_L_OPSIZE; insnContext = IC_VEX_L_OPSIZE;
else if (HasOpSizePrefix && HasVEX_WPrefix) else if (HasOpSizePrefix && HasVEX_WPrefix)
insnContext = IC_VEX_W_OPSIZE; insnContext = IC_VEX_W_OPSIZE;