This patch adds support for encoding FMA4 instructions, along with TableGen patterns for the scalar FMA4 operations and their intrinsic. It also adds tests for vfmaddsd.

Patch by Jan Sjodin

llvm-svn: 145133
commit 626d04cc6f
parent 48face75c4
include/llvm/IntrinsicsX86.td
@@ -1821,6 +1821,16 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
 }
 
+//===----------------------------------------------------------------------===//
+// FMA4
+
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
+              Intrinsic<[llvm_v2f64_ty],
+                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+                        [IntrNoMem]>;
+}
+
 //===----------------------------------------------------------------------===//
 // MMX
 
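For context: the new intrinsic is reachable from C through the named builtin. A minimal usage sketch, assuming a GCC/Clang-style compiler that exposes __builtin_ia32_vfmaddsd for __m128d values and an FMA4-capable target (the wrapper name is mine, not part of the patch):

#include <x86intrin.h>

// Lane 0 of the result is a[0]*b[0] + c[0]; this should select to a single
// FMA4 instruction when built with -mfma4 on a capable compiler.
__m128d fmadd_sd(__m128d a, __m128d b, __m128d c) {
  return __builtin_ia32_vfmaddsd(a, b, c);
}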
lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -418,7 +418,12 @@ namespace X86II {
     /// storing a classifier in the imm8 field. To simplify our implementation,
     /// we handle this by storeing the classifier in the opcode field and using
     /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
-    Has3DNow0F0FOpcode = 1U << 7
+    Has3DNow0F0FOpcode = 1U << 7,
+
+    /// XOP_W - Same bit as VEX_W. Used to indicate swapping of
+    /// operand 3 and 4 to be encoded in ModRM or I8IMM. This is used
+    /// for FMA4 and XOP instructions.
+    XOP_W = 1U << 8
   };
 
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -488,9 +493,12 @@ namespace X86II {
       return 0;
     case X86II::MRMSrcMem: {
       bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+      bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
       unsigned FirstMemOp = 1;
       if (HasVEX_4V)
         ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+      if (HasXOP_W)
+        ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
 
       // FIXME: Maybe lea should have its own form? This is a horrible hack.
       //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
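The effect of the new FirstMemOp bookkeeping on the four-operand forms, as a standalone sketch (my illustration of the logic above, not code from the patch):

#include <cassert>

// The first memory operand moves back one slot for each register source
// that is encoded outside ModRM (in VEX_VVVV or in the immediate byte).
unsigned firstMemOp(bool hasVEX_4V, bool hasXOP_W) {
  unsigned FirstMemOp = 1;  // operand 0 is the destination
  if (hasVEX_4V)
    ++FirstMemOp;           // src1 lives in VEX_VVVV
  if (hasXOP_W)
    ++FirstMemOp;           // a second register source lives in I8IMM
  return FirstMemOp;
}

int main() {
  assert(firstMemOp(true, false) == 2); // AVX three-operand memory form
  assert(firstMemOp(true, true) == 3);  // FMA4 rm form: memory starts at operand 3
  return 0;
}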
lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -415,6 +415,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   // opcode extension, or ignored, depending on the opcode byte)
   unsigned char VEX_W = 0;
 
+  // XOP_W: opcode specific, same bit as VEX_W, but used to
+  // swap operand 3 and 4 for FMA4 and XOP instructions
+  unsigned char XOP_W = 0;
+
   // VEX_5M (VEX m-mmmmm field):
   //
   //  0b00000: Reserved for future use
@@ -453,6 +457,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
     VEX_W = 1;
 
+  if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W)
+    XOP_W = 1;
+
   if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
     VEX_L = 1;
 
@@ -529,6 +536,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
     //  src1(ModR/M), MemAddr, imm8
     //  src1(ModR/M), MemAddr, src2(VEX_I8IMM)
     //
+    //  FMA4:
+    //  dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+    //  dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
     if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
       VEX_R = 0x0;
 
@@ -629,7 +639,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
     // 3 byte VEX prefix
     EmitByte(0xC4, CurByte, OS);
     EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
-    EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+    EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS);
   }
 
 /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
@@ -889,6 +899,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   // It uses the VEX.VVVV field?
   bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
   bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+  bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
+  unsigned XOP_W_I8IMMOperand = 2;
 
   // Determine where the memory operand starts, if present.
   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
@@ -961,6 +973,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
      SrcRegNum++;
 
+    // GAS sets the XOP_W even with register operands, we want to match this.
+    // XOP_W is ignored, so there is no swapping of the operands
+    XOP_W_I8IMMOperand = 3;
+
     EmitRegModRMByte(MI.getOperand(SrcRegNum),
                      GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
     CurOp = SrcRegNum + 1;
@@ -975,14 +991,20 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
       ++AddrOperands;
       ++FirstMemOp;  // Skip the register source (which is encoded in VEX_VVVV).
     }
+    if(HasXOP_W) // Skip second register source (encoded in I8IMM)
+      ++FirstMemOp;
 
     EmitByte(BaseOpcode, CurByte, OS);
 
     EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
                      TSFlags, CurByte, OS, Fixups);
-    CurOp += AddrOperands + 1;
-    if (HasVEX_4VOp3)
-      ++CurOp;
+    if(HasXOP_W) {
+      CurOp = NumOps - 1; // We have consumed all except one operand (third)
+    } else {
+      CurOp += AddrOperands + 1;
+      if (HasVEX_4VOp3)
+        ++CurOp;
+    }
     break;
   }
 
@@ -1064,7 +1086,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   // The last source register of a 4 operand instruction in AVX is encoded
   // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
   if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
-    const MCOperand &MO = MI.getOperand(CurOp++);
+    const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand
+                                                 : CurOp);
+    CurOp++;
     bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
     unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
     RegNum |= GetX86RegNum(MO) << 4;
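The only change to the emitted bytes is in the third byte of the 3-byte VEX prefix, where VEX_W and XOP_W now share bit 7. A standalone sketch of that byte's layout (my illustration; LastByte packs pp, L, and the inverted vvvv exactly as in the emitter above):

#include <cstdint>

// Third VEX byte: W in bit 7, inverted vvvv in bits 6-3, L in bit 2, pp in bits 1-0.
uint8_t vexThirdByte(uint8_t W, uint8_t XOP_W, uint8_t vvvvInverted,
                     uint8_t L, uint8_t pp) {
  uint8_t LastByte = pp | (L << 2) | (vvvvInverted << 3);
  return LastByte | ((W | XOP_W) << 7);  // the expression this patch changes
}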
lib/Target/X86/X86InstrFMA.td
@@ -58,3 +58,42 @@ let isAsmParserOnly = 1 in {
   defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
   defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
 }
+
+//===----------------------------------------------------------------------===//
+// FMA4 - AMD 4 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+
+multiclass fma4s<bits<8> opc, string OpcodeStr> {
+  def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+           (ins VR128:$src1, VR128:$src2, VR128:$src3),
+           !strconcat(OpcodeStr,
+           "\t{$src2, $src3, $src1, $dst|$dst, $src1, $src3, $src2}"),
+           []>, XOP_W;
+  def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+           (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+           !strconcat(OpcodeStr,
+           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+           []>, XOP_W;
+  def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+           (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+           !strconcat(OpcodeStr,
+           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+           []>;
+
+}
+
+let isAsmParserOnly = 1 in {
+  defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd">;
+}
+
+// FMA4 Intrinsics patterns
+
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
+          (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
+                                  (alignedloadv2f64 addr:$src3)),
+          (VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+                                  VR128:$src3),
+          (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
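The patterns above only wire the intrinsic to VFMADDSD4rr/rm/mr; for orientation, this is what the selected instruction computes as I read the FMA4 spec (the upper-lane behavior is my recollection of the AMD manual, not something this patch encodes, so treat it as an assumption):

struct V2D { double d[2]; };

// Reference semantics sketch for vfmaddsd on <2 x double> values.
V2D vfmaddsdRef(V2D src1, V2D src2, V2D src3) {
  V2D r;
  r.d[0] = src1.d[0] * src2.d[0] + src3.d[0]; // fused multiply-add in lane 0
  r.d[1] = 0.0; // FMA4 scalar forms clear the upper lane (assumption; FMA3 differs)
  return r;
}

The rm and mr forms exist because the memory operand may be either trailing source; the XOP_W bit on the rr and rm defs is what tells the encoder to swap which of the two trailing sources lands in ModRM versus the immediate byte.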
lib/Target/X86/X86InstrFormats.td
@@ -118,7 +118,7 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
 class VEX_L { bit hasVEX_L = 1; }
 class VEX_LIG { bit ignoresVEX_L = 1; }
 class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
-
+class XOP_W { bit hasXOP_WPrefix = 1; }
 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
               string AsmStr, Domain d = GenericDomain>
   : Instruction {
@@ -158,6 +158,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
   bit ignoresVEX_L = 0;     // Does this instruction ignore the L-bit
   bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+  bit hasXOP_WPrefix = 0;   // Same bit as VEX_W, but used for swapping operands
 
   // TSFlags layout should be kept in sync with X86InstrInfo.h.
   let TSFlags{5-0}   = FormBits;
@@ -179,6 +180,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   let TSFlags{38}    = hasVEX_L;
   let TSFlags{39}    = ignoresVEX_L;
   let TSFlags{40}    = has3DNow0F0FOpcode;
+  let TSFlags{41}    = hasXOP_WPrefix;
 }
 
 class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -496,6 +498,12 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
   : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
     OpSize, VEX_4V, Requires<[HasFMA3]>;
 
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag>pattern>
+      : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+        OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+
 // X86-64 Instruction templates...
 //
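The TableGen bit and the C++ flag have to stay in sync: hasXOP_WPrefix lands in TSFlags{41}, and the encoder recovers it with the shifted mask from X86BaseInfo.h. A sketch of that correspondence, leaving VEXShift symbolic since its value comes from the X86II enum layout:

#include <cstdint>

constexpr uint64_t XOP_W = 1u << 8; // from the X86II flag enum in this patch

// Equivalent to reading back the TSFlags{41} bit set in X86InstrFormats.td.
bool hasXOPW(uint64_t TSFlags, unsigned VEXShift) {
  return (TSFlags >> VEXShift) & XOP_W;
}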
test/CodeGen/X86/fma4-intrinsics-x86_64.ll (new file, 9 lines)
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=fma4 | FileCheck %s
+
+define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
test/MC/X86/x86_64-fma4-encoding.s (new file, 13 lines)
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x01,0x10]
+          vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6b,0x01,0x10]
+          vfmaddsd %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
+          vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
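A field-by-field decode of the last CHECK encoding, as annotated constants (my own decode for orientation, worth checking against the AMD manual; note the third byte is 0xf9 in the first and last tests, where XOP_W is set, and 0x79 in the middle one, where it is not):

#include <cstdint>

constexpr uint8_t vfmaddsdEnc[] = {
    0xc4, // 3-byte VEX escape
    0xe3, // R/X/B all 1 (inverted, so no extended regs), m-mmmm = 0b00011 -> 0F 3A map
    0xf9, // W = 1 (the XOP_W bit), vvvv = 0b1111 inverted -> %xmm0, L = 0, pp = 0b01 (66)
    0x6b, // vfmaddsd opcode
    0xc2, // ModRM: mod = 0b11, reg = 0b000 (%xmm0, dst), r/m = 0b010 (%xmm2)
    0x10, // VEX_I8IMM: bits [7:4] = 1 -> %xmm1
};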