Stars: 1 · Forks: 0
Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-10-20 19:42:54 +02:00)

- Add AVX form of all SSE2 logical instructions

- Add VEX encoding bits to x86 MRM0r-MRM7r

llvm-svn: 107238
This commit is contained in:
Author: Bruno Cardoso Lopes — 2010-06-30 01:58:37 +00:00
Parent: 00ee7ba38c
Commit: cfbebb3921
4 changed files with 379 additions and 22 deletions

View File

@ -2440,6 +2440,68 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw>;
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
// AVX (VEX-encoded) forms of the SSE2 packed-integer shift and logical
// instructions. isAsmParserOnly keeps these out of isel (patterns still
// select the SSE forms); they exist so the assembler/disassembler know the
// three-operand VEX syntax. The trailing 0 passed to the multiclasses
// presumably selects the non-destructive AVX asm-string variant — see the
// PDI_binop_rmi_int/PDI_binop_rm_v2i64 definitions to confirm.
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
// Packed shifts: the first opcode (e.g. 0xF1) is the shift-by-xmm/mem form,
// the second (0x71-0x73) is the shift-by-imm8 form, whose operation is
// selected by the ModRM.reg field (MRM2r/MRM4r/MRM6r = /2, /4, /6).
defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
VEX_4V;
defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
VEX_4V;
defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
VEX_4V;
defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
VEX_4V;
defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
VEX_4V;
defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
VEX_4V;
defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
VEX_4V;
defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
VEX_4V;
// NOTE: there is no packed arithmetic right shift on quadwords (no PSRAQ),
// so no VPSRAQ is defined here.
// AND/OR/XOR commute; PANDN does not (it negates only the first source),
// so it is defined separately below.
let isCommutable = 1 in {
defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 0>, VEX_4V;
defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 0>, VEX_4V;
defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 0>, VEX_4V;
}
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
// 128-bit logical shifts.
// Whole-register byte shifts: imm8-only encodings (opcode 0x73, /7 and /3).
// They have no pattern here; codegen reaches them through other lowering.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V;
def VPSRLDQri : PDIi8<0x73, MRM3r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
}
// VPANDN: dst = ~src1 & src2. Register and memory-source forms; note the
// pattern inverts the FIRST operand, matching the hardware semantics.
def VPANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
VR128:$src2)))]>, VEX_4V;
def VPANDNrm : PDI<0xDF, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
(memopv2i64 addr:$src2))))]>,
VEX_4V;
}
}
let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;

View File

@ -60,6 +60,27 @@ public:
// Return the 3-bit x86 register number (ModRM/REX encoding) for the
// register held by this operand.
static unsigned GetX86RegNum(const MCOperand &MO) {
  const unsigned Reg = MO.getReg();
  return X86RegisterInfo::getX86RegNum(Reg);
}
// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
// 0-7 and the difference between the 2 groups is given by the REX prefix.
// In the VEX prefix, registers are seen sequencially from 0-15 and encoded
// in 1's complement form, example:
//
// ModRM field => XMM9 => 1
// VEX.VVVV => XMM9 => ~9
//
// See table 4-35 of Intel AVX Programming Reference for details.
static unsigned char getVEXRegisterEncoding(const MCInst &MI,
unsigned OpNum) {
unsigned SrcReg = MI.getOperand(OpNum).getReg();
unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
SrcRegNum += 8;
// The registers represented through VEX_VVVV should
// be encoded in 1's complement form.
return (~SrcRegNum) & 0xf;
}
void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
OS << (char)C;
@ -134,7 +155,6 @@ MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &,
return new X86MCCodeEmitter(TM, Ctx, true);
}
/// isDisp8 - Return true if this signed displacement fits in a 8-bit
/// sign-extended field.
static bool isDisp8(int Value) {
@ -469,29 +489,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
// If the memory destination has been checked first,
// go back to the first operand
// CurOp and NumOps are equal when VEX_R represents a register used
// to index a memory destination (which is the last operand)
CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the
// range 0-7 and the difference between the 2 groups is given by the
// REX prefix. In the VEX prefix, registers are seen sequencially
// from 0-15 and encoded in 1's complement form, example:
//
// ModRM field => XMM9 => 1
// VEX.VVVV => XMM9 => ~9
//
// See table 4-35 of Intel AVX Programming Reference for details.
if (HasVEX_4V) {
unsigned SrcReg = MI.getOperand(CurOp).getReg();
unsigned SrcRegNum = GetX86RegNum(MI.getOperand(1));
if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
SrcRegNum += 8;
// The registers represented through VEX_VVVV should
// be encoded in 1's complement form.
VEX_4V = (~SrcRegNum) & 0xf;
VEX_4V = getVEXRegisterEncoding(MI, CurOp);
CurOp++;
}
@ -505,7 +508,17 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_X = 0x0;
}
break;
default:
default: // MRM0r-MRM7r
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp);
CurOp++;
for (; CurOp != NumOps; ++CurOp) {
const MCOperand &MO = MI.getOperand(CurOp);
if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
VEX_B = 0x0;
}
break;
assert(0 && "Not implemented!");
}
@ -831,6 +844,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r:
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
CurOp++;
EmitByte(BaseOpcode, CurByte, OS);
EmitRegModRMByte(MI.getOperand(CurOp++),
(TSFlags & X86II::FormMask)-X86II::MRM0r,

View File

@ -11182,3 +11182,143 @@
// AVX (VEX.128) forms of the SSE2 packed-integer shift and logical
// instructions, 32-bit mode, AT&T syntax. Each instruction below is
// assembled by llvm-mc and FileCheck verifies both the printed mnemonic
// and the exact VEX encoding bytes. Registers stay in XMM0-XMM7 so every
// encoding fits the 2-byte (0xC5) VEX prefix.
// CHECK: encoding: [0xc5,0xe9,0xf6,0x18]
vpsadbw (%eax), %xmm2, %xmm3
// CHECK: vpsllw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9]
vpsllw %xmm1, %xmm2, %xmm3
// CHECK: vpsllw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf1,0x18]
vpsllw (%eax), %xmm2, %xmm3
// CHECK: vpslld %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9]
vpslld %xmm1, %xmm2, %xmm3
// CHECK: vpslld (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf2,0x18]
vpslld (%eax), %xmm2, %xmm3
// CHECK: vpsllq %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9]
vpsllq %xmm1, %xmm2, %xmm3
// CHECK: vpsllq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xf3,0x18]
vpsllq (%eax), %xmm2, %xmm3
// CHECK: vpsraw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9]
vpsraw %xmm1, %xmm2, %xmm3
// CHECK: vpsraw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe1,0x18]
vpsraw (%eax), %xmm2, %xmm3
// CHECK: vpsrad %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9]
vpsrad %xmm1, %xmm2, %xmm3
// CHECK: vpsrad (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xe2,0x18]
vpsrad (%eax), %xmm2, %xmm3
// CHECK: vpsrlw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9]
vpsrlw %xmm1, %xmm2, %xmm3
// CHECK: vpsrlw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd1,0x18]
vpsrlw (%eax), %xmm2, %xmm3
// CHECK: vpsrld %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9]
vpsrld %xmm1, %xmm2, %xmm3
// CHECK: vpsrld (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd2,0x18]
vpsrld (%eax), %xmm2, %xmm3
// CHECK: vpsrlq %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9]
vpsrlq %xmm1, %xmm2, %xmm3
// CHECK: vpsrlq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xd3,0x18]
vpsrlq (%eax), %xmm2, %xmm3
// Immediate (shift-by-imm8) forms: opcode 0x71-0x73 with the operation
// selected by ModRM.reg, destination in VEX.vvvv.
// CHECK: vpslld $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
vpslld $10, %xmm2, %xmm3
// CHECK: vpslldq $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a]
vpslldq $10, %xmm2, %xmm3
// CHECK: vpsllq $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a]
vpsllq $10, %xmm2, %xmm3
// CHECK: vpsllw $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a]
vpsllw $10, %xmm2, %xmm3
// CHECK: vpsrad $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a]
vpsrad $10, %xmm2, %xmm3
// CHECK: vpsraw $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a]
vpsraw $10, %xmm2, %xmm3
// CHECK: vpsrld $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a]
vpsrld $10, %xmm2, %xmm3
// CHECK: vpsrldq $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a]
vpsrldq $10, %xmm2, %xmm3
// CHECK: vpsrlq $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a]
vpsrlq $10, %xmm2, %xmm3
// CHECK: vpsrlw $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a]
vpsrlw $10, %xmm2, %xmm3
// CHECK: vpslld $10, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
vpslld $10, %xmm2, %xmm3
// Bitwise logic instructions.
// CHECK: vpand %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9]
vpand %xmm1, %xmm2, %xmm3
// CHECK: vpand (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xdb,0x18]
vpand (%eax), %xmm2, %xmm3
// CHECK: vpor %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9]
vpor %xmm1, %xmm2, %xmm3
// CHECK: vpor (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xeb,0x18]
vpor (%eax), %xmm2, %xmm3
// CHECK: vpxor %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xef,0xd9]
vpxor %xmm1, %xmm2, %xmm3
// CHECK: vpxor (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xef,0x18]
vpxor (%eax), %xmm2, %xmm3
// CHECK: vpandn %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9]
vpandn %xmm1, %xmm2, %xmm3
// CHECK: vpandn (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0xdf,0x18]
vpandn (%eax), %xmm2, %xmm3

View File

@ -1222,3 +1222,143 @@ pshufb CPI1_0(%rip), %xmm1
// AVX (VEX.128) forms of the SSE2 packed-integer shift and logical
// instructions, 64-bit mode, AT&T syntax, using extended registers
// XMM11-XMM13. This exercises the VEX.R/X/B/vvvv extended-register bits:
// note the 3-byte (0xC4) prefix when a high register lands in ModRM, and
// the inverted vvvv field encoding the extra source.
// CHECK: encoding: [0xc5,0x19,0xf6,0x28]
vpsadbw (%rax), %xmm12, %xmm13
// CHECK: vpsllw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb]
vpsllw %xmm11, %xmm12, %xmm13
// CHECK: vpsllw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xf1,0x28]
vpsllw (%rax), %xmm12, %xmm13
// CHECK: vpslld %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb]
vpslld %xmm11, %xmm12, %xmm13
// CHECK: vpslld (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xf2,0x28]
vpslld (%rax), %xmm12, %xmm13
// CHECK: vpsllq %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb]
vpsllq %xmm11, %xmm12, %xmm13
// CHECK: vpsllq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xf3,0x28]
vpsllq (%rax), %xmm12, %xmm13
// CHECK: vpsraw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb]
vpsraw %xmm11, %xmm12, %xmm13
// CHECK: vpsraw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xe1,0x28]
vpsraw (%rax), %xmm12, %xmm13
// CHECK: vpsrad %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb]
vpsrad %xmm11, %xmm12, %xmm13
// CHECK: vpsrad (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xe2,0x28]
vpsrad (%rax), %xmm12, %xmm13
// CHECK: vpsrlw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb]
vpsrlw %xmm11, %xmm12, %xmm13
// CHECK: vpsrlw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xd1,0x28]
vpsrlw (%rax), %xmm12, %xmm13
// CHECK: vpsrld %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb]
vpsrld %xmm11, %xmm12, %xmm13
// CHECK: vpsrld (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xd2,0x28]
vpsrld (%rax), %xmm12, %xmm13
// CHECK: vpsrlq %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb]
vpsrlq %xmm11, %xmm12, %xmm13
// CHECK: vpsrlq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xd3,0x28]
vpsrlq (%rax), %xmm12, %xmm13
// Immediate (shift-by-imm8) forms: the register being shifted sits in
// ModRM.rm, so an extended register forces the 3-byte VEX prefix.
// CHECK: vpslld $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
vpslld $10, %xmm12, %xmm13
// CHECK: vpslldq $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a]
vpslldq $10, %xmm12, %xmm13
// CHECK: vpsllq $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a]
vpsllq $10, %xmm12, %xmm13
// CHECK: vpsllw $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a]
vpsllw $10, %xmm12, %xmm13
// CHECK: vpsrad $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a]
vpsrad $10, %xmm12, %xmm13
// CHECK: vpsraw $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a]
vpsraw $10, %xmm12, %xmm13
// CHECK: vpsrld $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a]
vpsrld $10, %xmm12, %xmm13
// CHECK: vpsrldq $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a]
vpsrldq $10, %xmm12, %xmm13
// CHECK: vpsrlq $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a]
vpsrlq $10, %xmm12, %xmm13
// CHECK: vpsrlw $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a]
vpsrlw $10, %xmm12, %xmm13
// CHECK: vpslld $10, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
vpslld $10, %xmm12, %xmm13
// Bitwise logic instructions.
// CHECK: vpand %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb]
vpand %xmm11, %xmm12, %xmm13
// CHECK: vpand (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xdb,0x28]
vpand (%rax), %xmm12, %xmm13
// CHECK: vpor %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb]
vpor %xmm11, %xmm12, %xmm13
// CHECK: vpor (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xeb,0x28]
vpor (%rax), %xmm12, %xmm13
// CHECK: vpxor %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb]
vpxor %xmm11, %xmm12, %xmm13
// CHECK: vpxor (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xef,0x28]
vpxor (%rax), %xmm12, %xmm13
// CHECK: vpandn %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb]
vpandn %xmm11, %xmm12, %xmm13
// CHECK: vpandn (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0xdf,0x28]
vpandn (%rax), %xmm12, %xmm13