From aa8ac259e9a2e8570abc10b21154987e8013621f Mon Sep 17 00:00:00 2001 From: Joerg Sonnenberger Date: Fri, 18 Mar 2011 11:59:40 +0000 Subject: [PATCH] Support explicit argument forms for the X86 string instructions. For now, only the default segments are supported. llvm-svn: 127875 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 127 +++++++++++++++++++- test/MC/X86/x86-32.s | 110 ++++++++++++++++++ test/MC/X86/x86-64.s | 135 ++++++++++++++++++++++ 3 files changed, 371 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 8fe549ba312..e0989b081e4 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -53,6 +53,14 @@ private: SmallVectorImpl &Operands, MCStreamer &Out); + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi) + /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode. + bool isSrcOp(X86Operand &Op); + + /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode + /// or %es:(%edi) in 32bit mode. + bool isDstOp(X86Operand &Op); + /// @name Auto-generated Matcher Functions /// { @@ -356,6 +364,24 @@ struct X86Operand : public MCParsedAsmOperand { } // end anonymous namespace. +bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { + unsigned basereg = Is64Bit ? X86::RSI : X86::ESI; + + return (Op.isMem() && + (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && + isa(Op.Mem.Disp) && + cast(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0); +} + +bool X86ATTAsmParser::isDstOp(X86Operand &Op) { + unsigned basereg = Is64Bit ?
X86::RDI : X86::EDI; + + return Op.isMem() && Op.Mem.SegReg == X86::ES && + isa(Op.Mem.Disp) && + cast(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; +} bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { @@ -788,7 +814,106 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, delete &Op; } } - + // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]" + if (Name.startswith("ins") && Operands.size() == 3 && + (Name == "insb" || Name == "insw" || Name == "insl")) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) { + Operands.pop_back(); + Operands.pop_back(); + delete &Op; + delete &Op2; + } + } + + // Transform "outs[bwl] %ds:(%esi), %dx" into "outs[bwl]" + if (Name.startswith("outs") && Operands.size() == 3 && + (Name == "outsb" || Name == "outsw" || Name == "outsl")) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) { + Operands.pop_back(); + Operands.pop_back(); + delete &Op; + delete &Op2; + } + } + + // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" + if (Name.startswith("movs") && Operands.size() == 3 && + (Name == "movsb" || Name == "movsw" || Name == "movsl" || + (Is64Bit && Name == "movsq"))) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (isSrcOp(Op) && isDstOp(Op2)) { + Operands.pop_back(); + Operands.pop_back(); + delete &Op; + delete &Op2; + } + } + // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" + if (Name.startswith("lods") && Operands.size() == 3 && + (Name == "lods" || Name == "lodsb" || Name == "lodsw" || + Name == "lodsl" || (Is64Bit && Name == "lodsq"))) { + X86Operand *Op1 = static_cast(Operands[1]); + X86Operand *Op2 =
static_cast(Operands[2]); + if (isSrcOp(*Op1) && Op2->isReg()) { + const char *ins; + unsigned reg = Op2->getReg(); + bool isLods = Name == "lods"; + if (reg == X86::AL && (isLods || Name == "lodsb")) + ins = "lodsb"; + else if (reg == X86::AX && (isLods || Name == "lodsw")) + ins = "lodsw"; + else if (reg == X86::EAX && (isLods || Name == "lodsl")) + ins = "lodsl"; + else if (reg == X86::RAX && (isLods || Name == "lodsq")) + ins = "lodsq"; + else + ins = NULL; + if (ins != NULL) { + Operands.pop_back(); + Operands.pop_back(); + delete Op1; + delete Op2; + if (Name != ins) + static_cast(Operands[0])->setTokenValue(ins); + } + } + } + // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" + if (Name.startswith("stos") && Operands.size() == 3 && + (Name == "stos" || Name == "stosb" || Name == "stosw" || + Name == "stosl" || (Is64Bit && Name == "stosq"))) { + X86Operand *Op1 = static_cast(Operands[1]); + X86Operand *Op2 = static_cast(Operands[2]); + if (isDstOp(*Op2) && Op1->isReg()) { + const char *ins; + unsigned reg = Op1->getReg(); + bool isStos = Name == "stos"; + if (reg == X86::AL && (isStos || Name == "stosb")) + ins = "stosb"; + else if (reg == X86::AX && (isStos || Name == "stosw")) + ins = "stosw"; + else if (reg == X86::EAX && (isStos || Name == "stosl")) + ins = "stosl"; + else if (reg == X86::RAX && (isStos || Name == "stosq")) + ins = "stosq"; + else + ins = NULL; + if (ins != NULL) { + Operands.pop_back(); + Operands.pop_back(); + delete Op1; + delete Op2; + if (Name != ins) + static_cast(Operands[0])->setTokenValue(ins); + } + } + } + // FIXME: Hack to handle recognize s{hr,ar,hl} $1, . Canonicalize to // "shift ". 
if ((Name.startswith("shr") || Name.startswith("sar") || diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s index 9dba65298c1..ad9aee5ee59 100644 --- a/test/MC/X86/x86-32.s +++ b/test/MC/X86/x86-32.s @@ -817,6 +817,116 @@ pshufw $90, %mm4, %mm0 // CHECK: encoding: [0xe0,A] loopnz 0 +// CHECK: outsb # encoding: [0x6e] +// CHECK: outsb +// CHECK: outsb + outsb + outsb %ds:(%esi), %dx + outsb (%esi), %dx + +// CHECK: outsw # encoding: [0x66,0x6f] +// CHECK: outsw +// CHECK: outsw + outsw + outsw %ds:(%esi), %dx + outsw (%esi), %dx + +// CHECK: outsl # encoding: [0x6f] +// CHECK: outsl + outsl + outsl %ds:(%esi), %dx + outsl (%esi), %dx + +// CHECK: insb # encoding: [0x6c] +// CHECK: insb + insb + insb %dx, %es:(%edi) + +// CHECK: insw # encoding: [0x66,0x6d] +// CHECK: insw + insw + insw %dx, %es:(%edi) + +// CHECK: insl # encoding: [0x6d] +// CHECK: insl + insl + insl %dx, %es:(%edi) + +// CHECK: movsb # encoding: [0xa4] +// CHECK: movsb +// CHECK: movsb + movsb + movsb %ds:(%esi), %es:(%edi) + movsb (%esi), %es:(%edi) + +// CHECK: movsw # encoding: [0x66,0xa5] +// CHECK: movsw +// CHECK: movsw + movsw + movsw %ds:(%esi), %es:(%edi) + movsw (%esi), %es:(%edi) + +// CHECK: movsl # encoding: [0xa5] +// CHECK: movsl +// CHECK: movsl + movsl + movsl %ds:(%esi), %es:(%edi) + movsl (%esi), %es:(%edi) + +// CHECK: lodsb # encoding: [0xac] +// CHECK: lodsb +// CHECK: lodsb +// CHECK: lodsb +// CHECK: lodsb + lodsb + lodsb %ds:(%esi), %al + lodsb (%esi), %al + lods %ds:(%esi), %al + lods (%esi), %al + +// CHECK: lodsw # encoding: [0x66,0xad] +// CHECK: lodsw +// CHECK: lodsw +// CHECK: lodsw +// CHECK: lodsw + lodsw + lodsw %ds:(%esi), %ax + lodsw (%esi), %ax + lods %ds:(%esi), %ax + lods (%esi), %ax + +// CHECK: lodsl # encoding: [0xad] +// CHECK: lodsl +// CHECK: lodsl +// CHECK: lodsl +// CHECK: lodsl + lodsl + lodsl %ds:(%esi), %eax + lodsl (%esi), %eax + lods %ds:(%esi), %eax + lods (%esi), %eax + +// CHECK: stosb # encoding: [0xaa] +// CHECK: stosb +// CHECK: 
stosb + stosb + stosb %al, %es:(%edi) + stos %al, %es:(%edi) + +// CHECK: stosw # encoding: [0x66,0xab] +// CHECK: stosw +// CHECK: stosw + stosw + stosw %ax, %es:(%edi) + stos %ax, %es:(%edi) + +// CHECK: stosl # encoding: [0xab] +// CHECK: stosl +// CHECK: stosl + stosl + stosl %eax, %es:(%edi) + stos %eax, %es:(%edi) + // CHECK: strw // CHECK: encoding: [0x66,0x0f,0x00,0xc8] str %ax diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s index fe056be012e..1d41d5b2d5f 100644 --- a/test/MC/X86/x86-64.s +++ b/test/MC/X86/x86-64.s @@ -974,6 +974,141 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1] // CHECK: encoding: [0xe0,A] loopnz 0 +// CHECK: outsb # encoding: [0x6e] +// CHECK: outsb +// CHECK: outsb + outsb + outsb %ds:(%rsi), %dx + outsb (%rsi), %dx + +// CHECK: outsw # encoding: [0x66,0x6f] +// CHECK: outsw +// CHECK: outsw + outsw + outsw %ds:(%rsi), %dx + outsw (%rsi), %dx + +// CHECK: outsl # encoding: [0x6f] +// CHECK: outsl + outsl + outsl %ds:(%rsi), %dx + outsl (%rsi), %dx + +// CHECK: insb # encoding: [0x6c] +// CHECK: insb + insb + insb %dx, %es:(%rdi) + +// CHECK: insw # encoding: [0x66,0x6d] +// CHECK: insw + insw + insw %dx, %es:(%rdi) + +// CHECK: insl # encoding: [0x6d] +// CHECK: insl + insl + insl %dx, %es:(%rdi) + +// CHECK: movsb # encoding: [0xa4] +// CHECK: movsb +// CHECK: movsb + movsb + movsb %ds:(%rsi), %es:(%rdi) + movsb (%rsi), %es:(%rdi) + +// CHECK: movsw # encoding: [0x66,0xa5] +// CHECK: movsw +// CHECK: movsw + movsw + movsw %ds:(%rsi), %es:(%rdi) + movsw (%rsi), %es:(%rdi) + +// CHECK: movsl # encoding: [0xa5] +// CHECK: movsl +// CHECK: movsl + movsl + movsl %ds:(%rsi), %es:(%rdi) + movsl (%rsi), %es:(%rdi) + +// CHECK: movsq # encoding: [0x48,0xa5] +// CHECK: movsq +// CHECK: movsq + movsq + movsq %ds:(%rsi), %es:(%rdi) + movsq (%rsi), %es:(%rdi) + +// CHECK: lodsb # encoding: [0xac] +// CHECK: lodsb +// CHECK: lodsb +// CHECK: lodsb +// CHECK: lodsb + lodsb + lodsb %ds:(%rsi), %al + lodsb (%rsi), %al + lods 
%ds:(%rsi), %al + lods (%rsi), %al + +// CHECK: lodsw # encoding: [0x66,0xad] +// CHECK: lodsw +// CHECK: lodsw +// CHECK: lodsw +// CHECK: lodsw + lodsw + lodsw %ds:(%rsi), %ax + lodsw (%rsi), %ax + lods %ds:(%rsi), %ax + lods (%rsi), %ax + +// CHECK: lodsl # encoding: [0xad] +// CHECK: lodsl +// CHECK: lodsl +// CHECK: lodsl +// CHECK: lodsl + lodsl + lodsl %ds:(%rsi), %eax + lodsl (%rsi), %eax + lods %ds:(%rsi), %eax + lods (%rsi), %eax + +// CHECK: lodsq # encoding: [0x48,0xad] +// CHECK: lodsq +// CHECK: lodsq +// CHECK: lodsq +// CHECK: lodsq + lodsq + lodsq %ds:(%rsi), %rax + lodsq (%rsi), %rax + lods %ds:(%rsi), %rax + lods (%rsi), %rax + +// CHECK: stosb # encoding: [0xaa] +// CHECK: stosb +// CHECK: stosb + stosb + stosb %al, %es:(%rdi) + stos %al, %es:(%rdi) + +// CHECK: stosw # encoding: [0x66,0xab] +// CHECK: stosw +// CHECK: stosw + stosw + stosw %ax, %es:(%rdi) + stos %ax, %es:(%rdi) + +// CHECK: stosl # encoding: [0xab] +// CHECK: stosl +// CHECK: stosl + stosl + stosl %eax, %es:(%rdi) + stos %eax, %es:(%rdi) + +// CHECK: stosq # encoding: [0x48,0xab] +// CHECK: stosq +// CHECK: stosq + stosq + stosq %rax, %es:(%rdi) + stos %rax, %es:(%rdi) + // CHECK: strw // CHECK: encoding: [0x66,0x0f,0x00,0xc8] str %ax