x86: Emit LOCK prefix after DATA16

Summary: x86 accepts the LOCK and DATA16 prefixes in either order, but GCC+GAS and LLVM currently emit them in different orders. This change makes LLVM match the order in which GAS emits the x86 prefixes when a semicolon isn't used in inline assembly (see the comment before the LOCK_PREFIX define in tc-i386.c), and helps simplify tooling that operates on the instruction's byte sequence (such as NaCl's validator). This change shouldn't have any performance impact. Test Plan: ninja check Reviewers: craig.topper, jvoung Subscribers: jfb, llvm-commits Differential Revision: http://reviews.llvm.org/D6630 llvm-svn: 224283
2024-11-23 19:23:23 +01:00 · 2014-12-15 22:34:58 +00:00 · 2014-12-15 22:34:58 +00:00 · 27a63b4d77
commit 27a63b4d77
parent a6e921963f
2 changed files with 18 additions and 16 deletions
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -590,6 +590,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
                                           int MemOperand, const MCInst &MI,
                                           const MCInstrDesc &Desc,
                                           raw_ostream &OS) const {
+  assert(!(TSFlags & X86II::LOCK) && "Can't have LOCK VEX.");
+
  uint64_t Encoding = TSFlags & X86II::EncodingMask;
  bool HasEVEX_K = TSFlags & X86II::EVEX_K;
  bool HasVEX_4V = TSFlags & X86II::VEX_4V;
@@ -1109,6 +1111,10 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
                                                         : X86II::OpSize16))
    EmitByte(0x66, CurByte, OS);

+  // Emit the LOCK opcode prefix.
+  if (TSFlags & X86II::LOCK)
+    EmitByte(0xF0, CurByte, OS);
+
  switch (TSFlags & X86II::OpPrefixMask) {
  case X86II::PD:   // 66
    EmitByte(0x66, CurByte, OS);
@@ -1182,10 +1188,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
  if (MemoryOperand != -1) MemoryOperand += CurOp;

-  // Emit the lock opcode prefix as needed.
-  if (TSFlags & X86II::LOCK)
-    EmitByte(0xF0, CurByte, OS);
-
  // Emit segment override opcode prefix as needed.
  if (MemoryOperand >= 0)
    EmitSegmentOverridePrefix(CurByte, MemoryOperand+X86::AddrSegmentReg,
--- a/test/CodeGen/X86/atomic16.ll
+++ b/test/CodeGen/X86/atomic16.ll
@@ -15,17 +15,17 @@ entry:
 ; X32:       incw
  %t2 = atomicrmw add  i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       addw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       addw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       addw $3
  %t3 = atomicrmw add  i16* @sc16, i16 5 acquire
 ; X64:       lock
-; X64:       xaddw {{.*}} # encoding: [0xf0,0x66
+; X64:       xaddw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       xaddw
  %t4 = atomicrmw add  i16* @sc16, i16 %t3 acquire
 ; X64:       lock
-; X64:       addw {{.*}} # encoding: [0xf0,0x66
+; X64:       addw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       addw
  ret void
@@ -43,17 +43,17 @@ define void @atomic_fetch_sub16() nounwind {
 ; X32:       decw
  %t2 = atomicrmw sub  i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       subw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       subw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       subw $3
  %t3 = atomicrmw sub  i16* @sc16, i16 5 acquire
 ; X64:       lock
-; X64:       xaddw {{.*}} # encoding: [0xf0,0x66
+; X64:       xaddw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       xaddw
  %t4 = atomicrmw sub  i16* @sc16, i16 %t3 acquire
 ; X64:       lock
-; X64:       subw {{.*}} # encoding: [0xf0,0x66
+; X64:       subw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       subw
  ret void
@@ -66,7 +66,7 @@ define void @atomic_fetch_and16() nounwind {
 ; X32-LABEL:   atomic_fetch_and16
  %t1 = atomicrmw and  i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       andw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       andw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       andw $3
  %t2 = atomicrmw and  i16* @sc16, i16 5 acquire
@@ -78,7 +78,7 @@ define void @atomic_fetch_and16() nounwind {
 ; X32:       cmpxchgw
  %t3 = atomicrmw and  i16* @sc16, i16 %t2 acquire
 ; X64:       lock
-; X64:       andw {{.*}} # encoding: [0xf0,0x66
+; X64:       andw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       andw
  ret void
@@ -91,7 +91,7 @@ define void @atomic_fetch_or16() nounwind {
 ; X32-LABEL:   atomic_fetch_or16
  %t1 = atomicrmw or   i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       orw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       orw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       orw $3
  %t2 = atomicrmw or   i16* @sc16, i16 5 acquire
@@ -103,7 +103,7 @@ define void @atomic_fetch_or16() nounwind {
 ; X32:       cmpxchgw
  %t3 = atomicrmw or   i16* @sc16, i16 %t2 acquire
 ; X64:       lock
-; X64:       orw {{.*}} # encoding: [0xf0,0x66
+; X64:       orw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       orw
  ret void
@@ -116,7 +116,7 @@ define void @atomic_fetch_xor16() nounwind {
 ; X32-LABEL:   atomic_fetch_xor16
  %t1 = atomicrmw xor  i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       xorw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       xorw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       xorw $3
  %t2 = atomicrmw xor  i16* @sc16, i16 5 acquire
@@ -128,7 +128,7 @@ define void @atomic_fetch_xor16() nounwind {
 ; X32:       cmpxchgw
  %t3 = atomicrmw xor  i16* @sc16, i16 %t2 acquire
 ; X64:       lock
-; X64:       xorw {{.*}} # encoding: [0xf0,0x66
+; X64:       xorw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       xorw
  ret void