mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] For 32-bit targets, emit two-byte NOP when possible
In order to support hot-patching, we need to make sure the first emitted instruction in a function is a two-byte+ op. This is already the case on x86_64, which seems to always emit two-byte+ ops. However, on 32-bit targets this wasn't the case. PATCHABLE_OP now lowers to an XCHG AX, AX (66 90) like MSVC does. However, when targeting pentium3 (/arch:SSE) or i386 (/arch:IA32) targets, we generate MOV EDI, EDI (8B FF) like MSVC does. This is for compatibility reasons with older tools that rely on this two-byte pattern. Differential Revision: https://reviews.llvm.org/D81301
This commit is contained in:
parent
f74b1eb181
commit
39f366394d
@ -1083,26 +1083,23 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
|
|||||||
/// target cpu. 15-bytes is the longest single NOP instruction, but some
|
/// target cpu. 15-bytes is the longest single NOP instruction, but some
|
||||||
/// platforms can't decode the longest forms efficiently.
|
/// platforms can't decode the longest forms efficiently.
|
||||||
static unsigned maxLongNopLength(const X86Subtarget *Subtarget) {
|
static unsigned maxLongNopLength(const X86Subtarget *Subtarget) {
|
||||||
uint64_t MaxNopLength = 10;
|
|
||||||
if (Subtarget->getFeatureBits()[X86::ProcIntelSLM])
|
if (Subtarget->getFeatureBits()[X86::ProcIntelSLM])
|
||||||
MaxNopLength = 7;
|
return 7;
|
||||||
else if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP])
|
if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP])
|
||||||
MaxNopLength = 15;
|
return 15;
|
||||||
else if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP])
|
if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP])
|
||||||
MaxNopLength = 11;
|
return 11;
|
||||||
return MaxNopLength;
|
if (Subtarget->getFeatureBits()[X86::FeatureNOPL] || Subtarget->is64Bit())
|
||||||
|
return 10;
|
||||||
|
if (Subtarget->is32Bit())
|
||||||
|
return 2;
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
|
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
|
||||||
/// bytes. Return the size of nop emitted.
|
/// bytes. Return the size of nop emitted.
|
||||||
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
|
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
|
||||||
const X86Subtarget *Subtarget) {
|
const X86Subtarget *Subtarget) {
|
||||||
if (!Subtarget->is64Bit()) {
|
|
||||||
// TODO Do additional checking if the CPU supports multi-byte nops.
|
|
||||||
OS.emitInstruction(MCInstBuilder(X86::NOOP), *Subtarget);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cap a single nop emission at the profitable value for the target
|
// Cap a single nop emission at the profitable value for the target
|
||||||
NumBytes = std::min(NumBytes, maxLongNopLength(Subtarget));
|
NumBytes = std::min(NumBytes, maxLongNopLength(Subtarget));
|
||||||
|
|
||||||
@ -1342,7 +1339,17 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
|
|||||||
CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
|
CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
|
||||||
|
|
||||||
if (Code.size() < MinSize) {
|
if (Code.size() < MinSize) {
|
||||||
if (MinSize == 2 && Opcode == X86::PUSH64r) {
|
if (MinSize == 2 && Subtarget->is32Bit() &&
|
||||||
|
Subtarget->isTargetWindowsMSVC() &&
|
||||||
|
(Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
|
||||||
|
// For compatibility reasons, when targeting MSVC, it is important to
|
||||||
|
// generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
|
||||||
|
// rely specifically on this pattern to be able to patch a function.
|
||||||
|
// This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
|
||||||
|
OutStreamer->emitInstruction(
|
||||||
|
MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
|
||||||
|
*Subtarget);
|
||||||
|
} else if (MinSize == 2 && Opcode == X86::PUSH64r) {
|
||||||
// This is an optimization that lets us get away without emitting a nop in
|
// This is an optimization that lets us get away without emitting a nop in
|
||||||
// many cases.
|
// many cases.
|
||||||
//
|
//
|
||||||
|
@ -31,7 +31,7 @@ define void @f1() "patchable-function-entry"="1" {
|
|||||||
define void @f2() "patchable-function-entry"="2" {
|
define void @f2() "patchable-function-entry"="2" {
|
||||||
; CHECK-LABEL: f2:
|
; CHECK-LABEL: f2:
|
||||||
; CHECK-NEXT: .Lfunc_begin2:
|
; CHECK-NEXT: .Lfunc_begin2:
|
||||||
; 32-COUNT-2: nop
|
; 32: xchgw %ax, %ax
|
||||||
; 64: xchgw %ax, %ax
|
; 64: xchgw %ax, %ax
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
; CHECK: .section __patchable_function_entries,"awo",@progbits,f2{{$}}
|
; CHECK: .section __patchable_function_entries,"awo",@progbits,f2{{$}}
|
||||||
@ -46,7 +46,8 @@ $f3 = comdat any
|
|||||||
define void @f3() "patchable-function-entry"="3" comdat {
|
define void @f3() "patchable-function-entry"="3" comdat {
|
||||||
; CHECK-LABEL: f3:
|
; CHECK-LABEL: f3:
|
||||||
; CHECK-NEXT: .Lfunc_begin3:
|
; CHECK-NEXT: .Lfunc_begin3:
|
||||||
; 32-COUNT-3: nop
|
; 32: xchgw %ax, %ax
|
||||||
|
; 32-NEXT: nop
|
||||||
; 64: nopl (%rax)
|
; 64: nopl (%rax)
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f3,comdat,f3{{$}}
|
; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f3,comdat,f3{{$}}
|
||||||
@ -61,7 +62,8 @@ $f5 = comdat any
|
|||||||
define void @f5() "patchable-function-entry"="5" comdat {
|
define void @f5() "patchable-function-entry"="5" comdat {
|
||||||
; CHECK-LABEL: f5:
|
; CHECK-LABEL: f5:
|
||||||
; CHECK-NEXT: .Lfunc_begin4:
|
; CHECK-NEXT: .Lfunc_begin4:
|
||||||
; 32-COUNT-5: nop
|
; 32-COUNT-2: xchgw %ax, %ax
|
||||||
|
; 32-NEXT: nop
|
||||||
; 64: nopl 8(%rax,%rax)
|
; 64: nopl 8(%rax,%rax)
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f5,comdat,f5{{$}}
|
; CHECK: .section __patchable_function_entries,"aGwo",@progbits,f5,comdat,f5{{$}}
|
||||||
|
@ -1,5 +1,14 @@
|
|||||||
; RUN: llc -verify-machineinstrs -filetype=obj -o - -mtriple=x86_64-apple-macosx < %s | llvm-objdump --triple=x86_64-apple-macosx -d - | FileCheck %s
|
; RUN: llc -verify-machineinstrs -filetype=obj -o - -mtriple=x86_64-apple-macosx < %s | llvm-objdump --triple=x86_64-apple-macosx -d - | FileCheck %s
|
||||||
; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx < %s | FileCheck %s --check-prefix=CHECK-ALIGN
|
; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx < %s | FileCheck %s --check-prefix=CHECK-ALIGN
|
||||||
|
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386 < %s | FileCheck %s --check-prefixes=32,32CFI,XCHG
|
||||||
|
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc < %s | FileCheck %s --check-prefixes=32,MOV
|
||||||
|
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium3 < %s | FileCheck %s --check-prefixes=32,MOV
|
||||||
|
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-windows-msvc -mcpu=pentium4 < %s | FileCheck %s --check-prefixes=32,XCHG
|
||||||
|
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=64
|
||||||
|
; RUN: llc -verify-machineinstrs -show-mc-encoding -mtriple=i386-unknown-linux-code16 < %s | FileCheck %s --check-prefix=16
|
||||||
|
|
||||||
|
; 16-NOT: movl %edi, %edi
|
||||||
|
; 16-NOT: xchgw %ax, %ax
|
||||||
|
|
||||||
declare void @callee(i64*)
|
declare void @callee(i64*)
|
||||||
|
|
||||||
@ -10,6 +19,18 @@ define void @f0() "patchable-function"="prologue-short-redirect" {
|
|||||||
; CHECK-ALIGN: .p2align 4, 0x90
|
; CHECK-ALIGN: .p2align 4, 0x90
|
||||||
; CHECK-ALIGN: _f0:
|
; CHECK-ALIGN: _f0:
|
||||||
|
|
||||||
|
; 32: f0:
|
||||||
|
; 32CFI-NEXT: .cfi_startproc
|
||||||
|
; 32-NEXT: # %bb.0:
|
||||||
|
; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
|
||||||
|
; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
|
||||||
|
; 32-NEXT: retl
|
||||||
|
|
||||||
|
; 64: f0:
|
||||||
|
; 64-NEXT: # %bb.0:
|
||||||
|
; 64-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
|
||||||
|
; 64-NEXT: retq
|
||||||
|
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -19,6 +40,19 @@ define void @f1() "patchable-function"="prologue-short-redirect" "frame-pointer"
|
|||||||
|
|
||||||
; CHECK-ALIGN: .p2align 4, 0x90
|
; CHECK-ALIGN: .p2align 4, 0x90
|
||||||
; CHECK-ALIGN: _f1:
|
; CHECK-ALIGN: _f1:
|
||||||
|
|
||||||
|
; 32: f1:
|
||||||
|
; 32CFI-NEXT: .cfi_startproc
|
||||||
|
; 32-NEXT: # %bb.0:
|
||||||
|
; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
|
||||||
|
; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
|
||||||
|
; 32-NEXT: pushl %ebp
|
||||||
|
|
||||||
|
; 64: f1:
|
||||||
|
; 64-NEXT: .seh_proc f1
|
||||||
|
; 64-NEXT: # %bb.0:
|
||||||
|
; 64-NEXT: pushq %rbp
|
||||||
|
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,6 +62,19 @@ define void @f2() "patchable-function"="prologue-short-redirect" {
|
|||||||
|
|
||||||
; CHECK-ALIGN: .p2align 4, 0x90
|
; CHECK-ALIGN: .p2align 4, 0x90
|
||||||
; CHECK-ALIGN: _f2:
|
; CHECK-ALIGN: _f2:
|
||||||
|
|
||||||
|
; 32: f2:
|
||||||
|
; 32CFI-NEXT: .cfi_startproc
|
||||||
|
; 32-NEXT: # %bb.0:
|
||||||
|
; XCHG-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90]
|
||||||
|
; MOV-NEXT: movl %edi, %edi # encoding: [0x8b,0xff]
|
||||||
|
; 32-NEXT: pushl %ebp
|
||||||
|
|
||||||
|
; 64: f2:
|
||||||
|
; 64-NEXT: .seh_proc f2
|
||||||
|
; 64-NEXT: # %bb.0:
|
||||||
|
; 64-NEXT: subq $200, %rsp
|
||||||
|
|
||||||
%ptr = alloca i64, i32 20
|
%ptr = alloca i64, i32 20
|
||||||
call void @callee(i64* %ptr)
|
call void @callee(i64* %ptr)
|
||||||
ret void
|
ret void
|
||||||
@ -39,6 +86,19 @@ define void @f3() "patchable-function"="prologue-short-redirect" optsize {
|
|||||||
|
|
||||||
; CHECK-ALIGN: .p2align 4, 0x90
|
; CHECK-ALIGN: .p2align 4, 0x90
|
||||||
; CHECK-ALIGN: _f3:
|
; CHECK-ALIGN: _f3:
|
||||||
|
|
||||||
|
; 32: f3:
|
||||||
|
; 32CFI-NEXT: .cfi_startproc
|
||||||
|
; 32-NEXT: # %bb.0:
|
||||||
|
; XCHG-NEXT: xchgw %ax, %ax
|
||||||
|
; MOV-NEXT: movl %edi, %edi
|
||||||
|
; 32-NEXT: retl
|
||||||
|
|
||||||
|
; 64: f3:
|
||||||
|
; 64-NEXT: # %bb.0:
|
||||||
|
; 64-NEXT: xchgw %ax, %ax
|
||||||
|
; 64-NEXT: retq
|
||||||
|
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -47,6 +107,17 @@ define void @f3() "patchable-function"="prologue-short-redirect" optsize {
|
|||||||
; patchable one.
|
; patchable one.
|
||||||
; CHECK-LABEL: f4{{>?}}:
|
; CHECK-LABEL: f4{{>?}}:
|
||||||
; CHECK-NEXT: 8b 0c 37 movl (%rdi,%rsi), %ecx
|
; CHECK-NEXT: 8b 0c 37 movl (%rdi,%rsi), %ecx
|
||||||
|
; 32: f4:
|
||||||
|
; 32CFI-NEXT: .cfi_startproc
|
||||||
|
; 32-NEXT: # %bb.0:
|
||||||
|
; XCHG-NEXT: xchgw %ax, %ax
|
||||||
|
; MOV-NEXT: movl %edi, %edi
|
||||||
|
; 32-NEXT: pushl %ebx
|
||||||
|
|
||||||
|
; 64: f4:
|
||||||
|
; 64-NEXT: # %bb.0:
|
||||||
|
; 64-NOT: xchgw %ax, %ax
|
||||||
|
|
||||||
define i32 @f4(i8* %arg1, i64 %arg2, i32 %arg3) "patchable-function"="prologue-short-redirect" {
|
define i32 @f4(i8* %arg1, i64 %arg2, i32 %arg3) "patchable-function"="prologue-short-redirect" {
|
||||||
bb:
|
bb:
|
||||||
%tmp10 = getelementptr i8, i8* %arg1, i64 %arg2
|
%tmp10 = getelementptr i8, i8* %arg1, i64 %arg2
|
||||||
|
Loading…
x
Reference in New Issue
Block a user