1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[X86] Disable the NOP padding for branches when bundle is enabled

When bundling is enabled, a data fragment itself has space to emit NOPs
to bundle-align instructions. This behaviour makes it impossible for
us to determine whether macro fusion really happens when emitting
instructions. In addition, the boundary-align fragment is also used to
emit NOPs to align instructions, and currently using the two mechanisms
together sometimes produces unexpected code layout.

Differential Revision: https://reviews.llvm.org/D75346
This commit is contained in:
Shengchen Kan 2020-02-28 21:09:30 +08:00
parent b06ad89757
commit 616dd1c201
2 changed files with 24 additions and 3 deletions

View File

@ -364,10 +364,8 @@ bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
return false;
assert(allowAutoPadding() && "incorrect initialization!");
MCAssembler &Assembler = OS.getAssembler();
MCSection *Sec = OS.getCurrentSectionOnly();
// To be Done: Currently don't deal with Bundle cases.
if (Assembler.isBundlingEnabled() && Sec->isBundleLocked())
if (OS.getAssembler().isBundlingEnabled())
return false;
// Branches only need to be aligned in 32-bit or 64-bit mode.

View File

@ -0,0 +1,23 @@
# Check that using the options --x86-align-branch-boundary=16 --x86-align-branch=fused+jcc --mc-relax-all
# together with bundling still produces the expected code layout.
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --x86-align-branch-boundary=16 --x86-align-branch=fused+jcc --mc-relax-all %s | llvm-objdump -d - > %t1
# RUN: FileCheck --input-file=%t1 %s
# CHECK: 0000000000000000 foo:
# CHECK-NEXT: 0: 55 pushq %rbp
# CHECK-NEXT: 1: 48 f7 c2 02 00 00 00 testq $2, %rdx
# CHECK-NEXT: 8: 0f 85 f2 ff ff ff jne {{.*}}
# CHECK-NEXT: e: 90 nop
# CHECK-NEXT: f: 90 nop
# CHECK-NEXT: 10: 0f 8e ea ff ff ff jle {{.*}}
.text
.p2align 4
foo:
push %rbp
# Enable bundle alignment with 8-byte bundles (the operand 3 is the
# power-of-two exponent: 2^3 = 8).
.bundle_align_mode 3
test $2, %rdx
jne foo
# This jle is 6 bytes long and would otherwise start at 0xe, crossing the
# bundle boundary at 0x10, so two bytes of NOP padding are inserted and it
# starts at 0x10 instead.
jle foo