1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] Add ST0 as an implicit def/use of x87 load/store instructions during FP stackifying.

These instructions implicitly operate on ST0, but we don't currently add that information to the MachineInstr. We also don't add it the tablegen definitions either.

For the most part this doesn't cause any problems because the stackifying occurs after register allocation. All the instructions are marked as having side effects so the postRA scheduler won't reorder them amongst themselves.

But nothing stops inline assembly using X87 instructions from being reordered around other x87 instructions if that inline assembly wasn't marked volatile.

The two test cases I've identified so far in PR40539 involve loads and stores used to set up the inline assembly or capture the results of the inline assembly ending up in the wrong order.

This patch adds implicit ST0 uses/defs to the load/store instructions to prevent this from happening.

I plan to fix all of the FP instructions, but the binops are bit trickier to get right. So I've chosen fixing the known test cases as a good first step.

I think we also need to update the tablegen descriptions so MS inline assembly infers the right clobbers, but I haven't checked that yet.

Differential Revision: https://reviews.llvm.org/D57644

llvm-svn: 353070
This commit is contained in:
Craig Topper 2019-02-04 18:43:55 +00:00
parent 896c7cc6eb
commit 2613e553a6
2 changed files with 9 additions and 8 deletions

View File

@ -1095,6 +1095,8 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
// Change from the pseudo instruction to the concrete instruction.
MI.RemoveOperand(0); // Remove the explicit ST(0) operand
MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
MI.addOperand(
MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
// Result gets pushed on the stack.
pushReg(DestReg);
@ -1139,6 +1141,8 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
// Convert from the pseudo instruction to the concrete instruction.
MI.RemoveOperand(NumOps - 1); // Remove explicit ST(0) operand
MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
MI.addOperand(
MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||

View File

@ -1,9 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=pentium4 | FileCheck %s
; FIXME: The fstps in the following test case should be between the inline
; assembly expansion and the cmpeqss. The postRA scheduler has rearranged them.
@f1 = global float 1.000000e+00, align 4
define zeroext i1 @_Z9test_log2v() {
@ -12,13 +9,13 @@ define zeroext i1 @_Z9test_log2v() {
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: flds f1
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: #APP
; CHECK-NEXT: fld1
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fyl2x
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cmpeqss (%esp), %xmm0
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: andl $1, %eax
@ -44,12 +41,12 @@ define zeroext i1 @_Z8test_cosv() {
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: #APP
; CHECK-NEXT: fcos
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: divss {{\.LCPI.*}}, %xmm0
; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: #APP
; CHECK-NEXT: fcos
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss %xmm0, %xmm1