From ee00ebbffc6f5e0ea25f6bff924eb6ff5099d38e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 14 Dec 2019 14:49:17 -0800 Subject: [PATCH] [LegalizeTypes] Teach BitcastToInt_ATOMIC_SWAP to only create FP16_TO_FP when called from PromoteFloatResult. There's also a call from SoftenFloatResult that should not be promoted. The changed test case would fail with the new RUN line prior to this change. --- .../SelectionDAG/LegalizeFloatTypes.cpp | 13 +- test/CodeGen/X86/atomic32.ll | 175 ++++++++++++++++++ 2 files changed, 184 insertions(+), 4 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 8c96dcf896c..1f053cea6e5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2360,7 +2360,6 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { EVT VT = N->getValueType(0); - EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); AtomicSDNode *AM = cast(N); SDLoc SL(N); @@ -2374,13 +2373,19 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { { AM->getChain(), AM->getBasePtr(), CastVal }, AM->getMemOperand()); - SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, - NewAtomic); + SDValue Result = NewAtomic; + + if (getTypeAction(VT) == TargetLowering::TypePromoteFloat) { + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + Result = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, + NewAtomic); + } + // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1)); - return ResultCast; + return Result; } diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll index 461e0b5beff..4fb03356f99 100644 --- a/test/CodeGen/X86/atomic32.ll +++ b/test/CodeGen/X86/atomic32.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -O0 
-mtriple=x86_64-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s -check-prefixes=X64,X64-CMOV ; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s -check-prefixes=X86,X86-CMOV ; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov,-sse -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOCMOV +; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov,-sse,-x87 -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOX87 @sc32 = external global i32 @fsc32 = external global float @@ -361,6 +362,48 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl +; +; X86-NOX87-LABEL: atomic_fetch_max32: +; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx +; X86-NOX87-NEXT: pushl %esi +; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOX87-NEXT: movl sc32, %ecx +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: .LBB6_1: # %atomicrmw.start +; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, %ecx +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NOX87-NEXT: subl %edx, %ecx +; X86-NOX87-NEXT: movl %eax, %esi +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: jge .LBB6_4 +; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start +; X86-NOX87-NEXT: # in Loop: Header=BB6_1 Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: .LBB6_4: # %atomicrmw.start +; X86-NOX87-NEXT: # in Loop: Header=BB6_1 Depth=1 +; 
X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NOX87-NEXT: movl %ecx, %eax +; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: jne .LBB6_2 +; X86-NOX87-NEXT: jmp .LBB6_1 +; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end +; X86-NOX87-NEXT: addl $20, %esp +; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx +; X86-NOX87-NEXT: retl %t1 = atomicrmw max i32* @sc32, i32 %x acquire ret void } @@ -456,6 +499,48 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl +; +; X86-NOX87-LABEL: atomic_fetch_min32: +; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx +; X86-NOX87-NEXT: pushl %esi +; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOX87-NEXT: movl sc32, %ecx +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: .LBB7_1: # %atomicrmw.start +; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, %ecx +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NOX87-NEXT: subl %edx, %ecx +; X86-NOX87-NEXT: movl %eax, %esi +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: jle .LBB7_4 +; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start +; X86-NOX87-NEXT: # in Loop: Header=BB7_1 Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: .LBB7_4: # %atomicrmw.start +; X86-NOX87-NEXT: # in Loop: Header=BB7_1 Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NOX87-NEXT: movl %ecx, %eax +; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: jne .LBB7_2 +; X86-NOX87-NEXT: jmp .LBB7_1 +; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end +; X86-NOX87-NEXT: addl $20, %esp +; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx +; X86-NOX87-NEXT: retl %t1 = atomicrmw min i32* @sc32, i32 %x acquire ret void } @@ -551,6 +636,48 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl +; +; X86-NOX87-LABEL: atomic_fetch_umax32: +; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx +; X86-NOX87-NEXT: pushl %esi +; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOX87-NEXT: movl sc32, %ecx +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: .LBB8_1: # %atomicrmw.start +; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, %ecx +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NOX87-NEXT: subl %edx, %ecx +; X86-NOX87-NEXT: movl %eax, %esi +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: ja .LBB8_4 +; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start +; X86-NOX87-NEXT: 
# in Loop: Header=BB8_1 Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: .LBB8_4: # %atomicrmw.start +; X86-NOX87-NEXT: # in Loop: Header=BB8_1 Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NOX87-NEXT: movl %ecx, %eax +; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: jne .LBB8_2 +; X86-NOX87-NEXT: jmp .LBB8_1 +; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end +; X86-NOX87-NEXT: addl $20, %esp +; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx +; X86-NOX87-NEXT: retl %t1 = atomicrmw umax i32* @sc32, i32 %x acquire ret void } @@ -646,6 +773,48 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X86-NOCMOV-NEXT: popl %esi ; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl +; +; X86-NOX87-LABEL: atomic_fetch_umin32: +; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: pushl %ebx +; X86-NOX87-NEXT: pushl %esi +; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOX87-NEXT: movl sc32, %ecx +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: .LBB9_1: # %atomicrmw.start +; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, %ecx +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NOX87-NEXT: subl %edx, %ecx +; X86-NOX87-NEXT: movl %eax, %esi +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl %esi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: jbe .LBB9_4 +; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start +; X86-NOX87-NEXT: # in Loop: Header=BB9_1 Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: .LBB9_4: # %atomicrmw.start +; X86-NOX87-NEXT: # in Loop: Header=BB9_1 Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NOX87-NEXT: movl %ecx, %eax +; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 +; X86-NOX87-NEXT: sete %bl +; X86-NOX87-NEXT: testb $1, %bl +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: jne .LBB9_2 +; X86-NOX87-NEXT: jmp .LBB9_1 +; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end +; X86-NOX87-NEXT: addl $20, %esp +; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: popl %ebx +; X86-NOX87-NEXT: retl %t1 = atomicrmw umin i32* @sc32, i32 %x acquire ret void } @@ -721,6 +890,12 @@ define void @atomic_fetch_swapf32(float %x) nounwind { ; X86-NOCMOV-NEXT: xchgl %eax, fsc32 ; X86-NOCMOV-NEXT: popl %eax ; X86-NOCMOV-NEXT: retl +; +; X86-NOX87-LABEL: atomic_fetch_swapf32: +; X86-NOX87: # %bb.0: +; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOX87-NEXT: xchgl %eax, fsc32 +; X86-NOX87-NEXT: retl %t1 = atomicrmw xchg float* @fsc32, float %x acquire ret void }