From 4f5992325a0a292b91c91e5d9c2f95bea7361699 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 20 Mar 2019 23:35:49 +0000 Subject: [PATCH] [X86] Add CMPXCHG8B feature flag. Set it for all CPUs except i386/i486 including 'generic'. Disable use of CMPXCHG8B when this flag isn't set. CMPXCHG8B was introduced on i586/pentium generation. If it's not enabled, limit the atomic width to 32 bits so the AtomicExpandPass will expand to lib calls. Unclear if we should be using a different limit for other configs. The default is 1024 and experimentation shows that using an i256 atomic will cause a crash in SelectionDAG. Differential Revision: https://reviews.llvm.org/D59576 llvm-svn: 356631 --- lib/Support/Host.cpp | 1 + lib/Target/X86/X86.td | 107 +++-- lib/Target/X86/X86ISelLowering.cpp | 15 +- lib/Target/X86/X86InstrCompiler.td | 7 +- lib/Target/X86/X86InstrInfo.td | 3 +- lib/Target/X86/X86Subtarget.h | 4 + test/CodeGen/X86/atomic64.ll | 603 +++++++++++++++++++++++++++++ test/CodeGen/X86/cmpxchg8b.ll | 23 ++ 8 files changed, 717 insertions(+), 46 deletions(-) diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 52e7080e744..69362704687 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -1264,6 +1264,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); + Features["cx8"] = (EDX >> 8) & 1; Features["cmov"] = (EDX >> 15) & 1; Features["mmx"] = (EDX >> 23) & 1; Features["fxsr"] = (EDX >> 24) & 1; diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7deae9152f8..fa8dd8a59f0 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -39,6 +39,9 @@ def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true", def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; +def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true", + "Support CMPXCHG8B instructions">; + def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", 
"Support POPCNT instruction">; @@ -471,6 +474,7 @@ include "X86SchedSkylakeServer.td" def ProcessorFeatures { // Nehalem list NHMInheritableFeatures = [FeatureX87, + FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE42, @@ -629,6 +633,7 @@ def ProcessorFeatures { // Atom list AtomInheritableFeatures = [FeatureX87, + FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3, @@ -707,6 +712,7 @@ def ProcessorFeatures { // Knights Landing list KNLFeatures = [FeatureX87, + FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureFXSR, @@ -749,6 +755,7 @@ def ProcessorFeatures { // Bobcat list BtVer1InheritableFeatures = [FeatureX87, + FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3, @@ -785,6 +792,7 @@ def ProcessorFeatures { // Bulldozer list BdVer1InheritableFeatures = [FeatureX87, + FeatureCMPXCHG8B, FeatureCMOV, FeatureXOP, Feature64Bit, @@ -883,23 +891,31 @@ def ProcessorFeatures { class Proc Features> : ProcessorModel; -def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>; +// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled +// if i386/i486 is specifically requested. 
+def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16, + FeatureCMPXCHG8B]>; def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; +def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16, + FeatureCMPXCHG8B]>; +def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16, + FeatureCMPXCHG8B]>; +def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, + FeatureCMPXCHG8B, FeatureMMX]>; -def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>; -def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, - FeatureNOPL]>; +def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + FeatureCMOV]>; +def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + FeatureCMOV, FeatureNOPL]>; -def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureCMOV, FeatureFXSR, FeatureNOPL]>; +def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + FeatureMMX, FeatureCMOV, FeatureFXSR, + FeatureNOPL]>; foreach P = ["pentium3", "pentium3m"] in { - def : Proc; + def : Proc; } // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. @@ -913,13 +929,15 @@ foreach P = ["pentium3", "pentium3m"] in { // changes slightly. def : ProcessorModel<"pentium-m", GenericPostRAModel, - [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>; + [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, + FeatureCMOV]>; foreach P = ["pentium4", "pentium4m"] in { def : ProcessorModel; + [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, + FeatureCMOV]>; } // Intel Quark. @@ -927,16 +945,19 @@ def : Proc<"lakemont", []>; // Intel Core Duo. 
def : ProcessorModel<"yonah", SandyBridgeModel, - [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR, FeatureNOPL, FeatureCMOV]>; + [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, + FeatureCMOV]>; // NetBurst. def : ProcessorModel<"prescott", GenericPostRAModel, - [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR, FeatureNOPL, FeatureCMOV]>; + [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, + FeatureCMOV]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureX87, FeatureSlowUAMem16, + FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE3, @@ -950,6 +971,7 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [ def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureX87, FeatureSlowUAMem16, + FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSSE3, @@ -963,6 +985,7 @@ def : ProcessorModel<"core2", SandyBridgeModel, [ def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureX87, FeatureSlowUAMem16, + FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE41, @@ -1033,36 +1056,41 @@ def : ProcessorModel<"icelake-server", SkylakeServerModel, // AMD CPUs. 
-def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; -def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; -def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; +def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + FeatureMMX]>; +def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + Feature3DNow]>; +def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + Feature3DNow]>; foreach P = ["athlon", "athlon-tbird"] in { - def : Proc; + def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { - def : Proc; + def : Proc; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { - def : Proc; + def : Proc; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { - def : Proc; + def : Proc; } foreach P = ["amdfam10", "barcelona"] in { - def : Proc; + def : Proc; } // Bobcat @@ -1082,13 +1110,15 @@ def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>; def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>; def : ProcessorModel<"znver2", Znver1Model, ProcessorFeatures.ZN2Features>; -def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>; +def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; -def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE1, FeatureFXSR, FeatureCMOV]>; +def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, + FeatureMMX, FeatureSSE1, FeatureFXSR, + FeatureCMOV]>; // We also provide a generic 64-bit specific x86 processor model which tries to // be good for modern chips without enabling instruction set encodings past the @@ -1102,6 +1132,7 @@ def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, // forming a common 
base for them. def : ProcessorModel<"x86-64", SandyBridgeModel, [ FeatureX87, + FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d871f0ce3b7..875fe4f4d0a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -158,6 +158,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setUseUnderscoreLongJmp(true); } + // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to + // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b. + // FIXME: Should we be limiting the atomic size on other configs? Default is + // 1024. + if (!Subtarget.hasCmpxchg8b()) + setMaxAtomicSizeInBitsSupported(32); + // Set up the register classes. addRegisterClass(MVT::i8, &X86::GR8RegClass); addRegisterClass(MVT::i16, &X86::GR16RegClass); @@ -25475,11 +25482,11 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const { unsigned OpWidth = MemType->getPrimitiveSizeInBits(); if (OpWidth == 64) - return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b - else if (OpWidth == 128) + return Subtarget.hasCmpxchg8b() && !Subtarget.is64Bit(); + if (OpWidth == 128) return Subtarget.hasCmpxchg16b(); - else - return false; + + return false; } bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 16afcf6330b..4c06b176543 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -867,7 +867,7 @@ let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in { } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], - SchedRW = [WriteCMPXCHGRMW] in { + Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW] in { defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>; } @@ -891,8 +891,9 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, 
"cmpxchg8b", X86cas8, i64mem>; // the instruction and we are sure we will have a valid register to restore // the value of RBX. let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX], - SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1, - Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in { + Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW], + isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst", + usesCustomInserter = 1 in { def LCMPXCHG8B_SAVE_EBX : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save), diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 278dba50cf4..67ceceb6698 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -880,6 +880,7 @@ def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">; def HasRDPID : Predicate<"Subtarget->hasRDPID()">; def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">; def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">; +def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, @@ -2073,7 +2074,7 @@ def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), - "cmpxchg8b\t$dst", []>, TB; + "cmpxchg8b\t$dst", []>, TB, Requires<[HasCmpxchg8b]>; let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in // NOTE: In64BitMode check needed for the AssemblerPredicate. diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index ceb1e86769b..8d330fa6f9a 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -88,6 +88,9 @@ protected: /// True if the processor supports X87 instructions. 
bool HasX87 = false; + /// True if the processor supports CMPXCHG8B. + bool HasCmpxchg8b = false; + /// True if this processor has NOPL instruction /// (generally pentium pro+). bool HasNOPL = false; @@ -546,6 +549,7 @@ public: void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } bool hasX87() const { return HasX87; } + bool hasCmpxchg8b() const { return HasCmpxchg8b; } bool hasNOPL() const { return HasNOPL; } // SSE codegen depends on cmovs, and all SSE1+ processors support them. // All 64-bit processors support cmov. diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll index ae39bd724db..11bd6e05558 100644 --- a/test/CodeGen/X86/atomic64.ll +++ b/test/CodeGen/X86/atomic64.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -O0 -mtriple=x86_64-- -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64 +; RUN: llc < %s -O0 -mtriple=i386-- -mcpu=i486 -verify-machineinstrs | FileCheck %s --check-prefix I486 @sc64 = external global i64 @fsc64 = external global double @@ -13,6 +14,52 @@ define void @atomic_fetch_add64() nounwind { ; X64-NEXT: lock xaddq %rax, {{.*}}(%rip) ; X64-NEXT: lock addq %rax, {{.*}}(%rip) ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_add64: +; I486: # %bb.0: # %entry +; I486-NEXT: pushl %esi +; I486-NEXT: subl $56, %esp +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %ecx +; I486-NEXT: movl $2, 12(%ecx) +; I486-NEXT: movl $0, 8(%ecx) +; I486-NEXT: movl $1, 4(%ecx) +; I486-NEXT: movl $sc64, (%ecx) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_add_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $0, 8(%esi) +; I486-NEXT: movl $3, 4(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_add_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $0, 8(%esi) +; I486-NEXT: movl $5, 4(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_add_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl %edx, 8(%esi) +; I486-NEXT: movl %eax, 4(%esi) +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_add_8 +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: addl $56, %esp +; I486-NEXT: popl %esi +; I486-NEXT: retl entry: %t1 = atomicrmw add i64* @sc64, i64 1 acquire %t2 = atomicrmw add i64* @sc64, i64 3 acquire @@ -30,6 +77,52 @@ define void @atomic_fetch_sub64() nounwind { ; X64-NEXT: lock xaddq %rax, {{.*}}(%rip) ; X64-NEXT: lock subq %rax, {{.*}}(%rip) ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_sub64: +; I486: # %bb.0: +; I486-NEXT: pushl %esi +; I486-NEXT: subl $56, %esp +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %ecx +; I486-NEXT: movl $2, 12(%ecx) +; I486-NEXT: movl $0, 8(%ecx) +; I486-NEXT: movl $1, 4(%ecx) +; I486-NEXT: movl $sc64, (%ecx) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_sub_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $0, 8(%esi) +; I486-NEXT: movl $3, 4(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: 
movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_sub_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $0, 8(%esi) +; I486-NEXT: movl $5, 4(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_sub_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl %edx, 8(%esi) +; I486-NEXT: movl %eax, 4(%esi) +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_sub_8 +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: addl $56, %esp +; I486-NEXT: popl %esi +; I486-NEXT: retl %t1 = atomicrmw sub i64* @sc64, i64 1 acquire %t2 = atomicrmw sub i64* @sc64, i64 3 acquire %t3 = atomicrmw sub i64* @sc64, i64 5 acquire @@ -61,6 +154,42 @@ define void @atomic_fetch_and64() nounwind { ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: lock andq %rax, {{.*}}(%rip) ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_and64: +; I486: # %bb.0: +; I486-NEXT: pushl %esi +; I486-NEXT: subl $44, %esp +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %ecx +; I486-NEXT: movl $2, 12(%ecx) +; I486-NEXT: movl $0, 8(%ecx) +; I486-NEXT: movl $3, 4(%ecx) +; I486-NEXT: movl $sc64, (%ecx) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_and_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $0, 8(%esi) +; I486-NEXT: movl $5, 4(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_and_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl %edx, 8(%esi) +; I486-NEXT: movl %eax, 4(%esi) +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_and_8 +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: addl $44, %esp +; I486-NEXT: popl %esi +; I486-NEXT: retl %t1 = atomicrmw and i64* @sc64, i64 3 acquire %t2 = atomicrmw and i64* @sc64, i64 5 acquire %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire @@ -90,6 +219,42 @@ define void @atomic_fetch_or64() nounwind { ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: lock orq %rax, {{.*}}(%rip) ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_or64: +; I486: # %bb.0: +; I486-NEXT: pushl %esi +; I486-NEXT: subl $44, %esp +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %ecx +; I486-NEXT: movl $2, 12(%ecx) +; I486-NEXT: movl $0, 8(%ecx) +; I486-NEXT: movl $3, 4(%ecx) +; I486-NEXT: movl $sc64, (%ecx) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_or_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $0, 8(%esi) +; I486-NEXT: movl $5, 4(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_or_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl %edx, 8(%esi) +; I486-NEXT: movl %eax, 4(%esi) +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $sc64, 
(%esi) +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_or_8 +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: addl $44, %esp +; I486-NEXT: popl %esi +; I486-NEXT: retl %t1 = atomicrmw or i64* @sc64, i64 3 acquire %t2 = atomicrmw or i64* @sc64, i64 5 acquire %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire @@ -119,6 +284,42 @@ define void @atomic_fetch_xor64() nounwind { ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: lock xorq %rax, {{.*}}(%rip) ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_xor64: +; I486: # %bb.0: +; I486-NEXT: pushl %esi +; I486-NEXT: subl $44, %esp +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %ecx +; I486-NEXT: movl $2, 12(%ecx) +; I486-NEXT: movl $0, 8(%ecx) +; I486-NEXT: movl $3, 4(%ecx) +; I486-NEXT: movl $sc64, (%ecx) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_xor_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $0, 8(%esi) +; I486-NEXT: movl $5, 4(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_xor_8 +; I486-NEXT: leal sc64, %ecx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl %edx, 8(%esi) +; I486-NEXT: movl %eax, 4(%esi) +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_xor_8 +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: addl $44, %esp +; I486-NEXT: popl %esi +; I486-NEXT: retl %t1 = atomicrmw xor i64* @sc64, i64 
3 acquire %t2 = atomicrmw xor i64* @sc64, i64 5 acquire %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire @@ -146,6 +347,26 @@ define void @atomic_fetch_nand64(i64 %x) nounwind { ; X64-NEXT: jmp .LBB5_1 ; X64-NEXT: .LBB5_2: # %atomicrmw.end ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_nand64: +; I486: # %bb.0: +; I486-NEXT: pushl %esi +; I486-NEXT: subl $28, %esp +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I486-NEXT: leal sc64, %edx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl %eax, 8(%esi) +; I486-NEXT: movl %ecx, 4(%esi) +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_fetch_nand_8 +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: addl $28, %esp +; I486-NEXT: popl %esi +; I486-NEXT: retl %t1 = atomicrmw nand i64* @sc64, i64 %x acquire ret void } @@ -172,6 +393,81 @@ define void @atomic_fetch_max64(i64 %x) nounwind { ; X64-NEXT: jmp .LBB6_1 ; X64-NEXT: .LBB6_2: # %atomicrmw.end ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_max64: +; I486: # %bb.0: +; I486-NEXT: pushl %ebp +; I486-NEXT: movl %esp, %ebp +; I486-NEXT: pushl %ebx +; I486-NEXT: pushl %edi +; I486-NEXT: pushl %esi +; I486-NEXT: andl $-8, %esp +; I486-NEXT: subl $80, %esp +; I486-NEXT: movl 12(%ebp), %eax +; I486-NEXT: movl 8(%ebp), %ecx +; I486-NEXT: movl sc64+4, %edx +; I486-NEXT: movl sc64, %esi +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: jmp .LBB6_1 +; I486-NEXT: .LBB6_1: # %atomicrmw.start +; I486-NEXT: # =>This Inner Loop Header: Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl %ecx, %edx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; I486-NEXT: subl %esi, %edx +; I486-NEXT: movl %eax, %edi +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; I486-NEXT: sbbl %ebx, %edi +; I486-NEXT: movl %ecx, %esi +; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: jge .LBB6_4 +; I486-NEXT: # %bb.3: # %atomicrmw.start +; I486-NEXT: # in Loop: Header=BB6_1 Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: .LBB6_4: # %atomicrmw.start +; I486-NEXT: # in Loop: Header=BB6_1 Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %edi +; I486-NEXT: movl %eax, 12(%edi) +; I486-NEXT: movl %ecx, 8(%edi) +; I486-NEXT: leal {{[0-9]+}}(%esp), %eax +; I486-NEXT: movl %eax, 4(%edi) +; I486-NEXT: movl $2, 20(%edi) +; I486-NEXT: movl $2, 16(%edi) +; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx +; I486-NEXT: testb %al, %al +; 
I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: je .LBB6_1 +; I486-NEXT: jmp .LBB6_2 +; I486-NEXT: .LBB6_2: # %atomicrmw.end +; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: popl %esi +; I486-NEXT: popl %edi +; I486-NEXT: popl %ebx +; I486-NEXT: popl %ebp +; I486-NEXT: retl %t1 = atomicrmw max i64* @sc64, i64 %x acquire ret void @@ -199,6 +495,79 @@ define void @atomic_fetch_min64(i64 %x) nounwind { ; X64-NEXT: jmp .LBB7_1 ; X64-NEXT: .LBB7_2: # %atomicrmw.end ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_min64: +; I486: # %bb.0: +; I486-NEXT: pushl %ebp +; I486-NEXT: movl %esp, %ebp +; I486-NEXT: pushl %ebx +; I486-NEXT: pushl %edi +; I486-NEXT: pushl %esi +; I486-NEXT: andl $-8, %esp +; I486-NEXT: subl $80, %esp +; I486-NEXT: movl 12(%ebp), %eax +; I486-NEXT: movl 8(%ebp), %ecx +; I486-NEXT: movl sc64+4, %edx +; I486-NEXT: movl sc64, %esi +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: jmp .LBB7_1 +; I486-NEXT: .LBB7_1: # %atomicrmw.start +; I486-NEXT: # =>This Inner Loop Header: Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; I486-NEXT: subl %ecx, %edx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; I486-NEXT: sbbl %eax, %esi +; I486-NEXT: movl %ecx, %edi +; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: jge .LBB7_4 +; I486-NEXT: # %bb.3: # %atomicrmw.start +; I486-NEXT: # in Loop: Header=BB7_1 Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: .LBB7_4: # %atomicrmw.start +; I486-NEXT: # in Loop: Header=BB7_1 Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %edi +; I486-NEXT: movl %eax, 12(%edi) +; I486-NEXT: movl %ecx, 8(%edi) +; I486-NEXT: leal {{[0-9]+}}(%esp), %eax +; I486-NEXT: movl %eax, 4(%edi) +; I486-NEXT: movl $2, 20(%edi) +; I486-NEXT: movl $2, 16(%edi) +; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx +; I486-NEXT: testb %al, %al +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: je .LBB7_1 +; I486-NEXT: jmp .LBB7_2 +; I486-NEXT: .LBB7_2: # %atomicrmw.end +; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: popl %esi +; I486-NEXT: popl %edi +; I486-NEXT: popl %ebx +; I486-NEXT: popl %ebp +; I486-NEXT: retl %t1 = atomicrmw min i64* @sc64, i64 %x acquire ret void @@ -226,6 +595,79 @@ define void @atomic_fetch_umax64(i64 %x) nounwind { ; X64-NEXT: jmp .LBB8_1 ; X64-NEXT: .LBB8_2: # %atomicrmw.end ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_umax64: +; I486: # %bb.0: +; I486-NEXT: 
pushl %ebp +; I486-NEXT: movl %esp, %ebp +; I486-NEXT: pushl %ebx +; I486-NEXT: pushl %edi +; I486-NEXT: pushl %esi +; I486-NEXT: andl $-8, %esp +; I486-NEXT: subl $80, %esp +; I486-NEXT: movl 12(%ebp), %eax +; I486-NEXT: movl 8(%ebp), %ecx +; I486-NEXT: movl sc64+4, %edx +; I486-NEXT: movl sc64, %esi +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: jmp .LBB8_1 +; I486-NEXT: .LBB8_1: # %atomicrmw.start +; I486-NEXT: # =>This Inner Loop Header: Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; I486-NEXT: subl %ecx, %edx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; I486-NEXT: sbbl %eax, %esi +; I486-NEXT: movl %ecx, %edi +; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: jb .LBB8_4 +; I486-NEXT: # %bb.3: # %atomicrmw.start +; I486-NEXT: # in Loop: Header=BB8_1 Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: .LBB8_4: # %atomicrmw.start +; I486-NEXT: # in Loop: Header=BB8_1 Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %edi +; I486-NEXT: movl %eax, 12(%edi) +; I486-NEXT: movl %ecx, 8(%edi) +; I486-NEXT: leal {{[0-9]+}}(%esp), %eax +; I486-NEXT: movl %eax, 4(%edi) +; I486-NEXT: movl $2, 20(%edi) +; I486-NEXT: movl $2, 16(%edi) +; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx +; I486-NEXT: testb %al, %al +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: je .LBB8_1 +; I486-NEXT: jmp .LBB8_2 +; I486-NEXT: .LBB8_2: # %atomicrmw.end +; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: popl %esi +; I486-NEXT: popl %edi +; I486-NEXT: popl %ebx +; I486-NEXT: popl %ebp +; I486-NEXT: retl %t1 = atomicrmw umax i64* @sc64, i64 %x acquire ret void @@ -253,6 +695,79 @@ define void @atomic_fetch_umin64(i64 %x) nounwind { ; X64-NEXT: jmp .LBB9_1 ; X64-NEXT: .LBB9_2: # %atomicrmw.end ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_umin64: +; I486: # %bb.0: +; I486-NEXT: pushl %ebp +; I486-NEXT: movl %esp, %ebp +; I486-NEXT: pushl %ebx +; I486-NEXT: pushl %edi +; I486-NEXT: pushl %esi +; I486-NEXT: andl $-8, %esp +; I486-NEXT: subl $80, %esp +; I486-NEXT: movl 12(%ebp), %eax +; I486-NEXT: movl 8(%ebp), %ecx +; I486-NEXT: movl sc64+4, %edx +; I486-NEXT: movl sc64, %esi +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: jmp .LBB9_1 +; I486-NEXT: .LBB9_1: # %atomicrmw.start +; I486-NEXT: # 
=>This Inner Loop Header: Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; I486-NEXT: subl %ecx, %edx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; I486-NEXT: sbbl %eax, %esi +; I486-NEXT: movl %ecx, %edi +; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: jae .LBB9_4 +; I486-NEXT: # %bb.3: # %atomicrmw.start +; I486-NEXT: # in Loop: Header=BB9_1 Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: .LBB9_4: # %atomicrmw.start +; I486-NEXT: # in Loop: Header=BB9_1 Depth=1 +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %edi +; I486-NEXT: movl %eax, 12(%edi) +; I486-NEXT: movl %ecx, 8(%edi) +; I486-NEXT: leal {{[0-9]+}}(%esp), %eax +; I486-NEXT: movl %eax, 4(%edi) +; I486-NEXT: movl $2, 20(%edi) +; I486-NEXT: movl $2, 16(%edi) +; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; I486-NEXT: testb %al, %al +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: je .LBB9_1 +; I486-NEXT: jmp .LBB9_2 +; I486-NEXT: .LBB9_2: # %atomicrmw.end +; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: popl %esi +; I486-NEXT: popl %edi +; I486-NEXT: popl %ebx +; I486-NEXT: popl %ebp +; I486-NEXT: retl %t1 = atomicrmw umin i64* @sc64, i64 %x acquire ret void @@ -267,6 +782,30 @@ define void @atomic_fetch_cmpxchg64() nounwind { ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_cmpxchg64: +; I486: # %bb.0: +; I486-NEXT: pushl %ebp +; I486-NEXT: movl %esp, %ebp +; I486-NEXT: andl $-8, %esp +; I486-NEXT: subl $40, %esp +; I486-NEXT: leal sc64, %eax +; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl $0, {{[0-9]+}}(%esp) +; I486-NEXT: movl $0, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %edx +; I486-NEXT: movl %ecx, 4(%edx) +; I486-NEXT: movl $2, 20(%edx) +; I486-NEXT: movl $2, 16(%edx) +; I486-NEXT: movl $0, 12(%edx) +; I486-NEXT: movl $1, 8(%edx) +; I486-NEXT: movl $sc64, (%edx) +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; I486-NEXT: movl %ebp, %esp +; I486-NEXT: popl %ebp +; I486-NEXT: retl %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire ret void } @@ -276,6 +815,24 @@ define void @atomic_fetch_store64(i64 %x) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq %rdi, {{.*}}(%rip) ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_store64: +; I486: # %bb.0: +; I486-NEXT: pushl %esi +; I486-NEXT: subl $20, %esp +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I486-NEXT: leal sc64, %edx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl %eax, 8(%esi) +; I486-NEXT: movl %ecx, 
4(%esi) +; I486-NEXT: movl $3, 12(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_store_8 +; I486-NEXT: addl $20, %esp +; I486-NEXT: popl %esi +; I486-NEXT: retl store atomic i64 %x, i64* @sc64 release, align 8 ret void } @@ -286,6 +843,26 @@ define void @atomic_fetch_swap64(i64 %x) nounwind { ; X64-NEXT: xchgq %rdi, {{.*}}(%rip) ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_swap64: +; I486: # %bb.0: +; I486-NEXT: pushl %esi +; I486-NEXT: subl $28, %esp +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I486-NEXT: leal sc64, %edx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl %eax, 8(%esi) +; I486-NEXT: movl %ecx, 4(%esi) +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $sc64, (%esi) +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_exchange_8 +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: addl $28, %esp +; I486-NEXT: popl %esi +; I486-NEXT: retl %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire ret void } @@ -297,6 +874,32 @@ define void @atomic_fetch_swapf64(double %x) nounwind { ; X64-NEXT: xchgq %rax, {{.*}}(%rip) ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: retq +; +; I486-LABEL: atomic_fetch_swapf64: +; I486: # %bb.0: +; I486-NEXT: pushl %ebp +; I486-NEXT: movl %esp, %ebp +; I486-NEXT: pushl %esi +; I486-NEXT: andl $-8, %esp +; I486-NEXT: subl $48, %esp +; I486-NEXT: fldl 8(%ebp) +; I486-NEXT: leal fsc64, %eax +; I486-NEXT: fstpl {{[0-9]+}}(%esp) +; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx +; I486-NEXT: movl %esp, %esi +; I486-NEXT: movl %edx, 8(%esi) +; I486-NEXT: movl %ecx, 4(%esi) +; I486-NEXT: movl $2, 12(%esi) +; I486-NEXT: movl $fsc64, (%esi) +; I486-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: calll __atomic_exchange_8 +; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal -4(%ebp), %esp +; I486-NEXT: popl %esi +; I486-NEXT: popl %ebp +; I486-NEXT: retl %t1 = atomicrmw xchg double* @fsc64, double %x acquire ret void } diff --git a/test/CodeGen/X86/cmpxchg8b.ll b/test/CodeGen/X86/cmpxchg8b.ll index fa8fff8fd7d..8eb3dda6b6e 100644 --- a/test/CodeGen/X86/cmpxchg8b.ll +++ b/test/CodeGen/X86/cmpxchg8b.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown- -mcpu=core2 | FileCheck %s --check-prefixes=CHECK,X86 ; RUN: llc < %s -mtriple=x86_64-unknown- -mcpu=core2 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-unknown- -mcpu=i486 | FileCheck %s --check-prefixes=I486 ; Basic 64-bit cmpxchg define void @t1(i64* nocapture %p) nounwind ssp { @@ -24,6 +25,28 @@ define void @t1(i64* nocapture %p) nounwind ssp { ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: lock cmpxchgq %rcx, (%rdi) ; X64-NEXT: retq +; +; I486-LABEL: t1: +; I486: # %bb.0: # %entry +; I486-NEXT: pushl %ebp +; I486-NEXT: movl %esp, %ebp +; I486-NEXT: andl $-8, %esp +; I486-NEXT: subl $8, %esp +; I486-NEXT: movl 8(%ebp), %eax +; I486-NEXT: movl $0, {{[0-9]+}}(%esp) +; I486-NEXT: movl $0, (%esp) +; I486-NEXT: movl %esp, %ecx +; I486-NEXT: pushl $5 +; I486-NEXT: pushl $5 +; I486-NEXT: pushl $0 +; I486-NEXT: pushl $1 +; I486-NEXT: pushl %ecx +; I486-NEXT: pushl %eax +; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: addl $24, %esp +; I486-NEXT: movl %ebp, %esp +; I486-NEXT: popl %ebp +; I486-NEXT: retl entry: %r = cmpxchg i64* %p, i64 0, i64 1 seq_cst seq_cst ret void