diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index e76a47dff9d..d1e84cdd4a7 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -48,6 +48,8 @@ def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb / dsb) instructions">; def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; @@ -134,11 +136,15 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, - FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>; + FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack, + FeatureDB]>; def : Processor<"cortex-a9", CortexA9Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>; -def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; -def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; + [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack, + FeatureDB]>; +def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv, + FeatureDB]>; +def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv, + FeatureDB]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6398f1bf0c5..56adcf2b37f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -412,12 +412,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise - // use the default expansion. - bool canHandleAtomics = - (Subtarget->hasV7Ops() || - (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())); - if (canHandleAtomics) { + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use + // the default expansion. + if (Subtarget->hasDataBarrier() || + (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) { // membarrier needs custom lowering; the rest are legal and handled // normally. setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); @@ -1992,17 +1990,19 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, DebugLoc dl = Op.getDebugLoc(); SDValue Op5 = Op.getOperand(5); unsigned isDeviceBarrier = cast(Op5)->getZExtValue(); - // v6 and v7 can both handle barriers directly, but need handled a bit - // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should + // Some subtargets which have dmb and dsb instructions can handle barriers + // directly. Some ARMv6 cpus can support them with the help of mcr + // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should // never get here. unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; - if (Subtarget->hasV7Ops()) + if (Subtarget->hasDataBarrier()) return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); - else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()) + else { + assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() && + "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); - assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); - return SDValue(); + } } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index fa8bcb9db95..d4782793280 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -54,10 +54,10 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; -def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; -def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>; +def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>; +def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -120,14 +120,14 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; -def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, - [SDNPHasChain]>; -def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7, - [SDNPHasChain]>; -def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6, - [SDNPHasChain]>; -def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, - [SDNPHasChain]>; +def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR, + [SDNPHasChain]>; +def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR, + [SDNPHasChain]>; def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; @@ -154,6 +154,7 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; def HasDivide : Predicate<"Subtarget->hasDivide()">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">; +def HasDB : Predicate<"Subtarget->hasDataBarrier()">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">; @@ -2369,41 +2370,33 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def Int_MemBarrierV7 : AInoP<(outs), (ins), - Pseudo, NoItinerary, - "dmb", "", - [(ARMMemBarrierV7)]>, - Requires<[IsARM, HasV7]> { +def DMBsy : AInoP<(outs), (ins), Pseudo, NoItinerary, "dmb", "", + [(ARMMemBarrier)]>, Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff05; // FIXME: add support for options other than a full system DMB // See DMB disassembly-only variants below. let Inst{3-0} = 0b1111; } -def Int_SyncBarrierV7 : AInoP<(outs), (ins), - Pseudo, NoItinerary, - "dsb", "", - [(ARMSyncBarrierV7)]>, - Requires<[IsARM, HasV7]> { +def DSBsy : AInoP<(outs), (ins), Pseudo, NoItinerary, "dsb", "", + [(ARMSyncBarrier)]>, Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff04; // FIXME: add support for options other than a full system DSB // See DSB disassembly-only variants below. let Inst{3-0} = 0b1111; } -def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero), - Pseudo, NoItinerary, +def DMB_MCR : AInoP<(outs), (ins GPR:$zero), Pseudo, NoItinerary, "mcr", "\tp15, 0, $zero, c7, c10, 5", - [(ARMMemBarrierV6 GPR:$zero)]>, + [(ARMMemBarrierMCR GPR:$zero)]>, Requires<[IsARM, HasV6]> { // FIXME: add support for options other than a full system DMB // FIXME: add encoding } -def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), - Pseudo, NoItinerary, +def DSB_MCR : AInoP<(outs), (ins GPR:$zero), Pseudo, NoItinerary, "mcr", "\tp15, 0, $zero, c7, c10, 4", - [(ARMSyncBarrierV6 GPR:$zero)]>, + [(ARMSyncBarrierMCR GPR:$zero)]>, Requires<[IsARM, HasV6]> { // FIXME: add support for options other than a full system DSB // FIXME: add encoding diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 161f50fef0c..aaacefbb149 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2229,21 +2229,15 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def t2Int_MemBarrierV7 : AInoP<(outs), (ins), - ThumbFrm, NoItinerary, - "dmb", "", - [(ARMMemBarrierV7)]>, - Requires<[IsThumb2]> { +def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "", + [(ARMMemBarrier)]>, Requires<[HasDB]> { let Inst{31-4} = 0xF3BF8F5; // FIXME: add support for options other than a full system DMB let Inst{3-0} = 0b1111; } -def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), - ThumbFrm, NoItinerary, - "dsb", "", - [(ARMSyncBarrierV7)]>, - Requires<[IsThumb2]> { +def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "", + [(ARMSyncBarrier)]>, Requires<[HasDB]> { let Inst{31-4} = 0xF3BF8F4; // FIXME: add support for options other than a full system DSB let Inst{3-0} = 0b1111; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 52913e9db6e..db50c9f32de 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -42,6 +42,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , HasFP16(false) , HasHardwareDivide(false) , HasT2ExtractPack(false) + , HasDataBarrier(false) , Pref32BitThumb(false) , stackAlignment(4) , CPUString("generic") diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3f2455838f1..4e08e57d6cf 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -84,6 +84,10 @@ protected: /// instructions. bool HasT2ExtractPack; + /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier + /// instructions. + bool HasDataBarrier; + /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions /// over 16-bit ones. bool Pref32BitThumb; @@ -139,6 +143,7 @@ protected: return hasNEON() && UseNEONForSinglePrecisionFP; } bool hasDivide() const { return HasHardwareDivide; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } + bool hasDataBarrier() const { return HasDataBarrier; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool prefers32BitThumb() const { return Pref32BitThumb; } diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index f9c57634efe..99d9f01c048 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -493,7 +493,7 @@ static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) { static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { - if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7) + if (Opcode == ARM::DMBsy || Opcode == ARM::DSBsy) return true; assert(0 && "Unexpected pseudo instruction!"); diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll new file mode 100644 index 00000000000..44a3f46a181 --- /dev/null +++ b/test/CodeGen/Thumb/barrier.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 ) + +define void @t1() { +; CHECK: t1: +; CHECK: blx {{_*}}sync_synchronize + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true ) + ret void +} + +define void @t2() { +; CHECK: t2: +; CHECK: blx {{_*}}sync_synchronize + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false ) + ret void +} diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll new file mode 100644 index 00000000000..a54d09e6291 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-barrier.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s + +declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 ) + +define void @t1() { +; CHECK: t1: +; CHECK: dsb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true ) + ret void +} + +define void @t2() { +; CHECK: t2: +; CHECK: dmb + call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false ) + ret void +}