1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 19:52:54 +01:00

- Add subtarget feature -mattr=+db which determine whether an ARM cpu has the

memory and synchronization barrier dmb and dsb instructions.
- Change instruction names to something more sensible (matching name of actual
  instructions).
- Added tests for memory barrier codegen.

llvm-svn: 110785
This commit is contained in:
Evan Cheng 2010-08-11 06:22:01 +00:00
parent 89a64ee590
commit 5fca4ca5f9
9 changed files with 88 additions and 55 deletions

View File

@ -48,6 +48,8 @@ def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
"Has data barrier (dmb / dsb) instructions">;
def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
@ -134,11 +136,15 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries,
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>;
FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack,
FeatureDB]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack,
FeatureDB]>;
def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv,
FeatureDB]>;
def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv,
FeatureDB]>;
//===----------------------------------------------------------------------===//
// Register File Description

View File

@ -412,12 +412,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
// use the default expansion.
bool canHandleAtomics =
(Subtarget->hasV7Ops() ||
(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
if (canHandleAtomics) {
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasDataBarrier() ||
(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@ -1992,17 +1990,19 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
// v6 and v7 can both handle barriers directly, but need handled a bit
// differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should
// Some subtargets which have dmb and dsb instructions can handle barriers
// directly. Some ARMv6 cpus can support them with the help of mcr
// instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should
// never get here.
unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
if (Subtarget->hasV7Ops())
if (Subtarget->hasDataBarrier())
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
else {
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
"Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
return SDValue();
}
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {

View File

@ -54,10 +54,10 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>;
def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>;
def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@ -120,14 +120,14 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
[SDNPHasChain]>;
def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7,
[SDNPHasChain]>;
def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6,
[SDNPHasChain]>;
def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
[SDNPHasChain]>;
def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR,
[SDNPHasChain]>;
def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR,
[SDNPHasChain]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
@ -154,6 +154,7 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
def HasNEON : Predicate<"Subtarget->hasNEON()">;
def HasDivide : Predicate<"Subtarget->hasDivide()">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">;
@ -2369,41 +2370,33 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def Int_MemBarrierV7 : AInoP<(outs), (ins),
Pseudo, NoItinerary,
"dmb", "",
[(ARMMemBarrierV7)]>,
Requires<[IsARM, HasV7]> {
def DMBsy : AInoP<(outs), (ins), Pseudo, NoItinerary, "dmb", "",
[(ARMMemBarrier)]>, Requires<[IsARM, HasV7]> {
let Inst{31-4} = 0xf57ff05;
// FIXME: add support for options other than a full system DMB
// See DMB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
def Int_SyncBarrierV7 : AInoP<(outs), (ins),
Pseudo, NoItinerary,
"dsb", "",
[(ARMSyncBarrierV7)]>,
Requires<[IsARM, HasV7]> {
def DSBsy : AInoP<(outs), (ins), Pseudo, NoItinerary, "dsb", "",
[(ARMSyncBarrier)]>, Requires<[IsARM, HasV7]> {
let Inst{31-4} = 0xf57ff04;
// FIXME: add support for options other than a full system DSB
// See DSB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero),
Pseudo, NoItinerary,
def DMB_MCR : AInoP<(outs), (ins GPR:$zero), Pseudo, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 5",
[(ARMMemBarrierV6 GPR:$zero)]>,
[(ARMMemBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DMB
// FIXME: add encoding
}
def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
Pseudo, NoItinerary,
def DSB_MCR : AInoP<(outs), (ins GPR:$zero), Pseudo, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 4",
[(ARMSyncBarrierV6 GPR:$zero)]>,
[(ARMSyncBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DSB
// FIXME: add encoding

View File

@ -2229,21 +2229,15 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
ThumbFrm, NoItinerary,
"dmb", "",
[(ARMMemBarrierV7)]>,
Requires<[IsThumb2]> {
def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "",
[(ARMMemBarrier)]>, Requires<[HasDB]> {
let Inst{31-4} = 0xF3BF8F5;
// FIXME: add support for options other than a full system DMB
let Inst{3-0} = 0b1111;
}
def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
ThumbFrm, NoItinerary,
"dsb", "",
[(ARMSyncBarrierV7)]>,
Requires<[IsThumb2]> {
def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "",
[(ARMSyncBarrier)]>, Requires<[HasDB]> {
let Inst{31-4} = 0xF3BF8F4;
// FIXME: add support for options other than a full system DSB
let Inst{3-0} = 0b1111;

View File

@ -42,6 +42,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, HasFP16(false)
, HasHardwareDivide(false)
, HasT2ExtractPack(false)
, HasDataBarrier(false)
, Pref32BitThumb(false)
, stackAlignment(4)
, CPUString("generic")

View File

@ -84,6 +84,10 @@ protected:
/// instructions.
bool HasT2ExtractPack;
/// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
/// instructions.
bool HasDataBarrier;
/// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
/// over 16-bit ones.
bool Pref32BitThumb;
@ -139,6 +143,7 @@ protected:
return hasNEON() && UseNEONForSinglePrecisionFP; }
bool hasDivide() const { return HasHardwareDivide; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool prefers32BitThumb() const { return Pref32BitThumb; }

View File

@ -493,7 +493,7 @@ static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO) {
if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7)
if (Opcode == ARM::DMBsy || Opcode == ARM::DSBsy)
return true;
assert(0 && "Unexpected pseudo instruction!");

View File

@ -0,0 +1,17 @@
; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
define void @t1() {
; CHECK: t1:
; CHECK: blx {{_*}}sync_synchronize
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
ret void
}
define void @t2() {
; CHECK: t2:
; CHECK: blx {{_*}}sync_synchronize
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
ret void
}

View File

@ -0,0 +1,17 @@
; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s
declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
define void @t1() {
; CHECK: t1:
; CHECK: dsb
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
ret void
}
define void @t2() {
; CHECK: t2:
; CHECK: dmb
call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
ret void
}