
AArch64: Do not test for CPUs, use SubtargetFeatures

Testing for specific CPUs has a number of problems; it is better to use
subtarget features:
- When a tweak is added for a specific CPU, it is often desirable for
  the next version of that CPU as well, yet it is easy to forget to add it.
- It is hard to keep track of checks scattered around the target code;
  declaring all target specifics together with the CPU in the tablegen
  file is a clearer representation.
- Subtarget features can be tweaked from the command line (see the sketch
  below).
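
To make the intent concrete, here is a minimal self-contained sketch (toy
code, not part of this patch; ToySubtarget and its members are invented for
illustration). Callers query the property they need instead of enumerating
CPU names, and the property is just a bit that per-CPU defaults, or an
-mattr style override, can flip. The real feature names added in AArch64.td
below (e.g. balance-fp-ops, use-aa) can be toggled the same way via
llc -mattr=+.../-....

    #include <iostream>
    #include <string>

    // Toy model of the idea (illustration only, not LLVM code): expose a
    // semantic feature bit instead of forcing callers to compare CPU names.
    struct ToySubtarget {
      std::string CPU;
      bool BalanceFPOps = false; // per-CPU default, overridable by a flag

      // Old style: every caller lists CPUs and silently misses newer ones.
      bool isCortexA53() const { return CPU == "cortex-a53"; }
      bool isCortexA57() const { return CPU == "cortex-a57"; }

      // New style: callers ask for the property they actually care about.
      bool balanceFPOps() const { return BalanceFPOps; }
    };

    int main() {
      // Hypothetical newer core that should inherit the tweak.
      ToySubtarget ST{"cortex-a72", /*BalanceFPOps=*/true};
      std::cout << "CPU-name check: "
                << (ST.isCortexA53() || ST.isCortexA57()) << "\n"; // prints 0
      std::cout << "feature check:  " << ST.balanceFPOps() << "\n"; // prints 1
    }

The patch itself follows the same shape, e.g. the A57 FP load balancing pass
now tests balanceFPOps() instead of isCortexA53()/isCortexA57().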

To discourage people from using CPU checks in the future, I removed the
isCortexXX(), isCyclone(), ... functions. I added a getProcFamily()
function for exceptional circumstances, but made it clear in the comment
that its use is discouraged.
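
As a sketch of what that escape hatch looks like (again toy code, not the
real classes; the in-tree use is the ExynosM1 comparison in the
AArch64InstrInfo.cpp hunk further down):

    #include <cstdint>

    // Toy mirror of the discouraged escape hatch (illustration only): an
    // explicit processor-family enum for the rare CPU-specific quirk that
    // has no sensible subtarget feature yet.
    enum class ProcFamily : std::uint8_t {
      Others, CortexA35, CortexA53, CortexA57, Cyclone, ExynosM1, Kryo
    };

    struct ToySubtarget {
      ProcFamily Family = ProcFamily::Others;
      // Avoid where possible; prefer a SubtargetFeature or a property set
      // in something like initializeProperties().
      ProcFamily getProcFamily() const { return Family; }
    };

    // Hypothetical quirk that really is specific to one micro-architecture.
    bool hasShiftedOperandQuirk(const ToySubtarget &ST) {
      return ST.getProcFamily() == ProcFamily::ExynosM1;
    }

    int main() {
      ToySubtarget ST{ProcFamily::ExynosM1};
      return hasShiftedOperandQuirk(ST) ? 0 : 1; // exits 0 on the quirky core
    }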

Reformat the feature list in AArch64.td to one feature per line, in
alphabetical order, to simplify merging and sorting for out-of-tree
tweaks.

No functional change intended.

Differential Revision: http://reviews.llvm.org/D20762

llvm-svn: 271555
Matthias Braun 2016-06-02 18:03:53 +00:00
parent 647e745fb4
commit 5a2d283ab8
10 changed files with 223 additions and 114 deletions

lib/Target/AArch64/AArch64.td

@@ -58,6 +58,50 @@ def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true",
"Reserve X18, making it unavailable "
"as a GPR">;
def FeatureMergeNarrowLd : SubtargetFeature<"merge-narrow-ld",
"MergeNarrowLoads", "true",
"Merge narrow load instructions">;
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
"true",
"balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
def FeaturePredictableSelectIsExpensive : SubtargetFeature<
"predictable-select-expensive", "PredictableSelectIsExpensive", "true",
"Prefer likely predicted branches over selects">;
def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
"CustomAsCheapAsMove", "true",
"Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
"Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
"AvoidQuadLdStPairs", "true",
"Do not form quad load/store pair operations">;
def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
"true", "Use alternative pattern for sextload convert to f32">;
def FeatureMacroOpFusion : SubtargetFeature<
"macroop-fusion", "HasMacroOpFusion", "true",
"CPU supports macro op fusion">;
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
def FeatureUseRSqrt : SubtargetFeature<
"use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">;
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -94,57 +138,87 @@ include "AArch64SchedM1.td"
include "AArch64SchedKryo.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
"Cortex-A35 ARM processors", [
FeatureCRC,
FeaturePerfMon]>;
FeatureCrypto,
FeatureFPARMv8,
FeatureNEON,
FeaturePerfMon
]>;
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
"Cortex-A53 ARM processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
"Cortex-A53 ARM processors", [
FeatureBalanceFPOps,
FeatureCRC,
FeaturePerfMon]>;
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeatureUseAA
]>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
"Cortex-A57 ARM processors", [
FeatureBalanceFPOps,
FeatureCRC,
FeaturePerfMon]>;
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureMergeNarrowLd,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive
]>;
def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
"Cyclone",
[FeatureFPARMv8,
FeatureNEON,
"Cyclone", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
FeatureFPARMv8,
FeatureMacroOpFusion,
FeatureNEON,
FeaturePerfMon,
FeatureZCRegMove, FeatureZCZeroing]>;
FeatureSlowMisaligned128Store,
FeatureZCRegMove,
FeatureZCZeroing
]>;
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M1 processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
"Samsung Exynos-M1 processors", [
FeatureAvoidQuadLdStPairs,
FeatureCRC,
FeaturePerfMon]>;
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureNEON,
FeaturePerfMon,
FeatureUseRSqrt
]>;
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
"Qualcomm Kryo processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
"Qualcomm Kryo processors", [
FeatureCRC,
FeaturePerfMon]>;
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureMergeNarrowLd,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive
]>;
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC,
FeaturePerfMon]>;
def : ProcessorModel<"generic", NoSchedModel, [
FeatureCRC,
FeatureFPARMv8,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler
]>;
// FIXME: Cortex-A35 is currently modelled as a Cortex-A53
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;

lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp

@@ -314,9 +314,7 @@ bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
if (skipFunction(*F.getFunction()))
return false;
// Don't do anything if this isn't an A53 or A57.
if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
F.getSubtarget<AArch64Subtarget>().isCortexA57()))
if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
return false;
bool Changed = false;

lib/Target/AArch64/AArch64ISelLowering.cpp

@@ -634,9 +634,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
// Prefer likely predicted branches to selects on out-of-order cores.
if (Subtarget->isCortexA57() || Subtarget->isKryo())
PredictableSelectIsExpensive = true;
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
@@ -814,12 +812,9 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (Subtarget->requiresStrictAlign())
return false;
// FIXME: This is mostly true for Cyclone, but not necessarily others.
if (Fast) {
// FIXME: Define an attribute for slow unaligned accesses instead of
// relying on the CPU type as a proxy.
// On Cyclone, unaligned 128-bit stores are slow.
*Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 ||
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
@@ -8792,9 +8787,7 @@ static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
// a call to that function here.
// Cyclone has bad performance on unaligned 16B stores when crossing line and
// page boundaries. We want to split such stores.
if (!Subtarget->isCyclone())
if (!Subtarget->isMisaligned128StoreSlow())
return SDValue();
// Don't split at -Oz.

lib/Target/AArch64/AArch64InstrInfo.cpp

@@ -544,8 +544,7 @@ static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53() &&
!Subtarget.isExynosM1() && !Subtarget.isKryo())
if (!Subtarget.hasCustomCheapAsMoveHandling())
return MI->isAsCheapAsAMove();
unsigned Imm;
@@ -559,7 +558,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
case AArch64::ADDXri:
case AArch64::SUBWri:
case AArch64::SUBXri:
return (Subtarget.isExynosM1() ||
return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
MI->getOperand(3).getImm() == 0);
// add/sub on register with shift
@@ -568,7 +567,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
case AArch64::SUBWrs:
case AArch64::SUBXrs:
Imm = MI->getOperand(3).getImm();
return (Subtarget.isExynosM1() &&
return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
AArch64_AM::getArithShiftValue(Imm) < 4);
// logical ops on immediate
@@ -609,7 +608,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
case AArch64::ORRWrs:
case AArch64::ORRXrs:
Imm = MI->getOperand(3).getImm();
return (Subtarget.isExynosM1() &&
return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
AArch64_AM::getShiftValue(Imm) < 4 &&
AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
@@ -1522,8 +1521,8 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr *MI) const {
if (isLdStPairSuppressed(MI))
return false;
// Do not pair quad ld/st for Exynos.
if (Subtarget.isExynosM1()) {
// On some CPUs quad load/store pairs are slower than two single load/stores.
if (Subtarget.avoidQuadLdStPairs()) {
switch (MI->getOpcode()) {
default:
break;
@@ -1801,8 +1800,8 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt,
bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
MachineInstr *Second) const {
if (Subtarget.isCyclone()) {
// Cyclone can fuse CMN, CMP, TST followed by Bcc.
if (Subtarget.hasMacroOpFusion()) {
// Fuse CMN, CMP, TST followed by Bcc.
unsigned SecondOpcode = Second->getOpcode();
if (SecondOpcode == AArch64::Bcc) {
switch (First->getOpcode()) {
@@ -1817,7 +1816,7 @@ bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
return true;
}
}
// Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
// Fuse ALU operations followed by CBZ/CBNZ.
if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
switch (First->getOpcode()) {

lib/Target/AArch64/AArch64InstrInfo.td

@@ -34,7 +34,8 @@ def HasSPE : Predicate<"Subtarget->hasSPE()">,
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
def UseAlternateSExtLoadCVTF32
: Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
@@ -4957,7 +4958,8 @@ class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
0),
dsub)),
0),
ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;
ssub)))>,
Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
@@ -5010,7 +5012,8 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
0),
dsub)),
0),
dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;
dsub)))>,
Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;

lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

@@ -160,10 +160,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and promote load instructions which read directly from store.
bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
// Check if converting two narrow loads into a single wider load with
// bitfield extracts could be enabled.
bool enableNarrowLdMerge(MachineFunction &Fn);
bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -1912,15 +1908,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
return Modified;
}
bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
bool ProfitableArch = Subtarget->isCortexA57() || Subtarget->isKryo();
// FIXME: The benefit from converting narrow loads into a wider load could be
// microarchitectural as it assumes that a single load with two bitfield
// extracts is cheaper than two narrow loads. Currently, this conversion is
// enabled only in cortex-a57 on which performance benefits were verified.
return ProfitableArch && !Subtarget->requiresStrictAlign();
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(*Fn.getFunction()))
return false;
@@ -1936,7 +1923,8 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
UsedRegs.resize(TRI->getNumRegs());
bool Modified = false;
bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
bool enableNarrowLdOpt =
Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();
for (auto &MBB : Fn)
Modified |= optimizeBlock(MBB, enableNarrowLdOpt);

lib/Target/AArch64/AArch64Subtarget.cpp

@@ -44,9 +44,36 @@ AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
CPUString = "generic";
ParseSubtargetFeatures(CPUString, FS);
initializeProperties();
return *this;
}
void AArch64Subtarget::initializeProperties() {
// Initialize CPU specific properties. We should add a tablegen feature for
// this in the future so we can specify it together with the subtarget
// features.
switch (ARMProcFamily) {
case Cyclone:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
MaxPrefetchIterationsAhead = 3;
break;
case CortexA57:
MaxInterleaveFactor = 4;
break;
case Kryo:
MaxInterleaveFactor = 4;
VectorInsertExtractBaseCost = 2;
break;
case Others: break;
case CortexA35: break;
case CortexA53: break;
case ExynosM1: break;
}
}
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
@@ -110,8 +137,7 @@ void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
// Enabling or Disabling the latency heuristic is a close call: It seems to
// help nearly no benchmark on out-of-order architectures, on the other hand
// it regresses register pressure on a few benchmarking.
if (isCyclone())
Policy.DisableLatencyHeuristic = true;
Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}
bool AArch64Subtarget::enableEarlyIfConversion() const {
@@ -133,8 +159,5 @@ bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
if (!isCortexA57())
return nullptr;
return llvm::make_unique<A57ChainingConstraint>();
return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
}

lib/Target/AArch64/AArch64Subtarget.h

@@ -33,8 +33,8 @@ class StringRef;
class Triple;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
public:
enum ARMProcFamilyEnum : uint8_t {
Others,
CortexA35,
CortexA53,
@@ -44,6 +44,7 @@ protected:
Kryo
};
protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily = Others;
@@ -66,6 +67,24 @@ protected:
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
bool MergeNarrowLoads = false;
bool UseAA = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
bool CustomAsCheapAsMove = false;
bool UsePostRAScheduler = false;
bool Misaligned128StoreIsSlow = false;
bool AvoidQuadLdStPairs = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasMacroOpFusion = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
uint16_t PrefetchDistance = 0;
uint16_t MinPrefetchStride = 1;
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
// ReserveX18 - X18 is not available as a general purpose register.
bool ReserveX18;
@@ -93,6 +112,9 @@ private:
/// subtarget initialization.
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);
/// Initialize properties based on the selected processor family.
void initializeProperties();
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -123,7 +145,15 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override {
return isGeneric() || isCortexA53() || isCortexA57() || isKryo();
return UsePostRAScheduler;
}
/// Returns ARM processor family.
/// Avoid this function! CPU specifics should be kept local to this class
/// and preferably modeled with SubtargetFeatures or properties in
/// initializeProperties().
ARMProcFamilyEnum getProcFamily() const {
return ARMProcFamily;
}
bool hasV8_1aOps() const { return HasV8_1aOps; }
@@ -140,6 +170,30 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
bool mergeNarrowLoads() const { return MergeNarrowLoads; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
}
bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; }
bool useAlternateSExtLoadCVTF32Pattern() const {
return UseAlternateSExtLoadCVTF32Pattern;
}
bool hasMacroOpFusion() const { return HasMacroOpFusion; }
bool useRSqrt() const { return UseRSqrt; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
}
unsigned getCacheLineSize() const { return CacheLineSize; }
unsigned getPrefetchDistance() const { return PrefetchDistance; }
unsigned getMinPrefetchStride() const { return MinPrefetchStride; }
unsigned getMaxPrefetchIterationsAhead() const {
return MaxPrefetchIterationsAhead;
}
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
@@ -160,14 +214,7 @@ public:
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isGeneric() const { return CPUString == "generic"; }
bool isCyclone() const { return CPUString == "cyclone"; }
bool isCortexA57() const { return CPUString == "cortex-a57"; }
bool isCortexA53() const { return CPUString == "cortex-a53"; }
bool isExynosM1() const { return CPUString == "exynos-m1"; }
bool isKryo() const { return CPUString == "kryo"; }
bool useAA() const override { return isCortexA53(); }
bool useAA() const override { return UseAA; }
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.

lib/Target/AArch64/AArch64TargetMachine.cpp

@@ -147,8 +147,7 @@ static void initReciprocals(AArch64TargetMachine& TM, AArch64Subtarget& ST)
// (52 mantissa bits) are 2 and 3, respectively.
unsigned ExtraStepsF = 2,
ExtraStepsD = ExtraStepsF + 1;
// FIXME: Enable x^-1/2 only for Exynos M1 at the moment.
bool UseRsqrt = ST.isExynosM1();
bool UseRsqrt = ST.useRSqrt();
TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF);
TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD);

lib/Target/AArch64/AArch64TargetTransformInfo.cpp

@@ -368,9 +368,7 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
}
// All other insert/extracts cost this much.
if (ST->isKryo())
return 2;
return 3;
return ST->getVectorInsertExtractBaseCost();
}
int AArch64TTIImpl::getArithmeticInstrCost(
@@ -529,9 +527,7 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
}
unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
if (ST->isCortexA57() || ST->isKryo())
return 4;
return 2;
return ST->getMaxInterleaveFactor();
}
void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
@@ -630,28 +626,17 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
}
unsigned AArch64TTIImpl::getCacheLineSize() {
if (ST->isCyclone())
return 64;
return BaseT::getCacheLineSize();
return ST->getCacheLineSize();
}
unsigned AArch64TTIImpl::getPrefetchDistance() {
if (ST->isCyclone())
return 280;
return BaseT::getPrefetchDistance();
return ST->getPrefetchDistance();
}
unsigned AArch64TTIImpl::getMinPrefetchStride() {
if (ST->isCyclone())
// The HW prefetcher handles accesses with strides up to 2KB.
return 2048;
return BaseT::getMinPrefetchStride();
return ST->getMinPrefetchStride();
}
unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
if (ST->isCyclone())
// Be conservative for now and don't prefetch ahead too much since the loop
// may terminate early.
return 3;
return BaseT::getMaxPrefetchIterationsAhead();
return ST->getMaxPrefetchIterationsAhead();
}