[AArch64] Add Fujitsu A64FX scheduling model

Basic support of A64FX was added in D75594 but its scheduling model was missing. This commit adds the scheduling model. Also, this commit amends/adds some subtarget parameters of A64FX. The A64FX Microarchitecture Manual, which is source information of this commit, is on GitHub. https://github.com/fujitsu/A64FX/ Differential Revision: https://reviews.llvm.org/D93791
2024-11-22 02:33:06 +01:00 · 2021-01-04 11:25:42 +09:00 · 2021-01-04 11:25:42 +09:00 · 008c1588a4
commit 008c1588a4
parent 4f287c334a
5 changed files with 3904 additions and 6 deletions
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@ -553,6 +553,7 @@ include "AArch64SchedExynosM4.td"
 include "AArch64SchedExynosM5.td"
 include "AArch64SchedThunderX.td"
 include "AArch64SchedThunderX2T99.td"
+include "AArch64SchedA64FX.td"
 include "AArch64SchedThunderX3T110.td"
 include "AArch64SchedTSV110.td"

@ -743,7 +744,10 @@ def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
                                  FeatureFullFP16,
                                  FeatureSVE,
                                  FeaturePostRAScheduler,
-                                  FeatureComplxNum
+                                  FeatureComplxNum,
+                                  FeatureAggressiveFMA,
+                                  FeatureArithmeticBccFusion,
+                                  FeaturePredictableSelectIsExpensive
                                  ]>;

 def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
@ -1152,8 +1156,7 @@ def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>;
 def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA13]>;

 // Fujitsu A64FX
-// FIXME: Scheduling model is not implemented yet.
-def : ProcessorModel<"a64fx", NoSchedModel, [ProcA64FX]>;
+def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>;

 // Nvidia Carmel
 def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>;
--- a/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/lib/Target/AArch64/AArch64SchedA64FX.td
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@ -110,8 +110,12 @@ void AArch64Subtarget::initializeProperties() {
    break;
  case A64FX:
    CacheLineSize = 256;
-    PrefFunctionLogAlignment = 5;
-    PrefLoopLogAlignment = 5;
+    PrefFunctionLogAlignment = 3;
+    PrefLoopLogAlignment = 2;
+    MaxInterleaveFactor = 4;
+    PrefetchDistance = 128;
+    MinPrefetchStride = 1024;
+    MaxPrefetchIterationsAhead = 4;
    break;
  case AppleA7:
  case AppleA10:
--- a/test/CodeGen/AArch64/machine-combiner-madd.ll
+++ b/test/CodeGen/AArch64/machine-combiner-madd.ll
@ -1,4 +1,5 @@
 ; Test all AArch64 subarches with scheduling models.
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=a64fx      < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a73 < %s | FileCheck %s
--- a/test/CodeGen/AArch64/preferred-function-alignment.ll
+++ b/test/CodeGen/AArch64/preferred-function-alignment.ll
@ -8,7 +8,7 @@
 ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cortex-a73 < %s | FileCheck --check-prefixes=ALIGN4,CHECK %s
 ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cortex-a75 < %s | FileCheck --check-prefixes=ALIGN4,CHECK %s
 ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cortex-a76 < %s | FileCheck --check-prefixes=ALIGN4,CHECK %s
-; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=a64fx < %s | FileCheck --check-prefixes=ALIGN5,CHECK %s
+; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=a64fx < %s | FileCheck --check-prefixes=ALIGN3,CHECK %s
 ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cyclone < %s | FileCheck --check-prefixes=ALIGN2,CHECK %s
 ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=falkor < %s | FileCheck --check-prefixes=ALIGN2,CHECK %s
 ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=kryo < %s | FileCheck --check-prefixes=ALIGN2,CHECK %s