[AArch64][Falkor] Avoid generating STRQro* instructions
Summary:
STRQro* instructions are slower than the alternative ADD/STRQui expanded
instructions on Falkor, so avoid generating them unless we're optimizing
for code size.

Reviewers: t.p.northover, mcrosier

Subscribers: aemerson, rengolin, javed.absar, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D37020

llvm-svn: 311931
commit 82a5b35667 (parent 5965f0fe5f)
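For context, the tradeoff being made: a 128-bit store to a base-plus-register address can be selected either as a single register-offset store (STRQro) or as an ADD that materializes the address followed by an unsigned-immediate-offset store (STRQui). A minimal sketch of the two sequences, not part of the commit; register numbers are purely illustrative:

; Q-register store through a computed address, as in the test added below:
define void @example(fp128 %val, i64 %base, i64 %offset) {
  %addrint = add i64 %base, %offset
  %addr = inttoptr i64 %addrint to fp128*
  store fp128 %val, fp128* %addr
  ret void
}
; Register-offset form (now avoided on Falkor):
;   str q0, [x0, x1]
; ADD/STRQui expansion (preferred on Falkor):
;   add x8, x0, x1
;   str q0, [x8]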
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -94,6 +94,9 @@ def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
 def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
     "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
 
+def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow",
+    "true", "STR of Q register with register offset is slow">;
+
 def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
     "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
     "true", "Use alternative pattern for sextload convert to f32">;
@@ -339,7 +342,8 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
                                    FeaturePredictableSelectIsExpensive,
                                    FeatureRDM,
                                    FeatureZCZeroing,
-                                   FeatureLSLFast
+                                   FeatureLSLFast,
+                                   FeatureSlowSTRQro
                                    ]>;
 
 def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
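Since the feature carries the string "slow-strqro-store", the new behavior can in principle also be exercised on a generic triple via -mattr, independent of -mcpu=falkor; a hypothetical lit RUN line (not part of this commit):

; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+slow-strqro-store | FileCheck --check-prefix=CHECK-NOSTRQRO %s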
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -3072,22 +3072,18 @@ multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
 
 multiclass Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
                       string asm, ValueType Ty, SDPatternOperator storeop> {
-  let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+  let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
   def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
                            (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
-                           [(storeop (Ty regtype:$Rt),
-                                     (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
-                                                     ro_Wextend128:$extend))]>,
+                           []>,
             Sched<[WriteSTIdx, ReadAdrBase]> {
     let Inst{13} = 0b0;
   }
 
-  let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+  let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
   def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
                            (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
-                           [(storeop (Ty regtype:$Rt),
-                                     (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
-                                                     ro_Xextend128:$extend))]>,
+                           []>,
             Sched<[WriteSTIdx, ReadAdrBase]> {
     let Inst{13} = 0b1;
   }
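Note that this hunk only removes the ISel patterns (and the AddedComplexity = 10 bump) from the roW/roX definitions; the instruction encodings are untouched, so STRQroW/STRQroX remain available to the assembler and disassembler. The patterns reappear below in AArch64InstrInfo.td as standalone Pat records guarded by the new UseSTRQro predicate.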
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -330,6 +330,8 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
 let RecomputePerFunction = 1 in {
   def ForCodeSize : Predicate<"MF->getFunction()->optForSize()">;
   def NotForCodeSize : Predicate<"!MF->getFunction()->optForSize()">;
+  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
+  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction()->optForSize()">;
 }
 
 include "AArch64InstrFormats.td"
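RecomputePerFunction = 1 makes the predicate string be reevaluated for each function instead of being cached once per subtarget, which is what lets UseSTRQro react to the per-function optsize/minsize attributes. A minimal sketch of the consequence, with illustrative function names (it mirrors the test added at the end of the patch):

; Compiled with -mcpu=falkor in the same module:
; @hot avoids STRQro, while @cold may still get STRQro, because minsize
; makes optForSize() return true and flips UseSTRQro for that function only.
define void @hot(fp128 %v, i64 %base, i64 %off) {
  %a = add i64 %base, %off
  %p = inttoptr i64 %a to fp128*
  store fp128 %v, fp128* %p
  ret void
}

define void @cold(fp128 %v, i64 %base, i64 %off) minsize {
  %a = add i64 %base, %off
  %p = inttoptr i64 %a to fp128*
  store fp128 %v, fp128* %p
  ret void
}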
@@ -2139,6 +2141,17 @@ defm STRS : Store32RO<0b10, 1, 0b00, FPR32, "str", f32, store>;
 defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>;
 defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>;
 
+let Predicates = [UseSTRQro], AddedComplexity = 10 in {
+  def : Pat<(store (f128 FPR128:$Rt),
+                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
+                                   ro_Wextend128:$extend)),
+            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
+  def : Pat<(store (f128 FPR128:$Rt),
+                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
+                                   ro_Xextend128:$extend)),
+            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Wextend128:$extend)>;
+}
+
 multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                  Instruction STRW, Instruction STRX> {
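The AddedComplexity = 10 dropped from Store128RO resurfaces here on the standalone patterns, so whenever UseSTRQro holds, the register-offset form still outranks competing lower-complexity patterns. These Pat records cover f128; the 128-bit vector types are handled by the VecROStorePat change in the next hunk.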
@@ -2186,7 +2199,7 @@ defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
 defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
 
 // Match all store 128 bits width whose type is compatible with FPR128
-let Predicates = [IsLE] in {
+let Predicates = [IsLE, UseSTRQro] in {
   // We must use ST1 to store vectors in big-endian.
   defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
   defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -102,6 +102,7 @@ protected:
   bool UsePostRAScheduler = false;
   bool Misaligned128StoreIsSlow = false;
   bool Paired128IsSlow = false;
+  bool STRQroIsSlow = false;
   bool UseAlternateSExtLoadCVTF32Pattern = false;
   bool HasArithmeticBccFusion = false;
   bool HasArithmeticCbzFusion = false;
@@ -219,6 +220,7 @@ public:
   bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
   bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
   bool isPaired128Slow() const { return Paired128IsSlow; }
+  bool isSTRQroSlow() const { return STRQroIsSlow; }
   bool useAlternateSExtLoadCVTF32Pattern() const {
     return UseAlternateSExtLoadCVTF32Pattern;
   }
--- /dev/null
+++ b/test/CodeGen/AArch64/strqro.ll
@@ -0,0 +1,47 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-STRQRO %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=falkor | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTRQRO %s
+
+; CHECK-LABEL: strqrox:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+; CHECK-NOSTRQRO-NOT: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+define void @strqrox(fp128 %val64, i64 %base, i64 %offset) {
+  %addrint = add i64 %base, %offset
+  %addr = inttoptr i64 %addrint to fp128*
+  store volatile fp128 %val64, fp128* %addr
+  ret void
+}
+
+; Check that STRQro is generated for both cases if we're optimizing for code size.
+; CHECK-LABEL: strqrox_optsize:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+; CHECK-NOSTRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, x
+define void @strqrox_optsize(fp128 %val64, i64 %base, i64 %offset) minsize {
+  %addrint = add i64 %base, %offset
+  %addr = inttoptr i64 %addrint to fp128*
+  store volatile fp128 %val64, fp128* %addr
+  ret void
+}
+
+; CHECK-LABEL: strqrow:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+; CHECK-NOSTRQRO-NOT: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+define void @strqrow(fp128 %val64, i64 %base, i32 %offset) {
+  %offset64 = zext i32 %offset to i64
+  %addrint = add i64 %base, %offset64
+  %addr = inttoptr i64 %addrint to fp128*
+  store volatile fp128 %val64, fp128* %addr
+  ret void
+}
+
+; Check that STRQro is generated for both cases if we're optimizing for code size.
+; CHECK-LABEL: strqrow_optsize:
+; CHECK-STRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+; CHECK-NOSTRQRO: str q{{[0-9]+}}, [x{{[0-9]+}}, w
+define void @strqrow_optsize(fp128 %val64, i64 %base, i32 %offset) minsize {
+  %offset64 = zext i32 %offset to i64
+  %addrint = add i64 %base, %offset64
+  %addr = inttoptr i64 %addrint to fp128*
+  store volatile fp128 %val64, fp128* %addr
+  ret void
+}
+