mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
AArch64: Change modeling of zero cycle zeroing.
On CPUs with the zero cycle zeroing feature enabled, "movi v.2d" should be used to zero a vector register. This was previously done at instruction selection time; however, because of that, the register coalescer sometimes widened multiple vregs to the Q width, leading to extra spills. This patch leaves the decision on how to zero a register to the AsmPrinter phase, where it doesn't affect register allocation anymore. This patch also sets isAsCheapAsAMove=1 on FMOVS0 and FMOVD0. This fixes http://llvm.org/PR27454, rdar://25866262 Differential Revision: http://reviews.llvm.org/D21826 llvm-svn: 274686
This commit is contained in:
parent
31becb24de
commit
778da94f90
@ -49,6 +49,7 @@ namespace {
|
|||||||
class AArch64AsmPrinter : public AsmPrinter {
|
class AArch64AsmPrinter : public AsmPrinter {
|
||||||
AArch64MCInstLower MCInstLowering;
|
AArch64MCInstLower MCInstLowering;
|
||||||
StackMaps SM;
|
StackMaps SM;
|
||||||
|
const AArch64Subtarget *STI;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
|
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
|
||||||
@ -83,6 +84,7 @@ public:
|
|||||||
|
|
||||||
bool runOnMachineFunction(MachineFunction &F) override {
|
bool runOnMachineFunction(MachineFunction &F) override {
|
||||||
AArch64FI = F.getInfo<AArch64FunctionInfo>();
|
AArch64FI = F.getInfo<AArch64FunctionInfo>();
|
||||||
|
STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
|
||||||
return AsmPrinter::runOnMachineFunction(F);
|
return AsmPrinter::runOnMachineFunction(F);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,6 +113,9 @@ private:
|
|||||||
/// \brief Emit the LOHs contained in AArch64FI.
|
/// \brief Emit the LOHs contained in AArch64FI.
|
||||||
void EmitLOHs();
|
void EmitLOHs();
|
||||||
|
|
||||||
|
/// Emit instruction to set float register to zero.
|
||||||
|
void EmitFMov0(const MachineInstr &MI);
|
||||||
|
|
||||||
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
|
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
|
||||||
MInstToMCSymbol LOHInstToLabel;
|
MInstToMCSymbol LOHInstToLabel;
|
||||||
};
|
};
|
||||||
@ -224,8 +229,7 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
|
|||||||
const TargetRegisterClass *RC,
|
const TargetRegisterClass *RC,
|
||||||
bool isVector, raw_ostream &O) {
|
bool isVector, raw_ostream &O) {
|
||||||
assert(MO.isReg() && "Should only get here with a register!");
|
assert(MO.isReg() && "Should only get here with a register!");
|
||||||
const AArch64RegisterInfo *RI =
|
const TargetRegisterInfo *RI = STI->getRegisterInfo();
|
||||||
MF->getSubtarget<AArch64Subtarget>().getRegisterInfo();
|
|
||||||
unsigned Reg = MO.getReg();
|
unsigned Reg = MO.getReg();
|
||||||
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
|
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
|
||||||
assert(RI->regsOverlap(RegToPrint, Reg));
|
assert(RI->regsOverlap(RegToPrint, Reg));
|
||||||
@ -416,6 +420,40 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
|
|||||||
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
|
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emit the real instruction for an FMOVS0 / FMOVD0 pseudo \p MI.
///
/// Operand 0 of \p MI is read as the destination register. When the
/// subtarget reports zero cycle zeroing, a MOVIv2d_ns with immediate 0 is
/// emitted on the Q register with the same index as the S/D destination.
/// Otherwise FMOVS0 becomes FMOVWSr from WZR and FMOVD0 becomes FMOVXDr
/// from XZR. Any other opcode hits llvm_unreachable.
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
|
||||||
|
unsigned DestReg = MI.getOperand(0).getReg();
|
||||||
|
if (STI->hasZeroCycleZeroing()) {
|
||||||
|
// Convert S/D register to corresponding Q register
|
||||||
|
// NOTE(review): the index arithmetic below assumes the S0..S31, D0..D31
// and Q0..Q31 register enum values are each contiguous and identically
// ordered -- confirm against the generated register enum.
if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31) {
|
||||||
|
DestReg = AArch64::Q0 + (DestReg - AArch64::S0);
|
||||||
|
} else {
|
||||||
|
// Anything that is not an S register must be a D register here.
assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);
|
||||||
|
DestReg = AArch64::Q0 + (DestReg - AArch64::D0);
|
||||||
|
}
|
||||||
|
MCInst MOVI;
|
||||||
|
// Build MOVIv2d_ns with operands (Q destination, immediate 0).
MOVI.setOpcode(AArch64::MOVIv2d_ns);
|
||||||
|
MOVI.addOperand(MCOperand::createReg(DestReg));
|
||||||
|
MOVI.addOperand(MCOperand::createImm(0));
|
||||||
|
EmitToStreamer(*OutStreamer, MOVI);
|
||||||
|
} else {
|
||||||
|
// No zero cycle zeroing: move from the integer zero register whose width
// matches the pseudo (WZR for FMOVS0, XZR for FMOVD0).
MCInst FMov;
|
||||||
|
switch (MI.getOpcode()) {
|
||||||
|
default: llvm_unreachable("Unexpected opcode");
|
||||||
|
case AArch64::FMOVS0:
|
||||||
|
FMov.setOpcode(AArch64::FMOVWSr);
|
||||||
|
FMov.addOperand(MCOperand::createReg(DestReg));
|
||||||
|
FMov.addOperand(MCOperand::createReg(AArch64::WZR));
|
||||||
|
break;
|
||||||
|
case AArch64::FMOVD0:
|
||||||
|
FMov.setOpcode(AArch64::FMOVXDr);
|
||||||
|
FMov.addOperand(MCOperand::createReg(DestReg));
|
||||||
|
FMov.addOperand(MCOperand::createReg(AArch64::XZR));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
EmitToStreamer(*OutStreamer, FMov);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Simple pseudo-instructions have their lowering (with expansion to real
|
// Simple pseudo-instructions have their lowering (with expansion to real
|
||||||
// instructions) auto-generated.
|
// instructions) auto-generated.
|
||||||
#include "AArch64GenMCPseudoLowering.inc"
|
#include "AArch64GenMCPseudoLowering.inc"
|
||||||
@ -521,6 +559,11 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case AArch64::FMOVS0:
|
||||||
|
case AArch64::FMOVD0:
|
||||||
|
EmitFMov0(*MI);
|
||||||
|
return;
|
||||||
|
|
||||||
case TargetOpcode::STACKMAP:
|
case TargetOpcode::STACKMAP:
|
||||||
return LowerSTACKMAP(*OutStreamer, SM, *MI);
|
return LowerSTACKMAP(*OutStreamer, SM, *MI);
|
||||||
|
|
||||||
|
@ -301,9 +301,6 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
// AArch64 Instruction Predicate Definitions.
|
// AArch64 Instruction Predicate Definitions.
|
||||||
//
|
|
||||||
def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
|
|
||||||
def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">;
|
|
||||||
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
|
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
|
||||||
def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
|
def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
|
||||||
def ForCodeSize : Predicate<"ForCodeSize">;
|
def ForCodeSize : Predicate<"ForCodeSize">;
|
||||||
@ -2565,15 +2562,11 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
|
|||||||
defm FMOV : UnscaledConversion<"fmov">;
|
defm FMOV : UnscaledConversion<"fmov">;
|
||||||
|
|
||||||
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
|
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
|
||||||
let isReMaterializable = 1, isCodeGenOnly = 1 in {
|
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
|
||||||
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
|
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
|
||||||
PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
|
Sched<[WriteF]>;
|
||||||
Sched<[WriteF]>,
|
|
||||||
Requires<[NoZCZ]>;
|
|
||||||
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
|
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
|
||||||
PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
|
Sched<[WriteF]>;
|
||||||
Sched<[WriteF]>,
|
|
||||||
Requires<[NoZCZ]>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -4435,18 +4428,6 @@ def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
|
|||||||
"movi", ".2d",
|
"movi", ".2d",
|
||||||
[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
|
[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
|
||||||
|
|
||||||
|
|
||||||
// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing.
|
|
||||||
// Complexity is added to break a tie with a plain MOVI.
|
|
||||||
let AddedComplexity = 1 in {
|
|
||||||
def : Pat<(f32 fpimm0),
|
|
||||||
(f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>,
|
|
||||||
Requires<[HasZCZ]>;
|
|
||||||
def : Pat<(f64 fpimm0),
|
|
||||||
(f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>,
|
|
||||||
Requires<[HasZCZ]>;
|
|
||||||
}
|
|
||||||
|
|
||||||
def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
|
def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
|
||||||
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
|
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
|
||||||
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
|
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
|
||||||
|
@ -47,3 +47,29 @@ declare void @bar(double, double, double, double)
|
|||||||
declare void @bari(i32, i32)
|
declare void @bari(i32, i32)
|
||||||
declare void @barl(i64, i64)
|
declare void @barl(i64, i64)
|
||||||
declare void @barf(float, float)
|
declare void @barf(float, float)
|
||||||
|
|
||||||
|
; We used to produce spills+reloads for a Q register with zero cycle zeroing
|
||||||
|
; enabled.
|
||||||
|
; CHECK-LABEL: foo:
|
||||||
|
; CHECK-NOT: str {{q[0-9]+}}
|
||||||
|
; CHECK-NOT: ldr {{q[0-9]+}}
|
||||||
|
define double @foo(i32 %n) {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%phi0 = phi double [ 1.0, %entry ], [ %v0, %for.body ]
|
||||||
|
%i.076 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||||
|
%conv21 = sitofp i32 %i.076 to double
|
||||||
|
%call = tail call fast double @sin(double %conv21)
|
||||||
|
%cmp.i = fcmp fast olt double %phi0, %call
|
||||||
|
%v0 = select i1 %cmp.i, double %call, double %phi0
|
||||||
|
%inc = add nuw nsw i32 %i.076, 1
|
||||||
|
%cmp = icmp slt i32 %inc, %n
|
||||||
|
br i1 %cmp, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret double %v0
|
||||||
|
}
|
||||||
|
|
||||||
|
declare double @sin(double)
|
||||||
|
@ -12,8 +12,8 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
|
|||||||
%tst1 = icmp ugt i32 %lhs32, %rhs32
|
%tst1 = icmp ugt i32 %lhs32, %rhs32
|
||||||
%val1 = select i1 %tst1, float 0.0, float 1.0
|
%val1 = select i1 %tst1, float 0.0, float 1.0
|
||||||
store float %val1, float* @varfloat
|
store float %val1, float* @varfloat
|
||||||
; CHECK: movi v[[FLT0:[0-9]+]].2d, #0
|
; CHECK-DAG: movi v[[FLT0:[0-9]+]].2d, #0
|
||||||
; CHECK: fmov s[[FLT1:[0-9]+]], #1.0
|
; CHECK-DAG: fmov s[[FLT1:[0-9]+]], #1.0
|
||||||
; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi
|
; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi
|
||||||
|
|
||||||
%rhs64 = sext i32 %rhs32 to i64
|
%rhs64 = sext i32 %rhs32 to i64
|
||||||
|
Loading…
Reference in New Issue
Block a user