mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[PowerPC] Add the MacroFusion support for Power8
This patch is intended to implement the missing P8 MacroFusion for LLVM according to the Power8 User's Manual, Section 10.1.12, Instruction Fusion. Differential Revision: https://reviews.llvm.org/D70651
This commit is contained in:
parent
85ff68b9dd
commit
f1bb8b8502
@ -33,6 +33,7 @@ add_llvm_target(PowerPCCodeGen
|
||||
PPCMCInstLower.cpp
|
||||
PPCMachineFunctionInfo.cpp
|
||||
PPCMachineScheduler.cpp
|
||||
PPCMacroFusion.cpp
|
||||
PPCMIPeephole.cpp
|
||||
PPCRegisterInfo.cpp
|
||||
PPCQPXLoadSplat.cpp
|
||||
|
@ -166,6 +166,16 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
|
||||
"Enable Hardware Transactional Memory instructions">;
|
||||
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
|
||||
"Implement mftb using the mfspr instruction">;
|
||||
def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
|
||||
"Target supports instruction fusion">;
|
||||
def FeatureAddiLoadFusion : SubtargetFeature<"fuse-addi-load",
|
||||
"HasAddiLoadFusion", "true",
|
||||
"Power8 Addi-Load fusion",
|
||||
[FeatureFusion]>;
|
||||
def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
|
||||
"HasAddisLoadFusion", "true",
|
||||
"Power8 Addis-Load fusion",
|
||||
[FeatureFusion]>;
|
||||
def FeatureUnalignedFloats :
|
||||
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
|
||||
"true", "CPU does not trap on unaligned FP access">;
|
||||
@ -279,7 +289,8 @@ def ProcessorFeatures {
|
||||
FeatureDirectMove,
|
||||
FeatureICBT,
|
||||
FeaturePartwordAtomic];
|
||||
list<SubtargetFeature> P8SpecificFeatures = [];
|
||||
list<SubtargetFeature> P8SpecificFeatures = [FeatureAddiLoadFusion,
|
||||
FeatureAddisLoadFusion];
|
||||
list<SubtargetFeature> P8InheritableFeatures =
|
||||
!listconcat(P7InheritableFeatures, P8AdditionalFeatures);
|
||||
list<SubtargetFeature> P8Features =
|
||||
|
203
lib/Target/PowerPC/PPCMacroFusion.cpp
Normal file
203
lib/Target/PowerPC/PPCMacroFusion.cpp
Normal file
@ -0,0 +1,203 @@
|
||||
//===- PPCMacroFusion.cpp - PowerPC Macro Fusion --------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file This file contains the PowerPC implementation of the DAG scheduling
|
||||
/// mutation to pair instructions back to back.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PPC.h"
|
||||
#include "PPCSubtarget.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/CodeGen/MacroFusion.h"
|
||||
|
||||
using namespace llvm;
|
||||
namespace {
|
||||
|
||||
/// Describes a single macro-fusion rule: its kind, whether it is enabled, the
/// operand of the second instruction that must be fed by the first one, and
/// the opcode sets the first and second instruction are drawn from.
class FusionFeature {
public:
  using FusionOpSet = SmallDenseSet<unsigned>;

  // One enumerator (FK_<KIND>) per FUSION_FEATURE entry of the def file,
  // followed by FK_END. FUSION_KIND is deliberately left defined after the
  // enum; it is #undef'd further down in this file.
  enum FusionKind {
  #define FUSION_KIND(KIND) FK_##KIND
  #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)        \
    FUSION_KIND(KIND),
  #include "PPCMacroFusion.def"
    FUSION_KIND(END)
  };

  FusionFeature(FusionKind Kind, bool HasFeature, int Index,
                const FusionOpSet &First, const FusionOpSet &Second)
      : Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First),
        OpSet2(Second) {}

  /// True if \p Opc may appear as the first instruction of the pair.
  bool hasOp1(unsigned Opc) const { return OpSet1.count(Opc) > 0; }

  /// True if \p Opc may appear as the second instruction of the pair.
  bool hasOp2(unsigned Opc) const { return OpSet2.count(Opc) > 0; }

  /// True if this fusion rule was constructed as enabled.
  bool isSupported() const { return Supported; }

  /// Index of the operand of the second instruction that has to match the
  /// result of the first one, or None when the stored index is negative.
  Optional<unsigned> depOpIdx() const {
    return DepOpIdx < 0 ? Optional<unsigned>()
                        : Optional<unsigned>(static_cast<unsigned>(DepOpIdx));
  }

  FusionKind getKind() const { return Kd; }

private:
  // The fusion kind of this rule. Every instruction pair covered by the same
  // kind shares the same fusion characteristic.
  FusionKind Kd;
  // Whether the rule is enabled.
  bool Supported;
  // Example pair:
  //   li rx, si
  //   load rt, ra, rx
  // Operand index, within the second op (the load), that depends on the first
  // op. A negative value means any operand is acceptable.
  int DepOpIdx;
  // Opcodes allowed for the first instruction.
  FusionOpSet OpSet1;
  // Opcodes allowed for the second instruction.
  FusionOpSet OpSet2;
};
|
||||
|
||||
static bool matchingRegOps(const MachineInstr &FirstMI,
|
||||
int FirstMIOpIndex,
|
||||
const MachineInstr &SecondMI,
|
||||
int SecondMIOpIndex) {
|
||||
const MachineOperand &Op1 = FirstMI.getOperand(FirstMIOpIndex);
|
||||
const MachineOperand &Op2 = SecondMI.getOperand(SecondMIOpIndex);
|
||||
if (!Op1.isReg() || !Op2.isReg())
|
||||
return false;
|
||||
|
||||
return Op1.getReg() == Op2.getReg();
|
||||
}
|
||||
|
||||
// Return true if the FirstMI meets the constraints of SecondMI according to
|
||||
// fusion specification.
|
||||
static bool checkOpConstraints(FusionFeature::FusionKind Kd,
|
||||
const MachineInstr &FirstMI,
|
||||
const MachineInstr &SecondMI) {
|
||||
switch (Kd) {
|
||||
// The hardware didn't require any specific check for the fused instructions'
|
||||
// operands. Therefore, return true to indicate that, it is fusable.
|
||||
default: return true;
|
||||
// [addi rt,ra,si - lxvd2x xt,ra,rb] etc.
|
||||
case FusionFeature::FK_AddiLoad: {
|
||||
// lxvd2x(ra) cannot be zero
|
||||
const MachineOperand &RA = SecondMI.getOperand(1);
|
||||
if (!RA.isReg())
|
||||
return true;
|
||||
|
||||
return Register::isVirtualRegister(RA.getReg()) ||
|
||||
(RA.getReg() != PPC::ZERO && RA.getReg() != PPC::ZERO8);
|
||||
}
|
||||
// [addis rt,ra,si - ld rt,ds(ra)] etc.
|
||||
case FusionFeature::FK_AddisLoad: {
|
||||
const MachineOperand &RT = SecondMI.getOperand(0);
|
||||
if (!RT.isReg())
|
||||
return true;
|
||||
|
||||
// Only check it for non-virtual register.
|
||||
if (!Register::isVirtualRegister(RT.getReg()))
|
||||
// addis(rt) = ld(ra) = ld(rt)
|
||||
// ld(rt) cannot be zero
|
||||
if (!matchingRegOps(SecondMI, 0, SecondMI, 2) ||
|
||||
(RT.getReg() == PPC::ZERO || RT.getReg() == PPC::ZERO8))
|
||||
return false;
|
||||
|
||||
// addis(si) first 12 bits must be all 1s or all 0s
|
||||
const MachineOperand &SI = FirstMI.getOperand(2);
|
||||
if (!SI.isImm())
|
||||
return true;
|
||||
int64_t Imm = SI.getImm();
|
||||
if (((Imm & 0xFFF0) != 0) || ((Imm & 0xFFF0) != 0xFFF0))
|
||||
return false;
|
||||
|
||||
// If si = 1111111111110000 and the msb of the d/ds field of the load equals
|
||||
// 1, then fusion does not occur.
|
||||
if ((Imm & 0xFFF0) == 0xFFF0) {
|
||||
const MachineOperand &D = SecondMI.getOperand(1);
|
||||
if (!D.isImm())
|
||||
return true;
|
||||
|
||||
// 14 bit for DS field, while 16 bit for D field.
|
||||
int MSB = 15;
|
||||
if (SecondMI.getOpcode() == PPC::LD)
|
||||
MSB = 13;
|
||||
|
||||
return (D.getImm() & (1ULL << MSB)) == 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
llvm_unreachable("All the cases should have been handled");
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Decide whether the instruction pair (FirstMI, SecondMI) should be kept
/// adjacent so that it can fuse. When FirstMI is null, only answer whether
/// SecondMI could be the second half of any enabled fusion pair.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
                                   const TargetSubtargetInfo &TSI,
                                   const MachineInstr *FirstMI,
                                   const MachineInstr &SecondMI) {
  // Bring the PPC namespace into scope so the opcodes listed in the def file
  // do not have to be spelled with a PPC:: prefix.
  using namespace PPC;

  const PPCSubtarget &ST = static_cast<const PPCSubtarget&>(TSI);
  // NOTE(review): this table is a function-local static, so it is built on the
  // first call only and its "supported" flags come from that call's subtarget.
  // Confirm this is acceptable when subtargets differ between functions.
  static const FusionFeature FusionFeatures[] = {
  #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) { \
    FusionFeature::FUSION_KIND(KIND), ST.HAS_FEATURE(), DEP_OP_IDX, { OPSET1 },\
    { OPSET2 } },
  #include "PPCMacroFusion.def"
  };
  #undef FUSION_KIND

  const unsigned SecondOpc = SecondMI.getOpcode();
  for (const FusionFeature &Candidate : FusionFeatures) {
    // Disabled rules, and rules whose second-op set does not contain SecondMI,
    // can never apply.
    if (!Candidate.isSupported() || !Candidate.hasOp2(SecondOpc))
      continue;

    // A null FirstMI means the caller only asks whether SecondMI is fusable at
    // all, which it is at this point.
    if (FirstMI == nullptr)
      return true;

    // FirstMI has to belong to the first-op set of this rule.
    if (!Candidate.hasOp1(FirstMI->getOpcode()))
      continue;

    // When the rule names a dependent operand, the result of FirstMI (its
    // operand 0) must be exactly that operand of SecondMI; a mismatch ends the
    // whole search with a negative answer.
    const Optional<unsigned> DepIdx = Candidate.depOpIdx();
    if (DepIdx.hasValue() && !matchingRegOps(*FirstMI, 0, SecondMI, *DepIdx))
      return false;

    // Finally apply the kind-specific operand constraints.
    if (checkOpConstraints(Candidate.getKind(), *FirstMI, SecondMI))
      return true;
  }

  return false;
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
namespace llvm {
|
||||
|
||||
std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation () {
|
||||
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
45
lib/Target/PowerPC/PPCMacroFusion.def
Normal file
45
lib/Target/PowerPC/PPCMacroFusion.def
Normal file
@ -0,0 +1,45 @@
|
||||
//=== ---- PPCMacroFusion.def - PowerPC MacroFusion Candidates --*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains descriptions of the macro-fusion pair for PowerPC.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// NOTE: NO INCLUDE GUARD DESIRED!
|
||||
|
||||
#ifndef FUSION_FEATURE
|
||||
|
||||
// Each FUSION_FEATURE is assigned with one TYPE, and can be enabled/disabled
|
||||
// by HAS_FEATURE. The instructions pair is fusable only when the opcode
|
||||
// of the first instruction is in OPSET1, and the second instruction opcode is
|
||||
// in OPSET2. And if DEP_OP_IDX >=0, we will check the result of first OP is
|
||||
// the operand of the second op with DEP_OP_IDX as its operand index. We assume
|
||||
// that the result of the first op is its operand zero.
|
||||
#define FUSION_FEATURE(TYPE, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef FUSION_OP_SET
|
||||
#define FUSION_OP_SET(...) __VA_ARGS__
|
||||
#endif
|
||||
|
||||
// Power8 User Manual Section 10.1.12, Instruction Fusion
|
||||
// {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx,
|
||||
// lvewx, lvx, lxsdx}
|
||||
FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \
|
||||
FUSION_OP_SET(ADDI, ADDI8, ADDItocL), \
|
||||
FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \
|
||||
LVX, LXSDX))
|
||||
|
||||
// {addis} followed by one of these {ld, lbz, lhz, lwz}
|
||||
FUSION_FEATURE(AddisLoad, hasAddisLoadFusion, 2, \
|
||||
FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \
|
||||
FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8))
|
||||
|
||||
#undef FUSION_FEATURE
|
||||
#undef FUSION_OP_SET
|
22
lib/Target/PowerPC/PPCMacroFusion.h
Normal file
22
lib/Target/PowerPC/PPCMacroFusion.h
Normal file
@ -0,0 +1,22 @@
|
||||
//===- PPCMacroFusion.h - PowerPC Macro Fusion ----------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file This file contains the PowerPC definition of the DAG scheduling
|
||||
/// mutation to pair instructions back to back.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Include guard: without it, including this header twice in one translation
// unit would re-process its contents.
#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACROFUSION_H
#define LLVM_LIB_TARGET_POWERPC_PPCMACROFUSION_H

#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

/// Create the PowerPC macro-fusion scheduling DAG mutation.
///
/// Note that you have to add:
///         DAG.addMutation(createPowerPCMacroFusionDAGMutation());
/// to PPCPassConfig::createMachineScheduler() to have an effect.
std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation();

} // end namespace llvm

#endif // LLVM_LIB_TARGET_POWERPC_PPCMACROFUSION_H
|
@ -111,6 +111,9 @@ void PPCSubtarget::initializeEnvironment() {
|
||||
IsQPXStackUnaligned = false;
|
||||
HasHTM = false;
|
||||
HasFloat128 = false;
|
||||
HasFusion = false;
|
||||
HasAddiLoadFusion = false;
|
||||
HasAddisLoadFusion = false;
|
||||
IsISA3_0 = false;
|
||||
UseLongCalls = false;
|
||||
SecurePlt = false;
|
||||
|
@ -135,6 +135,9 @@ protected:
|
||||
bool HasDirectMove;
|
||||
bool HasHTM;
|
||||
bool HasFloat128;
|
||||
bool HasFusion;
|
||||
bool HasAddiLoadFusion;
|
||||
bool HasAddisLoadFusion;
|
||||
bool IsISA3_0;
|
||||
bool UseLongCalls;
|
||||
bool SecurePlt;
|
||||
@ -306,6 +309,9 @@ public:
|
||||
bool hasFloat128() const { return HasFloat128; }
|
||||
bool isISA3_0() const { return IsISA3_0; }
|
||||
bool useLongCalls() const { return UseLongCalls; }
|
||||
bool hasFusion() const { return HasFusion; }
|
||||
bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
|
||||
bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
|
||||
bool needsSwapsForVSXMemOps() const {
|
||||
return hasVSX() && isLittleEndian() && !hasP9Vector();
|
||||
}
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "MCTargetDesc/PPCMCTargetDesc.h"
|
||||
#include "PPC.h"
|
||||
#include "PPCMachineScheduler.h"
|
||||
#include "PPCMacroFusion.h"
|
||||
#include "PPCSubtarget.h"
|
||||
#include "PPCTargetObjectFile.h"
|
||||
#include "PPCTargetTransformInfo.h"
|
||||
@ -275,6 +276,9 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
|
||||
std::make_unique<GenericScheduler>(C));
|
||||
// add DAG Mutations here.
|
||||
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
|
||||
if (ST.hasFusion())
|
||||
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
|
||||
|
||||
return DAG;
|
||||
}
|
||||
|
||||
@ -286,6 +290,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
|
||||
std::make_unique<PPCPostRASchedStrategy>(C) :
|
||||
std::make_unique<PostGenericScheduler>(C), true);
|
||||
// add DAG Mutations here.
|
||||
if (ST.hasFusion())
|
||||
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
|
||||
return DAG;
|
||||
}
|
||||
|
||||
|
21
test/CodeGen/PowerPC/macro-fusion.ll
Normal file
21
test/CodeGen/PowerPC/macro-fusion.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-misched -debug-only=machine-scheduler \
|
||||
; RUN: -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK-P8
|
||||
|
||||
@m = local_unnamed_addr global i64 0, align 8
|
||||
|
||||
define i64 @fuse_addis_ld() {
|
||||
entry:
|
||||
; CHECK-P8: ********** MI Scheduling **********
|
||||
; CHECK-P8-LABEL: fuse_addis_ld:%bb.0 entry
|
||||
; CHECK-P8: Macro fuse: SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) / ADDIStocHA8 - LD
|
||||
; CHECK-P8: SU([[SU0]]): %[[REG3:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @m
|
||||
; CHECK-P8: SU([[SU1]]): %{{[0-9]+}}:g8rc = LD target-flags(ppc-toc-lo) @m, %[[REG3]]
|
||||
; CHECK-P8: ********** MI Scheduling **********
|
||||
; CHECK-P8-LABEL: fuse_addis_ld:%bb.0 entry
|
||||
; CHECK-P8: Macro fuse: SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) / ADDIStocHA8 - LD
|
||||
; CHECK-P8: SU([[SU0]]): renamable $x[[REG3:[0-9]+]] = ADDIStocHA8 $x2, @m
|
||||
; CHECK-P8: SU([[SU1]]): renamable $x[[REG3]] = LD target-flags(ppc-toc-lo) @m, renamable $x[[REG3]]
|
||||
%0 = load i64, i64* @m, align 8
|
||||
ret i64 %0
|
||||
}
|
Loading…
Reference in New Issue
Block a user