1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[ARM] Add new target feature to fuse literal generation

This feature enables the fusion of literal generation operations on Cortex A57
and Cortex A72, as recommended in their Software Optimisation Guides, sections
4.14 and 4.11, respectively.

Differential revision: https://reviews.llvm.org/D49563

llvm-svn: 338147
This commit is contained in:
Evandro Menezes 2018-07-27 18:16:47 +00:00
parent 9f289a7b66
commit f363e3bc43
4 changed files with 94 additions and 19 deletions

View File

@ -141,6 +141,10 @@ def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
// Fast execution of bottom and top halves of literal generation.
// Exposed as the "fuse-literals" attribute; enabling it sets the subtarget's
// HasFuseLiterals field to true.
def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
// How the thread pointer is read: with "read-tp-hard" enabled (ReadTPHard set
// to true) it is read from a register.
def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true",
"Reading thread pointer from register">;

View File

@ -19,6 +19,47 @@
namespace llvm {
/// Fuse AES crypto encoding or decoding.
///
/// Returns true when SecondMI is AESMC and FirstMI is AESE (encode), or when
/// SecondMI is AESIMC and FirstMI is AESD (decode). A null FirstMI is treated
/// as a wildcard that matches any first instruction.
static bool isAESPair(const MachineInstr *FirstMI,
                      const MachineInstr &SecondMI) {
  // Sentinel opcode standing in for an unspecified first instr.
  const unsigned Wildcard = static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
  const unsigned LeadOpc = FirstMI ? FirstMI->getOpcode() : Wildcard;

  // Work out which opcode has to come first for this second instr to fuse.
  unsigned WantedLead;
  switch (SecondMI.getOpcode()) {
  case ARM::AESMC: // AES encode.
    WantedLead = ARM::AESE;
    break;
  case ARM::AESIMC: // AES decode.
    WantedLead = ARM::AESD;
    break;
  default:
    // Not the second half of any AES pair.
    return false;
  }

  return LeadOpc == WantedLead || LeadOpc == Wildcard;
}
/// Fuse literal generation.
///
/// Returns true when SecondMI is MOVTi16 and FirstMI is MOVi16, i.e. the two
/// halves of a 32 bit immediate. A null FirstMI is treated as a wildcard that
/// matches any first instruction.
static bool isLiteralsPair(const MachineInstr *FirstMI,
                           const MachineInstr &SecondMI) {
  // Only MOVTi16 can be the second half of the pair.
  if (SecondMI.getOpcode() != ARM::MOVTi16)
    return false;

  // Sentinel opcode standing in for an unspecified first instr.
  const unsigned Wildcard = static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
  const unsigned LeadOpc = FirstMI ? FirstMI->getOpcode() : Wildcard;

  // 32 bit immediate.
  return LeadOpc == Wildcard || LeadOpc == ARM::MOVi16;
}
/// Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
@ -28,24 +69,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const MachineInstr &SecondMI) {
const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
// Assume wildcards for unspecified instrs.
unsigned FirstOpcode =
FirstMI ? FirstMI->getOpcode()
: static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
unsigned SecondOpcode = SecondMI.getOpcode();
if (ST.hasFuseAES())
// Fuse AES crypto operations.
switch(SecondOpcode) {
// AES encode.
case ARM::AESMC :
return FirstOpcode == ARM::AESE ||
FirstOpcode == ARM::INSTRUCTION_LIST_END;
// AES decode.
case ARM::AESIMC:
return FirstOpcode == ARM::AESD ||
FirstOpcode == ARM::INSTRUCTION_LIST_END;
}
if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
return true;
return false;
}

View File

@ -327,6 +327,10 @@ protected:
/// pairs faster.
bool HasFuseAES = false;
/// HasFuseLiterals - if true, processor executes back to back
/// bottom and top halves of literal generation faster.
bool HasFuseLiterals = false;
/// If true, if-conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
@ -616,8 +620,9 @@ public:
bool hasFullFP16() const { return HasFullFP16; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const { return hasFuseAES(); }
bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
const Triple &getTargetTriple() const { return TargetTriple; }

View File

@ -0,0 +1,39 @@
; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=-fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=+fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
@g = common global i32* zeroinitializer
; Address literals: the function uses the addresses of @litp and @g, and the
; expected output builds each one with a movw/movt pair (:lower16:/:upper16:).
; With +fuse-literals each movt must directly follow its matching movw; with
; -fuse-literals the two movw instructions are expected back to back instead.
define i32* @litp(i32 %a, i32 %b) {
entry:
%add = add nsw i32 %b, %a
%ptr = getelementptr i32, i32* bitcast (i32* (i32, i32)* @litp to i32*), i32 %add
%res = getelementptr i32, i32* bitcast (i32** @g to i32*), i32 %add
store i32* %ptr, i32** @g, align 4
ret i32* %res
; CHECK-LABEL: litp:
; CHECK: movw [[R:r[0-9]+]], :lower16:litp
; CHECKDONT-NEXT: movw [[S:r[0-9]+]], :lower16:g
; CHECKFUSE-NEXT: movt [[R]], :upper16:litp
; CHECKFUSE-NEXT: movw [[S:r[0-9]+]], :lower16:g
; CHECKFUSE-NEXT: movt [[S]], :upper16:g
}
; Integer literals: -262095121 is 0xF060BEEF (movw #48879 / movt #61536) and
; 121110837 is 0x07380135 (movw #309 / movt #1848). With +fuse-literals every
; movt is expected immediately after its matching movw, including the pair
; that forms the address of @g; with -fuse-literals an add is expected right
; after the first movw instead.
define i32 @liti(i32 %a, i32 %b) {
entry:
%adda = add i32 %a, -262095121
%add1 = add i32 %adda, %b
%addb = add i32 %b, 121110837
%add2 = add i32 %addb, %a
store i32 %add1, i32* bitcast (i32** @g to i32*), align 4
ret i32 %add2
; CHECK-LABEL: liti:
; CHECK: movw [[R:r[0-9]+]], #309
; CHECKDONT-NEXT: add {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}
; CHECKFUSE-NEXT: movt [[R]], #1848
; CHECKFUSE: movw [[S:r[0-9]+]], :lower16:g
; CHECKFUSE-NEXT: movt [[S]], :upper16:g
; CHECKFUSE-NEXT: movw [[T:r[0-9]+]], #48879
; CHECKFUSE-NEXT: movt [[T]], #61536
}