1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
llvm-mirror/include/llvm/CodeGen/MachineCombinerPattern.h
Sanne Wouda 3f70a4c2df [AArch64] Fix over-eager fusing of NEON SIMD MUL/ADD
Summary:
The ISel pattern for SIMD MLA is a bit too eager: it replaces the ADD with an
MLA even when the MUL cannot be eliminated, e.g. when it has another use.  An
MLA usually has a higher latency than an ADD (and there are fewer pipes
available that can execute it), so trading an MLA for an ADD is not great.

ISel is not taking the number of uses of the MUL result into account, nor any
other factors such as the length of the critical path or other resource pressure.

The MachineCombiner is able to make these judgments so this patch ports the ISel
pattern for MUL/ADD fusing to the MachineCombiner.

Similarly for MUL/SUB -> MLS, as well as the indexed variants.

The change has no impact on SPEC CPU© intrate nor fprate.

Reviewers: dmgreen, SjoerdMeijer, fhahn, Gerolf

Subscribers: kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70673
2019-12-03 15:48:37 +00:00

153 lines
3.4 KiB
C++

//===-- llvm/CodeGen/MachineCombinerPattern.h - Instruction pattern supported by
// combiner ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the instruction patterns supported by the machine combiner.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_MACHINECOMBINERPATTERN_H
#define LLVM_CODEGEN_MACHINECOMBINERPATTERN_H
namespace llvm {

/// Identifiers for the instruction patterns that the machine combiner pass
/// can match.
///
/// Enumerators carry no explicit values, so each one's underlying value is
/// determined solely by its position in this list.
enum class MachineCombinerPattern {
  //-------------------------------------------------------------------------
  // Reassociation: commutative variants used when reassociating a
  // computation chain. See the comments before getMachineCombinerPatterns()
  // in TargetInstrInfo.cpp.
  //-------------------------------------------------------------------------
  REASSOC_AX_BY,
  REASSOC_AX_YB,
  REASSOC_XA_BY,
  REASSOC_XA_YB,

  //-------------------------------------------------------------------------
  // Multiply-add patterns matched by the AArch64 machine combiner.
  //-------------------------------------------------------------------------
  MULADDW_OP1,
  MULADDW_OP2,
  MULSUBW_OP1,
  MULSUBW_OP2,
  MULADDWI_OP1,
  MULSUBWI_OP1,
  MULADDX_OP1,
  MULADDX_OP2,
  MULSUBX_OP1,
  MULSUBX_OP2,
  MULADDXI_OP1,
  MULSUBXI_OP1,

  //-------------------------------------------------------------------------
  // NEON integer vectors.
  //-------------------------------------------------------------------------
  // MULADD, vector forms.
  MULADDv8i8_OP1,
  MULADDv8i8_OP2,
  MULADDv16i8_OP1,
  MULADDv16i8_OP2,
  MULADDv4i16_OP1,
  MULADDv4i16_OP2,
  MULADDv8i16_OP1,
  MULADDv8i16_OP2,
  MULADDv2i32_OP1,
  MULADDv2i32_OP2,
  MULADDv4i32_OP1,
  MULADDv4i32_OP2,

  // MULSUB, vector forms.
  MULSUBv8i8_OP1,
  MULSUBv8i8_OP2,
  MULSUBv16i8_OP1,
  MULSUBv16i8_OP2,
  MULSUBv4i16_OP1,
  MULSUBv4i16_OP2,
  MULSUBv8i16_OP1,
  MULSUBv8i16_OP2,
  MULSUBv2i32_OP1,
  MULSUBv2i32_OP2,
  MULSUBv4i32_OP1,
  MULSUBv4i32_OP2,

  // MULADD, indexed forms.
  MULADDv4i16_indexed_OP1,
  MULADDv4i16_indexed_OP2,
  MULADDv8i16_indexed_OP1,
  MULADDv8i16_indexed_OP2,
  MULADDv2i32_indexed_OP1,
  MULADDv2i32_indexed_OP2,
  MULADDv4i32_indexed_OP1,
  MULADDv4i32_indexed_OP2,

  // MULSUB, indexed forms.
  MULSUBv4i16_indexed_OP1,
  MULSUBv4i16_indexed_OP2,
  MULSUBv8i16_indexed_OP1,
  MULSUBv8i16_indexed_OP2,
  MULSUBv2i32_indexed_OP1,
  MULSUBv2i32_indexed_OP2,
  MULSUBv4i32_indexed_OP1,
  MULSUBv4i32_indexed_OP2,

  //-------------------------------------------------------------------------
  // Floating point.
  //-------------------------------------------------------------------------
  // FMULADD / FMULSUB / FNMULSUB.
  FMULADDH_OP1,
  FMULADDH_OP2,
  FMULSUBH_OP1,
  FMULSUBH_OP2,
  FMULADDS_OP1,
  FMULADDS_OP2,
  FMULSUBS_OP1,
  FMULSUBS_OP2,
  FMULADDD_OP1,
  FMULADDD_OP2,
  FMULSUBD_OP1,
  FMULSUBD_OP2,
  FNMULSUBH_OP1,
  FNMULSUBS_OP1,
  FNMULSUBD_OP1,

  // FMLA.
  FMLAv1i32_indexed_OP1,
  FMLAv1i32_indexed_OP2,
  FMLAv1i64_indexed_OP1,
  FMLAv1i64_indexed_OP2,
  FMLAv4f16_OP1,
  FMLAv4f16_OP2,
  FMLAv8f16_OP1,
  FMLAv8f16_OP2,
  // NOTE: for v2f32 the _OP2 enumerator is declared before _OP1, unlike the
  // neighbouring pairs. Swapping them would change their implicit values.
  FMLAv2f32_OP2,
  FMLAv2f32_OP1,
  FMLAv2f64_OP1,
  FMLAv2f64_OP2,
  FMLAv4i16_indexed_OP1,
  FMLAv4i16_indexed_OP2,
  FMLAv8i16_indexed_OP1,
  FMLAv8i16_indexed_OP2,
  FMLAv2i32_indexed_OP1,
  FMLAv2i32_indexed_OP2,
  FMLAv2i64_indexed_OP1,
  FMLAv2i64_indexed_OP2,
  FMLAv4f32_OP1,
  FMLAv4f32_OP2,
  FMLAv4i32_indexed_OP1,
  FMLAv4i32_indexed_OP2,

  // FMLS. The v1i32/v1i64 indexed forms only have an _OP2 enumerator.
  FMLSv1i32_indexed_OP2,
  FMLSv1i64_indexed_OP2,
  FMLSv4f16_OP1,
  FMLSv4f16_OP2,
  FMLSv8f16_OP1,
  FMLSv8f16_OP2,
  FMLSv2f32_OP1,
  FMLSv2f32_OP2,
  FMLSv2f64_OP1,
  FMLSv2f64_OP2,
  FMLSv4i16_indexed_OP1,
  FMLSv4i16_indexed_OP2,
  FMLSv8i16_indexed_OP1,
  FMLSv8i16_indexed_OP2,
  FMLSv2i32_indexed_OP1,
  FMLSv2i32_indexed_OP2,
  FMLSv2i64_indexed_OP1,
  FMLSv2i64_indexed_OP2,
  FMLSv4f32_OP1,
  FMLSv4f32_OP2,
  FMLSv4i32_indexed_OP1,
  FMLSv4i32_indexed_OP2
};

} // end namespace llvm
#endif