
GlobalISel: Introduce GenericMachineInstr classes and derivatives for idiomatic LLVM RTTI.

This adds some level of type safety, allows helper functions to be added for
specific opcodes for free, and also allows us to succinctly check for class
membership with the usual dyn_cast/isa/cast functions.

To start off with, add variants for the different load/store operations, and convert some
existing code to use them.

Differential Revision: https://reviews.llvm.org/D105751
Amara Emerson 2021-07-09 15:48:47 -07:00
parent b7cade9437
commit e11b55a90a
6 changed files with 252 additions and 114 deletions
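As a rough illustration of the idiom this commit enables (a hypothetical helper, not code from the patch; it only uses the wrapper API introduced below):

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Hypothetical helper: instead of comparing MI.getOpcode() against G_LOAD,
// G_SEXTLOAD and G_ZEXTLOAD by hand, classify the instruction with dyn_cast
// and use the typed accessors.
static bool getSimpleLoadOperands(MachineInstr &MI, Register &Dst,
                                  Register &Ptr) {
  auto *Load = dyn_cast<GAnyLoad>(&MI);
  if (!Load || !Load->isSimple()) // reject non-loads and volatile/atomic loads
    return false;
  Dst = Load->getDstReg();     // operand 0: loaded value
  Ptr = Load->getPointerReg(); // operand 1: pointer
  return true;
}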

View File

@ -19,6 +19,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Alignment.h"
@ -596,7 +597,7 @@ private:
/// \param [in] MemSizeInBits - The number of bits each load should produce.
///
/// \returns The lowest-index load found and the lowest index on success.
Optional<std::pair<MachineInstr *, int64_t>> findLoadOffsetsForLoadOrCombine(
Optional<std::pair<GZExtLoad *, int64_t>> findLoadOffsetsForLoadOrCombine(
SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
const SmallVector<Register, 8> &RegsToVisit,
const unsigned MemSizeInBits);

View File

@ -0,0 +1,141 @@
//===- llvm/CodeGen/GlobalISel/GenericMachineInstrs.h -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// Declares convenience wrapper classes for interpreting MachineInstr instances
/// as specific generic operations.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
#define LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/Support/Casting.h"
namespace llvm {
/// A base class for all GenericMachineInstrs.
class GenericMachineInstr : public MachineInstr {
public:
GenericMachineInstr() = delete;
/// Access the Idx'th operand as a register and return it.
/// This assumes that the Idx'th operand is a Register type.
Register getReg(unsigned Idx) { return getOperand(Idx).getReg(); }
static bool classof(const MachineInstr *MI) {
return isPreISelGenericOpcode(MI->getOpcode());
}
};
/// Represents any type of generic load or store.
/// G_LOAD, G_STORE, G_ZEXTLOAD, G_SEXTLOAD.
class GLoadStore : public GenericMachineInstr {
public:
/// Get the source register of the pointer value.
Register getPointerReg() const { return getOperand(1).getReg(); }
/// Get the MachineMemOperand on this instruction.
MachineMemOperand &getMMO() const { return **memoperands_begin(); }
/// Returns true if the attached MachineMemOperand has the atomic flag set.
bool isAtomic() const { return getMMO().isAtomic(); }
/// Returns true if the attached MachineMemOperand has the volatile flag set.
bool isVolatile() const { return getMMO().isVolatile(); }
/// Returns true if the memory operation is neither atomic nor volatile.
bool isSimple() const { return !isAtomic() && !isVolatile(); }
/// Returns true if this memory operation doesn't have any ordering
/// constraints other than normal aliasing. Volatile and (ordered) atomic
/// memory operations can't be reordered.
bool isUnordered() const { return getMMO().isUnordered(); }
/// Returns the size in bytes of the memory access.
uint64_t getMemSize() { return getMMO().getSize(); }
/// Returns the size in bits of the memory access.
uint64_t getMemSizeInBits() { return getMMO().getSizeInBits(); }
static bool classof(const MachineInstr *MI) {
switch (MI->getOpcode()) {
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_SEXTLOAD:
return true;
default:
return false;
}
}
};
/// Represents any generic load, including sign/zero extending variants.
class GAnyLoad : public GLoadStore {
public:
/// Get the definition register of the loaded value.
Register getDstReg() const { return getOperand(0).getReg(); }
static bool classof(const MachineInstr *MI) {
switch (MI->getOpcode()) {
case TargetOpcode::G_LOAD:
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_SEXTLOAD:
return true;
default:
return false;
}
}
};
/// Represents a G_LOAD.
class GLoad : public GAnyLoad {
public:
static bool classof(const MachineInstr *MI) {
return MI->getOpcode() == TargetOpcode::G_LOAD;
}
};
/// Represents either a G_SEXTLOAD or G_ZEXTLOAD.
class GExtLoad : public GAnyLoad {
public:
static bool classof(const MachineInstr *MI) {
return MI->getOpcode() == TargetOpcode::G_SEXTLOAD ||
MI->getOpcode() == TargetOpcode::G_ZEXTLOAD;
}
};
/// Represents a G_SEXTLOAD.
class GSExtLoad : public GExtLoad {
public:
static bool classof(const MachineInstr *MI) {
return MI->getOpcode() == TargetOpcode::G_SEXTLOAD;
}
};
/// Represents a G_ZEXTLOAD.
class GZExtLoad : public GExtLoad {
public:
static bool classof(const MachineInstr *MI) {
return MI->getOpcode() == TargetOpcode::G_ZEXTLOAD;
}
};
/// Represents a G_STORE.
class GStore : public GLoadStore {
public:
/// Get the stored value register.
Register getValueReg() const { return getOperand(0).getReg(); }
static bool classof(const MachineInstr *MI) {
return MI->getOpcode() == TargetOpcode::G_STORE;
}
};
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
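
As a usage sketch for the accessors declared above (illustrative only, not part of the patch; the helper name and MaxMemBits parameter are made up):

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Decide whether a generic store looks like a narrowing candidate, using only
// the wrapper accessors rather than dereferencing memoperands_begin().
static bool isNarrowableStore(GStore &StoreMI, const MachineRegisterInfo &MRI,
                              uint64_t MaxMemBits) {
  // Volatile or ordered-atomic stores must not be reordered or rewritten.
  if (!StoreMI.isUnordered())
    return false;
  // getMemSizeInBits() forwards to the attached MachineMemOperand.
  if (StoreMI.getMemSizeInBits() > MaxMemBits)
    return false;
  // Operand 0 is the stored value, operand 1 the pointer.
  LLT ValTy = MRI.getType(StoreMI.getValueReg());
  LLT PtrTy = MRI.getType(StoreMI.getPointerReg());
  return ValTy.isScalar() && PtrTy.isPointer();
}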

View File

@ -21,6 +21,7 @@
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZERHELPER_H
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@ -308,8 +309,8 @@ public:
LegalizeResult fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy);
LegalizeResult
reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx,
LLT NarrowTy);
/// Legalize an instruction by reducing the operation width, either by
/// narrowing the type of the operation or by reducing the number of elements
@ -357,8 +358,8 @@ public:
LLT CastTy);
LegalizeResult lowerBitcast(MachineInstr &MI);
LegalizeResult lowerLoad(MachineInstr &MI);
LegalizeResult lowerStore(MachineInstr &MI);
LegalizeResult lowerLoad(GAnyLoad &MI);
LegalizeResult lowerStore(GStore &MI);
LegalizeResult lowerBitCount(MachineInstr &MI);
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI);
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);

View File

@ -230,6 +230,16 @@ MachineInstr *getDefIgnoringCopies(Register Reg,
/// Also walks through hints such as G_ASSERT_ZEXT.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI);
// Templated variant of getOpcodeDef returning a MachineInstr-derived T.
/// See if Reg is defined by a single def instruction of type T.
/// Also try to do trivial folding if it's a COPY with
/// the same types. Returns null otherwise.
template <class T>
T *getOpcodeDef(Register Reg, const MachineRegisterInfo &MRI) {
MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI);
return dyn_cast_or_null<T>(DefMI);
}
/// Returns an APFloat from Val converted to the appropriate size.
APFloat getAPFloatFromSize(double Val, unsigned Size);
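
A brief sketch of how the templated getOpcodeDef above combines with the new instruction classes (mirroring its uses in the combiner changes below; the helper itself is hypothetical):

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Look through copies defining Reg; if the underlying def is a G_ZEXTLOAD,
// return its pointer operand, otherwise return an invalid Register.
static Register getZExtLoadPointer(Register Reg,
                                   const MachineRegisterInfo &MRI) {
  if (GZExtLoad *Load = getOpcodeDef<GZExtLoad>(Reg, MRI))
    return Load->getPointerReg();
  return Register();
}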

View File

@ -11,6 +11,7 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@ -441,16 +442,13 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
// to find a safe place to sink it) whereas the extend is freely movable.
// It also prevents us from duplicating the load for the volatile case or just
// for performance.
if (MI.getOpcode() != TargetOpcode::G_LOAD &&
MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
if (!LoadMI)
return false;
auto &LoadValue = MI.getOperand(0);
assert(LoadValue.isReg() && "Result wasn't a register?");
Register LoadReg = LoadMI->getDstReg();
LLT LoadValueTy = MRI.getType(LoadValue.getReg());
LLT LoadValueTy = MRI.getType(LoadReg);
if (!LoadValueTy.isScalar())
return false;
@ -472,17 +470,16 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
// and emit a variant of (extend (trunc X)) for the others according to the
// relative type sizes. At the same time, pick an extend to use based on the
// extend involved in the chosen type.
unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
? TargetOpcode::G_ANYEXT
: MI.getOpcode() == TargetOpcode::G_SEXTLOAD
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT;
unsigned PreferredOpcode =
isa<GLoad>(&MI)
? TargetOpcode::G_ANYEXT
: isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
Preferred = {LLT(), PreferredOpcode, nullptr};
for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
(UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
const auto &MMO = **MI.memoperands_begin();
const auto &MMO = LoadMI->getMMO();
// For atomics, only form anyextending loads.
if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
continue;
@ -493,9 +490,9 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
MMDesc.AlignInBits = MMO.getAlign().value() * 8;
MMDesc.Ordering = MMO.getSuccessOrdering();
LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
LegalizeActions::Legal)
LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
.Action != LegalizeActions::Legal)
continue;
}
Preferred = ChoosePreferredUse(Preferred,
@ -668,12 +665,12 @@ bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
uint64_t SizeInBits = MI.getOperand(2).getImm();
// If the source is a G_SEXTLOAD from the same bit width, then we don't
// need any extend at all, just a truncate.
if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
const auto &MMO = **LoadMI->memoperands_begin();
if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
// If truncating more than the original extended value, abort.
if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
auto LoadSizeBits = LoadMI->getMemSizeInBits();
if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
return false;
if (MMO.getSizeInBits() == SizeInBits)
if (LoadSizeBits == SizeInBits)
return true;
}
return false;
@ -695,20 +692,16 @@ bool CombinerHelper::matchSextInRegOfLoad(
return false;
Register SrcReg = MI.getOperand(1).getReg();
MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
!LoadDef->isSimple())
return false;
// If the sign extend extends from a narrower width than the load's width,
// then we can narrow the load width when we combine to a G_SEXTLOAD.
auto &MMO = **LoadDef->memoperands_begin();
// Don't do this for non-simple loads.
if (MMO.isAtomic() || MMO.isVolatile())
return false;
// Avoid widening the load at all.
unsigned NewSizeBits =
std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
LoadDef->getMemSizeInBits());
// Don't generate G_SEXTLOADs with a < 1 byte width.
if (NewSizeBits < 8)
@ -717,7 +710,7 @@ bool CombinerHelper::matchSextInRegOfLoad(
// anyway for most targets.
if (!isPowerOf2_32(NewSizeBits))
return false;
MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
return true;
}
@ -727,8 +720,7 @@ void CombinerHelper::applySextInRegOfLoad(
Register LoadReg;
unsigned ScalarSizeBits;
std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
auto *LoadDef = MRI.getVRegDef(LoadReg);
assert(LoadDef && "Expected a load reg");
GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
// If we have the following:
// %ld = G_LOAD %ptr, (load 2)
@ -736,13 +728,13 @@ void CombinerHelper::applySextInRegOfLoad(
// ==>
// %ld = G_SEXTLOAD %ptr (load 1)
auto &MMO = **LoadDef->memoperands_begin();
auto &MMO = LoadDef->getMMO();
Builder.setInstrAndDebugLoc(*LoadDef);
auto &MF = Builder.getMF();
auto PtrInfo = MMO.getPointerInfo();
auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
LoadDef->getOperand(1).getReg(), *NewMMO);
LoadDef->getPointerReg(), *NewMMO);
MI.eraseFromParent();
}
@ -3436,7 +3428,7 @@ CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
/// e.g. x[i] << 24
///
/// \returns The load instruction and the byte offset it is moved into.
static Optional<std::pair<MachineInstr *, int64_t>>
static Optional<std::pair<GZExtLoad *, int64_t>>
matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
const MachineRegisterInfo &MRI) {
assert(MRI.hasOneNonDBGUse(Reg) &&
@ -3453,18 +3445,17 @@ matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
return None;
// TODO: Handle other types of loads.
auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
if (!Load)
return None;
const auto &MMO = **Load->memoperands_begin();
if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
return None;
return std::make_pair(Load, Shift / MemSizeInBits);
}
Optional<std::pair<MachineInstr *, int64_t>>
Optional<std::pair<GZExtLoad *, int64_t>>
CombinerHelper::findLoadOffsetsForLoadOrCombine(
SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
@ -3476,7 +3467,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
int64_t LowestIdx = INT64_MAX;
// The load which uses the lowest index.
MachineInstr *LowestIdxLoad = nullptr;
GZExtLoad *LowestIdxLoad = nullptr;
// Keeps track of the load indices we see. We shouldn't see any indices twice.
SmallSet<int64_t, 8> SeenIdx;
@ -3505,7 +3496,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
if (!LoadAndPos)
return None;
MachineInstr *Load;
GZExtLoad *Load;
int64_t DstPos;
std::tie(Load, DstPos) = *LoadAndPos;
@ -3518,10 +3509,10 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
return None;
// Make sure that the MachineMemOperands of every seen load are compatible.
const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
auto &LoadMMO = Load->getMMO();
if (!MMO)
MMO = LoadMMO;
if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
MMO = &LoadMMO;
if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
return None;
// Find out what the base pointer and index for the load are.
@ -3643,7 +3634,7 @@ bool CombinerHelper::matchLoadOrCombine(
// Also verify that each of these ends up putting a[i] into the same memory
// offset as a load into a wide type would.
SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
MachineInstr *LowestIdxLoad;
GZExtLoad *LowestIdxLoad;
int64_t LowestIdx;
auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
@ -3683,8 +3674,8 @@ bool CombinerHelper::matchLoadOrCombine(
// We will reuse the pointer from the load which ends up at byte offset 0. It
// may not use index 0.
Register Ptr = LowestIdxLoad->getOperand(1).getReg();
const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
Register Ptr = LowestIdxLoad->getPointerReg();
const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
LegalityQuery::MemDesc MMDesc;
MMDesc.MemoryTy = Ty;
MMDesc.AlignInBits = MMO.getAlign().value() * 8;

View File

@ -893,53 +893,53 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
case TargetOpcode::G_INSERT:
return narrowScalarInsert(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_LOAD: {
auto &MMO = **MI.memoperands_begin();
Register DstReg = MI.getOperand(0).getReg();
auto &LoadMI = cast<GLoad>(MI);
Register DstReg = LoadMI.getDstReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
return UnableToLegalize;
if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
MIRBuilder.buildAnyExt(DstReg, TmpReg);
MI.eraseFromParent();
LoadMI.eraseFromParent();
return Legalized;
}
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
}
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_SEXTLOAD: {
bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
Register DstReg = MI.getOperand(0).getReg();
Register PtrReg = MI.getOperand(1).getReg();
auto &LoadMI = cast<GExtLoad>(MI);
Register DstReg = LoadMI.getDstReg();
Register PtrReg = LoadMI.getPointerReg();
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = **MI.memoperands_begin();
auto &MMO = LoadMI.getMMO();
unsigned MemSize = MMO.getSizeInBits();
if (MemSize == NarrowSize) {
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
} else if (MemSize < NarrowSize) {
MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
} else if (MemSize > NarrowSize) {
// FIXME: Need to split the load.
return UnableToLegalize;
}
if (ZExt)
if (isa<GZExtLoad>(LoadMI))
MIRBuilder.buildZExt(DstReg, TmpReg);
else
MIRBuilder.buildSExt(DstReg, TmpReg);
MI.eraseFromParent();
LoadMI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_STORE: {
const auto &MMO = **MI.memoperands_begin();
auto &StoreMI = cast<GStore>(MI);
Register SrcReg = MI.getOperand(0).getReg();
Register SrcReg = StoreMI.getValueReg();
LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
return UnableToLegalize;
@ -950,16 +950,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (SrcTy.isVector() && LeftoverBits != 0)
return UnableToLegalize;
if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = **MI.memoperands_begin();
MIRBuilder.buildTrunc(TmpReg, SrcReg);
MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
MI.eraseFromParent();
MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
StoreMI.eraseFromParent();
return Legalized;
}
return reduceLoadStoreWidth(MI, 0, NarrowTy);
return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
}
case TargetOpcode::G_SELECT:
return narrowScalarSelect(MI, TypeIdx, NarrowTy);
@ -2797,13 +2796,12 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
return UnableToLegalize;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerLoad(MachineInstr &MI) {
LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
// Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
Register DstReg = MI.getOperand(0).getReg();
Register PtrReg = MI.getOperand(1).getReg();
Register DstReg = LoadMI.getDstReg();
Register PtrReg = LoadMI.getPointerReg();
LLT DstTy = MRI.getType(DstReg);
MachineMemOperand &MMO = **MI.memoperands_begin();
MachineMemOperand &MMO = LoadMI.getMMO();
LLT MemTy = MMO.getMemoryType();
MachineFunction &MF = MIRBuilder.getMF();
if (MemTy.isVector())
@ -2829,11 +2827,10 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) {
LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
}
if (MI.getOpcode() == TargetOpcode::G_SEXTLOAD) {
if (isa<GSExtLoad>(LoadMI)) {
auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
} else if (MI.getOpcode() == TargetOpcode::G_ZEXTLOAD ||
WideMemTy == DstTy) {
} else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
// The extra bits are guaranteed to be zero, since we stored them that
// way. A zext load from Wide thus automatically gives zext from MemVT.
@ -2845,7 +2842,7 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) {
if (DstTy != LoadTy)
MIRBuilder.buildTrunc(DstReg, LoadReg);
MI.eraseFromParent();
LoadMI.eraseFromParent();
return Legalized;
}
@ -2875,23 +2872,22 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) {
uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
MachineMemOperand *SmallMMO =
MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
LLT PtrTy = MRI.getType(PtrReg);
unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
LLT AnyExtTy = LLT::scalar(AnyExtSize);
auto LargeLoad = MIRBuilder.buildLoadInstr(
TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO);
auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
PtrReg, *LargeMMO);
auto OffsetCst = MIRBuilder.buildConstant(
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
LargeSplitSize / 8);
Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr =
MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
auto SmallLoad = MIRBuilder.buildLoadInstr(
MI.getOpcode(), AnyExtTy, SmallPtr, *SmallMMO);
auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
SmallPtr, *SmallMMO);
auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
@ -2903,23 +2899,22 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) {
MIRBuilder.buildTrunc(DstReg, {Or});
}
MI.eraseFromParent();
LoadMI.eraseFromParent();
return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerStore(MachineInstr &MI) {
LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
// Lower a non-power of 2 store into multiple pow-2 stores.
// E.g. split an i24 store into an i16 store + i8 store.
// We do this by first extending the stored value to the next largest power
// of 2 type, and then using truncating stores to store the components.
// By doing this, likewise with G_LOAD, generate an extend that can be
// artifact-combined away instead of leaving behind extracts.
Register SrcReg = MI.getOperand(0).getReg();
Register PtrReg = MI.getOperand(1).getReg();
Register SrcReg = StoreMI.getValueReg();
Register PtrReg = StoreMI.getPointerReg();
LLT SrcTy = MRI.getType(SrcReg);
MachineFunction &MF = MIRBuilder.getMF();
MachineMemOperand &MMO = **MI.memoperands_begin();
MachineMemOperand &MMO = **StoreMI.memoperands_begin();
LLT MemTy = MMO.getMemoryType();
if (SrcTy.isVector())
@ -2945,7 +2940,7 @@ LegalizerHelper::lowerStore(MachineInstr &MI) {
MachineMemOperand *NewMMO =
MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
MI.eraseFromParent();
StoreMI.eraseFromParent();
return Legalized;
}
@ -2976,7 +2971,7 @@ LegalizerHelper::lowerStore(MachineInstr &MI) {
MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
MI.eraseFromParent();
StoreMI.eraseFromParent();
return Legalized;
}
@ -3167,9 +3162,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
return lowerLoad(MI);
return lowerLoad(cast<GAnyLoad>(MI));
case TargetOpcode::G_STORE:
return lowerStore(MI);
return lowerStore(cast<GStore>(MI));
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
@ -3966,26 +3961,24 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
}
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
LLT NarrowTy) {
// FIXME: Don't know how to handle secondary types yet.
if (TypeIdx != 0)
return UnableToLegalize;
MachineMemOperand *MMO = *MI.memoperands_begin();
// This implementation doesn't work for atomics. Give up instead of doing
// something invalid.
if (MMO->isAtomic())
if (LdStMI.isAtomic())
return UnableToLegalize;
bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
Register ValReg = MI.getOperand(0).getReg();
Register AddrReg = MI.getOperand(1).getReg();
bool IsLoad = isa<GLoad>(LdStMI);
Register ValReg = LdStMI.getReg(0);
Register AddrReg = LdStMI.getPointerReg();
LLT ValTy = MRI.getType(ValReg);
// FIXME: Do we need a distinct NarrowMemory legalize action?
if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
return UnableToLegalize;
}
@ -4016,6 +4009,7 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
// is a load, return the new registers in ValRegs. For a store, each element
// of ValRegs should be PartTy. Returns the next offset that needs to be
// handled.
auto MMO = LdStMI.getMMO();
auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
unsigned Offset) -> unsigned {
MachineFunction &MF = MIRBuilder.getMF();
@ -4028,7 +4022,7 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
MachineMemOperand *NewMMO =
MF.getMachineMemOperand(MMO, ByteOffset, PartTy);
MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
if (IsLoad) {
Register Dst = MRI.createGenericVirtualRegister(PartTy);
@ -4053,7 +4047,7 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
LeftoverTy, NarrowLeftoverRegs);
}
MI.eraseFromParent();
LdStMI.eraseFromParent();
return Legalized;
}
@ -4327,7 +4321,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
case G_LOAD:
case G_STORE:
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
case G_SEXT_INREG:
return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
GISEL_VECREDUCE_CASES_NONSEQ