1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[AArch64][GlobalISel] Fold constants into G_GLOBAL_VALUE

This pretty much just ports `performGlobalAddressCombine` from
AArch64ISelLowering. (AArch64 doesn't use the generic DAG combine for this.)

This adds a pre-legalize combine which looks for this pattern:

```
  %g = G_GLOBAL_VALUE @x
  %ptr1 = G_PTR_ADD %g, cst1
  %ptr2 = G_PTR_ADD %g, cst2
  ...
  %ptrN = G_PTR_ADD %g, cstN
```

And then, if possible, transforms it like so:

```
  %offset_g = G_GLOBAL_VALUE @x + min(cst)
  %g = G_PTR_ADD %offset_g, -min(cst)
  %ptr1 = G_PTR_ADD %g, cst1
  %ptr2 = G_PTR_ADD %g, cst2
  ...
  %ptrN = G_PTR_ADD %g, cstN
```

Where min(cst) is the smallest out of the G_PTR_ADD constants. The original
G_PTR_ADDs should then be folded later on so that they look like
`%ptrN = G_PTR_ADD %offset_g, cstN - min(cst)`.

This means we should save at least one G_PTR_ADD.

This also updates code in the legalizer + selector which assumes that
G_GLOBAL_VALUE will never have an offset and adds/updates relevant tests.

Differential Revision: https://reviews.llvm.org/D96624
This commit is contained in:
Jessica Paquette 2021-02-11 17:00:00 -08:00
parent 42bd4878de
commit 1f262f8352
12 changed files with 955 additions and 50 deletions

View File

@ -24,10 +24,20 @@ def icmp_redundant_trunc : GICombineRule<
[{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
(apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
// AArch64-specific offset folding for G_GLOBAL_VALUE.
def fold_global_offset_matchdata : GIDefMatchData<"std::pair<uint64_t, uint64_t>">;
def fold_global_offset : GICombineRule<
(defs root:$root, fold_global_offset_matchdata:$matchinfo),
(match (wip_match_opcode G_GLOBAL_VALUE):$root,
[{ return matchFoldGlobalOffset(*${root}, MRI, ${matchinfo}); }]),
(apply [{ return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
>;
def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
"AArch64GenPreLegalizerCombinerHelper", [all_combines,
fconstant_to_constant,
icmp_redundant_trunc]> {
icmp_redundant_trunc,
fold_global_offset]> {
let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
let StateClass = "AArch64PreLegalizerCombinerHelperState";
let AdditionalArguments = [];

View File

@ -5628,8 +5628,10 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
return None;
// TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
// TODO: Need to check GV's offset % size if doing offset folding into globals.
assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
auto Offset = Adrp.getOperand(1).getOffset();
if (Offset % Size != 0)
return None;
auto GV = Adrp.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return None;
@ -5643,7 +5645,7 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
Register AdrpReg = Adrp.getOperand(0).getReg();
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
[=](MachineInstrBuilder &MIB) {
MIB.addGlobalAddress(GV, /* Offset */ 0,
MIB.addGlobalAddress(GV, Offset,
OpFlags | AArch64II::MO_PAGEOFF |
AArch64II::MO_NC);
}}};

View File

@ -781,7 +781,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// G_ADD_LOW instructions.
// By splitting this here, we can optimize accesses in the small code model by
// folding in the G_ADD_LOW into the load/store offset.
auto GV = MI.getOperand(1).getGlobal();
auto &GlobalOp = MI.getOperand(1);
const auto* GV = GlobalOp.getGlobal();
if (GV->isThreadLocal())
return true; // Don't want to modify TLS vars.
@ -791,9 +792,10 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
if (OpFlags & AArch64II::MO_GOT)
return true;
auto Offset = GlobalOp.getOffset();
Register DstReg = MI.getOperand(0).getReg();
auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
.addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
.addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
// Set the regclass on the dest reg too.
MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
@ -811,6 +813,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// binary must also be loaded into address range [0, 2^48). Both of these
// properties need to be ensured at runtime when using tagged addresses.
if (OpFlags & AArch64II::MO_TAGGED) {
assert(!Offset &&
"Should not have folded in an offset for a tagged global!");
ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
.addGlobalAddress(GV, 0x100000000,
AArch64II::MO_PREL | AArch64II::MO_G3)
@ -819,7 +823,7 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
}
MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
.addGlobalAddress(GV, 0,
.addGlobalAddress(GV, Offset,
OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
MI.eraseFromParent();
return true;

View File

@ -107,6 +107,116 @@ static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
/// Decide whether a constant offset can be folded into a G_GLOBAL_VALUE.
///
/// e.g.
///
///   %g = G_GLOBAL_VALUE @x  ->  %g = G_GLOBAL_VALUE @x + cst
///
/// The fold is only considered when every non-debug user of the global's
/// result is a G_PTR_ADD whose offset operand is a known constant.
///
/// \param MI        The G_GLOBAL_VALUE to inspect.
/// \param MRI       Register info used to walk the users of MI's result and
///                  to look through to their constant offsets.
/// \param MatchInfo Written only on success: (new offset for the global,
///                  smallest G_PTR_ADD constant that was found).
/// \returns true if the offset can be folded, false otherwise.
static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  MachineFunction &MF = *MI.getMF();
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();

  // Only plain references qualify: anything the subtarget classifies with a
  // flag other than MO_NO_FLAG could represent offsets etc., so leave it alone.
  const bool HasReferenceFlags =
      MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
          GV, MF.getTarget()) != AArch64II::MO_NO_FLAG;
  if (HasReferenceFlags)
    return false;

  // Walk the users of the global. The pattern we need is:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // While walking, remember the *smallest* constant, because the goal is:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  uint64_t SmallestCst = -1ull;
  Register GlobalReg = MI.getOperand(0).getReg();
  for (auto &User : MRI.use_nodbg_instructions(GlobalReg)) {
    // Any user that is not a G_PTR_ADD blocks the fold.
    if (User.getOpcode() != TargetOpcode::G_PTR_ADD)
      return false;
    // So does a G_PTR_ADD whose offset is not a known constant.
    auto OffsetCst =
        getConstantVRegValWithLookThrough(User.getOperand(2).getReg(), MRI);
    if (!OffsetCst)
      return false;
    const uint64_t CstVal = OffsetCst->Value.getZExtValue();
    if (CstVal < SmallestCst)
      SmallestCst = CstVal;
  }

  // The offset on the global must strictly grow; otherwise the combine could
  // keep re-matching its own output forever.
  const uint64_t ExistingOffset = GlobalOp.getOffset();
  const uint64_t FoldedOffset = SmallestCst + ExistingOffset;
  if (FoldedOffset <= ExistingOffset)
    return false;

  // Legality of the folded offset. It has to stay below 2^21, since that is
  // the largest offset expressible in all object formats, and it must not
  // point past the referenced object, which would violate the code model.
  //
  // Negative offsets are rejected here as well: in unsigned arithmetic they
  // look like very large positive offsets. They could also cause code model
  // violations, and aren't really common enough to matter.
  if (FoldedOffset >= (1 << 21))
    return false;

  // The size check needs a sized type, so test that first.
  Type *ValueTy = GV->getValueType();
  if (!ValueTy->isSized())
    return false;
  if (FoldedOffset >
      GV->getParent()->getDataLayout().getTypeAllocSize(ValueTy))
    return false;

  MatchInfo = std::make_pair(FoldedOffset, SmallestCst);
  return true;
}
/// Fold the offset chosen by matchFoldGlobalOffset into a G_GLOBAL_VALUE.
///
/// \param MI        The G_GLOBAL_VALUE; it is rewritten in place.
/// \param MRI       Used to clone MI's destination register for the offset
///                  global.
/// \param B         Builder used to emit the compensating G_CONSTANT and
///                  G_PTR_ADD.
/// \param Observer  Notified before and after MI is mutated.
/// \param MatchInfo (new offset for the global, smallest G_PTR_ADD constant).
/// \returns true always.
static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  MachineIRBuilder &B,
                                  GISelChangeObserver &Observer,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  // Change:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // To:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Then, the original G_PTR_ADDs should be folded later on so that they look
  // like this:
  //
  //  %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
  uint64_t Offset, MinOffset;
  std::tie(Offset, MinOffset) = MatchInfo;

  // The compensating G_PTR_ADD reads the register that MI defines, so it must
  // be emitted *after* MI. Pointing the builder at MI itself would insert
  // before it and place the use ahead of its definition. Setting the insertion
  // point explicitly (rather than dereferencing the next instruction) also
  // works when MI is the last instruction in its block.
  B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
  B.setDebugLoc(MI.getDebugLoc());

  Observer.changingInstr(MI);
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());

  // MI now defines a fresh register; the original destination is redefined by
  // the G_PTR_ADD below so that existing users keep seeing the same value.
  Register Dst = MI.getOperand(0).getReg();
  Register NewGVDst = MRI.cloneVirtualRegister(Dst);
  MI.getOperand(0).setReg(NewGVDst);
  Observer.changedInstr(MI);

  // %g = G_PTR_ADD %offset_g, -min_cst
  B.buildPtrAdd(
      Dst, NewGVDst,
      B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
  return true;
}
class AArch64PreLegalizerCombinerHelperState {
protected:
CombinerHelper &Helper;

View File

@ -0,0 +1,241 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=DEFAULT,CHECK
# RUN: llc -mtriple aarch64-apple-darwin -code-model=large -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=LARGE-MACHO,CHECK
# RUN: llc -mtriple aarch64-apple-darwin -code-model=small -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=SMALL-MACHO,CHECK
# RUN: llc -mtriple aarch64-linux-elf -code-model=large -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=LARGE-ELF,CHECK
# RUN: llc -mtriple aarch64-linux-elf -code-model=tiny -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=TINY,CHECK
# RUN: llc -mtriple aarch64-windows-coff -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=WINDOWS,CHECK
# Each of these tests has a trivial pattern for folding a G_PTR_ADD into a
# G_GLOBAL_VALUE.
#
# Check that given different code models/target features, we do/don't fold.
--- |
@external_linkage = external hidden global i32
@common_linkage = common local_unnamed_addr global i32 0, align 4
@internal_linkage = internal unnamed_addr global i32 0, align 4
@extern_weak_linkage = extern_weak hidden global i32
@dll_import = external dllimport global i32
define void @test_external_linkage() { ret void }
define void @test_internal_linkage() { ret void }
define void @test_common_linkage() { ret void }
define void @test_extern_weak_linkage() { ret void }
define void @never_fold_tagged_globals() #0 { ret void }
define void @test_dll_import() { ret void }
attributes #0 = { "target-features"="+tagged-globals" }
...
---
name: test_external_linkage
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; Large + Mach-O goes via GOT, so we can't fold.
; DEFAULT-LABEL: name: test_external_linkage
; DEFAULT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
; DEFAULT: $x0 = COPY [[GV]](p0)
; DEFAULT: RET_ReallyLR implicit $x0
; LARGE-MACHO-LABEL: name: test_external_linkage
; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @external_linkage
; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1
; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; LARGE-MACHO: $x0 = COPY %ptr_add(p0)
; LARGE-MACHO: RET_ReallyLR implicit $x0
; SMALL-MACHO-LABEL: name: test_external_linkage
; SMALL-MACHO: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
; SMALL-MACHO: $x0 = COPY [[GV]](p0)
; SMALL-MACHO: RET_ReallyLR implicit $x0
; LARGE-ELF-LABEL: name: test_external_linkage
; LARGE-ELF: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
; LARGE-ELF: $x0 = COPY [[GV]](p0)
; LARGE-ELF: RET_ReallyLR implicit $x0
; TINY-LABEL: name: test_external_linkage
; TINY: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
; TINY: $x0 = COPY [[GV]](p0)
; TINY: RET_ReallyLR implicit $x0
; WINDOWS-LABEL: name: test_external_linkage
; WINDOWS: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
; WINDOWS: $x0 = COPY [[GV]](p0)
; WINDOWS: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @external_linkage
%imm:_(s64) = G_CONSTANT i64 1
%ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
$x0 = COPY %ptr_add(p0)
RET_ReallyLR implicit $x0
...
---
name: test_internal_linkage
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; Large + Mach-O goes via GOT, so we can't fold.
; DEFAULT-LABEL: name: test_internal_linkage
; DEFAULT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
; DEFAULT: $x0 = COPY [[GV]](p0)
; DEFAULT: RET_ReallyLR implicit $x0
; LARGE-MACHO-LABEL: name: test_internal_linkage
; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @internal_linkage
; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1
; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; LARGE-MACHO: $x0 = COPY %ptr_add(p0)
; LARGE-MACHO: RET_ReallyLR implicit $x0
; SMALL-MACHO-LABEL: name: test_internal_linkage
; SMALL-MACHO: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
; SMALL-MACHO: $x0 = COPY [[GV]](p0)
; SMALL-MACHO: RET_ReallyLR implicit $x0
; LARGE-ELF-LABEL: name: test_internal_linkage
; LARGE-ELF: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
; LARGE-ELF: $x0 = COPY [[GV]](p0)
; LARGE-ELF: RET_ReallyLR implicit $x0
; TINY-LABEL: name: test_internal_linkage
; TINY: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
; TINY: $x0 = COPY [[GV]](p0)
; TINY: RET_ReallyLR implicit $x0
; WINDOWS-LABEL: name: test_internal_linkage
; WINDOWS: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @internal_linkage + 1
; WINDOWS: $x0 = COPY [[GV]](p0)
; WINDOWS: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @internal_linkage
%imm:_(s64) = G_CONSTANT i64 1
%ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
$x0 = COPY %ptr_add(p0)
RET_ReallyLR implicit $x0
...
---
name: test_common_linkage
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; DEFAULT-LABEL: name: test_common_linkage
; DEFAULT: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
; DEFAULT: %imm:_(s64) = G_CONSTANT i64 1
; DEFAULT: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; DEFAULT: $x0 = COPY %ptr_add(p0)
; DEFAULT: RET_ReallyLR implicit $x0
; LARGE-MACHO-LABEL: name: test_common_linkage
; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1
; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; LARGE-MACHO: $x0 = COPY %ptr_add(p0)
; LARGE-MACHO: RET_ReallyLR implicit $x0
; SMALL-MACHO-LABEL: name: test_common_linkage
; SMALL-MACHO: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
; SMALL-MACHO: %imm:_(s64) = G_CONSTANT i64 1
; SMALL-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; SMALL-MACHO: $x0 = COPY %ptr_add(p0)
; SMALL-MACHO: RET_ReallyLR implicit $x0
; LARGE-ELF-LABEL: name: test_common_linkage
; LARGE-ELF: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
; LARGE-ELF: %imm:_(s64) = G_CONSTANT i64 1
; LARGE-ELF: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; LARGE-ELF: $x0 = COPY %ptr_add(p0)
; LARGE-ELF: RET_ReallyLR implicit $x0
; TINY-LABEL: name: test_common_linkage
; TINY: %global:_(p0) = G_GLOBAL_VALUE @common_linkage
; TINY: %imm:_(s64) = G_CONSTANT i64 1
; TINY: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; TINY: $x0 = COPY %ptr_add(p0)
; TINY: RET_ReallyLR implicit $x0
; WINDOWS-LABEL: name: test_common_linkage
; WINDOWS: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @common_linkage + 1
; WINDOWS: $x0 = COPY [[GV]](p0)
; WINDOWS: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @common_linkage
%imm:_(s64) = G_CONSTANT i64 1
%ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
$x0 = COPY %ptr_add(p0)
RET_ReallyLR implicit $x0
...
---
name: test_extern_weak_linkage
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; DEFAULT-LABEL: name: test_extern_weak_linkage
; DEFAULT: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
; DEFAULT: %imm:_(s64) = G_CONSTANT i64 1
; DEFAULT: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; DEFAULT: $x0 = COPY %ptr_add(p0)
; DEFAULT: RET_ReallyLR implicit $x0
; LARGE-MACHO-LABEL: name: test_extern_weak_linkage
; LARGE-MACHO: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
; LARGE-MACHO: %imm:_(s64) = G_CONSTANT i64 1
; LARGE-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; LARGE-MACHO: $x0 = COPY %ptr_add(p0)
; LARGE-MACHO: RET_ReallyLR implicit $x0
; SMALL-MACHO-LABEL: name: test_extern_weak_linkage
; SMALL-MACHO: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
; SMALL-MACHO: %imm:_(s64) = G_CONSTANT i64 1
; SMALL-MACHO: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; SMALL-MACHO: $x0 = COPY %ptr_add(p0)
; SMALL-MACHO: RET_ReallyLR implicit $x0
; LARGE-ELF-LABEL: name: test_extern_weak_linkage
; LARGE-ELF: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
; LARGE-ELF: %imm:_(s64) = G_CONSTANT i64 1
; LARGE-ELF: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; LARGE-ELF: $x0 = COPY %ptr_add(p0)
; LARGE-ELF: RET_ReallyLR implicit $x0
; TINY-LABEL: name: test_extern_weak_linkage
; TINY: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
; TINY: %imm:_(s64) = G_CONSTANT i64 1
; TINY: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; TINY: $x0 = COPY %ptr_add(p0)
; TINY: RET_ReallyLR implicit $x0
; WINDOWS-LABEL: name: test_extern_weak_linkage
; WINDOWS: %global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
; WINDOWS: %imm:_(s64) = G_CONSTANT i64 1
; WINDOWS: %ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
; WINDOWS: $x0 = COPY %ptr_add(p0)
; WINDOWS: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @extern_weak_linkage
%imm:_(s64) = G_CONSTANT i64 1
%ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
$x0 = COPY %ptr_add(p0)
RET_ReallyLR implicit $x0
...
---
name: never_fold_tagged_globals
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; CHECK-LABEL: name: never_fold_tagged_globals
; CHECK-NOT: %global:_(p0) = G_GLOBAL_VALUE @external_linkage + 1
%global:_(p0) = G_GLOBAL_VALUE @external_linkage
%imm:_(s64) = G_CONSTANT i64 1
%ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
$x0 = COPY %ptr_add(p0)
RET_ReallyLR implicit $x0
...
---
name: test_dll_import
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; CHECK-LABEL: name: test_dll_import
; CHECK-NOT: %global:_(p0) = G_GLOBAL_VALUE @dll_import + 1
%global:_(p0) = G_GLOBAL_VALUE @dll_import
%imm:_(s64) = G_CONSTANT i64 1
%ptr_add:_(p0) = G_PTR_ADD %global, %imm(s64)
$x0 = COPY %ptr_add(p0)
RET_ReallyLR implicit $x0

View File

@ -0,0 +1,284 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-apple-darwin -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
--- |
@g = external hidden global i32
%opaque = type opaque
@unsized = external hidden global %opaque
define void @one_ptr_add() { ret void }
define void @add_to_offset() { ret void }
define void @two_ptr_adds_same_offset() { ret void }
define void @two_ptr_adds_different_offset() { ret void }
define void @ptr_add_chain() { ret void }
define void @dont_fold_negative_offset() { ret void }
define void @dont_min_offset_less_than_curr_offset() { ret void }
define void @dont_fold_max_offset() { ret void }
define void @dont_fold_offset_larger_than_type_alloc() { ret void }
define void @dont_fold_unsized_type() { ret void }
...
---
name: one_ptr_add
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0
; We should fold the offset 1 into the G_GLOBAL_VALUE.
; CHECK-LABEL: name: one_ptr_add
; CHECK: liveins: $x0
; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1
; CHECK: $x0 = COPY [[GV]](p0)
; CHECK: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @g
%offset:_(s64) = G_CONSTANT i64 1
%ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
$x0 = COPY %ptr_add
RET_ReallyLR implicit $x0
...
---
name: add_to_offset
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0
; We should fold the offset 1 into the G_GLOBAL_VALUE, resulting in a
; final offset of 4.
; CHECK-LABEL: name: add_to_offset
; CHECK: liveins: $x0
; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 4
; CHECK: $x0 = COPY [[GV]](p0)
; CHECK: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @g + 3
%offset:_(s64) = G_CONSTANT i64 1
%ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
$x0 = COPY %ptr_add
RET_ReallyLR implicit $x0
...
---
name: two_ptr_adds_same_offset
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1
; We're allowed to have more than one G_PTR_ADD use. We should fold 1 into
; the G_GLOBAL_VALUE's offset.
; CHECK-LABEL: name: two_ptr_adds_same_offset
; CHECK: liveins: $x0, $x1
; CHECK: %val1:_(s64) = COPY $x0
; CHECK: %val2:_(s64) = COPY $x1
; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1
; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store 8)
; CHECK: G_STORE %val2(s64), [[GV]](p0) :: (store 8)
; CHECK: RET_ReallyLR implicit $x0
%val1:_(s64) = COPY $x0
%val2:_(s64) = COPY $x1
%global:_(p0) = G_GLOBAL_VALUE @g
%offset:_(s64) = G_CONSTANT i64 1
%ptr_add1:_(p0) = G_PTR_ADD %global, %offset(s64)
%ptr_add2:_(p0) = G_PTR_ADD %global, %offset(s64)
G_STORE %val1:_(s64), %ptr_add1 :: (store 8)
G_STORE %val2:_(s64), %ptr_add2 :: (store 8)
RET_ReallyLR implicit $x0
...
---
name: two_ptr_adds_different_offset
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1
; The lowest offset G_PTR_ADD (2) should be folded into the G_GLOBAL_VALUE.
;
; The other G_PTR_ADD should have its offset decremented by 2.
; CHECK-LABEL: name: two_ptr_adds_different_offset
; CHECK: liveins: $x0, $x1
; CHECK: %val1:_(s64) = COPY $x0
; CHECK: %val2:_(s64) = COPY $x1
; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 2
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK: %ptr_add2:_(p0) = G_PTR_ADD [[GV]], [[C]](s64)
; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store 8)
; CHECK: G_STORE %val2(s64), %ptr_add2(p0) :: (store 8)
; CHECK: RET_ReallyLR implicit $x0
%val1:_(s64) = COPY $x0
%val2:_(s64) = COPY $x1
%global:_(p0) = G_GLOBAL_VALUE @g
%offset1:_(s64) = G_CONSTANT i64 2
%offset2:_(s64) = G_CONSTANT i64 10
%ptr_add1:_(p0) = G_PTR_ADD %global, %offset1(s64)
%ptr_add2:_(p0) = G_PTR_ADD %global, %offset2(s64)
G_STORE %val1:_(s64), %ptr_add1 :: (store 8)
G_STORE %val2:_(s64), %ptr_add2 :: (store 8)
RET_ReallyLR implicit $x0
...
---
name: ptr_add_chain
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0
; We should be able to fold all of the G_PTR_ADDs, except for the last one
; into the G_GLOBAL_VALUE.
;
; (TypeAllocSize = 4, so the offset on the G_GLOBAL_VALUE can't go above
; that.)
; CHECK-LABEL: name: ptr_add_chain
; CHECK: liveins: $x0
; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK: %dont_fold_me:_(p0) = G_PTR_ADD [[GV]], [[C]](s64)
; CHECK: $x0 = COPY %dont_fold_me(p0)
; CHECK: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @g
%offset:_(s64) = G_CONSTANT i64 1
%ptr_add1:_(p0) = G_PTR_ADD %global, %offset(s64)
%ptr_add2:_(p0) = G_PTR_ADD %ptr_add1, %offset(s64)
%ptr_add3:_(p0) = G_PTR_ADD %ptr_add2, %offset(s64)
%ptr_add4:_(p0) = G_PTR_ADD %ptr_add3, %offset(s64)
%dont_fold_me:_(p0) = G_PTR_ADD %ptr_add4, %offset(s64)
$x0 = COPY %dont_fold_me
RET_ReallyLR implicit $x0
...
---
name: dont_fold_negative_offset
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0
; Do not add negative offsets to G_GLOBAL_VALUE.
; CHECK-LABEL: name: dont_fold_negative_offset
; CHECK: liveins: $x0
; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g
; CHECK: %offset:_(s64) = G_CONSTANT i64 -1
; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
; CHECK: $x0 = COPY %ptr_add(p0)
; CHECK: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @g
%offset:_(s64) = G_CONSTANT i64 -1
%ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
$x0 = COPY %ptr_add
RET_ReallyLR implicit $x0
...
---
name: dont_min_offset_less_than_curr_offset
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0
; Do not create smaller offsets. Ensures combine termination.
; CHECK-LABEL: name: dont_min_offset_less_than_curr_offset
; CHECK: liveins: $x0
; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g + 3
; CHECK: %offset:_(s64) = G_CONSTANT i64 -1
; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
; CHECK: $x0 = COPY %ptr_add(p0)
; CHECK: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @g + 3
%offset:_(s64) = G_CONSTANT i64 -1
%ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
$x0 = COPY %ptr_add
RET_ReallyLR implicit $x0
...
---
name: dont_fold_max_offset
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0
; 1 << 21 is the largest offset expressible in all object formats.
; Don't fold it.
; CHECK-LABEL: name: dont_fold_max_offset
; CHECK: liveins: $x0
; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g
; CHECK: %offset:_(s64) = G_CONSTANT i64 4292870144
; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
; CHECK: $x0 = COPY %ptr_add(p0)
; CHECK: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @g
%offset:_(s64) = G_CONSTANT i64 4292870144 ; 0xFFE00000 = (1 << 32) - (1 << 21), above the 1 << 21 limit
%ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
$x0 = COPY %ptr_add
RET_ReallyLR implicit $x0
...
---
name: dont_fold_offset_larger_than_type_alloc
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; Type alloc size = 4, offset = 16. Don't fold.
; CHECK-LABEL: name: dont_fold_offset_larger_than_type_alloc
; CHECK: %global:_(p0) = G_GLOBAL_VALUE @g
; CHECK: %offset:_(s64) = G_CONSTANT i64 16
; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
; CHECK: $x0 = COPY %ptr_add(p0)
; CHECK: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @g
%offset:_(s64) = G_CONSTANT i64 16
%ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
$x0 = COPY %ptr_add(p0)
RET_ReallyLR implicit $x0
...
---
name: dont_fold_unsized_type
alignment: 4
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; Check that we don't touch unsized globals.
; CHECK-LABEL: name: dont_fold_unsized_type
; CHECK: %global:_(p0) = G_GLOBAL_VALUE @unsized
; CHECK: %offset:_(s64) = G_CONSTANT i64 16
; CHECK: %ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
; CHECK: $x0 = COPY %ptr_add(p0)
; CHECK: RET_ReallyLR implicit $x0
%global:_(p0) = G_GLOBAL_VALUE @unsized
%offset:_(s64) = G_CONSTANT i64 16
%ptr_add:_(p0) = G_PTR_ADD %global, %offset(s64)
$x0 = COPY %ptr_add(p0)
RET_ReallyLR implicit $x0

View File

@ -6,6 +6,7 @@
target triple = "aarch64--"
@var = external global i8
define i8* @test_global() { ret i8* undef }
define i8* @test_global_with_offset() { ret i8* undef }
...
---
name: test_global
@ -17,15 +18,6 @@ body: |
; We don't want to lower to G_ADD_LOW when we need a GOT access, or when the code
; model isn't 'Small'.
; CHECK-LABEL: name: test_global
; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var
; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0)
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
; CMLARGE-LABEL: name: test_global
; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
; CMLARGE: $x0 = COPY [[PTRTOINT]](s64)
; PIC-LABEL: name: test_global
; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
@ -34,3 +26,17 @@ body: |
%1:_(s64) = G_PTRTOINT %0
$x0 = COPY %1
...
---
name: test_global_with_offset
registers:
- { id: 0, class: _ }
body: |
bb.0:
; PIC-LABEL: name: test_global_with_offset
; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + 1
; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
; PIC: $x0 = COPY [[PTRTOINT]](s64)
%0(p0) = G_GLOBAL_VALUE @var + 1
%1:_(s64) = G_PTRTOINT %0
$x0 = COPY %1
...

View File

@ -7,6 +7,7 @@
target triple = "aarch64--"
@var = external dso_local global i8
define i8* @test_global() { ret i8* undef }
define i8* @test_global_with_offset() { ret i8* undef }
...
---
name: test_global
@ -17,16 +18,11 @@ body: |
; We don't want to lower to G_ADD_LOW when we need a GOT access, or when the code
; model isn't 'Small'.
; CHECK-LABEL: name: test_global
; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var
; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0)
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
; PIC-LABEL: name: test_global
; PIC: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
; PIC: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
; PIC: $x0 = COPY [[PTRTOINT]](s64)
; CMLARGE-LABEL: name: test_global
; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
@ -35,3 +31,23 @@ body: |
%1:_(s64) = G_PTRTOINT %0
$x0 = COPY %1
...
---
name: test_global_with_offset
body: |
bb.0:
; When we legalize into ADRP + G_ADD_LOW, both should inherit the offset
; from the original G_GLOBAL_VALUE.
;
; CHECK-LABEL: name: test_global_with_offset
; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var + 1
; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var + 1
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[ADD_LOW]](p0)
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
; CMLARGE-LABEL: name: test_global_with_offset
; CMLARGE: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + 1
; CMLARGE: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[GV]](p0)
; CMLARGE: $x0 = COPY [[PTRTOINT]](s64)
%0:_(p0) = G_GLOBAL_VALUE @var + 1
%1:_(s64) = G_PTRTOINT %0
$x0 = COPY %1
...

View File

@ -0,0 +1,70 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
@x = external hidden local_unnamed_addr global i32*, align 8
define void @select_add_low_without_offset() { ret void }
define void @select_add_low_with_offset() { ret void }
define void @select_add_low_without_adrp() { ret void }
...
---
name: select_add_low_without_offset
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: select_add_low_without_offset
; CHECK: liveins: $x0
; CHECK: %add_low:gpr64 = MOVaddr target-flags(aarch64-page) @x, target-flags(aarch64-pageoff, aarch64-nc) @x
; CHECK: $x0 = COPY %add_low
; CHECK: RET_ReallyLR implicit $x0
%copy:gpr(p0) = COPY $x0
%adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x
%add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
$x0 = COPY %add_low
RET_ReallyLR implicit $x0
...
---
name: select_add_low_with_offset
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: select_add_low_with_offset
; CHECK: liveins: $x0
; CHECK: %add_low:gpr64 = MOVaddr target-flags(aarch64-page) @x + 1, target-flags(aarch64-pageoff, aarch64-nc) @x + 1
; CHECK: $x0 = COPY %add_low
; CHECK: RET_ReallyLR implicit $x0
%copy:gpr(p0) = COPY $x0
%adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 1
%add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 1
$x0 = COPY %add_low
RET_ReallyLR implicit $x0
...
---
name: select_add_low_without_adrp
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: select_add_low_without_adrp
; CHECK: liveins: $x0
; CHECK: %ptr:gpr64sp = COPY $x0
; CHECK: %add_low:gpr64sp = ADDXri %ptr, target-flags(aarch64-pageoff, aarch64-nc) @x, 0
; CHECK: $x0 = COPY %add_low
; CHECK: RET_ReallyLR implicit $x0
%ptr:gpr(p0) = COPY $x0
%add_low:gpr(p0) = G_ADD_LOW %ptr(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
$x0 = COPY %add_low
RET_ReallyLR implicit $x0

View File

@ -0,0 +1,38 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64 -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=LARGE
# RUN: llc -mtriple=aarch64 -code-model=small -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=SMALL
# RUN: llc -mtriple=aarch64 -code-model=tiny -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s --check-prefix=TINY
# Embedded LLVM IR module: declares the hidden external global @g that the
# machine function below refers to, plus an empty IR stub for that function.
--- |
@g = external hidden global i32
define void @select_gv_with_offset() { ret void }
...
---
# Instruction-selection test for a G_GLOBAL_VALUE that already carries an
# offset (@g + 1). The same input is run under three code models (see the RUN
# lines at the top of the file) and the "+ 1" must survive on every symbol
# operand of the selected sequence:
#   large code model - MOVZXi followed by three MOVKXi (g0..g3 pieces)
#   small code model - a single MOVaddr (page + page offset)
#   tiny code model  - a single ADR
name: select_gv_with_offset
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; LARGE-LABEL: name: select_gv_with_offset
; LARGE: liveins: $x0
; LARGE: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @g + 1, 0
; LARGE: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @g + 1, 16
; LARGE: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @g + 1, 32
; LARGE: %g:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @g + 1, 48
; LARGE: $x0 = COPY %g
; LARGE: RET_ReallyLR implicit $x0
; SMALL-LABEL: name: select_gv_with_offset
; SMALL: liveins: $x0
; SMALL: %g:gpr64 = MOVaddr target-flags(aarch64-page) @g + 1, target-flags(aarch64-pageoff, aarch64-nc) @g + 1
; SMALL: $x0 = COPY %g
; SMALL: RET_ReallyLR implicit $x0
; TINY-LABEL: name: select_gv_with_offset
; TINY: liveins: $x0
; TINY: %g:gpr64 = ADR @g + 1
; TINY: $x0 = COPY %g
; TINY: RET_ReallyLR implicit $x0
%g:gpr(p0) = G_GLOBAL_VALUE @g + 1
$x0 = COPY %g(p0)
RET_ReallyLR implicit $x0

View File

@ -41,7 +41,8 @@
@x = external hidden local_unnamed_addr global i32*, align 8
define void @store_adrp_add_low() { ret void }
define void @store_adrp_add_low_foldable_offset() { ret void }
define void @store_adrp_add_low_unfoldable_offset() { ret void }
...
---
@ -622,3 +623,43 @@ body: |
%adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x
%add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
...
---
# Instruction-selection test: an 8-byte G_STORE through ADRP + G_ADD_LOW of
# "@x + 8". The expected output keeps the ADRP and folds the page-offset
# symbol (including the + 8) directly into the STRXui addressing operand, so
# no separate add is emitted.
name: store_adrp_add_low_foldable_offset
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: store_adrp_add_low_foldable_offset
; CHECK: liveins: $x0
; CHECK: %copy:gpr64all = COPY $x0
; CHECK: %adrp:gpr64common = ADRP target-flags(aarch64-page) @x + 8
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %copy
; CHECK: STRXui [[COPY]], %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x + 8 :: (store 8 into @x)
%copy:gpr(p0) = COPY $x0
%adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 8
%add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 8
G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
...
---
# Instruction-selection test: same shape as the foldable case, but with
# "@x + 3". Here the expected output materializes the full address with a
# MOVaddr and stores through it with a zero immediate instead of folding the
# symbol into the STRXui.
# NOTE(review): presumably the fold is rejected because 3 is not a multiple of
# the 8-byte store size - confirm against the selector's addressing-mode code.
name: store_adrp_add_low_unfoldable_offset
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: store_adrp_add_low_unfoldable_offset
; CHECK: liveins: $x0
; CHECK: %copy:gpr64all = COPY $x0
; CHECK: %add_low:gpr64common = MOVaddr target-flags(aarch64-page) @x + 3, target-flags(aarch64-pageoff, aarch64-nc) @x + 3
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %copy
; CHECK: STRXui [[COPY]], %add_low, 0 :: (store 8 into @x)
%copy:gpr(p0) = COPY $x0
%adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 3
%add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 3
G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)

View File

@ -1,69 +1,152 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
; RUN: llc < %s -global-isel -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=GISEL
; Globals addressed by the tests below.
;   @x1 - two i64 elements (16 bytes).
;   @x2 - 16777216 i64 elements, used for the large-offset cases.
;   @x3 - struct of a 9-pointer array followed by an 8-pointer array.
@x1 = external hidden global [2 x i64]
@x2 = external hidden global [16777216 x i64]
@x3 = external hidden global { [9 x i8*], [8 x i8*] }
; Loads from element 2 of the two-element @x1, i.e. byte offset 16. Both
; selectors are expected to fold that offset into the adrp / :lo12: relocation
; operands so that no separate add is needed.
define i64 @f1() {
; CHECK-LABEL: f1:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, x1+16
; CHECK-NEXT: ldr x0, [x8, :lo12:x1+16]
; CHECK-NEXT: ret
;
; GISEL-LABEL: f1:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, x1+16
; GISEL-NEXT: ldr x0, [x8, :lo12:x1+16]
; GISEL-NEXT: ret
%l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
ret i64 %l
}
; Loads from element 3 of the two-element @x1, i.e. byte offset 24. The
; expected code addresses plain x1 and applies #24 as the load immediate; the
; offset is not folded into the relocations.
; NOTE(review): presumably the fold is rejected because offset 24 lies beyond
; the 16-byte object - confirm against performGlobalAddressCombine.
define i64 @f2() {
; CHECK-LABEL: f2:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, x1
; CHECK-NEXT: add x8, x8, :lo12:x1
; CHECK-NEXT: ldr x0, [x8, #24]
; CHECK-NEXT: ret
;
; GISEL-LABEL: f2:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, x1
; GISEL-NEXT: add x8, x8, :lo12:x1
; GISEL-NEXT: ldr x0, [x8, #24]
; GISEL-NEXT: ret
%l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3)
ret i64 %l
}
; Loads an i64 from byte offset 1 of @x1. The offset is expected to be folded
; into the adrp and add relocations (x1+1), while the load itself uses a plain
; [x8] address.
; NOTE(review): presumably the :lo12: part cannot be folded into the ldr
; because the address is not 8-byte aligned - confirm.
define i64 @f3() {
; CHECK-LABEL: f3:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, x1+1
; CHECK-NEXT: add x8, x8, :lo12:x1+1
; CHECK-NEXT: ldr x0, [x8]
; CHECK-NEXT: ret
;
; GISEL-LABEL: f3:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, x1+1
; GISEL-NEXT: add x8, x8, :lo12:x1+1
; GISEL-NEXT: ldr x0, [x8]
; GISEL-NEXT: ret
%l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*)
ret i64 %l
}
; Loads a [2 x i64] from byte offset 8 of @x2. SelectionDAG forms one address
; (x2+8) and a single ldp; GlobalISel currently emits two adrp instructions
; and two separate ldr instructions.
define [2 x i64] @f4() {
; FIXME: GlobalISel misses the opportunity to form a LDP here.
;
; CHECK-LABEL: f4:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, x2+8
; CHECK-NEXT: add x8, x8, :lo12:x2+8
; CHECK-NEXT: ldp x0, x1, [x8]
; CHECK-NEXT: ret
;
; GISEL-LABEL: f4:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x9, x2+8
; GISEL-NEXT: adrp x8, x2+8
; GISEL-NEXT: add x9, x9, :lo12:x2+8
; GISEL-NEXT: ldr x0, [x8, :lo12:x2+8]
; GISEL-NEXT: ldr x1, [x9, #8]
; GISEL-NEXT: ret
%l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
ret [2 x i64] %l
}
; Loads element 262143 of @x2, i.e. byte offset 2097144 (262143 * 8). Both
; selectors are expected to fold the whole offset into the adrp / :lo12:
; relocation operands.
define i64 @f5() {
; CHECK-LABEL: f5:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, x2+2097144
; CHECK-NEXT: ldr x0, [x8, :lo12:x2+2097144]
; CHECK-NEXT: ret
;
; GISEL-LABEL: f5:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, x2+2097144
; GISEL-NEXT: ldr x0, [x8, :lo12:x2+2097144]
; GISEL-NEXT: ret
%l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
ret i64 %l
}
; Loads element 262144 of @x2, i.e. byte offset 2097152 (262144 * 8), one
; element past the case in f5. Here the offset is expected NOT to be folded:
; the address of plain x2 is formed and the offset is materialized in a
; register and used as a register-offset in the load.
; NOTE(review): presumably 2097152 (2^21) exceeds the largest offset the
; combine is willing to put in the relocation addend - confirm.
define i64 @f6() {
; CHECK-LABEL: f6:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, x2
; CHECK-NEXT: add x8, x8, :lo12:x2
; CHECK-NEXT: mov w9, #2097152
; CHECK-NEXT: ldr x0, [x8, x9]
; CHECK-NEXT: ret
;
; GISEL-LABEL: f6:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x9, x2
; GISEL-NEXT: mov w8, #2097152
; GISEL-NEXT: add x9, x9, :lo12:x2
; GISEL-NEXT: ldr x0, [x9, x8]
; GISEL-NEXT: ret
%l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
ret i64 %l
}
; The loaded address is a constant expression that routes a pointer into @x3
; (second array, element 2) through a <2 x i64> / i128 lshr / trunc /
; inttoptr round trip and then steps 5 i32 elements (20 bytes) further.
; SelectionDAG folds everything into x3+108; GlobalISel only folds the x3+88
; part and lowers the vector/shift round trip literally before applying #20
; at the load.
define i32 @f7() {
; FIXME: GlobalISel doesn't handle vectors well.
;
; CHECK-LABEL: f7:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, x3+108
; CHECK-NEXT: ldr w0, [x8, :lo12:x3+108]
; CHECK-NEXT: ret
;
; GISEL-LABEL: f7:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: adrp x8, x3+88
; GISEL-NEXT: add x8, x8, :lo12:x3+88
; GISEL-NEXT: mov v0.d[1], x8
; GISEL-NEXT: mov w9, #64
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: sub x8, x9, #64 // =64
; GISEL-NEXT: fmov x11, d1
; GISEL-NEXT: fmov x10, d0
; GISEL-NEXT: lsl x12, x11, x8
; GISEL-NEXT: cmp x9, #64 // =64
; GISEL-NEXT: lsr x8, x11, x8
; GISEL-NEXT: orr x11, x12, x10, lsr #0
; GISEL-NEXT: csel x8, x11, x8, lo
; GISEL-NEXT: cmp x9, #0 // =0
; GISEL-NEXT: csel x8, x10, x8, eq
; GISEL-NEXT: ldr w0, [x8, #20]
; GISEL-NEXT: ret
entry:
%l = load i32, i32* getelementptr (i32, i32* inttoptr (i64 trunc (i128 lshr (i128 bitcast (<2 x i64> <i64 undef, i64 ptrtoint (i8** getelementptr inbounds ({ [9 x i8*], [8 x i8*] }, { [9 x i8*], [8 x i8*] }* @x3, i64 0, inrange i32 1, i64 2) to i64)> to i128), i128 64) to i64) to i32*), i64 5)
ret i32 %l
}