1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[AArch64][GlobalISel] Support truncstorei8/i16 w/ combine to form truncating G_STOREs.

This needs some tablegen changes so that we can actually import the patterns properly.

Differential Revision: https://reviews.llvm.org/D102204
This commit is contained in:
Amara Emerson 2021-01-24 00:35:15 -08:00
parent 555e731f7d
commit ee460561e0
9 changed files with 197 additions and 49 deletions

View File

@ -1114,16 +1114,19 @@ def truncstorei8 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = true;
let MemoryVT = i8;
let IsTruncStore = true;
}
// A truncating store whose memory type is i16: the stored value is narrowed
// from a wider scalar before hitting memory. IsTruncStore lets the GlobalISel
// emitter import this pattern for truncating G_STOREs as well as SDAG.
def truncstorei16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = true;
let MemoryVT = i16;
let IsTruncStore = true;
}
// A truncating store whose memory type is i32 (e.g. storing the low 32 bits
// of an s64 value). IsTruncStore marks it for GlobalISel pattern import.
def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = true;
let MemoryVT = i32;
let IsTruncStore = true;
}
def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {

View File

@ -182,6 +182,14 @@ def lower_vector_fcmp : GICombineRule<
[{ return lowerVectorFCMP(*${root}, MRI, B); }]),
(apply [{}])>;
// Fold (G_STORE (G_TRUNC x)) into a single truncating G_STORE of x.
// The matchdata Register carries the truncate's wide source from the match
// step (matchFormTruncstore) to the apply step (applyFormTruncstore).
def form_truncstore_matchdata : GIDefMatchData<"Register">;
def form_truncstore : GICombineRule<
(defs root:$root, form_truncstore_matchdata:$matchinfo),
(match (wip_match_opcode G_STORE):$root,
[{ return matchFormTruncstore(*${root}, MRI, ${matchinfo}); }]),
(apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }])
>;
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@ -189,7 +197,7 @@ def AArch64PostLegalizerLoweringHelper
: GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
[shuffle_vector_lowering, vashr_vlshr_imm,
icmp_lowering, build_vector_lowering,
lower_vector_fcmp]> {
lower_vector_fcmp, form_truncstore]> {
let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
}

View File

@ -306,11 +306,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_STORE)
.legalForTypesWithMemDesc({{s8, p0, 8, 8},
{s16, p0, 8, 8}, // truncstorei8 from s16
{s32, p0, 8, 8}, // truncstorei8 from s32
{s64, p0, 8, 8}, // truncstorei8 from s64
{s16, p0, 16, 8},
{s32, p0, 16, 8}, // truncstorei16 from s32
{s64, p0, 16, 8}, // truncstorei16 from s64
{s32, p0, 8, 8},
{s32, p0, 16, 8},
{s32, p0, 32, 8},
{s64, p0, 64, 8},
{s64, p0, 32, 8}, // truncstorei32 from s64
{p0, p0, 64, 8},
{s128, p0, 128, 8},
{v16s8, p0, 128, 8},

View File

@ -951,6 +951,27 @@ static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
return false;
}
/// Match a scalar G_STORE whose stored value comes straight from a G_TRUNC.
///
/// \param MI      the candidate G_STORE (asserted, not checked).
/// \param SrcReg  [out] on success, the truncate's wider source register,
///                which the apply step will store directly.
/// \returns true iff the combine applies.
static bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                                Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Register StoredReg = MI.getOperand(0).getReg();
  // Only scalar stores are combined; vector truncstores are left alone.
  if (!MRI.getType(StoredReg).isVector())
    return mi_match(StoredReg, MRI, m_GTrunc(m_Reg(SrcReg)));
  return false;
}
// Rewrite the matched G_STORE to store the truncate's wide source (SrcReg,
// found by matchFormTruncstore) directly, forming a truncating store. The
// store's memory operand is not touched, so the narrow store size is kept.
// NOTE: changingInstr/changedInstr must bracket the operand mutation so the
// change observer (e.g. the combiner worklist) is kept in sync.
static bool applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B,
MachineIRBuilder &B,
GISelChangeObserver &Observer,
Register &SrcReg) {
assert(MI.getOpcode() == TargetOpcode::G_STORE);
Observer.changingInstr(MI);
// Replace the stored value operand; the G_TRUNC becomes dead and is cleaned
// up separately.
MI.getOperand(0).setReg(SrcReg);
Observer.changedInstr(MI);
return true;
}
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS

View File

@ -888,22 +888,18 @@ define void @atomc_store(i32* %p) #0 {
define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_8:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: add x8, x0, w1, sxtw
; CHECK-NOLSE-O1-NEXT: sub x9, x0, #256 ; =256
; CHECK-NOLSE-O1-NEXT: add x10, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT: strb w2, [x0, #4095]
; CHECK-NOLSE-O1-NEXT: strb w2, [x0, w1, sxtw]
; CHECK-NOLSE-O1-NEXT: sturb w2, [x0, #-256]
; CHECK-NOLSE-O1-NEXT: strb w2, [x8]
; CHECK-NOLSE-O1-NEXT: strb w2, [x9]
; CHECK-NOLSE-O1-NEXT: strb w2, [x10]
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: strb w2, [x0, #4095]
; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw
; CHECK-NOLSE-O0-NEXT: strb w2, [x8]
; CHECK-NOLSE-O0-NEXT: subs x8, x0, #256 ; =256
; CHECK-NOLSE-O0-NEXT: strb w2, [x8]
; CHECK-NOLSE-O0-NEXT: strb w2, [x0, w1, sxtw]
; CHECK-NOLSE-O0-NEXT: sturb w2, [x0, #-256]
; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT: strb w2, [x8]
; CHECK-NOLSE-O0-NEXT: ret
@ -911,10 +907,8 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: strb w2, [x0, #4095]
; CHECK-LSE-O1-NEXT: add x8, x0, w1, sxtw
; CHECK-LSE-O1-NEXT: strb w2, [x8]
; CHECK-LSE-O1-NEXT: sub x8, x0, #256 ; =256
; CHECK-LSE-O1-NEXT: strb w2, [x8]
; CHECK-LSE-O1-NEXT: strb w2, [x0, w1, sxtw]
; CHECK-LSE-O1-NEXT: sturb w2, [x0, #-256]
; CHECK-LSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT: strb w2, [x8]
; CHECK-LSE-O1-NEXT: ret
@ -922,10 +916,8 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
; CHECK-LSE-O0-LABEL: atomic_store_relaxed_8:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: strb w2, [x0, #4095]
; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw
; CHECK-LSE-O0-NEXT: strb w2, [x8]
; CHECK-LSE-O0-NEXT: subs x8, x0, #256 ; =256
; CHECK-LSE-O0-NEXT: strb w2, [x8]
; CHECK-LSE-O0-NEXT: strb w2, [x0, w1, sxtw]
; CHECK-LSE-O0-NEXT: sturb w2, [x0, #-256]
; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT: strb w2, [x8]
; CHECK-LSE-O0-NEXT: ret
@ -947,22 +939,18 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_16:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: add x8, x0, w1, sxtw #1
; CHECK-NOLSE-O1-NEXT: sub x9, x0, #256 ; =256
; CHECK-NOLSE-O1-NEXT: add x10, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT: strh w2, [x0, #8190]
; CHECK-NOLSE-O1-NEXT: strh w2, [x0, w1, sxtw #1]
; CHECK-NOLSE-O1-NEXT: sturh w2, [x0, #-256]
; CHECK-NOLSE-O1-NEXT: strh w2, [x8]
; CHECK-NOLSE-O1-NEXT: strh w2, [x9]
; CHECK-NOLSE-O1-NEXT: strh w2, [x10]
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: strh w2, [x0, #8190]
; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw #1
; CHECK-NOLSE-O0-NEXT: strh w2, [x8]
; CHECK-NOLSE-O0-NEXT: subs x8, x0, #256 ; =256
; CHECK-NOLSE-O0-NEXT: strh w2, [x8]
; CHECK-NOLSE-O0-NEXT: strh w2, [x0, w1, sxtw #1]
; CHECK-NOLSE-O0-NEXT: sturh w2, [x0, #-256]
; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT: strh w2, [x8]
; CHECK-NOLSE-O0-NEXT: ret
@ -970,10 +958,8 @@ define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: strh w2, [x0, #8190]
; CHECK-LSE-O1-NEXT: add x8, x0, w1, sxtw #1
; CHECK-LSE-O1-NEXT: strh w2, [x8]
; CHECK-LSE-O1-NEXT: sub x8, x0, #256 ; =256
; CHECK-LSE-O1-NEXT: strh w2, [x8]
; CHECK-LSE-O1-NEXT: strh w2, [x0, w1, sxtw #1]
; CHECK-LSE-O1-NEXT: sturh w2, [x0, #-256]
; CHECK-LSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT: strh w2, [x8]
; CHECK-LSE-O1-NEXT: ret
@ -981,10 +967,8 @@ define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
; CHECK-LSE-O0-LABEL: atomic_store_relaxed_16:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: strh w2, [x0, #8190]
; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw #1
; CHECK-LSE-O0-NEXT: strh w2, [x8]
; CHECK-LSE-O0-NEXT: subs x8, x0, #256 ; =256
; CHECK-LSE-O0-NEXT: strh w2, [x8]
; CHECK-LSE-O0-NEXT: strh w2, [x0, w1, sxtw #1]
; CHECK-LSE-O0-NEXT: sturh w2, [x0, #-256]
; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT: strh w2, [x8]
; CHECK-LSE-O0-NEXT: ret

View File

@ -491,3 +491,29 @@ body: |
%val:_(<4 x s64>) = G_LOAD %ptr(p0) :: (load 32)
G_STORE %val(<4 x s64>), %ptr(p0) :: (store 32)
RET_ReallyLR
...
---
# Legalizer coverage for scalar truncating stores: s32/s64 values stored with
# narrower memory sizes (store 1/2/4) must be accepted as legal, per the new
# legalForTypesWithMemDesc entries.
# NOTE(review): %val64 copies $x2 but the liveins list only has $x0, $w1 —
# verify $x2 should be declared live-in.
# NOTE(review): %2 (G_TRUNC of %1) appears dead in this test — confirm it is
# intentional.
name: test_trunc_store
body: |
bb.0:
liveins: $x0, $w1
; CHECK-LABEL: name: test_trunc_store
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: %val64:_(s64) = COPY $x2
; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 1)
; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 2)
; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 1)
; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 2)
; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 4)
%0:_(p0) = COPY $x0
%1:_(s32) = COPY $w1
%2:_(s8) = G_TRUNC %1(s32)
%val64:_(s64) = COPY $x2
G_STORE %1(s32), %0(p0) :: (store 1)
G_STORE %1(s32), %0(p0) :: (store 2)
G_STORE %val64(s64), %0(p0) :: (store 1)
G_STORE %val64(s64), %0(p0) :: (store 2)
G_STORE %val64(s64), %0(p0) :: (store 4)
...

View File

@ -0,0 +1,34 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -global-isel -verify-machineinstrs %s -o - | FileCheck %s
---
# Scalar case: the post-legalizer-lowering combine folds the G_TRUNC into the
# G_STORE, so the wide s32 %val is stored directly as (store 1).
name: truncstore_s8
legalized: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: truncstore_s8
; CHECK: %ptr:_(p0) = COPY $x0
; CHECK: %val:_(s32) = COPY $w1
; CHECK: G_STORE %val(s32), %ptr(p0) :: (store 1)
%ptr:_(p0) = COPY $x0
%val:_(s32) = COPY $w1
%trunc:_(s8) = G_TRUNC %val
G_STORE %trunc(s8), %ptr(p0) :: (store 1)
...
---
# Negative test: vector truncstores are not combined (the match bails out on
# vector types), so the G_TRUNC must survive unchanged.
name: truncstore_vector
legalized: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: truncstore_vector
; CHECK: %ptr:_(p0) = COPY $x0
; CHECK: %val:_(<4 x s32>) = COPY $q0
; CHECK: %trunc:_(<4 x s8>) = G_TRUNC %val(<4 x s32>)
; CHECK: G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4)
%ptr:_(p0) = COPY $x0
%val:_(<4 x s32>) = COPY $q0
%trunc:_(<4 x s8>) = G_TRUNC %val
G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4)
...

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@ -43,6 +43,8 @@
define void @store_adrp_add_low() { ret void }
define void @store_adrp_add_low_foldable_offset() { ret void }
define void @store_adrp_add_low_unfoldable_offset() { ret void }
define void @truncstores(i8* %addr) { ret void }
...
---
@ -663,3 +665,50 @@ body: |
%adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 3
%add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 3
G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
...
---
# Instruction selection for truncating G_STOREs formed by the combine:
# s32 sources select STRBB/STRHH (and STURHHi for an unscaled offset); s64
# sources first get a sub_32 subregister copy, then the 32-bit store variant.
name: truncstores
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0, $w1, $x2
; CHECK-LABEL: name: truncstores
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: %val32:gpr32 = COPY $w1
; CHECK: %val64:gpr64 = COPY $x2
; CHECK: STRBBui %val32, [[COPY]], 0 :: (store 1)
; CHECK: STRBBui %val32, [[COPY]], 43 :: (store 1)
; CHECK: STRHHui %val32, [[COPY]], 0 :: (store 2)
; CHECK: STURHHi %val32, [[COPY]], 43 :: (store 2)
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %val64.sub_32
; CHECK: STRHHui [[COPY1]], [[COPY]], 0 :: (store 2)
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY %val64.sub_32
; CHECK: STURHHi [[COPY2]], [[COPY]], 43 :: (store 2)
; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY %val64.sub_32
; CHECK: STRWui [[COPY3]], [[COPY]], 0 :: (store 4)
; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY %val64.sub_32
; CHECK: STURWi [[COPY4]], [[COPY]], 43 :: (store 4)
%0:gpr(p0) = COPY $x0
%val32:gpr(s32) = COPY $w1
%val64:gpr(s64) = COPY $x2
G_STORE %val32, %0 :: (store 1)
; unscaled offset:
%cst:gpr(s64) = G_CONSTANT i64 43
%newptr:gpr(p0) = G_PTR_ADD %0, %cst
G_STORE %val32, %newptr :: (store 1)
G_STORE %val32, %0 :: (store 2)
; unscaled offset:
G_STORE %val32, %newptr :: (store 2)
G_STORE %val64, %0 :: (store 2)
; unscaled offset:
G_STORE %val64, %newptr :: (store 2)
G_STORE %val64, %0 :: (store 4)
; unscaled offset:
G_STORE %val64, %newptr :: (store 4)
...

View File

@ -3657,6 +3657,10 @@ private:
Optional<const CodeGenRegisterClass *>
inferRegClassFromPattern(TreePatternNode *N);
/// Return the size of the MemoryVT in this predicate, if possible.
Optional<unsigned>
getMemSizeBitsFromPredicate(const TreePredicateFn &Predicate);
// Add builtin predicates.
Expected<InstructionMatcher &>
addBuiltinPredicates(const Record *SrcGIEquivOrNull,
@ -3769,6 +3773,17 @@ Error GlobalISelEmitter::importRulePredicates(RuleMatcher &M,
return Error::success();
}
/// Return the size of \p Predicate's MemoryVT in bits, rounded up to a whole
/// number of bytes, or None if the memory VT cannot be converted to an LLT.
Optional<unsigned> GlobalISelEmitter::getMemSizeBitsFromPredicate(
    const TreePredicateFn &Predicate) {
  auto MaybeMemTy = MVTToLLT(getValueType(Predicate.getMemoryVT()));
  if (!MaybeMemTy)
    return None;
  // Round up so unusual types such as i1 aren't rounded down to zero bytes.
  unsigned SizeInBits = MaybeMemTy->get().getSizeInBits();
  return llvm::alignTo(SizeInBits, 8);
}
Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
const Record *SrcGIEquivOrNull, const TreePredicateFn &Predicate,
InstructionMatcher &InsnMatcher, bool &HasAddedMatcher) {
@ -3808,9 +3823,18 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
if (Predicate.isStore()) {
if (Predicate.isTruncStore()) {
// FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
if (Predicate.getMemoryVT() != nullptr) {
// FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
auto MemSizeInBits = getMemSizeBitsFromPredicate(Predicate);
if (!MemSizeInBits)
return failedImport("MemVT could not be converted to LLT");
InsnMatcher.addPredicate<MemorySizePredicateMatcher>(0, *MemSizeInBits /
8);
} else {
InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
}
return InsnMatcher;
}
if (Predicate.isNonTruncStore()) {
@ -3837,19 +3861,12 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
if (Predicate.getMemoryVT() != nullptr) {
Optional<LLTCodeGen> MemTyOrNone =
MVTToLLT(getValueType(Predicate.getMemoryVT()));
if (!MemTyOrNone)
auto MemSizeInBits = getMemSizeBitsFromPredicate(Predicate);
if (!MemSizeInBits)
return failedImport("MemVT could not be converted to LLT");
// MMO's work in bytes so we must take care of unusual types like i1
// don't round down.
unsigned MemSizeInBits =
llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
InsnMatcher.addPredicate<MemorySizePredicateMatcher>(0,
MemSizeInBits / 8);
*MemSizeInBits / 8);
return InsnMatcher;
}
}