1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[DAGCombiner] allow store merging non-i8 truncated ops

We have a gap in our store merging capabilities for shift+truncate
patterns as discussed in:
https://llvm.org/PR46662

I generalized the code/comments for this function in earlier commits,
so we only need to ease the type restriction and adjust the address/endian
checking to make this work.

AArch64 lets us switch endianness, so its tests make sure that the
patterns are matched in either byte order.

Differential Revision: https://reviews.llvm.org/D86420
This commit is contained in:
Sanjay Patel 2020-08-26 15:21:54 -04:00
parent 22484c1a0d
commit 03d2b97d01
3 changed files with 187 additions and 108 deletions

View File

@ -6869,8 +6869,9 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
SmallVector<StoreSDNode *, 8> Stores;
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
// TODO: Allow unordered atomics when wider type is legal (see D66309)
if (Store->getMemoryVT() != MVT::i8 || !Store->isSimple() ||
Store->isIndexed())
EVT MemVT = Store->getMemoryVT();
if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
!Store->isSimple() || Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
@ -6959,12 +6960,6 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");
// Check if the bytes of the combined value we are looking at match with
// either big or little endian value store.
Optional<bool> IsBigEndian = isBigEndian(OffsetMap, FirstOffset);
if (!IsBigEndian.hasValue())
return SDValue();
// Check that a store of the wide type is both allowed and fast on the target
const DataLayout &Layout = DAG.getDataLayout();
bool Fast = false;
@ -6973,6 +6968,31 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
if (!Allowed || !Fast)
return SDValue();
// Check if the pieces of the value are going to the expected places in memory
// to merge the stores.
auto checkOffsets = [&](bool MatchLittleEndian) {
if (MatchLittleEndian) {
for (unsigned i = 0; i != NumStores; ++i)
if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
return false;
} else { // MatchBigEndian by reversing loop counter.
for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
return false;
}
return true;
};
// Check if the offsets line up for the native data layout of this target.
bool NeedBswap = false;
if (!checkOffsets(Layout.isLittleEndian())) {
// Special-case: check if byte offsets line up for the opposite endian.
// TODO: We could use rotates for 16/32-bit merge pairs.
if (NarrowNumBits != 8 || !checkOffsets(Layout.isBigEndian()))
return SDValue();
NeedBswap = true;
}
SDLoc DL(N);
if (WideVT != SourceValue.getValueType()) {
assert(SourceValue.getValueType().getSizeInBits() > WideNumBits &&
@ -6983,7 +7003,6 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
// store and byte shuffling instead of several stores and byte shuffling.
bool NeedBswap = Layout.isBigEndian() != *IsBigEndian;
if (NeedBswap)
SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);

View File

@ -200,12 +200,17 @@ define void @be_i32_to_i8_order(i32 %x, i8* %p0) {
}
define void @le_i32_to_i16(i32 %x, i16* %p0) {
; CHECK-LABEL: le_i32_to_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16
; CHECK-NEXT: strh w0, [x1]
; CHECK-NEXT: strh w8, [x1, #2]
; CHECK-NEXT: ret
; LE-LABEL: le_i32_to_i16:
; LE: // %bb.0:
; LE-NEXT: str w0, [x1]
; LE-NEXT: ret
;
; BE-LABEL: le_i32_to_i16:
; BE: // %bb.0:
; BE-NEXT: lsr w8, w0, #16
; BE-NEXT: strh w0, [x1]
; BE-NEXT: strh w8, [x1, #2]
; BE-NEXT: ret
%sh1 = lshr i32 %x, 16
%t0 = trunc i32 %x to i16
%t1 = trunc i32 %sh1 to i16
@ -216,12 +221,17 @@ define void @le_i32_to_i16(i32 %x, i16* %p0) {
}
define void @le_i32_to_i16_order(i32 %x, i16* %p0) {
; CHECK-LABEL: le_i32_to_i16_order:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16
; CHECK-NEXT: strh w8, [x1, #2]
; CHECK-NEXT: strh w0, [x1]
; CHECK-NEXT: ret
; LE-LABEL: le_i32_to_i16_order:
; LE: // %bb.0:
; LE-NEXT: str w0, [x1]
; LE-NEXT: ret
;
; BE-LABEL: le_i32_to_i16_order:
; BE: // %bb.0:
; BE-NEXT: lsr w8, w0, #16
; BE-NEXT: strh w8, [x1, #2]
; BE-NEXT: strh w0, [x1]
; BE-NEXT: ret
%sh1 = lshr i32 %x, 16
%t0 = trunc i32 %x to i16
%t1 = trunc i32 %sh1 to i16
@ -232,12 +242,17 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) {
}
define void @be_i32_to_i16(i32 %x, i16* %p0) {
; CHECK-LABEL: be_i32_to_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16
; CHECK-NEXT: strh w0, [x1, #2]
; CHECK-NEXT: strh w8, [x1]
; CHECK-NEXT: ret
; LE-LABEL: be_i32_to_i16:
; LE: // %bb.0:
; LE-NEXT: lsr w8, w0, #16
; LE-NEXT: strh w0, [x1, #2]
; LE-NEXT: strh w8, [x1]
; LE-NEXT: ret
;
; BE-LABEL: be_i32_to_i16:
; BE: // %bb.0:
; BE-NEXT: str w0, [x1]
; BE-NEXT: ret
%sh1 = lshr i32 %x, 16
%t0 = trunc i32 %x to i16
%t1 = trunc i32 %sh1 to i16
@ -248,12 +263,17 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) {
}
define void @be_i32_to_i16_order(i32 %x, i16* %p0) {
; CHECK-LABEL: be_i32_to_i16_order:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16
; CHECK-NEXT: strh w8, [x1]
; CHECK-NEXT: strh w0, [x1, #2]
; CHECK-NEXT: ret
; LE-LABEL: be_i32_to_i16_order:
; LE: // %bb.0:
; LE-NEXT: lsr w8, w0, #16
; LE-NEXT: strh w8, [x1]
; LE-NEXT: strh w0, [x1, #2]
; LE-NEXT: ret
;
; BE-LABEL: be_i32_to_i16_order:
; BE: // %bb.0:
; BE-NEXT: str w0, [x1]
; BE-NEXT: ret
%sh1 = lshr i32 %x, 16
%t0 = trunc i32 %x to i16
%t1 = trunc i32 %sh1 to i16
@ -440,16 +460,21 @@ define void @be_i64_to_i8_order(i64 %x, i8* %p0) {
}
define void @le_i64_to_i16(i64 %x, i16* %p0) {
; CHECK-LABEL: le_i64_to_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #16
; CHECK-NEXT: lsr x9, x0, #32
; CHECK-NEXT: lsr x10, x0, #48
; CHECK-NEXT: strh w0, [x1]
; CHECK-NEXT: strh w8, [x1, #2]
; CHECK-NEXT: strh w9, [x1, #4]
; CHECK-NEXT: strh w10, [x1, #6]
; CHECK-NEXT: ret
; LE-LABEL: le_i64_to_i16:
; LE: // %bb.0:
; LE-NEXT: str x0, [x1]
; LE-NEXT: ret
;
; BE-LABEL: le_i64_to_i16:
; BE: // %bb.0:
; BE-NEXT: lsr x8, x0, #16
; BE-NEXT: lsr x9, x0, #32
; BE-NEXT: lsr x10, x0, #48
; BE-NEXT: strh w0, [x1]
; BE-NEXT: strh w8, [x1, #2]
; BE-NEXT: strh w9, [x1, #4]
; BE-NEXT: strh w10, [x1, #6]
; BE-NEXT: ret
%sh1 = lshr i64 %x, 16
%sh2 = lshr i64 %x, 32
%sh3 = lshr i64 %x, 48
@ -468,16 +493,21 @@ define void @le_i64_to_i16(i64 %x, i16* %p0) {
}
define void @le_i64_to_i16_order(i64 %x, i16* %p0) {
; CHECK-LABEL: le_i64_to_i16_order:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #16
; CHECK-NEXT: lsr x9, x0, #32
; CHECK-NEXT: lsr x10, x0, #48
; CHECK-NEXT: strh w0, [x1]
; CHECK-NEXT: strh w8, [x1, #2]
; CHECK-NEXT: strh w10, [x1, #6]
; CHECK-NEXT: strh w9, [x1, #4]
; CHECK-NEXT: ret
; LE-LABEL: le_i64_to_i16_order:
; LE: // %bb.0:
; LE-NEXT: str x0, [x1]
; LE-NEXT: ret
;
; BE-LABEL: le_i64_to_i16_order:
; BE: // %bb.0:
; BE-NEXT: lsr x8, x0, #16
; BE-NEXT: lsr x9, x0, #32
; BE-NEXT: lsr x10, x0, #48
; BE-NEXT: strh w0, [x1]
; BE-NEXT: strh w8, [x1, #2]
; BE-NEXT: strh w10, [x1, #6]
; BE-NEXT: strh w9, [x1, #4]
; BE-NEXT: ret
%sh1 = lshr i64 %x, 16
%sh2 = lshr i64 %x, 32
%sh3 = lshr i64 %x, 48
@ -496,16 +526,21 @@ define void @le_i64_to_i16_order(i64 %x, i16* %p0) {
}
define void @be_i64_to_i16(i64 %x, i16* %p0) {
; CHECK-LABEL: be_i64_to_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #16
; CHECK-NEXT: lsr x9, x0, #32
; CHECK-NEXT: lsr x10, x0, #48
; CHECK-NEXT: strh w0, [x1, #6]
; CHECK-NEXT: strh w8, [x1, #4]
; CHECK-NEXT: strh w9, [x1, #2]
; CHECK-NEXT: strh w10, [x1]
; CHECK-NEXT: ret
; LE-LABEL: be_i64_to_i16:
; LE: // %bb.0:
; LE-NEXT: lsr x8, x0, #16
; LE-NEXT: lsr x9, x0, #32
; LE-NEXT: lsr x10, x0, #48
; LE-NEXT: strh w0, [x1, #6]
; LE-NEXT: strh w8, [x1, #4]
; LE-NEXT: strh w9, [x1, #2]
; LE-NEXT: strh w10, [x1]
; LE-NEXT: ret
;
; BE-LABEL: be_i64_to_i16:
; BE: // %bb.0:
; BE-NEXT: str x0, [x1]
; BE-NEXT: ret
%sh1 = lshr i64 %x, 16
%sh2 = lshr i64 %x, 32
%sh3 = lshr i64 %x, 48
@ -524,16 +559,21 @@ define void @be_i64_to_i16(i64 %x, i16* %p0) {
}
define void @be_i64_to_i16_order(i64 %x, i16* %p0) {
; CHECK-LABEL: be_i64_to_i16_order:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #16
; CHECK-NEXT: lsr x9, x0, #32
; CHECK-NEXT: lsr x10, x0, #48
; CHECK-NEXT: strh w0, [x1, #6]
; CHECK-NEXT: strh w10, [x1]
; CHECK-NEXT: strh w9, [x1, #2]
; CHECK-NEXT: strh w8, [x1, #4]
; CHECK-NEXT: ret
; LE-LABEL: be_i64_to_i16_order:
; LE: // %bb.0:
; LE-NEXT: lsr x8, x0, #16
; LE-NEXT: lsr x9, x0, #32
; LE-NEXT: lsr x10, x0, #48
; LE-NEXT: strh w0, [x1, #6]
; LE-NEXT: strh w10, [x1]
; LE-NEXT: strh w9, [x1, #2]
; LE-NEXT: strh w8, [x1, #4]
; LE-NEXT: ret
;
; BE-LABEL: be_i64_to_i16_order:
; BE: // %bb.0:
; BE-NEXT: str x0, [x1]
; BE-NEXT: ret
%sh1 = lshr i64 %x, 16
%sh2 = lshr i64 %x, 32
%sh3 = lshr i64 %x, 48
@ -552,11 +592,16 @@ define void @be_i64_to_i16_order(i64 %x, i16* %p0) {
}
define void @le_i64_to_i32(i64 %x, i32* %p0) {
; CHECK-LABEL: le_i64_to_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32
; CHECK-NEXT: stp w0, w8, [x1]
; CHECK-NEXT: ret
; LE-LABEL: le_i64_to_i32:
; LE: // %bb.0:
; LE-NEXT: str x0, [x1]
; LE-NEXT: ret
;
; BE-LABEL: le_i64_to_i32:
; BE: // %bb.0:
; BE-NEXT: lsr x8, x0, #32
; BE-NEXT: stp w0, w8, [x1]
; BE-NEXT: ret
%sh1 = lshr i64 %x, 32
%t0 = trunc i64 %x to i32
%t1 = trunc i64 %sh1 to i32
@ -567,11 +612,16 @@ define void @le_i64_to_i32(i64 %x, i32* %p0) {
}
define void @le_i64_to_i32_order(i64 %x, i32* %p0) {
; CHECK-LABEL: le_i64_to_i32_order:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32
; CHECK-NEXT: stp w0, w8, [x1]
; CHECK-NEXT: ret
; LE-LABEL: le_i64_to_i32_order:
; LE: // %bb.0:
; LE-NEXT: str x0, [x1]
; LE-NEXT: ret
;
; BE-LABEL: le_i64_to_i32_order:
; BE: // %bb.0:
; BE-NEXT: lsr x8, x0, #32
; BE-NEXT: stp w0, w8, [x1]
; BE-NEXT: ret
%sh1 = lshr i64 %x, 32
%t0 = trunc i64 %x to i32
%t1 = trunc i64 %sh1 to i32
@ -582,11 +632,16 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) {
}
define void @be_i64_to_i32(i64 %x, i32* %p0) {
; CHECK-LABEL: be_i64_to_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32
; CHECK-NEXT: stp w8, w0, [x1]
; CHECK-NEXT: ret
; LE-LABEL: be_i64_to_i32:
; LE: // %bb.0:
; LE-NEXT: lsr x8, x0, #32
; LE-NEXT: stp w8, w0, [x1]
; LE-NEXT: ret
;
; BE-LABEL: be_i64_to_i32:
; BE: // %bb.0:
; BE-NEXT: str x0, [x1]
; BE-NEXT: ret
%sh1 = lshr i64 %x, 32
%t0 = trunc i64 %x to i32
%t1 = trunc i64 %sh1 to i32
@ -597,11 +652,16 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) {
}
define void @be_i64_to_i32_order(i64 %x, i32* %p0) {
; CHECK-LABEL: be_i64_to_i32_order:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32
; CHECK-NEXT: stp w8, w0, [x1]
; CHECK-NEXT: ret
; LE-LABEL: be_i64_to_i32_order:
; LE: // %bb.0:
; LE-NEXT: lsr x8, x0, #32
; LE-NEXT: stp w8, w0, [x1]
; LE-NEXT: ret
;
; BE-LABEL: be_i64_to_i32_order:
; BE: // %bb.0:
; BE-NEXT: str x0, [x1]
; BE-NEXT: ret
%sh1 = lshr i64 %x, 32
%t0 = trunc i64 %x to i32
%t1 = trunc i64 %sh1 to i32
@ -611,6 +671,8 @@ define void @be_i64_to_i32_order(i64 %x, i32* %p0) {
ret void
}
; Negative test - not consecutive addresses
define void @i64_to_i32_wrong_addr(i64 %x, i32* %p0) {
; CHECK-LABEL: i64_to_i32_wrong_addr:
; CHECK: // %bb.0:
@ -627,6 +689,8 @@ define void @i64_to_i32_wrong_addr(i64 %x, i32* %p0) {
ret void
}
; Negative test - addresses don't line up with shift amounts
define void @i64_to_i16_wrong_order(i64 %x, i16* %p0) {
; CHECK-LABEL: i64_to_i16_wrong_order:
; CHECK: // %bb.0:
@ -655,6 +719,8 @@ define void @i64_to_i16_wrong_order(i64 %x, i16* %p0) {
ret void
}
; Negative test - no store of 't1'
define void @i32_to_i8_incomplete(i32 %x, i8* %p0) {
; CHECK-LABEL: i32_to_i8_incomplete:
; CHECK: // %bb.0:
@ -680,6 +746,8 @@ define void @i32_to_i8_incomplete(i32 %x, i8* %p0) {
ret void
}
; Negative test - no store of 't3'
define void @i64_to_i8_incomplete(i64 %x, i8* %p0) {
; CHECK-LABEL: i64_to_i8_incomplete:
; CHECK: // %bb.0:
@ -729,6 +797,8 @@ define void @i64_to_i8_incomplete(i64 %x, i8* %p0) {
ret void
}
; Negative test - not consecutive addresses
define void @i32_to_i16_wrong_addr(i32 %x, i16* %p0) {
; CHECK-LABEL: i32_to_i16_wrong_addr:
; CHECK: // %bb.0:
@ -745,6 +815,8 @@ define void @i32_to_i16_wrong_addr(i32 %x, i16* %p0) {
ret void
}
; Negative test - addresses don't line up with shift amounts
define void @i32_to_i8_wrong_order(i32 %x, i8* %p0) {
; CHECK-LABEL: i32_to_i8_wrong_order:
; CHECK: // %bb.0:

View File

@ -468,9 +468,7 @@ define void @trunc_i32_to_i8(i32 %x, i8* %p) {
define void @trunc_i32_to_i16(i32 %x, i16* %p) {
; CHECK-LABEL: trunc_i32_to_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movw %di, (%rsi)
; CHECK-NEXT: shrl $16, %edi
; CHECK-NEXT: movw %di, 2(%rsi)
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: retq
%t1 = trunc i32 %x to i16
%sh = lshr i32 %x, 16
@ -522,15 +520,7 @@ define void @trunc_i64_to_i8(i64 %x, i8* %p) {
define void @trunc_i64_to_i16(i64 %x, i16* %p) {
; CHECK-LABEL: trunc_i64_to_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq %rdi, %rcx
; CHECK-NEXT: movw %di, (%rsi)
; CHECK-NEXT: shrq $16, %rdi
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: shrq $48, %rcx
; CHECK-NEXT: movw %di, 2(%rsi)
; CHECK-NEXT: movw %ax, 4(%rsi)
; CHECK-NEXT: movw %cx, 6(%rsi)
; CHECK-NEXT: movq %rdi, (%rsi)
; CHECK-NEXT: retq
%t1 = trunc i64 %x to i16
%sh1 = lshr i64 %x, 16
@ -552,9 +542,7 @@ define void @trunc_i64_to_i16(i64 %x, i16* %p) {
define void @trunc_i64_to_i32(i64 %x, i32* %p) {
; CHECK-LABEL: trunc_i64_to_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: shrq $32, %rdi
; CHECK-NEXT: movl %edi, 4(%rsi)
; CHECK-NEXT: movq %rdi, (%rsi)
; CHECK-NEXT: retq
%t1 = trunc i64 %x to i32
%sh = lshr i64 %x, 32