
[GlobalISel] Fix load-or combine moving loads across potential aliasing stores.

Although this combine checks that there are no load-folding barriers between
the loads that it's trying to merge, it was inserting the merged load at the
MIRBuilder's default insertion point, which is the G_OR use instruction. Since
a potentially aliasing store can sit between the original loads and that G_OR,
the widened load could be emitted after the store and read the wrong value.

This was causing a miscompile in the test suite's
SingleSource/Regression/C/gcc-c-torture/execute/GCC-C-execute-bswap-2

Differential Revision: https://reviews.llvm.org/D106251
Amara Emerson, 2021-07-18 23:34:09 -07:00
commit 49ad61c372 (parent 8a42745952)
3 changed files with 81 additions and 13 deletions
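
To make the hazard concrete before the diff, here is a minimal C++ sketch of
the shape of the bug (illustrative names only, not the actual bswap-2 source):

#include <cstdint>

// Four narrow zero-extended loads are OR'd into one 32-bit value, with a
// potentially aliasing store between the loads and the final OR. If the
// combine emits its wide replacement load at the position of the OR, that
// load executes after the store and can observe the freshly written byte;
// emitted at the last narrow load instead, it stays ahead of the store.
uint32_t read32(const unsigned char *p, unsigned char *q) {
  uint32_t b0 = p[0], b1 = p[1], b2 = p[2], b3 = p[3]; // narrow loads
  *q = 1;                                          // store, may alias p
  return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); // OR tree, little endian
}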

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

@@ -596,8 +596,10 @@ private:
   /// at to the index of the load.
   /// \param [in] MemSizeInBits - The number of bits each load should produce.
   ///
-  /// \returns The lowest-index load found and the lowest index on success.
-  Optional<std::pair<GZExtLoad *, int64_t>> findLoadOffsetsForLoadOrCombine(
+  /// \returns On success, a 3-tuple containing lowest-index load found, the
+  /// lowest index, and the last load in the sequence.
+  Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
+  findLoadOffsetsForLoadOrCombine(
       SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
       const SmallVector<Register, 8> &RegsToVisit,
       const unsigned MemSizeInBits);
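
As an aside, a self-contained sketch of how a caller consumes such an Optional
3-tuple (stand-in types and std::optional here, so it compiles outside LLVM;
it mirrors the std::tie change in CombinerHelper.cpp below):

#include <cstdint>
#include <optional>
#include <tuple>

struct GZExtLoadStub {}; // stand-in for GZExtLoad

// Returns (lowest-index load, lowest index, latest load) or nullopt.
std::optional<std::tuple<GZExtLoadStub *, int64_t, GZExtLoadStub *>>
findLoadOffsetsStub() {
  static GZExtLoadStub Lowest, Latest;
  return std::make_tuple(&Lowest, int64_t{0}, &Latest);
}

bool matchStub() {
  GZExtLoadStub *LowestIdxLoad, *LatestLoad;
  int64_t LowestIdx;
  auto MaybeLoadInfo = findLoadOffsetsStub();
  if (!MaybeLoadInfo)
    return false;
  // std::tie unpacks the tuple into the three pre-declared locals.
  std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
  return LowestIdxLoad && LatestLoad && LowestIdx == 0;
}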

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include <tuple>
 
 #define DEBUG_TYPE "gi-combiner"
@@ -3455,7 +3456,7 @@ matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
   return std::make_pair(Load, Shift / MemSizeInBits);
 }
 
-Optional<std::pair<GZExtLoad *, int64_t>>
+Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
 CombinerHelper::findLoadOffsetsForLoadOrCombine(
     SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
     const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
@@ -3478,10 +3479,10 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
   const MachineMemOperand *MMO = nullptr;
 
   // Earliest instruction-order load in the pattern.
-  MachineInstr *EarliestLoad = nullptr;
+  GZExtLoad *EarliestLoad = nullptr;
 
   // Latest instruction-order load in the pattern.
-  MachineInstr *LatestLoad = nullptr;
+  GZExtLoad *LatestLoad = nullptr;
 
   // Base pointer which every load should share.
   Register BasePtr;
@@ -3586,7 +3587,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
     return None;
   }
 
-  return std::make_pair(LowestIdxLoad, LowestIdx);
+  return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
 }
 
 bool CombinerHelper::matchLoadOrCombine(
@@ -3634,13 +3635,13 @@ bool CombinerHelper::matchLoadOrCombine(
   // Also verify that each of these ends up putting a[i] into the same memory
   // offset as a load into a wide type would.
   SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
-  GZExtLoad *LowestIdxLoad;
+  GZExtLoad *LowestIdxLoad, *LatestLoad;
   int64_t LowestIdx;
   auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
       MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
   if (!MaybeLoadInfo)
     return false;
-  std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+  std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
 
   // We have a bunch of loads being OR'd together. Using the addresses + offsets
   // we found before, check if this corresponds to a big or little endian byte
@@ -3695,6 +3696,7 @@ bool CombinerHelper::matchLoadOrCombine(
     return false;
 
   MatchInfo = [=](MachineIRBuilder &MIB) {
+    MIB.setInstrAndDebugLoc(*LatestLoad);
     Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
     MIB.buildLoad(LoadDst, Ptr, *NewMMO);
     if (NeedsBSwap)
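
The setInstrAndDebugLoc call above is the substance of the fix: the builder
otherwise keeps inserting at the instruction it was seeded with (the G_OR),
which sits downstream of the G_STORE. A toy model of the placement (plain
C++, not LLVM's API):

#include <iostream>
#include <list>
#include <string>

// Toy model: a basic block as a list of opcode names. Inserting the wide
// load at the latest original load's position keeps it ahead of the
// aliasing store; the old behavior effectively inserted it at the G_OR.
int main() {
  std::list<std::string> block = {"G_ZEXTLOAD b0", "G_ZEXTLOAD b1",
                                  "G_ZEXTLOAD b2", "G_ZEXTLOAD b3",
                                  "G_STORE",       "G_OR"};
  auto latestLoad = std::next(block.begin(), 3); // "G_ZEXTLOAD b3"
  // std::list::insert places the element before the iterator, matching the
  // builder's insert-before behavior after setInstrAndDebugLoc.
  block.insert(latestLoad, "G_LOAD (wide)");
  for (const std::string &I : block)
    std::cout << I << '\n'; // wide load now prints before G_STORE
}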

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-load-or-pattern.mir

@@ -1397,9 +1397,6 @@ body: |
 name: dont_combine_store_between_different_mbb
 tracksRegLiveness: true
 body: |
-  ; There is a store between the two loads, hidden away in a different MBB.
-  ; We should not combine here.
-
   ; LITTLE-LABEL: name: dont_combine_store_between_different_mbb
   ; LITTLE: bb.0:
   ; LITTLE: successors: %bb.1(0x80000000)
@@ -1444,6 +1441,9 @@ body: |
   ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
   ; BIG: $w1 = COPY %full_load(s32)
   ; BIG: RET_ReallyLR implicit $w1
+  ; There is a store between the two loads, hidden away in a different MBB.
+  ; We should not combine here.
+
   bb.0:
     successors: %bb.1(0x80000000)
@@ -1479,8 +1479,6 @@ body: |
 name: different_mbb
 tracksRegLiveness: true
 body: |
-  ; It should be possible to combine here, but it's not supported right now.
-
   ; LITTLE-LABEL: name: different_mbb
   ; LITTLE: bb.0:
   ; LITTLE: successors: %bb.1(0x80000000)
@@ -1513,6 +1511,8 @@ body: |
   ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
   ; BIG: $w1 = COPY %full_load(s32)
   ; BIG: RET_ReallyLR implicit $w1
+  ; It should be possible to combine here, but it's not supported right now.
+
   bb.0:
     successors: %bb.1(0x80000000)
@@ -1569,3 +1569,67 @@ body: |
     %full_load:_(s32) = G_OR %low_half, %high_half
     $w1 = COPY %full_load(s32)
     RET_ReallyLR implicit $w1
+...
+---
+name: store_between_loads_and_or
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment: 1
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; Check that we build the G_LOAD at the point of the last load, instead of place of the G_OR.
+    ; We could have a G_STORE in between which may not be safe to move the load across.
+    liveins: $x0, $x1
+
+    ; LITTLE-LABEL: name: store_between_loads_and_or
+    ; LITTLE: liveins: $x0, $x1, $x0, $x1
+    ; LITTLE: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; LITTLE: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; LITTLE: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+    ; LITTLE: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+    ; LITTLE: $w0 = COPY [[LOAD]](s32)
+    ; LITTLE: RET_ReallyLR implicit $w0
+    ; BIG-LABEL: name: store_between_loads_and_or
+    ; BIG: liveins: $x0, $x1, $x0, $x1
+    ; BIG: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; BIG: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; BIG: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+    ; BIG: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[LOAD]]
+    ; BIG: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+    ; BIG: $w0 = COPY [[BSWAP]](s32)
+    ; BIG: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %12:_(s8) = G_CONSTANT i8 1
+    %15:_(s32) = G_CONSTANT i32 8
+    %19:_(s32) = G_CONSTANT i32 16
+    %23:_(s32) = G_CONSTANT i32 24
+    %13:_(s32) = G_ZEXTLOAD %0:_(p0) :: (load (s8))
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(p0) = G_PTR_ADD %0:_, %3:_(s64)
+    %14:_(s32) = G_ZEXTLOAD %4:_(p0) :: (load (s8))
+    %6:_(s64) = G_CONSTANT i64 2
+    %7:_(p0) = G_PTR_ADD %0:_, %6:_(s64)
+    %18:_(s32) = G_ZEXTLOAD %7:_(p0) :: (load (s8))
+    %9:_(s64) = G_CONSTANT i64 3
+    %10:_(p0) = G_PTR_ADD %0:_, %9:_(s64)
+    %22:_(s32) = G_ZEXTLOAD %10:_(p0) :: (load (s8))
+    G_STORE %12:_(s8), %1:_(p0) :: (store (s8))
+    %16:_(s32) = nuw nsw G_SHL %14:_, %15:_(s32)
+    %17:_(s32) = G_OR %16:_, %13:_
+    %20:_(s32) = nuw nsw G_SHL %18:_, %19:_(s32)
+    %21:_(s32) = G_OR %17:_, %20:_
+    %24:_(s32) = nuw G_SHL %22:_, %23:_(s32)
+    %25:_(s32) = G_OR %21:_, %24:_
+    $w0 = COPY %25:_(s32)
+    RET_ReallyLR implicit $w0
+...