mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[IROutliner] Adapting to hoisted bitcasts in CodeExtractor
In commit 700d2417d8281ea56dfd7ac72d1a1473d03d2d59 the CodeExtractor was updated so that bitcasts with lifetime markers that begin outside of the region are deduplicated outside the region and are not used as an output. This caused a discrepancy in the IROutliner: in these cases, unneeded arguments were added to the aggregate function, causing assertion errors. The IROutliner queries the CodeExtractor twice to determine the inputs and outputs, before and after `findAllocas` is called, with the same ValueSet for the outputs, causing the duplication. This has been fixed with a dummy ValueSet for the first call. However, the additional bitcasts prevent us from using the same similarity relationships that were previously defined by the IR Similarity Analysis Pass. In these cases, we check whether the initial version of the region being analyzed for outlining is still the same as it was previously. If it is not, i.e., because of the additional bitcast instructions from the CodeExtractor, we discard the region. Reviewers: yroux Differential Revision: https://reviews.llvm.org/D94303
This commit is contained in:
parent
2639acbee3
commit
63c50a23a3
@ -510,13 +510,16 @@ static void getCodeExtractorArguments(
|
||||
// outlined region. PremappedInputs are the arguments found by the
|
||||
// CodeExtractor, removing conditions such as sunken allocas, but that
|
||||
// may need to be remapped due to the extracted output values replacing
|
||||
// the original values.
|
||||
SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands;
|
||||
// the original values. We use DummyOutputs for this first run of finding
|
||||
// inputs and outputs since the outputs could change during findAllocas;
|
||||
// the correct set of extracted outputs will be in the final Outputs ValueSet.
|
||||
SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
|
||||
DummyOutputs;
|
||||
|
||||
// Use the code extractor to get the inputs and outputs, without sunken
|
||||
// allocas or removing llvm.assumes.
|
||||
CodeExtractor *CE = Region.CE;
|
||||
CE->findInputsOutputs(OverallInputs, Outputs, SinkCands);
|
||||
CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
|
||||
assert(Region.StartBB && "Region must have a start BasicBlock!");
|
||||
Function *OrigF = Region.StartBB->getParent();
|
||||
CodeExtractorAnalysisCache CEAC(*OrigF);
|
||||
@ -1263,6 +1266,16 @@ void IROutliner::pruneIncompatibleRegions(
|
||||
continue;
|
||||
|
||||
bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
|
||||
// We check if there is a discrepancy between the InstructionDataList
|
||||
// and the actual next instruction in the module. If there is, it means
|
||||
// that an extra instruction was added, likely by the CodeExtractor.
|
||||
|
||||
// Since we do not have any similarity data about this particular
|
||||
// instruction, we cannot confidently outline it, and must discard this
|
||||
// candidate.
|
||||
if (std::next(ID.getIterator())->Inst !=
|
||||
ID.Inst->getNextNonDebugInstruction())
|
||||
return true;
|
||||
return !this->InstructionClassifier.visit(ID.Inst);
|
||||
});
|
||||
|
||||
|
105
test/Transforms/IROutliner/outlining-bitcasts.ll
Normal file
105
test/Transforms/IROutliner/outlining-bitcasts.ll
Normal file
@ -0,0 +1,105 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
|
||||
|
||||
; This test ensures that an extra output is not added when there is a bitcast
|
||||
; that is relocated to outside of the extraction due to a starting lifetime
|
||||
; instruction outside of the extracted region.
|
||||
|
||||
; Additionally, we check that the newly added bitcast instruction is excluded in
|
||||
; further extractions.
|
||||
|
||||
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
|
||||
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
|
||||
|
||||
define void @outline_bitcast_base() {
|
||||
; CHECK-LABEL: @outline_bitcast_base(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]], i32* [[D]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%c = alloca i32, align 4
|
||||
%d = alloca i32, align 4
|
||||
store i32 2, i32* %a, align 4
|
||||
store i32 3, i32* %b, align 4
|
||||
store i32 4, i32* %c, align 4
|
||||
%X = bitcast i32* %d to i8*
|
||||
%al = load i32, i32* %a
|
||||
%bl = load i32, i32* %b
|
||||
%cl = load i32, i32* %c
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @outline_bitcast_removed() {
|
||||
; CHECK-LABEL: @outline_bitcast_removed(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4
|
||||
; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]], i32* [[D]])
|
||||
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[D]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
|
||||
; CHECK-NEXT: [[AM:%.*]] = load i32, i32* [[B]], align 4
|
||||
; CHECK-NEXT: [[BM:%.*]] = load i32, i32* [[A]], align 4
|
||||
; CHECK-NEXT: [[CM:%.*]] = load i32, i32* [[C]], align 4
|
||||
; CHECK-NEXT: [[AS:%.*]] = add i32 [[AM]], [[BM]]
|
||||
; CHECK-NEXT: [[BS:%.*]] = add i32 [[BM]], [[AM]]
|
||||
; CHECK-NEXT: [[CS:%.*]] = add i32 [[BM]], [[CM]]
|
||||
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[D]] to i8*
|
||||
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%c = alloca i32, align 4
|
||||
%d = alloca i32, align 4
|
||||
store i32 2, i32* %a, align 4
|
||||
store i32 3, i32* %b, align 4
|
||||
store i32 4, i32* %c, align 4
|
||||
%X = bitcast i32* %d to i8*
|
||||
%al = load i32, i32* %a
|
||||
%bl = load i32, i32* %b
|
||||
%cl = load i32, i32* %c
|
||||
call void @llvm.lifetime.start.p0i8(i64 -1, i8* %X)
|
||||
%am = load i32, i32* %b
|
||||
%bm = load i32, i32* %a
|
||||
%cm = load i32, i32* %c
|
||||
%as = add i32 %am, %bm
|
||||
%bs = add i32 %bm, %am
|
||||
%cs = add i32 %bm, %cm
|
||||
call void @llvm.lifetime.end.p0i8(i64 -1, i8* %X)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @outline_bitcast_base2(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @outline_bitcast_base2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[AL:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[BL:%.*]] = add i32 [[B]], [[A]]
|
||||
; CHECK-NEXT: [[CL:%.*]] = add i32 [[B]], [[C:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%al = add i32 %a, %b
|
||||
%bl = add i32 %b, %a
|
||||
%cl = add i32 %b, %c
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]])
|
||||
; CHECK: entry_to_outline:
|
||||
; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
|
||||
; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
|
||||
; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4
|
||||
; CHECK-NEXT: [[X:%.*]] = bitcast i32* [[ARG3]] to i8*
|
||||
; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
|
||||
; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
|
||||
; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
|
Loading…
Reference in New Issue
Block a user