mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[InstCombine] Avoid Bitcast-GEP fusion for pointers directly from allocation functions
Elimination of bitcasts with void pointer arguments results in GEPs with pure byte indexes. These GEPs do not preserve struct/array information and interrupt phi address translation in later pipeline stages. Here is the original motivation for this patch: ``` #include<stdio.h> #include<malloc.h> typedef struct __Node{ double f; struct __Node *next; } Node; void foo () { Node *a = (Node*) malloc (sizeof(Node)); a->next = NULL; a->f = 11.5f; Node *ptr = a; double sum = 0.0f; while (ptr) { sum += ptr->f; ptr = ptr->next; } printf("%f\n", sum); } ``` By explicit assignment `a->next = NULL`, we can infer the length of the link list is `1`. In this case we can eliminate while loop traversal entirely. This elimination is supposed to be performed by GVN/MemoryDependencyAnalysis/PhiTranslation . The final IR before this patch: ``` define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 { entry: %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2 %next = getelementptr inbounds i8, i8* %call, i64 8 %0 = bitcast i8* %next to %struct.__Node** store %struct.__Node* null, %struct.__Node** %0, align 8, !tbaa !2 %f = bitcast i8* %call to double* store double 1.150000e+01, double* %f, align 8, !tbaa !8 %tobool12 = icmp eq i8* %call, null br i1 %tobool12, label %while.end, label %while.body.lr.ph while.body.lr.ph: ; preds = %entry %1 = bitcast i8* %call to %struct.__Node* br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body %sum.014 = phi double [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ] %ptr.013 = phi %struct.__Node* [ %1, %while.body.lr.ph ], [ %3, %while.body ] %f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0 %2 = load double, double* %f1, align 8, !tbaa !8 %add = fadd contract double %sum.014, %2 %next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1 %3 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2 %tobool = icmp eq %struct.__Node* %3, null br i1 %tobool, label %while.end, label %while.body while.end: ; preds = %while.body, %entry %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %while.body ] %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa) ret void } ``` Final IR after this patch: ``` ; Function Attrs: nofree nounwind define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 { while.end: %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 1.150000e+01) ret void } ``` IR before GVN before this patch: ``` define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 { entry: %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2 %next = getelementptr inbounds i8, i8* %call, i64 8 %0 = bitcast i8* %next to %struct.__Node** store %struct.__Node* null, %struct.__Node** %0, align 8, !tbaa !2 %f = bitcast i8* %call to double* store double 1.150000e+01, double* %f, align 8, !tbaa !8 %tobool12 = icmp eq i8* %call, null br i1 %tobool12, label %while.end, label %while.body.lr.ph while.body.lr.ph: ; preds = %entry %1 = bitcast i8* %call to %struct.__Node* br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body %sum.014 = phi double [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ] %ptr.013 = phi %struct.__Node* [ %1, %while.body.lr.ph ], [ %3, %while.body ] %f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0 %2 = load double, double* %f1, align 8, !tbaa !8 %add = fadd contract double %sum.014, %2 %next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1 %3 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2 %tobool = icmp eq %struct.__Node* %3, null br i1 %tobool, label %while.end.loopexit, label %while.body while.end.loopexit: ; preds = %while.body %add.lcssa = phi double [ %add, %while.body ] br label %while.end while.end: ; preds = %while.end.loopexit, %entry %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ] %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa) ret void } ``` IR before GVN after this patch: ``` define dso_local void @foo(i32* nocapture readnone %r) local_unnamed_addr #0 { entry: %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) #2 %0 = bitcast i8* %call to %struct.__Node* %next = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 1 store %struct.__Node* null, %struct.__Node** %next, align 8, !tbaa !2 %f = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 0 store double 1.150000e+01, double* %f, align 8, !tbaa !8 %tobool12 = icmp eq i8* %call, null br i1 %tobool12, label %while.end, label %while.body.preheader while.body.preheader: ; preds = %entry br label %while.body while.body: ; preds = %while.body.preheader, %while.body %sum.014 = phi double [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ] %ptr.013 = phi %struct.__Node* [ %2, %while.body ], [ %0, %while.body.preheader ] %f1 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 0 %1 = load double, double* %f1, align 8, !tbaa !8 %add = fadd contract double %sum.014, %1 %next2 = getelementptr inbounds %struct.__Node, %struct.__Node* %ptr.013, i64 0, i32 1 %2 = load %struct.__Node*, %struct.__Node** %next2, align 8, !tbaa !2 %tobool = icmp eq %struct.__Node* %2, null br i1 %tobool, label %while.end.loopexit, label %while.body while.end.loopexit: ; preds = %while.body %add.lcssa = phi double [ %add, %while.body ] br label %while.end while.end: ; preds = %while.end.loopexit, %entry %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ] %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %sum.0.lcssa) ret void } ``` The phi translation fails before this patch and it prevents GVN to remove the loop. The reason for this failure is in InstCombine. When the Instruction combining pass decides to convert: ``` %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) %0 = bitcast i8* %call to %struct.__Node* %next = getelementptr inbounds %struct.__Node, %struct.__Node* %0, i64 0, i32 1 store %struct.__Node* null, %struct.__Node** %next ``` to ``` %call = tail call noalias dereferenceable_or_null(16) i8* @malloc(i64 16) %next = getelementptr inbounds i8, i8* %call, i64 8 %0 = bitcast i8* %next to %struct.__Node** store %struct.__Node* null, %struct.__Node** %0 ``` GEP instructions with pure byte indexes (e.g. `getelementptr inbounds i8, i8* %call, i64 8`) are obstacles for address translation. address translation is looking for structural similarity between GEPs and these GEPs usually do not match since they have different structure. This change will cause couple of failures in LLVM-tests. However, in all cases we need to change expected result by the test. I will update those tests as soon as I get green light on this patch. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D96881
This commit is contained in:
parent
ad0007c85e
commit
0efd7ce0e4
@ -2425,13 +2425,21 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
||||
// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
|
||||
unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType);
|
||||
APInt Offset(OffsetBits, 0);
|
||||
if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset)) {
|
||||
|
||||
// If the bitcast argument is an allocation, The bitcast is for convertion
|
||||
// to actual type of allocation. Removing such bitcasts, results in having
|
||||
// GEPs with i8* base and pure byte offsets. That means GEP is not aware of
|
||||
// struct or array hierarchy.
|
||||
// By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
|
||||
// a better chance to succeed.
|
||||
if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
|
||||
!isAllocationFn(SrcOp, &TLI)) {
|
||||
// If this GEP instruction doesn't move the pointer, just replace the GEP
|
||||
// with a bitcast of the real input to the dest type.
|
||||
if (!Offset) {
|
||||
// If the bitcast is of an allocation, and the allocation will be
|
||||
// converted to match the type of the cast, don't touch this.
|
||||
if (isa<AllocaInst>(SrcOp) || isAllocationFn(SrcOp, &TLI)) {
|
||||
if (isa<AllocaInst>(SrcOp)) {
|
||||
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
|
||||
if (Instruction *I = visitBitCast(*BCI)) {
|
||||
if (I != BCI) {
|
||||
|
@ -1305,4 +1305,23 @@ define i8* @D98588(i8* %c1, i64 %offset) {
|
||||
ret i8* %gep
|
||||
}
|
||||
|
||||
declare noalias i8* @malloc(i64) nounwind
|
||||
|
||||
define i32 @test_gep_bitcast_malloc(%struct.A* %a) {
|
||||
; CHECK-LABEL: @test_gep_bitcast_malloc(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CALL:%.*]] = call noalias dereferenceable_or_null(16) i8* @malloc(i64 16)
|
||||
; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.A*
|
||||
; CHECK-NEXT: [[G3:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.A* [[B]], i64 0, i32 2
|
||||
; CHECK-NEXT: [[A_C:%.*]] = load i32, i32* [[G3]], align 4
|
||||
; CHECK-NEXT: ret i32 [[A_C]]
|
||||
;
|
||||
entry:
|
||||
%call = call noalias i8* @malloc(i64 16) #2
|
||||
%B = bitcast i8* %call to %struct.A*
|
||||
%g3 = getelementptr %struct.A, %struct.A* %B, i32 0, i32 2
|
||||
%a_c = load i32, i32* %g3, align 4
|
||||
ret i32 %a_c
|
||||
}
|
||||
|
||||
!0 = !{!"branch_weights", i32 2, i32 10}
|
||||
|
Loading…
x
Reference in New Issue
Block a user