mirror of https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
0b6b012562
(it was introduced in https://lists.llvm.org/pipermail/llvm-dev/2015-January/080956.html)

This canonicalization seems dubious.

Most importantly, while it does not create `inttoptr` casts by itself, it may cause them to appear later; see e.g. D88788.

I think it's pretty obvious that this is an undesirable outcome: by now we've established that seemingly no-op `inttoptr`/`ptrtoint` casts are not no-ops, and we are no longer eager to look past them. Which e.g. means that given

```
%a = load i32
%b = inttoptr %a
%c = inttoptr %a
```

we likely won't be able to tell that `%b` and `%c` are the same thing.

As we can see in D88789 / D88788 / D88806 / D75505, we can't really teach SCEV about this (at least not without https://bugs.llvm.org/show_bug.cgi?id=47592), and we can't recover the situation post-inlining in instcombine.

So it really does look like this fold is actively breaking otherwise-good IR, in a way that is not recoverable. And that means the fold isn't even helpful in exposing the patterns it produces to the passes that are otherwise unaware of them.

Thus, I propose to simply not perform this canonicalization. The original motivational RFC does not state what larger problem the canonicalization was trying to solve, so I'm not sure how this plays out in the larger picture.

On vanilla llvm test-suite + RawSpeed, this increases asm instruction count and final object size by ~+0.05%, while decreasing the final count of bitcasts by -4.79% (-28990), of ptrtoint casts by -15.41% (-3423), and of inttoptr casts by -25.59% (-6919, *sic*). Overall, there are -0.04% fewer IR basic blocks and -0.39% fewer IR instructions.

See https://bugs.llvm.org/show_bug.cgi?id=47592

Differential Revision: https://reviews.llvm.org/D88789
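For reference, a minimal sketch of the shape of the fold being removed (hypothetical IR, not taken from the patch; it assumes the canonicalization retyped pointer loads/stores as integer ones, per the RFC linked above):

```
; Before: a pointer value is loaded and immediately stored.
%v = load i8*, i8** %src, align 8
store i8* %v, i8** %dst, align 8

; After the (now-removed) canonicalization: the load and store are
; retyped to operate on integers (assuming 64-bit pointers). No cast
; is created yet...
%src.i = bitcast i8** %src to i64*
%v.i = load i64, i64* %src.i, align 8
%dst.i = bitcast i8** %dst to i64*
store i64 %v.i, i64* %dst.i, align 8

; ...but as soon as a later transform (e.g. inlining + GVN) wants the
; loaded value as a pointer again, it has to materialize it via inttoptr:
%v.p = inttoptr i64 %v.i to i8*
```

This illustrates the "may cause them to appear later" point: the casts are not created by the fold itself, but by whatever subsequently has to undo the retyping.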
188 lines
6.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"

define i32 @test_load_cast_combine_tbaa(float* %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
; CHECK-LABEL: @test_load_cast_combine_tbaa(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR:%.*]] to i32*
; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP0]], align 4, [[TBAA0:!tbaa !.*]]
; CHECK-NEXT: ret i32 [[L1]]
;
entry:
  %l = load float, float* %ptr, !tbaa !0
  %c = bitcast float %l to i32
  ret i32 %c
}

define i32 @test_load_cast_combine_noalias(float* %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata.
; CHECK-LABEL: @test_load_cast_combine_noalias(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR:%.*]] to i32*
; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP0]], align 4, !alias.scope !3, !noalias !4
; CHECK-NEXT: ret i32 [[L1]]
;
entry:
  %l = load float, float* %ptr, !alias.scope !3, !noalias !4
  %c = bitcast float %l to i32
  ret i32 %c
}

define float @test_load_cast_combine_range(i32* %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) drops range metadata. It
; would be nice to preserve or update it somehow but this is hard when moving
; between types.
; CHECK-LABEL: @test_load_cast_combine_range(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to float*
; CHECK-NEXT: [[L1:%.*]] = load float, float* [[TMP0]], align 4
; CHECK-NEXT: ret float [[L1]]
;
entry:
  %l = load i32, i32* %ptr, !range !5
  %c = bitcast i32 %l to float
  ret float %c
}

define i32 @test_load_cast_combine_invariant(float* %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata.
; CHECK-LABEL: @test_load_cast_combine_invariant(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR:%.*]] to i32*
; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP0]], align 4, !invariant.load !7
; CHECK-NEXT: ret i32 [[L1]]
;
entry:
  %l = load float, float* %ptr, !invariant.load !6
  %c = bitcast float %l to i32
  ret i32 %c
}

define i32 @test_load_cast_combine_nontemporal(float* %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves nontemporal
; metadata.
; CHECK-LABEL: @test_load_cast_combine_nontemporal(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR:%.*]] to i32*
; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP0]], align 4, !nontemporal !8
; CHECK-NEXT: ret i32 [[L1]]
;
entry:
  %l = load float, float* %ptr, !nontemporal !7
  %c = bitcast float %l to i32
  ret i32 %c
}

define i8* @test_load_cast_combine_align(i32** %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves align
; metadata.
; CHECK-LABEL: @test_load_cast_combine_align(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32** [[PTR:%.*]] to i8**
; CHECK-NEXT: [[L1:%.*]] = load i8*, i8** [[TMP0]], align 8, !align !9
; CHECK-NEXT: ret i8* [[L1]]
;
entry:
  %l = load i32*, i32** %ptr, !align !8
  %c = bitcast i32* %l to i8*
  ret i8* %c
}

define i8* @test_load_cast_combine_deref(i32** %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves dereferenceable
; metadata.
; CHECK-LABEL: @test_load_cast_combine_deref(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32** [[PTR:%.*]] to i8**
; CHECK-NEXT: [[L1:%.*]] = load i8*, i8** [[TMP0]], align 8, !dereferenceable !9
; CHECK-NEXT: ret i8* [[L1]]
;
entry:
  %l = load i32*, i32** %ptr, !dereferenceable !8
  %c = bitcast i32* %l to i8*
  ret i8* %c
}

define i8* @test_load_cast_combine_deref_or_null(i32** %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves
; dereferenceable_or_null metadata.
; CHECK-LABEL: @test_load_cast_combine_deref_or_null(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32** [[PTR:%.*]] to i8**
; CHECK-NEXT: [[L1:%.*]] = load i8*, i8** [[TMP0]], align 8, !dereferenceable_or_null !9
; CHECK-NEXT: ret i8* [[L1]]
;
entry:
  %l = load i32*, i32** %ptr, !dereferenceable_or_null !8
  %c = bitcast i32* %l to i8*
  ret i8* %c
}

define void @test_load_cast_combine_loop(float* %src, i32* %dst, i32 %n) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access
; metadata.
; CHECK-LABEL: @test_load_cast_combine_loop(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[SRC_GEP]] to i32*
; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[TMP2]], align 4, !llvm.access.group !6
; CHECK-NEXT: store i32 [[L1]], i32* [[DST_GEP]], align 4
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]], [[LOOP1:!llvm.loop !.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %src.gep = getelementptr inbounds float, float* %src, i32 %i
  %dst.gep = getelementptr inbounds i32, i32* %dst, i32 %i
  %l = load float, float* %src.gep, !llvm.access.group !9
  %c = bitcast float %l to i32
  store i32 %c, i32* %dst.gep
  %i.next = add i32 %i, 1
  %cmp = icmp slt i32 %i.next, %n
  br i1 %cmp, label %loop, label %exit, !llvm.loop !1

exit:
  ret void
}

define void @test_load_cast_combine_nonnull(float** %ptr) {
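; Ensure a load of a pointer with !nonnull metadata is left as a pointer load
; (it is no longer canonicalized into an integer load).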
; CHECK-LABEL: @test_load_cast_combine_nonnull(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P:%.*]] = load float*, float** [[PTR:%.*]], align 8, !nonnull !7
; CHECK-NEXT: [[GEP:%.*]] = getelementptr float*, float** [[PTR]], i64 42
; CHECK-NEXT: store float* [[P]], float** [[GEP]], align 8
; CHECK-NEXT: ret void
;
entry:
  %p = load float*, float** %ptr, !nonnull !6
  %gep = getelementptr float*, float** %ptr, i32 42
  store float* %p, float** %gep
  ret void
}

!0 = !{!1, !1, i64 0}
!1 = !{!"scalar type", !2}
!2 = !{!"root"}
!3 = distinct !{!3, !4}
!4 = distinct !{!4, !{!"llvm.loop.parallel_accesses", !9}}
!5 = !{i32 0, i32 42}
!6 = !{}
!7 = !{i32 1}
!8 = !{i64 8}
!9 = distinct !{}
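; Metadata summary: !0-!2 form the TBAA access tag and type tree, !3/!4 are the
; distinct alias-scope nodes (!4 also embeds an llvm.loop.parallel_accesses
; entry pointing at !9), !5 is the [0, 42) !range payload, !6 is the empty node
; used by !invariant.load and !nonnull, !7 feeds !nontemporal, !8 (8 bytes)
; feeds !align/!dereferenceable/!dereferenceable_or_null, and !9 is the
; distinct access group referenced by !llvm.access.group.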