mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-22 20:43:44 +02:00
2e307633f3
Re-recommiting after landing DAG extension-crash fix. Recommiting after adding check to avoid miscomputing alias information on addresses of the same base but different subindices. Memory accesses offset from frame indices may alias, e.g., we may merge write from function arguments passed on the stack when they are contiguous. As a result, when checking aliasing, we consider the underlying frame index's offset from the stack pointer. Static allocs are realized as stack objects in SelectionDAG, but its offset is not set until post-DAG causing DAGCombiner's alias check to consider access to static allocas to frequently alias. Modify isAlias to consider access between static allocas and access from other frame objects to be considered aliasing. Many test changes are included here. Most are fixes for tests which indirectly relied on our aliasing ability and needed to be modified to preserve their original intent. The remaining tests have minor improvements due to relaxed ordering. The exception is CodeGen/X86/2011-10-19-widen_vselect.ll which has a minor degradation dispite though the pre-legalized DAG is improved. Reviewers: rnk, mkuper, jonpa, hfinkel, uweigand Reviewed By: rnk Subscribers: sdardis, nemanjai, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D33345 llvm-svn: 308350
179 lines
7.4 KiB
LLVM
179 lines
7.4 KiB
LLVM
; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
|
|
; REQUIRES: asserts
|
|
|
|
target triple = "x86_64-pc-linux-gnu"
|
|
|
|
; Can we lower a single vector?
|
|
define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
|
|
entry:
|
|
; CHECK-LABEL: @test
|
|
; CHECK: subq $24, %rsp
|
|
; CHECK: movaps %xmm0, (%rsp)
|
|
; CHECK: callq do_safepoint
|
|
; CHECK: movaps (%rsp), %xmm0
|
|
; CHECK: addq $24, %rsp
|
|
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
|
|
%obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
|
|
ret <2 x i8 addrspace(1)*> %obj.relocated
|
|
}
|
|
|
|
; Can we lower the base, derived pairs if both are vectors?
|
|
define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" {
|
|
entry:
|
|
; CHECK-LABEL: @test2
|
|
; CHECK: subq $40, %rsp
|
|
; CHECK: movq %rdi, %xmm1
|
|
; CHECK: pshufd $68, %xmm1, %xmm1 # xmm1 = xmm1[0,1,0,1]
|
|
; CHECK: paddq %xmm0, %xmm1
|
|
; CHECK: movdqa %xmm0, 16(%rsp)
|
|
; CHECK: movdqa %xmm1, (%rsp)
|
|
; CHECK: callq do_safepoint
|
|
; CHECK: movaps (%rsp), %xmm0
|
|
; CHECK: addq $40, %rsp
|
|
%derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset
|
|
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived)
|
|
%derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived)
|
|
ret <2 x i8 addrspace(1)*> %derived.relocated
|
|
}
|
|
|
|
; Originally, this was just a variant of @test2 above, but it ends up
|
|
; covering a bunch of interesting missed optimizations. Specifically:
|
|
; - We waste a stack slot for a value that a backend transform pass
|
|
; CSEd to another spilled one.
|
|
; - We don't remove the testb even though it serves no purpose
|
|
; - We could in principal reuse the argument memory (%rsi) and do away
|
|
; with stack slots entirely.
|
|
define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
|
|
entry:
|
|
; CHECK-LABEL: @test3
|
|
; CHECK: subq $40, %rsp
|
|
; CHECK: testb $1, %dil
|
|
; CHECK: movaps (%rsi), %xmm0
|
|
; CHECK-DAG: movaps %xmm0, (%rsp)
|
|
; CHECK-DAG: movaps %xmm0, 16(%rsp)
|
|
; CHECK: callq do_safepoint
|
|
; CHECK: movaps (%rsp), %xmm0
|
|
; CHECK: addq $40, %rsp
|
|
br i1 %cnd, label %taken, label %untaken
|
|
|
|
taken: ; preds = %entry
|
|
%obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
|
|
br label %merge
|
|
|
|
untaken: ; preds = %entry
|
|
%objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
|
|
br label %merge
|
|
|
|
merge: ; preds = %untaken, %taken
|
|
%obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
|
|
%obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
|
|
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base)
|
|
%obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj)
|
|
%obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*>
|
|
%obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base)
|
|
%obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*>
|
|
ret <2 x i64 addrspace(1)*> %obj.relocated.casted
|
|
}
|
|
|
|
; Can we handle vector constants? At the moment, we don't appear to actually
|
|
; get selection dag nodes for these.
|
|
define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" {
|
|
entry:
|
|
; CHECK-LABEL: @test4
|
|
; CHECK: subq $24, %rsp
|
|
; CHECK: xorps %xmm0, %xmm0
|
|
; CHECK: movaps %xmm0, (%rsp)
|
|
; CHECK: callq do_safepoint
|
|
; CHECK: movaps (%rsp), %xmm0
|
|
; CHECK: addq $24, %rsp
|
|
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer)
|
|
%obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
|
|
ret <2 x i8 addrspace(1)*> %obj.relocated
|
|
}
|
|
|
|
; Check that we can lower a constant typed as i128 correctly. Note that the
|
|
; actual value is representable in 64 bits. We don't have a representation
|
|
; of larger than 64 bit constant in the StackMap format.
|
|
define void @test5() gc "statepoint-example" {
|
|
entry:
|
|
; CHECK-LABEL: @test5
|
|
; CHECK: push
|
|
; CHECK: callq do_safepoint
|
|
; CHECK: pop
|
|
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0)
|
|
ret void
|
|
}
|
|
|
|
; CHECK: __LLVM_StackMaps:
|
|
|
|
; CHECK: .Ltmp0-test
|
|
; Check for the two spill slots
|
|
; Stack Maps: Loc 3: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
|
|
; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
|
|
; CHECK: .byte 3
|
|
; CHECK: .byte 0
|
|
; CHECK: .short 16
|
|
; CHECK: .short 7
|
|
; CHECK: .short 0
|
|
; CHECK: .long 0
|
|
; CHECK: .byte 3
|
|
; CHECK: .byte 0
|
|
; CHECK: .short 16
|
|
; CHECK: .short 7
|
|
; CHECK: .short 0
|
|
; CHECK: .long 0
|
|
|
|
; CHECK: .Ltmp1-test2
|
|
; Check for the two spill slots
|
|
; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
|
|
; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
|
|
; CHECK: .byte 3
|
|
; CHECK: .byte 0
|
|
; CHECK: .short 16
|
|
; CHECK: .short 7
|
|
; CHECK: .short 0
|
|
; CHECK: .long 16
|
|
; CHECK: .byte 3
|
|
; CHECK: .byte 0
|
|
; CHECK: .short 16
|
|
; CHECK: .short 7
|
|
; CHECK: .short 0
|
|
; CHECK: .long 0
|
|
|
|
; CHECK: .Ltmp2-test3
|
|
; Check for the four spill slots
|
|
; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
|
|
; Stack Maps: Loc 4: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
|
|
; Stack Maps: Loc 5: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
|
|
; Stack Maps: Loc 6: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
|
|
; CHECK: .byte 3
|
|
; CHECK: .byte 0
|
|
; CHECK: .short 16
|
|
; CHECK: .short 7
|
|
; CHECK: .short 0
|
|
; CHECK: .long 16
|
|
; CHECK: .byte 3
|
|
; CHECK: .byte 0
|
|
; CHECK: .short 16
|
|
; CHECK: .short 7
|
|
; CHECK: .short 0
|
|
; CHECK: .long 16
|
|
; CHECK: .byte 3
|
|
; CHECK: .byte 0
|
|
; CHECK: .short 16
|
|
; CHECK: .short 7
|
|
; CHECK: .short 0
|
|
; CHECK: .long 16
|
|
; CHECK: .byte 3
|
|
; CHECK: .byte 0
|
|
; CHECK: .short 16
|
|
; CHECK: .short 7
|
|
; CHECK: .short 0
|
|
; CHECK: .long 0
|
|
|
|
declare void @do_safepoint()
|
|
|
|
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
|
|
declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
|
|
declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
|