mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[InstCombine] tmp alloca bypass: ensure that the replacement dominates all alloca uses
After 077bff39d46364035a5dcfa32fc69910ad0975d0, isDereferenceableForAllocaSize() can recurse into selects, which is causing a problem for the new test case, reduced from https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20210412/904154.html because the replacement (the select) is defined after the first use of an alloca, so we'd end up with a verifier error. Now, this new check is too restrictive. We likely can handle *some* cases, by trying to sink all uses of an alloca to after the the def.
This commit is contained in:
parent
199c80bef9
commit
f71031c386
@ -406,7 +406,11 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
|
||||
Align SourceAlign = getOrEnforceKnownAlignment(
|
||||
TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
|
||||
if (AllocaAlign <= SourceAlign &&
|
||||
isDereferenceableForAllocaSize(TheSrc, &AI, DL)) {
|
||||
isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
|
||||
(!isa<Instruction>(TheSrc) || all_of(AI.users(), [&](const User *U) {
|
||||
return DT.dominates(cast<Instruction>(TheSrc), cast<Instruction>(U));
|
||||
}))) {
|
||||
// FIXME: can the dominance restriction be relaxed by sinking instrns?
|
||||
LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
|
||||
LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
|
||||
unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
|
||||
|
@ -135,10 +135,12 @@ define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca_too_many_byte
|
||||
; Simple memcpy to alloca from constant address space intrinsic call
|
||||
define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(i8 addrspace(1)* %out, i32 %idx) {
|
||||
; CHECK-LABEL: @memcpy_constant_intrinsic_ptr_to_alloca(
|
||||
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
|
||||
; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 0
|
||||
; CHECK-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* noundef align 4 dereferenceable(32) [[ALLOCA_CAST]], i8 addrspace(4)* noundef align 16 dereferenceable(32) [[KERNARG_SEGMENT_PTR]], i64 32, i1 false)
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[GEP]], align 1
|
||||
; CHECK-NEXT: store i8 [[LOAD]], i8 addrspace(1)* [[OUT:%.*]], align 1
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
|
35
test/Transforms/InstCombine/tmp-alloca-bypass.ll
Normal file
35
test/Transforms/InstCombine/tmp-alloca-bypass.ll
Normal file
@ -0,0 +1,35 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
||||
; See https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20210412/904154.html
|
||||
; When replacing an allocation that is only modified by a memcpy/memmove from
|
||||
; a constant whose alignment is equal to or exceeds that of the allocation,
|
||||
; we also need to ensure that we actually can replace all uses of an alloca
|
||||
; with said constant. This matters because it could be e.g. a select between
|
||||
; two constants, that happens after the first use of an alloca.
|
||||
|
||||
%t0 = type { i8*, i64 }
|
||||
|
||||
@g0 = external constant %t0
|
||||
@g1 = external constant %t0
|
||||
define void @test(i8*%out) {
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK-NEXT: [[I0:%.*]] = alloca [[T0:%.*]], align 8
|
||||
; CHECK-NEXT: [[I1:%.*]] = bitcast %t0* [[I0]] to i8*
|
||||
; CHECK-NEXT: [[I2:%.*]] = call i1 @get_cond()
|
||||
; CHECK-NEXT: [[I3:%.*]] = select i1 [[I2]], i8* bitcast (%t0* @g0 to i8*), i8* bitcast (%t0* @g1 to i8*)
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(16) [[I1]], i8* noundef nonnull align 8 dereferenceable(16) [[I3]], i64 16, i1 false)
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(16) [[OUT:%.*]], i8* noundef nonnull align 8 dereferenceable(16) [[I1]], i64 16, i1 false)
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%i0 = alloca %t0
|
||||
%i1 = bitcast %t0* %i0 to i8*
|
||||
%i2 = call i1 @get_cond()
|
||||
%i3 = select i1 %i2, i8* bitcast (%t0* @g0 to i8*), i8* bitcast (%t0* @g1 to i8*)
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %i1, i8* %i3, i64 16, i1 false)
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %i1, i64 16, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i1 @get_cond()
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
|
Loading…
x
Reference in New Issue
Block a user