mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[VectorCombine] Avoid crossing address space boundaries.
We cannot bitcast pointers across different address spaces, so VectorCombine must be careful when it attempts to find the original source of the loaded data.

Differential Revision: https://reviews.llvm.org/D89577
This commit is contained in:
commit c59b73d34c (parent 7bd9c67ef6)
@@ -111,6 +111,13 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // TODO: Extend this to match GEP with constant offsets.
   Value *PtrOp = Load->getPointerOperand()->stripPointerCasts();
   assert(isa<PointerType>(PtrOp->getType()) && "Expected a pointer type");
+  unsigned AS = Load->getPointerAddressSpace();
+
+  // If original AS != Load's AS, we can't bitcast the original pointer and have
+  // to use Load's operand instead. Ideally we would want to strip pointer casts
+  // without changing AS, but there's no API to do that ATM.
+  if (AS != PtrOp->getType()->getPointerAddressSpace())
+    PtrOp = Load->getPointerOperand();

   Type *ScalarTy = Scalar->getType();
   uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
@@ -126,7 +133,6 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   if (!isSafeToLoadUnconditionally(PtrOp, MinVecTy, Alignment, DL, Load, &DT))
     return false;

-  unsigned AS = Load->getPointerAddressSpace();

   // Original pattern: insertelt undef, load [free casts of] ScalarPtr, 0
   int OldCost = TTI.getMemoryOpCost(Instruction::Load, ScalarTy, Alignment, AS);
test/Transforms/VectorCombine/AMDGPU/as-transition.ll (new file, 36 lines)
@@ -0,0 +1,36 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -vector-combine -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=CHECK

; ModuleID = 'load-as-transition.ll'
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
target triple = "amdgcn-amd-amdhsa"

%struct.hoge = type { float }

; The scalar load is through an addrspace(5) alloca that has been
; addrspacecast to the generic address space. VectorCombine must not
; bitcast the stripped (addrspace(5)) pointer to a generic-AS vector
; pointer; the widened <1 x float> load below stays in the load's own
; address space.
define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture nonnull %resultptr) local_unnamed_addr #0 {
; CHECK-LABEL: @load_from_other_as(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
; CHECK-NEXT:    [[B:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to %struct.hoge*
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16
; CHECK-NEXT:    ret void
;
bb:
  %a = alloca %struct.hoge, align 4, addrspace(5)
  %b = addrspacecast %struct.hoge addrspace(5)* %a to %struct.hoge*
  %c = getelementptr inbounds %struct.hoge, %struct.hoge* %b, i64 0, i32 0
  %d = load float, float* %c, align 4
  %e = insertelement <4 x float> undef, float %d, i32 0
  store <4 x float> %e, <4 x float>* %resultptr, align 16
  ret void
}

attributes #0 = { "use-soft-float"="false" }

!llvm.ident = !{!0}

!0 = !{!"clang version 12.0.0"}
|
test/Transforms/VectorCombine/AMDGPU/lit.local.cfg (new file, 2 lines)
@@ -0,0 +1,2 @@
# Skip the tests in this directory unless the AMDGPU backend was built.
if 'AMDGPU' not in config.root.targets:
    config.unsupported = True
|
Loading…
x
Reference in New Issue
Block a user