mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[VectorCombine] loosen alignment constraint for load transform
As discussed in D93229, we only need a minimal alignment constraint when querying whether a hypothetical vector load is safe. We still pass/use the potentially stronger alignment attribute when checking costs and creating the new load.

There's already a test that changes with the minimum code change, so this is split off as a preliminary commit, independent of any gep/offset enhancements.

Differential Revision: https://reviews.llvm.org/D93397
This commit is contained in:
parent
04f2a7afd9
commit
7287b9953b
@ -134,13 +134,16 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
|
||||
return false;
|
||||
|
||||
// Check safety of replacing the scalar load with a larger vector load.
|
||||
// We use minimal alignment (maximum flexibility) because we only care about
|
||||
// the dereferenceable region. When calculating cost and creating a new op,
|
||||
// we may use a larger value based on alignment attributes.
|
||||
unsigned MinVecNumElts = MinVectorSize / ScalarSize;
|
||||
auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
|
||||
Align Alignment = Load->getAlign();
|
||||
if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Alignment, DL, Load, &DT))
|
||||
if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
|
||||
return false;
|
||||
|
||||
// Original pattern: insertelt undef, load [free casts of] PtrOp, 0
|
||||
Align Alignment = Load->getAlign();
|
||||
Type *LoadTy = Load->getType();
|
||||
int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
|
||||
APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
|
||||
|
@ -403,12 +403,14 @@ define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceab
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; Negative test? - pointer is not as aligned as load.
|
||||
; Pointer is not as aligned as load, but that's ok.
|
||||
; The new load uses the larger alignment value.
|
||||
|
||||
define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) {
|
||||
; CHECK-LABEL: @load_f32_insert_v4f32_align(
|
||||
; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
|
||||
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x float> [[R]]
|
||||
;
|
||||
%s = load float, float* %p, align 4
|
||||
|
Loading…
Reference in New Issue
Block a user