From 7287b9953b2ed17d22ae908d9a1bd2fd02cc7559 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 16 Dec 2020 12:11:57 -0500 Subject: [PATCH] [VectorCombine] loosen alignment constraint for load transform As discussed in D93229, we only need a minimal alignment constraint when querying whether a hypothetical vector load is safe. We still pass/use the potentially stronger alignment attribute when checking costs and creating the new load. There's already a test that changes with the minimum code change, so splitting this off as a preliminary commit independent of any gep/offset enhancements. Differential Revision: https://reviews.llvm.org/D93397 --- lib/Transforms/Vectorize/VectorCombine.cpp | 7 +++++-- test/Transforms/VectorCombine/X86/load.ll | 8 +++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/Transforms/Vectorize/VectorCombine.cpp b/lib/Transforms/Vectorize/VectorCombine.cpp index 89b60045ce9..086169c55c8 100644 --- a/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/lib/Transforms/Vectorize/VectorCombine.cpp @@ -134,13 +134,16 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { return false; // Check safety of replacing the scalar load with a larger vector load. + // We use minimal alignment (maximum flexibility) because we only care about + // the dereferenceable region. When calculating cost and creating a new op, + // we may use a larger value based on alignment attributes. 
 unsigned MinVecNumElts = MinVectorSize / ScalarSize; auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false); - Align Alignment = Load->getAlign(); - if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Alignment, DL, Load, &DT)) + if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) return false; // Original pattern: insertelt undef, load [free casts of] PtrOp, 0 + Align Alignment = Load->getAlign(); Type *LoadTy = Load->getType(); int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS); APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0); diff --git a/test/Transforms/VectorCombine/X86/load.ll b/test/Transforms/VectorCombine/X86/load.ll index d28d2876163..f5a962dd7cf 100644 --- a/test/Transforms/VectorCombine/X86/load.ll +++ b/test/Transforms/VectorCombine/X86/load.ll @@ -403,12 +403,14 @@ define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceab ret <4 x float> %r } -; Negative test? - pointer is not as aligned as load. +; Pointer is not as aligned as load, but that's ok. +; The new load uses the larger alignment value. define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v4f32_align( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, float* %p, align 4