mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
[LoopVectorize] Fix assertion failure in Fcmp vectorization
Summary:
When vectorizing fcmps, we can trip an incorrect-cast assertion when setting the FastMathFlags after generating the vectorized FCmp. This can happen if the FCmp can be folded to true or false directly, because the value returned by the builder is then a constant rather than an FCmp instruction.
The fix here is to set the FastMathFlags using the FastMathFlagBuilder *before* creating the FCmp instruction. This is what is done by other optimizations such as InstCombine.
Added a test case which trips the cast assertion without this patch.
Reviewers: Ayal, mssimpso, mkuper, gilr
Reviewed by: Ayal, mssimpso
Subscribers: llvm-commits, mzolotukhin
Differential Revision: https://reviews.llvm.org/D36244
llvm-svn: 310389
This commit is contained in:
parent
3a6ef98b8f
commit
ce71877c5e
@ -4863,8 +4863,10 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
|
||||
Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part);
|
||||
Value *C = nullptr;
|
||||
if (FCmp) {
|
||||
// Propagate fast math flags.
|
||||
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
|
||||
Builder.setFastMathFlags(Cmp->getFastMathFlags());
|
||||
C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
|
||||
cast<FCmpInst>(C)->copyFastMathFlags(Cmp);
|
||||
} else {
|
||||
C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
|
||||
}
|
||||
|
25
test/Transforms/LoopVectorize/fcmp-vectorize.ll
Normal file
25
test/Transforms/LoopVectorize/fcmp-vectorize.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
|
||||
|
||||
; Avoid crashing while trying to vectorize fcmp that can be folded to vector of
|
||||
; i1 false (fcmp uno on two non-NaN constants folds to false).
|
||||
define void @test1() {
|
||||
; CHECK-LABEL: test1(
|
||||
; CHECK-LABEL: vector.body:
|
||||
; CHECK-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
|
||||
; CHECK-NEXT: %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
|
||||
; CHECK: %induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK: %index.next = add i32 %index, 4
|
||||
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop: ; preds = %loop, %entry
|
||||
%iv = phi i32 [ 0, %entry ], [ %ivnext, %loop ]
|
||||
%fcmp = fcmp uno float 0.000000e+00, 0.000000e+00
|
||||
%ivnext = add nsw i32 %iv, 1
|
||||
%cnd = icmp sgt i32 %iv, 142
|
||||
br i1 %cnd, label %exit, label %loop
|
||||
|
||||
exit: ; preds = %loop
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user