mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
Fix an issue where fast math flags were dropped during scalarization.
Most parts of InstCombine propagate fast math flags correctly, but the vector scalarization paths (InstCombiner::scalarizePHI and the binary-operator case of InstCombiner::visitExtractElementInst) were overlooked. llvm-svn: 262376
This commit is contained in:
parent
fb2b660171
commit
999a9f171c
@ -386,6 +386,15 @@ public:
|
||||
}
|
||||
#include "llvm/IR/Instruction.def"
|
||||
|
||||
/// Build a binary operator with opcode \p Opc over \p V1 and \p V2, then copy
/// the IR flags of \p CopyBO onto the new instruction before returning it.
///
/// This is Create() followed by copyIRFlags(), packaged so that callers which
/// rebuild an operation from an existing one do not forget the second step.
static BinaryOperator *CreateWithCopiedFlags(BinaryOps Opc,
                                             Value *V1, Value *V2,
                                             BinaryOperator *CopyBO,
                                             const Twine &Name = "") {
  BinaryOperator *const Result = Create(Opc, V1, V2, Name);
  // Carry over whatever flags copyIRFlags() transfers from the source op.
  Result->copyIRFlags(CopyBO);
  return Result;
}
|
||||
|
||||
static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
|
||||
const Twine &Name = "") {
|
||||
BinaryOperator *BO = Create(Opc, V1, V2, Name);
|
||||
|
@ -106,7 +106,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
|
||||
B0->getOperand(opId)->getName() + ".Elt"),
|
||||
*B0);
|
||||
Value *newPHIUser = InsertNewInstWith(
|
||||
BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0);
|
||||
BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
|
||||
scalarPHI, Op, B0), *B0);
|
||||
scalarPHI->addIncoming(newPHIUser, inBB);
|
||||
} else {
|
||||
// Scalarize PHI input:
|
||||
@ -193,7 +194,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
|
||||
Value *newEI1 =
|
||||
Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
|
||||
EI.getName()+".rhs");
|
||||
return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
|
||||
return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(),
|
||||
newEI0, newEI1, BO);
|
||||
}
|
||||
} else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
|
||||
// Extracting the inserted element?
|
||||
|
39
test/Transforms/InstCombine/fast-math-scalarization.ll
Normal file
39
test/Transforms/InstCombine/fast-math-scalarization.ll
Normal file
@ -0,0 +1,39 @@
|
||||
; RUN: opt -instcombine -S < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: test_scalarize_phi
|
||||
; CHECK: fmul fast float
|
||||
; Loop in which a <4 x float> phi (%x.0) is fed on the back edge by a vector
; 'fmul fast', while the only scalar ever extracted from it is lane 1.
; The CHECK line for this function expects a scalar 'fmul fast float' in the
; output, i.e. the 'fast' flag must still be present on the scalar multiply.
define void @test_scalarize_phi(i32 * %n, float * %inout) {
|
||||
entry:
|
||||
%0 = load volatile float, float * %inout, align 4
|
||||
; Splat the loaded scalar: insert into lane 0, then shuffle with an all-zero mask.
%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
|
||||
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
; NOTE(review): %splat.splatinsert1 has no users anywhere in this function.
%splat.splatinsert1 = insertelement <4 x float> undef, float 3.0, i32 0
|
||||
br label %for.cond
|
||||
|
||||
for.cond:
|
||||
; Loop-carried vector: the splat on entry, the multiplied vector from for.body.
%x.0 = phi <4 x float> [ %splat.splat, %entry ], [ %mul, %for.body ]
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
; The trip count is re-read from %n on every iteration.
%1 = load i32, i32 * %n, align 4
|
||||
%cmp = icmp ne i32 %i.0, %1
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body:
|
||||
; Lane 1 of the phi is extracted and written out with a volatile store.
%2 = extractelement <4 x float> %x.0, i32 1
|
||||
store volatile float %2, float * %inout, align 4
|
||||
; Vector multiply by a splat constant, carrying the 'fast' flag under test.
%mul = fmul fast <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
|
||||
%inc = add nsw i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_extract_element_fastmath
|
||||
; CHECK: fadd fast float
|
||||
; Returns lane 2 of a vector 'fadd fast' between %x and a splat constant.
; The CHECK line for this function expects a scalar 'fadd fast float' in the
; output, i.e. the 'fast' flag must still be present on the scalar add.
; NOTE(review): attribute group #0 is referenced below, but no matching
; 'attributes #0' definition is visible in this file view - confirm.
define float @test_extract_element_fastmath(<4 x float> %x) #0 {
|
||||
entry:
|
||||
; Vector add carrying the 'fast' flag under test; all four constant lanes are equal.
%add = fadd fast <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
|
||||
; Only lane 2 of the sum is used.
%0 = extractelement <4 x float> %add, i32 2
|
||||
ret float %0
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user