mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-28 14:32:51 +01:00
6ce96b0ff0
This patch fixes bugs that were exposed by the addition of fast-math-flags in the DAG: r237046 ( http://reviews.llvm.org/rL237046 ):

1. When replacing a division node, it's not enough to RAUW. We should call CombineTo() to delete dead nodes and combine again.
2. Because we are changing the DAG, we can't return an empty SDValue after the transform. As the code comments say:

   Visitation implementation - Implement dag node combining for different node types. The semantics are as follows:
   Return Value:
     SDValue.getNode() == 0 - No change was made
     SDValue.getNode() == N - N was replaced, is dead and has been handled.
     otherwise              - N should be replaced by the returned Operand.

The new test case shows no difference with or without this patch, but it will crash if we re-apply r237046 or enable FMF via the current -enable-fmf-dag cl::opt.

Differential Revision: http://reviews.llvm.org/D9893

llvm-svn: 241826
49 lines
1.5 KiB
LLVM
49 lines
1.5 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
|
|
|
|
; When more than one division uses the same divisor operand, the divisions
; should be converted into one reciprocal and multiplications by it.
|
|
|
|
; Baseline case: %y is the divisor of only one fdiv, so no reciprocal
; transform is expected. The FileCheck lines below require a single divss
; followed directly by retq. %z is unused.
define float @div1_arcp(float %x, float %y, float %z) #0 {
; CHECK-LABEL: div1_arcp:
; CHECK: # BB#0:
; CHECK-NEXT: divss %xmm1, %xmm0
; CHECK-NEXT: retq
  %div1 = fdiv arcp float %x, %y
  ret float %div1
}
|
|
|
|
; %z is the divisor of two arcp fdivs, with an arcp fmul by %y in between.
; The expected assembly contains exactly one divss whose dividend is loaded
; from memory (presumably the constant 1.0, making xmm3 the reciprocal of %z;
; the pattern itself does not pin the loaded value), followed by three mulss
; and retq.
define float @div2_arcp(float %x, float %y, float %z) #0 {
; CHECK-LABEL: div2_arcp:
; CHECK: # BB#0:
; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT: divss %xmm2, %xmm3
; CHECK-NEXT: mulss %xmm1, %xmm0
; CHECK-NEXT: mulss %xmm3, %xmm0
; CHECK-NEXT: mulss %xmm3, %xmm0
; CHECK-NEXT: retq
  %div1 = fdiv arcp float %x, %z
  %mul = fmul arcp float %div1, %y
  %div2 = fdiv arcp float %mul, %z
  ret float %div2
}
|
|
|
|
; If the reciprocal is already calculated, we should not
|
|
; generate an extra multiplication by 1.0.
|
|
|
|
; 1.0/%y and %x/%y share the divisor %y, and the first quotient already is
; the reciprocal. The expected assembly has one divsd, one mulsd and one
; addsd, so no additional multiply (by 1.0) may be emitted. Note that,
; despite the function name, these instructions carry the `fast` flag rather
; than `arcp`. %z is unused.
define double @div3_arcp(double %x, double %y, double %z) #0 {
; CHECK-LABEL: div3_arcp:
; CHECK: # BB#0:
; CHECK-NEXT: movsd{{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: divsd %xmm1, %xmm2
; CHECK-NEXT: mulsd %xmm2, %xmm0
; CHECK-NEXT: addsd %xmm2, %xmm0
; CHECK-NEXT: retq
  %div1 = fdiv fast double 1.0, %y
  %div2 = fdiv fast double %x, %y
  %ret = fadd fast double %div2, %div1
  ret double %ret
}
|
|
|
|
; FIXME: If the backend understands 'arcp', then this attribute is unnecessary.
|
|
; Attribute group #0, attached to each function in this file through the
; trailing `#0` on its define line; it sets the "unsafe-fp-math" string
; attribute to "true".
attributes #0 = { "unsafe-fp-math"="true" }
|