1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
llvm-mirror/test/CodeGen/X86/machine-combiner.ll
Florian Hahn 23d2ec44ef [MachineCombiner] Add check for optimal pattern order.
In D41587, @mssimpso discovered that the order of some patterns for
AArch64 was sub-optimal. I thought a bit about how we could avoid that
case in the future. I do not think there is a need for evaluating all
patterns for now. But this patch adds an extra (expensive) check, that
evaluates the latencies of all patterns, and ensures that the latency
saved decreases for subsequent patterns.

This catches the sub-optimal order fixed in D41587, but I am not
entirely happy with the check, as it only applies to sub-optimal
patterns seen while building with EXPENSIVE_CHECKS on. It did not
discover any other sub-optimal pattern ordering.

Reviewers: Gerolf, spatel, mssimpso

Reviewed By: Gerolf, mssimpso

Differential Revision: https://reviews.llvm.org/D41766

llvm-svn: 323873
2018-01-31 13:54:30 +00:00

679 lines
22 KiB
LLVM

; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX
; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=AVX
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds1:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
ret float %t2
}
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds2:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %t1, %x3
ret float %t2
}
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds3:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %x3, %t1
ret float %t2
}
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds4:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
ret float %t2
}
; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: addss %xmm5, %xmm4
; SSE-NEXT: addss %xmm6, %xmm4
; SSE-NEXT: addss %xmm4, %xmm0
; SSE-NEXT: addss %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds5:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
%t3 = fadd float %t2, %x4
%t4 = fadd float %t3, %x5
%t5 = fadd float %t4, %x6
%t6 = fadd float %t5, %x7
ret float %t6
}
; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds6:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
ret float %t2
}
; Verify that SSE and AVX scalar single-precision multiplies are reassociated.
define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: mulss %xmm3, %xmm2
; SSE-NEXT: mulss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls1:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%t1 = fmul float %x2, %t0
%t2 = fmul float %x3, %t1
ret float %t2
}
; Verify that SSE and AVX scalar double-precision adds are reassociated.
define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: addsd %xmm3, %xmm2
; SSE-NEXT: addsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv double %x0, %x1
%t1 = fadd double %x2, %t0
%t2 = fadd double %x3, %t1
ret double %t2
}
; Verify that SSE and AVX scalar double-precision multiplies are reassociated.
define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm2
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv double %x0, %x1
%t1 = fmul double %x2, %t0
%t2 = fmul double %x3, %t1
ret double %t2
}
; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.
define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm1, %xmm0
; SSE-NEXT: addps %xmm3, %xmm2
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fmul <4 x float> %x0, %x1
%t1 = fadd <4 x float> %x2, %t0
%t2 = fadd <4 x float> %x3, %t1
ret <4 x float> %t2
}
; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.
define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm2
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fmul <2 x double> %x0, %x1
%t1 = fadd <2 x double> %x2, %t0
%t2 = fadd <2 x double> %x3, %t1
ret <2 x double> %t2
}
; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.
define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: mulps %xmm3, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd <4 x float> %x0, %x1
%t1 = fmul <4 x float> %x2, %t0
%t2 = fmul <4 x float> %x3, %t1
ret <4 x float> %t2
}
; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.
define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: mulpd %xmm3, %xmm2
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd <2 x double> %x0, %x1
%t1 = fmul <2 x double> %x2, %t0
%t2 = fmul <2 x double> %x3, %t1
ret <2 x double> %t2
}
; Verify that AVX 256-bit vector single-precision adds are reassociated.
define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_adds_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vaddps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%t0 = fmul <8 x float> %x0, %x1
%t1 = fadd <8 x float> %x2, %t0
%t2 = fadd <8 x float> %x3, %t1
ret <8 x float> %t2
}
; Verify that AVX 256-bit vector double-precision adds are reassociated.
define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_adds_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%t0 = fmul <4 x double> %x0, %x1
%t1 = fadd <4 x double> %x2, %t0
%t2 = fadd <4 x double> %x3, %t1
ret <4 x double> %t2
}
; Verify that AVX 256-bit vector single-precision multiplies are reassociated.
define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_muls_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%t0 = fadd <8 x float> %x0, %x1
%t1 = fmul <8 x float> %x2, %t0
%t2 = fmul <8 x float> %x3, %t1
ret <8 x float> %t2
}
; Verify that AVX 256-bit vector double-precision multiplies are reassociated.
define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_muls_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%t0 = fadd <4 x double> %x0, %x1
%t1 = fmul <4 x double> %x2, %t0
%t2 = fmul <4 x double> %x3, %t1
ret <4 x double> %t2
}
; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: minss %xmm3, %xmm2
; SSE-NEXT: minss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%cmp1 = fcmp olt float %x2, %t0
%sel1 = select i1 %cmp1, float %x2, float %t0
%cmp2 = fcmp olt float %x3, %sel1
%sel2 = select i1 %cmp2, float %x3, float %sel1
ret float %sel2
}
; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.
define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: maxss %xmm3, %xmm2
; SSE-NEXT: maxss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%cmp1 = fcmp ogt float %x2, %t0
%sel1 = select i1 %cmp1, float %x2, float %t0
%cmp2 = fcmp ogt float %x3, %sel1
%sel2 = select i1 %cmp2, float %x3, float %sel1
ret float %sel2
}
; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.
define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: minsd %xmm3, %xmm2
; SSE-NEXT: minsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv double %x0, %x1
%cmp1 = fcmp olt double %x2, %t0
%sel1 = select i1 %cmp1, double %x2, double %t0
%cmp2 = fcmp olt double %x3, %sel1
%sel2 = select i1 %cmp2, double %x3, double %sel1
ret double %sel2
}
; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.
define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: maxsd %xmm3, %xmm2
; SSE-NEXT: maxsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv double %x0, %x1
%cmp1 = fcmp ogt double %x2, %t0
%sel1 = select i1 %cmp1, double %x2, double %t0
%cmp2 = fcmp ogt double %x3, %sel1
%sel2 = select i1 %cmp2, double %x3, double %sel1
ret double %sel2
}
; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.
define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: minps %xmm3, %xmm2
; SSE-NEXT: minps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd <4 x float> %x0, %x1
%cmp1 = fcmp olt <4 x float> %x2, %t0
%sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
%cmp2 = fcmp olt <4 x float> %x3, %sel1
%sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
ret <4 x float> %sel2
}
; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.
define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: maxps %xmm3, %xmm2
; SSE-NEXT: maxps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd <4 x float> %x0, %x1
%cmp1 = fcmp ogt <4 x float> %x2, %t0
%sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
%cmp2 = fcmp ogt <4 x float> %x3, %sel1
%sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
ret <4 x float> %sel2
}
; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.
define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: minpd %xmm3, %xmm2
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd <2 x double> %x0, %x1
%cmp1 = fcmp olt <2 x double> %x2, %t0
%sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
%cmp2 = fcmp olt <2 x double> %x3, %sel1
%sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
ret <2 x double> %sel2
}
; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.
define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: maxpd %xmm3, %xmm2
; SSE-NEXT: maxpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd <2 x double> %x0, %x1
%cmp1 = fcmp ogt <2 x double> %x2, %t0
%sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
%cmp2 = fcmp ogt <2 x double> %x3, %sel1
%sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
ret <2 x double> %sel2
}
; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.
define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_mins_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%t0 = fadd <8 x float> %x0, %x1
%cmp1 = fcmp olt <8 x float> %x2, %t0
%sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
%cmp2 = fcmp olt <8 x float> %x3, %sel1
%sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
ret <8 x float> %sel2
}
; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.
define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%t0 = fadd <8 x float> %x0, %x1
%cmp1 = fcmp ogt <8 x float> %x2, %t0
%sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
%cmp2 = fcmp ogt <8 x float> %x3, %sel1
%sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
ret <8 x float> %sel2
}
; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.
define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_mins_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%t0 = fadd <4 x double> %x0, %x1
%cmp1 = fcmp olt <4 x double> %x2, %t0
%sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
%cmp2 = fcmp olt <4 x double> %x3, %sel1
%sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
ret <4 x double> %sel2
}
; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.
define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
%t0 = fadd <4 x double> %x0, %x1
%cmp1 = fcmp ogt <4 x double> %x2, %t0
%sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
%cmp2 = fcmp ogt <4 x double> %x3, %sel1
%sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
ret <4 x double> %sel2
}
; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.
declare double @bar()
define double @reassociate_adds_from_calls() {
; AVX-LABEL: reassociate_adds_from_calls:
; AVX: callq bar
; AVX-NEXT: vmovsd %xmm0, 16(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, 8(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, (%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd 8(%rsp), %xmm1
; AVX: vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
%x0 = call double @bar()
%x1 = call double @bar()
%x2 = call double @bar()
%x3 = call double @bar()
%t0 = fadd double %x0, %x1
%t1 = fadd double %t0, %x2
%t2 = fadd double %t1, %x3
ret double %t2
}
define double @already_reassociated() {
; AVX-LABEL: already_reassociated:
; AVX: callq bar
; AVX-NEXT: vmovsd %xmm0, 16(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, 8(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, (%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd 8(%rsp), %xmm1
; AVX: vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
%x0 = call double @bar()
%x1 = call double @bar()
%x2 = call double @bar()
%x3 = call double @bar()
%t0 = fadd double %x0, %x1
%t1 = fadd double %x2, %x3
%t2 = fadd double %t0, %t1
ret double %t2
}