commit 7b6e711e86

The code changes here are hopefully straightforward:

1. Use MachineInstruction flags to decide if FP ops can be reassociated (use
   both "reassoc" and "nsz" to be consistent with IR transforms; we probably
   don't need "nsz", but that's a safer interpretation of the FMF).

2. Check that both nodes allow reassociation to change instructions. This is a
   stronger requirement than we've usually implemented in IR/DAG, but this is
   needed to solve the motivating bug (see below), and it seems unlikely to
   impede optimization at this late stage.

3. Intersect/propagate MachineIR flags to enable further reassociation in
   MachineCombiner.

We managed to make MachineCombiner flexible enough that no changes are needed
to that pass itself. So this patch should only affect x86 (assuming no other
targets have implemented the hooks using MachineIR flags yet).

The motivating example in PR43609 is another case of fast-math transforms
interacting badly with special FP ops created during lowering:
https://bugs.llvm.org/show_bug.cgi?id=43609

The special fadd ops used for converting int to FP assume that they will not be
altered, so those are created without FMF. However, the MachineCombiner pass
was being enabled for FP ops using the global/function-level TargetOption for
"UnsafeFPMath". We managed to run instruction/node-level FMF all the way down
to MachineIR sometime in the last 1-2 years though, so we can do better now.

The test diffs require some explanation:

1. llvm/test/CodeGen/X86/fmf-flags.ll - no target option for unsafe math was
   specified here, so MachineCombiner kicks in where it did not previously; to
   make it behave consistently, we need to specify a CPU schedule model, so use
   the default model, and there are no code diffs.

2. llvm/test/CodeGen/X86/machine-combiner.ll - replace the target option for
   unsafe math with the equivalent IR-level flags, and there are no code diffs;
   we can't remove the NaN/nsz options because those are still used to drive
   x86 fmin/fmax codegen (special SDAG opcodes).

3. llvm/test/CodeGen/X86/pow.ll - similar to #1

4. llvm/test/CodeGen/X86/sqrt-fastmath.ll - similar to #1, but MachineCombiner
   does some reassociation of the estimate sequence ops; presumably these are
   perf wins based on latency/throughput (and we get some reduction of move
   instructions too); I'm not sure how it affects numerical accuracy, but the
   test reflects reality better now because we would expect MachineCombiner to
   be enabled if the IR was generated via something like "-ffast-math" with
   clang.

5. llvm/test/CodeGen/X86/vec_int_to_fp.ll - this is the test added to model
   PR43609; the fadds are not reassociated now, so we should get the expected
   results.

6. llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll - similar to #1

7. llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll - similar to #1

Differential Revision: https://reviews.llvm.org/D74851
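As a rough illustration of points 1-3 above, the per-instruction check and flag
intersection could look like the following minimal C++ sketch. This is not the
actual TargetInstrInfo hook; the function names are made up, but the
MachineInstr flag accessors are the real ones.

    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    // Reassociation is allowed only when *both* machine instructions carry the
    // relaxed-FP flags (points 1 and 2 above).
    static bool bothAllowReassociation(const MachineInstr &Root,
                                       const MachineInstr &Prev) {
      auto HasFlags = [](const MachineInstr &MI) {
        return MI.getFlag(MachineInstr::FmReassoc) &&
               MI.getFlag(MachineInstr::FmNsz);
      };
      return HasFlags(Root) && HasFlags(Prev);
    }

    // Point 3: the rewritten instruction keeps only the flags common to both
    // originals, so later MachineCombiner iterations can keep reassociating.
    static void intersectFlags(MachineInstr &New, const MachineInstr &Old1,
                               const MachineInstr &Old2) {
      New.setFlags(Old1.getFlags() & Old2.getFlags());
    }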
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=sse -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=avx -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=avx512vl -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
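; Note: the global -enable-no-nans-fp-math / -enable-no-signed-zeros-fp-math
; options above are still needed to form the x86-specific fmin/fmax nodes
; exercised below; the reassociations themselves are driven by the
; per-instruction 'reassoc nsz' flags.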

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
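; For example, the serial chain ((x0 + x1) + x2) + x3 is rebalanced into
; (x0 + x1) + (x2 + x3), so the first two adds can execute in parallel.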

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds1:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %t0, %x2
%t2 = fadd reassoc nsz float %t1, %x3
ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds2:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %x2, %t0
%t2 = fadd reassoc nsz float %t1, %x3
ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds3:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %t0, %x2
%t2 = fadd reassoc nsz float %x3, %t1
ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds4:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %x2, %t0
%t2 = fadd reassoc nsz float %x3, %t1
ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
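; The chain below ends up as (((x0 + x1) + (x2 + x3)) + ((x4 + x5) + x6)) + x7
; rather than a fully balanced tree.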

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: addss %xmm5, %xmm4
; SSE-NEXT: addss %xmm6, %xmm4
; SSE-NEXT: addss %xmm4, %xmm0
; SSE-NEXT: addss %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds5:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %t0, %x2
%t2 = fadd reassoc nsz float %t1, %x3
%t3 = fadd reassoc nsz float %t2, %x4
%t4 = fadd reassoc nsz float %t3, %x5
%t5 = fadd reassoc nsz float %t4, %x6
%t6 = fadd reassoc nsz float %t5, %x7
ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
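; Here (x2 + x3) is computed while the divide is still in flight, and the
; division result feeds only the final add.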

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds6:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %x2, %t0
%t2 = fadd reassoc nsz float %x3, %t1
ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.
|
|
|
|
define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
|
|
; SSE-LABEL: reassociate_muls1:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: divss %xmm1, %xmm0
|
|
; SSE-NEXT: mulss %xmm3, %xmm2
|
|
; SSE-NEXT: mulss %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_muls1:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fdiv reassoc nsz float %x0, %x1
|
|
%t1 = fmul reassoc nsz float %x2, %t0
|
|
%t2 = fmul reassoc nsz float %x3, %t1
|
|
ret float %t2
|
|
}
|
|
|
|
; Verify that SSE and AVX scalar double-precision adds are reassociated.
|
|
|
|
define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
|
|
; SSE-LABEL: reassociate_adds_double:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: divsd %xmm1, %xmm0
|
|
; SSE-NEXT: addsd %xmm3, %xmm2
|
|
; SSE-NEXT: addsd %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_adds_double:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fdiv reassoc nsz double %x0, %x1
|
|
%t1 = fadd reassoc nsz double %x2, %t0
|
|
%t2 = fadd reassoc nsz double %x3, %t1
|
|
ret double %t2
|
|
}
|
|
|
|
; Verify that SSE and AVX scalar double-precision multiplies are reassociated.
|
|
|
|
define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
|
|
; SSE-LABEL: reassociate_muls_double:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: divsd %xmm1, %xmm0
|
|
; SSE-NEXT: mulsd %xmm3, %xmm2
|
|
; SSE-NEXT: mulsd %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_muls_double:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fdiv reassoc nsz double %x0, %x1
|
|
%t1 = fmul reassoc nsz double %x2, %t0
|
|
%t2 = fmul reassoc nsz double %x3, %t1
|
|
ret double %t2
|
|
}
|
|
|
|
; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.
|
|
|
|
define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
|
|
; SSE-LABEL: reassociate_adds_v4f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: mulps %xmm1, %xmm0
|
|
; SSE-NEXT: addps %xmm3, %xmm2
|
|
; SSE-NEXT: addps %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_adds_v4f32:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
|
; AVX1-NEXT: vaddps %xmm3, %xmm2, %xmm1
|
|
; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_adds_v4f32:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
|
|
; AVX512-NEXT: vaddps %xmm0, %xmm3, %xmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fmul reassoc nsz <4 x float> %x0, %x1
|
|
%t1 = fadd reassoc nsz <4 x float> %x2, %t0
|
|
%t2 = fadd reassoc nsz <4 x float> %x3, %t1
|
|
ret <4 x float> %t2
|
|
}
|
|
|
|
; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.
|
|
|
|
define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
|
|
; SSE-LABEL: reassociate_adds_v2f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: mulpd %xmm1, %xmm0
|
|
; SSE-NEXT: addpd %xmm3, %xmm2
|
|
; SSE-NEXT: addpd %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_adds_v2f64:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm0
|
|
; AVX1-NEXT: vaddpd %xmm3, %xmm2, %xmm1
|
|
; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_adds_v2f64:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
|
|
; AVX512-NEXT: vaddpd %xmm0, %xmm3, %xmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fmul reassoc nsz <2 x double> %x0, %x1
|
|
%t1 = fadd reassoc nsz <2 x double> %x2, %t0
|
|
%t2 = fadd reassoc nsz <2 x double> %x3, %t1
|
|
ret <2 x double> %t2
|
|
}
|
|
|
|
; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.
|
|
|
|
define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
|
|
; SSE-LABEL: reassociate_muls_v4f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addps %xmm1, %xmm0
|
|
; SSE-NEXT: mulps %xmm3, %xmm2
|
|
; SSE-NEXT: mulps %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_muls_v4f32:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vmulps %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd reassoc nsz <4 x float> %x0, %x1
|
|
%t1 = fmul reassoc nsz <4 x float> %x2, %t0
|
|
%t2 = fmul reassoc nsz <4 x float> %x3, %t1
|
|
ret <4 x float> %t2
|
|
}
|
|
|
|
; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.
|
|
|
|
define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
|
|
; SSE-LABEL: reassociate_muls_v2f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addpd %xmm1, %xmm0
|
|
; SSE-NEXT: mulpd %xmm3, %xmm2
|
|
; SSE-NEXT: mulpd %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_muls_v2f64:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vmulpd %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd reassoc nsz <2 x double> %x0, %x1
|
|
%t1 = fmul reassoc nsz <2 x double> %x2, %t0
|
|
%t2 = fmul reassoc nsz <2 x double> %x3, %t1
|
|
ret <2 x double> %t2
|
|
}
|
|
|
|
; Verify that AVX 256-bit vector single-precision adds are reassociated.
|
|
|
|
define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
|
|
; SSE-LABEL: reassociate_adds_v8f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: mulps %xmm2, %xmm0
|
|
; SSE-NEXT: mulps %xmm3, %xmm1
|
|
; SSE-NEXT: addps %xmm6, %xmm4
|
|
; SSE-NEXT: addps %xmm4, %xmm0
|
|
; SSE-NEXT: addps %xmm7, %xmm5
|
|
; SSE-NEXT: addps %xmm5, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_adds_v8f32:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddps %ymm3, %ymm2, %ymm1
|
|
; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_adds_v8f32:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
|
|
; AVX512-NEXT: vaddps %ymm0, %ymm3, %ymm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fmul reassoc nsz <8 x float> %x0, %x1
|
|
%t1 = fadd reassoc nsz <8 x float> %x2, %t0
|
|
%t2 = fadd reassoc nsz <8 x float> %x3, %t1
|
|
ret <8 x float> %t2
|
|
}
|
|
|
|
; Verify that AVX 256-bit vector double-precision adds are reassociated.
|
|
|
|
define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
|
|
; SSE-LABEL: reassociate_adds_v4f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: mulpd %xmm2, %xmm0
|
|
; SSE-NEXT: mulpd %xmm3, %xmm1
|
|
; SSE-NEXT: addpd %xmm6, %xmm4
|
|
; SSE-NEXT: addpd %xmm4, %xmm0
|
|
; SSE-NEXT: addpd %xmm7, %xmm5
|
|
; SSE-NEXT: addpd %xmm5, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_adds_v4f64:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vmulpd %ymm1, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddpd %ymm3, %ymm2, %ymm1
|
|
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_adds_v4f64:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
|
|
; AVX512-NEXT: vaddpd %ymm0, %ymm3, %ymm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fmul reassoc nsz <4 x double> %x0, %x1
|
|
%t1 = fadd reassoc nsz <4 x double> %x2, %t0
|
|
%t2 = fadd reassoc nsz <4 x double> %x3, %t1
|
|
ret <4 x double> %t2
|
|
}
|
|
|
|
; Verify that AVX 256-bit vector single-precision multiplies are reassociated.
|
|
|
|
define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
|
|
; SSE-LABEL: reassociate_muls_v8f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addps %xmm2, %xmm0
|
|
; SSE-NEXT: addps %xmm3, %xmm1
|
|
; SSE-NEXT: mulps %xmm6, %xmm4
|
|
; SSE-NEXT: mulps %xmm4, %xmm0
|
|
; SSE-NEXT: mulps %xmm7, %xmm5
|
|
; SSE-NEXT: mulps %xmm5, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_muls_v8f32:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: vmulps %ymm3, %ymm2, %ymm1
|
|
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd reassoc nsz <8 x float> %x0, %x1
|
|
%t1 = fmul reassoc nsz <8 x float> %x2, %t0
|
|
%t2 = fmul reassoc nsz <8 x float> %x3, %t1
|
|
ret <8 x float> %t2
|
|
}
|
|
|
|
; Verify that AVX 256-bit vector double-precision multiplies are reassociated.
|
|
|
|
define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
|
|
; SSE-LABEL: reassociate_muls_v4f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addpd %xmm2, %xmm0
|
|
; SSE-NEXT: addpd %xmm3, %xmm1
|
|
; SSE-NEXT: mulpd %xmm6, %xmm4
|
|
; SSE-NEXT: mulpd %xmm4, %xmm0
|
|
; SSE-NEXT: mulpd %xmm7, %xmm5
|
|
; SSE-NEXT: mulpd %xmm5, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_muls_v4f64:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: vmulpd %ymm3, %ymm2, %ymm1
|
|
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd reassoc nsz <4 x double> %x0, %x1
|
|
%t1 = fmul reassoc nsz <4 x double> %x2, %t0
|
|
%t2 = fmul reassoc nsz <4 x double> %x3, %t1
|
|
ret <4 x double> %t2
|
|
}
|
|
|
|
; Verify that AVX512 512-bit vector single-precision adds are reassociated.
|
|
|
|
define <16 x float> @reassociate_adds_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
|
|
; SSE-LABEL: reassociate_adds_v16f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: mulps %xmm4, %xmm0
|
|
; SSE-NEXT: mulps %xmm5, %xmm1
|
|
; SSE-NEXT: mulps %xmm6, %xmm2
|
|
; SSE-NEXT: mulps %xmm7, %xmm3
|
|
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_adds_v16f32:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
|
; AVX1-NEXT: vaddps %ymm6, %ymm4, %ymm2
|
|
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddps %ymm7, %ymm5, %ymm2
|
|
; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_adds_v16f32:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
|
|
; AVX512-NEXT: vaddps %zmm0, %zmm3, %zmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fmul reassoc nsz <16 x float> %x0, %x1
|
|
%t1 = fadd reassoc nsz <16 x float> %x2, %t0
|
|
%t2 = fadd reassoc nsz <16 x float> %x3, %t1
|
|
ret <16 x float> %t2
|
|
}
|
|
|
|
; Verify that AVX512 512-bit vector double-precision adds are reassociated.
|
|
|
|
define <8 x double> @reassociate_adds_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
|
|
; SSE-LABEL: reassociate_adds_v8f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: mulpd %xmm4, %xmm0
|
|
; SSE-NEXT: mulpd %xmm5, %xmm1
|
|
; SSE-NEXT: mulpd %xmm6, %xmm2
|
|
; SSE-NEXT: mulpd %xmm7, %xmm3
|
|
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_adds_v8f64:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vmulpd %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
|
; AVX1-NEXT: vaddpd %ymm6, %ymm4, %ymm2
|
|
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddpd %ymm7, %ymm5, %ymm2
|
|
; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_adds_v8f64:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
|
|
; AVX512-NEXT: vaddpd %zmm0, %zmm3, %zmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fmul reassoc nsz <8 x double> %x0, %x1
|
|
%t1 = fadd reassoc nsz <8 x double> %x2, %t0
|
|
%t2 = fadd reassoc nsz <8 x double> %x3, %t1
|
|
ret <8 x double> %t2
|
|
}
|
|
|
|
; Verify that AVX512 512-bit vector single-precision multiplies are reassociated.
|
|
|
|
define <16 x float> @reassociate_muls_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
|
|
; SSE-LABEL: reassociate_muls_v16f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addps %xmm4, %xmm0
|
|
; SSE-NEXT: addps %xmm5, %xmm1
|
|
; SSE-NEXT: addps %xmm6, %xmm2
|
|
; SSE-NEXT: addps %xmm7, %xmm3
|
|
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_muls_v16f32:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm1
|
|
; AVX1-NEXT: vmulps %ymm6, %ymm4, %ymm2
|
|
; AVX1-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vmulps %ymm7, %ymm5, %ymm2
|
|
; AVX1-NEXT: vmulps %ymm2, %ymm1, %ymm1
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_muls_v16f32:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: vmulps %zmm3, %zmm2, %zmm1
|
|
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fadd reassoc nsz <16 x float> %x0, %x1
|
|
%t1 = fmul reassoc nsz <16 x float> %x2, %t0
|
|
%t2 = fmul reassoc nsz <16 x float> %x3, %t1
|
|
ret <16 x float> %t2
|
|
}
|
|
|
|
; Verify that AVX512 512-bit vector double-precision multiplies are reassociated.
|
|
|
|
define <8 x double> @reassociate_muls_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
|
|
; SSE-LABEL: reassociate_muls_v8f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addpd %xmm4, %xmm0
|
|
; SSE-NEXT: addpd %xmm5, %xmm1
|
|
; SSE-NEXT: addpd %xmm6, %xmm2
|
|
; SSE-NEXT: addpd %xmm7, %xmm3
|
|
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_muls_v8f64:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
|
|
; AVX1-NEXT: vmulpd %ymm6, %ymm4, %ymm2
|
|
; AVX1-NEXT: vmulpd %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vmulpd %ymm7, %ymm5, %ymm2
|
|
; AVX1-NEXT: vmulpd %ymm2, %ymm1, %ymm1
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_muls_v8f64:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: vmulpd %zmm3, %zmm2, %zmm1
|
|
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fadd reassoc nsz <8 x double> %x0, %x1
|
|
%t1 = fmul reassoc nsz <8 x double> %x2, %t0
|
|
%t2 = fmul reassoc nsz <8 x double> %x3, %t1
|
|
ret <8 x double> %t2
|
|
}
|
|
|
|
; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
|
|
|
|
define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
|
|
; SSE-LABEL: reassociate_mins_single:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: divss %xmm1, %xmm0
|
|
; SSE-NEXT: minss %xmm3, %xmm2
|
|
; SSE-NEXT: minss %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_mins_single:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vminss %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fdiv float %x0, %x1
|
|
%cmp1 = fcmp olt float %x2, %t0
|
|
%sel1 = select i1 %cmp1, float %x2, float %t0
|
|
%cmp2 = fcmp olt float %x3, %sel1
|
|
%sel2 = select i1 %cmp2, float %x3, float %sel1
|
|
ret float %sel2
|
|
}
|
|
|
|
; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.
|
|
|
|
define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
|
|
; SSE-LABEL: reassociate_maxs_single:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: divss %xmm1, %xmm0
|
|
; SSE-NEXT: maxss %xmm3, %xmm2
|
|
; SSE-NEXT: maxss %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_maxs_single:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vmaxss %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fdiv float %x0, %x1
|
|
%cmp1 = fcmp ogt float %x2, %t0
|
|
%sel1 = select i1 %cmp1, float %x2, float %t0
|
|
%cmp2 = fcmp ogt float %x3, %sel1
|
|
%sel2 = select i1 %cmp2, float %x3, float %sel1
|
|
ret float %sel2
|
|
}
|
|
|
|
; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.
|
|
|
|
define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
|
|
; SSE-LABEL: reassociate_mins_double:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: divsd %xmm1, %xmm0
|
|
; SSE-NEXT: minsd %xmm3, %xmm2
|
|
; SSE-NEXT: minsd %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_mins_double:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vminsd %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fdiv double %x0, %x1
|
|
%cmp1 = fcmp olt double %x2, %t0
|
|
%sel1 = select i1 %cmp1, double %x2, double %t0
|
|
%cmp2 = fcmp olt double %x3, %sel1
|
|
%sel2 = select i1 %cmp2, double %x3, double %sel1
|
|
ret double %sel2
|
|
}
|
|
|
|
; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.
|
|
|
|
define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
|
|
; SSE-LABEL: reassociate_maxs_double:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: divsd %xmm1, %xmm0
|
|
; SSE-NEXT: maxsd %xmm3, %xmm2
|
|
; SSE-NEXT: maxsd %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_maxs_double:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vmaxsd %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fdiv double %x0, %x1
|
|
%cmp1 = fcmp ogt double %x2, %t0
|
|
%sel1 = select i1 %cmp1, double %x2, double %t0
|
|
%cmp2 = fcmp ogt double %x3, %sel1
|
|
%sel2 = select i1 %cmp2, double %x3, double %sel1
|
|
ret double %sel2
|
|
}
|
|
|
|
; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.
|
|
|
|
define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
|
|
; SSE-LABEL: reassociate_mins_v4f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addps %xmm1, %xmm0
|
|
; SSE-NEXT: minps %xmm3, %xmm2
|
|
; SSE-NEXT: minps %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_mins_v4f32:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vminps %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd <4 x float> %x0, %x1
|
|
%cmp1 = fcmp olt <4 x float> %x2, %t0
|
|
%sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
|
|
%cmp2 = fcmp olt <4 x float> %x3, %sel1
|
|
%sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
|
|
ret <4 x float> %sel2
|
|
}
|
|
|
|
; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.
|
|
|
|
define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
|
|
; SSE-LABEL: reassociate_maxs_v4f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addps %xmm1, %xmm0
|
|
; SSE-NEXT: maxps %xmm3, %xmm2
|
|
; SSE-NEXT: maxps %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_maxs_v4f32:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vmaxps %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd <4 x float> %x0, %x1
|
|
%cmp1 = fcmp ogt <4 x float> %x2, %t0
|
|
%sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
|
|
%cmp2 = fcmp ogt <4 x float> %x3, %sel1
|
|
%sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
|
|
ret <4 x float> %sel2
|
|
}
|
|
|
|
; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.
|
|
|
|
define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
|
|
; SSE-LABEL: reassociate_mins_v2f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addpd %xmm1, %xmm0
|
|
; SSE-NEXT: minpd %xmm3, %xmm2
|
|
; SSE-NEXT: minpd %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_mins_v2f64:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vminpd %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd <2 x double> %x0, %x1
|
|
%cmp1 = fcmp olt <2 x double> %x2, %t0
|
|
%sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
|
|
%cmp2 = fcmp olt <2 x double> %x3, %sel1
|
|
%sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
|
|
ret <2 x double> %sel2
|
|
}
|
|
|
|
; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.
|
|
|
|
define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
|
|
; SSE-LABEL: reassociate_maxs_v2f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addpd %xmm1, %xmm0
|
|
; SSE-NEXT: maxpd %xmm3, %xmm2
|
|
; SSE-NEXT: maxpd %xmm2, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_maxs_v2f64:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vmaxpd %xmm3, %xmm2, %xmm1
|
|
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd <2 x double> %x0, %x1
|
|
%cmp1 = fcmp ogt <2 x double> %x2, %t0
|
|
%sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
|
|
%cmp2 = fcmp ogt <2 x double> %x3, %sel1
|
|
%sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
|
|
ret <2 x double> %sel2
|
|
}
|
|
|
|
; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.
|
|
|
|
define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
|
|
; SSE-LABEL: reassociate_mins_v8f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addps %xmm2, %xmm0
|
|
; SSE-NEXT: addps %xmm3, %xmm1
|
|
; SSE-NEXT: minps %xmm6, %xmm4
|
|
; SSE-NEXT: minps %xmm4, %xmm0
|
|
; SSE-NEXT: minps %xmm7, %xmm5
|
|
; SSE-NEXT: minps %xmm5, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_mins_v8f32:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: vminps %ymm3, %ymm2, %ymm1
|
|
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd <8 x float> %x0, %x1
|
|
%cmp1 = fcmp olt <8 x float> %x2, %t0
|
|
%sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
|
|
%cmp2 = fcmp olt <8 x float> %x3, %sel1
|
|
%sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
|
|
ret <8 x float> %sel2
|
|
}
|
|
|
|
; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.
|
|
|
|
define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
|
|
; SSE-LABEL: reassociate_maxs_v8f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addps %xmm2, %xmm0
|
|
; SSE-NEXT: addps %xmm3, %xmm1
|
|
; SSE-NEXT: maxps %xmm6, %xmm4
|
|
; SSE-NEXT: maxps %xmm4, %xmm0
|
|
; SSE-NEXT: maxps %xmm7, %xmm5
|
|
; SSE-NEXT: maxps %xmm5, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_maxs_v8f32:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: vmaxps %ymm3, %ymm2, %ymm1
|
|
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd <8 x float> %x0, %x1
|
|
%cmp1 = fcmp ogt <8 x float> %x2, %t0
|
|
%sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
|
|
%cmp2 = fcmp ogt <8 x float> %x3, %sel1
|
|
%sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
|
|
ret <8 x float> %sel2
|
|
}
|
|
|
|
; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.
|
|
|
|
define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
|
|
; SSE-LABEL: reassociate_mins_v4f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addpd %xmm2, %xmm0
|
|
; SSE-NEXT: addpd %xmm3, %xmm1
|
|
; SSE-NEXT: minpd %xmm6, %xmm4
|
|
; SSE-NEXT: minpd %xmm4, %xmm0
|
|
; SSE-NEXT: minpd %xmm7, %xmm5
|
|
; SSE-NEXT: minpd %xmm5, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_mins_v4f64:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: vminpd %ymm3, %ymm2, %ymm1
|
|
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd <4 x double> %x0, %x1
|
|
%cmp1 = fcmp olt <4 x double> %x2, %t0
|
|
%sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
|
|
%cmp2 = fcmp olt <4 x double> %x3, %sel1
|
|
%sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
|
|
ret <4 x double> %sel2
|
|
}
|
|
|
|
; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.
|
|
|
|
define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
|
|
; SSE-LABEL: reassociate_maxs_v4f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addpd %xmm2, %xmm0
|
|
; SSE-NEXT: addpd %xmm3, %xmm1
|
|
; SSE-NEXT: maxpd %xmm6, %xmm4
|
|
; SSE-NEXT: maxpd %xmm4, %xmm0
|
|
; SSE-NEXT: maxpd %xmm7, %xmm5
|
|
; SSE-NEXT: maxpd %xmm5, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_maxs_v4f64:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: vmaxpd %ymm3, %ymm2, %ymm1
|
|
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
|
|
; AVX-NEXT: retq
|
|
%t0 = fadd <4 x double> %x0, %x1
|
|
%cmp1 = fcmp ogt <4 x double> %x2, %t0
|
|
%sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
|
|
%cmp2 = fcmp ogt <4 x double> %x3, %sel1
|
|
%sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
|
|
ret <4 x double> %sel2
|
|
}
|
|
|
|
; Verify that AVX512 512-bit vector single-precision minimum ops are reassociated.
|
|
|
|
define <16 x float> @reassociate_mins_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
|
|
; SSE-LABEL: reassociate_mins_v16f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addps %xmm4, %xmm0
|
|
; SSE-NEXT: addps %xmm5, %xmm1
|
|
; SSE-NEXT: addps %xmm6, %xmm2
|
|
; SSE-NEXT: addps %xmm7, %xmm3
|
|
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_mins_v16f32:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm1
|
|
; AVX1-NEXT: vminps %ymm6, %ymm4, %ymm2
|
|
; AVX1-NEXT: vminps %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vminps %ymm7, %ymm5, %ymm2
|
|
; AVX1-NEXT: vminps %ymm2, %ymm1, %ymm1
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_mins_v16f32:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: vminps %zmm3, %zmm2, %zmm1
|
|
; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fadd <16 x float> %x0, %x1
|
|
%cmp1 = fcmp olt <16 x float> %x2, %t0
|
|
%sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
|
|
%cmp2 = fcmp olt <16 x float> %x3, %sel1
|
|
%sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
|
|
ret <16 x float> %sel2
|
|
}
|
|
|
|
; Verify that AVX512 512-bit vector single-precision maximum ops are reassociated.
|
|
|
|
define <16 x float> @reassociate_maxs_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
|
|
; SSE-LABEL: reassociate_maxs_v16f32:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addps %xmm4, %xmm0
|
|
; SSE-NEXT: addps %xmm5, %xmm1
|
|
; SSE-NEXT: addps %xmm6, %xmm2
|
|
; SSE-NEXT: addps %xmm7, %xmm3
|
|
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_maxs_v16f32:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm1
|
|
; AVX1-NEXT: vmaxps %ymm6, %ymm4, %ymm2
|
|
; AVX1-NEXT: vmaxps %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vmaxps %ymm7, %ymm5, %ymm2
|
|
; AVX1-NEXT: vmaxps %ymm2, %ymm1, %ymm1
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_maxs_v16f32:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: vmaxps %zmm3, %zmm2, %zmm1
|
|
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fadd <16 x float> %x0, %x1
|
|
%cmp1 = fcmp ogt <16 x float> %x2, %t0
|
|
%sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
|
|
%cmp2 = fcmp ogt <16 x float> %x3, %sel1
|
|
%sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
|
|
ret <16 x float> %sel2
|
|
}
|
|
|
|
; Verify that AVX512 512-bit vector double-precision minimum ops are reassociated.
|
|
|
|
define <8 x double> @reassociate_mins_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
|
|
; SSE-LABEL: reassociate_mins_v8f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addpd %xmm4, %xmm0
|
|
; SSE-NEXT: addpd %xmm5, %xmm1
|
|
; SSE-NEXT: addpd %xmm6, %xmm2
|
|
; SSE-NEXT: addpd %xmm7, %xmm3
|
|
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_mins_v8f64:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
|
|
; AVX1-NEXT: vminpd %ymm6, %ymm4, %ymm2
|
|
; AVX1-NEXT: vminpd %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vminpd %ymm7, %ymm5, %ymm2
|
|
; AVX1-NEXT: vminpd %ymm2, %ymm1, %ymm1
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_mins_v8f64:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: vminpd %zmm3, %zmm2, %zmm1
|
|
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fadd <8 x double> %x0, %x1
|
|
%cmp1 = fcmp olt <8 x double> %x2, %t0
|
|
%sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
|
|
%cmp2 = fcmp olt <8 x double> %x3, %sel1
|
|
%sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
|
|
ret <8 x double> %sel2
|
|
}
|
|
|
|
; Verify that AVX512 512-bit vector double-precision maximum ops are reassociated.
|
|
|
|
define <8 x double> @reassociate_maxs_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
|
|
; SSE-LABEL: reassociate_maxs_v8f64:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: addpd %xmm4, %xmm0
|
|
; SSE-NEXT: addpd %xmm5, %xmm1
|
|
; SSE-NEXT: addpd %xmm6, %xmm2
|
|
; SSE-NEXT: addpd %xmm7, %xmm3
|
|
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm0
|
|
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm1
|
|
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm2
|
|
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm3
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX1-LABEL: reassociate_maxs_v8f64:
|
|
; AVX1: # %bb.0:
|
|
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
|
|
; AVX1-NEXT: vmaxpd %ymm6, %ymm4, %ymm2
|
|
; AVX1-NEXT: vmaxpd %ymm2, %ymm0, %ymm0
|
|
; AVX1-NEXT: vmaxpd %ymm7, %ymm5, %ymm2
|
|
; AVX1-NEXT: vmaxpd %ymm2, %ymm1, %ymm1
|
|
; AVX1-NEXT: retq
|
|
;
|
|
; AVX512-LABEL: reassociate_maxs_v8f64:
|
|
; AVX512: # %bb.0:
|
|
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: vmaxpd %zmm3, %zmm2, %zmm1
|
|
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
|
|
; AVX512-NEXT: retq
|
|
%t0 = fadd <8 x double> %x0, %x1
|
|
%cmp1 = fcmp ogt <8 x double> %x2, %t0
|
|
%sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
|
|
%cmp2 = fcmp ogt <8 x double> %x3, %sel1
|
|
%sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
|
|
ret <8 x double> %sel2
|
|
}
|
|
|
|
; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
|
|
; Verify that reassociation is not happening needlessly or wrongly.
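; The serial chain in 'reassociate_adds_from_calls' is rebalanced into the same
; two independent adds produced for 'already_reassociated', and the already
; balanced version is left as is.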
|
|
|
|
declare double @bar()
|
|
|
|
define double @reassociate_adds_from_calls() {
|
|
; SSE-LABEL: reassociate_adds_from_calls:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: subq $24, %rsp
|
|
; SSE-NEXT: .cfi_def_cfa_offset 32
|
|
; SSE-NEXT: callq bar
|
|
; SSE-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; SSE-NEXT: callq bar
|
|
; SSE-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; SSE-NEXT: callq bar
|
|
; SSE-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
|
; SSE-NEXT: callq bar
|
|
; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
|
; SSE-NEXT: # xmm1 = mem[0],zero
|
|
; SSE-NEXT: addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
|
|
; SSE-NEXT: addsd (%rsp), %xmm0 # 8-byte Folded Reload
|
|
; SSE-NEXT: addsd %xmm1, %xmm0
|
|
; SSE-NEXT: addq $24, %rsp
|
|
; SSE-NEXT: .cfi_def_cfa_offset 8
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: reassociate_adds_from_calls:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: subq $24, %rsp
|
|
; AVX-NEXT: .cfi_def_cfa_offset 32
|
|
; AVX-NEXT: callq bar
|
|
; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; AVX-NEXT: callq bar
|
|
; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; AVX-NEXT: callq bar
|
|
; AVX-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
|
|
; AVX-NEXT: callq bar
|
|
; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
|
; AVX-NEXT: # xmm1 = mem[0],zero
|
|
; AVX-NEXT: vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 8-byte Folded Reload
|
|
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0 # 8-byte Folded Reload
|
|
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
|
|
; AVX-NEXT: addq $24, %rsp
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
; AVX-NEXT: retq
|
|
|
|
%x0 = call double @bar()
|
|
%x1 = call double @bar()
|
|
%x2 = call double @bar()
|
|
%x3 = call double @bar()
|
|
%t0 = fadd reassoc nsz double %x0, %x1
|
|
%t1 = fadd reassoc nsz double %t0, %x2
|
|
%t2 = fadd reassoc nsz double %t1, %x3
|
|
ret double %t2
|
|
}
|
|
|
|
define double @already_reassociated() {
|
|
; SSE-LABEL: already_reassociated:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: subq $24, %rsp
|
|
; SSE-NEXT: .cfi_def_cfa_offset 32
|
|
; SSE-NEXT: callq bar
|
|
; SSE-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; SSE-NEXT: callq bar
|
|
; SSE-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; SSE-NEXT: callq bar
|
|
; SSE-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
|
|
; SSE-NEXT: callq bar
|
|
; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
|
; SSE-NEXT: # xmm1 = mem[0],zero
|
|
; SSE-NEXT: addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
|
|
; SSE-NEXT: addsd (%rsp), %xmm0 # 8-byte Folded Reload
|
|
; SSE-NEXT: addsd %xmm1, %xmm0
|
|
; SSE-NEXT: addq $24, %rsp
|
|
; SSE-NEXT: .cfi_def_cfa_offset 8
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: already_reassociated:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: subq $24, %rsp
|
|
; AVX-NEXT: .cfi_def_cfa_offset 32
|
|
; AVX-NEXT: callq bar
|
|
; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; AVX-NEXT: callq bar
|
|
; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
|
; AVX-NEXT: callq bar
|
|
; AVX-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
|
|
; AVX-NEXT: callq bar
|
|
; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
|
|
; AVX-NEXT: # xmm1 = mem[0],zero
|
|
; AVX-NEXT: vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 8-byte Folded Reload
|
|
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0 # 8-byte Folded Reload
|
|
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
|
|
; AVX-NEXT: addq $24, %rsp
|
|
; AVX-NEXT: .cfi_def_cfa_offset 8
|
|
; AVX-NEXT: retq
|
|
|
|
%x0 = call double @bar()
|
|
%x1 = call double @bar()
|
|
%x2 = call double @bar()
|
|
%x3 = call double @bar()
|
|
%t0 = fadd reassoc nsz double %x0, %x1
|
|
%t1 = fadd reassoc nsz double %x2, %x3
|
|
%t2 = fadd reassoc nsz double %t0, %t1
|
|
ret double %t2
|
|
}