1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 19:52:54 +01:00
llvm-mirror/test/CodeGen/AArch64
Jim Grosbach 2f665f1cd7 AArch64: Constant fold converting vector setcc results to float.
Since the result of a SETCC for AArch64 is 0 or -1 in each lane, we can
move unary operations, in this case [su]int_to_fp through the mask
operation and constant fold the operation away. Generally speaking:
  UNARYOP(AND(VECTOR_CMP(x,y), constant))
      --> AND(VECTOR_CMP(x,y), constant2)
where constant2 is UNARYOP(constant).

This implements the transform where UNARYOP is [su]int_to_fp.

For example, consider the simple function:
define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
  %cmp = fcmp oeq <4 x float> %val, %test
  %ext = zext <4 x i1> %cmp to <4 x i32>
  %result = sitofp <4 x i32> %ext to <4 x float>
  ret <4 x float> %result
}

Before this change, the code is generated as:
  fcmeq.4s  v0, v0, v1
  movi.4s v1, #0x1        // Integer splat value.
  and.16b v0, v0, v1      // Mask lanes based on the comparison.
  scvtf.4s  v0, v0        // Convert each lane to f32.
  ret

After, the code is improved to:
  fcmeq.4s  v0, v0, v1
  fmov.4s v1, #1.00000000 // f32 splat value.
  and.16b v0, v0, v1      // Mask lanes based on the comparison.
  ret

The svvtf.4s has been constant folded away and the floating point 1.0f
vector lanes are materialized directly via fmov.4s.

Rather than do the folding manually in the target code, teach getNode()
in the generic SelectionDAG to handle folding constant operands of
vector [su]int_to_fp nodes. It is reasonable (as noted in a FIXME) to do
additional constant folding there as well, but I don't have test cases
for those operations, so leaving them for another time when it becomes
appropriate.

rdar://17693791

llvm-svn: 213341
2014-07-18 00:40:52 +00:00
..
128bit_load_store.ll
aarch64-address-type-promotion-assertion.ll
aarch64-address-type-promotion.ll
aarch64-neon-v1i1-setcc.ll
adc.ll
addsub_ext.ll
addsub-shifted.ll
addsub.ll
alloca.ll
analyze-branch.ll
arm64-2011-03-09-CPSRSpill.ll
arm64-2011-03-17-AsmPrinterCrash.ll
arm64-2011-03-21-Unaligned-Frame-Index.ll
arm64-2011-04-21-CPSRBug.ll
arm64-2011-10-18-LdStOptBug.ll
arm64-2012-01-11-ComparisonDAGCrash.ll
arm64-2012-05-07-DAGCombineVectorExtract.ll
arm64-2012-05-07-MemcpyAlignBug.ll
arm64-2012-05-09-LOADgot-bug.ll
arm64-2012-05-22-LdStOptBug.ll
arm64-2012-06-06-FPToUI.ll
arm64-2012-07-11-InstrEmitterBug.ll
arm64-2013-01-13-ffast-fcmp.ll
arm64-2013-01-23-frem-crash.ll
arm64-2013-01-23-sext-crash.ll
arm64-2013-02-12-shufv8i8.ll
arm64-aapcs.ll
arm64-abi_align.ll
arm64-abi-varargs.ll
arm64-abi.ll
arm64-addp.ll
arm64-addr-mode-folding.ll
arm64-addr-type-promotion.ll
arm64-addrmode.ll
arm64-AdvSIMD-Scalar.ll
arm64-alloc-no-stack-realign.ll
arm64-alloca-frame-pointer-offset.ll
arm64-andCmpBrToTBZ.ll
arm64-ands-bad-peephole.ll
arm64-AnInfiniteLoopInDAGCombine.ll
arm64-anyregcc-crash.ll
arm64-anyregcc.ll
arm64-arith-saturating.ll
arm64-arith.ll
arm64-arm64-dead-def-elimination-flag.ll
arm64-atomic-128.ll
arm64-atomic.ll
arm64-basic-pic.ll
arm64-big-endian-bitconverts.ll
arm64-big-endian-eh.ll
arm64-big-endian-varargs.ll
arm64-big-endian-vector-callee.ll
arm64-big-endian-vector-caller.ll
arm64-big-imm-offsets.ll
arm64-big-stack.ll
arm64-bitfield-extract.ll
arm64-blockaddress.ll
arm64-build-vector.ll
arm64-call-tailcalls.ll
arm64-cast-opt.ll
arm64-ccmp-heuristics.ll
arm64-ccmp.ll
arm64-clrsb.ll
arm64-coalesce-ext.ll
arm64-code-model-large-abs.ll
arm64-collect-loh-garbage-crash.ll
arm64-collect-loh-str.ll
arm64-collect-loh.ll
arm64-complex-copy-noneon.ll
arm64-complex-ret.ll
arm64-const-addr.ll
arm64-convert-v4f64.ll
arm64-copy-tuple.ll
arm64-crc32.ll
arm64-crypto.ll
arm64-cse.ll
arm64-csel.ll
arm64-cvt.ll
arm64-dagcombiner-convergence.ll
arm64-dagcombiner-dead-indexed-load.ll
arm64-dagcombiner-indexed-load.ll
arm64-dagcombiner-load-slicing.ll
arm64-dead-def-frame-index.ll
arm64-dead-register-def-bug.ll
arm64-dup.ll
arm64-early-ifcvt.ll
arm64-elf-calls.ll
arm64-elf-constpool.ll
arm64-elf-globals.ll
arm64-EXT-undef-mask.ll
arm64-ext.ll
arm64-extend-int-to-fp.ll
arm64-extend.ll
arm64-extern-weak.ll
arm64-extload-knownzero.ll
arm64-extract_subvector.ll
arm64-extract.ll
arm64-fast-isel-addr-offset.ll
arm64-fast-isel-alloca.ll
arm64-fast-isel-br.ll
arm64-fast-isel-call.ll
arm64-fast-isel-conversion.ll
arm64-fast-isel-fcmp.ll
arm64-fast-isel-gv.ll
arm64-fast-isel-icmp.ll
arm64-fast-isel-indirectbr.ll
arm64-fast-isel-intrinsic.ll
arm64-fast-isel-materialize.ll
arm64-fast-isel-noconvert.ll
arm64-fast-isel-rem.ll
arm64-fast-isel-ret.ll
arm64-fast-isel-select.ll
arm64-fast-isel.ll
arm64-fastcc-tailcall.ll
arm64-fastisel-gep-promote-before-add.ll
arm64-fcmp-opt.ll
arm64-fcopysign.ll
arm64-fixed-point-scalar-cvt-dagcombine.ll
arm64-fmadd.ll
arm64-fmax.ll
arm64-fminv.ll
arm64-fmuladd.ll
arm64-fold-address.ll
arm64-fold-lsl.ll
arm64-fp128-folding.ll
arm64-fp128.ll
arm64-fp-contract-zero.ll
arm64-fp-imm.ll
arm64-fp.ll
arm64-frame-index.ll
arm64-frameaddr.ll
arm64-global-address.ll
arm64-hello.ll
arm64-i16-subreg-extract.ll
arm64-icmp-opt.ll
arm64-illegal-float-ops.ll
arm64-indexed-memory.ll
arm64-indexed-vector-ldst-2.ll
arm64-indexed-vector-ldst.ll
arm64-inline-asm-error-I.ll
arm64-inline-asm-error-J.ll
arm64-inline-asm-error-K.ll
arm64-inline-asm-error-L.ll
arm64-inline-asm-error-M.ll
arm64-inline-asm-error-N.ll
arm64-inline-asm-zero-reg-error.ll
arm64-inline-asm.ll
arm64-join-reserved.ll
arm64-jumptable.ll
arm64-large-frame.ll
arm64-ld1.ll
arm64-ldp.ll
arm64-ldur.ll
arm64-ldxr-stxr.ll
arm64-leaf.ll
arm64-long-shift.ll
arm64-memcpy-inline.ll
arm64-memset-inline.ll
arm64-memset-to-bzero.ll
arm64-misched-basic-A53.ll
arm64-misched-basic-A57.ll
arm64-misched-forwarding-A53.ll
arm64-movi.ll
arm64-mul.ll
arm64-named-reg-alloc.ll
arm64-named-reg-notareg.ll
arm64-neg.ll
arm64-neon-2velem-high.ll
arm64-neon-2velem.ll
arm64-neon-3vdiff.ll
arm64-neon-aba-abd.ll
arm64-neon-across.ll
arm64-neon-add-pairwise.ll
arm64-neon-add-sub.ll
arm64-neon-compare-instructions.ll
arm64-neon-copy.ll
arm64-neon-copyPhysReg-tuple.ll
arm64-neon-mul-div.ll
arm64-neon-scalar-by-elem-mul.ll
arm64-neon-select_cc.ll
arm64-neon-simd-ldst-one.ll
arm64-neon-simd-shift.ll
arm64-neon-simd-vget.ll
arm64-neon-v1i1-setcc.ll
arm64-neon-vector-list-spill.ll
arm64-patchpoint.ll
arm64-pic-local-symbol.ll
arm64-platform-reg.ll
arm64-popcnt.ll
arm64-prefetch.ll
arm64-promote-const.ll
arm64-redzone.ll
arm64-reg-copy-noneon.ll
arm64-register-offset-addressing.ll
arm64-register-pairing.ll
arm64-regress-f128csel-flags.ll
arm64-regress-interphase-shift.ll
arm64-return-vector.ll
arm64-returnaddr.ll
arm64-rev.ll
arm64-rounding.ll
arm64-scaled_iv.ll
arm64-scvt.ll
arm64-setcc-int-to-fp-combine.ll AArch64: Constant fold converting vector setcc results to float. 2014-07-18 00:40:52 +00:00
arm64-shifted-sext.ll
arm64-shrink-v1i64.ll
arm64-simd-scalar-to-vector.ll
arm64-simplest-elf.ll
arm64-sincos.ll
arm64-sitofp-combine-chains.ll
arm64-sli-sri-opt.ll
arm64-smaxv.ll
arm64-sminv.ll
arm64-spill-lr.ll
arm64-spill.ll
arm64-sqshl-uqshl-i64Contant.ll
arm64-st1.ll
arm64-stack-no-frame.ll
arm64-stackmap.ll
arm64-stackpointer.ll
arm64-stacksave.ll
arm64-stp.ll
arm64-strict-align.ll
arm64-stur.ll
arm64-subsections.ll
arm64-subvector-extend.ll
arm64-swizzle-tbl-i16-layout.ll
arm64-tbl.ll
arm64-this-return.ll
arm64-tls-darwin.ll
arm64-tls-dynamic-together.ll
arm64-tls-dynamics.ll
arm64-tls-execs.ll
arm64-trap.ll
arm64-trn.ll
arm64-trunc-store.ll
arm64-umaxv.ll
arm64-uminv.ll
arm64-umov.ll
arm64-unaligned_ldst.ll
arm64-uzp.ll
arm64-vaargs.ll
arm64-vabs.ll
arm64-vadd.ll
arm64-vaddlv.ll
arm64-vaddv.ll
arm64-variadic-aapcs.ll
arm64-vbitwise.ll
arm64-vclz.ll
arm64-vcmp.ll
arm64-vcnt.ll
arm64-vcombine.ll
arm64-vcvt_f32_su32.ll
arm64-vcvt_f.ll
arm64-vcvt_n.ll
arm64-vcvt_su32_f32.ll
arm64-vcvt.ll
arm64-vcvtxd_f32_f64.ll
arm64-vecCmpBr.ll
arm64-vecFold.ll
arm64-vector-ext.ll
arm64-vector-imm.ll
arm64-vector-insertion.ll
arm64-vector-ldst.ll
arm64-vext_reverse.ll
arm64-vext.ll
arm64-vfloatintrinsics.ll
arm64-vhadd.ll
arm64-vhsub.ll
arm64-virtual_base.ll
arm64-vmax.ll
arm64-vminmaxnm.ll
arm64-vmovn.ll
arm64-vmul.ll
arm64-volatile.ll
arm64-vpopcnt.ll
arm64-vqadd.ll
arm64-vqsub.ll
arm64-vselect.ll
arm64-vsetcc_fp.ll
arm64-vshift.ll
arm64-vshr.ll
arm64-vshuffle.ll
arm64-vsqrt.ll
arm64-vsra.ll
arm64-vsub.ll
arm64-weak-reference.ll
arm64-xaluo.ll
arm64-zero-cycle-regmov.ll
arm64-zero-cycle-zeroing.ll
arm64-zext.ll
arm64-zextload-unscaled.ll
arm64-zip.ll
asm-large-immediate.ll
assertion-rc-mismatch.ll
atomic-ops-not-barriers.ll
atomic-ops.ll
basic-pic.ll
bitfield-insert-0.ll
bitfield-insert.ll
bitfield.ll
blockaddress.ll
bool-loads.ll
branch-relax-asm.ll
breg.ll
callee-save.ll
cmpxchg-idioms.ll
code-model-large-abs.ll
compare-branch.ll
compiler-ident.ll
complex-copy-noneon.ll
complex-fp-to-int.ll
complex-int-to-fp.ll
cond-sel.ll
cpus.ll
directcond.ll
dp1.ll
dp2.ll
dp-3source.ll
eliminate-trunc.ll
extern-weak.ll
extract.ll
f16-convert.ll
fast-isel-mul.ll
fastcc-reserved.ll
fastcc.ll
fcmp.ll
fcvt-fixed.ll
fcvt-int.ll
flags-multiuse.ll
floatdp_1source.ll
floatdp_2source.ll
fp128-folding.ll
fp-cond-sel.ll
fp-dp3.ll
fpimm.ll
frameaddr.ll
free-zext.ll
func-argpassing.ll
func-calls.ll
funcptr_cast.ll
global-alignment.ll
global-merge-1.ll
global-merge-2.ll
global-merge-3.ll
global-merge-4.ll
global-merge.ll
got-abuse.ll
hints.ll
i1-contents.ll
i128-align.ll
i128-fast-isel-fallback.ll
illegal-float-ops.ll
init-array.ll
inline-asm-constraints-badI.ll
inline-asm-constraints-badK2.ll
inline-asm-constraints-badK.ll
inline-asm-constraints-badL.ll
inlineasm-ldr-pseudo.ll
intrinsics-memory-barrier.ll
jump-table.ll
large-consts.ll
ldst-opt.ll
ldst-regoffset.ll
ldst-unscaledimm.ll
ldst-unsignedimm.ll
lit.local.cfg
literal_pools_float.ll
local_vars.ll
logical_shifted_reg.ll
logical-imm.ll
mature-mc-support.ll
memcpy-f128.ll
movw-consts.ll
movw-shift-encoding.ll
mul_pow2.ll
mul-lohi.ll
neon-bitcast.ll
neon-bitwise-instructions.ll
neon-compare-instructions.ll
neon-diagnostics.ll
neon-extract.ll
neon-fma.ll
neon-fpround_f128.ll
neon-idiv.ll
neon-mla-mls.ll
neon-mov.ll
neon-or-combine.ll
neon-perm.ll
neon-scalar-by-elem-fma.ll
neon-scalar-copy.ll
neon-shift-left-long.ll
neon-truncStore-extLoad.ll
nzcv-save.ll
pic-eh-stubs.ll
ragreedy-csr.ll
regress-bitcast-formals.ll
regress-f128csel-flags.ll
regress-fp128-livein.ll
regress-tail-livereg.ll
regress-tblgen-chains.ll
regress-w29-reserved-with-fp.ll
returnaddr.ll
setcc-takes-i32.ll
sibling-call.ll
sincos-expansion.ll
sincospow-vector-expansion.ll
tail-call.ll
trunc-v1i64.ll
tst-br.ll
zero-reg.ll