; RUN: llc < %s -asm-verbose=false -mtriple=arm64-apple-ios | FileCheck %s
AArch64: Constant fold converting vector setcc results to float.
Since the result of a SETCC for AArch64 is 0 or -1 in each lane, we can
move unary operations, in this case [su]int_to_fp, through the mask
operation and constant fold the operation away. Generally speaking:
UNARYOP(AND(VECTOR_CMP(x,y), constant))
--> AND(VECTOR_CMP(x,y), constant2)
where constant2 is UNARYOP(constant).
This implements the transform where UNARYOP is [su]int_to_fp.
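As a standalone sanity check of that identity (an illustration only, not part
of the patch): a vector setcc lane is either all-zeros or all-ones, and for both
values converting (lane & 1) to float produces exactly the same bits as masking
the lane with the bit pattern of 1.0f.

// Sanity check of the fold for the two possible setcc lane values.
// Illustration only: plain C++, not LLVM code.
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t bitsOf(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return Bits;
}

int main() {
  const uint32_t LaneValues[] = {0u, ~0u}; // all-zeros or all-ones lane
  for (uint32_t Lane : LaneValues) {
    // Before the fold: mask with the integer splat 1, then convert to f32.
    float Before = static_cast<float>(static_cast<int32_t>(Lane & 1u));
    // After the fold: mask directly with the bit pattern of 1.0f.
    uint32_t AfterBits = Lane & bitsOf(1.0f);
    assert(bitsOf(Before) == AfterBits);
  }
  return 0;
}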
For example, consider the simple function:
define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
%cmp = fcmp oeq <4 x float> %val, %test
%ext = zext <4 x i1> %cmp to <4 x i32>
%result = sitofp <4 x i32> %ext to <4 x float>
ret <4 x float> %result
}
Before this change, the code is generated as:
fcmeq.4s v0, v0, v1
movi.4s v1, #0x1 // Integer splat value.
and.16b v0, v0, v1 // Mask lanes based on the comparison.
scvtf.4s v0, v0 // Convert each lane to f32.
ret
After, the code is improved to:
fcmeq.4s v0, v0, v1
fmov.4s v1, #1.00000000 // f32 splat value.
and.16b v0, v0, v1 // Mask lanes based on the comparison.
ret
The scvtf.4s has been constant folded away, and the floating-point 1.0f
vector lanes are materialized directly via fmov.4s.
Rather than do the folding manually in the target code, teach getNode()
in the generic SelectionDAG to handle folding constant operands of
vector [su]int_to_fp nodes. It is reasonable (as noted in a FIXME) to do
additional constant folding there as well, but I don't have test cases for
those operations, so those are left for another time.
rdar://17693791
llvm-svn: 213341
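A rough standalone model of the generic fold described above (this is not the
SelectionDAG implementation, and the names below are made up for the sketch):
the element-wise [su]int_to_fp fold applies only when every lane of the integer
vector operand is a known constant, and bails out otherwise.

// Minimal model of element-wise sint_to_fp constant folding over a vector.
// Illustration only: plain C++, not the SelectionDAG getNode() code.
#include <cstdint>
#include <optional>
#include <vector>

// One integer lane: a known constant, or std::nullopt if unknown.
using IntLane = std::optional<int32_t>;

// Fold sint_to_fp lane by lane; refuse the fold if any lane is unknown.
std::optional<std::vector<float>> foldSIntToFP(const std::vector<IntLane> &Lanes) {
  std::vector<float> Folded;
  Folded.reserve(Lanes.size());
  for (const IntLane &Lane : Lanes) {
    if (!Lane)
      return std::nullopt; // not all operands are constants; keep the original node
    Folded.push_back(static_cast<float>(*Lane));
  }
  return Folded;
}

int main() {
  // The splat-of-1 mask from @foo folds to the all-1.0f vector that fmov.4s materializes.
  auto Splat = foldSIntToFP({1, 1, 1, 1});
  // An unknown lane blocks the fold.
  auto NoFold = foldSIntToFP({1, std::nullopt, 1, 0});
  return (Splat && !NoFold) ? 0 : 1;
}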
define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
; CHECK-LABEL: foo:
; CHECK-NEXT: fcmeq.4s v0, v0, v1
; CHECK-NEXT: fmov.4s v1, #1.00000000
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
%cmp = fcmp oeq <4 x float> %val, %test
%ext = zext <4 x i1> %cmp to <4 x i32>
%result = sitofp <4 x i32> %ext to <4 x float>
ret <4 x float> %result
}
; Make sure the operation doesn't try to get folded when the sizes don't match,
; as that ends up crashing later when trying to form a bitcast operation for
; the folded nodes.
define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwind {
; CHECK-LABEL: foo1:
; CHECK: movi.4s
; CHECK: scvtf.2d
; CHECK: scvtf.2d
%cmp = fcmp oeq <4 x float> %val, %test
%ext = zext <4 x i1> %cmp to <4 x i32>
%result = sitofp <4 x i32> %ext to <4 x double>
store <4 x double> %result, <4 x double>* %p
ret void
}
; Fold explicit AND operations when the constant isn't a splat of a single
; scalar value like the one the zext creates.
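; Note: 1065353216 is 0x3f800000, the bit pattern of 1.0f, so the constant
; pool checked below holds <1.0, 0.0, 1.0, 0.0>.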
define <4 x float> @foo2(<4 x float> %val, <4 x float> %test) nounwind {
; CHECK-LABEL: lCPI2_0:
; CHECK-NEXT: .long 1065353216
; CHECK-NEXT: .long 0
; CHECK-NEXT: .long 1065353216
; CHECK-NEXT: .long 0
; CHECK-LABEL: foo2:
; CHECK: adrp x8, lCPI2_0@PAGE
; CHECK: ldr q2, [x8, lCPI2_0@PAGEOFF]
; CHECK-NEXT: fcmeq.4s v0, v0, v1
; CHECK-NEXT: and.16b v0, v0, v2
%cmp = fcmp oeq <4 x float> %val, %test
%ext = zext <4 x i1> %cmp to <4 x i32>
%and = and <4 x i32> %ext, <i32 255, i32 256, i32 257, i32 258>
%result = sitofp <4 x i32> %and to <4 x float>
ret <4 x float> %result
}