; llvm-mirror/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
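; The functions below use plain vector types rather than x86_mmx, so no MMX
; instructions are expected: the adds are done in XMM registers or as an i64 add.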

; Reinterprets both double arguments as <8 x i8>, adds them element-wise and
; stores the sum through a null pointer. The add is expected to be emitted as
; paddb on XMM registers.
define void @ti8(double %a, double %b) nounwind {
; CHECK:  paddb %xmm1, %xmm0
entry:
  %lhs = bitcast double %a to <8 x i8>
  %rhs = bitcast double %b to <8 x i8>
  %sum = add <8 x i8> %lhs, %rhs
  store <8 x i8> %sum, <8 x i8>* null
  ret void
}

; Reinterprets both double arguments as <4 x i16>, adds them element-wise and
; stores the sum through a null pointer. The add is expected to be emitted as
; paddw on XMM registers.
define void @ti16(double %a, double %b) nounwind {
; CHECK:  paddw %xmm1, %xmm0
entry:
  %lhs = bitcast double %a to <4 x i16>
  %rhs = bitcast double %b to <4 x i16>
  %sum = add <4 x i16> %lhs, %rhs
  store <4 x i16> %sum, <4 x i16>* null
  ret void
}

; Reinterprets both double arguments as <2 x i32>, adds them element-wise and
; stores the sum through a null pointer. The add is expected to be emitted as
; paddd on XMM registers.
define void @ti32(double %a, double %b) nounwind {
; CHECK:  paddd %xmm1, %xmm0
entry:
  %lhs = bitcast double %a to <2 x i32>
  %rhs = bitcast double %b to <2 x i32>
  %sum = add <2 x i32> %lhs, %rhs
  store <2 x i32> %sum, <2 x i32>* null
  ret void
}

; Reinterprets both double arguments as <1 x i64>, adds them and stores the sum
; through a null pointer. With a single 64-bit element the add is expected to
; be emitted as a scalar addq on general-purpose registers.
define void @ti64(double %a, double %b) nounwind {
; CHECK:  addq  %rax, %rcx
entry:
  %lhs = bitcast double %a to <1 x i64>
  %rhs = bitcast double %b to <1 x i64>
  %sum = add <1 x i64> %lhs, %rhs
  store <1 x i64> %sum, <1 x i64>* null
  ret void
}

; Calls to MMX intrinsics give us MMX instructions.

; Bitcasts both double arguments to x86_mmx, adds them with the
; llvm.x86.mmx.padd.b intrinsic and stores the result through a null pointer.
; Each bitcast is expected to produce a movdq2q, and the intrinsic call is
; expected to be emitted as paddb on MMX registers.
define void @ti8a(double %a, double %b) nounwind {
; Anchor the checks to this function: the same movdq2q pair is checked in
; every intrinsic test in this file and would otherwise be free to match
; output belonging to a different function.
; CHECK-LABEL: ti8a:
entry:
        %tmp1 = bitcast double %a to x86_mmx
; CHECK: movdq2q
        %tmp2 = bitcast double %b to x86_mmx
; CHECK: movdq2q
        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)
; The register numbers are left open; only the MMX form of the add matters.
; CHECK: paddb %mm{{[0-7]}}, %mm{{[0-7]}}
        store x86_mmx %tmp3, x86_mmx* null
        ret void
}

; Bitcasts both double arguments to x86_mmx, adds them with the
; llvm.x86.mmx.padd.w intrinsic and stores the result through a null pointer.
; Each bitcast is expected to produce a movdq2q, and the intrinsic call is
; expected to be emitted as paddw on MMX registers.
define void @ti16a(double %a, double %b) nounwind {
; Anchor the checks to this function: the same movdq2q pair is checked in
; every intrinsic test in this file and would otherwise be free to match
; output belonging to a different function.
; CHECK-LABEL: ti16a:
entry:
        %tmp1 = bitcast double %a to x86_mmx
; CHECK: movdq2q
        %tmp2 = bitcast double %b to x86_mmx
; CHECK: movdq2q
        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)
; The register numbers are left open; only the MMX form of the add matters.
; CHECK: paddw %mm{{[0-7]}}, %mm{{[0-7]}}
        store x86_mmx %tmp3, x86_mmx* null
        ret void
}

; Bitcasts both double arguments to x86_mmx, adds them with the
; llvm.x86.mmx.padd.d intrinsic and stores the result through a null pointer.
; Each bitcast is expected to produce a movdq2q, and the intrinsic call is
; expected to be emitted as paddd on MMX registers.
define void @ti32a(double %a, double %b) nounwind {
; Anchor the checks to this function: the same movdq2q pair is checked in
; every intrinsic test in this file and would otherwise be free to match
; output belonging to a different function.
; CHECK-LABEL: ti32a:
entry:
        %tmp1 = bitcast double %a to x86_mmx
; CHECK: movdq2q
        %tmp2 = bitcast double %b to x86_mmx
; CHECK: movdq2q
        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)
; The register numbers are left open; only the MMX form of the add matters.
; CHECK: paddd %mm{{[0-7]}}, %mm{{[0-7]}}
        store x86_mmx %tmp3, x86_mmx* null
        ret void
}

; Bitcasts both double arguments to x86_mmx, adds them with the
; llvm.x86.mmx.padd.q intrinsic and stores the result through a null pointer.
; Each bitcast is expected to produce a movdq2q, and the intrinsic call is
; expected to be emitted as paddq on MMX registers.
define void @ti64a(double %a, double %b) nounwind {
; Anchor the checks to this function: the same movdq2q pair is checked in
; every intrinsic test in this file and would otherwise be free to match
; output belonging to a different function.
; CHECK-LABEL: ti64a:
entry:
        %tmp1 = bitcast double %a to x86_mmx
; CHECK: movdq2q
        %tmp2 = bitcast double %b to x86_mmx
; CHECK: movdq2q
        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)
; The register numbers are left open; only the MMX form of the add matters.
; CHECK: paddq %mm{{[0-7]}}, %mm{{[0-7]}}
        store x86_mmx %tmp3, x86_mmx* null
        ret void
}
 
; Declarations of the MMX add intrinsics called by the ti8a/ti16a/ti32a/ti64a
; tests in this file, one per suffix (.b, .w, .d, .q). Each takes two x86_mmx
; operands and returns an x86_mmx result.
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
Per Chris, fuse four trivial tests using grep (r102199) into one that uses FileCheck. llvm-svn: 102216 2010-04-24 00:12:57 +02:00			`; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 \| FileCheck %s`
Massive rewrite of MMX: The x86_mmx type is used for MMX intrinsics, parameters and return values where these use MMX registers, and is also supported in load, store, and bitcast. Only the above operations generate MMX instructions, and optimizations do not operate on or produce MMX intrinsics. MMX-sized vectors <2 x i32> etc. are lowered to XMM or split into smaller pieces. Optimizations may occur on these forms and the result casted back to x86_mmx, provided the result feeds into a previous existing x86_mmx operation. The point of all this is prevent optimizations from introducing MMX operations, which is unsafe due to the EMMS problem. llvm-svn: 115243 2010-10-01 01:57:10 +02:00			`; There are no MMX operations here, so we use XMM or i64.`
Per Chris, fuse four trivial tests using grep (r102199) into one that uses FileCheck. llvm-svn: 102216 2010-04-24 00:12:57 +02:00
			`define void @ti8(double %a, double %b) nounwind {`
			`entry:`
			`%tmp1 = bitcast double %a to <8 x i8>`
			`%tmp2 = bitcast double %b to <8 x i8>`
			`%tmp3 = add <8 x i8> %tmp1, %tmp2`
Massive rewrite of MMX: The x86_mmx type is used for MMX intrinsics, parameters and return values where these use MMX registers, and is also supported in load, store, and bitcast. Only the above operations generate MMX instructions, and optimizations do not operate on or produce MMX intrinsics. MMX-sized vectors <2 x i32> etc. are lowered to XMM or split into smaller pieces. Optimizations may occur on these forms and the result casted back to x86_mmx, provided the result feeds into a previous existing x86_mmx operation. The point of all this is prevent optimizations from introducing MMX operations, which is unsafe due to the EMMS problem. llvm-svn: 115243 2010-10-01 01:57:10 +02:00			`; CHECK: paddb %xmm1, %xmm0`
Per Chris, fuse four trivial tests using grep (r102199) into one that uses FileCheck. llvm-svn: 102216 2010-04-24 00:12:57 +02:00			`store <8 x i8> %tmp3, <8 x i8>* null`
			`ret void`
			`}`

			`define void @ti16(double %a, double %b) nounwind {`
			`entry:`
			`%tmp1 = bitcast double %a to <4 x i16>`
			`%tmp2 = bitcast double %b to <4 x i16>`
			`%tmp3 = add <4 x i16> %tmp1, %tmp2`
Massive rewrite of MMX: The x86_mmx type is used for MMX intrinsics, parameters and return values where these use MMX registers, and is also supported in load, store, and bitcast. Only the above operations generate MMX instructions, and optimizations do not operate on or produce MMX intrinsics. MMX-sized vectors <2 x i32> etc. are lowered to XMM or split into smaller pieces. Optimizations may occur on these forms and the result casted back to x86_mmx, provided the result feeds into a previous existing x86_mmx operation. The point of all this is prevent optimizations from introducing MMX operations, which is unsafe due to the EMMS problem. llvm-svn: 115243 2010-10-01 01:57:10 +02:00			`; CHECK: paddw %xmm1, %xmm0`
Per Chris, fuse four trivial tests using grep (r102199) into one that uses FileCheck. llvm-svn: 102216 2010-04-24 00:12:57 +02:00			`store <4 x i16> %tmp3, <4 x i16>* null`
			`ret void`
			`}`

			`define void @ti32(double %a, double %b) nounwind {`
			`entry:`
			`%tmp1 = bitcast double %a to <2 x i32>`
			`%tmp2 = bitcast double %b to <2 x i32>`
			`%tmp3 = add <2 x i32> %tmp1, %tmp2`
Massive rewrite of MMX: The x86_mmx type is used for MMX intrinsics, parameters and return values where these use MMX registers, and is also supported in load, store, and bitcast. Only the above operations generate MMX instructions, and optimizations do not operate on or produce MMX intrinsics. MMX-sized vectors <2 x i32> etc. are lowered to XMM or split into smaller pieces. Optimizations may occur on these forms and the result casted back to x86_mmx, provided the result feeds into a previous existing x86_mmx operation. The point of all this is prevent optimizations from introducing MMX operations, which is unsafe due to the EMMS problem. llvm-svn: 115243 2010-10-01 01:57:10 +02:00			`; CHECK: paddd %xmm1, %xmm0`
Per Chris, fuse four trivial tests using grep (r102199) into one that uses FileCheck. llvm-svn: 102216 2010-04-24 00:12:57 +02:00			`store <2 x i32> %tmp3, <2 x i32>* null`
			`ret void`
			`}`

			`define void @ti64(double %a, double %b) nounwind {`
			`entry:`
			`%tmp1 = bitcast double %a to <1 x i64>`
			`%tmp2 = bitcast double %b to <1 x i64>`
			`%tmp3 = add <1 x i64> %tmp1, %tmp2`
Massive rewrite of MMX: The x86_mmx type is used for MMX intrinsics, parameters and return values where these use MMX registers, and is also supported in load, store, and bitcast. Only the above operations generate MMX instructions, and optimizations do not operate on or produce MMX intrinsics. MMX-sized vectors <2 x i32> etc. are lowered to XMM or split into smaller pieces. Optimizations may occur on these forms and the result casted back to x86_mmx, provided the result feeds into a previous existing x86_mmx operation. The point of all this is prevent optimizations from introducing MMX operations, which is unsafe due to the EMMS problem. llvm-svn: 115243 2010-10-01 01:57:10 +02:00			`; CHECK: addq %rax, %rcx`
Per Chris, fuse four trivial tests using grep (r102199) into one that uses FileCheck. llvm-svn: 102216 2010-04-24 00:12:57 +02:00			`store <1 x i64> %tmp3, <1 x i64>* null`
			`ret void`
			`}`
Massive rewrite of MMX: The x86_mmx type is used for MMX intrinsics, parameters and return values where these use MMX registers, and is also supported in load, store, and bitcast. Only the above operations generate MMX instructions, and optimizations do not operate on or produce MMX intrinsics. MMX-sized vectors <2 x i32> etc. are lowered to XMM or split into smaller pieces. Optimizations may occur on these forms and the result cast back to x86_mmx, provided the result feeds into a previously existing x86_mmx operation. The point of all this is to prevent optimizations from introducing MMX operations, which is unsafe due to the EMMS problem. llvm-svn: 115243 2010-10-01 01:57:10 +02:00
			`; MMX intrinsics calls get us MMX instructions.`

			`define void @ti8a(double %a, double %b) nounwind {`
			`entry:`
			`%tmp1 = bitcast double %a to x86_mmx`
			`; CHECK: movdq2q`
			`%tmp2 = bitcast double %b to x86_mmx`
			`; CHECK: movdq2q`
			`%tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)`
			`store x86_mmx %tmp3, x86_mmx* null`
			`ret void`
			`}`

			`define void @ti16a(double %a, double %b) nounwind {`
			`entry:`
			`%tmp1 = bitcast double %a to x86_mmx`
			`; CHECK: movdq2q`
			`%tmp2 = bitcast double %b to x86_mmx`
			`; CHECK: movdq2q`
			`%tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)`
			`store x86_mmx %tmp3, x86_mmx* null`
			`ret void`
			`}`

			`define void @ti32a(double %a, double %b) nounwind {`
			`entry:`
			`%tmp1 = bitcast double %a to x86_mmx`
			`; CHECK: movdq2q`
			`%tmp2 = bitcast double %b to x86_mmx`
			`; CHECK: movdq2q`
			`%tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)`
			`store x86_mmx %tmp3, x86_mmx* null`
			`ret void`
			`}`

			`define void @ti64a(double %a, double %b) nounwind {`
			`entry:`
			`%tmp1 = bitcast double %a to x86_mmx`
			`; CHECK: movdq2q`
			`%tmp2 = bitcast double %b to x86_mmx`
			`; CHECK: movdq2q`
			`%tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)`
			`store x86_mmx %tmp3, x86_mmx* null`
			`ret void`
			`}`

			`declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)`
			`declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)`
			`declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)`
			`declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)`