llvm-mirror/test/CodeGen/ARM/vst4.ll

; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

;Loads one <8 x i8> vector from %B and passes it four times to the v8i8 vst4
;intrinsic with %A as the address and 8 as the trailing alignment argument.
define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4i8:
;Check the alignment value.  Max for this instruction is 256 bits:
;(here the alignment argument of 8 is expected to appear as :64, below the max)
;CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64]
	%tmp1 = load <8 x i8>* %B
	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
	ret void
}

;Check for a post-increment updating store with register increment.
;The store address %A is loaded from %ptr; after the store, %A advanced by the
;run-time value %inc is written back to %ptr.  The expected instruction carries
;the increment as a trailing register operand (r2) and shows the alignment
;argument of 16 as :128.
define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK: vst4i8_update:
;CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :128], r2
	%A = load i8** %ptr
	%tmp1 = load <8 x i8>* %B
	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
	; Pointer bump by %inc bytes, stored back through %ptr.
	%tmp2 = getelementptr i8* %A, i32 %inc
	store i8* %tmp2, i8** %ptr
	ret void
}

;Loads one <4 x i16> vector from %B and passes it four times to the v4i16 vst4
;intrinsic with 16 as the trailing alignment argument.
define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4i16:
;Check the alignment value.  Max for this instruction is 256 bits:
;(here the alignment argument of 16 is expected to appear as :128)
;CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128]
	; The intrinsic takes its address as i8*, so the typed pointer is cast first.
	%tmp0 = bitcast i16* %A to i8*
	%tmp1 = load <4 x i16>* %B
	call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
	ret void
}

;Loads one <2 x i32> vector from %B and passes it four times to the v2i32 vst4
;intrinsic with 32 as the trailing alignment argument.
define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4i32:
;Check the alignment value.  Max for this instruction is 256 bits:
;(here the alignment argument of 32 is expected to appear as :256)
;CHECK: vst4.32 {d16, d17, d18, d19}, [r0, :256]
	; The intrinsic takes its address as i8*, so the typed pointer is cast first.
	%tmp0 = bitcast i32* %A to i8*
	%tmp1 = load <2 x i32>* %B
	call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
	ret void
}

;Loads one <2 x float> vector from %B and passes it four times to the v2f32
;vst4 intrinsic with 1 as the trailing alignment argument.  Only the vst4.32
;mnemonic is matched; registers and addressing form are not pinned.
define void @vst4f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4f:
;CHECK: vst4.32
	; The intrinsic takes its address as i8*, so the typed pointer is cast first.
	%tmp0 = bitcast float* %A to i8*
	%tmp1 = load <2 x float>* %B
	call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
	ret void
}

;Loads one <1 x i64> vector from %B and passes it four times to the v1i64 vst4
;intrinsic with 64 as the trailing alignment argument.  The expected output for
;this element type is a vst1.64 of four d registers rather than a vst4.
define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst4i64:
;Check the alignment value.  Max for this instruction is 256 bits:
;(the alignment argument of 64 is expected to appear as the :256 maximum)
;CHECK: vst1.64 {d16, d17, d18, d19}, [r0, :256]
	; The intrinsic takes its address as i8*, so the typed pointer is cast first.
	%tmp0 = bitcast i64* %A to i8*
	%tmp1 = load <1 x i64>* %B
	call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
	ret void
}

;128-bit variant: loads one <16 x i8> vector from %B and passes it four times
;to the v16i8 vst4 intrinsic with 64 as the trailing alignment argument.  Two
;vst4.8 instructions are expected: the first on the even d registers with
;address writeback ("!"), the second on the odd d registers without it.
define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst4Qi8:
;Check the alignment value.  Max for this instruction is 256 bits:
;(the alignment argument of 64 is expected to appear as the :256 maximum)
;CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]!
;CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]
	%tmp1 = load <16 x i8>* %B
	call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
	ret void
}

;128-bit variant: loads one <8 x i16> vector from %B and passes it four times
;to the v8i16 vst4 intrinsic with 1 as the trailing alignment argument.  Two
;vst4.16 instructions are expected (even d registers with writeback, then odd
;d registers), neither carrying an alignment specifier.
define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4Qi16:
;Check for no alignment specifier.
;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!
;CHECK: vst4.16 {d17, d19, d21, d23}, [r0]
	; The intrinsic takes its address as i8*, so the typed pointer is cast first.
	%tmp0 = bitcast i16* %A to i8*
	%tmp1 = load <8 x i16>* %B
	call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
	ret void
}

;128-bit variant: loads one <4 x i32> vector from %B and passes it four times
;to the v4i32 vst4 intrinsic with 1 as the trailing alignment argument.  Only
;the presence of two vst4.32 mnemonics is matched; operands are not pinned.
define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4Qi32:
;CHECK: vst4.32
;CHECK: vst4.32
	; The intrinsic takes its address as i8*, so the typed pointer is cast first.
	%tmp0 = bitcast i32* %A to i8*
	%tmp1 = load <4 x i32>* %B
	call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
	ret void
}

;128-bit variant: loads one <4 x float> vector from %B and passes it four times
;to the v4f32 vst4 intrinsic with 1 as the trailing alignment argument.  Only
;the presence of two vst4.32 mnemonics is matched; operands are not pinned.
define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4Qf:
;CHECK: vst4.32
;CHECK: vst4.32
	; The intrinsic takes its address as i8*, so the typed pointer is cast first.
	%tmp0 = bitcast float* %A to i8*
	%tmp1 = load <4 x float>* %B
	call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
	ret void
}

;Check for a post-increment updating store.
;The store address %A is loaded from %ptr; after the store, %A advanced by a
;constant 16 floats is written back to %ptr.  Both expected vst4.32
;instructions carry address writeback ("!"), with no separate increment operand.
define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
;CHECK: vst4Qf_update:
;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!
;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!
	%A = load float** %ptr
	; The intrinsic takes its address as i8*, so the typed pointer is cast first.
	%tmp0 = bitcast float* %A to i8*
	%tmp1 = load <4 x float>* %B
	call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
	; 16 floats is the combined element count of the four <4 x float> operands.
	%tmp2 = getelementptr float* %A, i32 16
	store float* %tmp2, float** %ptr
	ret void
}

;Declarations of the vst4 intrinsic overloads used by the tests in this file.
;Each takes an i8* address, four vectors of the overloaded type, and a trailing
;i32 (used by the tests as the alignment argument).
;64-bit vector overloads:
declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind

;128-bit vector overloads:
declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
Eliminate more uses of llvm-as and llvm-dis. llvm-svn: 81293 2009-09-09 02:09:15 +02:00			`; RUN: llc < %s -march=arm -mattr=+neon \| FileCheck %s`
Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00
			`define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {`
			`;CHECK: vst4i8:`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`;Check the alignment value. Max for this instruction is 256 bits:`
Change register allocation order for ARM VFP and NEON registers to put the callee-saved registers at the end of the lists. Also prefer to avoid using the low registers that are in register subclasses required by certain instructions, so that those registers will more likely be available when needed. This change makes a huge improvement in spilling in some cases. Thanks to Jakob for helping me realize the problem. Most of this patch is fixing the testsuite. There are quite a few places where we're checking for specific registers. I changed those to wildcards in places where that doesn't weaken the tests. The spill-q.ll and thumb2-spill-q.ll tests stopped spilling with this change, so I added a bunch of live values to force spills on those tests. llvm-svn: 116055 2010-10-08 08:15:13 +02:00			`;CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64]`
Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00			`%tmp1 = load <8 x i8>* %B`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)`
Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00			`ret void`
			`}`

Add codegen support for using post-increment NEON load/store instructions. The vld1-lane, vld1-dup and vst1-lane instructions do not yet support using post-increment versions, but all the rest of the NEON load/store instructions should be handled now. llvm-svn: 125014 2011-02-07 18:43:21 +01:00			`;Check for a post-increment updating store with register increment.`
			`define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {`
			`;CHECK: vst4i8_update:`
			`;CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :128], r2`
			`%A = load i8** %ptr`
			`%tmp1 = load <8 x i8>* %B`
			`call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)`
			`%tmp2 = getelementptr i8* %A, i32 %inc`
			`store i8* %tmp2, i8** %ptr`
			`ret void`
			`}`

Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00			`define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {`
			`;CHECK: vst4i16:`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`;Check the alignment value. Max for this instruction is 256 bits:`
Change register allocation order for ARM VFP and NEON registers to put the callee-saved registers at the end of the lists. Also prefer to avoid using the low registers that are in register subclasses required by certain instructions, so that those registers will more likely be available when needed. This change makes a huge improvement in spilling in some cases. Thanks to Jakob for helping me realize the problem. Most of this patch is fixing the testsuite. There are quite a few places where we're checking for specific registers. I changed those to wildcards in places where that doesn't weaken the tests. The spill-q.ll and thumb2-spill-q.ll tests stopped spilling with this change, so I added a bunch of live values to force spills on those tests. llvm-svn: 116055 2010-10-08 08:15:13 +02:00			`;CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128]`
Fix tests for Neon load/store intrinsics to match the i8* types expected by the intrinsics. The reason for those i8* types is that the intrinsics are overloaded on the vector type and we don't have a way to declare an intrinsic where one argument is an overloaded vector type and another argument is a pointer to the vector element type. The bitcasts added here will match what the frontend will typically generate when these intrinsics are used. llvm-svn: 101840 2010-04-20 02:17:16 +02:00			`%tmp0 = bitcast i16* %A to i8*`
Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00			`%tmp1 = load <4 x i16>* %B`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)`
Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00			`ret void`
			`}`

			`define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {`
			`;CHECK: vst4i32:`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`;Check the alignment value. Max for this instruction is 256 bits:`
Change register allocation order for ARM VFP and NEON registers to put the callee-saved registers at the end of the lists. Also prefer to avoid using the low registers that are in register subclasses required by certain instructions, so that those registers will more likely be available when needed. This change makes a huge improvement in spilling in some cases. Thanks to Jakob for helping me realize the problem. Most of this patch is fixing the testsuite. There are quite a few places where we're checking for specific registers. I changed those to wildcards in places where that doesn't weaken the tests. The spill-q.ll and thumb2-spill-q.ll tests stopped spilling with this change, so I added a bunch of live values to force spills on those tests. llvm-svn: 116055 2010-10-08 08:15:13 +02:00			`;CHECK: vst4.32 {d16, d17, d18, d19}, [r0, :256]`
Fix tests for Neon load/store intrinsics to match the i8* types expected by the intrinsics. The reason for those i8* types is that the intrinsics are overloaded on the vector type and we don't have a way to declare an intrinsic where one argument is an overloaded vector type and another argument is a pointer to the vector element type. The bitcasts added here will match what the frontend will typically generate when these intrinsics are used. llvm-svn: 101840 2010-04-20 02:17:16 +02:00			`%tmp0 = bitcast i32* %A to i8*`
Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00			`%tmp1 = load <2 x i32>* %B`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)`
Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00			`ret void`
			`}`

			`define void @vst4f(float* %A, <2 x float>* %B) nounwind {`
			`;CHECK: vst4f:`
			`;CHECK: vst4.32`
Fix tests for Neon load/store intrinsics to match the i8* types expected by the intrinsics. The reason for those i8* types is that the intrinsics are overloaded on the vector type and we don't have a way to declare an intrinsic where one argument is an overloaded vector type and another argument is a pointer to the vector element type. The bitcasts added here will match what the frontend will typically generate when these intrinsics are used. llvm-svn: 101840 2010-04-20 02:17:16 +02:00			`%tmp0 = bitcast float* %A to i8*`
Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00			`%tmp1 = load <2 x float>* %B`
Add alignment arguments to all the NEON load/store intrinsics. Update all the tests using those intrinsics and add support for auto-upgrading bitcode files with the old versions of the intrinsics. llvm-svn: 112271 2010-08-27 19:13:24 +02:00			`call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)`
Implement Neon VST[234] operations. llvm-svn: 78330 2009-08-06 20:47:44 +02:00			`ret void`
			`}`

Add codegen support for NEON vst4 intrinsics with <1 x i64> vectors. llvm-svn: 83526 2009-10-08 07:18:18 +02:00			`define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {`
			`;CHECK: vst4i64:`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`;Check the alignment value. Max for this instruction is 256 bits:`
Change register allocation order for ARM VFP and NEON registers to put the callee-saved registers at the end of the lists. Also prefer to avoid using the low registers that are in register subclasses required by certain instructions, so that those registers will more likely be available when needed. This change makes a huge improvement in spilling in some cases. Thanks to Jakob for helping me realize the problem. Most of this patch is fixing the testsuite. There are quite a few places where we're checking for specific registers. I changed those to wildcards in places where that doesn't weaken the tests. The spill-q.ll and thumb2-spill-q.ll tests stopped spilling with this change, so I added a bunch of live values to force spills on those tests. llvm-svn: 116055 2010-10-08 08:15:13 +02:00			`;CHECK: vst1.64 {d16, d17, d18, d19}, [r0, :256]`
Fix tests for Neon load/store intrinsics to match the i8* types expected by the intrinsics. The reason for those i8* types is that the intrinsics are overloaded on the vector type and we don't have a way to declare an intrinsic where one argument is an overloaded vector type and another argument is a pointer to the vector element type. The bitcasts added here will match what the frontend will typically generate when these intrinsics are used. llvm-svn: 101840 2010-04-20 02:17:16 +02:00			`%tmp0 = bitcast i64* %A to i8*`
Add codegen support for NEON vst4 intrinsics with <1 x i64> vectors. llvm-svn: 83526 2009-10-08 07:18:18 +02:00			`%tmp1 = load <1 x i64>* %B`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)`
Add codegen support for NEON vst4 intrinsics with <1 x i64> vectors. llvm-svn: 83526 2009-10-08 07:18:18 +02:00			`ret void`
			`}`

Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00			`define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {`
			`;CHECK: vst4Qi8:`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`;Check the alignment value. Max for this instruction is 256 bits:`
Change register allocation order for ARM VFP and NEON registers to put the callee-saved registers at the end of the lists. Also prefer to avoid using the low registers that are in register subclasses required by certain instructions, so that those registers will more likely be available when needed. This change makes a huge improvement in spilling in some cases. Thanks to Jakob for helping me realize the problem. Most of this patch is fixing the testsuite. There are quite a few places where we're checking for specific registers. I changed those to wildcards in places where that doesn't weaken the tests. The spill-q.ll and thumb2-spill-q.ll tests stopped spilling with this change, so I added a bunch of live values to force spills on those tests. llvm-svn: 116055 2010-10-08 08:15:13 +02:00			`;CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]!`
			`;CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]`
Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00			`%tmp1 = load <16 x i8>* %B`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)`
Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00			`ret void`
			`}`

			`define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {`
			`;CHECK: vst4Qi16:`
Set alignment operand for NEON VST instructions. llvm-svn: 114709 2010-09-24 01:42:37 +02:00			`;Check for no alignment specifier.`
Change register allocation order for ARM VFP and NEON registers to put the callee-saved registers at the end of the lists. Also prefer to avoid using the low registers that are in register subclasses required by certain instructions, so that those registers will more likely be available when needed. This change makes a huge improvement in spilling in some cases. Thanks to Jakob for helping me realize the problem. Most of this patch is fixing the testsuite. There are quite a few places where we're checking for specific registers. I changed those to wildcards in places where that doesn't weaken the tests. The spill-q.ll and thumb2-spill-q.ll tests stopped spilling with this change, so I added a bunch of live values to force spills on those tests. llvm-svn: 116055 2010-10-08 08:15:13 +02:00			`;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!`
			`;CHECK: vst4.16 {d17, d19, d21, d23}, [r0]`
Fix tests for Neon load/store intrinsics to match the i8* types expected by the intrinsics. The reason for those i8* types is that the intrinsics are overloaded on the vector type and we don't have a way to declare an intrinsic where one argument is an overloaded vector type and another argument is a pointer to the vector element type. The bitcasts added here will match what the frontend will typically generate when these intrinsics are used. llvm-svn: 101840 2010-04-20 02:17:16 +02:00			`%tmp0 = bitcast i16* %A to i8*`
Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00			`%tmp1 = load <8 x i16>* %B`
Add alignment arguments to all the NEON load/store intrinsics. Update all the tests using those intrinsics and add support for auto-upgrading bitcode files with the old versions of the intrinsics. llvm-svn: 112271 2010-08-27 19:13:24 +02:00			`call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)`
Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00			`ret void`
			`}`

			`define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {`
			`;CHECK: vst4Qi32:`
			`;CHECK: vst4.32`
			`;CHECK: vst4.32`
Fix tests for Neon load/store intrinsics to match the i8* types expected by the intrinsics. The reason for those i8* types is that the intrinsics are overloaded on the vector type and we don't have a way to declare an intrinsic where one argument is an overloaded vector type and another argument is a pointer to the vector element type. The bitcasts added here will match what the frontend will typically generate when these intrinsics are used. llvm-svn: 101840 2010-04-20 02:17:16 +02:00			`%tmp0 = bitcast i32* %A to i8*`
Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00			`%tmp1 = load <4 x i32>* %B`
Add alignment arguments to all the NEON load/store intrinsics. Update all the tests using those intrinsics and add support for auto-upgrading bitcode files with the old versions of the intrinsics. llvm-svn: 112271 2010-08-27 19:13:24 +02:00			`call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)`
Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00			`ret void`
			`}`

			`define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {`
			`;CHECK: vst4Qf:`
			`;CHECK: vst4.32`
			`;CHECK: vst4.32`
Fix tests for Neon load/store intrinsics to match the i8* types expected by the intrinsics. The reason for those i8* types is that the intrinsics are overloaded on the vector type and we don't have a way to declare an intrinsic where one argument is an overloaded vector type and another argument is a pointer to the vector element type. The bitcasts added here will match what the frontend will typically generate when these intrinsics are used. llvm-svn: 101840 2010-04-20 02:17:16 +02:00			`%tmp0 = bitcast float* %A to i8*`
Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00			`%tmp1 = load <4 x float>* %B`
Add alignment arguments to all the NEON load/store intrinsics. Update all the tests using those intrinsics and add support for auto-upgrading bitcode files with the old versions of the intrinsics. llvm-svn: 112271 2010-08-27 19:13:24 +02:00			`call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)`
Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00			`ret void`
			`}`

Add codegen support for using post-increment NEON load/store instructions. The vld1-lane, vld1-dup and vst1-lane instructions do not yet support using post-increment versions, but all the rest of the NEON load/store instructions should be handled now. llvm-svn: 125014 2011-02-07 18:43:21 +01:00			`;Check for a post-increment updating store.`
			`define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {`
			`;CHECK: vst4Qf_update:`
			`;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!`
			`;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!`
			`%A = load float** %ptr`
			`%tmp0 = bitcast float* %A to i8*`
			`%tmp1 = load <4 x float>* %B`
			`call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)`
			`%tmp2 = getelementptr float* %A, i32 16`
			`store float* %tmp2, float** %ptr`
			`ret void`
			`}`

Add alignment arguments to all the NEON load/store intrinsics. Update all the tests using those intrinsics and add support for auto-upgrading bitcode files with the old versions of the intrinsics. llvm-svn: 112271 2010-08-27 19:13:24 +02:00			`declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind`
			`declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind`
			`declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind`
			`declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind`
			`declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind`
Add codegen support for NEON vst4 intrinsics with 128-bit vectors. llvm-svn: 83486 2009-10-07 22:49:18 +02:00
Add alignment arguments to all the NEON load/store intrinsics. Update all the tests using those intrinsics and add support for auto-upgrading bitcode files with the old versions of the intrinsics. llvm-svn: 112271 2010-08-27 19:13:24 +02:00			`declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind`
			`declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind`
			`declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind`
			`declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind`