llvm-mirror/test/CodeGen/CellSPU/stores.ll

; Store lowering tests for the CellSPU backend.
;
; The module is compiled twice. The first llc invocation writes the assembly
; to %t1.s, and each grep/count line below asserts how many lines of that
; output contain the given pattern, counted across the whole file rather than
; per function. The last invocation pipes the assembly into FileCheck, which
; evaluates the inline directives attached to the functions at the end of the
; file.
;
; NOTE(review): the bare decimal patterns are presumably immediates in the
; generated code. 16256 = 0x3F80 and 16384 = 0x4000 are the upper 16 bits of
; float 1.0 (0x3F800000) and 2.0 (0x40000000); 771, 515, 1799, 1543 and 1029
; are 0x0303, 0x0203, 0x0707, 0x0607 and 0x0405, which look like byte-index
; pairs of shuffle control words. Confirm against the llc output before
; relying on this reading.
;
; Because the counts are file-wide, adding or changing any function can shift
; them; re-derive the numbers whenever the IR below is edited.
; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep 'stqd.*0($3)'       %t1.s | count 4
; RUN: grep 'stqd.*16($3)'      %t1.s | count 4
; RUN: grep 16256               %t1.s | count 2
; RUN: grep 16384               %t1.s | count 1
; RUN: grep 771                 %t1.s | count 4
; RUN: grep 515                 %t1.s | count 2
; RUN: grep 1799                %t1.s | count 2
; RUN: grep 1543                %t1.s | count 5
; RUN: grep 1029                %t1.s | count 3
; RUN: grep 'shli.*, 4'         %t1.s | count 4
; RUN: grep stqx                %t1.s | count 4
; RUN: grep ilhu                %t1.s | count 11
; RUN: grep iohl                %t1.s | count 8
; RUN: grep shufb               %t1.s | count 15
; RUN: grep frds                %t1.s | count 1
; RUN: llc < %s -march=cellspu | FileCheck %s

; Module-level target description: data layout string and a triple naming the
; SPU target.
; ModuleID = 'stores.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"

; store_v16i8_1: store a non-splat <16 x i8> constant (the byte pattern
; 1,2,1,1 repeated four times) directly through %a, with no address
; arithmetic.
define void @store_v16i8_1(<16 x i8>* %a) nounwind {
entry:
	store <16 x i8> < i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1 >, <16 x i8>* %a
	ret void
}

; store_v16i8_2: store a splat-of-2 <16 x i8> constant to the vector slot at
; constant index 1 from %a (the element immediately after *%a).
define void @store_v16i8_2(<16 x i8>* %a) nounwind {
entry:
	%arrayidx = getelementptr <16 x i8>* %a, i32 1
	store <16 x i8> < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >, <16 x i8>* %arrayidx
	ret void
}

; store_v16i8_3: store a splat-of-1 <16 x i8> constant to the vector slot
; selected by the run-time index %i, so the address is not a compile-time
; constant offset from %a.
define void @store_v16i8_3(<16 x i8>* %a, i32 %i) nounwind {
entry:
        %arrayidx = getelementptr <16 x i8>* %a, i32 %i
	store <16 x i8> < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >, <16 x i8>* %arrayidx
        ret void
}

; store_v8i16_1: store a non-splat <8 x i16> constant (the halfword pattern
; 1,2,1,1 repeated twice) directly through %a.
define void @store_v8i16_1(<8 x i16>* %a) nounwind {
entry:
	store <8 x i16> < i16 1, i16 2, i16 1, i16 1, i16 1, i16 2, i16 1, i16 1 >, <8 x i16>* %a
	ret void
}

; store_v8i16_2: store a splat-of-2 <8 x i16> constant to the vector slot at
; constant index 1 from %a.
; NOTE(review): the index operand is typed i16 here, whereas the sibling *_2
; tests use an i32 index. It is unclear whether that is deliberate; left
; unchanged.
define void @store_v8i16_2(<8 x i16>* %a) nounwind {
entry:
	%arrayidx = getelementptr <8 x i16>* %a, i16 1
	store <8 x i16> < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >, <8 x i16>* %arrayidx
	ret void
}

; store_v8i16_3: store a splat-of-1 <8 x i16> constant to the vector slot
; selected by the run-time index %i.
define void @store_v8i16_3(<8 x i16>* %a, i32 %i) nounwind {
entry:
        %arrayidx = getelementptr <8 x i16>* %a, i32 %i
	store <8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16>* %arrayidx
        ret void
}

; store_v4i32_1: store the non-splat <4 x i32> constant <1, 2, 1, 1> directly
; through %a.
define void @store_v4i32_1(<4 x i32>* %a) nounwind {
entry:
	store <4 x i32> < i32 1, i32 2, i32 1, i32 1 >, <4 x i32>* %a
	ret void
}

; store_v4i32_2: store a splat-of-2 <4 x i32> constant to the vector slot at
; constant index 1 from %a.
define void @store_v4i32_2(<4 x i32>* %a) nounwind {
entry:
	%arrayidx = getelementptr <4 x i32>* %a, i32 1
	store <4 x i32> < i32 2, i32 2, i32 2, i32 2 >, <4 x i32>* %arrayidx
	ret void
}

; store_v4i32_3: store a splat-of-1 <4 x i32> constant to the vector slot
; selected by the run-time index %i.
define void @store_v4i32_3(<4 x i32>* %a, i32 %i) nounwind {
entry:
        %arrayidx = getelementptr <4 x i32>* %a, i32 %i
        store <4 x i32> < i32 1, i32 1, i32 1, i32 1 >, <4 x i32>* %arrayidx
        ret void
}

; store_v4f32_1: store a <4 x float> constant with every lane equal to 1.0
; directly through %a. Unlike the integer *_1 tests, this constant is a
; splat.
define void @store_v4f32_1(<4 x float>* %a) nounwind {
entry:
	store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a
	ret void
}

; store_v4f32_2: store a <4 x float> constant with every lane equal to 2.0 to
; the vector slot at constant index 1 from %a.
define void @store_v4f32_2(<4 x float>* %a) nounwind {
entry:
	%arrayidx = getelementptr <4 x float>* %a, i32 1
	store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx
	ret void
}

; store_v4f32_3: store a <4 x float> constant with every lane equal to 1.0 to
; the vector slot selected by the run-time index %i.
define void @store_v4f32_3(<4 x float>* %a, i32 %i) nounwind {
entry:
        %arrayidx = getelementptr <4 x float>* %a, i32 %i
        store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx
        ret void
}

; Test truncating stores:

; tstore_i16_i8: truncate the sign-extended i16 argument %val to i8, store the
; byte through %dest, and return the same truncated value (zero-extended per
; the return attribute).
define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind {
entry:
	%conv = trunc i16 %val to i8
	store i8 %conv, i8* %dest
	ret i8 %conv
}

; tstore_i32_i8: truncate the i32 argument %val to i8, store the byte through
; %dest, and return the truncated value (zero-extended per the return
; attribute).
define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind {
entry:
	%conv = trunc i32 %val to i8
	store i8 %conv, i8* %dest
	ret i8 %conv
}

; tstore_i32_i16: truncate the i32 argument %val to i16, store the halfword
; through %dest, and return the truncated value (sign-extended per the return
; attribute).
define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind {
entry:
	%conv = trunc i32 %val to i16
	store i16 %conv, i16* %dest
	ret i16 %conv
}

; tstore_i64_i8: truncate the i64 argument %val to i8, store the byte through
; %dest, and return the truncated value (zero-extended per the return
; attribute).
define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind {
entry:
	%conv = trunc i64 %val to i8
	store i8 %conv, i8* %dest
	ret i8 %conv
}

; tstore_i64_i16: truncate the i64 argument %val to i16, store the halfword
; through %dest, and return the truncated value (sign-extended per the return
; attribute).
define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind {
entry:
	%conv = trunc i64 %val to i16
	store i16 %conv, i16* %dest
	ret i16 %conv
}

; tstore_i64_i32: truncate the i64 argument %val to i32, store the word
; through %dest, and return the truncated value.
define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind {
entry:
	%conv = trunc i64 %val to i32
	store i32 %conv, i32* %dest
	ret i32 %conv
}

; tstore_f64_f32: floating-point counterpart of the truncating stores above.
; Narrows the double argument %val to float with fptrunc, stores it through
; %dest, and returns the narrowed value.
define float @tstore_f64_f32(double %val, float* %dest) nounwind {
entry:
	%conv = fptrunc double %val to float
	store float %conv, float* %dest
	ret float %conv
}

; Check stores that might span two 16-byte memory blocks.
; store_misaligned: store an i32 with only 2-byte alignment ("align 2"), so
; the four bytes are not guaranteed to sit inside a single quadword. The
; FileCheck directives in the body first anchor on the function name, then
; expect two lqd loads followed by two stqd stores before the return.
define void @store_misaligned( i32 %val, i32* %ptr) {	
;CHECK: store_misaligned
;CHECK: lqd
;CHECK: lqd
;CHECK: stqd
;CHECK: stqd
;CHECK: bi $lr
	store i32 %val, i32*%ptr, align 2
	ret void
}

; store_v8: store a <8 x float> value (256 bits, twice the width of the
; 128-bit vectors used by the tests above) through %ptr.
;
; The first directive pins the start of matching to this function's label,
; consistent with store_misaligned, so the checks below are tied to this
; function's output and do not merely continue from wherever the previous
; function's checks stopped. The remaining directives expect two quadword
; stores ("stq" is a substring match, so any stq* form is accepted) and then
; the return.
define void @store_v8( <8 x float> %val, <8 x float>* %ptr )
{
;CHECK: store_v8:
;CHECK: stq
;CHECK: stq
;CHECK: bi $lr
	store <8 x float> %val, <8 x float>* %ptr
	ret void
}

; store_null_vec: store a <4 x i32> value through a constant null pointer.
;
; The first directive pins the start of matching to this function's label,
; consistent with store_misaligned, so the stqd and return that are matched
; belong to this function and not to an earlier one.
define void @store_null_vec( <4 x i32> %val ) {
; FIXME - this is for some reason compiled into an il+stqd, not a sta.
;CHECK: store_null_vec:
;CHECK: stqd
;CHECK: bi $lr
	store <4 x i32> %val, <4 x i32>* null
	ret void
}
Convert more tests to avoid llvm-as. llvm-svn: 81545 2009-09-11 20:36:27 +02:00			`; RUN: llc < %s -march=cellspu > %t1.s`
Convert all tests using TCL-style quoting to use shell-style quoting. This was done through the aid of a terrible Perl creation. I will not paste any of the horrors here. Suffice to say, it required multiple staged rounds of replacements, state carried between, and a few nested-construct-parsing hacks that I'm not proud of. It happens, by luck, to be able to deal with all the TCL-quoting patterns in evidence in the LLVM test suite. If anyone is maintaining large out-of-tree test trees, feel free to poke me and I'll send you the steps I used to convert things, as well as answer any painful questions etc. IRC works best for this type of thing I find. Once converted, switch the LLVM lit config to use ShTests the same as Clang. In addition to being able to delete large amounts of Python code from 'lit', this will also simplify the entire test suite and some of lit's architecture. Finally, the test suite runs 33% faster on Linux now. ;] For my 16-hardware-thread (2x 4-core xeon e5520): 36s -> 24s llvm-svn: 159525 2012-07-02 14:47:22 +02:00			`; RUN: grep 'stqd.*0($3)' %t1.s \| count 4`
			`; RUN: grep 'stqd.*16($3)' %t1.s \| count 4`
CellSPU: (a) Remove conditionally removed code in SelectXAddr. Basically, hope for the best that the A-form and D-form address predicates catch everything before the code decides to emit a X-form address. (b) Expand vector store test cases to include the usual suspects. llvm-svn: 60034 2008-11-25 18:29:43 +01:00			`; RUN: grep 16256 %t1.s \| count 2`
			`; RUN: grep 16384 %t1.s \| count 1`
- Remove Tilmann's custom truncate lowering: it completely hosed over DAGcombine's ability to find reasons to remove truncates when they were not needed. Consequently, the CellSPU backend would produce correct, but _really slow and horrible_, code. Replaced with instruction sequences that do the equivalent truncation in SPUInstrInfo.td. - Re-examine how unaligned loads and stores work. Generated unaligned load code has been tested on the CellSPU hardware; see the i32operations.c and i64operations.c in CodeGen/CellSPU/useful-harnesses. (While they may be toy test code, it does prove that some real world code does compile correctly.) - Fix truncating stores in bug 3193 (note: unpack_df.ll will still make llc fault because i64 ult is not yet implemented.) - Added i64 eq and neq for setcc and select/setcc; started new instruction information file for them in SPU64InstrInfo.td. Additional i64 operations should be added to this file and not to SPUInstrInfo.td. llvm-svn: 61447 2008-12-27 05:51:36 +01:00			`; RUN: grep 771 %t1.s \| count 4`
			`; RUN: grep 515 %t1.s \| count 2`
			`; RUN: grep 1799 %t1.s \| count 2`
Revert 67132. This is breaking some objective-c apps. Also fixes SDISel so it does not force promote return value if the function is not marked signext / zeroext. llvm-svn: 67701 2009-03-25 21:20:11 +01:00			`; RUN: grep 1543 %t1.s \| count 5`
			`; RUN: grep 1029 %t1.s \| count 3`
Convert all tests using TCL-style quoting to use shell-style quoting. This was done through the aid of a terrible Perl creation. I will not paste any of the horrors here. Suffice to say, it required multiple staged rounds of replacements, state carried between, and a few nested-construct-parsing hacks that I'm not proud of. It happens, by luck, to be able to deal with all the TCL-quoting patterns in evidence in the LLVM test suite. If anyone is maintaining large out-of-tree test trees, feel free to poke me and I'll send you the steps I used to convert things, as well as answer any painful questions etc. IRC works best for this type of thing I find. Once converted, switch the LLVM lit config to use ShTests the same as Clang. In addition to being able to delete large amounts of Python code from 'lit', this will also simplify the entire test suite and some of lit's architecture. Finally, the test suite runs 33% faster on Linux now. ;] For my 16-hardware-thread (2x 4-core xeon e5520): 36s -> 24s llvm-svn: 159525 2012-07-02 14:47:22 +02:00			`; RUN: grep 'shli.*, 4' %t1.s \| count 4`
CellSPU: (a) Remove conditionally removed code in SelectXAddr. Basically, hope for the best that the A-form and D-form address predicates catch everything before the code decides to emit a X-form address. (b) Expand vector store test cases to include the usual suspects. llvm-svn: 60034 2008-11-25 18:29:43 +01:00			`; RUN: grep stqx %t1.s \| count 4`
Revert 67132. This is breaking some objective-c apps. Also fixes SDISel so it does not force promote return value if the function is not marked signext / zeroext. llvm-svn: 67701 2009-03-25 21:20:11 +01:00			`; RUN: grep ilhu %t1.s \| count 11`
			`; RUN: grep iohl %t1.s \| count 8`
			`; RUN: grep shufb %t1.s \| count 15`
- Remove Tilmann's custom truncate lowering: it completely hosed over DAGcombine's ability to find reasons to remove truncates when they were not needed. Consequently, the CellSPU backend would produce correct, but _really slow and horrible_, code. Replaced with instruction sequences that do the equivalent truncation in SPUInstrInfo.td. - Re-examine how unaligned loads and stores work. Generated unaligned load code has been tested on the CellSPU hardware; see the i32operations.c and i64operations.c in CodeGen/CellSPU/useful-harnesses. (While they may be toy test code, it does prove that some real world code does compile correctly.) - Fix truncating stores in bug 3193 (note: unpack_df.ll will still make llc fault because i64 ult is not yet implemented.) - Added i64 eq and neq for setcc and select/setcc; started new instruction information file for them in SPU64InstrInfo.td. Additional i64 operations should be added to this file and not to SPUInstrInfo.td. llvm-svn: 61447 2008-12-27 05:51:36 +01:00			`; RUN: grep frds %t1.s \| count 1`
Fix memory access lowering on SPU, adding support for the case where alignment<value size. These cases were silently miscompiled before this patch. Now they are overly verbose -especially storing is- and any front-end should still avoid misaligned memory accesses as much as possible. The bit juggling algorithm added here probably has some room for improvement still. llvm-svn: 118889 2010-11-12 11:14:03 +01:00			`; RUN: llc < %s -march=cellspu \| FileCheck %s`
CellSPU: (a) Fix bugs 3052, 3057 (b) Incorporate Duncan's suggestions re: i1 promotion (c) Indentation updates. llvm-svn: 59790 2008-11-21 03:56:16 +01:00
			`; ModuleID = 'stores.bc'`
			`target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"`
			`target triple = "spu"`

CellSPU: (a) Remove conditionally removed code in SelectXAddr. Basically, hope for the best that the A-form and D-form address predicates catch everything before the code decides to emit a X-form address. (b) Expand vector store test cases to include the usual suspects. llvm-svn: 60034 2008-11-25 18:29:43 +01:00			`define void @store_v16i8_1(<16 x i8>* %a) nounwind {`
			`entry:`
			`store <16 x i8> < i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1 >, <16 x i8>* %a`
			`ret void`
			`}`

			`define void @store_v16i8_2(<16 x i8>* %a) nounwind {`
			`entry:`
			`%arrayidx = getelementptr <16 x i8>* %a, i32 1`
			`store <16 x i8> < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >, <16 x i8>* %arrayidx`
			`ret void`
			`}`

			`define void @store_v16i8_3(<16 x i8>* %a, i32 %i) nounwind {`
			`entry:`
			`%arrayidx = getelementptr <16 x i8>* %a, i32 %i`
			`store <16 x i8> < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >, <16 x i8>* %arrayidx`
			`ret void`
			`}`

			`define void @store_v8i16_1(<8 x i16>* %a) nounwind {`
			`entry:`
			`store <8 x i16> < i16 1, i16 2, i16 1, i16 1, i16 1, i16 2, i16 1, i16 1 >, <8 x i16>* %a`
			`ret void`
			`}`

			`define void @store_v8i16_2(<8 x i16>* %a) nounwind {`
			`entry:`
			`%arrayidx = getelementptr <8 x i16>* %a, i16 1`
			`store <8 x i16> < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >, <8 x i16>* %arrayidx`
			`ret void`
			`}`

			`define void @store_v8i16_3(<8 x i16>* %a, i32 %i) nounwind {`
			`entry:`
			`%arrayidx = getelementptr <8 x i16>* %a, i32 %i`
			`store <8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16>* %arrayidx`
			`ret void`
			`}`

			`define void @store_v4i32_1(<4 x i32>* %a) nounwind {`
			`entry:`
			`store <4 x i32> < i32 1, i32 2, i32 1, i32 1 >, <4 x i32>* %a`
			`ret void`
			`}`

			`define void @store_v4i32_2(<4 x i32>* %a) nounwind {`
			`entry:`
			`%arrayidx = getelementptr <4 x i32>* %a, i32 1`
			`store <4 x i32> < i32 2, i32 2, i32 2, i32 2 >, <4 x i32>* %arrayidx`
			`ret void`
			`}`

			`define void @store_v4i32_3(<4 x i32>* %a, i32 %i) nounwind {`
			`entry:`
			`%arrayidx = getelementptr <4 x i32>* %a, i32 %i`
			`store <4 x i32> < i32 1, i32 1, i32 1, i32 1 >, <4 x i32>* %arrayidx`
			`ret void`
			`}`

CellSPU: (a) Fix bgs 3052, 3057 (b) Incorporate Duncan's suggestions re: i1 promotion (c) Indentation updates. llvm-svn: 59790 2008-11-21 03:56:16 +01:00			`define void @store_v4f32_1(<4 x float>* %a) nounwind {`
			`entry:`
			`store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a`
			`ret void`
			`}`

			`define void @store_v4f32_2(<4 x float>* %a) nounwind {`
			`entry:`
			`%arrayidx = getelementptr <4 x float>* %a, i32 1`
			`store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx`
			`ret void`
			`}`
CellSPU: (a) Remove conditionally removed code in SelectXAddr. Basically, hope for the best that the A-form and D-form address predicates catch everything before the code decides to emit a X-form address. (b) Expand vector store test cases to include the usual suspects. llvm-svn: 60034 2008-11-25 18:29:43 +01:00
			`define void @store_v4f32_3(<4 x float>* %a, i32 %i) nounwind {`
			`entry:`
			`%arrayidx = getelementptr <4 x float>* %a, i32 %i`
			`store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx`
			`ret void`
			`}`
- Remove Tilmann's custom truncate lowering: it completely hosed over DAGcombine's ability to find reasons to remove truncates when they were not needed. Consequently, the CellSPU backend would produce correct, but _really slow and horrible_, code. Replaced with instruction sequences that do the equivalent truncation in SPUInstrInfo.td. - Re-examine how unaligned loads and stores work. Generated unaligned load code has been tested on the CellSPU hardware; see the i32operations.c and i64operations.c in CodeGen/CellSPU/useful-harnesses. (While they may be toy test code, it does prove that some real world code does compile correctly.) - Fix truncating stores in bug 3193 (note: unpack_df.ll will still make llc fault because i64 ult is not yet implemented.) - Added i64 eq and neq for setcc and select/setcc; started new instruction information file for them in SPU64InstrInfo.td. Additional i64 operations should be added to this file and not to SPUInstrInfo.td. llvm-svn: 61447 2008-12-27 05:51:36 +01:00
			`; Test truncating stores:`

			`define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind {`
			`entry:`
			`%conv = trunc i16 %val to i8`
			`store i8 %conv, i8* %dest`
			`ret i8 %conv`
			`}`

			`define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind {`
			`entry:`
			`%conv = trunc i32 %val to i8`
			`store i8 %conv, i8* %dest`
			`ret i8 %conv`
			`}`

			`define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind {`
			`entry:`
			`%conv = trunc i32 %val to i16`
			`store i16 %conv, i16* %dest`
			`ret i16 %conv`
			`}`

			`define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind {`
			`entry:`
			`%conv = trunc i64 %val to i8`
			`store i8 %conv, i8* %dest`
			`ret i8 %conv`
			`}`

			`define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind {`
			`entry:`
			`%conv = trunc i64 %val to i16`
			`store i16 %conv, i16* %dest`
			`ret i16 %conv`
			`}`

			`define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind {`
			`entry:`
			`%conv = trunc i64 %val to i32`
			`store i32 %conv, i32* %dest`
			`ret i32 %conv`
			`}`

			`define float @tstore_f64_f32(double %val, float* %dest) nounwind {`
			`entry:`
			`%conv = fptrunc double %val to float`
			`store float %conv, float* %dest`
			`ret float %conv`
			`}`
Fix memory access lowering on SPU, adding support for the case where alignment<value size. These cases were silently miscompiled before this patch. Now they are overly verbose -especially storing is- and any front-end should still avoid misaligned memory accesses as much as possible. The bit juggling algorithm added here probably has some room for improvement still. llvm-svn: 118889 2010-11-12 11:14:03 +01:00
			`;Check stores that might span two 16 byte memory blocks`
			`define void @store_misaligned( i32 %val, i32* %ptr) {`
			`;CHECK: store_misaligned`
			`;CHECK: lqd`
			`;CHECK: lqd`
			`;CHECK: stqd`
			`;CHECK: stqd`
			`;CHECK: bi $lr`
			`store i32 %val, i32*%ptr, align 2`
			`ret void`
			`}`
Don't crash SPU BE with memory accesses with big alignment. llvm-svn: 123620 2011-01-17 12:59:20 +01:00
			`define void @store_v8( <8 x float> %val, <8 x float>* %ptr )`
			`{`
			`;CHECK: stq`
			`;CHECK: stq`
			`;CHECK: bi $lr`
			`store <8 x float> %val, <8 x float>* %ptr`
			`ret void`
			`}`
Allow load from constant on SPU. A 'load <4 x i32>* null' crashes llc before this fix. llvm-svn: 126995 2011-03-04 13:00:11 +01:00
			`define void @store_null_vec( <4 x i32> %val ) {`
			`; FIXME - this is for some reason compiled into a il+stqd, not a sta.`
			`;CHECK: stqd`
			`;CHECK: bi $lr`
			`store <4 x i32> %val, <4 x i32>* null`
			`ret void`
			`}`