llvm-mirror/test/CodeGen/ARM/ldrd.ll

; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3
; rdar://6949835
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY

; Magic ARM pair hints work best with linearscan / fast.

; Cortex-M3 errata 602117: LDRD with base in list may result in incorrect base
; register when interrupted or faulted.

@b = external global i64*

; @t: load a 64-bit value through the pointer stored in global @b, multiply
; it by the i64 argument %a, and return the product.
;
; The FileCheck directives inside the body pin down how the i64 load is
; emitted for each CPU exercised with the fast register allocator:
;   * the Cortex-A8 run expects a single ldrd whose base register (r2) is
;     also the first destination register;
;   * the Cortex-M3 run expects no ldrd at all, which is the workaround for
;     the errata described at the top of this file (base register in the
;     destination list).
define i64 @t(i64 %a) nounwind readonly {
entry:
; A8: t:
; A8:   ldrd r2, r3, [r2]

; M3: t:
; M3-NOT: ldrd

	; %0 = the i64* currently stored in @b.
	%0 = load i64** @b, align 4
	; %1 = the 64-bit value %0 points at; note it is only 4-byte aligned.
	%1 = load i64* %0, align 4
	; 64-bit multiply by the incoming argument.
	%2 = mul i64 %1, %a
	ret i64 %2
}

; rdar://10435045 mixed LDRi8/LDRi12
;
; In this case, LSR generates a sequence of LDRi8/LDRi12. We should be
; able to generate an LDRD pair here, but this is highly sensitive to
; regalloc hinting. So, this doubles as a register allocation
; test. RABasic currently does a better job within the inner loop
; because of its *lack* of hinting ability. Whereas RAGreedy keeps
; R0/R1/R2 live as the three arguments, forcing the LDRD's odd
; destination into R3. We then sensibly split LDRD again rather than
; evict another live range or use callee saved regs. Sorry if the test
; is sensitive to Regalloc changes, but it is an interesting case.
;
; BASIC: @f
; BASIC: %bb
; BASIC: ldrd
; BASIC: str
; GREEDY: @f
; GREEDY: %bb
; GREEDY: ldrd
; GREEDY: str
; @f: for i = 0, 1, ... store a[i] + a[i+1] into b[i], stopping once i+1
; reaches n-1. The loop body is skipped entirely when n-1 <= 0.
;
; The two i32 loads in the loop read adjacent elements of %a (index i and
; index i+1); they are the pair the checks above expect to see combined
; into one ldrd inside block %bb, followed by the str of the sum.
;
; Do not rename block %bb: the check directives above match on that name.
define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
entry:
  ; %0 = n - 1 is the loop bound; enter the loop only if it is positive.
  %0 = add nsw i32 %n, -1                         ; <i32> [#uses=2]
  %1 = icmp sgt i32 %0, 0                         ; <i1> [#uses=1]
  br i1 %1, label %bb, label %return

bb:                                               ; preds = %bb, %entry
  ; %i.03 is the induction variable: 0 on entry, %tmp (= i + 1) on the back edge.
  %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ]    ; <i32> [#uses=3]
  ; Addresses of a[i], b[i] and a[i+1].
  %scevgep = getelementptr i32* %a, i32 %i.03     ; <i32*> [#uses=1]
  %scevgep4 = getelementptr i32* %b, i32 %i.03    ; <i32*> [#uses=1]
  %tmp = add i32 %i.03, 1                         ; <i32> [#uses=3]
  %scevgep5 = getelementptr i32* %a, i32 %tmp     ; <i32*> [#uses=1]
  ; Two adjacent 4-byte loads from %a: a[i] then a[i+1].
  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
  %3 = load i32* %scevgep5, align 4               ; <i32> [#uses=1]
  ; b[i] = a[i+1] + a[i]
  %4 = add nsw i32 %3, %2                         ; <i32> [#uses=1]
  store i32 %4, i32* %scevgep4, align 4
  ; Leave the loop once i + 1 equals n - 1.
  %exitcond = icmp eq i32 %tmp, %0                ; <i1> [#uses=1]
  br i1 %exitcond, label %return, label %bb

return:                                           ; preds = %bb, %entry
  ret void
}
Don't run RAFast in the optimizing regalloc pipeline. The fast register allocator is not supposed to work in the optimizing pipeline. It doesn't make sense to compute live intervals, run full copy coalescing, and then run RAFast. Fast register allocation in the optimizing pipeline is better done by RABasic. llvm-svn: 158242 2012-06-09 01:15:12 +02:00			`; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 \| FileCheck %s -check-prefix=A8`
			`; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 \| FileCheck %s -check-prefix=M3`
Add workaround for Cortex-M3 errata 602117 by replacing ldrd x, y, [x] with ldm or ldr pairs. llvm-svn: 144123 2011-11-08 22:21:09 +01:00			`; rdar://6949835`
ARM Ld/St Optimizer fix. Allow LDRD to be formed from pairs with different LDR encodings. This was the original intention of the pass. Somewhere along the way, the LDR opcodes were refined which broke the optimization. We really don't care what the original opcodes are as long as they both map to the same LDRD and the immediate still fits. Fixes rdar://10435045 ARMLoadStoreOptimization cannot handle mixed LDRi8/LDRi12 llvm-svn: 147922 2012-01-11 04:56:08 +01:00			`; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic \| FileCheck %s -check-prefix=BASIC`
			`; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy \| FileCheck %s -check-prefix=GREEDY`
Part 1. - Change register allocation hint to a pair of unsigned integers. The hint type is zero (which means prefer the register specified as second part of the pair) or entirely target dependent. - Allow targets to specify alternative register allocation orders based on allocation hint. Part 2. - Use the register allocation hint system to implement more aggressive load / store multiple formation. - Aggressively form LDRD / STRD. These are formed before register allocation. It has to be done this way to shorten live interval of base and offset registers. e.g. v1025 = LDR v1024, 0 v1026 = LDR v1024, 0 => v1025,v1026 = LDRD v1024, 0 If this transformation isn't done before allocation, v1024 will overlap v1025 which means it more difficult to allocate a register pair. - Even with the register allocation hint, it may not be possible to get the desired allocation. In that case, the post-allocation load / store multiple pass must fix the ldrd / strd instructions. They can either become ldm / stm instructions or back to a pair of ldr / str instructions. This is work in progress, not yet enabled. llvm-svn: 73381 2009-06-15 10:28:29 +02:00
Add workaround for Cortex-M3 errata 602117 by replacing ldrd x, y, [x] with ldm or ldr pairs. llvm-svn: 144123 2011-11-08 22:21:09 +01:00			`; Magic ARM pair hints works best with linearscan / fast.`

			`; Cortex-M3 errata 602117: LDRD with base in list may result in incorrect base`
			`; register when interrupted or faulted.`
Fix ARM tests to be register allocator independent. llvm-svn: 128680 2011-04-01 00:14:03 +02:00
Part 1. - Change register allocation hint to a pair of unsigned integers. The hint type is zero (which means prefer the register specified as second part of the pair) or entirely target dependent. - Allow targets to specify alternative register allocation orders based on allocation hint. Part 2. - Use the register allocation hint system to implement more aggressive load / store multiple formation. - Aggressively form LDRD / STRD. These are formed before register allocation. It has to be done this way to shorten live interval of base and offset registers. e.g. v1025 = LDR v1024, 0 v1026 = LDR v1024, 0 => v1025,v1026 = LDRD v1024, 0 If this transformation isn't done before allocation, v1024 will overlap v1025 which means it more difficult to allocate a register pair. - Even with the register allocation hint, it may not be possible to get the desired allocation. In that case, the post-allocation load / store multiple pass must fix the ldrd / strd instructions. They can either become ldm / stm instructions or back to a pair of ldr / str instructions. This is work in progress, not yet enabled. llvm-svn: 73381 2009-06-15 10:28:29 +02:00			`@b = external global i64*`

			`define i64 @t(i64 %a) nounwind readonly {`
			`entry:`
Add workaround for Cortex-M3 errata 602117 by replacing ldrd x, y, [x] with ldm or ldr pairs. llvm-svn: 144123 2011-11-08 22:21:09 +01:00			`; A8: t:`
			`; A8: ldrd r2, r3, [r2]`
Convert test to filecheck. llvm-svn: 82835 2009-09-26 04:41:17 +02:00
Add workaround for Cortex-M3 errata 602117 by replacing ldrd x, y, [x] with ldm or ldr pairs. llvm-svn: 144123 2011-11-08 22:21:09 +01:00			`; M3: t:`
			`; M3-NOT: ldrd`
Convert test to filecheck. llvm-svn: 82835 2009-09-26 04:41:17 +02:00
Part 1. - Change register allocation hint to a pair of unsigned integers. The hint type is zero (which means prefer the register specified as second part of the pair) or entirely target dependent. - Allow targets to specify alternative register allocation orders based on allocation hint. Part 2. - Use the register allocation hint system to implement more aggressive load / store multiple formation. - Aggressively form LDRD / STRD. These are formed before register allocation. It has to be done this way to shorten live interval of base and offset registers. e.g. v1025 = LDR v1024, 0 v1026 = LDR v1024, 0 => v1025,v1026 = LDRD v1024, 0 If this transformation isn't done before allocation, v1024 will overlap v1025 which means it more difficult to allocate a register pair. - Even with the register allocation hint, it may not be possible to get the desired allocation. In that case, the post-allocation load / store multiple pass must fix the ldrd / strd instructions. They can either become ldm / stm instructions or back to a pair of ldr / str instructions. This is work in progress, not yet enabled. llvm-svn: 73381 2009-06-15 10:28:29 +02:00			`%0 = load i64** @b, align 4`
			`%1 = load i64* %0, align 4`
			`%2 = mul i64 %1, %a`
			`ret i64 %2`
			`}`
ARM Ld/St Optimizer fix. Allow LDRD to be formed from pairs with different LDR encodings. This was the original intention of the pass. Somewhere along the way, the LDR opcodes were refined which broke the optimization. We really don't care what the original opcodes are as long as they both map to the same LDRD and the immediate still fits. Fixes rdar://10435045 ARMLoadStoreOptimization cannot handle mixed LDRi8/LDRi12 llvm-svn: 147922 2012-01-11 04:56:08 +01:00
			`; rdar://10435045 mixed LDRi8/LDRi12`
			`;`
			`; In this case, LSR generate a sequence of LDRi8/LDRi12. We should be`
			`; able to generate an LDRD pair here, but this is highly sensitive to`
			`; regalloc hinting. So, this doubles as a register allocation`
			`; test. RABasic currently does a better job within the inner loop`
			`; because of its lack of hinting ability. Whereas RAGreedy keeps`
			`; R0/R1/R2 live as the three arguments, forcing the LDRD's odd`
			`; destination into R3. We then sensibly split LDRD again rather then`
			`; evict another live range or use callee saved regs. Sorry if the test`
			`; is sensitive to Regalloc changes, but it is an interesting case.`
			`;`
			`; BASIC: @f`
			`; BASIC: %bb`
			`; BASIC: ldrd`
			`; BASIC: str`
			`; GREEDY: @f`
			`; GREEDY: %bb`
Allocate virtual registers in ascending order. This is just the fallback tie-breaker ordering, the main allocation order is still descending size. Patch by Shamil Kurmangaleev! llvm-svn: 153904 2012-04-03 00:30:39 +02:00			`; GREEDY: ldrd`
ARM Ld/St Optimizer fix. Allow LDRD to be formed from pairs with different LDR encodings. This was the original intention of the pass. Somewhere along the way, the LDR opcodes were refined which broke the optimization. We really don't care what the original opcodes are as long as they both map to the same LDRD and the immediate still fits. Fixes rdar://10435045 ARMLoadStoreOptimization cannot handle mixed LDRi8/LDRi12 llvm-svn: 147922 2012-01-11 04:56:08 +01:00			`; GREEDY: str`
			`define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {`
			`entry:`
			`%0 = add nsw i32 %n, -1 ; <i32> [#uses=2]`
			`%1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1]`
			`br i1 %1, label %bb, label %return`

			`bb: ; preds = %bb, %entry`
			`%i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ] ; <i32> [#uses=3]`
			`%scevgep = getelementptr i32* %a, i32 %i.03 ; <i32*> [#uses=1]`
			`%scevgep4 = getelementptr i32* %b, i32 %i.03 ; <i32*> [#uses=1]`
			`%tmp = add i32 %i.03, 1 ; <i32> [#uses=3]`
			`%scevgep5 = getelementptr i32* %a, i32 %tmp ; <i32*> [#uses=1]`
			`%2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]`
			`%3 = load i32* %scevgep5, align 4 ; <i32> [#uses=1]`
			`%4 = add nsw i32 %3, %2 ; <i32> [#uses=1]`
			`store i32 %4, i32* %scevgep4, align 4`
			`%exitcond = icmp eq i32 %tmp, %0 ; <i1> [#uses=1]`
			`br i1 %exitcond, label %return, label %bb`

			`return: ; preds = %bb, %entry`
			`ret void`
			`}`