llvm-mirror/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
Peter Collingbourne, commit 688547e6fc:
Thumb2: Modify codegen for memcpy intrinsic to prefer LDM/STM.
We were previously codegen'ing these as regular load/store operations and
hoping that the register allocator would allocate registers in ascending order
so that we could apply an LDM/STM combine after register allocation. According
to the commit that first introduced this code (r37179), we planned to teach
the register allocator to allocate the registers in ascending order. This
never got implemented, and up to now we've been stuck with very poor codegen.

A much simpler approach for achieving better codegen is to create LDM/STM
instructions with identical sets of virtual registers, let the register
allocator pick arbitrary registers, and sort the register lists when printing
the MCInst. This approach also avoids the need to repeatedly calculate offsets,
which ultimately ought to be eliminated pre-RA in order to decrease register
pressure.
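
To illustrate the ordering step, here is a minimal standalone sketch (not the
in-tree printer code): the allocator might hand back, say, {r3, r1, r2} for
three virtual registers, and because the LDM and its matching STM name the
same virtual registers, sorting both lists into ascending order at print time
yields a valid encoding without changing which value travels where.

#include <algorithm>
#include <vector>

// Sort a register list (registers identified by encoding number) into the
// ascending order that LDM/STM encodings require. Applying the same sort to
// an LDM and its paired STM keeps the copy correct: the lowest-numbered
// register always pairs with the lowest address on both sides.
std::vector<unsigned> orderRegisterList(std::vector<unsigned> Regs) {
  std::sort(Regs.begin(), Regs.end());
  return Regs;
}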

This is implemented by lowering the memcpy intrinsic to a series of SD-only
MCOPY pseudo-instructions, each of which performs a memory copy using a given
number of registers. During SD->MI lowering, we lower MCOPY to LDM/STM. This
is a little unusual, but it avoids the need to encode register lists in the SD,
and we can take advantage of SD use lists to decide whether to use the _UPD
variant of the instructions.
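
The burst sizes the tests below expect suggest how each copy is divided: into
ceil(words/4) LDM/STM pairs of near-equal size (24 bytes gives 3+3 registers,
28 gives 3+4, 32 gives 4+4, 36 gives 3+3+3). A sketch of that arithmetic,
inferred from the CHECK lines rather than copied from the in-tree code:

#include <vector>

// Split a word-aligned copy of Bytes bytes into LDM/STM burst sizes, using
// at most four registers per burst and balancing the sizes, with any extra
// word going to the later bursts.
std::vector<unsigned> burstSizes(unsigned Bytes) {
  unsigned Words = Bytes / 4;        // these tests copy multiples of 4 bytes
  unsigned Bursts = (Words + 3) / 4; // ceil(Words / 4)
  std::vector<unsigned> Sizes(Bursts, Words / Bursts);
  for (unsigned I = 0; I < Words % Bursts; ++I)
    ++Sizes[Bursts - 1 - I];
  return Sizes;
}

For example, burstSizes(28) returns {3, 4}, matching the 3-register then
4-register bursts checked in foo28 below.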

Fixes PR9199.

Differential Revision: http://reviews.llvm.org/D9508

llvm-svn: 238473
2015-05-28 20:02:45 +00:00


; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv6m-none--eabi"
@a = external global i32*
@b = external global i32*
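
; Each test copies N bytes from b[1] to a[1]. The lowering should materialize
; the two base pointers once (the adds checked below) and then advance them
; via ldm/stm writeback, rather than recomputing an offset for every access.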
; Function Attrs: nounwind
define void @foo24() #0 {
entry:
; CHECK-LABEL: foo24:
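; 24 bytes = 6 words, copied as two 3-register ldm/stm bursts: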
; CHECK: ldr r[[LB:[0-9]]], .LCPI
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
; CHECK: ldr r[[SB:[0-9]]], .LCPI
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
  %0 = load i32*, i32** @a, align 4
  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
  %1 = bitcast i32* %arrayidx to i8*
  %2 = load i32*, i32** @b, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
  %3 = bitcast i32* %arrayidx1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false)
  ret void
}

define void @foo28() #0 {
entry:
; CHECK-LABEL: foo28:
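; 28 bytes = 7 words, copied as a 3-register burst then a 4-register burst: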
; CHECK: ldr r[[LB:[0-9]]], .LCPI
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
; CHECK: ldr r[[SB:[0-9]]], .LCPI
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]], r[[R4:[0-9]]]}
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]], r[[R4]]}
  %0 = load i32*, i32** @a, align 4
  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
  %1 = bitcast i32* %arrayidx to i8*
  %2 = load i32*, i32** @b, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
  %3 = bitcast i32* %arrayidx1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
  ret void
}

define void @foo32() #0 {
entry:
; CHECK-LABEL: foo32:
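; 32 bytes = 8 words, copied as two 4-register ldm/stm bursts: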
; CHECK: ldr r[[LB:[0-9]]], .LCPI
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
; CHECK: ldr r[[SB:[0-9]]], .LCPI
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]], r[[R4:[0-9]]]}
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]], r[[R4]]}
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]], r[[R4:[0-9]]]}
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]], r[[R4]]}
  %0 = load i32*, i32** @a, align 4
  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
  %1 = bitcast i32* %arrayidx to i8*
  %2 = load i32*, i32** @b, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
  %3 = bitcast i32* %arrayidx1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false)
  ret void
}

define void @foo36() #0 {
entry:
; CHECK-LABEL: foo36:
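; 36 bytes = 9 words, copied as three 3-register ldm/stm bursts: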
; CHECK: ldr r[[LB:[0-9]]], .LCPI
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
; CHECK: ldr r[[SB:[0-9]]], .LCPI
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
  %0 = load i32*, i32** @a, align 4
  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
  %1 = bitcast i32* %arrayidx to i8*
  %2 = load i32*, i32** @b, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
  %3 = bitcast i32* %arrayidx1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false)
  ret void
}

; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind }