1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-tp-loop.mir
David Green 5c433e70b6 [ARM] Fix inline memcpy trip count sequence
The trip count for a memcpy/memset will be n/16 rounded up to the
nearest integer. So (n+15)>>4. The old code was including a BIC too, to
clear one of the bits, which does not seem correct. This removes the
extra BIC.

Note that ideally this would never actually be generated, as in the
creation of a tail predicated loop we will DCE that setup code, letting
the WLSTP perform the trip count calculation. So this doesn't usually
come up in testing (and apparently the ARMLowOverheadLoops pass does not
do any sort of validation on the tripcount). Only if the generation of
the WLSTP fails will it use the incorrect BIC instructions.

Differential Revision: https://reviews.llvm.org/D102629
2021-05-24 11:01:58 +01:00

236 lines
10 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -simplify-mir --verify-machineinstrs -run-pass=finalize-isel %s -o - | FileCheck %s
--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "arm-arm-none-eabi"
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg)
; Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
; test1: unconditional, non-volatile memcpy of %n bytes from %Y to %X, with
; both pointers marked align 4 at the call site. There is no guard on %n.
define void @test1(i32* noalias %X, i32* noalias readonly %Y, i32 %n) {
entry:
; The memcpy intrinsic is declared on i8*, so both i32* arguments are bitcast.
%0 = bitcast i32* %X to i8*
%1 = bitcast i32* %Y to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 %n, i1 false)
ret void
}
; test2: same memcpy as test1, but only reached when %n is signed-greater-than
; zero; otherwise control goes straight to for.cond.cleanup.
define void @test2(i32* noalias %X, i32* noalias readonly %Y, i32 %n) {
entry:
; Skip the copy entirely for %n <= 0.
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
; The memcpy intrinsic is declared on i8*, so both i32* arguments are bitcast.
%X.bits = bitcast i32* %X to i8*
%Y.bits = bitcast i32* %Y to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %X.bits, i8* align 4 %Y.bits, i32 %n, i1 false)
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body.preheader, %entry
ret void
}
; test3: unconditional, non-volatile memset of %n bytes at %X to the byte
; value %c, with the destination marked align 4. There is no guard on %n.
define void @test3(i32* nocapture %X, i8 zeroext %c, i32 %n) {
entry:
; The memset intrinsic is declared on i8*, so the i32* argument is bitcast.
%0 = bitcast i32* %X to i8*
tail call void @llvm.memset.p0i8.i32(i8* align 4 %0, i8 %c, i32 %n, i1 false)
ret void
}
; test4: memset of %n bytes at %X to the byte value %c, only reached when %n
; is signed-greater-than zero. Unlike test3 the destination is already an i8*
; and is only marked align 1.
define void @test4(i8* nocapture %X, i8 zeroext %c, i32 %n) {
entry:
; Skip the memset entirely for %n <= 0.
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
call void @llvm.memset.p0i8.i32(i8* align 1 %X, i8 %c, i32 %n, i1 false)
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body.preheader, %entry
ret void
}
...
---
# test1: single-block function whose input is one MVE_MEMCPYLOOPINST pseudo.
# The CHECK lines expect the pass run by the RUN line (finalize-isel) to
# replace the pseudo with:
#   - a trip count of (n + 15) >> 4, built from t2ADDri 15 and t2LSRri 4 with
#     no additional bit-clearing instruction in between;
#   - t2WhileLoopSetup / t2WhileLoopStart on that trip count;
#   - a loop block whose MVE_VLDRBU8_post / MVE_VSTRBU8_post are predicated on
#     MVE_VCTP8 of the remaining count, which t2SUBri reduces by 16 per
#     iteration, closed by t2LoopDec / t2LoopEnd.
name: test1
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r0, $r1, $r2
; CHECK-LABEL: name: test1
; CHECK: liveins: $r0, $r1, $r2
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.2, implicit-def $cpsr
; CHECK: .1:
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.0, %7, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %9, %bb.1
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %11, %bb.1
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %13, %bb.1
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI3]], 0, $noreg
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI3]], 16, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[PHI]], 16, 1, [[MVE_VCTP8_]]
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[MVE_VLDRBU8_post1]], [[PHI1]], 16, 1, [[MVE_VCTP8_]]
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI2]], 1
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.1, implicit-def $cpsr
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
; CHECK: .2.entry:
; CHECK: tBX_RET 14 /* CC::al */, $noreg
; Test input. Per the expected output above, %0 ($r0) becomes the store
; address, %1 ($r1) the load address and %2 ($r2) the count fed to MVE_VCTP8.
%2:rgpr = COPY $r2
%1:rgpr = COPY $r1
%0:rgpr = COPY $r0
MVE_MEMCPYLOOPINST %0, %1, %2, implicit-def $cpsr
tBX_RET 14 /* CC::al */, $noreg
...
---
# test2: MVE_MEMCPYLOOPINST placed in a block (bb.1) that is only entered when
# the t2CMPri/t2Bcc guard in bb.0 does not branch to bb.2 (the branch is taken
# on "lt" against the immediate 1).
# The CHECK lines expect the pass run by the RUN line (finalize-isel) to leave
# bb.0 unchanged and to expand the pseudo into:
#   - a trip count of (n + 15) >> 4 (t2ADDri 15, t2LSRri 4) plus
#     t2WhileLoopSetup / t2WhileLoopStart at the end of bb.1;
#   - a new loop block bb.3 with the VCTP8-predicated post-incrementing
#     load/store pair and t2LoopDec / t2LoopEnd;
#   - a new block bb.4, named by t2WhileLoopStart and reached after the loop,
#     which branches on to the original successor bb.2.
name: test2
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: test2
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
; CHECK: liveins: $r0, $r1, $r2
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
; CHECK: bb.1.for.body.preheader:
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.4, implicit-def $cpsr
; CHECK: bb.3:
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.1, %7, %bb.3
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %9, %bb.3
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %11, %bb.3
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %13, %bb.3
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI3]], 0, $noreg
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI3]], 16, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[PHI]], 16, 1, [[MVE_VCTP8_]]
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[MVE_VLDRBU8_post1]], [[PHI1]], 16, 1, [[MVE_VCTP8_]]
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI2]], 1
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.3, implicit-def $cpsr
; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK: bb.4.for.body.preheader:
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
; CHECK: bb.2.for.cond.cleanup:
; CHECK: tBX_RET 14 /* CC::al */, $noreg
; Test input: guard in bb.0, the pseudo alone in bb.1, return in bb.2.
bb.0.entry:
successors: %bb.1(0x50000000), %bb.2(0x30000000)
liveins: $r0, $r1, $r2
%2:rgpr = COPY $r2
%1:rgpr = COPY $r1
%0:rgpr = COPY $r0
t2CMPri %2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
t2B %bb.1, 14 /* CC::al */, $noreg
bb.1.for.body.preheader:
successors: %bb.2(0x80000000)
MVE_MEMCPYLOOPINST %0, %1, %2, implicit-def $cpsr
bb.2.for.cond.cleanup:
tBX_RET 14 /* CC::al */, $noreg
...
---
# test3: single-block function whose input is one MVE_MEMSETLOOPINST pseudo.
# The CHECK lines expect the pass run by the RUN line (finalize-isel) to
# replace the pseudo with:
#   - a trip count of (n + 15) >> 4, built from t2ADDri 15 and t2LSRri 4 with
#     no additional bit-clearing instruction in between;
#   - t2WhileLoopSetup / t2WhileLoopStart on that trip count;
#   - a loop block with a single MVE_VSTRBU8_post of the mqpr value,
#     predicated on MVE_VCTP8 of the remaining count (reduced by 16 per
#     iteration via t2SUBri), closed by t2LoopDec / t2LoopEnd.
# There is no load in the loop, so it carries three PHIs, not the four of test1.
name: test3
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r0, $r1, $r2
; CHECK-LABEL: name: test3
; CHECK: liveins: $r0, $r1, $r2
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
; CHECK: [[COPY1:%[0-9]+]]:mqpr = COPY $r1
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.2, implicit-def $cpsr
; CHECK: .1:
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %7, %bb.1
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %9, %bb.1
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %11, %bb.1
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI2]], 0, $noreg
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 16, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY1]], [[PHI]], 16, 1, [[MVE_VCTP8_]]
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI1]], 1
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.1, implicit-def $cpsr
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
; CHECK: .2.entry:
; CHECK: tBX_RET 14 /* CC::al */, $noreg
; Test input. Per the expected output above, %0 ($r0) becomes the store
; address, %1 (an mqpr copy of $r1) the value stored and %2 ($r2) the count
; fed to MVE_VCTP8.
%2:rgpr = COPY $r2
%1:mqpr = COPY $r1
%0:rgpr = COPY $r0
MVE_MEMSETLOOPINST %0, %1, %2, implicit-def $cpsr
tBX_RET 14 /* CC::al */, $noreg
...
---
# test4: MVE_MEMSETLOOPINST placed in a block (bb.1) that is only entered when
# the t2CMPri/t2Bcc guard in bb.0 does not branch to bb.2 (the branch is taken
# on "lt" against the immediate 1).
# The CHECK lines expect the pass run by the RUN line (finalize-isel) to leave
# bb.0 unchanged and to expand the pseudo into:
#   - a trip count of (n + 15) >> 4 (t2ADDri 15, t2LSRri 4) plus
#     t2WhileLoopSetup / t2WhileLoopStart at the end of bb.1;
#   - a new loop block bb.3 with one VCTP8-predicated MVE_VSTRBU8_post and
#     t2LoopDec / t2LoopEnd;
#   - a new block bb.4, named by t2WhileLoopStart and reached after the loop,
#     which branches on to the original successor bb.2.
name: test4
alignment: 2
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: test4
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
; CHECK: liveins: $r0, $r1, $r2
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
; CHECK: [[COPY1:%[0-9]+]]:mqpr = COPY $r1
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
; CHECK: bb.1.for.body.preheader:
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.4, implicit-def $cpsr
; CHECK: bb.3:
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %7, %bb.3
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %9, %bb.3
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %11, %bb.3
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI2]], 0, $noreg
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 16, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY1]], [[PHI]], 16, 1, [[MVE_VCTP8_]]
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI1]], 1
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.3, implicit-def $cpsr
; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK: bb.4.for.body.preheader:
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
; CHECK: bb.2.for.cond.cleanup:
; CHECK: tBX_RET 14 /* CC::al */, $noreg
; Test input: guard in bb.0, the pseudo alone in bb.1, return in bb.2.
bb.0.entry:
successors: %bb.1(0x50000000), %bb.2(0x30000000)
liveins: $r0, $r1, $r2
%2:rgpr = COPY $r2
%1:mqpr = COPY $r1
%0:rgpr = COPY $r0
t2CMPri %2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
t2B %bb.1, 14 /* CC::al */, $noreg
; NOTE(review): unlike test2, this block has no explicit successors line;
; presumably the MIR parser infers the fall-through to bb.2 - confirm.
bb.1.for.body.preheader:
MVE_MEMSETLOOPINST %0, %1, %2, implicit-def $cpsr
bb.2.for.cond.cleanup:
tBX_RET 14 /* CC::al */, $noreg
...