mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
5c433e70b6
The trip count for a memcpy/memset will be n/16 rounded up to the nearest integer, i.e. (n+15)>>4. The old code also included a BIC to clear one of the bits, which does not seem correct. This removes the extra BIC. Note that ideally this would never actually be generated, as in the creation of a tail-predicated loop we will DCE that setup code, letting the WLSTP perform the trip count calculation. So this doesn't usually come up in testing (and apparently the ARMLowOverheadLoops pass does not do any sort of validation on the trip count). Only if the generation of the WLSTP fails will it use the incorrect BIC instructions. Differential Revision: https://reviews.llvm.org/D102629
236 lines
10 KiB
YAML
236 lines
10 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -simplify-mir --verify-machineinstrs -run-pass=finalize-isel %s -o - | FileCheck %s
# The llc invocation above runs only the finalize-isel pass (with MVE enabled
# and the machine verifier on) and pipes the resulting MIR to FileCheck.
|
|
--- |
|
|
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
|
; NOTE(review): this module triple differs from the -mtriple value passed on
; the llc command line (thumbv8.1m.main-none-eabi); presumably the command-line
; value is the one that matters for this test - confirm.
target triple = "arm-arm-none-eabi"
|
|
|
|
; Intrinsic declarations used by the IR functions in this module: a memcpy and
; a memset, both taking an i32 length and an immarg i1 flag.
; Function Attrs: argmemonly nofree nosync nounwind willreturn
|
|
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg)
|
|
; Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
|
|
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
|
|
|
|
; test1 (IR): unconditional memcpy. Both i32* arguments are bitcast to i8* and
; %n bytes are copied from %Y to %X with align 4 on both pointers and the final
; i1 flag set to false.
define void @test1(i32* noalias %X, i32* noalias readonly %Y, i32 %n) {
|
|
entry:
|
|
%0 = bitcast i32* %X to i8*
|
|
%1 = bitcast i32* %Y to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 %n, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; test2 (IR): memcpy guarded by a signed "%n > 0" test. When the comparison is
; false, control goes straight to for.cond.cleanup and nothing is copied.
define void @test2(i32* noalias %X, i32* noalias readonly %Y, i32 %n) {
|
|
entry:
|
|
%cmp6 = icmp sgt i32 %n, 0
|
|
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
|
|
|
|
; Only reached when %n > 0: same align-4 memcpy of %n bytes as in test1.
for.body.preheader: ; preds = %entry
|
|
%X.bits = bitcast i32* %X to i8*
|
|
%Y.bits = bitcast i32* %Y to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %X.bits, i8* align 4 %Y.bits, i32 %n, i1 false)
|
|
br label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body.preheader, %entry
|
|
ret void
|
|
}
|
|
|
|
; test3 (IR): unconditional memset. %X is bitcast to i8* and %n bytes are set to
; the byte value %c (align 4, final i1 flag false) via a tail call.
define void @test3(i32* nocapture %X, i8 zeroext %c, i32 %n) {
|
|
entry:
|
|
%0 = bitcast i32* %X to i8*
|
|
tail call void @llvm.memset.p0i8.i32(i8* align 4 %0, i8 %c, i32 %n, i1 false)
|
|
ret void
|
|
}
|
|
|
|
|
|
; test4 (IR): memset guarded by a signed "%n > 0" test. Unlike test3, %X is
; already an i8* and the call uses align 1.
define void @test4(i8* nocapture %X, i8 zeroext %c, i32 %n) {
|
|
entry:
|
|
%cmp4 = icmp sgt i32 %n, 0
|
|
br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
|
|
|
|
; Only reached when %n > 0.
for.body.preheader: ; preds = %entry
|
|
call void @llvm.memset.p0i8.i32(i8* align 1 %X, i8 %c, i32 %n, i1 false)
|
|
br label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body.preheader, %entry
|
|
ret void
|
|
}
|
|
|
|
...
|
|
---
# test1 (MIR): a single-block function containing MVE_MEMCPYLOOPINST.
# The expected output below computes the loop count as (n + 15) >> 4
# (t2ADDri 15 followed by t2LSRri 4), passes it to t2WhileLoopSetup and
# t2WhileLoopStart, and then expects a loop block that:
#   - carries four PHIs (source pointer, destination pointer, loop counter,
#     remaining element count),
#   - builds a predicate with MVE_VCTP8 from the remaining count,
#   - subtracts 16 from the remaining count,
#   - does a predicated post-incrementing load (MVE_VLDRBU8_post) and store
#     (MVE_VSTRBU8_post) with an increment of 16,
#   - closes with t2LoopDec / t2LoopEnd.
|
|
name: test1
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0.entry:
|
|
liveins: $r0, $r1, $r2
|
|
|
|
; CHECK-LABEL: name: test1
|
|
; CHECK: liveins: $r0, $r1, $r2
|
|
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
|
|
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
|
|
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
|
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
|
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.2, implicit-def $cpsr
|
|
; CHECK: .1:
|
|
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.0, %7, %bb.1
|
|
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %9, %bb.1
|
|
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %11, %bb.1
|
|
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %13, %bb.1
|
|
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI3]], 0, $noreg
|
|
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI3]], 16, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[PHI]], 16, 1, [[MVE_VCTP8_]]
|
|
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[MVE_VLDRBU8_post1]], [[PHI1]], 16, 1, [[MVE_VCTP8_]]
|
|
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI2]], 1
|
|
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.1, implicit-def $cpsr
|
|
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
|
|
; CHECK: .2.entry:
|
|
; CHECK: tBX_RET 14 /* CC::al */, $noreg
|
|
; Pass input: %0, %1 and %2 are copied from $r0, $r1 and $r2 and handed to the
; memcpy pseudo in that operand order.
%2:rgpr = COPY $r2
|
|
%1:rgpr = COPY $r1
|
|
%0:rgpr = COPY $r0
|
|
MVE_MEMCPYLOOPINST %0, %1, %2, implicit-def $cpsr
|
|
tBX_RET 14 /* CC::al */, $noreg
|
|
|
|
...
|
|
---
# test2 (MIR): MVE_MEMCPYLOOPINST placed in a block that is only entered when
# the length register compares >= 1 (t2CMPri ..., 1 followed by a CC::lt branch
# to the exit block).
# The expected output keeps the compare-and-branch in bb.0, expects the
# (n + 15) >> 4 count computation plus t2WhileLoopSetup/t2WhileLoopStart in
# bb.1, a new loop block bb.3 with the same body as in test1, and a new block
# bb.4 that branches to the original bb.2.
|
|
name: test2
|
|
tracksRegLiveness: true
|
|
body: |
|
|
; CHECK-LABEL: name: test2
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
|
|
; CHECK: liveins: $r0, $r1, $r2
|
|
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
|
|
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
|
|
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
|
; CHECK: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
|
; CHECK: t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
|
|
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.1.for.body.preheader:
|
|
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
|
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.4, implicit-def $cpsr
|
|
; CHECK: bb.3:
|
|
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.1, %7, %bb.3
|
|
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %9, %bb.3
|
|
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %11, %bb.3
|
|
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %13, %bb.3
|
|
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI3]], 0, $noreg
|
|
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI3]], 16, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[PHI]], 16, 1, [[MVE_VCTP8_]]
|
|
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[MVE_VLDRBU8_post1]], [[PHI1]], 16, 1, [[MVE_VCTP8_]]
|
|
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI2]], 1
|
|
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.3, implicit-def $cpsr
|
|
; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.4.for.body.preheader:
|
|
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.2.for.cond.cleanup:
|
|
; CHECK: tBX_RET 14 /* CC::al */, $noreg
|
|
; Pass input starts here: bb.0 copies the three argument registers and
; branches to bb.2 when %2 compares less than 1.
bb.0.entry:
|
|
successors: %bb.1(0x50000000), %bb.2(0x30000000)
|
|
liveins: $r0, $r1, $r2
|
|
|
|
%2:rgpr = COPY $r2
|
|
%1:rgpr = COPY $r1
|
|
%0:rgpr = COPY $r0
|
|
t2CMPri %2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
|
t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
|
|
t2B %bb.1, 14 /* CC::al */, $noreg
|
|
|
|
; bb.1 holds only the memcpy pseudo and lists bb.2 as its sole successor.
bb.1.for.body.preheader:
|
|
successors: %bb.2(0x80000000)
|
|
|
|
MVE_MEMCPYLOOPINST %0, %1, %2, implicit-def $cpsr
|
|
|
|
bb.2.for.cond.cleanup:
|
|
tBX_RET 14 /* CC::al */, $noreg
|
|
|
|
...
|
|
---
# test3 (MIR): a single-block function containing MVE_MEMSETLOOPINST.
# The expected output has the same (n + 15) >> 4 count computation and
# t2WhileLoopSetup/t2WhileLoopStart as the memcpy tests. The loop block has
# three PHIs (destination pointer, loop counter, remaining element count) and
# no load: the mqpr value copied from $r1 is stored on every iteration by a
# predicated MVE_VSTRBU8_post.
|
|
name: test3
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0.entry:
|
|
liveins: $r0, $r1, $r2
|
|
|
|
; CHECK-LABEL: name: test3
|
|
; CHECK: liveins: $r0, $r1, $r2
|
|
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
|
|
; CHECK: [[COPY1:%[0-9]+]]:mqpr = COPY $r1
|
|
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
|
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
|
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.2, implicit-def $cpsr
|
|
; CHECK: .1:
|
|
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %7, %bb.1
|
|
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %9, %bb.1
|
|
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %11, %bb.1
|
|
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI2]], 0, $noreg
|
|
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 16, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY1]], [[PHI]], 16, 1, [[MVE_VCTP8_]]
|
|
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI1]], 1
|
|
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.1, implicit-def $cpsr
|
|
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
|
|
; CHECK: .2.entry:
|
|
; CHECK: tBX_RET 14 /* CC::al */, $noreg
|
|
; Pass input: note that %1 is declared with the mqpr register class even though
; it is copied from $r1; it is the second operand of the memset pseudo.
%2:rgpr = COPY $r2
|
|
%1:mqpr = COPY $r1
|
|
%0:rgpr = COPY $r0
|
|
MVE_MEMSETLOOPINST %0, %1, %2, implicit-def $cpsr
|
|
tBX_RET 14 /* CC::al */, $noreg
|
|
|
|
...
|
|
---
# test4 (MIR): MVE_MEMSETLOOPINST placed in a block that is only entered when
# the length register compares >= 1 (t2CMPri ..., 1 followed by a CC::lt branch
# to the exit block).
# The expected output mirrors test2's block layout (count computation and
# while-loop start in bb.1, loop in bb.3, bb.4 branching to bb.2) with test3's
# store-only loop body.
# This is the only function in the file that sets an explicit "alignment", and
# its bb.1 does not list successors explicitly, unlike bb.1 in test2.
|
|
name: test4
|
|
alignment: 2
|
|
tracksRegLiveness: true
|
|
body: |
|
|
; CHECK-LABEL: name: test4
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
|
|
; CHECK: liveins: $r0, $r1, $r2
|
|
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
|
|
; CHECK: [[COPY1:%[0-9]+]]:mqpr = COPY $r1
|
|
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
|
; CHECK: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
|
; CHECK: t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
|
|
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.1.for.body.preheader:
|
|
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
|
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.4, implicit-def $cpsr
|
|
; CHECK: bb.3:
|
|
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %7, %bb.3
|
|
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %9, %bb.3
|
|
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %11, %bb.3
|
|
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI2]], 0, $noreg
|
|
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 16, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY1]], [[PHI]], 16, 1, [[MVE_VCTP8_]]
|
|
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI1]], 1
|
|
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.3, implicit-def $cpsr
|
|
; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.4.for.body.preheader:
|
|
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.2.for.cond.cleanup:
|
|
; CHECK: tBX_RET 14 /* CC::al */, $noreg
|
|
; Pass input starts here: bb.0 copies the three argument registers and
; branches to bb.2 when %2 compares less than 1.
bb.0.entry:
|
|
successors: %bb.1(0x50000000), %bb.2(0x30000000)
|
|
liveins: $r0, $r1, $r2
|
|
|
|
%2:rgpr = COPY $r2
|
|
%1:mqpr = COPY $r1
|
|
%0:rgpr = COPY $r0
|
|
t2CMPri %2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
|
t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
|
|
t2B %bb.1, 14 /* CC::al */, $noreg
|
|
|
|
; bb.1 holds only the memset pseudo.
bb.1.for.body.preheader:
|
|
MVE_MEMSETLOOPINST %0, %1, %2, implicit-def $cpsr
|
|
|
|
bb.2.for.cond.cleanup:
|
|
tBX_RET 14 /* CC::al */, $noreg
|
|
|
|
...
|