mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
278c5147ca
When emitting something like 'add x, 1000', if we remat the 1000 then we should be able to mark the vreg containing 1000 as killed. Given that we go bottom up in fast-isel, a later use of 1000 will be higher up in the BB and won't kill it, or be impacted by the lower kill. However, rematerialised constant expressions aren't generated bottom up. The local value save area grows downwards. This means that if you remat 2 constant expressions which both use 1000 then the first will kill it, then the second, which is *lower* in the BB, will read a killed register. This is the case in the attached test where the 2 GEPs both need to generate 'add x, 6680' for the constant offset. Note that this commit only makes kill flag generation conservative. There's nothing else obviously wrong with the local value save area growing downwards, and in fact it needs to for handling arbitrarily complex constant expressions. However, it would be nice if there was a solution which would let us generate more accurate kill flags, or just eliminate kill flags completely. llvm-svn: 236922
30 lines
1.3 KiB
LLVM
30 lines
1.3 KiB
LLVM
; RUN: llc %s -o - -fast-isel=true -O0 -verify-machineinstrs | FileCheck %s
|
|
|
|
target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
|
|
target triple = "thumbv7-apple-ios8.0.0"
|
|
|
|
; This test failed with -verify-machineinstrs due to incorrect kill flags on the add instructions
|
|
; generated by the GEPs. The first add generated killed the vreg for the #6680 constant which should
|
|
; be correct. However, the second add is also a constant expression and the local value save area grows
|
|
; down. This meant the next use of the vreg for #6680 came after the first use, which had already killed it.
|
|
|
|
; CHECK: #6680
|
|
|
|
; Aggregate type of the two external globals below. The GEPs in @test select
; field 10 (the second [16 x [16 x i16]] member). Under the datalayout above
; (4-byte i32, 2-byte i16, 1-byte i8, no padding needed between these fields),
; the fields preceding it occupy 4 + 3*1024 + 3072 + 8 + 3*4 + 512 = 6680 bytes,
; which appears to be the source of the #6680 constant checked by this test.
%struct.RD_8x8DATA = type { i32, [16 x [16 x i32]], [16 x [16 x i32]], [16 x [16 x i32]], [3 x [16 x [16 x i32]]], [4 x i16], [4 x i8], [4 x i8], [4 x i8], [16 x [16 x i16]], [16 x [16 x i16]], [16 x [16 x i32]] }
|
|
|
|
@tr8x8 = external global %struct.RD_8x8DATA, align 4
|
|
@tr4x4 = external global %struct.RD_8x8DATA, align 4
|
|
|
|
; Function Attrs: noreturn
|
|
declare void @foo(i16*, i16*) #0
|
|
|
|
; Function Attrs: minsize
|
|
; Minimal reproducer: a single call whose two arguments are constant GEP
; expressions selecting element [0][0] of field 10 in @tr8x8 and @tr4x4.
; Both GEPs use the same struct type and the same indices, so they share the
; same constant field offset; that shared constant is what the kill-flag bug
; described in the comments above depended on.
define i32 @test() #1 {
|
|
bb:
|
|
; Both arguments are constant expressions (not separate instructions).
call void @foo(i16* getelementptr inbounds (%struct.RD_8x8DATA, %struct.RD_8x8DATA* @tr8x8, i32 0, i32 10, i32 0, i32 0), i16* getelementptr inbounds (%struct.RD_8x8DATA, %struct.RD_8x8DATA* @tr4x4, i32 0, i32 10, i32 0, i32 0))
|
|
; @foo is declared with attribute group #0 (noreturn), so the block ends here.
unreachable
|
|
}
|
|
|
|
attributes #0 = { noreturn }
|
|
attributes #1 = { minsize }
|