mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
befd1de274
When matching half of the build_vector to a load, there could still be a hidden dependency on the other half of the build_vector that the pattern wouldn't detect. If there was an additional chain dependency on the other value, a cycle could be introduced. I don't think a tablegen pattern is capable of matching the necessary conditions, so move this into PreprocessISelDAG and check isPredecessorOf for the other value to avoid a cycle. isPredecessorOf carries a warning that it's expensive, so this should probably be moved into an MI pass eventually, which will have more freedom to reorder instructions to help match this. That is currently complicated by the lack of a computeKnownBits-type mechanism for the selected function. llvm-svn: 355731
27 lines
924 B
LLVM
27 lines
924 B
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
|
|
; There was an infinite loop in DAGCombiner from a target build_vector
|
|
; combine and a generic insert_vector_elt combine.
|
|
|
|
; GCN-LABEL: {{^}}combine_loop:
|
|
; GCN: flat_load_short_d16_hi
|
|
; GCN: flat_store_short
|
|
; Kernel under test. It takes a single i16 pointer, %arg. The entry block
; branches straight into %bb1, and %bb1 ends by branching back to itself
; unconditionally, so the function has no return.
define amdgpu_kernel void @combine_loop(i16* %arg) #0 {
|
|
bb:
|
|
br label %bb1
|
|
|
|
bb1:
|
|
; Loop-carried <2 x i16>: both lanes start at 15360 (0x3C00) on entry from
; %bb; on every later iteration it is the vector built below as %tmp5.
%tmp = phi <2 x i16> [ <i16 15360, i16 15360>, %bb ], [ %tmp5, %bb1 ]
|
|
; Loop-carried half: zero on entry, afterwards element 0 of the previous
; iteration's %tmp (see %tmp8).
%tmp2 = phi half [ 0xH0000, %bb ], [ %tmp8, %bb1 ]
|
|
; Volatile half load from the null pointer, with an alignment of 2^29.
%tmp3 = load volatile half, half* null, align 536870912
|
|
; Reinterpret the loaded half as its 16-bit integer pattern.
%tmp4 = bitcast half %tmp3 to i16
|
|
; Place the loaded bits in element 1 of a vector whose element 0 is the
; constant 0. NOTE(review): presumably this is the shape that selects to the
; flat_load_short_d16_hi the test checks for - confirm against the backend.
%tmp5 = insertelement <2 x i16> <i16 0, i16 undef>, i16 %tmp4, i32 1
|
|
; View the i16 pointer argument as a half pointer for the store below.
%tmp6 = bitcast i16* %arg to half*
|
|
; Volatile store of the loop-carried half through %arg.
store volatile half %tmp2, half* %tmp6, align 2
|
|
; Reinterpret the loop-carried integer vector as <2 x half> ...
%tmp7 = bitcast <2 x i16> %tmp to <2 x half>
|
|
; ... and take element 0, which feeds the %tmp2 phi on the next iteration.
%tmp8 = extractelement <2 x half> %tmp7, i32 0
|
|
; Unconditional back edge: %bb1 is its own only successor.
br label %bb1
|
|
}
|
|
|
|
attributes #0 = { nounwind }
|