mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
43b7a9472e
Sadly, this duplicates (twice) the logic from InstSimplify. There might be some way to at least share the DAG versions of the code, but copying the folds seems to be the standard method to ensure that we don't miss these folds. Unlike in IR, we don't run DAGCombiner to fixpoint, so there's no way to ensure that we do these kinds of simplifications unless the code is repeated at node creation time and during combines. There were other tests that would become worthless with this improvement that I changed as pre-commits: rL347161 rL347164 rL347165 rL347166 rL347167 I'm not sure how to salvage the remaining tests (diffs in this patch). So the x86 tests verify that the new code is working as intended. The AMDGPU test is actually similar to my motivating case: we have some undef value that has survived to machine IR in an x86 test, and then it gets folded in some weird way, or we crash if we don't transfer the undef flag. But we would have been better off never getting to that point by doing these simplifications. This will lead back to PR32023 someday... https://bugs.llvm.org/show_bug.cgi?id=32023 llvm-svn: 347170
54 lines
1.6 KiB
LLVM
54 lines
1.6 KiB
LLVM
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; AMDGPU intrinsic taking a float and an i32 and returning i1. In this file it
; is only called from @preserve_condition_undef_flag, with both operands undef,
; to produce the condition of a select.
declare i1 @llvm.amdgcn.class.f32(float, i32)

; Produces error after adding an implicit def to v_cndmask_b32.
;
; The kernel selects between %arg and 1.0 on an i32 equality compare (%c) and
; feeds the selected value into a chain of float compares that guards a
; volatile store. The checks below expect the integer compare to be emitted in
; its e64 encoding with vcc as the destination, and the select to be emitted
; as the e32 encoding of v_cndmask_b32 reading vcc.

; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
define amdgpu_kernel void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
bb0:
  ; NOTE(review): %tmp is not referenced by any later instruction; presumably
  ; it is left over from the original reproducer. It is kept as-is.
  %tmp = icmp sgt i32 %arg1, 4
  ; Condition and select that the two instruction checks above refer to.
  %c = icmp eq i32 %arg3, 0
  %tmp4 = select i1 %c, float %arg, float 1.000000e+00
  ; Branch condition: (0.0 < %arg2) && (%arg2 < 1.0) && (%arg < %tmp4).
  %tmp5 = fcmp ogt float %arg2, 0.000000e+00
  %tmp6 = fcmp olt float %arg2, 1.000000e+00
  %tmp7 = fcmp olt float %arg, %tmp4
  %tmp8 = and i1 %tmp5, %tmp6
  %tmp9 = and i1 %tmp8, %tmp7
  br i1 %tmp9, label %bb1, label %bb2

bb1:
  ; Volatile store gives the conditional block an effect that cannot be
  ; dropped; the destination pointer itself is undef.
  store volatile i32 0, i32 addrspace(1)* undef
  br label %bb2

bb2:
  ret void
}

; The undef flag on the condition src must be preserved on the
; implicit vcc use to avoid verifier errors.
;
; Same control-flow shape as @vcc_shrink_vcc_def, but the select condition
; comes from @llvm.amdgcn.class.f32 called on undef operands, and the final
; float compare uses a literal undef operand. The only expectation is that
; "vcc" does not appear in the output after the function label.

; GCN-LABEL: {{^}}preserve_condition_undef_flag:
; GCN-NOT: vcc
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
bb0:
  ; NOTE(review): %tmp is not referenced by any later instruction; presumably
  ; it is left over from the original reproducer. It is kept as-is.
  %tmp = icmp sgt i32 %arg1, 4
  ; Select whose condition is derived entirely from undef inputs.
  ; NOTE(review): %tmp4 (and so %undef) has no later use in this function;
  ; %tmp7 below compares against a literal undef rather than %tmp4.
  %undef = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
  %tmp4 = select i1 %undef, float %arg, float 1.000000e+00
  ; Branch condition: (0.0 < %arg2) && (%arg2 < 1.0) && (%arg < undef).
  %tmp5 = fcmp ogt float %arg2, 0.000000e+00
  %tmp6 = fcmp olt float %arg2, 1.000000e+00
  %tmp7 = fcmp olt float %arg, undef
  %tmp8 = and i1 %tmp5, %tmp6
  %tmp9 = and i1 %tmp8, %tmp7
  br i1 %tmp9, label %bb1, label %bb2

bb1:
  ; Volatile store gives the conditional block an effect that cannot be
  ; dropped; the destination pointer itself is undef.
  store volatile i32 0, i32 addrspace(1)* undef
  br label %bb2

bb2:
  ret void
}