llvm-mirror/test/CodeGen/X86/cmov.ll
Commit 8720d89aac by Sanjay Patel: [DAGCombiner] re-enable truncation of binops
This is effectively re-committing the changes from:
rL347917 (D54640)
rL348195 (D55126)
...which were reverted here:
rL348604
...because the code had a bug that could induce infinite looping
or eventual out-of-memory compilation.

The bug was that this code did not guard against transforming
opaque constants. More details are in the post-commit mailing
list thread for r347917. A reduced test for that is included
in the x86 bool-math.ll file. (I wasn't able to reduce a PPC
backend test for this, but it was almost the same pattern.)

Original commit message for r347917:

The motivating case for this is shown in:
https://bugs.llvm.org/show_bug.cgi?id=32023
and the corresponding rot16.ll regression tests.

Because x86 scalar shift amounts are i8 values, we can end up with trunc-binop-trunc
sequences that don't get folded in IR.
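
For illustration, here is a sketch (hypothetical IR in the spirit of the
rot16.ll tests, not a test added by this patch) of a 16-bit rotate whose
shift-amount math gets truncated to i8 during x86 legalization:

    define i16 @rot16(i16 %x, i16 %amt) {
      %lowamt = and i16 %amt, 15       ; rotate amount, masked to avoid UB
      %negamt = sub i16 0, %amt
      %hiamt = and i16 %negamt, 15     ; complementary shift amount
      %shl = shl i16 %x, %lowamt
      %shr = lshr i16 %x, %hiamt
      %r = or i16 %shl, %shr           ; rotate-left of %x by %amt
      ret i16 %r
    }

Because the shifts take their amounts in cl, the DAG wraps truncates
around the sub/and feeding them, which is the trunc-binop-trunc shape
this patch folds.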

As the TODO comments suggest, there will be regressions if we extend this (for x86,
we mostly seem to be missing LEA opportunities, but there are likely vector folds
missing too). I think those should be considered existing bugs because this is the
same transform that we do as an IR canonicalization in instcombine. We just need
more tests to make those visible independent of this patch.

llvm-svn: 348706
Date: 2018-12-08 16:07:38 +00:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -disable-cgp-select2branch -x86-cmov-converter=false | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: movl $12, %eax
; CHECK-NEXT: cmovael (%rcx), %eax
; CHECK-NEXT: retq
entry:
%0 = lshr i32 %x, %n
%1 = and i32 %0, 1
%toBool = icmp eq i32 %1, 0
%v = load i32, i32* %vp
%.0 = select i1 %toBool, i32 %v, i32 12
ret i32 %.0
}
define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: movl $12, %eax
; CHECK-NEXT: cmovbl (%rcx), %eax
; CHECK-NEXT: retq
entry:
%0 = lshr i32 %x, %n
%1 = and i32 %0, 1
%toBool = icmp eq i32 %1, 0
%v = load i32, i32* %vp
%.0 = select i1 %toBool, i32 12, i32 %v
ret i32 %.0
}
; x86's 32-bit cmov zeroes the high 32 bits of the destination. Make
; sure CodeGen takes advantage of that to avoid an unnecessary
; zero-extend (movl) after the cmov.
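; As an illustration (not checked below): because 32-bit writes implicitly
; clear bits 63:32 of the destination, a 32-bit cmov such as
;   cmovel %esi, %edi
; already leaves %rdi zero-extended, so the zext in the IR needs no extra movl.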
declare void @bar(i64) nounwind
define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: testb $1, %dl
; CHECK-NEXT: cmovel %esi, %edi
; CHECK-NEXT: callq bar
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
%c = trunc i64 %a to i32
%d = trunc i64 %b to i32
%e = select i1 %p, i32 %c, i32 %d
%f = zext i32 %e to i64
call void @bar(i64 %f)
ret void
}
; CodeGen shouldn't try to do a setne after an expanded 8-bit conditional
; move without recomputing EFLAGS, because the expansion of the conditional
; move with control flow may clobber EFLAGS (e.g., with xor, to set the
; register to zero).
; The test is a little awkward; the important part is that there's a test before the
; setne.
; PR4814
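; As a hedged sketch (pseudo-assembly, not taken from this test), an expanded
; i8 cmov can look like:
;   testb %dl, %dl      ; flags the later setne wants to reuse
;   jne .LBB_done       ; expansion branches on those flags...
;   xorl %ecx, %ecx     ; ...and this zeroing xor clobbers EFLAGS
; .LBB_done:
;   setne %bl           ; wrong unless the testb is recomputed first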
@g_3 = external global i8
@g_96 = external global i8
@g_100 = external global i8
@_2E_str = external constant [15 x i8], align 1
define i1 @test4() nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsbl {{.*}}(%rip), %edx
; CHECK-NEXT: movzbl %dl, %ecx
; CHECK-NEXT: shrl $7, %ecx
; CHECK-NEXT: xorb $1, %cl
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: sarl %cl, %edx
; CHECK-NEXT: movb {{.*}}(%rip), %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_2
; CHECK-NEXT: # %bb.1: # %bb.i.i.i
; CHECK-NEXT: movb {{.*}}(%rip), %cl
; CHECK-NEXT: .LBB3_2: # %func_4.exit.i
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: setne %bl
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: je .LBB3_4
; CHECK-NEXT: # %bb.3: # %func_4.exit.i
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: .LBB3_4: # %func_4.exit.i
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_7
; CHECK-NEXT: # %bb.5: # %func_4.exit.i
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne .LBB3_7
; CHECK-NEXT: # %bb.6: # %bb.i.i
; CHECK-NEXT: movb {{.*}}(%rip), %cl
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: .LBB3_7: # %func_1.exit
; CHECK-NEXT: movb %cl, {{.*}}(%rip)
; CHECK-NEXT: movzbl %cl, %esi
; CHECK-NEXT: movl $_2E_str, %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: callq printf
; CHECK-NEXT: movl %ebx, %eax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
entry:
%0 = load i8, i8* @g_3, align 1
%1 = sext i8 %0 to i32
%.lobit.i = lshr i8 %0, 7
%tmp.i = zext i8 %.lobit.i to i32
%tmp.not.i = xor i32 %tmp.i, 1
%iftmp.17.0.i.i = ashr i32 %1, %tmp.not.i
%retval56.i.i = trunc i32 %iftmp.17.0.i.i to i8
%2 = icmp eq i8 %retval56.i.i, 0
%g_96.promoted.i = load i8, i8* @g_96
%3 = icmp eq i8 %g_96.promoted.i, 0
br i1 %3, label %func_4.exit.i, label %bb.i.i.i
bb.i.i.i:
%4 = load volatile i8, i8* @g_100, align 1
br label %func_4.exit.i
func_4.exit.i:
%.not.i = xor i1 %2, true
%brmerge.i = or i1 %3, %.not.i
%.mux.i = select i1 %2, i8 %g_96.promoted.i, i8 0
br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
bb.i.i:
%5 = load volatile i8, i8* @g_100, align 1
br label %func_1.exit
func_1.exit:
%g_96.tmp.0.i = phi i8 [ %g_96.promoted.i, %bb.i.i ], [ %.mux.i, %func_4.exit.i ]
%ret = phi i1 [ 0, %bb.i.i ], [ %.not.i, %func_4.exit.i ]
store i8 %g_96.tmp.0.i, i8* @g_96
%6 = zext i8 %g_96.tmp.0.i to i32
%7 = tail call i32 (i8*, ...) @printf(i8* noalias getelementptr ([15 x i8], [15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind
ret i1 %ret
}
declare i32 @printf(i8* nocapture, ...) nounwind
; Should compile to setcc | -2.
; rdar://6668608
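; setg leaves 0 or 1 in %al, and 0|-2 = -2 while 1|-2 = -1, so or'ing the
; setcc result with -2 reproduces the select of -1 vs. -2.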
define i32 @test5(i32* nocapture %P) nounwind readonly {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $41, (%rdi)
; CHECK-NEXT: setg %al
; CHECK-NEXT: orl $-2, %eax
; CHECK-NEXT: retq
entry:
%0 = load i32, i32* %P, align 4
%1 = icmp sgt i32 %0, 41
%iftmp.0.0 = select i1 %1, i32 -1, i32 -2
ret i32 %iftmp.0.0
}
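; A similar fold using LEA: setl (x < 42) leaves 0 or 1 in %rax, and
; leal 4(%rax,%rax,8) computes 4 + 9*%rax, i.e. 4 when x > 41 and 13 otherwise.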
define i32 @test6(i32* nocapture %P) nounwind readonly {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $42, (%rdi)
; CHECK-NEXT: setl %al
; CHECK-NEXT: leal 4(%rax,%rax,8), %eax
; CHECK-NEXT: retq
entry:
%0 = load i32, i32* %P, align 4
%1 = icmp sgt i32 %0, 41
%iftmp.0.0 = select i1 %1, i32 4, i32 13
ret i32 %iftmp.0.0
}
; Don't try to use a 16-bit conditional move to do an 8-bit select,
; because it isn't worth it. Just use a branch instead.
define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne .LBB6_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB6_1:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%d = select i1 %c, i8 %a, i8 %b
ret i8 %d
}
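; ~x equals -x - 1, so ~x < -1 holds exactly when x > 0; the select below
; therefore computes smin(~x, -1).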
define i32 @smin(i32 %x) {
; CHECK-LABEL: smin:
; CHECK: # %bb.0:
; CHECK-NEXT: notl %edi
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovsl %edi, %eax
; CHECK-NEXT: retq
%not_x = xor i32 %x, -1
%1 = icmp slt i32 %not_x, -1
%sel = select i1 %1, i32 %not_x, i32 -1
ret i32 %sel
}