Commit ba34d0d84b

This prevents isel from emitting a TEST instruction that optimizeCompareInstr would only have to remove later. In some of the modified tests, the SUB gets duplicated because its flags are needed in two places and are clobbered in between. optimizeCompareInstr was able to optimize away the TEST that used the result of one of the SUBs, but it does not know to turn a SUB into a CMP after removing a TEST; it only knows how to do that when the SUB's result is already dead. With this change the TEST never exists, so optimizeCompareInstr can turn the SUB into a CMP immediately.

Fixes PR43649.

llvm-svn: 374755
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
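
; The tests below exercise known-bits (value tracking) propagation through
; x86 instruction selection; the explanatory notes ahead of each test
; summarize why the folds are legal.

; knownbits_zext_in_reg looks like a reduced testcase (note the bugpoint-style
; block names). The i8 udivs by -93 (i.e. 163 unsigned) and 93 are expanded
; into multiply-by-magic-constant plus shift sequences (imull $101 / imull
; $177 with shrl $14), and their quotients are provably small (at most 1 and
; 2 respectively), which is the known-bits fact the zero-extension relies on.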
define void @knownbits_zext_in_reg(i8*) nounwind {
; X32-LABEL: knownbits_zext_in_reg:
; X32:       # %bb.0: # %BB
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzbl (%eax), %ecx
; X32-NEXT:    imull $101, %ecx, %eax
; X32-NEXT:    shrl $14, %eax
; X32-NEXT:    imull $177, %ecx, %edx
; X32-NEXT:    shrl $14, %edx
; X32-NEXT:    movzbl %al, %ecx
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB0_1: # %CF
; X32-NEXT:    # =>This Loop Header: Depth=1
; X32-NEXT:    # Child Loop BB0_2 Depth 2
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    divb %dl
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB0_2: # %CF237
; X32-NEXT:    # Parent Loop BB0_1 Depth=1
; X32-NEXT:    # => This Inner Loop Header: Depth=2
; X32-NEXT:    testb %bl, %bl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:    jmp .LBB0_1
;
; X64-LABEL: knownbits_zext_in_reg:
; X64:       # %bb.0: # %BB
; X64-NEXT:    movzbl (%rdi), %eax
; X64-NEXT:    imull $101, %eax, %ecx
; X64-NEXT:    shrl $14, %ecx
; X64-NEXT:    imull $177, %eax, %edx
; X64-NEXT:    shrl $14, %edx
; X64-NEXT:    movzbl %cl, %ecx
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB0_1: # %CF
; X64-NEXT:    # =>This Loop Header: Depth=1
; X64-NEXT:    # Child Loop BB0_2 Depth 2
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    divb %dl
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB0_2: # %CF237
; X64-NEXT:    # Parent Loop BB0_1 Depth=1
; X64-NEXT:    # => This Inner Loop Header: Depth=2
; X64-NEXT:    testb %sil, %sil
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:    jmp .LBB0_1
BB:
  %L5 = load i8, i8* %0
  %Sl9 = select i1 true, i8 %L5, i8 undef
  %B21 = udiv i8 %Sl9, -93
  %B22 = udiv i8 %Sl9, 93
  br label %CF

CF:                                               ; preds = %CF246, %BB
  %I40 = insertelement <4 x i8> zeroinitializer, i8 %B21, i32 1
  %I41 = insertelement <4 x i8> zeroinitializer, i8 %B22, i32 1
  %B41 = srem <4 x i8> %I40, %I41
  br label %CF237

CF237:                                            ; preds = %CF237, %CF
  %Cmp73 = icmp ne i1 undef, undef
  br i1 %Cmp73, label %CF237, label %CF246

CF246:                                            ; preds = %CF237
  %Cmp117 = icmp ult <4 x i8> %B41, undef
  %E156 = extractelement <4 x i1> %Cmp117, i32 2
  br label %CF
}
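
; knownbits_mask_add_lshr: %1 is at most 32767 and %2 at most 32766, so their
; sum is at most 65533 < 2^17; the lshr by 17 is therefore known to be zero
; and both targets fold the whole function to a constant 0.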
define i32 @knownbits_mask_add_lshr(i32 %a0, i32 %a1) nounwind {
; X32-LABEL: knownbits_mask_add_lshr:
; X32:       # %bb.0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_lshr:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = and i32 %a0, 32767
  %2 = and i32 %a1, 32766
  %3 = add i32 %1, %2
  %4 = lshr i32 %3, 17
  ret i32 %4
}
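
; knownbits_mask_addc_shl: both masked operands have their low 10 bits clear,
; so after the i128 add and the shl by 54 the low 64 bits of the result are
; known zero; only the high 64 bits of the result need to be computed.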
define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
; X32-LABEL: knownbits_mask_addc_shl:
; X32:       # %bb.0:
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl $-1024, %esi # imm = 0xFC00
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    andl %esi, %edi
; X32-NEXT:    andl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    addl %edi, %esi
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    adcl $0, %ecx
; X32-NEXT:    shldl $22, %edx, %ecx
; X32-NEXT:    shldl $22, %esi, %edx
; X32-NEXT:    movl %edx, 8(%eax)
; X32-NEXT:    movl %ecx, 12(%eax)
; X32-NEXT:    movl $0, 4(%eax)
; X32-NEXT:    movl $0, (%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    retl $4
;
; X64-LABEL: knownbits_mask_addc_shl:
; X64:       # %bb.0:
; X64-NEXT:    andq $-1024, %rdi # imm = 0xFC00
; X64-NEXT:    andq $-1024, %rsi # imm = 0xFC00
; X64-NEXT:    addq %rdi, %rsi
; X64-NEXT:    adcl $0, %edx
; X64-NEXT:    shldq $54, %rsi, %rdx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = and i64 %a0, -1024
  %2 = zext i64 %1 to i128
  %3 = and i64 %a1, -1024
  %4 = zext i64 %3 to i128
  %5 = add i128 %2, %4
  %6 = zext i64 %a2 to i128
  %7 = shl i128 %6, 64
  %8 = add i128 %5, %7
  %9 = shl i128 %8, 54
  ret i128 %9
}
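
; knownbits_uaddo_saddo: the operands are shifted left by 32, so the low 32
; bits of the uadd/sadd results are known zero and the truncated i32 sum
; folds to 0; only the overflow flags still need to be materialized.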
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_uaddo_saddo:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %ecx, %edx
; X32-NEXT:    addl %eax, %edx
; X32-NEXT:    setb %bl
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    setns %al
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    setns %cl
; X32-NEXT:    cmpb %al, %cl
; X32-NEXT:    sete %al
; X32-NEXT:    testl %edx, %edx
; X32-NEXT:    setns %dl
; X32-NEXT:    cmpb %dl, %cl
; X32-NEXT:    setne %dl
; X32-NEXT:    andb %al, %dl
; X32-NEXT:    orb %bl, %dl
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_uaddo_saddo:
; X64:       # %bb.0:
; X64-NEXT:    shlq $32, %rdi
; X64-NEXT:    shlq $32, %rsi
; X64-NEXT:    addq %rdi, %rsi
; X64-NEXT:    setb %al
; X64-NEXT:    seto %dl
; X64-NEXT:    orb %al, %dl
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}
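
; knownbits_usubo_ssubo: as above, the low 32 bits of the usub/ssub results
; are known zero, so the i32 half of the return value folds to 0 and only
; the overflow bits are computed (a single cmpq on x86-64).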
define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_usubo_ssubo:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    cmpl %eax, %ecx
; X32-NEXT:    setb %dh
; X32-NEXT:    setns %dl
; X32-NEXT:    testl %ecx, %ecx
; X32-NEXT:    setns %cl
; X32-NEXT:    cmpb %dl, %cl
; X32-NEXT:    setne %ch
; X32-NEXT:    testl %eax, %eax
; X32-NEXT:    setns %al
; X32-NEXT:    cmpb %al, %cl
; X32-NEXT:    setne %dl
; X32-NEXT:    andb %ch, %dl
; X32-NEXT:    orb %dh, %dl
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_usubo_ssubo:
; X64:       # %bb.0:
; X64-NEXT:    shlq $32, %rdi
; X64-NEXT:    shlq $32, %rsi
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    setb %al
; X64-NEXT:    seto %dl
; X64-NEXT:    orb %al, %dl
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}

declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
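
; knownbits_fshl: fshl(%a0, -1, 5) == (%a0 << 5) | (-1 >> 27), so the low
; 5 bits of the result are all ones and the mask with 3 folds to the
; constant 3.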
define i32 @knownbits_fshl(i32 %a0) nounwind {
; X32-LABEL: knownbits_fshl:
; X32:       # %bb.0:
; X32-NEXT:    movl $3, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_fshl:
; X64:       # %bb.0:
; X64-NEXT:    movl $3, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
  %2 = and i32 %1, 3
  ret i32 %2
}
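
; knownbits_fshr: fshr(%a0, -1, 5) == (%a0 << 27) | (-1 >> 5), so the low
; 27 bits of the result are all ones and the mask with 3 again folds to 3.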
define i32 @knownbits_fshr(i32 %a0) nounwind {
; X32-LABEL: knownbits_fshr:
; X32:       # %bb.0:
; X32-NEXT:    movl $3, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_fshr:
; X64:       # %bb.0:
; X64-NEXT:    movl $3, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5)
  %2 = and i32 %1, 3
  ret i32 %2
}

declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare i32 @llvm.fshr.i32(i32, i32, i32) nounwind readnone