commit fcbbc1dc0f

X86 matches several 'shift+xor' funnel shift patterns:

  fold (or (srl (srl x1, 1), (xor y, 31)), (shl x0, y)) -> (fshl x0, x1, y)
  fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y)) -> (fshr x0, x1, y)
  fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y)) -> (fshr x0, x1, y)

These patterns are also what we end up with from the proposed expansion changes in D77301.

This patch moves these folds to DAGCombine's generic MatchFunnelPosNeg. All existing X86 test cases still pass; the only codegen difference is a small change in pr32282.ll.

Reviewed By: @spatel
Differential Revision: https://reviews.llvm.org/D78935
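As a rough illustration (not part of this test; the function name @fshl_shift_xor and the i32 width are made up for the example), the first fold corresponds to IR along these lines. The explicit 'and' masks the shift amount so the example is well defined for any %y, which is the form in which the shift+xor pattern usually reaches the DAG:

    define i32 @fshl_shift_xor(i32 %x0, i32 %x1, i32 %y) {
      ; reduce the shift amount modulo the bit width
      %amt = and i32 %y, 31
      %inv = xor i32 %amt, 31
      ; (shl x0, y)
      %hi = shl i32 %x0, %amt
      ; (srl (srl x1, 1), (xor y, 31))
      %lo1 = lshr i32 %x1, 1
      %lo = lshr i32 %lo1, %inv
      ; (or ...) -- the value the combine recognizes as (fshl x0, x1, y),
      ; i.e. the same result as: call i32 @llvm.fshl.i32(i32 %x0, i32 %x1, i32 %y)
      %r = or i32 %hi, %lo
      ret i32 %r
    }

The second and third folds are the mirror-image fshr forms, with shl and srl swapped and with (add x0, x0) standing in for (shl x0, 1).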
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=X64

; Check for assert in foldMaskAndShiftToScale due to out of range mask scaling.

@b = common global i8 zeroinitializer, align 1
@c = common global i8 zeroinitializer, align 1
@d = common global i64 zeroinitializer, align 8
@e = common global i64 zeroinitializer, align 8

define void @foo(i64 %x) nounwind {
; X86-LABEL: foo:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl d, %eax
; X86-NEXT:    notl %eax
; X86-NEXT:    movl d+4, %ecx
; X86-NEXT:    notl %ecx
; X86-NEXT:    andl $701685459, %ecx # imm = 0x29D2DED3
; X86-NEXT:    andl $-566231040, %eax # imm = 0xDE400000
; X86-NEXT:    shrdl $21, %ecx, %eax
; X86-NEXT:    shrl $21, %ecx
; X86-NEXT:    addl $7, %eax
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    pushl %ecx
; X86-NEXT:    pushl %eax
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NEXT:    calll __divdi3
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    orl %eax, %edx
; X86-NEXT:    setne {{[0-9]+}}(%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: foo:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq {{.*}}(%rip), %rcx
; X64-NEXT:    movabsq $3013716102212485120, %rdx # imm = 0x29D2DED3DE400000
; X64-NEXT:    andnq %rdx, %rcx, %rcx
; X64-NEXT:    shrq $21, %rcx
; X64-NEXT:    addq $7, %rcx
; X64-NEXT:    movq %rdi, %rdx
; X64-NEXT:    orq %rcx, %rdx
; X64-NEXT:    shrq $32, %rdx
; X64-NEXT:    je .LBB0_1
; X64-NEXT:  # %bb.2:
; X64-NEXT:    cqto
; X64-NEXT:    idivq %rcx
; X64-NEXT:    jmp .LBB0_3
; X64-NEXT:  .LBB0_1:
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %ecx
; X64-NEXT:    # kill: def $eax killed $eax def $rax
; X64-NEXT:  .LBB0_3:
; X64-NEXT:    testq %rax, %rax
; X64-NEXT:    setne -{{[0-9]+}}(%rsp)
; X64-NEXT:    retq
  %1 = alloca i8, align 1
  %2 = load i64, i64* @d, align 8
  %3 = or i64 -3013716102214263007, %2
  %4 = xor i64 %3, -1
  %5 = load i64, i64* @e, align 8
  %6 = load i8, i8* @b, align 1
  %7 = trunc i8 %6 to i1
  %8 = zext i1 %7 to i64
  %9 = xor i64 %5, %8
  %10 = load i8, i8* @c, align 1
  %11 = trunc i8 %10 to i1
  %12 = zext i1 %11 to i32
  %13 = or i32 551409149, %12
  %14 = sub nsw i32 %13, 551409131
  %15 = zext i32 %14 to i64
  %16 = shl i64 %9, %15
  %17 = sub nsw i64 %16, 223084523
  %18 = ashr i64 %4, %17
  %19 = and i64 %18, 9223372036854775806
  %20 = add nsw i64 7, %19
  %21 = sdiv i64 %x, %20
  %22 = icmp ne i64 %21, 0
  %23 = zext i1 %22 to i8
  store i8 %23, i8* %1, align 1
  ret void
}