mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
a8b807dced
Summary: The main motivation is shown by all these `neg` instructions that are now created. In particular, the `@reg32_lshr_by_negated_unfolded_sub_b` test. AArch64 test changes all look good (`neg` created), or neutral. X86 changes look neutral (vectors), or good (`neg` / `xor eax, eax` created). I'm not sure about `X86/ragreedy-hoist-spill.ll`, it looks like the spill is now hoisted into preheader (which should still be good?), 2 4-byte reloads become 1 8-byte reload, and are elsewhere, but i'm not sure how that affects that loop. I'm unable to interpret AMDGPU change, looks neutral-ish? This is hopefully a step towards solving [[ https://bugs.llvm.org/show_bug.cgi?id=41952 | PR41952 ]]. https://rise4fun.com/Alive/pkdq (we are missing more patterns, i'll submit them later) This is a recommit, originally committed in rL361852, but reverted to investigate test-suite compile-time hangs, and then reverted in rL362109 to fix missing constant folds that were causing endless combine loops. Reviewers: craig.topper, RKSimon, spatel, arsenm Reviewed By: RKSimon Subscribers: bjope, qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, javed.absar, dstuttard, tpr, t-tye, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62223 llvm-svn: 362142
82 lines
3.1 KiB
LLVM
82 lines
3.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom | FileCheck %s

; <rdar://problem/8006248>

; This randomly started passing after an unrelated change, if it fails again it
; might be worth looking at PR12324: misched bringup.

; Keep @func alive through optimization so the CHECK lines below have a body to match.
@llvm.used = appending global [1 x i8*] [i8* bitcast (void ([40 x i16]*, i32*, i16**, i64*)* @func to i8*)], section "llvm.metadata"
|
; Regression test: a chain of sext/mul/add/select arithmetic that must lower to the
; exact atom-scheduled x86-64 sequence below. CHECK lines are autogenerated by
; update_llc_test_checks.py — regenerate rather than hand-editing them.
define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
; CHECK-LABEL: func:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movslq (%rsi), %rax
; CHECK-NEXT: movl $4, %esi
; CHECK-NEXT: subq %rax, %rsi
; CHECK-NEXT: movq (%rdx), %rax
; CHECK-NEXT: movswl 8(%rdi), %edx
; CHECK-NEXT: movswl (%rax,%rsi,2), %eax
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: imull %edx, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: addl $2138875574, %eax # imm = 0x7F7CA6B6
; CHECK-NEXT: cmpl $-8608074, %eax # imm = 0xFF7CA6B6
; CHECK-NEXT: movslq %eax, %rdi
; CHECK-NEXT: setl %dl
; CHECK-NEXT: cmpl $2138875573, %eax # imm = 0x7F7CA6B5
; CHECK-NEXT: movq %rdi, %r8
; CHECK-NEXT: leal -1(%rdx,%rdx), %edx
; CHECK-NEXT: cmovlel %edx, %esi
; CHECK-NEXT: subq %rax, %r8
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $1, %esi
; CHECK-NEXT: cmovneq %rax, %r8
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovnsq %rax, %r8
; CHECK-NEXT: movq (%rcx), %rax
; CHECK-NEXT: subq %r8, %rdi
; CHECK-NEXT: leaq -2138875574(%rax,%rdi), %rax
; CHECK-NEXT: movq %rax, (%rcx)
; CHECK-NEXT: retq
entry:
  ; Load a[4] (sign-extended) and *(c)[4 - *b] (sign-extended).
  %tmp103 = getelementptr inbounds [40 x i16], [40 x i16]* %a, i64 0, i64 4
  %tmp104 = load i16, i16* %tmp103, align 2
  %tmp105 = sext i16 %tmp104 to i32
  %tmp106 = load i32, i32* %b, align 4
  %tmp107 = sub nsw i32 4, %tmp106
  %tmp108 = load i16*, i16** %c, align 8
  %tmp109 = sext i32 %tmp107 to i64
  %tmp110 = getelementptr inbounds i16, i16* %tmp108, i64 %tmp109
  %tmp111 = load i16, i16* %tmp110, align 1
  %tmp112 = sext i16 %tmp111 to i32
  %tmp = mul i32 355244649, %tmp112
  %tmp1 = mul i32 %tmp, %tmp105
  %tmp2 = add i32 %tmp1, 2138875574
  %tmp3 = add i32 %tmp2, 1546991088
  %tmp4 = mul i32 %tmp3, 2122487257
  ; Range/sign classification of the 32-bit product, feeding the selects below.
  %tmp5 = icmp sge i32 %tmp4, 2138875574
  %tmp6 = icmp slt i32 %tmp4, -8608074
  %tmp7 = or i1 %tmp5, %tmp6
  %outSign = select i1 %tmp7, i32 1, i32 -1
  %tmp8 = icmp slt i32 %tmp4, 0
  %tmp9 = icmp eq i32 %outSign, 1
  %tmp10 = and i1 %tmp8, %tmp9
  ; 64-bit arithmetic mixing sext and zext of the same value; the sext-zext
  ; difference (%tmp15) is conditionally subtracted.
  %tmp11 = sext i32 %tmp4 to i64
  %tmp12 = add i64 %tmp11, 5089792279245435153
  %tmp13 = sub i64 %tmp12, 2138875574
  %tmp14 = zext i32 %tmp4 to i64
  %tmp15 = sub i64 %tmp11, %tmp14
  %tmp16 = select i1 %tmp10, i64 %tmp15, i64 0
  %tmp17 = sub i64 %tmp13, %tmp16
  %tmp18 = mul i64 %tmp17, 4540133155013554595
  %tmp19 = sub i64 %tmp18, 5386586244038704851
  %tmp20 = add i64 %tmp19, -1368057358110947217
  %tmp21 = mul i64 %tmp20, -422037402840850817
  ; Accumulate into *d.
  %tmp115 = load i64, i64* %d, align 8
  %alphaX = mul i64 468858157810230901, %tmp21
  %alphaXbetaY = add i64 %alphaX, %tmp115
  %transformed = add i64 %alphaXbetaY, 9040145182981852475
  store i64 %transformed, i64* %d, align 8
  ret void
}
|