mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
d654e7d40c
Enable enableMultipleCopyHints() on X86. Original Patch by @jonpa: While enabling the mischeduler for SystemZ, it was discovered that for some reason a test needed one extra seemingly needless COPY (test/CodeGen/SystemZ/call-03.ll). The handling for that is resulted in this patch, which improves the register coalescing by providing not just one copy hint, but a sorted list of copy hints. On SystemZ, this gives ~12500 less register moves on SPEC, as well as marginally less spilling. Instead of improving just the SystemZ backend, the improvement has been implemented in common-code (calculateSpillWeightAndHint(). This gives a lot of test failures, but since this should be a general improvement I hope that the involved targets will help and review the test updates. Differential Revision: https://reviews.llvm.org/D38128 llvm-svn: 342578
165 lines
5.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
define i128 @sub128(i128 %a, i128 %b) nounwind {
|
|
; CHECK-LABEL: sub128:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movq %rdi, %rax
|
|
; CHECK-NEXT: subq %rdx, %rax
|
|
; CHECK-NEXT: sbbq %rcx, %rsi
|
|
; CHECK-NEXT: movq %rsi, %rdx
|
|
; CHECK-NEXT: retq
|
|
entry:
|
|
%0 = sub i128 %a, %b
|
|
ret i128 %0
|
|
}
|
|
|
|
define i256 @sub256(i256 %a, i256 %b) nounwind {
|
|
; CHECK-LABEL: sub256:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movq %rdi, %rax
|
|
; CHECK-NEXT: subq %r9, %rsi
|
|
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
|
|
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
|
|
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
|
|
; CHECK-NEXT: movq %rdx, 8(%rdi)
|
|
; CHECK-NEXT: movq %rsi, (%rdi)
|
|
; CHECK-NEXT: movq %rcx, 16(%rdi)
|
|
; CHECK-NEXT: movq %r8, 24(%rdi)
|
|
; CHECK-NEXT: retq
|
|
entry:
|
|
%0 = sub i256 %a, %b
|
|
ret i256 %0
|
|
}
|
|
|
|
%S = type { [4 x i64] }
|
|
|
|
define %S @negate(%S* nocapture readonly %this) {
|
|
; CHECK-LABEL: negate:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movq %rdi, %rax
|
|
; CHECK-NEXT: xorl %r8d, %r8d
|
|
; CHECK-NEXT: xorl %edx, %edx
|
|
; CHECK-NEXT: subq (%rsi), %rdx
|
|
; CHECK-NEXT: movl $0, %edi
|
|
; CHECK-NEXT: sbbq 8(%rsi), %rdi
|
|
; CHECK-NEXT: movl $0, %ecx
|
|
; CHECK-NEXT: sbbq 16(%rsi), %rcx
|
|
; CHECK-NEXT: sbbq 24(%rsi), %r8
|
|
; CHECK-NEXT: movq %rdx, (%rax)
|
|
; CHECK-NEXT: movq %rdi, 8(%rax)
|
|
; CHECK-NEXT: movq %rcx, 16(%rax)
|
|
; CHECK-NEXT: movq %r8, 24(%rax)
|
|
; CHECK-NEXT: retq
|
|
entry:
|
|
%0 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 0
|
|
%1 = load i64, i64* %0, align 8
|
|
%2 = xor i64 %1, -1
|
|
%3 = zext i64 %2 to i128
|
|
%4 = add nuw nsw i128 %3, 1
|
|
%5 = trunc i128 %4 to i64
|
|
%6 = lshr i128 %4, 64
|
|
%7 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 1
|
|
%8 = load i64, i64* %7, align 8
|
|
%9 = xor i64 %8, -1
|
|
%10 = zext i64 %9 to i128
|
|
%11 = add nuw nsw i128 %6, %10
|
|
%12 = trunc i128 %11 to i64
|
|
%13 = lshr i128 %11, 64
|
|
%14 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 2
|
|
%15 = load i64, i64* %14, align 8
|
|
%16 = xor i64 %15, -1
|
|
%17 = zext i64 %16 to i128
|
|
%18 = add nuw nsw i128 %13, %17
|
|
%19 = lshr i128 %18, 64
|
|
%20 = trunc i128 %18 to i64
|
|
%21 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 3
|
|
%22 = load i64, i64* %21, align 8
|
|
%23 = xor i64 %22, -1
|
|
%24 = zext i64 %23 to i128
|
|
%25 = add nuw nsw i128 %19, %24
|
|
%26 = trunc i128 %25 to i64
|
|
%27 = insertvalue [4 x i64] undef, i64 %5, 0
|
|
%28 = insertvalue [4 x i64] %27, i64 %12, 1
|
|
%29 = insertvalue [4 x i64] %28, i64 %20, 2
|
|
%30 = insertvalue [4 x i64] %29, i64 %26, 3
|
|
%31 = insertvalue %S undef, [4 x i64] %30, 0
|
|
ret %S %31
|
|
}
|
|
|
|
define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr {
|
|
; CHECK-LABEL: sub:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: movq %rdi, %rax
|
|
; CHECK-NEXT: notq %rdx
|
|
; CHECK-NEXT: xorl %edi, %edi
|
|
; CHECK-NEXT: addq (%rsi), %rdx
|
|
; CHECK-NEXT: setb %dil
|
|
; CHECK-NEXT: addq $1, %rdx
|
|
; CHECK-NEXT: adcq 8(%rsi), %rdi
|
|
; CHECK-NEXT: setb %r10b
|
|
; CHECK-NEXT: movzbl %r10b, %r10d
|
|
; CHECK-NEXT: notq %rcx
|
|
; CHECK-NEXT: addq %rdi, %rcx
|
|
; CHECK-NEXT: adcq 16(%rsi), %r10
|
|
; CHECK-NEXT: setb %dil
|
|
; CHECK-NEXT: movzbl %dil, %edi
|
|
; CHECK-NEXT: notq %r8
|
|
; CHECK-NEXT: addq %r10, %r8
|
|
; CHECK-NEXT: adcq 24(%rsi), %rdi
|
|
; CHECK-NEXT: notq %r9
|
|
; CHECK-NEXT: addq %rdi, %r9
|
|
; CHECK-NEXT: movq %rdx, (%rax)
|
|
; CHECK-NEXT: movq %rcx, 8(%rax)
|
|
; CHECK-NEXT: movq %r8, 16(%rax)
|
|
; CHECK-NEXT: movq %r9, 24(%rax)
|
|
; CHECK-NEXT: retq
|
|
entry:
|
|
%0 = extractvalue %S %arg.b, 0
|
|
%.elt6 = extractvalue [4 x i64] %0, 1
|
|
%.elt8 = extractvalue [4 x i64] %0, 2
|
|
%.elt10 = extractvalue [4 x i64] %0, 3
|
|
%.elt = extractvalue [4 x i64] %0, 0
|
|
%1 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 0
|
|
%2 = load i64, i64* %1, align 8
|
|
%3 = zext i64 %2 to i128
|
|
%4 = add nuw nsw i128 %3, 1
|
|
%5 = xor i64 %.elt, -1
|
|
%6 = zext i64 %5 to i128
|
|
%7 = add nuw nsw i128 %4, %6
|
|
%8 = trunc i128 %7 to i64
|
|
%9 = lshr i128 %7, 64
|
|
%10 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 1
|
|
%11 = load i64, i64* %10, align 8
|
|
%12 = zext i64 %11 to i128
|
|
%13 = add nuw nsw i128 %9, %12
|
|
%14 = xor i64 %.elt6, -1
|
|
%15 = zext i64 %14 to i128
|
|
%16 = add nuw nsw i128 %13, %15
|
|
%17 = trunc i128 %16 to i64
|
|
%18 = lshr i128 %16, 64
|
|
%19 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 2
|
|
%20 = load i64, i64* %19, align 8
|
|
%21 = zext i64 %20 to i128
|
|
%22 = add nuw nsw i128 %18, %21
|
|
%23 = xor i64 %.elt8, -1
|
|
%24 = zext i64 %23 to i128
|
|
%25 = add nuw nsw i128 %22, %24
|
|
%26 = lshr i128 %25, 64
|
|
%27 = trunc i128 %25 to i64
|
|
%28 = getelementptr inbounds %S, %S* %this, i64 0, i32 0, i64 3
|
|
%29 = load i64, i64* %28, align 8
|
|
%30 = zext i64 %29 to i128
|
|
%31 = add nuw nsw i128 %26, %30
|
|
%32 = xor i64 %.elt10, -1
|
|
%33 = zext i64 %32 to i128
|
|
%34 = add nuw nsw i128 %31, %33
|
|
%35 = trunc i128 %34 to i64
|
|
%36 = insertvalue [4 x i64] undef, i64 %8, 0
|
|
%37 = insertvalue [4 x i64] %36, i64 %17, 1
|
|
%38 = insertvalue [4 x i64] %37, i64 %27, 2
|
|
%39 = insertvalue [4 x i64] %38, i64 %35, 3
|
|
%40 = insertvalue %S undef, [4 x i64] %39, 0
|
|
ret %S %40
|
|
}
|