mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 03:23:01 +02:00
d97d025eba
[...] be non-optimal. To be precise, we should avoid folding loads if the instructions only update part of the destination register, and the non-updated part is not needed, e.g. cvtss2sd, sqrtss. Unfolding the load from these instructions breaks the partial register dependency and can improve performance, e.g. `movss (%rdi), %xmm0; cvtss2sd %xmm0, %xmm0` instead of `cvtss2sd (%rdi), %xmm0`. An alternative method to break the dependency is to clear the register first, e.g. `xorps %xmm0, %xmm0; cvtss2sd (%rdi), %xmm0`. llvm-svn: 91672
29 lines
703 B
LLVM
29 lines
703 B
LLVM
; Two llc invocations over this file, both targeting x86-64 with SSE2:
;   * the first enables the break-sse-dep feature; its output is matched
;     against the directives carrying the YES check prefix,
;   * the second disables break-sse-dep; its output is matched against the
;     directives carrying the NO check prefix.
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+break-sse-dep | FileCheck %s --check-prefix=YES
|
|
; RUN: llc < %s -march=x86-64 -mattr=+sse2,-break-sse-dep | FileCheck %s --check-prefix=NO
|
|
|
|
; Test 1 - float load followed by fpext to double.
;
; Expected code, per check prefix:
;   * YES prefix (+break-sse-dep): the load is kept as a separate movss and
;     the conversion is done register-to-register.
;   * NO prefix (-break-sse-dep): the load is folded into the memory form of
;     cvtss2sd.
;
; FileCheck only honours a directive when the prefix is immediately followed
; by a colon. The cvtss2sd directives were previously written with a
; semicolon after the prefix, so they were silently skipped and the
; instruction under test was never checked.
define double @t1(float* nocapture %x) nounwind readonly ssp {
entry:
; YES: t1:
; YES: movss (%rdi), %xmm0
; YES: cvtss2sd %xmm0, %xmm0

; NO: t1:
; NO: cvtss2sd (%rdi), %xmm0
  %0 = load float* %x, align 4
  %1 = fpext float %0 to double
  ret double %1
}
|
|
|
|
; Test 2 - double load followed by fptrunc to float.
;
; Expected code, per check prefix:
;   * YES prefix (+break-sse-dep): the load is kept as a separate movsd and
;     the conversion is done register-to-register.
;   * NO prefix (-break-sse-dep): the load is folded into the memory form of
;     cvtsd2ss.
;
; FileCheck only honours a directive when the prefix is immediately followed
; by a colon. The cvtsd2ss directives were previously written with a
; semicolon after the prefix, so they were silently skipped and the
; instruction under test was never checked.
define float @t2(double* nocapture %x) nounwind readonly ssp {
entry:
; YES: t2:
; YES: movsd (%rdi), %xmm0
; YES: cvtsd2ss %xmm0, %xmm0

; NO: t2:
; NO: cvtsd2ss (%rdi), %xmm0
  %0 = load double* %x, align 8
  %1 = fptrunc double %0 to float
  ret float %1
}
|