1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-25 05:52:53 +02:00
llvm-mirror/test/CodeGen/X86/peephole-cvt-sse.ll
Andrey Turetskiy c010154611 [X86] Loosen memory folding requirements for cvtdq2pd and cvtps2pd instructions.
According to the spec, the cvtdq2pd and cvtps2pd instructions don't require their memory operand to be aligned
to 16 bytes. This patch removes that requirement from the memory folding table.

Differential Revision: https://reviews.llvm.org/D23919

llvm-svn: 280402
2016-09-01 18:50:02 +00:00

40 lines
1.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-pc-linux -mattr=+sse4.2 < %s | FileCheck %s --check-prefix=X86-64
; RUN: llc -mtriple=i386-pc-linux -mattr=+sse4.2 < %s | FileCheck %s --check-prefix=I386
; Check that unaligned (align 1) loads are folded into the memory operand of cvtps2pd and cvtdq2pd.
; Widens the two low floats of a <4 x float> loaded with align 1 to doubles.
; The assertions below expect that load to show up only as the memory operand
; of a single cvtps2pd on both targets (no separate vector load instruction).
define <2 x double> @peephole_cvtps2pd(<4 x float>* %a0) {
; X86-64-LABEL: peephole_cvtps2pd:
; X86-64: # BB#0:
; X86-64-NEXT: cvtps2pd (%rdi), %xmm0
; X86-64-NEXT: retq
;
; I386-LABEL: peephole_cvtps2pd:
; I386: # BB#0:
; I386-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-NEXT: cvtps2pd (%eax), %xmm0
; I386-NEXT: retl
  ; Deliberately under-aligned (align 1) load of the full 128-bit vector.
  %wide = load <4 x float>, <4 x float>* %a0, align 1
  ; Only lanes 0 and 1 feed the conversion.
  %low = shufflevector <4 x float> %wide, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %ext = fpext <2 x float> %low to <2 x double>
  ret <2 x double> %ext
}
; Converts the two low signed i32 lanes of a <4 x i32> loaded with align 1 to
; doubles. The assertions below expect that load to show up only as the memory
; operand of a single cvtdq2pd on both targets (no separate vector load).
define <2 x double> @peephole_cvtdq2pd(<4 x i32>* %a0) {
; X86-64-LABEL: peephole_cvtdq2pd:
; X86-64: # BB#0:
; X86-64-NEXT: cvtdq2pd (%rdi), %xmm0
; X86-64-NEXT: retq
;
; I386-LABEL: peephole_cvtdq2pd:
; I386: # BB#0:
; I386-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-NEXT: cvtdq2pd (%eax), %xmm0
; I386-NEXT: retl
  ; Deliberately under-aligned (align 1) load of the full 128-bit vector.
  %wide = load <4 x i32>, <4 x i32>* %a0, align 1
  ; Only lanes 0 and 1 feed the conversion.
  %low = shufflevector <4 x i32> %wide, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %conv = sitofp <2 x i32> %low to <2 x double>
  ret <2 x double> %conv
}