1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/CodeGen/X86/fcmp-constant.ll
Sanjay Patel 4a6def05d5 [x86] favor vector constant load to avoid GPR to XMM transfer, part 2
This replaces the build_vector lowering code that was just added in D80013
and matches the pattern later from the x86-specific "vzext_movl".
That seems to result in the same or better improvements and gets rid
of the 'TODO' items from that patch.

AFAICT, we always shrink wider constant vectors to 128-bit on these
patterns, so we still get the implicit zero-extension to ymm/zmm
without wasting space on larger vector constants. There's a trade-off
there because that means we miss potential load-folding.

Similarly, we could load scalar constants here with implicit
zero-extension even to 128-bit. That saves constant space, but it
means we forego load-folding, and so it increases register pressure.
This seems like a good middle-ground between those 2 options.

Differential Revision: https://reviews.llvm.org/D80131
2020-05-25 08:01:48 -04:00

259 lines
8.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
;
; fcmp oeq
;
; Ordered-equal compare of two identical, fully-defined <2 x double> constants.
; Both lanes compare equal, so the sign-extended mask is expected to be all-ones.
define <2 x i64> @fcmp_oeq_v2f64() {
; CHECK-LABEL: fcmp_oeq_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp oeq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Ordered-equal compare of a <2 x double> constant against a fully-undef vector.
; The expected result is an all-zero mask (register cleared with xorps).
define <2 x i64> @fcmp_oeq_v2f64_undef() {
; CHECK-LABEL: fcmp_oeq_v2f64_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp oeq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, undef
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Ordered-equal compare where lane 0 of the right-hand operand is undef and
; lane 1 holds a different value than the left-hand operand.
; The expected result is an all-zero mask.
define <2 x i64> @fcmp_oeq_v2f64_undef_elt() {
; CHECK-LABEL: fcmp_oeq_v2f64_undef_elt:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp oeq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Ordered-equal compare of two fully-defined <4 x float> constants.
; Lane 0 pairs -0.0 with +0.0; the remaining lanes are identical.
; The expected result is an all-ones mask in every lane.
define <4 x i32> @fcmp_oeq_v4f32() {
; CHECK-LABEL: fcmp_oeq_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp oeq <4 x float> <float -0.0, float 1.0, float -1.0, float +2.0>, <float +0.0, float 1.0, float -1.0, float +2.0>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
; Ordered-equal compare of a <4 x float> constant against a fully-undef vector.
; The expected result is an all-zero mask.
define <4 x i32> @fcmp_oeq_v4f32_undef() {
; CHECK-LABEL: fcmp_oeq_v4f32_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp oeq <4 x float> <float 1.0, float -1.0, float +2.0, float -0.0>, undef
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
; Ordered-equal compare where lanes 0 and 3 each have one undef operand and
; lanes 1 and 2 hold identical defined values.
; The expected result is a constant-pool load: zero in the undef lanes and
; all-ones (4294967295) in the two matching lanes.
define <4 x i32> @fcmp_oeq_v4f32_undef_elt() {
; CHECK-LABEL: fcmp_oeq_v4f32_undef_elt:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,4294967295,4294967295,0]
; CHECK-NEXT: retq
  %cmp = fcmp oeq <4 x float> <float -0.0, float 1.0, float -1.0, float undef>, <float undef, float 1.0, float -1.0, float +2.0>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
;
; fcmp ueq
;
; Unordered-or-equal compare of two identical, fully-defined <2 x double>
; constants. The expected result is an all-ones mask.
define <2 x i64> @fcmp_ueq_v2f64() {
; CHECK-LABEL: fcmp_ueq_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp ueq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Unordered-or-equal compare of a <2 x double> constant against a fully-undef
; vector. Unlike the ordered variant, the expected result is an all-ones mask.
define <2 x i64> @fcmp_ueq_v2f64_undef() {
; CHECK-LABEL: fcmp_ueq_v2f64_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp ueq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, undef
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Unordered-or-equal compare where lane 0 of the right-hand operand is undef
; and lane 1 holds a different defined value than the left-hand operand.
; The expected result is a constant-pool load: all-ones
; (18446744073709551615) in lane 0 and zero in lane 1.
define <2 x i64> @fcmp_ueq_v2f64_undef_elt() {
; CHECK-LABEL: fcmp_ueq_v2f64_undef_elt:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,0]
; CHECK-NEXT: retq
  %cmp = fcmp ueq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Unordered-or-equal compare of two fully-defined <4 x float> constants.
; Lane 0 pairs -0.0 with +0.0; the remaining lanes are identical.
; The expected result is an all-ones mask in every lane.
define <4 x i32> @fcmp_ueq_v4f32() {
; CHECK-LABEL: fcmp_ueq_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp ueq <4 x float> <float -0.0, float 1.0, float -1.0, float +2.0>, <float +0.0, float 1.0, float -1.0, float +2.0>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
; Unordered-or-equal compare of a <4 x float> constant against a fully-undef
; vector. The expected result is an all-ones mask.
define <4 x i32> @fcmp_ueq_v4f32_undef() {
; CHECK-LABEL: fcmp_ueq_v4f32_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp ueq <4 x float> <float 1.0, float -1.0, float +2.0, float -0.0>, undef
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
; Unordered-or-equal compare where lanes 0 and 3 each have one undef operand
; and lanes 1 and 2 hold identical defined values.
; The expected result is an all-ones mask in every lane.
define <4 x i32> @fcmp_ueq_v4f32_undef_elt() {
; CHECK-LABEL: fcmp_ueq_v4f32_undef_elt:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp ueq <4 x float> <float -0.0, float 1.0, float -1.0, float undef>, <float undef, float 1.0, float -1.0, float +2.0>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
;
; fcmp false
;
; Always-false predicate applied to two identical <2 x double> constants.
; The expected result is an all-zero mask.
define <2 x i64> @fcmp_false_v2f64() {
; CHECK-LABEL: fcmp_false_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp false <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Always-false predicate with a fully-undef right-hand <2 x double> operand.
; The expected result is an all-zero mask.
define <2 x i64> @fcmp_false_v2f64_undef() {
; CHECK-LABEL: fcmp_false_v2f64_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp false <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, undef
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Always-false predicate where lane 0 of the right-hand <2 x double> operand
; is undef. The expected result is an all-zero mask.
define <2 x i64> @fcmp_false_v2f64_undef_elt() {
; CHECK-LABEL: fcmp_false_v2f64_undef_elt:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp false <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Always-false predicate applied to two fully-defined <4 x float> constants.
; The expected result is an all-zero mask.
define <4 x i32> @fcmp_false_v4f32() {
; CHECK-LABEL: fcmp_false_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp false <4 x float> <float -0.0, float 1.0, float -1.0, float +2.0>, <float +0.0, float 1.0, float -1.0, float +2.0>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
; Always-false predicate with a fully-undef right-hand <4 x float> operand.
; The expected result is an all-zero mask.
define <4 x i32> @fcmp_false_v4f32_undef() {
; CHECK-LABEL: fcmp_false_v4f32_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp false <4 x float> <float 1.0, float -1.0, float +2.0, float -0.0>, undef
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
; Always-false predicate where lanes 0 and 3 each have one undef operand.
; The expected result is an all-zero mask.
define <4 x i32> @fcmp_false_v4f32_undef_elt() {
; CHECK-LABEL: fcmp_false_v4f32_undef_elt:
; CHECK: # %bb.0:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp false <4 x float> <float -0.0, float 1.0, float -1.0, float undef>, <float undef, float 1.0, float -1.0, float +2.0>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
;
; fcmp true
;
; Always-true predicate applied to two identical <2 x double> constants.
; The expected result is an all-ones mask.
define <2 x i64> @fcmp_true_v2f64() {
; CHECK-LABEL: fcmp_true_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp true <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Always-true predicate with a fully-undef right-hand <2 x double> operand.
; The expected result is an all-ones mask.
define <2 x i64> @fcmp_true_v2f64_undef() {
; CHECK-LABEL: fcmp_true_v2f64_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp true <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, undef
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Always-true predicate where lane 0 of the right-hand <2 x double> operand
; is undef. The expected result is an all-ones mask.
define <2 x i64> @fcmp_true_v2f64_undef_elt() {
; CHECK-LABEL: fcmp_true_v2f64_undef_elt:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp true <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %mask
}
; Always-true predicate applied to two fully-defined <4 x float> constants.
; The expected result is an all-ones mask.
define <4 x i32> @fcmp_true_v4f32() {
; CHECK-LABEL: fcmp_true_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp true <4 x float> <float -0.0, float 1.0, float -1.0, float +2.0>, <float +0.0, float 1.0, float -1.0, float +2.0>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
; Always-true predicate with a fully-undef right-hand <4 x float> operand.
; The expected result is an all-ones mask.
define <4 x i32> @fcmp_true_v4f32_undef() {
; CHECK-LABEL: fcmp_true_v4f32_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp true <4 x float> <float 1.0, float -1.0, float +2.0, float -0.0>, undef
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}
; Always-true predicate where lanes 0 and 3 each have one undef operand.
; The expected result is an all-ones mask.
define <4 x i32> @fcmp_true_v4f32_undef_elt() {
; CHECK-LABEL: fcmp_true_v4f32_undef_elt:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: retq
  %cmp = fcmp true <4 x float> <float -0.0, float 1.0, float -1.0, float undef>, <float undef, float 1.0, float -1.0, float +2.0>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}