mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
4a6def05d5
This replaces the build_vector lowering code that was just added in D80013 and matches the pattern later from the x86-specific "vzext_movl". That seems to result in the same or better improvements and gets rid of the 'TODO' items from that patch. AFAICT, we always shrink wider constant vectors to 128-bit on these patterns, so we still get the implicit zero-extension to ymm/zmm without wasting space on larger vector constants. There's a trade-off there because that means we miss potential load-folding. Similarly, we could load scalar constants here with implicit zero-extension even to 128-bit. That saves constant space, but it means we forego load-folding, and so it increases register pressure. This seems like a good middle-ground between those 2 options. Differential Revision: https://reviews.llvm.org/D80131
259 lines
8.2 KiB
LLVM
259 lines
8.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
|
|
|
|
;
; fcmp oeq
;
|
|
|
|
; Both operands are the same constant vector <1.0, -DBL_MAX> (no NaNs), so
; `oeq` is true in every lane and the sext yields all-ones. The expected code
; materializes that mask with the pcmpeqd self-compare idiom.
define <2 x i64> @fcmp_oeq_v2f64() {
; CHECK-LABEL: fcmp_oeq_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp oeq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; The right-hand operand is entirely undef. The expected output is an
; all-zero register (xorps), i.e. the ordered compare folds to false in every
; lane -- consistent with undef being allowed to take a NaN value.
define <2 x i64> @fcmp_oeq_v2f64_undef() {
; CHECK-LABEL: fcmp_oeq_v2f64_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp oeq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, undef
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; Lane 0 compares 1.0 against an undef element; lane 1 compares -DBL_MAX
; against 1.0, which are not equal. The expected output is all-zero, so the
; undef lane folds to false just like the genuinely unequal lane.
define <2 x i64> @fcmp_oeq_v2f64_undef_elt() {
; CHECK-LABEL: fcmp_oeq_v2f64_undef_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp oeq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; Lanes 1-3 compare identical constants; lane 0 compares -0.0 with +0.0,
; which are equal under IEEE-754 comparison. Every lane is therefore true and
; the expected result is the all-ones mask (pcmpeqd self-compare).
define <4 x i32> @fcmp_oeq_v4f32() {
; CHECK-LABEL: fcmp_oeq_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp oeq <4 x float> <float -0.0, float 1.0, float -1.0, float +2.0>, <float +0.0, float 1.0, float -1.0, float +2.0>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
; <4 x float> variant of the whole-vector undef case: the right-hand operand
; is undef and the expected output is an all-zero register, i.e. the ordered
; compare folds to false in all four lanes.
define <4 x i32> @fcmp_oeq_v4f32_undef() {
; CHECK-LABEL: fcmp_oeq_v4f32_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp oeq <4 x float> <float 1.0, float -1.0, float +2.0, float -0.0>, undef
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
; Lanes 0 and 3 each have an undef element on one side; lanes 1 and 2 compare
; identical constants. The expected constant [0,-1,-1,0] (printed unsigned as
; 4294967295) shows the undef lanes folding to false and the rest to true.
define <4 x i32> @fcmp_oeq_v4f32_undef_elt() {
; CHECK-LABEL: fcmp_oeq_v4f32_undef_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,4294967295,0]
; CHECK-NEXT:    retq
  %1 = fcmp oeq <4 x float> <float -0.0, float 1.0, float -1.0, float undef>, <float undef, float 1.0, float -1.0, float +2.0>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
;
; fcmp ueq
;
|
|
|
|
; Both operands are the same constant vector <1.0, -DBL_MAX>, so `ueq`
; (unordered or equal) is true in every lane; the expected result is the
; all-ones mask produced by the pcmpeqd self-compare idiom.
define <2 x i64> @fcmp_ueq_v2f64() {
; CHECK-LABEL: fcmp_ueq_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp ueq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; The right-hand operand is entirely undef. The expected output is all-ones,
; i.e. the unordered compare folds to true in every lane -- consistent with
; undef being allowed to take a NaN value, which satisfies any `u*` predicate.
define <2 x i64> @fcmp_ueq_v2f64_undef() {
; CHECK-LABEL: fcmp_ueq_v2f64_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp ueq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, undef
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; Lane 0 compares 1.0 against an undef element and is expected to fold to
; true (all-ones, printed as 18446744073709551615); lane 1 compares -DBL_MAX
; against 1.0, which is ordered and unequal, so it is false (0).
define <2 x i64> @fcmp_ueq_v2f64_undef_elt() {
; CHECK-LABEL: fcmp_ueq_v2f64_undef_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551615,0]
; CHECK-NEXT:    retq
  %1 = fcmp ueq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; Lanes 1-3 compare identical constants and lane 0 compares -0.0 with +0.0
; (equal under IEEE-754 comparison), so `ueq` is true everywhere and the
; expected result is the all-ones mask.
define <4 x i32> @fcmp_ueq_v4f32() {
; CHECK-LABEL: fcmp_ueq_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp ueq <4 x float> <float -0.0, float 1.0, float -1.0, float +2.0>, <float +0.0, float 1.0, float -1.0, float +2.0>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
; <4 x float> variant of the whole-vector undef case: the right-hand operand
; is undef and the expected output is all-ones, i.e. the unordered compare
; folds to true in all four lanes.
define <4 x i32> @fcmp_ueq_v4f32_undef() {
; CHECK-LABEL: fcmp_ueq_v4f32_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp ueq <4 x float> <float 1.0, float -1.0, float +2.0, float -0.0>, undef
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
; Lanes 0 and 3 each have an undef element on one side (expected to fold to
; true for an unordered predicate); lanes 1 and 2 compare identical constants.
; All four lanes are therefore true and the expected result is all-ones.
define <4 x i32> @fcmp_ueq_v4f32_undef_elt() {
; CHECK-LABEL: fcmp_ueq_v4f32_undef_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp ueq <4 x float> <float -0.0, float 1.0, float -1.0, float undef>, <float undef, float 1.0, float -1.0, float +2.0>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
;
; fcmp false
;
|
|
|
|
; The `false` predicate performs no comparison and always yields false, so
; the sign-extended result is all-zero regardless of the (identical constant)
; operands; the expected code is the xorps zeroing idiom.
define <2 x i64> @fcmp_false_v2f64() {
; CHECK-LABEL: fcmp_false_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp false <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; `fcmp false` with a wholly undef right-hand operand: the predicate ignores
; its operands, so the expected result is still the all-zero register.
define <2 x i64> @fcmp_false_v2f64_undef() {
; CHECK-LABEL: fcmp_false_v2f64_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp false <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, undef
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; `fcmp false` where only lane 0 of the right-hand operand is undef: the
; predicate ignores its operands, so both lanes are false and the expected
; result is the all-zero register.
define <2 x i64> @fcmp_false_v2f64_undef_elt() {
; CHECK-LABEL: fcmp_false_v2f64_undef_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp false <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; <4 x float> `fcmp false` on fully constant operands: always false in every
; lane, so the expected result is the all-zero register (xorps).
define <4 x i32> @fcmp_false_v4f32() {
; CHECK-LABEL: fcmp_false_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp false <4 x float> <float -0.0, float 1.0, float -1.0, float +2.0>, <float +0.0, float 1.0, float -1.0, float +2.0>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
; <4 x float> `fcmp false` with a wholly undef right-hand operand: still
; always false, so the expected result is the all-zero register.
define <4 x i32> @fcmp_false_v4f32_undef() {
; CHECK-LABEL: fcmp_false_v4f32_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp false <4 x float> <float 1.0, float -1.0, float +2.0, float -0.0>, undef
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
; <4 x float> `fcmp false` with one undef element on each side (lanes 3 and
; 0): the predicate ignores its operands, so all lanes are false and the
; expected result is the all-zero register.
define <4 x i32> @fcmp_false_v4f32_undef_elt() {
; CHECK-LABEL: fcmp_false_v4f32_undef_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp false <4 x float> <float -0.0, float 1.0, float -1.0, float undef>, <float undef, float 1.0, float -1.0, float +2.0>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
;
; fcmp true
;
|
|
|
|
; The `true` predicate performs no comparison and always yields true, so the
; sign-extended result is all-ones regardless of the (identical constant)
; operands; the expected code is the pcmpeqd self-compare idiom.
define <2 x i64> @fcmp_true_v2f64() {
; CHECK-LABEL: fcmp_true_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp true <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; `fcmp true` with a wholly undef right-hand operand: the predicate ignores
; its operands, so the expected result is still the all-ones mask.
define <2 x i64> @fcmp_true_v2f64_undef() {
; CHECK-LABEL: fcmp_true_v2f64_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp true <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, undef
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; `fcmp true` where only lane 0 of the right-hand operand is undef: the
; predicate ignores its operands, so both lanes are true and the expected
; result is the all-ones mask.
define <2 x i64> @fcmp_true_v2f64_undef_elt() {
; CHECK-LABEL: fcmp_true_v2f64_undef_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp true <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
|
|
|
|
; <4 x float> `fcmp true` on fully constant operands: always true in every
; lane, so the expected result is the all-ones mask (pcmpeqd self-compare).
define <4 x i32> @fcmp_true_v4f32() {
; CHECK-LABEL: fcmp_true_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp true <4 x float> <float -0.0, float 1.0, float -1.0, float +2.0>, <float +0.0, float 1.0, float -1.0, float +2.0>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
; <4 x float> `fcmp true` with a wholly undef right-hand operand: still
; always true, so the expected result is the all-ones mask.
define <4 x i32> @fcmp_true_v4f32_undef() {
; CHECK-LABEL: fcmp_true_v4f32_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp true <4 x float> <float 1.0, float -1.0, float +2.0, float -0.0>, undef
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|
|
|
|
; <4 x float> `fcmp true` with one undef element on each side (lanes 3 and
; 0): the predicate ignores its operands, so all lanes are true and the
; expected result is the all-ones mask.
define <4 x i32> @fcmp_true_v4f32_undef_elt() {
; CHECK-LABEL: fcmp_true_v4f32_undef_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = fcmp true <4 x float> <float -0.0, float 1.0, float -1.0, float undef>, <float undef, float 1.0, float -1.0, float +2.0>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
|