1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[X86] Use MOVZX instead of MOVSX in f16_to_fp isel patterns.

Sign-extending the 16-bit input to 32 bits forces the adjacent half
element (the upper 16 bits) to be either all zeros or all ones. But an
all-ones half-precision bit pattern is a NaN, so that doesn't seem like
a great idea.

I'm working on supporting this with strict FP, where a NaN would
definitely be bad.
This commit is contained in:
Craig Topper 2020-02-09 18:35:57 -08:00
parent 93b04ecdf3
commit 9d2a7779c9
8 changed files with 252 additions and 252 deletions

View File

@ -8699,7 +8699,7 @@ let Predicates = [HasVLX] in {
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
(v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
(v8i16 (COPY_TO_REGCLASS (MOVZX32rr16 GR16:$src), VR128X)))), FR32X)) >;
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
(f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr

View File

@ -7400,7 +7400,7 @@ let Predicates = [HasF16C, NoVLX] in {
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr
(v4i32 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)))), FR32)) >;
(v4i32 (COPY_TO_REGCLASS (MOVZX32rr16 GR16:$src), VR128)))), FR32)) >;
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
(f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr

View File

@ -2264,7 +2264,7 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %arg2) {
; KNL-LABEL: test_concat_v2i1:
; KNL: ## %bb.0:
; KNL-NEXT: movswl 2(%rdi), %eax
; KNL-NEXT: movzwl 2(%rdi), %eax
; KNL-NEXT: vmovd %eax, %xmm0
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
; KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
@ -2272,7 +2272,7 @@ define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %
; KNL-NEXT: setb %al
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: movswl (%rdi), %eax
; KNL-NEXT: movzwl (%rdi), %eax
; KNL-NEXT: vmovd %eax, %xmm2
; KNL-NEXT: vcvtph2ps %xmm2, %xmm2
; KNL-NEXT: vucomiss %xmm1, %xmm2
@ -2312,7 +2312,7 @@ define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %
;
; SKX-LABEL: test_concat_v2i1:
; SKX: ## %bb.0:
; SKX-NEXT: movswl 2(%rdi), %eax
; SKX-NEXT: movzwl 2(%rdi), %eax
; SKX-NEXT: vmovd %eax, %xmm0
; SKX-NEXT: vcvtph2ps %xmm0, %xmm0
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
@ -2320,7 +2320,7 @@ define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %
; SKX-NEXT: setb %al
; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: kshiftlb $1, %k0, %k0
; SKX-NEXT: movswl (%rdi), %eax
; SKX-NEXT: movzwl (%rdi), %eax
; SKX-NEXT: vmovd %eax, %xmm2
; SKX-NEXT: vcvtph2ps %xmm2, %xmm2
; SKX-NEXT: vucomiss %xmm1, %xmm2

View File

@ -1432,7 +1432,7 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; KNL: ## %bb.0: ## %entry
; KNL-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
; KNL-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
; KNL-NEXT: movswl %cx, %ecx ## encoding: [0x0f,0xbf,0xc9]
; KNL-NEXT: movzwl %cx, %ecx ## encoding: [0x0f,0xb7,0xc9]
; KNL-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
@ -1442,7 +1442,7 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; KNL-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
; KNL-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
; KNL-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
; KNL-NEXT: cwtl ## encoding: [0x98]
; KNL-NEXT: movzwl %ax, %eax ## encoding: [0x0f,0xb7,0xc0]
; KNL-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; KNL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
@ -1465,7 +1465,7 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; AVX512BW: ## %bb.0: ## %entry
; AVX512BW-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
; AVX512BW-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
; AVX512BW-NEXT: movswl %cx, %ecx ## encoding: [0x0f,0xbf,0xc9]
; AVX512BW-NEXT: movzwl %cx, %ecx ## encoding: [0x0f,0xb7,0xc9]
; AVX512BW-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
@ -1475,7 +1475,7 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; AVX512BW-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
; AVX512BW-NEXT: kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2]
; AVX512BW-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
; AVX512BW-NEXT: cwtl ## encoding: [0x98]
; AVX512BW-NEXT: movzwl %ax, %eax ## encoding: [0x0f,0xb7,0xc0]
; AVX512BW-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; AVX512BW-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
@ -1497,7 +1497,7 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; SKX: ## %bb.0: ## %entry
; SKX-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
; SKX-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
; SKX-NEXT: movswl %cx, %ecx ## encoding: [0x0f,0xbf,0xc9]
; SKX-NEXT: movzwl %cx, %ecx ## encoding: [0x0f,0xb7,0xc9]
; SKX-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
@ -1507,7 +1507,7 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; SKX-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
; SKX-NEXT: kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2]
; SKX-NEXT: kshiftlb $1, %k0, %k0 ## encoding: [0xc4,0xe3,0x79,0x32,0xc0,0x01]
; SKX-NEXT: cwtl ## encoding: [0x98]
; SKX-NEXT: movzwl %ax, %eax ## encoding: [0x0f,0xb7,0xc0]
; SKX-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]

View File

@ -79,7 +79,7 @@ define float @test_extend32(half* %addr) #0 {
;
; BWON-F16C-LABEL: test_extend32:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movswl (%rdi), %eax
; BWON-F16C-NEXT: movzwl (%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: retq
@ -110,7 +110,7 @@ define double @test_extend64(half* %addr) #0 {
;
; BWON-F16C-LABEL: test_extend64:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movswl (%rdi), %eax
; BWON-F16C-NEXT: movzwl (%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
@ -203,7 +203,7 @@ define i64 @test_fptosi_i64(half* %p) #0 {
;
; BWON-F16C-LABEL: test_fptosi_i64:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movswl (%rdi), %eax
; BWON-F16C-NEXT: movzwl (%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
@ -285,7 +285,7 @@ define i64 @test_fptoui_i64(half* %p) #0 {
;
; BWON-F16C-LABEL: test_fptoui_i64:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movswl (%rdi), %eax
; BWON-F16C-NEXT: movzwl (%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
@ -423,21 +423,21 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; BWON-F16C-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; BWON-F16C-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm1
; BWON-F16C-NEXT: vpextrw $1, %xmm1, %eax
; BWON-F16C-NEXT: cwtl
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm2
; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT: vmovd %xmm1, %eax
; BWON-F16C-NEXT: cwtl
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm1
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: cwtl
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm2
; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; BWON-F16C-NEXT: vpextrw $1, %xmm0, %eax
; BWON-F16C-NEXT: cwtl
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
@ -530,20 +530,20 @@ define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
;
; BWON-F16C-LABEL: test_extend64_vec4:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movswl 6(%rdi), %eax
; BWON-F16C-NEXT: movzwl 6(%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT: movswl 4(%rdi), %eax
; BWON-F16C-NEXT: movzwl 4(%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm1
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; BWON-F16C-NEXT: movswl 2(%rdi), %eax
; BWON-F16C-NEXT: movzwl 2(%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm1
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT: movswl (%rdi), %eax
; BWON-F16C-NEXT: movzwl (%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm2
; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
@ -929,7 +929,7 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: movswl (%rsi), %eax
; BWON-F16C-NEXT: movzwl (%rsi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm1
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
@ -991,7 +991,7 @@ define half @PR40273(half) #0 {
;
; BWON-F16C-LABEL: PR40273:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movswl %di, %eax
; BWON-F16C-NEXT: movzwl %di, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: xorl %eax, %eax

View File

@ -42,10 +42,10 @@ define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
;
; F16C-LABEL: ir_fadd_v1f16:
; F16C: # %bb.0:
; F16C-NEXT: movswl %si, %eax
; F16C-NEXT: movzwl %si, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: movswl %di, %eax
; F16C-NEXT: movzwl %di, %eax
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
@ -147,20 +147,20 @@ define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
;
; F16C-LABEL: ir_fadd_v2f16:
; F16C: # %bb.0:
; F16C-NEXT: movswl %cx, %eax
; F16C-NEXT: movzwl %cx, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: movswl %si, %eax
; F16C-NEXT: movzwl %si, %eax
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; F16C-NEXT: movswl %dx, %eax
; F16C-NEXT: movzwl %dx, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: movswl %di, %eax
; F16C-NEXT: movzwl %di, %eax
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0

View File

@ -2195,11 +2195,11 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
;
; AVX512-LABEL: fptosi_2f16_to_4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: movswl %di, %eax
; AVX512-NEXT: movzwl %di, %eax
; AVX512-NEXT: vmovd %eax, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttss2si %xmm0, %eax
; AVX512-NEXT: movswl %si, %ecx
; AVX512-NEXT: movzwl %si, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttss2si %xmm0, %ecx

File diff suppressed because it is too large Load Diff