llvm-mirror/test/CodeGen/X86/widen_bitops-0.ll
Simon Pilgrim d9c22f7404 [X86][SSE] Attempt to break register dependencies during lowerBuildVector
LowerBuildVectorv16i8/LowerBuildVectorv8i16 insert values into an UNDEF vector if the build vector doesn't contain any zero elements, resulting in a register dependency on a previous use of the register.

This patch attempts to break the register dependency either by always zeroing the vector beforehand or (if we're inserting into the 0th element) by using VZEXT_MOVL(SCALAR_TO_VECTOR(i32 AEXT(Elt))), which lowers to (V)MOVD and performs a similar function. Additionally, (V)MOVD is a shorter instruction than PINSRB/PINSRW. We already do something similar for SSE41 PINSRD.
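
A minimal sketch of the element-0 case (the function and names here are hypothetical, not taken from this test file): a build vector with a single defined scalar previously lowered to a PINSRW into an undefined register, and can now lower to a register-writing (V)MOVD.

define <8 x i16> @buildvec_elt0(i16 %x) nounwind {
  %v = insertelement <8 x i16> undef, i16 %x, i32 0
  ret <8 x i16> %v
}

; before (approx.): pinsrw $0, %edi, %xmm0   <- merges into the stale %xmm0 value
; after  (approx.): movd %edi, %xmm0         <- writes all of %xmm0, dependency broken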

For pre-SSE41 LowerBuildVectorv16i8 we go a little further and use VZEXT_MOVL(SCALAR_TO_VECTOR(i32 ZEXT(Elt))) if the build vector contains zeros, avoiding the vector zeroing at the cost of a scalar zero extension; this can probably be brought over to some of the other cases (load folding etc.) in a future patch.
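
A similar sketch for the pre-SSE41 zero-containing case (again hypothetical, assuming compilation with plain SSE2): the scalar is zero extended before the move, so no separate vector zeroing is needed.

define <16 x i8> @buildvec_elt0_zeros(i8 %x) nounwind {
  %v = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0
  ret <16 x i8> %v
}

; expected shape (approx.): movzbl %dil, %eax followed by movd %eax, %xmm0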

Differential Revision: https://reviews.llvm.org/D29720

llvm-svn: 294581
2017-02-09 11:50:19 +00:00


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42

;
; AND/XOR/OR i24 as v3i8
;

define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v3i8:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_i24_as_v3i8:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: andl %esi, %edi
; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <3 x i8>
%2 = bitcast i24 %b to <3 x i8>
%3 = and <3 x i8> %1, %2
%4 = bitcast <3 x i8> %3 to i24
ret i24 %4
}

define i24 @xor_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v3i8:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_i24_as_v3i8:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: xorl %esi, %edi
; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <3 x i8>
%2 = bitcast i24 %b to <3 x i8>
%3 = xor <3 x i8> %1, %2
%4 = bitcast <3 x i8> %3 to i24
ret i24 %4
}

define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v3i8:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_i24_as_v3i8:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: orl %esi, %edi
; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <3 x i8>
%2 = bitcast i24 %b to <3 x i8>
%3 = or <3 x i8> %1, %2
%4 = bitcast <3 x i8> %3 to i24
ret i24 %4
}

;
; AND/XOR/OR i24 as v8i3
;

define i24 @and_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v8i3:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_i24_as_v8i3:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: andl %esi, %edi
; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <8 x i3>
%2 = bitcast i24 %b to <8 x i3>
%3 = and <8 x i3> %1, %2
%4 = bitcast <8 x i3> %3 to i24
ret i24 %4
}

define i24 @xor_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v8i3:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_i24_as_v8i3:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: xorl %esi, %edi
; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <8 x i3>
%2 = bitcast i24 %b to <8 x i3>
%3 = xor <8 x i3> %1, %2
%4 = bitcast <8 x i3> %3 to i24
ret i24 %4
}

define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v8i3:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_i24_as_v8i3:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: orl %esi, %edi
; X64-SSE-NEXT: movl %edi, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <8 x i3>
%2 = bitcast i24 %b to <8 x i3>
%3 = or <8 x i3> %1, %2
%4 = bitcast <8 x i3> %3 to i24
ret i24 %4
}

;
; AND/XOR/OR v3i8 as i24
;

define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: and_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pand %xmm0, %xmm1
; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v3i8_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT: movd %edi, %xmm1
; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1
; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1
; X64-SSE-NEXT: pand %xmm0, %xmm1
; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
%3 = and i24 %1, %2
%4 = bitcast i24 %3 to <3 x i8>
ret <3 x i8> %4
}

define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: xor_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pxor %xmm0, %xmm1
; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v3i8_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT: movd %edi, %xmm1
; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1
; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
%3 = xor i24 %1, %2
%4 = bitcast i24 %3 to <3 x i8>
ret <3 x i8> %4
}

define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: or_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1
; X32-SSE-NEXT: por %xmm0, %xmm1
; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v3i8_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT: movd %edi, %xmm1
; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1
; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
%3 = or i24 %1, %2
%4 = bitcast i24 %3 to <3 x i8>
ret <3 x i8> %4
}

;
; AND/XOR/OR v8i3 as i24
;

define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: and_v8i3_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: andps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v8i3_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: andps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <8 x i3> %a to i24
%2 = bitcast <8 x i3> %b to i24
%3 = and i24 %1, %2
%4 = bitcast i24 %3 to <8 x i3>
ret <8 x i3> %4
}

define <8 x i3> @xor_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: xor_v8i3_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: xorps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v8i3_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: xorps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <8 x i3> %a to i24
%2 = bitcast <8 x i3> %b to i24
%3 = xor i24 %1, %2
%4 = bitcast i24 %3 to <8 x i3>
ret <8 x i3> %4
}

define <8 x i3> @or_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: or_v8i3_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: orps %xmm1, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v8i3_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: orps %xmm1, %xmm0
; X64-SSE-NEXT: retq
%1 = bitcast <8 x i3> %a to i24
%2 = bitcast <8 x i3> %b to i24
%3 = or i24 %1, %2
%4 = bitcast i24 %3 to <8 x i3>
ret <8 x i3> %4
}