
[X86] Added extra widening tests for and/xor/or bit operations

To make sure we're dealing with both cases: legal/illegal numbers of vector elements and legal/illegal vector element types.

llvm-svn: 265929
commit 9a8e91143a (parent 6a1e2f4957)
Author: Simon Pilgrim
Date: 2016-04-11 10:58:52 +00:00
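For orientation before the generated checks below: the new tests exercise two widening shapes. <3 x i8> has an illegal element count (3) but a legal element type (i8), while <8 x i3> has a legal element count (8) but an illegal element type (i3). Every test follows the same bitcast/op/bitcast pattern; a minimal sketch (the function name is a placeholder, the body mirrors the added tests):

define i24 @sketch_and_i24_as_v8i3(i24 %a, i24 %b) nounwind {
  %1 = bitcast i24 %a to <8 x i3>  ; reinterpret i24 as eight 3-bit lanes
  %2 = bitcast i24 %b to <8 x i3>
  %3 = and <8 x i3> %1, %2         ; vector op forces type legalization/widening
  %4 = bitcast <8 x i3> %3 to i24
  ret i24 %4
}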


@@ -2,6 +2,10 @@
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42
;
; AND/XOR/OR i24 as v3i8
;
define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v3i8:
; X32-SSE: # BB#0:
@@ -89,6 +93,735 @@ define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind {
ret i24 %4
}
;
; AND/XOR/OR i24 as v8i3
;
define i24 @and_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v8i3:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pushl %ebp
; X32-SSE-NEXT: movl %esp, %ebp
; X32-SSE-NEXT: andl $-8, %esp
; X32-SSE-NEXT: subl $24, %esp
; X32-SSE-NEXT: movl 12(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movl 8(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm1
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
; X32-SSE-NEXT: pxor %xmm2, %xmm2
; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm0
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: pand %xmm1, %xmm0
; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT: shll $16, %ecx
; X32-SSE-NEXT: movzwl (%esp), %eax
; X32-SSE-NEXT: orl %ecx, %eax
; X32-SSE-NEXT: movl %ebp, %esp
; X32-SSE-NEXT: popl %ebp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_i24_as_v8i3:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %esi
; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %edi
; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $3, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: movl %eax, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $6, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $9, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $12, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X64-SSE-NEXT: shrl $15, %eax
; X64-SSE-NEXT: movzwl %ax, %eax
; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X64-SSE-NEXT: xorl %eax, %eax
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $3, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movl %ecx, %esi
; X64-SSE-NEXT: andl $7, %esi
; X64-SSE-NEXT: movd %esi, %xmm1
; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $6, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $9, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $12, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1
; X64-SSE-NEXT: shrl $15, %ecx
; X64-SSE-NEXT: movzwl %cx, %ecx
; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1
; X64-SSE-NEXT: pand %xmm0, %xmm1
; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: shll $16, %ecx
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: orl %ecx, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <8 x i3>
%2 = bitcast i24 %b to <8 x i3>
%3 = and <8 x i3> %1, %2
%4 = bitcast <8 x i3> %3 to i24
ret i24 %4
}
define i24 @xor_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v8i3:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pushl %ebp
; X32-SSE-NEXT: movl %esp, %ebp
; X32-SSE-NEXT: andl $-8, %esp
; X32-SSE-NEXT: subl $24, %esp
; X32-SSE-NEXT: movl 12(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movl 8(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm1
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
; X32-SSE-NEXT: pxor %xmm2, %xmm2
; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm0
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: pxor %xmm1, %xmm0
; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT: shll $16, %ecx
; X32-SSE-NEXT: movzwl (%esp), %eax
; X32-SSE-NEXT: orl %ecx, %eax
; X32-SSE-NEXT: movl %ebp, %esp
; X32-SSE-NEXT: popl %ebp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_i24_as_v8i3:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %esi
; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %edi
; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $3, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: movl %eax, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $6, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $9, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $12, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X64-SSE-NEXT: shrl $15, %eax
; X64-SSE-NEXT: movzwl %ax, %eax
; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X64-SSE-NEXT: xorl %eax, %eax
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $3, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movl %ecx, %esi
; X64-SSE-NEXT: andl $7, %esi
; X64-SSE-NEXT: movd %esi, %xmm1
; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $6, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $9, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $12, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1
; X64-SSE-NEXT: shrl $15, %ecx
; X64-SSE-NEXT: movzwl %cx, %ecx
; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: shll $16, %ecx
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: orl %ecx, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <8 x i3>
%2 = bitcast i24 %b to <8 x i3>
%3 = xor <8 x i3> %1, %2
%4 = bitcast <8 x i3> %3 to i24
ret i24 %4
}
define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v8i3:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pushl %ebp
; X32-SSE-NEXT: movl %esp, %ebp
; X32-SSE-NEXT: andl $-8, %esp
; X32-SSE-NEXT: subl $24, %esp
; X32-SSE-NEXT: movl 12(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movl 8(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm1
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
; X32-SSE-NEXT: pxor %xmm2, %xmm2
; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm0
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: por %xmm1, %xmm0
; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT: shll $16, %ecx
; X32-SSE-NEXT: movzwl (%esp), %eax
; X32-SSE-NEXT: orl %ecx, %eax
; X32-SSE-NEXT: movl %ebp, %esp
; X32-SSE-NEXT: popl %ebp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_i24_as_v8i3:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %esi
; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %edi
; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $3, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: movl %eax, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $6, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $9, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $12, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X64-SSE-NEXT: shrl $15, %eax
; X64-SSE-NEXT: movzwl %ax, %eax
; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X64-SSE-NEXT: xorl %eax, %eax
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $3, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movl %ecx, %esi
; X64-SSE-NEXT: andl $7, %esi
; X64-SSE-NEXT: movd %esi, %xmm1
; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $6, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $9, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $12, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1
; X64-SSE-NEXT: shrl $15, %ecx
; X64-SSE-NEXT: movzwl %cx, %ecx
; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: shll $16, %ecx
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: orl %ecx, %eax
; X64-SSE-NEXT: retq
%1 = bitcast i24 %a to <8 x i3>
%2 = bitcast i24 %b to <8 x i3>
%3 = or <8 x i3> %1, %2
%4 = bitcast <8 x i3> %3 to i24
ret i24 %4
}
;
; AND/XOR/OR v3i8 as i24
;
define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: and_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: subl $12, %esp
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %ecx
; X32-SSE-NEXT: andl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm0
; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X32-SSE-NEXT: pextrb $0, %xmm0, %eax
; X32-SSE-NEXT: pextrb $4, %xmm0, %edx
; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X32-SSE-NEXT: addl $12, %esp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v3i8_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u>
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: movd %edi, %xmm0
; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0
; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %ecx
; X64-SSE-NEXT: andl %eax, %ecx
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE-NEXT: pextrb $4, %xmm0, %edx
; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
%3 = and i24 %1, %2
%4 = bitcast i24 %3 to <3 x i8>
ret <3 x i8> %4
}
define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: xor_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: subl $12, %esp
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %ecx
; X32-SSE-NEXT: xorl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm0
; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X32-SSE-NEXT: pextrb $0, %xmm0, %eax
; X32-SSE-NEXT: pextrb $4, %xmm0, %edx
; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X32-SSE-NEXT: addl $12, %esp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v3i8_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u>
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: movd %edi, %xmm0
; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0
; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %ecx
; X64-SSE-NEXT: xorl %eax, %ecx
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE-NEXT: pextrb $4, %xmm0, %edx
; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
%3 = xor i24 %1, %2
%4 = bitcast i24 %3 to <3 x i8>
ret <3 x i8> %4
}
define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: or_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: subl $12, %esp
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %ecx
; X32-SSE-NEXT: orl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm0
; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X32-SSE-NEXT: pextrb $0, %xmm0, %eax
; X32-SSE-NEXT: pextrb $4, %xmm0, %edx
; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X32-SSE-NEXT: addl $12, %esp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v3i8_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u>
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: movd %edi, %xmm0
; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0
; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %ecx
; X64-SSE-NEXT: orl %eax, %ecx
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE-NEXT: pextrb $4, %xmm0, %edx
; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
%3 = or i24 %1, %2
%4 = bitcast i24 %3 to <3 x i8>
ret <3 x i8> %4
}
;
; AND/XOR/OR v8i3 as i24
;
define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: and_v8i3_as_i24:
; X32-SSE: # BB#0: