1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-23 21:13:02 +02:00
llvm-mirror/test/CodeGen/X86/sad_variations.ll
Oren Ben Simhon c298b766f8 [X86] Add 64 bit pattern matching for PSADBW
PSADBW pattern currently supports the 32 bit IR pattern and only GLT (greather than) comparison.
The patch extends the pattern to catch also 64 bit IR pattern and includes all other comparison types (not only GLT).

Differential Revision: https://reviews.llvm.org/D31577

llvm-svn: 299425
2017-04-04 10:23:18 +00:00

348 lines
15 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
define i32 @sad8_32bit_icmp_sge(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #0 {
; SSE2-LABEL: sad8_32bit_icmp_sge:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: retq
;
; AVX2-LABEL: sad8_32bit_icmp_sge:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sad8_32bit_icmp_sge:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: retq
entry:
%idx.ext = zext i32 %stride to i64
br label %for.body
for.body: ; preds = %entry
%0 = bitcast i8* %cur to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 1
%2 = zext <8 x i8> %1 to <8 x i32>
%3 = bitcast i8* %ref to <8 x i8>*
%4 = load <8 x i8>, <8 x i8>* %3, align 1
%5 = zext <8 x i8> %4 to <8 x i32>
%6 = sub nsw <8 x i32> %2, %5
%7 = icmp sgt <8 x i32> %6, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%8 = sub nsw <8 x i32> zeroinitializer, %6
%9 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %8
%rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = add <8 x i32> %9, %rdx.shuf
%rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
%rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
%10 = extractelement <8 x i32> %bin.rdx232, i32 0
ret i32 %10
}
define i32 @sad8_32bit_icmp_sgt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #1 {
; SSE2-LABEL: sad8_32bit_icmp_sgt:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: retq
;
; AVX2-LABEL: sad8_32bit_icmp_sgt:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sad8_32bit_icmp_sgt:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: retq
entry:
%idx.ext = zext i32 %stride to i64
br label %for.body
for.body: ; preds = %entry
%0 = bitcast i8* %cur to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 1
%2 = zext <8 x i8> %1 to <8 x i32>
%3 = bitcast i8* %ref to <8 x i8>*
%4 = load <8 x i8>, <8 x i8>* %3, align 1
%5 = zext <8 x i8> %4 to <8 x i32>
%6 = sub nsw <8 x i32> %2, %5
%7 = icmp sgt <8 x i32> %6, zeroinitializer
%8 = sub nsw <8 x i32> zeroinitializer, %6
%9 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %8
%rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = add <8 x i32> %9, %rdx.shuf
%rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
%rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
%10 = extractelement <8 x i32> %bin.rdx232, i32 0
ret i32 %10
}
define i32 @sad8_32bit_icmp_sle(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #2 {
; SSE2-LABEL: sad8_32bit_icmp_sle:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: retq
;
; AVX2-LABEL: sad8_32bit_icmp_sle:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sad8_32bit_icmp_sle:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: retq
entry:
%idx.ext = zext i32 %stride to i64
br label %for.body
for.body: ; preds = %entry
%0 = bitcast i8* %cur to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 1
%2 = zext <8 x i8> %1 to <8 x i32>
%3 = bitcast i8* %ref to <8 x i8>*
%4 = load <8 x i8>, <8 x i8>* %3, align 1
%5 = zext <8 x i8> %4 to <8 x i32>
%6 = sub nsw <8 x i32> %2, %5
%7 = icmp slt <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%8 = sub nsw <8 x i32> zeroinitializer, %6
%9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
%rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = add <8 x i32> %9, %rdx.shuf
%rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
%rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
%10 = extractelement <8 x i32> %bin.rdx232, i32 0
ret i32 %10
}
define i32 @sad8_32bit_icmp_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #3 {
; SSE2-LABEL: sad8_32bit_icmp_slt:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: retq
;
; AVX2-LABEL: sad8_32bit_icmp_slt:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sad8_32bit_icmp_slt:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: retq
entry:
%idx.ext = zext i32 %stride to i64
br label %for.body
for.body: ; preds = %entry
%0 = bitcast i8* %cur to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 1
%2 = zext <8 x i8> %1 to <8 x i32>
%3 = bitcast i8* %ref to <8 x i8>*
%4 = load <8 x i8>, <8 x i8>* %3, align 1
%5 = zext <8 x i8> %4 to <8 x i32>
%6 = sub nsw <8 x i32> %2, %5
%7 = icmp slt <8 x i32> %6, zeroinitializer
%8 = sub nsw <8 x i32> zeroinitializer, %6
%9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
%rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = add <8 x i32> %9, %rdx.shuf
%rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
%rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
%10 = extractelement <8 x i32> %bin.rdx232, i32 0
ret i32 %10
}
define i64 @sad8_64bit_icmp_sext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
; SSE2-LABEL: sad8_64bit_icmp_sext_slt:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %rax
; SSE2-NEXT: retq
;
; AVX2-LABEL: sad8_64bit_icmp_sext_slt:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sad8_64bit_icmp_sext_slt:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: retq
entry:
br label %for.body
for.body: ; preds = %entry
%0 = bitcast i8* %cur to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 1
%2 = zext <8 x i8> %1 to <8 x i32>
%3 = bitcast i8* %ref to <8 x i8>*
%4 = load <8 x i8>, <8 x i8>* %3, align 1
%5 = zext <8 x i8> %4 to <8 x i32>
%6 = sub nsw <8 x i32> %2, %5
%7 = icmp slt <8 x i32> %6, zeroinitializer
%8 = sub nsw <8 x i32> zeroinitializer, %6
%9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
%10 = sext <8 x i32> %9 to <8 x i64>
%rdx.shuf = shufflevector <8 x i64> %10, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = add <8 x i64> %rdx.shuf, %10
%rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
%rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
%11 = extractelement <8 x i64> %bin.rdx239, i32 0
ret i64 %11
}
define i64 @sad8_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
; SSE2-LABEL: sad8_64bit_icmp_zext_slt:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %rax
; SSE2-NEXT: retq
;
; AVX2-LABEL: sad8_64bit_icmp_zext_slt:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sad8_64bit_icmp_zext_slt:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: retq
entry:
br label %for.body
for.body: ; preds = %entry
%0 = bitcast i8* %cur to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 1
%2 = zext <8 x i8> %1 to <8 x i32>
%3 = bitcast i8* %ref to <8 x i8>*
%4 = load <8 x i8>, <8 x i8>* %3, align 1
%5 = zext <8 x i8> %4 to <8 x i32>
%6 = sub nsw <8 x i32> %2, %5
%7 = icmp slt <8 x i32> %6, zeroinitializer
%8 = sub nsw <8 x i32> zeroinitializer, %6
%9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
%10 = zext <8 x i32> %9 to <8 x i64>
%rdx.shuf = shufflevector <8 x i64> %10, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = add <8 x i64> %rdx.shuf, %10
%rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
%rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
%11 = extractelement <8 x i64> %bin.rdx239, i32 0
ret i64 %11
}
define i64 @sad8_early_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
; SSE2-LABEL: sad8_early_64bit_icmp_zext_slt:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %rax
; SSE2-NEXT: retq
;
; AVX2-LABEL: sad8_early_64bit_icmp_zext_slt:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sad8_early_64bit_icmp_zext_slt:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512F-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: retq
entry:
br label %for.body
for.body: ; preds = %entry
%0 = bitcast i8* %cur to <8 x i8>*
%1 = load <8 x i8>, <8 x i8>* %0, align 1
%2 = zext <8 x i8> %1 to <8 x i64>
%3 = bitcast i8* %ref to <8 x i8>*
%4 = load <8 x i8>, <8 x i8>* %3, align 1
%5 = zext <8 x i8> %4 to <8 x i64>
%6 = sub nsw <8 x i64> %2, %5
%7 = icmp slt <8 x i64> %6, zeroinitializer
%8 = sub nsw <8 x i64> zeroinitializer, %6
%9 = select <8 x i1> %7, <8 x i64> %8, <8 x i64> %6
%rdx.shuf = shufflevector <8 x i64> %9, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx = add <8 x i64> %rdx.shuf, %9
%rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
%rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
%10 = extractelement <8 x i64> %bin.rdx239, i32 0
ret i64 %10
}