llvm-mirror/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
Wang, Pengfei 72838e8fb2 [X86][FPEnv] Teach X86 mask compare intrinsics to respect strict FP semantics.
When we use mask compare intrinsics under the strict FP option, the masked-off
elements shouldn't raise any exception. So we can't replace the
intrinsic with a full compare plus an "and" operation.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D85385
2020-08-11 10:28:41 +08:00
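
As an illustration of the behavior this commit describes, here is a minimal, hypothetical sketch (separate from the test file below; the function name and attribute group number are illustrative): under the strictfp attribute, a compare intrinsic carrying a partial mask has to keep that mask as an operand of the compare itself, because expanding it into a full compare followed by an "and" would evaluate the masked-off lanes and could raise spurious FP exceptions.

; Hypothetical sketch: %mask may have cleared lanes, so under strictfp the
; mask must stay on the compare intrinsic rather than being applied by a
; separate full compare + "and".
define zeroext i16 @strict_masked_cmp(<16 x float> %a, <16 x float> %b, <16 x i1> %mask) #2 {
entry:
  %cmp = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 13, <16 x i1> %mask, i32 4) #2
  %bits = bitcast <16 x i1> %cmp to i16
  ret i16 %bits
}

declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)

attributes #2 = { strictfp }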


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
; This test checks that values are passed optimally between the "cmp" and "kor" intrinsics.
; PR28839
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind readnone uwtable
define zeroext i16 @cmp_kor_seq_16(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d, <16 x float> %x) local_unnamed_addr #0 {
; CHECK-LABEL: cmp_kor_seq_16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vcmpgeps %zmm4, %zmm0, %k0
; CHECK-NEXT: vcmpgeps %zmm4, %zmm1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: vcmpgeps %zmm4, %zmm2, %k1
; CHECK-NEXT: vcmpgeps %zmm4, %zmm3, %k2
; CHECK-NEXT: korw %k2, %k1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
entry:
%0 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
%1 = bitcast <16 x i1> %0 to i16
%2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
%3 = bitcast <16 x i1> %2 to i16
%4 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
%5 = bitcast <16 x i1> %4 to i16
%6 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
%7 = bitcast <16 x i1> %6 to i16
%8 = bitcast i16 %1 to <16 x i1>
%9 = bitcast i16 %3 to <16 x i1>
%10 = or <16 x i1> %8, %9
%11 = bitcast <16 x i1> %10 to i16
%12 = bitcast i16 %5 to <16 x i1>
%13 = bitcast i16 %7 to <16 x i1>
%14 = or <16 x i1> %12, %13
%15 = bitcast <16 x i1> %14 to i16
%16 = bitcast i16 %11 to <16 x i1>
%17 = bitcast i16 %15 to <16 x i1>
%18 = or <16 x i1> %16, %17
%19 = bitcast <16 x i1> %18 to i16
ret i16 %19
}
; Function Attrs: nounwind readnone
declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) #1
attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }