1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

re-generate the tests using the update_llc_test_checks.py script

llvm-svn: 272643
This commit is contained in:
Igor Breger 2016-06-14 07:05:10 +00:00
parent 723821aed3
commit 0008668f4b
8 changed files with 1137 additions and 684 deletions

View File

@ -1,13 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: f_fu
; CHECK-NOT: vpblend
; CHECK: vmovdqa32 {{.*}} {%k1}
define void @f_fu(float* %ret, float* %aa, float %b) {
; CHECK-LABEL: f_fu:
; CHECK: ## BB#0: ## %allocas
; CHECK-NEXT: vcvttss2si %xmm0, %eax
; CHECK-NEXT: vpbroadcastd %eax, %zmm0
; CHECK-NEXT: vcvttps2dq (%rsi), %zmm1
; CHECK-NEXT: vpsrld $31, %zmm0, %zmm2
; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm2
; CHECK-NEXT: vpsrad $1, %zmm2, %zmm2
; CHECK-NEXT: movw $-21846, %ax ## imm = 0xAAAA
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovdqa32 {{.*}}(%rip), %zmm1 {%k1}
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: retq
allocas:
%ptr_cast_for_load = bitcast float* %aa to <16 x float>*
%ptr_masked_load.39 = load <16 x float>, <16 x float>* %ptr_cast_for_load, align 4

View File

@ -1,279 +1,320 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
; CHECK-LABEL: @test1
; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
; CHECK: ret
define i32 @test1(float %x) {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = bitcast float %x to i32
ret i32 %res
}
; CHECK-LABEL: @test2
; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test2(i32 %x) {
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = insertelement <4 x i32>undef, i32 %x, i32 0
ret <4 x i32>%res
}
; CHECK-LABEL: @test3
; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x i64> @test3(i64 %x) {
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = insertelement <2 x i64>undef, i64 %x, i32 0
ret <2 x i64>%res
}
; CHECK-LABEL: @test4
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test4(i32* %x) {
; CHECK-LABEL: test4:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load i32, i32* %x
%res = insertelement <4 x i32>undef, i32 %y, i32 0
ret <4 x i32>%res
}
; CHECK-LABEL: @test5
; CHECK: vmovss %xmm0, (%rdi) ## encoding: [0x62
; CHECK: ret
define void @test5(float %x, float* %y) {
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
store float %x, float* %y, align 4
ret void
}
; CHECK-LABEL: @test6
; CHECK: vmovsd %xmm0, (%rdi) ## encoding: [0x62
; CHECK: ret
define void @test6(double %x, double* %y) {
; CHECK-LABEL: test6:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovsd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
store double %x, double* %y, align 8
ret void
}
; CHECK-LABEL: @test7
; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define float @test7(i32* %x) {
; CHECK-LABEL: test7:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load i32, i32* %x
%res = bitcast i32 %y to float
ret float %res
}
; CHECK-LABEL: @test8
; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
; CHECK: ret
define i32 @test8(<4 x i32> %x) {
; CHECK-LABEL: test8:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = extractelement <4 x i32> %x, i32 0
ret i32 %res
}
; CHECK-LABEL: @test9
; CHECK: vmovq %xmm0, %rax ## encoding: [0x62
; CHECK: ret
define i64 @test9(<2 x i64> %x) {
; CHECK-LABEL: test9:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovq %xmm0, %rax ## encoding: [0x62,0xf1,0xfd,0x08,0x7e,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = extractelement <2 x i64> %x, i32 0
ret i64 %res
}
; CHECK-LABEL: @test10
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test10(i32* %x) {
; CHECK-LABEL: test10:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load i32, i32* %x, align 4
%res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
ret <4 x i32>%res
}
; CHECK-LABEL: @test11
; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x float> @test11(float* %x) {
; CHECK-LABEL: test11:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load float, float* %x, align 4
%res = insertelement <4 x float>zeroinitializer, float %y, i32 0
ret <4 x float>%res
}
; CHECK-LABEL: @test12
; CHECK: vmovsd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x double> @test12(double* %x) {
; CHECK-LABEL: test12:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovsd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x10,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load double, double* %x, align 8
%res = insertelement <2 x double>zeroinitializer, double %y, i32 0
ret <2 x double>%res
}
; CHECK-LABEL: @test13
; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x i64> @test13(i64 %x) {
; CHECK-LABEL: test13:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
ret <2 x i64>%res
}
; CHECK-LABEL: @test14
; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test14(i32 %x) {
; CHECK-LABEL: test14:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
ret <4 x i32>%res
}
; CHECK-LABEL: @test15
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test15(i32* %x) {
; CHECK-LABEL: test15:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load i32, i32* %x, align 4
%res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
ret <4 x i32>%res
}
; CHECK-LABEL: test16
; CHECK: vmovdqu32
; CHECK: ret
define <16 x i32> @test16(i8 * %addr) {
; CHECK-LABEL: test16:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i32>*
%res = load <16 x i32>, <16 x i32>* %vaddr, align 1
ret <16 x i32>%res
}
; CHECK-LABEL: test17
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test17(i8 * %addr) {
; CHECK-LABEL: test17:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i32>*
%res = load <16 x i32>, <16 x i32>* %vaddr, align 64
ret <16 x i32>%res
}
; CHECK-LABEL: test18
; CHECK: vmovdqa64
; CHECK: ret
define void @test18(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test18:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i64>*
store <8 x i64>%data, <8 x i64>* %vaddr, align 64
ret void
}
; CHECK-LABEL: test19
; CHECK: vmovdqu32
; CHECK: ret
define void @test19(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test19:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x48,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i32>*
store <16 x i32>%data, <16 x i32>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test20
; CHECK: vmovdqa32
; CHECK: ret
define void @test20(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test20:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x48,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i32>*
store <16 x i32>%data, <16 x i32>* %vaddr, align 64
ret void
}
; CHECK-LABEL: test21
; CHECK: vmovdqa64
; CHECK: ret
define <8 x i64> @test21(i8 * %addr) {
; CHECK-LABEL: test21:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i64>*
%res = load <8 x i64>, <8 x i64>* %vaddr, align 64
ret <8 x i64>%res
}
; CHECK-LABEL: test22
; CHECK: vmovdqu64
; CHECK: ret
define void @test22(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test22:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i64>*
store <8 x i64>%data, <8 x i64>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test23
; CHECK: vmovdqu64
; CHECK: ret
define <8 x i64> @test23(i8 * %addr) {
; CHECK-LABEL: test23:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i64>*
%res = load <8 x i64>, <8 x i64>* %vaddr, align 1
ret <8 x i64>%res
}
; CHECK-LABEL: test24
; CHECK: vmovapd
; CHECK: ret
define void @test24(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test24:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovapd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x double>*
store <8 x double>%data, <8 x double>* %vaddr, align 64
ret void
}
; CHECK-LABEL: test25
; CHECK: vmovapd
; CHECK: ret
define <8 x double> @test25(i8 * %addr) {
; CHECK-LABEL: test25:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x double>*
%res = load <8 x double>, <8 x double>* %vaddr, align 64
ret <8 x double>%res
}
; CHECK-LABEL: test26
; CHECK: vmovaps
; CHECK: ret
define void @test26(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test26:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x float>*
store <16 x float>%data, <16 x float>* %vaddr, align 64
ret void
}
; CHECK-LABEL: test27
; CHECK: vmovaps
; CHECK: ret
define <16 x float> @test27(i8 * %addr) {
; CHECK-LABEL: test27:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x float>*
%res = load <16 x float>, <16 x float>* %vaddr, align 64
ret <16 x float>%res
}
; CHECK-LABEL: test28
; CHECK: vmovupd
; CHECK: ret
define void @test28(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test28:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x double>*
store <8 x double>%data, <8 x double>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test29
; CHECK: vmovupd
; CHECK: ret
define <8 x double> @test29(i8 * %addr) {
; CHECK-LABEL: test29:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x double>*
%res = load <8 x double>, <8 x double>* %vaddr, align 1
ret <8 x double>%res
}
; CHECK-LABEL: test30
; CHECK: vmovups
; CHECK: ret
define void @test30(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test30:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x float>*
store <16 x float>%data, <16 x float>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test31
; CHECK: vmovups
; CHECK: ret
define <16 x float> @test31(i8 * %addr) {
; CHECK-LABEL: test31:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x float>*
%res = load <16 x float>, <16 x float>* %vaddr, align 1
ret <16 x float>%res
}
; CHECK-LABEL: test32
; CHECK: vmovdqa32{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
%r = load <16 x i32>, <16 x i32>* %vaddr, align 64
@ -281,10 +322,13 @@ define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
ret <16 x i32>%res
}
; CHECK-LABEL: test33
; CHECK: vmovdqu32{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test33:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
%r = load <16 x i32>, <16 x i32>* %vaddr, align 1
@ -292,10 +336,13 @@ define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
ret <16 x i32>%res
}
; CHECK-LABEL: test34
; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test34:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
%r = load <16 x i32>, <16 x i32>* %vaddr, align 64
@ -303,10 +350,13 @@ define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
ret <16 x i32>%res
}
; CHECK-LABEL: test35
; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test35:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
%r = load <16 x i32>, <16 x i32>* %vaddr, align 1
@ -314,10 +364,13 @@ define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
ret <16 x i32>%res
}
; CHECK-LABEL: test36
; CHECK: vmovdqa64{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test36:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
%r = load <8 x i64>, <8 x i64>* %vaddr, align 64
@ -325,10 +378,13 @@ define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
ret <8 x i64>%res
}
; CHECK-LABEL: test37
; CHECK: vmovdqu64{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test37:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
%r = load <8 x i64>, <8 x i64>* %vaddr, align 1
@ -336,10 +392,13 @@ define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
ret <8 x i64>%res
}
; CHECK-LABEL: test38
; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test38:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
%r = load <8 x i64>, <8 x i64>* %vaddr, align 64
@ -347,10 +406,13 @@ define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
ret <8 x i64>%res
}
; CHECK-LABEL: test39
; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test39:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
%r = load <8 x i64>, <8 x i64>* %vaddr, align 1
@ -358,10 +420,14 @@ define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
ret <8 x i64>%res
}
; CHECK-LABEL: test40
; CHECK: vmovaps{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test40:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
%r = load <16 x float>, <16 x float>* %vaddr, align 64
@ -369,10 +435,14 @@ define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1)
ret <16 x float>%res
}
; CHECK-LABEL: test41
; CHECK: vmovups{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test41:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
%r = load <16 x float>, <16 x float>* %vaddr, align 1
@ -380,10 +450,14 @@ define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1)
ret <16 x float>%res
}
; CHECK-LABEL: test42
; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test42:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
%r = load <16 x float>, <16 x float>* %vaddr, align 64
@ -391,10 +465,14 @@ define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
ret <16 x float>%res
}
; CHECK-LABEL: test43
; CHECK: vmovups{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test43:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
%r = load <16 x float>, <16 x float>* %vaddr, align 1
@ -402,10 +480,14 @@ define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
ret <16 x float>%res
}
; CHECK-LABEL: test44
; CHECK: vmovapd{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test44:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
%r = load <8 x double>, <8 x double>* %vaddr, align 64
@ -413,10 +495,14 @@ define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1)
ret <8 x double>%res
}
; CHECK-LABEL: test45
; CHECK: vmovupd{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test45:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
%r = load <8 x double>, <8 x double>* %vaddr, align 1
@ -424,10 +510,14 @@ define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1)
ret <8 x double>%res
}
; CHECK-LABEL: test46
; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test46:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
%r = load <8 x double>, <8 x double>* %vaddr, align 64
@ -435,10 +525,14 @@ define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
ret <8 x double>%res
}
; CHECK-LABEL: test47
; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test47:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
%r = load <8 x double>, <8 x double>* %vaddr, align 1

View File

@ -1,27 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s
; CHECK-LABEL: test1
; CHECK: vmovdqu8
; CHECK: ret
define <64 x i8> @test1(i8 * %addr) {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0
; CHECK-NEXT: retq
%vaddr = bitcast i8* %addr to <64 x i8>*
%res = load <64 x i8>, <64 x i8>* %vaddr, align 1
ret <64 x i8>%res
}
; CHECK-LABEL: test2
; CHECK: vmovdqu8
; CHECK: ret
define void @test2(i8 * %addr, <64 x i8> %data) {
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 %zmm0, (%rdi)
; CHECK-NEXT: retq
%vaddr = bitcast i8* %addr to <64 x i8>*
store <64 x i8>%data, <64 x i8>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test3
; CHECK: vmovdqu8{{.*{%k[1-7]}}}
; CHECK: ret
define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqb %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%mask = icmp ne <64 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <64 x i8>*
%r = load <64 x i8>, <64 x i8>* %vaddr, align 1
@ -29,10 +35,13 @@ define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
ret <64 x i8>%res
}
; CHECK-LABEL: test4
; CHECK: vmovdqu8{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
; CHECK-LABEL: test4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%mask = icmp ne <64 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <64 x i8>*
%r = load <64 x i8>, <64 x i8>* %vaddr, align 1
@ -40,28 +49,33 @@ define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
ret <64 x i8>%res
}
; CHECK-LABEL: test5
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test5(i8 * %addr) {
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0
; CHECK-NEXT: retq
%vaddr = bitcast i8* %addr to <32 x i16>*
%res = load <32 x i16>, <32 x i16>* %vaddr, align 1
ret <32 x i16>%res
}
; CHECK-LABEL: test6
; CHECK: vmovdqu16
; CHECK: ret
define void @test6(i8 * %addr, <32 x i16> %data) {
; CHECK-LABEL: test6:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 %zmm0, (%rdi)
; CHECK-NEXT: retq
%vaddr = bitcast i8* %addr to <32 x i16>*
store <32 x i16>%data, <32 x i16>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test7
; CHECK: vmovdqu16{{.*{%k[1-7]}}}
; CHECK: ret
define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
; CHECK-LABEL: test7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqw %zmm2, %zmm1, %k1
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%mask = icmp ne <32 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i16>*
%r = load <32 x i16>, <32 x i16>* %vaddr, align 1
@ -69,10 +83,13 @@ define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
ret <32 x i16>%res
}
; CHECK-LABEL: test8
; CHECK: vmovdqu16{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
; CHECK-LABEL: test8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%mask = icmp ne <32 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i16>*
%r = load <32 x i16>, <32 x i16>* %vaddr, align 1

View File

@ -1,94 +1,114 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; CHECK-LABEL: test1
; CHECK: vpcmpeqb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <64 x i8> @test1(<64 x i8> %x, <64 x i8> %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp eq <64 x i8> %x, %y
%max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
ret <64 x i8> %max
}
; CHECK-LABEL: test2
; CHECK: vpcmpgtb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind {
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sgt <64 x i8> %x, %y
%max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
ret <64 x i8> %max
}
; CHECK-LABEL: @test3
; CHECK: vpcmplew {{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test3(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1) nounwind {
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k1
; CHECK-NEXT: vmovdqu16 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sge <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x1, <32 x i16> %y
ret <32 x i16> %max
}
; CHECK-LABEL: test4
; CHECK: vpcmpnleub {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind {
; CHECK-LABEL: test4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleub %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp ugt <64 x i8> %x, %y
%max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
ret <64 x i8> %max
}
; CHECK-LABEL: test5
; CHECK: vpcmpeqw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwind {
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <32 x i16>, <32 x i16>* %yp, align 4
%mask = icmp eq <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
}
; CHECK-LABEL: @test6
; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
; CHECK-LABEL: test6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask = icmp sgt <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
}
; CHECK-LABEL: @test7
; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
; CHECK-LABEL: test7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask = icmp sle <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
}
; CHECK-LABEL: @test8
; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
; CHECK-LABEL: test8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask = icmp ule <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
}
; CHECK-LABEL: @test9
; CHECK: vpcmpeqw %zmm{{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16> %y1) nounwind {
; CHECK-LABEL: test9:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpeqw %zmm3, %zmm2, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp eq <32 x i16> %x1, %y1
%mask0 = icmp eq <32 x i16> %x, %y
%mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
@ -96,11 +116,14 @@ define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16
ret <32 x i16> %max
}
; CHECK-LABEL: @test10
; CHECK: vpcmpleb %zmm{{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y1) nounwind {
; CHECK-LABEL: test10:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpleb %zmm2, %zmm3, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <64 x i8> %x1, %y1
%mask0 = icmp sle <64 x i8> %x, %y
%mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer
@ -108,11 +131,14 @@ define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y
ret <64 x i8> %max
}
; CHECK-LABEL: @test11
; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i8> %y1) nounwind {
; CHECK-LABEL: test11:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpcmpgtb (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <64 x i8> %x1, %y1
%y = load <64 x i8>, <64 x i8>* %y.ptr, align 4
%mask0 = icmp sgt <64 x i8> %x, %y
@ -121,11 +147,14 @@ define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i
ret <64 x i8> %max
}
; CHECK-LABEL: @test12
; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32 x i16> %y1) nounwind {
; CHECK-LABEL: test12:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <32 x i16> %x1, %y1
%y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask0 = icmp ule <32 x i16> %x, %y

View File

@ -1,27 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
; CHECK-LABEL: test_256_1
; CHECK: vmovdqu8 {{.*}} ## encoding: [0x62
; CHECK: ret
define <32 x i8> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <32 x i8>*
%res = load <32 x i8>, <32 x i8>* %vaddr, align 1
ret <32 x i8>%res
}
; CHECK-LABEL: test_256_2
; CHECK: vmovdqu8{{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_2(i8 * %addr, <32 x i8> %data) {
; CHECK-LABEL: test_256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7f,0x28,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <32 x i8>*
store <32 x i8>%data, <32 x i8>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_256_3
; CHECK: vmovdqu8{{.*{%k[1-7]} }}## encoding: [0x62
; CHECK: ret
define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) {
; CHECK-LABEL: test_256_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqb %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xca,0x04]
; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <32 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i8>*
%r = load <32 x i8>, <32 x i8>* %vaddr, align 1
@ -29,10 +35,13 @@ define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) {
ret <32 x i8>%res
}
; CHECK-LABEL: test_256_4
; CHECK: vmovdqu8{{.*{%k[1-7]} {z} }}## encoding: [0x62
; CHECK: ret
define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
; CHECK-LABEL: test_256_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc9,0x04]
; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <32 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i8>*
%r = load <32 x i8>, <32 x i8>* %vaddr, align 1
@ -40,28 +49,33 @@ define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
ret <32 x i8>%res
}
; CHECK-LABEL: test_256_5
; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
; CHECK: ret
define <16 x i16> @test_256_5(i8 * %addr) {
; CHECK-LABEL: test_256_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xff,0x28,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i16>*
%res = load <16 x i16>, <16 x i16>* %vaddr, align 1
ret <16 x i16>%res
}
; CHECK-LABEL: test_256_6
; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_6(i8 * %addr, <16 x i16> %data) {
; CHECK-LABEL: test_256_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x28,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i16>*
store <16 x i16>%data, <16 x i16>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_256_7
; CHECK: vmovdqu16{{.*{%k[1-7]} }}## encoding: [0x62
; CHECK: ret
define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) {
; CHECK-LABEL: test_256_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqw %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x3f,0xca,0x04]
; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i16>*
%r = load <16 x i16>, <16 x i16>* %vaddr, align 1
@ -69,10 +83,13 @@ define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) {
ret <16 x i16>%res
}
; CHECK-LABEL: test_256_8
; CHECK: vmovdqu16{{.*{%k[1-7]} {z} }}## encoding: [0x62
; CHECK: ret
define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
; CHECK-LABEL: test_256_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc9,0x04]
; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i16>*
%r = load <16 x i16>, <16 x i16>* %vaddr, align 1
@ -80,28 +97,33 @@ define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
ret <16 x i16>%res
}
; CHECK-LABEL: test_128_1
; CHECK: vmovdqu8 {{.*}} ## encoding: [0x62
; CHECK: ret
define <16 x i8> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i8>*
%res = load <16 x i8>, <16 x i8>* %vaddr, align 1
ret <16 x i8>%res
}
; CHECK-LABEL: test_128_2
; CHECK: vmovdqu8{{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_2(i8 * %addr, <16 x i8> %data) {
; CHECK-LABEL: test_128_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7f,0x08,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i8>*
store <16 x i8>%data, <16 x i8>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_128_3
; CHECK: vmovdqu8{{.*{%k[1-7]} }}## encoding: [0x62
; CHECK: ret
define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) {
; CHECK-LABEL: test_128_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqb %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x3f,0xca,0x04]
; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i8>*
%r = load <16 x i8>, <16 x i8>* %vaddr, align 1
@ -109,10 +131,13 @@ define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) {
ret <16 x i8>%res
}
; CHECK-LABEL: test_128_4
; CHECK: vmovdqu8{{.*{%k[1-7]} {z} }}## encoding: [0x62
; CHECK: ret
define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
; CHECK-LABEL: test_128_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc9,0x04]
; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <16 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i8>*
%r = load <16 x i8>, <16 x i8>* %vaddr, align 1
@ -120,28 +145,33 @@ define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
ret <16 x i8>%res
}
; CHECK-LABEL: test_128_5
; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
; CHECK: ret
define <8 x i16> @test_128_5(i8 * %addr) {
; CHECK-LABEL: test_128_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i16>*
%res = load <8 x i16>, <8 x i16>* %vaddr, align 1
ret <8 x i16>%res
}
; CHECK-LABEL: test_128_6
; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_6(i8 * %addr, <8 x i16> %data) {
; CHECK-LABEL: test_128_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i16>*
store <8 x i16>%data, <8 x i16>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_128_7
; CHECK: vmovdqu16{{.*{%k[1-7]} }}## encoding: [0x62
; CHECK: ret
define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) {
; CHECK-LABEL: test_128_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqw %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3f,0xca,0x04]
; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i16>*
%r = load <8 x i16>, <8 x i16>* %vaddr, align 1
@ -149,10 +179,13 @@ define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) {
ret <8 x i16>%res
}
; CHECK-LABEL: test_128_8
; CHECK: vmovdqu16{{.*{%k[1-7]} {z} }}## encoding: [0x62
; CHECK: ret
define <8 x i16> @test_128_8(i8 * %addr, <8 x i16> %mask1) {
; CHECK-LABEL: test_128_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x04]
; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i16>*
%r = load <8 x i16>, <8 x i16>* %vaddr, align 1

View File

@ -1,94 +1,114 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; CHECK-LABEL: test256_1
; CHECK: vpcmpeqb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <32 x i8> @test256_1(<32 x i8> %x, <32 x i8> %y) nounwind {
; CHECK-LABEL: test256_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp eq <32 x i8> %x, %y
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %y
ret <32 x i8> %max
}
; CHECK-LABEL: test256_2
; CHECK: vpcmpgtb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <32 x i8> @test256_2(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind {
; CHECK-LABEL: test256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%mask = icmp sgt <32 x i8> %x, %y
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
ret <32 x i8> %max
}
; CHECK-LABEL: @test256_3
; CHECK: vpcmplew {{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_3(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1) nounwind {
; CHECK-LABEL: test256_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k1
; CHECK-NEXT: vmovdqu16 %ymm2, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sge <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x1, <16 x i16> %y
ret <16 x i16> %max
}
; CHECK-LABEL: test256_4
; CHECK: vpcmpnleub {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <32 x i8> @test256_4(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind {
; CHECK-LABEL: test256_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%mask = icmp ugt <32 x i8> %x, %y
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
ret <32 x i8> %max
}
; CHECK-LABEL: test256_5
; CHECK: vpcmpeqw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nounwind {
; CHECK-LABEL: test256_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <16 x i16>, <16 x i16>* %yp, align 4
%mask = icmp eq <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_6
; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
; CHECK-LABEL: test256_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask = icmp sgt <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_7
; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
; CHECK-LABEL: test256_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask = icmp sle <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_8
; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
; CHECK-LABEL: test256_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask = icmp ule <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_9
; CHECK: vpcmpeqw %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x i16> %y1) nounwind {
; CHECK-LABEL: test256_9:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpeqw %ymm3, %ymm2, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp eq <16 x i16> %x1, %y1
%mask0 = icmp eq <16 x i16> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@ -96,11 +116,14 @@ define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_10
; CHECK: vpcmpleb %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8> %y1) nounwind {
; CHECK-LABEL: test256_10:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpleb %ymm2, %ymm3, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <32 x i8> %x1, %y1
%mask0 = icmp sle <32 x i8> %x, %y
%mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
@ -108,11 +131,14 @@ define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8
ret <32 x i8> %max
}
; CHECK-LABEL: @test256_11
; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32 x i8> %y1) nounwind {
; CHECK-LABEL: test256_11:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpcmpgtb (%rdi), %ymm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <32 x i8> %x1, %y1
%y = load <32 x i8>, <32 x i8>* %y.ptr, align 4
%mask0 = icmp sgt <32 x i8> %x, %y
@ -121,11 +147,14 @@ define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32
ret <32 x i8> %max
}
; CHECK-LABEL: @test256_12
; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1, <16 x i16> %y1) nounwind {
; CHECK-LABEL: test256_12:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %ymm1, %ymm2, %k1
; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <16 x i16> %x1, %y1
%y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask0 = icmp ule <16 x i16> %x, %y
@ -134,95 +163,114 @@ define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1,
ret <16 x i16> %max
}
; CHECK-LABEL: test128_1
; CHECK: vpcmpeqb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <16 x i8> @test128_1(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-LABEL: test128_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp eq <16 x i8> %x, %y
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %y
ret <16 x i8> %max
}
; CHECK-LABEL: test128_2
; CHECK: vpcmpgtb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <16 x i8> @test128_2(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind {
; CHECK-LABEL: test128_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%mask = icmp sgt <16 x i8> %x, %y
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
ret <16 x i8> %max
}
; CHECK-LABEL: @test128_3
; CHECK: vpcmplew {{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_3(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1) nounwind {
; CHECK-LABEL: test128_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm2, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sge <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x1, <8 x i16> %y
ret <8 x i16> %max
}
; CHECK-LABEL: test128_4
; CHECK: vpcmpnleub {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <16 x i8> @test128_4(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind {
; CHECK-LABEL: test128_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%mask = icmp ugt <16 x i8> %x, %y
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
ret <16 x i8> %max
}
; CHECK-LABEL: test128_5
; CHECK: vpcmpeqw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwind {
; CHECK-LABEL: test128_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <8 x i16>, <8 x i16>* %yp, align 4
%mask = icmp eq <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_6
; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
; CHECK-LABEL: test128_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask = icmp sgt <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_7
; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
; CHECK-LABEL: test128_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask = icmp sle <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_8
; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
; CHECK-LABEL: test128_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask = icmp ule <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_9
; CHECK: vpcmpeqw %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16> %y1) nounwind {
; CHECK-LABEL: test128_9:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpeqw %xmm3, %xmm2, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp eq <8 x i16> %x1, %y1
%mask0 = icmp eq <8 x i16> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@ -230,11 +278,14 @@ define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16>
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_10
; CHECK: vpcmpleb %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8> %y1) nounwind {
; CHECK-LABEL: test128_10:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpleb %xmm2, %xmm3, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <16 x i8> %x1, %y1
%mask0 = icmp sle <16 x i8> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@ -242,11 +293,14 @@ define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8
ret <16 x i8> %max
}
; CHECK-LABEL: @test128_11
; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16 x i8> %y1) nounwind {
; CHECK-LABEL: test128_11:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <16 x i8> %x1, %y1
%y = load <16 x i8>, <16 x i8>* %y.ptr, align 4
%mask0 = icmp sgt <16 x i8> %x, %y
@ -255,11 +309,14 @@ define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16
ret <16 x i8> %max
}
; CHECK-LABEL: @test128_12
; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8 x i16> %y1) nounwind {
; CHECK-LABEL: test128_12:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %xmm1, %xmm2, %k1
; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <8 x i16> %x1, %y1
%y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask0 = icmp ule <8 x i16> %x, %y

View File

@ -1,153 +1,173 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
; CHECK-LABEL: test_256_1
; CHECK: vmovdqu32
; CHECK: ret
define <8 x i32> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
%res = load <8 x i32>, <8 x i32>* %vaddr, align 1
ret <8 x i32>%res
}
; CHECK-LABEL: test_256_2
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test_256_2(i8 * %addr) {
; CHECK-LABEL: test_256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
%res = load <8 x i32>, <8 x i32>* %vaddr, align 32
ret <8 x i32>%res
}
; CHECK-LABEL: test_256_3
; CHECK: vmovdqa64
; CHECK: ret
define void @test_256_3(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
store <4 x i64>%data, <4 x i64>* %vaddr, align 32
ret void
}
; CHECK-LABEL: test_256_4
; CHECK: vmovdqu32
; CHECK: ret
define void @test_256_4(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
store <8 x i32>%data, <8 x i32>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_256_5
; CHECK: vmovdqa32
; CHECK: ret
define void @test_256_5(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
store <8 x i32>%data, <8 x i32>* %vaddr, align 32
ret void
}
; CHECK-LABEL: test_256_6
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test_256_6(i8 * %addr) {
; CHECK-LABEL: test_256_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
%res = load <4 x i64>, <4 x i64>* %vaddr, align 32
ret <4 x i64>%res
}
; CHECK-LABEL: test_256_7
; CHECK: vmovdqu64
; CHECK: ret
define void @test_256_7(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
store <4 x i64>%data, <4 x i64>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_256_8
; CHECK: vmovdqu64
; CHECK: ret
define <4 x i64> @test_256_8(i8 * %addr) {
; CHECK-LABEL: test_256_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
%res = load <4 x i64>, <4 x i64>* %vaddr, align 1
ret <4 x i64>%res
}
; CHECK-LABEL: test_256_9
; CHECK: vmovapd {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_9(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovapd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x double>*
store <4 x double>%data, <4 x double>* %vaddr, align 32
ret void
}
; CHECK-LABEL: test_256_10
; CHECK: vmovapd {{.*}} ## encoding: [0x62
; CHECK: ret
define <4 x double> @test_256_10(i8 * %addr) {
; CHECK-LABEL: test_256_10:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x double>*
%res = load <4 x double>, <4 x double>* %vaddr, align 32
ret <4 x double>%res
}
; CHECK-LABEL: test_256_11
; CHECK: vmovaps {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_11(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x float>*
store <8 x float>%data, <8 x float>* %vaddr, align 32
ret void
}
; CHECK-LABEL: test_256_12
; CHECK: vmovaps {{.*}} ## encoding: [0x62
; CHECK: ret
define <8 x float> @test_256_12(i8 * %addr) {
; CHECK-LABEL: test_256_12:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x float>*
%res = load <8 x float>, <8 x float>* %vaddr, align 32
ret <8 x float>%res
}
; CHECK-LABEL: test_256_13
; CHECK: vmovupd {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_13(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovupd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x double>*
store <4 x double>%data, <4 x double>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_256_14
; CHECK: vmovupd {{.*}} ## encoding: [0x62
; CHECK: ret
define <4 x double> @test_256_14(i8 * %addr) {
; CHECK-LABEL: test_256_14:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x double>*
%res = load <4 x double>, <4 x double>* %vaddr, align 1
ret <4 x double>%res
}
; CHECK-LABEL: test_256_15
; CHECK: vmovups {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_256_15(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x float>*
store <8 x float>%data, <8 x float>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_256_16
; CHECK: vmovups {{.*}} ## encoding: [0x62
; CHECK: ret
define <8 x float> @test_256_16(i8 * %addr) {
; CHECK-LABEL: test_256_16:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x float>*
%res = load <8 x float>, <8 x float>* %vaddr, align 1
ret <8 x float>%res
}
; CHECK-LABEL: test_256_17
; CHECK: vmovdqa32{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
%r = load <8 x i32>, <8 x i32>* %vaddr, align 32
@ -155,10 +175,13 @@ define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
ret <8 x i32>%res
}
; CHECK-LABEL: test_256_18
; CHECK: vmovdqu32{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
%r = load <8 x i32>, <8 x i32>* %vaddr, align 1
@ -166,10 +189,13 @@ define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
ret <8 x i32>%res
}
; CHECK-LABEL: test_256_19
; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
%r = load <8 x i32>, <8 x i32>* %vaddr, align 32
@ -177,10 +203,13 @@ define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
ret <8 x i32>%res
}
; CHECK-LABEL: test_256_20
; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
%r = load <8 x i32>, <8 x i32>* %vaddr, align 1
@ -188,10 +217,13 @@ define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
ret <8 x i32>%res
}
; CHECK-LABEL: test_256_21
; CHECK: vmovdqa64{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
%r = load <4 x i64>, <4 x i64>* %vaddr, align 32
@ -199,10 +231,13 @@ define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
ret <4 x i64>%res
}
; CHECK-LABEL: test_256_22
; CHECK: vmovdqu64{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
%r = load <4 x i64>, <4 x i64>* %vaddr, align 1
@ -210,10 +245,13 @@ define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
ret <4 x i64>%res
}
; CHECK-LABEL: test_256_23
; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
%r = load <4 x i64>, <4 x i64>* %vaddr, align 32
@ -221,10 +259,13 @@ define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
ret <4 x i64>%res
}
; CHECK-LABEL: test_256_24
; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
%r = load <4 x i64>, <4 x i64>* %vaddr, align 1
@ -232,10 +273,14 @@ define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
ret <4 x i64>%res
}
; CHECK-LABEL: test_256_25
; CHECK: vmovaps{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
%r = load <8 x float>, <8 x float>* %vaddr, align 32
@ -243,10 +288,14 @@ define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1
ret <8 x float>%res
}
; CHECK-LABEL: test_256_26
; CHECK: vmovups{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
%r = load <8 x float>, <8 x float>* %vaddr, align 1
@ -254,10 +303,14 @@ define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1
ret <8 x float>%res
}
; CHECK-LABEL: test_256_27
; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
%r = load <8 x float>, <8 x float>* %vaddr, align 32
@ -265,10 +318,14 @@ define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
ret <8 x float>%res
}
; CHECK-LABEL: test_256_28
; CHECK: vmovups{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
%r = load <8 x float>, <8 x float>* %vaddr, align 1
@ -276,10 +333,13 @@ define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
ret <8 x float>%res
}
; CHECK-LABEL: test_256_29
; CHECK: vmovapd{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
%r = load <4 x double>, <4 x double>* %vaddr, align 32
@ -287,10 +347,13 @@ define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1
ret <4 x double>%res
}
; CHECK-LABEL: test_256_30
; CHECK: vmovupd{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
%r = load <4 x double>, <4 x double>* %vaddr, align 1
@ -298,10 +361,13 @@ define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1
ret <4 x double>%res
}
; CHECK-LABEL: test_256_31
; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
%r = load <4 x double>, <4 x double>* %vaddr, align 32
@ -309,10 +375,13 @@ define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
ret <4 x double>%res
}
; CHECK-LABEL: test_256_32
; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
%r = load <4 x double>, <4 x double>* %vaddr, align 1
@ -320,154 +389,173 @@ define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
ret <4 x double>%res
}
; CHECK-LABEL: test_128_1
; CHECK: vmovdqu32
; CHECK: ret
define <4 x i32> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
%res = load <4 x i32>, <4 x i32>* %vaddr, align 1
ret <4 x i32>%res
}
; CHECK-LABEL: test_128_2
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test_128_2(i8 * %addr) {
; CHECK-LABEL: test_128_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
%res = load <4 x i32>, <4 x i32>* %vaddr, align 16
ret <4 x i32>%res
}
; CHECK-LABEL: test_128_3
; CHECK: vmovdqa64
; CHECK: ret
define void @test_128_3(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
store <2 x i64>%data, <2 x i64>* %vaddr, align 16
ret void
}
; CHECK-LABEL: test_128_4
; CHECK: vmovdqu32
; CHECK: ret
define void @test_128_4(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
store <4 x i32>%data, <4 x i32>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_128_5
; CHECK: vmovdqa32
; CHECK: ret
define void @test_128_5(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
store <4 x i32>%data, <4 x i32>* %vaddr, align 16
ret void
}
; CHECK-LABEL: test_128_6
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test_128_6(i8 * %addr) {
; CHECK-LABEL: test_128_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
%res = load <2 x i64>, <2 x i64>* %vaddr, align 16
ret <2 x i64>%res
}
; CHECK-LABEL: test_128_7
; CHECK: vmovdqu64
; CHECK: ret
define void @test_128_7(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
store <2 x i64>%data, <2 x i64>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_128_8
; CHECK: vmovdqu64
; CHECK: ret
define <2 x i64> @test_128_8(i8 * %addr) {
; CHECK-LABEL: test_128_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
%res = load <2 x i64>, <2 x i64>* %vaddr, align 1
ret <2 x i64>%res
}
; CHECK-LABEL: test_128_9
; CHECK: vmovapd {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_9(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovapd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x double>*
store <2 x double>%data, <2 x double>* %vaddr, align 16
ret void
}
; CHECK-LABEL: test_128_10
; CHECK: vmovapd {{.*}} ## encoding: [0x62
; CHECK: ret
define <2 x double> @test_128_10(i8 * %addr) {
; CHECK-LABEL: test_128_10:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x double>*
%res = load <2 x double>, <2 x double>* %vaddr, align 16
ret <2 x double>%res
}
; CHECK-LABEL: test_128_11
; CHECK: vmovaps {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_11(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x float>*
store <4 x float>%data, <4 x float>* %vaddr, align 16
ret void
}
; CHECK-LABEL: test_128_12
; CHECK: vmovaps {{.*}} ## encoding: [0x62
; CHECK: ret
define <4 x float> @test_128_12(i8 * %addr) {
; CHECK-LABEL: test_128_12:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x float>*
%res = load <4 x float>, <4 x float>* %vaddr, align 16
ret <4 x float>%res
}
; CHECK-LABEL: test_128_13
; CHECK: vmovupd {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_13(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovupd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x double>*
store <2 x double>%data, <2 x double>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_128_14
; CHECK: vmovupd {{.*}} ## encoding: [0x62
; CHECK: ret
define <2 x double> @test_128_14(i8 * %addr) {
; CHECK-LABEL: test_128_14:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x double>*
%res = load <2 x double>, <2 x double>* %vaddr, align 1
ret <2 x double>%res
}
; CHECK-LABEL: test_128_15
; CHECK: vmovups {{.*}} ## encoding: [0x62
; CHECK: ret
define void @test_128_15(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x float>*
store <4 x float>%data, <4 x float>* %vaddr, align 1
ret void
}
; CHECK-LABEL: test_128_16
; CHECK: vmovups {{.*}} ## encoding: [0x62
; CHECK: ret
define <4 x float> @test_128_16(i8 * %addr) {
; CHECK-LABEL: test_128_16:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x float>*
%res = load <4 x float>, <4 x float>* %vaddr, align 1
ret <4 x float>%res
}
; CHECK-LABEL: test_128_17
; CHECK: vmovdqa32{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
%r = load <4 x i32>, <4 x i32>* %vaddr, align 16
@ -475,10 +563,13 @@ define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
ret <4 x i32>%res
}
; CHECK-LABEL: test_128_18
; CHECK: vmovdqu32{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
%r = load <4 x i32>, <4 x i32>* %vaddr, align 1
@ -486,10 +577,13 @@ define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
ret <4 x i32>%res
}
; CHECK-LABEL: test_128_19
; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
%r = load <4 x i32>, <4 x i32>* %vaddr, align 16
@ -497,10 +591,13 @@ define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
ret <4 x i32>%res
}
; CHECK-LABEL: test_128_20
; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
%r = load <4 x i32>, <4 x i32>* %vaddr, align 1
@ -508,10 +605,13 @@ define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
ret <4 x i32>%res
}
; CHECK-LABEL: test_128_21
; CHECK: vmovdqa64{{.*{%k[1-7]} }}
; CHECK: ret
define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
%r = load <2 x i64>, <2 x i64>* %vaddr, align 16
@ -519,10 +619,13 @@ define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
ret <2 x i64>%res
}
; CHECK-LABEL: test_128_22
; CHECK: vmovdqu64{{.*{%k[1-7]} }}
; CHECK: ret
define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
%r = load <2 x i64>, <2 x i64>* %vaddr, align 1
@ -530,10 +633,13 @@ define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
ret <2 x i64>%res
}
; CHECK-LABEL: test_128_23
; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
%r = load <2 x i64>, <2 x i64>* %vaddr, align 16
@ -541,10 +647,13 @@ define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
ret <2 x i64>%res
}
; CHECK-LABEL: test_128_24
; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
%r = load <2 x i64>, <2 x i64>* %vaddr, align 1
@ -552,10 +661,13 @@ define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
ret <2 x i64>%res
}
; CHECK-LABEL: test_128_25
; CHECK: vmovaps{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
%r = load <4 x float>, <4 x float>* %vaddr, align 16
@ -563,10 +675,13 @@ define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1)
ret <4 x float>%res
}
; CHECK-LABEL: test_128_26
; CHECK: vmovups{{.*{%k[1-7]} }}
; CHECK: ret
define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
%r = load <4 x float>, <4 x float>* %vaddr, align 1
@ -574,10 +689,13 @@ define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1)
ret <4 x float>%res
}
; CHECK-LABEL: test_128_27
; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
%r = load <4 x float>, <4 x float>* %vaddr, align 16
@ -585,10 +703,13 @@ define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
ret <4 x float>%res
}
; CHECK-LABEL: test_128_28
; CHECK: vmovups{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
%r = load <4 x float>, <4 x float>* %vaddr, align 1
@ -596,10 +717,13 @@ define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
ret <4 x float>%res
}
; CHECK-LABEL: test_128_29
; CHECK: vmovapd{{.*{%k[1-7]} }}
; CHECK: ret
define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
%r = load <2 x double>, <2 x double>* %vaddr, align 16
@ -607,10 +731,13 @@ define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1
ret <2 x double>%res
}
; CHECK-LABEL: test_128_30
; CHECK: vmovupd{{.*{%k[1-7]} }}
; CHECK: ret
define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
%r = load <2 x double>, <2 x double>* %vaddr, align 1
@ -618,10 +745,13 @@ define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1
ret <2 x double>%res
}
; CHECK-LABEL: test_128_31
; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
%r = load <2 x double>, <2 x double>* %vaddr, align 16
@ -629,10 +759,13 @@ define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
ret <2 x double>%res
}
; CHECK-LABEL: test_128_32
; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
%r = load <2 x double>, <2 x double>* %vaddr, align 1

View File

@ -1,94 +1,114 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; CHECK-LABEL: test256_1
; CHECK: vpcmpeqq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: test256_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp eq <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
ret <4 x i64> %max
}
; CHECK-LABEL: test256_2
; CHECK: vpcmpgtq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind {
; CHECK-LABEL: test256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sgt <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
ret <4 x i64> %max
}
; CHECK-LABEL: @test256_3
; CHECK: vpcmpled {{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_3(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1) nounwind {
; CHECK-LABEL: test256_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k1
; CHECK-NEXT: vmovdqa32 %ymm2, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sge <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y
ret <8 x i32> %max
}
; CHECK-LABEL: test256_4
; CHECK: vpcmpnleuq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind {
; CHECK-LABEL: test256_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp ugt <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
ret <4 x i64> %max
}
; CHECK-LABEL: test256_5
; CHECK: vpcmpeqd (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind {
; CHECK-LABEL: test256_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %yp, align 4
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_6
; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test256_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp sgt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_7
; CHECK: vpcmpled (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test256_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp sle <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_8
; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test256_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp ule <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_9
; CHECK: vpcmpeqd %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> %y1) nounwind {
; CHECK-LABEL: test256_9:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp eq <8 x i32> %x1, %y1
%mask0 = icmp eq <8 x i32> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@ -96,11 +116,14 @@ define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32>
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_10
; CHECK: vpcmpleq %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) nounwind {
; CHECK-LABEL: test256_10:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpleq %ymm2, %ymm3, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <4 x i64> %x1, %y1
%mask0 = icmp sle <4 x i64> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
@ -108,11 +131,14 @@ define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64
ret <4 x i64> %max
}
; CHECK-LABEL: @test256_11
; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
; CHECK-LABEL: test256_11:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %ymm2, %ymm1, %k1
; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <4 x i64> %x1, %y1
%y = load <4 x i64>, <4 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <4 x i64> %x, %y
@ -121,11 +147,14 @@ define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4
ret <4 x i64> %max
}
; CHECK-LABEL: @test256_12
; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
; CHECK-LABEL: test256_12:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1
; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <8 x i32> %x1, %y1
%y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask0 = icmp ule <8 x i32> %x, %y
@ -134,11 +163,13 @@ define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8
ret <8 x i32> %max
}
; CHECK-LABEL: test256_13
; CHECK: vpcmpeqq (%rdi){1to4}, %ymm
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-LABEL: test256_13:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k1
; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
%y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer
@ -147,11 +178,13 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind
ret <4 x i64> %max
}
; CHECK-LABEL: test256_14
; CHECK: vpcmpled (%rdi){1to8}, %ymm
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind {
; CHECK-LABEL: test256_14:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi){1to8}, %ymm0, %k1
; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
%y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer
@ -160,11 +193,14 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind
ret <8 x i32> %max
}
; CHECK-LABEL: test256_15
; CHECK: vpcmpgtd (%rdi){1to8}, %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
; CHECK-LABEL: test256_15:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1
; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <8 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
@ -175,11 +211,14 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32
ret <8 x i32> %max
}
; CHECK-LABEL: test256_16
; CHECK: vpcmpgtq (%rdi){1to4}, %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
; CHECK-LABEL: test256_16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %ymm1, %ymm2, %k1
; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <4 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
@ -190,95 +229,114 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64
ret <4 x i64> %max
}
; CHECK-LABEL: test128_1
; CHECK: vpcmpeqq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <2 x i64> @test128_1(<2 x i64> %x, <2 x i64> %y) nounwind {
; CHECK-LABEL: test128_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp eq <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y
ret <2 x i64> %max
}
; CHECK-LABEL: test128_2
; CHECK: vpcmpgtq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind {
; CHECK-LABEL: test128_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sgt <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
ret <2 x i64> %max
}
; CHECK-LABEL: @test128_3
; CHECK: vpcmpled {{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_3(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1) nounwind {
; CHECK-LABEL: test128_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k1
; CHECK-NEXT: vmovdqa32 %xmm2, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp sge <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y
ret <4 x i32> %max
}
; CHECK-LABEL: test128_4
; CHECK: vpcmpnleuq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind {
; CHECK-LABEL: test128_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask = icmp ugt <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
ret <2 x i64> %max
}
; CHECK-LABEL: test128_5
; CHECK: vpcmpeqd (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwind {
; CHECK-LABEL: test128_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %yp, align 4
%mask = icmp eq <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_6
; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test128_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp sgt <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_7
; CHECK: vpcmpled (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test128_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp sle <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_8
; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test128_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp ule <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_9
; CHECK: vpcmpeqd %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32> %y1) nounwind {
; CHECK-LABEL: test128_9:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp eq <4 x i32> %x1, %y1
%mask0 = icmp eq <4 x i32> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
@ -286,11 +344,14 @@ define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32>
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_10
; CHECK: vpcmpleq %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) nounwind {
; CHECK-LABEL: test128_10:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpleq %xmm2, %xmm3, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <2 x i64> %x1, %y1
%mask0 = icmp sle <2 x i64> %x, %y
%mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer
@ -298,11 +359,14 @@ define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64
ret <2 x i64> %max
}
; CHECK-LABEL: @test128_11
; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
; CHECK-LABEL: test128_11:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %xmm2, %xmm1, %k1
; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sgt <2 x i64> %x1, %y1
%y = load <2 x i64>, <2 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <2 x i64> %x, %y
@ -311,11 +375,14 @@ define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2
ret <2 x i64> %max
}
; CHECK-LABEL: @test128_12
; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
; CHECK-LABEL: test128_12:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1
; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <4 x i32> %x1, %y1
%y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask0 = icmp ule <4 x i32> %x, %y
@ -324,11 +391,13 @@ define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4
ret <4 x i32> %max
}
; CHECK-LABEL: test128_13
; CHECK: vpcmpeqq (%rdi){1to2}, %xmm
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-LABEL: test128_13:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k1
; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
%y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
@ -337,11 +406,13 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind
ret <2 x i64> %max
}
; CHECK-LABEL: test128_14
; CHECK: vpcmpled (%rdi){1to4}, %xmm
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind {
; CHECK-LABEL: test128_14:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi){1to4}, %xmm0, %k1
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
%y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
@ -350,11 +421,14 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind
ret <4 x i32> %max
}
; CHECK-LABEL: test128_15
; CHECK: vpcmpgtd (%rdi){1to4}, %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
; CHECK-LABEL: test128_15:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1
; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <4 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
@ -365,11 +439,14 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32
ret <4 x i32> %max
}
; CHECK-LABEL: test128_16
; CHECK: vpcmpgtq (%rdi){1to2}, %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
; CHECK-LABEL: test128_16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %xmm1, %xmm2, %k1
; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%mask1 = icmp sge <2 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0