mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[X86] Enable masked GPR broadcasts to be formed even if the broadcast has more than one use.
This is a cheap instruction. It's better to repeat it than to do two separate operations. There are probably more cases like this, but this one was reported as a regression in our internal benchmarking.
This commit is contained in:
parent
7e98b6c7a5
commit
eecd3ebbe0
@ -1310,12 +1310,14 @@ defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
|
||||
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
|
||||
X86VectorVTInfo _, SDPatternOperator OpNode,
|
||||
RegisterClass SrcRC> {
|
||||
// Fold with a mask even if it has multiple uses since it is cheap.
|
||||
let ExeDomain = _.ExeDomain in
|
||||
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins SrcRC:$src),
|
||||
"vpbroadcast"#_.Suffix, "$src", "$src",
|
||||
(_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
|
||||
Sched<[SchedRR]>;
|
||||
(_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
|
||||
/*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
|
||||
T8PD, EVEX, Sched<[SchedRR]>;
|
||||
}
|
||||
|
||||
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
|
||||
@ -1333,11 +1335,12 @@ multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite Sched
|
||||
(!cast<Instruction>(Name#rr)
|
||||
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
|
||||
|
||||
def : Pat <(vselect_mask _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
|
||||
// Fold with a mask even if it has multiple uses since it is cheap.
|
||||
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
|
||||
(!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
|
||||
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
|
||||
|
||||
def : Pat <(vselect_mask _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
|
||||
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
|
||||
(!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
|
||||
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
|
||||
}
|
||||
|
@ -26,102 +26,64 @@ define i16 @unpckbw_test(i16 %a0, i16 %a1) {
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define <16 x i32>@test_int_x86_avx512_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcastd_gpr_512:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x44,0x24,0x01]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcastd_gpr_512:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: vpbroadcastd %edi, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xc7]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x4c,0x24,0x01]
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0x44,0x24,0x01]
|
||||
; X86-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
|
||||
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: vpbroadcastd %edi, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xcf]
|
||||
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastd %edi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7c,0xc7]
|
||||
; X64-NEXT: vpbroadcastd %edi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xd7]
|
||||
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
|
||||
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
|
||||
ret <16 x i32> %res
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
|
||||
%res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
|
||||
%res3 = add <16 x i32> %res, %res1
|
||||
%res4 = add <16 x i32> %res2, %res3
|
||||
ret <16 x i32> %res4
|
||||
}
|
||||
|
||||
define <16 x i32>@test_int_x86_avx512_maskz_pbroadcastd_gpr_512(i32 %x0, i16 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastd_gpr_512:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0x44,0x24,0x01]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastd_gpr_512:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastd %edi, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xc7]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
|
||||
ret <16 x i32> %res
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16)
|
||||
|
||||
define <8 x i64>@test_int_x86_avx512_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcastq_gpr_512:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x84,0x24,0x04,0x00,0x00,0x00]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcastq_gpr_512:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: vpbroadcastq %rdi, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xc7]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x8c,0x24,0x04,0x00,0x00,0x00]
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
|
||||
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0x84,0x24,0x04,0x00,0x00,0x00]
|
||||
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqa64 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
|
||||
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: vpbroadcastq %rdi, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xcf]
|
||||
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x7c,0xc7]
|
||||
; X64-NEXT: vpbroadcastq %rdi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7c,0xd7]
|
||||
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
|
||||
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
|
||||
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask)
|
||||
%res3 = add <8 x i64> %res, %res1
|
||||
%res4 = add <8 x i64> %res2, %res3
|
||||
ret <8 x i64> %res4
|
||||
}
|
||||
|
||||
define <8 x i64>@test_int_x86_avx512_maskz_pbroadcastq_gpr_512(i64 %x0, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastq_gpr_512:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
|
||||
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0x84,0x24,0x04,0x00,0x00,0x00]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastq_gpr_512:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastq %rdi, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7c,0xc7]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask)
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8)
|
||||
|
||||
|
||||
@ -5303,14 +5265,14 @@ define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
|
||||
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI303_0, kind: FK_Data_4
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI299_0, kind: FK_Data_4
|
||||
; X86-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
|
||||
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI303_1, kind: FK_Data_4
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI299_1, kind: FK_Data_4
|
||||
; X86-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
|
||||
; X86-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
|
||||
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI303_2, kind: FK_Data_4
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI299_2, kind: FK_Data_4
|
||||
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
@ -5319,14 +5281,14 @@ define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
|
||||
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI303_0-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI299_0-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
|
||||
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI303_1-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI299_1-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
|
||||
; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
|
||||
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI303_2-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI299_2-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
|
||||
|
@ -46,99 +46,62 @@ define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_b_gpr_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_b_gpr_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastb %edi, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
|
||||
define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0x44,0x24,0x04]
|
||||
; X86-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqu8 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
|
||||
; X86-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastb %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xcf]
|
||||
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastb %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc7]
|
||||
; X64-NEXT: vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7]
|
||||
; X64-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
|
||||
; X64-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_maskz_pbroadcast_b_gpr_512(i8 %x0, i64 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_b_gpr_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_b_gpr_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastb %edi, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
|
||||
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
|
||||
%res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
|
||||
%res3 = add <64 x i8> %res, %res1
|
||||
%res4 = add <64 x i8> %res2, %res3
|
||||
ret <64 x i8> %res4
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_w_gpr_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x44,0x24,0x02]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_w_gpr_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastw %edi, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) {
|
||||
define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x4c,0x24,0x02]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0x44,0x24,0x02]
|
||||
; X86-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
|
||||
; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastw %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xcf]
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastw %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc7]
|
||||
; X64-NEXT: vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7]
|
||||
; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
|
||||
; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_maskz_pbroadcast_w_gpr_512(i16 %x0, i32 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_w_gpr_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0x44,0x24,0x02]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_w_gpr_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastw %edi, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
|
||||
%res3 = add <32 x i16> %res, %res1
|
||||
%res4 = add <32 x i16> %res2, %res3
|
||||
ret <32 x i16> %res4
|
||||
}
|
||||
|
||||
declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)
|
||||
|
||||
|
@ -4,197 +4,127 @@
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_b_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_b_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x4c,0x24,0x04]
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0x44,0x24,0x04]
|
||||
; X86-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqu8 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2]
|
||||
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastb %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf]
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastb %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
|
||||
; X64-NEXT: vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7]
|
||||
; X64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2]
|
||||
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_maskz_pbroadcast_b_gpr_128(i8 %x0, i16 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_b_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_b_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastb %edi, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
|
||||
ret <16 x i8> %res
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
|
||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
|
||||
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
|
||||
%res3 = add <16 x i8> %res, %res1
|
||||
%res4 = add <16 x i8> %res2, %res3
|
||||
ret <16 x i8> %res4
|
||||
}
|
||||
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8)
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_w_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_w_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x4c,0x24,0x04]
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0x44,0x24,0x02]
|
||||
; X86-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
|
||||
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastw %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xcf]
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastw %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
|
||||
; X64-NEXT: vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7]
|
||||
; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
|
||||
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
|
||||
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
|
||||
%res3 = add <8 x i16> %res, %res1
|
||||
%res4 = add <8 x i16> %res2, %res3
|
||||
ret <8 x i16> %res4
|
||||
}
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_maskz_pbroadcast_w_gpr_128(i16 %x0, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_w_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0x44,0x24,0x02]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_w_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastw %edi, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32)
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_b_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_b_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastb %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x4c,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0x44,0x24,0x04]
|
||||
; X86-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqu8 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2]
|
||||
; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastb %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf]
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastb %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
|
||||
; X64-NEXT: vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7]
|
||||
; X64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2]
|
||||
; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
|
||||
%res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
|
||||
%res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
|
||||
%res3 = add <32 x i8> %res, %res1
|
||||
%res4 = add <32 x i8> %res2, %res3
|
||||
ret <32 x i8> %res4
|
||||
}
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_maskz_pbroadcast_b_gpr_256(i8 %x0, i32 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_b_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_b_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastb %edi, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_w_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_w_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastw %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x4c,0x24,0x04]
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0x44,0x24,0x02]
|
||||
; X86-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqu16 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
|
||||
; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastw %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf]
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastw %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
|
||||
; X64-NEXT: vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7]
|
||||
; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
|
||||
; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_maskz_pbroadcast_w_gpr_256(i16 %x0, i16 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_w_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0x44,0x24,0x02]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_w_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastw %edi, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
|
||||
ret <16 x i16> %res
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
|
||||
%res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
|
||||
%res3 = add <16 x i16> %res, %res1
|
||||
%res4 = add <16 x i16> %res2, %res3
|
||||
ret <16 x i16> %res4
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)
|
||||
|
@ -4,204 +4,131 @@
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_d_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_d_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x4c,0x24,0x04]
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0x44,0x24,0x01]
|
||||
; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqa32 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
|
||||
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastd %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xcf]
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastd %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
|
||||
; X64-NEXT: vpbroadcastd %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd7]
|
||||
; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
|
||||
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
|
||||
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
|
||||
%res3 = add <4 x i32> %res, %res1
|
||||
%res4 = add <4 x i32> %res2, %res3
|
||||
ret <4 x i32> %res4
|
||||
}
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_maskz_pbroadcast_d_gpr_128(i32 %x0, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_d_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0x44,0x24,0x01]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_d_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastd %edi, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_q_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vmovddup {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x44,0x24,0x04]
|
||||
; X86-NEXT: # xmm0 = mem[0,0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_q_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x4c,0x24,0x04]
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0x84,0x24,0x04,0x00,0x00,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqa64 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
|
||||
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastq %rdi, %xmm1 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xcf]
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
|
||||
; X64-NEXT: vpbroadcastq %rdi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xd7]
|
||||
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
|
||||
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1)
|
||||
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask)
|
||||
%res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer,i8 %mask)
|
||||
%res3 = add <2 x i64> %res, %res1
|
||||
%res4 = add <2 x i64> %res2, %res3
|
||||
ret <2 x i64> %res4
|
||||
}
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_maskz_pbroadcast_q_gpr_128(i64 %x0, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_q_gpr_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0x84,0x24,0x04,0x00,0x00,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_q_gpr_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer,i8 %mask)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_d_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_d_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastd %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
|
||||
define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x4c,0x24,0x04]
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0x44,0x24,0x01]
|
||||
; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqa32 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
|
||||
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastd %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xcf]
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastd %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
|
||||
; X64-NEXT: vpbroadcastd %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd7]
|
||||
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
|
||||
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_maskz_pbroadcast_d_gpr_256(i32 %x0, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_d_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0x44,0x24,0x01]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_d_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastd %edi, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
|
||||
ret <8 x i32> %res
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
|
||||
%res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
|
||||
%res3 = add <8 x i32> %res, %res1
|
||||
%res4 = add <8 x i32> %res2, %res3
|
||||
ret <8 x i32> %res4
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_pbroadcast_q_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x44,0x24,0x04]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_pbroadcast_q_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastq %rdi, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x4c,0x24,0x04]
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0x84,0x24,0x04,0x00,0x00,0x00]
|
||||
; X86-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc1]
|
||||
; X86-NEXT: vmovdqa64 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0xd1]
|
||||
; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
|
||||
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastq %rdi, %ymm1 # encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xcf]
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
|
||||
; X64-NEXT: vpbroadcastq %rdi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xd7]
|
||||
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
|
||||
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1)
|
||||
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask)
|
||||
%res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer,i8 %mask)
|
||||
%res3 = add <4 x i64> %res, %res1
|
||||
%res4 = add <4 x i64> %res2, %res3
|
||||
ret <4 x i64> %res4
|
||||
}
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_maskz_pbroadcast_q_gpr_256(i64 %x0, i8 %mask) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcast_q_gpr_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0x84,0x24,0x04,0x00,0x00,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcast_q_gpr_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xc7]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer,i8 %mask)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user