Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2025-01-31 20:51:52 +01:00
[ScalarizeMaskedMemIntrin] Bitcast the mask to the scalar domain and use scalar bit tests for the branches.
X86 at least is able to use movmsk or kmov to move the mask to the scalar domain. Then we can just use test instructions to test individual bits. This is more efficient than extracting each mask element individually.

I special cased v1i1 to use the previous behavior. This avoids poor type legalization of bitcast of v1i1 to i1.

I've skipped expandload/compressstore as I think we need to handle constant masks for those better first.

Many tests end up with duplicate test instructions due to tail duplication in the branch folding pass. But the same thing happens when constructing similar code in C. So it's not unique to the scalarization.

Not sure if this lowering code will also be good for other targets, but we're only testing X86 today.

Differential Revision: https://reviews.llvm.org/D65319

llvm-svn: 367489
parent f234df8098
commit 4b86c5ad65
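To make the new lowering concrete, here is a minimal sketch (illustrative only, not taken verbatim from the patch; the %scalar_mask name follows the comments in the pass, and the surrounding blocks depend on which intrinsic is being scalarized). For a <4 x i1> mask, the per-lane branch condition changes from an extractelement to a scalar bit test:

    ; before: extract each mask lane
    %mask_0 = extractelement <4 x i1> %mask, i32 0
    br i1 %mask_0, label %cond.load, label %else

    ; after: one bitcast, then a bit test per lane
    %scalar_mask = bitcast <4 x i1> %mask to i4
    %mask_0 = and i4 %scalar_mask, 1        ; 1 << 0
    %cond = icmp ne i4 %mask_0, 0
    br i1 %cond, label %cond.load, label %else

On X86 the bitcast lowers to a single movmsk or kmov, and each bit test becomes a test against an immediate, which is what the updated CHECK lines in the tests below show.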
@@ -173,15 +173,30 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
    // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
    // %cond = icmp ne i16 %mask_1, 0
    // br i1 %mask_1, label %cond.load, label %else
    //

    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx);
    }

    // Create "cond" block
    //
@@ -290,13 +305,29 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
    // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
    // %cond = icmp ne i16 %mask_1, 0
    // br i1 %mask_1, label %cond.store, label %else
    //
    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx);
    }

    // Create "cond" block
    //
@@ -392,15 +423,30 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    // %Mask1 = extractelement <16 x i1> %Mask, i32 1
    // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
    // %cond = icmp ne i16 %mask_1, 0
    // br i1 %Mask1, label %cond.load, label %else
    //

    Value *Predicate =
        Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
@@ -499,14 +545,29 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
    return;
  }

  // If the mask is not v1i1, use scalar bit test operations. This generates
  // better results on X86 at least.
  Value *SclrMask;
  if (VectorWidth != 1) {
    Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
    SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
  }

  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    // Fill the "else" block, created in the previous iteration
    //
    // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
    // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
    // %cond = icmp ne i16 %mask_1, 0
    // br i1 %Mask1, label %cond.store, label %else
    //
    Value *Predicate =
        Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    Value *Predicate;
    if (VectorWidth != 1) {
      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
      Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                       Builder.getIntN(VectorWidth, 0));
    } else {
      Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
    }

    // Create "cond" block
    //
@@ -31,22 +31,26 @@ define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i3
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB0_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB0_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vmovmskpd %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: jne .LBB0_1
; NOGATHER-NEXT: # %bb.2: # %else
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: jne .LBB0_3
; NOGATHER-NEXT: .LBB0_4: # %else2
; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB0_1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: movl (%rcx), %ecx
; NOGATHER-NEXT: vpinsrq $0, %rcx, %xmm1, %xmm1
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB0_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: .LBB0_3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB0_4: # %else2
; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
@@ -80,22 +84,26 @@ define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks,
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB1_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB1_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vmovmskpd %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: jne .LBB1_1
; NOGATHER-NEXT: # %bb.2: # %else
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: jne .LBB1_3
; NOGATHER-NEXT: .LBB1_4: # %else2
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB1_1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: movl (%rcx), %ecx
; NOGATHER-NEXT: vpinsrq $0, %rcx, %xmm1, %xmm1
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB1_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: .LBB1_3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB1_4: # %else2
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; NOGATHER-NEXT: retq
entry:
@@ -130,21 +138,25 @@ define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB2_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; NOGATHER-NEXT: .LBB2_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vmovmskpd %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: jne .LBB2_1
; NOGATHER-NEXT: # %bb.2: # %else
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: jne .LBB2_3
; NOGATHER-NEXT: .LBB2_4: # %else2
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB2_1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB2_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: .LBB2_3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT: .LBB2_4: # %else2
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
@@ -176,21 +188,25 @@ define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %m
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB3_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; NOGATHER-NEXT: .LBB3_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vmovmskpd %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: jne .LBB3_1
; NOGATHER-NEXT: # %bb.2: # %else
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: jne .LBB3_3
; NOGATHER-NEXT: .LBB3_4: # %else2
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB3_1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB3_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: .LBB3_3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT: .LBB3_4: # %else2
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
@@ -221,35 +237,38 @@ define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i3
;
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT: vmovmskps %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB4_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm0, %rax
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vpinsrd $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB4_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT: vpinsrd $1, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB4_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm0, %rax
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $4, %al
; NOGATHER-NEXT: jne .LBB4_5
; NOGATHER-NEXT: # %bb.6: # %else5
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: jne .LBB4_7
; NOGATHER-NEXT: .LBB4_8: # %else8
; NOGATHER-NEXT: vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT: vzeroupper
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB4_5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vpinsrd $2, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: je .LBB4_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: .LBB4_7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_8: # %else8
; NOGATHER-NEXT: vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT: vzeroupper
; NOGATHER-NEXT: retq
@@ -278,36 +297,39 @@ define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <
;
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT: vmovmskps %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB5_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm0, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; NOGATHER-NEXT: .LBB5_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB5_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT: .LBB5_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB5_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm0, %rax
; NOGATHER-NEXT: testb $4, %al
; NOGATHER-NEXT: jne .LBB5_5
; NOGATHER-NEXT: # %bb.6: # %else5
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: jne .LBB5_7
; NOGATHER-NEXT: .LBB5_8: # %else8
; NOGATHER-NEXT: vmovaps %xmm2, %xmm0
; NOGATHER-NEXT: vzeroupper
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB5_5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT: .LBB5_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: je .LBB5_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: .LBB5_7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT: .LBB5_8: # %else8
; NOGATHER-NEXT: vmovaps %xmm2, %xmm0
; NOGATHER-NEXT: vzeroupper
; NOGATHER-NEXT: retq
@@ -347,76 +369,82 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT: vpmovmskb %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm1, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: vmovq %xmm3, %rcx
; NOGATHER-NEXT: vpinsrd $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_2: # %else
; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB6_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rcx
; NOGATHER-NEXT: vpinsrd $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_4: # %else2
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_6: # %else5
; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm1, %xmm3
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm0
; NOGATHER-NEXT: testb $4, %al
; NOGATHER-NEXT: jne .LBB6_5
; NOGATHER-NEXT: # %bb.6: # %else5
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: jne .LBB6_7
; NOGATHER-NEXT: .LBB6_8: # %else8
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_10
; NOGATHER-NEXT: # %bb.9: # %cond.load10
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: testb $16, %al
; NOGATHER-NEXT: jne .LBB6_9
; NOGATHER-NEXT: .LBB6_10: # %else11
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $32, %al
; NOGATHER-NEXT: je .LBB6_12
; NOGATHER-NEXT: # %bb.11: # %cond.load13
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_11: # %cond.load13
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vpinsrd $1, (%rcx), %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_12: # %else14
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_14
; NOGATHER-NEXT: # %bb.13: # %cond.load16
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_14: # %else17
; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT: testb $64, %al
; NOGATHER-NEXT: jne .LBB6_13
; NOGATHER-NEXT: # %bb.14: # %else17
; NOGATHER-NEXT: testb $-128, %al
; NOGATHER-NEXT: jne .LBB6_15
; NOGATHER-NEXT: .LBB6_16: # %else20
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB6_5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vpinsrd $2, (%rcx), %xmm1, %xmm3
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: je .LBB6_8
; NOGATHER-NEXT: .LBB6_7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT: vpinsrd $3, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: testb $16, %al
; NOGATHER-NEXT: je .LBB6_10
; NOGATHER-NEXT: .LBB6_9: # %cond.load10
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vpinsrd $0, (%rcx), %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: testb $32, %al
; NOGATHER-NEXT: jne .LBB6_11
; NOGATHER-NEXT: jmp .LBB6_12
; NOGATHER-NEXT: .LBB6_13: # %cond.load16
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT: vpinsrd $2, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT: testb $-128, %al
; NOGATHER-NEXT: je .LBB6_16
; NOGATHER-NEXT: # %bb.15: # %cond.load19
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: .LBB6_15: # %cond.load19
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_16: # %else20
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
@@ -456,77 +484,83 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT: vpmovmskb %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0],ymm1[1,2,3,4,5,6,7]
; NOGATHER-NEXT: vmovq %xmm3, %rcx
; NOGATHER-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; NOGATHER-NEXT: .LBB7_2: # %else
; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB7_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rcx
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB7_4: # %else2
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0,1],mem[0],xmm1[3]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB7_6: # %else5
; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],mem[0]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm0
; NOGATHER-NEXT: testb $4, %al
; NOGATHER-NEXT: jne .LBB7_5
; NOGATHER-NEXT: # %bb.6: # %else5
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: jne .LBB7_7
; NOGATHER-NEXT: .LBB7_8: # %else8
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_10
; NOGATHER-NEXT: # %bb.9: # %cond.load10
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: testb $16, %al
; NOGATHER-NEXT: jne .LBB7_9
; NOGATHER-NEXT: .LBB7_10: # %else11
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $32, %al
; NOGATHER-NEXT: je .LBB7_12
; NOGATHER-NEXT: # %bb.11: # %cond.load13
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_11: # %cond.load13
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_12: # %else14
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_14
; NOGATHER-NEXT: # %bb.13: # %cond.load16
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_14: # %else17
; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT: testb $64, %al
; NOGATHER-NEXT: jne .LBB7_13
; NOGATHER-NEXT: # %bb.14: # %else17
; NOGATHER-NEXT: testb $-128, %al
; NOGATHER-NEXT: jne .LBB7_15
; NOGATHER-NEXT: .LBB7_16: # %else20
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB7_5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1],mem[0],xmm1[3]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: je .LBB7_8
; NOGATHER-NEXT: .LBB7_7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],mem[0]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: testb $16, %al
; NOGATHER-NEXT: je .LBB7_10
; NOGATHER-NEXT: .LBB7_9: # %cond.load10
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3,4,5,6,7]
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: testb $32, %al
; NOGATHER-NEXT: jne .LBB7_11
; NOGATHER-NEXT: jmp .LBB7_12
; NOGATHER-NEXT: .LBB7_13: # %cond.load16
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT: testb $-128, %al
; NOGATHER-NEXT: je .LBB7_16
; NOGATHER-NEXT: # %bb.15: # %cond.load19
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: .LBB7_15: # %cond.load19
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_16: # %else20
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
@@ -560,41 +594,43 @@ define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i6
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT: vmovmskps %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm3
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: vpinsrq $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB8_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB8_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm3
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT: vpinsrq $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB8_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB8_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT: testb $4, %al
; NOGATHER-NEXT: jne .LBB8_5
; NOGATHER-NEXT: # %bb.6: # %else5
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: jne .LBB8_7
; NOGATHER-NEXT: .LBB8_8: # %else8
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB8_5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT: vpinsrq $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: je .LBB8_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: .LBB8_7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB8_8: # %else8
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
@@ -628,41 +664,43 @@ define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT: vmovmskps %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1],ymm1[2,3,4,5,6,7]
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; NOGATHER-NEXT: .LBB9_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB9_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vmovhps {{.*#+}} xmm3 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT: vmovhps {{.*#+}} xmm0 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB9_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vmovlps {{.*#+}} xmm3 = mem[0,1],xmm3[2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB9_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT: testb $4, %al
; NOGATHER-NEXT: jne .LBB9_5
; NOGATHER-NEXT: # %bb.6: # %else5
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: jne .LBB9_7
; NOGATHER-NEXT: .LBB9_8: # %else8
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB9_5: # %cond.load4
; NOGATHER-NEXT: vmovq %xmm0, %rcx
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT: vmovlps {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT: testb $8, %al
; NOGATHER-NEXT: je .LBB9_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: .LBB9_7: # %cond.load7
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB9_8: # %else8
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
@@ -694,20 +732,24 @@ define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i6
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB10_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT: .LBB10_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vmovmskpd %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: jne .LBB10_1
; NOGATHER-NEXT: # %bb.2: # %else
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: jne .LBB10_3
; NOGATHER-NEXT: .LBB10_4: # %else2
; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB10_1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: vpinsrq $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB10_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: .LBB10_3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT: .LBB10_4: # %else2
; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
@@ -739,20 +781,24 @@ define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vmovmskpd %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB11_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: jne .LBB11_1
; NOGATHER-NEXT: # %bb.2: # %else
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: jne .LBB11_3
; NOGATHER-NEXT: .LBB11_4: # %else2
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
; NOGATHER-NEXT: .LBB11_1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm2, %rcx
; NOGATHER-NEXT: vmovlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; NOGATHER-NEXT: .LBB11_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: testb $2, %al
; NOGATHER-NEXT: je .LBB11_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: .LBB11_3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT: .LBB11_4: # %else2
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
@@ -158,13 +158,12 @@ define <16 x half> @test_mask_load_16xf16(<16 x i1> %mask, <16 x half>* %addr, <
; CHECK: ## %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: vpmovb2m %xmm0, %k0
; CHECK-NEXT: kmovd %k0, %ecx
; CHECK-NEXT: vpmovmskb %xmm0, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_1
; CHECK-NEXT: ## %bb.2: ## %cond.load
; CHECK-NEXT: movswl (%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: movswl (%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm8
; CHECK-NEXT: jmp LBB12_3
; CHECK-NEXT: LBB12_1:
@@ -172,13 +171,11 @@ define <16 x half> @test_mask_load_16xf16(<16 x i1> %mask, <16 x half>* %addr, <
; CHECK-NEXT: LBB12_3: ## %else
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vxorps %xmm9, %xmm9, %xmm9
; CHECK-NEXT: kshiftrw $1, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: testb $2, %cl
; CHECK-NEXT: je LBB12_4
; CHECK-NEXT: ## %bb.5: ## %cond.load1
; CHECK-NEXT: movswl 2(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: movswl 2(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vmovaps %xmm2, %xmm1
; CHECK-NEXT: vmovaps %xmm2, %xmm7
; CHECK-NEXT: vmovaps %xmm2, %xmm6
@@ -193,7 +190,9 @@ define <16 x half> @test_mask_load_16xf16(<16 x i1> %mask, <16 x half>* %addr, <
; CHECK-NEXT: vmovaps %xmm2, %xmm11
; CHECK-NEXT: vmovaps %xmm2, %xmm10
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm2
; CHECK-NEXT: jmp LBB12_6
; CHECK-NEXT: testb $4, %cl
; CHECK-NEXT: jne LBB12_7
; CHECK-NEXT: jmp LBB12_8
; CHECK-NEXT: LBB12_4:
; CHECK-NEXT: vmovaps %xmm2, %xmm1
; CHECK-NEXT: vmovaps %xmm2, %xmm7
@@ -208,129 +207,52 @@ define <16 x half> @test_mask_load_16xf16(<16 x i1> %mask, <16 x half>* %addr, <
; CHECK-NEXT: vmovaps %xmm2, %xmm12
; CHECK-NEXT: vmovaps %xmm2, %xmm11
; CHECK-NEXT: vmovaps %xmm2, %xmm10
; CHECK-NEXT: LBB12_6: ## %else2
; CHECK-NEXT: kshiftrw $2, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: testb $4, %cl
; CHECK-NEXT: je LBB12_8
; CHECK-NEXT: ## %bb.7: ## %cond.load4
; CHECK-NEXT: movswl 4(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: LBB12_7: ## %cond.load4
; CHECK-NEXT: movswl 4(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm1
; CHECK-NEXT: LBB12_8: ## %else5
; CHECK-NEXT: kshiftrw $3, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_10
; CHECK-NEXT: ## %bb.9: ## %cond.load7
; CHECK-NEXT: movswl 6(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm7
; CHECK-NEXT: LBB12_10: ## %else8
; CHECK-NEXT: kshiftrw $4, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_12
; CHECK-NEXT: ## %bb.11: ## %cond.load10
; CHECK-NEXT: movswl 8(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm6
; CHECK-NEXT: testb $8, %cl
; CHECK-NEXT: jne LBB12_9
; CHECK-NEXT: ## %bb.10: ## %else8
; CHECK-NEXT: testb $16, %cl
; CHECK-NEXT: jne LBB12_11
; CHECK-NEXT: LBB12_12: ## %else11
; CHECK-NEXT: kshiftrw $5, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_14
; CHECK-NEXT: ## %bb.13: ## %cond.load13
; CHECK-NEXT: movswl 10(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm5
; CHECK-NEXT: testb $32, %cl
; CHECK-NEXT: jne LBB12_13
; CHECK-NEXT: LBB12_14: ## %else14
; CHECK-NEXT: kshiftrw $6, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_16
; CHECK-NEXT: ## %bb.15: ## %cond.load16
; CHECK-NEXT: movswl 12(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm4
; CHECK-NEXT: testb $64, %cl
; CHECK-NEXT: jne LBB12_15
; CHECK-NEXT: LBB12_16: ## %else17
; CHECK-NEXT: kshiftrw $7, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_18
; CHECK-NEXT: ## %bb.17: ## %cond.load19
; CHECK-NEXT: movswl 14(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm3
; CHECK-NEXT: testb $-128, %cl
; CHECK-NEXT: jne LBB12_17
; CHECK-NEXT: LBB12_18: ## %else20
; CHECK-NEXT: kshiftrw $8, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_20
; CHECK-NEXT: ## %bb.19: ## %cond.load22
; CHECK-NEXT: movswl 16(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm16
; CHECK-NEXT: testl $256, %ecx ## imm = 0x100
; CHECK-NEXT: jne LBB12_19
; CHECK-NEXT: LBB12_20: ## %else23
; CHECK-NEXT: kshiftrw $9, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_22
; CHECK-NEXT: ## %bb.21: ## %cond.load25
; CHECK-NEXT: movswl 18(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm15
; CHECK-NEXT: testl $512, %ecx ## imm = 0x200
; CHECK-NEXT: jne LBB12_21
; CHECK-NEXT: LBB12_22: ## %else26
; CHECK-NEXT: kshiftrw $10, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_24
; CHECK-NEXT: ## %bb.23: ## %cond.load28
; CHECK-NEXT: movswl 20(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm14
; CHECK-NEXT: testl $1024, %ecx ## imm = 0x400
; CHECK-NEXT: jne LBB12_23
; CHECK-NEXT: LBB12_24: ## %else29
; CHECK-NEXT: kshiftrw $11, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_26
; CHECK-NEXT: ## %bb.25: ## %cond.load31
; CHECK-NEXT: movswl 22(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm13
; CHECK-NEXT: testl $2048, %ecx ## imm = 0x800
; CHECK-NEXT: jne LBB12_25
; CHECK-NEXT: LBB12_26: ## %else32
; CHECK-NEXT: kshiftrw $12, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_28
; CHECK-NEXT: ## %bb.27: ## %cond.load34
; CHECK-NEXT: movswl 24(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm12
; CHECK-NEXT: testl $4096, %ecx ## imm = 0x1000
; CHECK-NEXT: jne LBB12_27
; CHECK-NEXT: LBB12_28: ## %else35
; CHECK-NEXT: kshiftrw $13, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_30
; CHECK-NEXT: ## %bb.29: ## %cond.load37
; CHECK-NEXT: movswl 26(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm11
; CHECK-NEXT: testl $8192, %ecx ## imm = 0x2000
; CHECK-NEXT: jne LBB12_29
; CHECK-NEXT: LBB12_30: ## %else38
; CHECK-NEXT: kshiftrw $14, %k0, %k1
; CHECK-NEXT: kmovd %k1, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: je LBB12_32
; CHECK-NEXT: ## %bb.31: ## %cond.load40
; CHECK-NEXT: movswl 28(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm10
; CHECK-NEXT: testl $16384, %ecx ## imm = 0x4000
; CHECK-NEXT: jne LBB12_31
; CHECK-NEXT: LBB12_32: ## %else41
; CHECK-NEXT: kshiftrw $15, %k0, %k0
; CHECK-NEXT: kmovd %k0, %ecx
; CHECK-NEXT: testb $1, %cl
; CHECK-NEXT: testl $32768, %ecx ## imm = 0x8000
; CHECK-NEXT: je LBB12_34
; CHECK-NEXT: ## %bb.33: ## %cond.load43
; CHECK-NEXT: LBB12_33: ## %cond.load43
; CHECK-NEXT: movswl 30(%rsi), %ecx
; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm9
@@ -384,6 +306,79 @@ define <16 x half> @test_mask_load_16xf16(<16 x i1> %mask, <16 x half>* %addr, <
; CHECK-NEXT: vmovd %xmm0, %ecx
; CHECK-NEXT: movw %cx, 30(%rax)
; CHECK-NEXT: retq
; CHECK-NEXT: LBB12_9: ## %cond.load7
; CHECK-NEXT: movswl 6(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm7
; CHECK-NEXT: testb $16, %cl
; CHECK-NEXT: je LBB12_12
; CHECK-NEXT: LBB12_11: ## %cond.load10
; CHECK-NEXT: movswl 8(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm6
; CHECK-NEXT: testb $32, %cl
; CHECK-NEXT: je LBB12_14
; CHECK-NEXT: LBB12_13: ## %cond.load13
; CHECK-NEXT: movswl 10(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm5
; CHECK-NEXT: testb $64, %cl
; CHECK-NEXT: je LBB12_16
; CHECK-NEXT: LBB12_15: ## %cond.load16
; CHECK-NEXT: movswl 12(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm4
; CHECK-NEXT: testb $-128, %cl
; CHECK-NEXT: je LBB12_18
; CHECK-NEXT: LBB12_17: ## %cond.load19
; CHECK-NEXT: movswl 14(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm3
; CHECK-NEXT: testl $256, %ecx ## imm = 0x100
; CHECK-NEXT: je LBB12_20
; CHECK-NEXT: LBB12_19: ## %cond.load22
; CHECK-NEXT: movswl 16(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm16
; CHECK-NEXT: testl $512, %ecx ## imm = 0x200
; CHECK-NEXT: je LBB12_22
; CHECK-NEXT: LBB12_21: ## %cond.load25
; CHECK-NEXT: movswl 18(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm15
; CHECK-NEXT: testl $1024, %ecx ## imm = 0x400
; CHECK-NEXT: je LBB12_24
; CHECK-NEXT: LBB12_23: ## %cond.load28
; CHECK-NEXT: movswl 20(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm14
; CHECK-NEXT: testl $2048, %ecx ## imm = 0x800
; CHECK-NEXT: je LBB12_26
; CHECK-NEXT: LBB12_25: ## %cond.load31
; CHECK-NEXT: movswl 22(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm13
; CHECK-NEXT: testl $4096, %ecx ## imm = 0x1000
; CHECK-NEXT: je LBB12_28
; CHECK-NEXT: LBB12_27: ## %cond.load34
; CHECK-NEXT: movswl 24(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm12
; CHECK-NEXT: testl $8192, %ecx ## imm = 0x2000
; CHECK-NEXT: je LBB12_30
; CHECK-NEXT: LBB12_29: ## %cond.load37
; CHECK-NEXT: movswl 26(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm11
; CHECK-NEXT: testl $16384, %ecx ## imm = 0x4000
; CHECK-NEXT: je LBB12_32
; CHECK-NEXT: LBB12_31: ## %cond.load40
; CHECK-NEXT: movswl 28(%rsi), %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm10
; CHECK-NEXT: testl $32768, %ecx ## imm = 0x8000
; CHECK-NEXT: jne LBB12_33
; CHECK-NEXT: jmp LBB12_34
%res = call <16 x half> @llvm.masked.load.v16f16(<16 x half>* %addr, i32 4, <16 x i1>%mask, <16 x half> zeroinitializer)
ret <16 x half> %res
}
@@ -394,159 +389,159 @@ define void @test_mask_store_16xf16(<16 x i1> %mask, <16 x half>* %addr, <16 x h
; CHECK-LABEL: test_mask_store_16xf16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: vpmovb2m %xmm0, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vpmovmskb %xmm0, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_2
; CHECK-NEXT: ## %bb.1: ## %cond.store
; CHECK-NEXT: vcvtps2ph $4, %xmm1, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, (%rdi)
; CHECK-NEXT: LBB13_2: ## %else
; CHECK-NEXT: kshiftrw $1, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_4
; CHECK-NEXT: ## %bb.3: ## %cond.store1
; CHECK-NEXT: vcvtps2ph $4, %xmm2, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 2(%rdi)
; CHECK-NEXT: jne LBB13_1
; CHECK-NEXT: ## %bb.2: ## %else
; CHECK-NEXT: testb $2, %al
; CHECK-NEXT: jne LBB13_3
; CHECK-NEXT: LBB13_4: ## %else2
; CHECK-NEXT: kshiftrw $2, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_6
; CHECK-NEXT: ## %bb.5: ## %cond.store3
; CHECK-NEXT: vcvtps2ph $4, %xmm3, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 4(%rdi)
; CHECK-NEXT: testb $4, %al
; CHECK-NEXT: jne LBB13_5
; CHECK-NEXT: LBB13_6: ## %else4
; CHECK-NEXT: kshiftrw $3, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_8
; CHECK-NEXT: ## %bb.7: ## %cond.store5
; CHECK-NEXT: vcvtps2ph $4, %xmm4, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 6(%rdi)
; CHECK-NEXT: testb $8, %al
; CHECK-NEXT: jne LBB13_7
; CHECK-NEXT: LBB13_8: ## %else6
; CHECK-NEXT: kshiftrw $4, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_10
; CHECK-NEXT: ## %bb.9: ## %cond.store7
; CHECK-NEXT: vcvtps2ph $4, %xmm5, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 8(%rdi)
; CHECK-NEXT: testb $16, %al
; CHECK-NEXT: jne LBB13_9
; CHECK-NEXT: LBB13_10: ## %else8
; CHECK-NEXT: kshiftrw $5, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_12
; CHECK-NEXT: ## %bb.11: ## %cond.store9
; CHECK-NEXT: vcvtps2ph $4, %xmm6, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 10(%rdi)
; CHECK-NEXT: testb $32, %al
; CHECK-NEXT: jne LBB13_11
; CHECK-NEXT: LBB13_12: ## %else10
; CHECK-NEXT: kshiftrw $6, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_14
; CHECK-NEXT: ## %bb.13: ## %cond.store11
; CHECK-NEXT: vcvtps2ph $4, %xmm7, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 12(%rdi)
; CHECK-NEXT: testb $64, %al
; CHECK-NEXT: jne LBB13_13
; CHECK-NEXT: LBB13_14: ## %else12
; CHECK-NEXT: kshiftrw $7, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_16
; CHECK-NEXT: ## %bb.15: ## %cond.store13
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 14(%rdi)
; CHECK-NEXT: testb $-128, %al
; CHECK-NEXT: jne LBB13_15
; CHECK-NEXT: LBB13_16: ## %else14
; CHECK-NEXT: kshiftrw $8, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_18
; CHECK-NEXT: ## %bb.17: ## %cond.store15
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 16(%rdi)
; CHECK-NEXT: testl $256, %eax ## imm = 0x100
; CHECK-NEXT: jne LBB13_17
; CHECK-NEXT: LBB13_18: ## %else16
; CHECK-NEXT: kshiftrw $9, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_20
; CHECK-NEXT: ## %bb.19: ## %cond.store17
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 18(%rdi)
; CHECK-NEXT: testl $512, %eax ## imm = 0x200
; CHECK-NEXT: jne LBB13_19
; CHECK-NEXT: LBB13_20: ## %else18
; CHECK-NEXT: kshiftrw $10, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_22
; CHECK-NEXT: ## %bb.21: ## %cond.store19
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 20(%rdi)
; CHECK-NEXT: testl $1024, %eax ## imm = 0x400
; CHECK-NEXT: jne LBB13_21
; CHECK-NEXT: LBB13_22: ## %else20
; CHECK-NEXT: kshiftrw $11, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_24
; CHECK-NEXT: ## %bb.23: ## %cond.store21
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 22(%rdi)
; CHECK-NEXT: testl $2048, %eax ## imm = 0x800
; CHECK-NEXT: jne LBB13_23
; CHECK-NEXT: LBB13_24: ## %else22
; CHECK-NEXT: kshiftrw $12, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_26
; CHECK-NEXT: ## %bb.25: ## %cond.store23
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 24(%rdi)
; CHECK-NEXT: testl $4096, %eax ## imm = 0x1000
; CHECK-NEXT: jne LBB13_25
; CHECK-NEXT: LBB13_26: ## %else24
; CHECK-NEXT: kshiftrw $13, %k0, %k1
; CHECK-NEXT: kmovd %k1, %eax
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je LBB13_28
; CHECK-NEXT: ## %bb.27: ## %cond.store25
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, 26(%rdi)
; CHECK-NEXT: testl $8192, %eax ## imm = 0x2000
; CHECK-NEXT: jne LBB13_27
; CHECK-NEXT: LBB13_28: ## %else26
; CHECK-NEXT: kshiftrw $14, %k0, %k1
|
||||
; CHECK-NEXT: kmovd %k1, %eax
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: je LBB13_30
|
||||
; CHECK-NEXT: ## %bb.29: ## %cond.store27
|
||||
; CHECK-NEXT: testl $16384, %eax ## imm = 0x4000
|
||||
; CHECK-NEXT: jne LBB13_29
|
||||
; CHECK-NEXT: LBB13_30: ## %else28
|
||||
; CHECK-NEXT: testl $32768, %eax ## imm = 0x8000
|
||||
; CHECK-NEXT: jne LBB13_31
|
||||
; CHECK-NEXT: LBB13_32: ## %else30
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: LBB13_1: ## %cond.store
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, (%rdi)
|
||||
; CHECK-NEXT: testb $2, %al
|
||||
; CHECK-NEXT: je LBB13_4
|
||||
; CHECK-NEXT: LBB13_3: ## %cond.store1
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm2, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 2(%rdi)
|
||||
; CHECK-NEXT: testb $4, %al
|
||||
; CHECK-NEXT: je LBB13_6
|
||||
; CHECK-NEXT: LBB13_5: ## %cond.store3
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm3, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 4(%rdi)
|
||||
; CHECK-NEXT: testb $8, %al
|
||||
; CHECK-NEXT: je LBB13_8
|
||||
; CHECK-NEXT: LBB13_7: ## %cond.store5
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm4, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 6(%rdi)
|
||||
; CHECK-NEXT: testb $16, %al
|
||||
; CHECK-NEXT: je LBB13_10
|
||||
; CHECK-NEXT: LBB13_9: ## %cond.store7
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm5, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 8(%rdi)
|
||||
; CHECK-NEXT: testb $32, %al
|
||||
; CHECK-NEXT: je LBB13_12
|
||||
; CHECK-NEXT: LBB13_11: ## %cond.store9
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm6, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 10(%rdi)
|
||||
; CHECK-NEXT: testb $64, %al
|
||||
; CHECK-NEXT: je LBB13_14
|
||||
; CHECK-NEXT: LBB13_13: ## %cond.store11
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm7, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 12(%rdi)
|
||||
; CHECK-NEXT: testb $-128, %al
|
||||
; CHECK-NEXT: je LBB13_16
|
||||
; CHECK-NEXT: LBB13_15: ## %cond.store13
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %eax
|
||||
; CHECK-NEXT: movw %ax, 28(%rdi)
|
||||
; CHECK-NEXT: LBB13_30: ## %else28
|
||||
; CHECK-NEXT: kshiftrw $15, %k0, %k0
|
||||
; CHECK-NEXT: kmovd %k0, %eax
|
||||
; CHECK-NEXT: testb $1, %al
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 14(%rdi)
|
||||
; CHECK-NEXT: testl $256, %eax ## imm = 0x100
|
||||
; CHECK-NEXT: je LBB13_18
|
||||
; CHECK-NEXT: LBB13_17: ## %cond.store15
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 16(%rdi)
|
||||
; CHECK-NEXT: testl $512, %eax ## imm = 0x200
|
||||
; CHECK-NEXT: je LBB13_20
|
||||
; CHECK-NEXT: LBB13_19: ## %cond.store17
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 18(%rdi)
|
||||
; CHECK-NEXT: testl $1024, %eax ## imm = 0x400
|
||||
; CHECK-NEXT: je LBB13_22
|
||||
; CHECK-NEXT: LBB13_21: ## %cond.store19
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 20(%rdi)
|
||||
; CHECK-NEXT: testl $2048, %eax ## imm = 0x800
|
||||
; CHECK-NEXT: je LBB13_24
|
||||
; CHECK-NEXT: LBB13_23: ## %cond.store21
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 22(%rdi)
|
||||
; CHECK-NEXT: testl $4096, %eax ## imm = 0x1000
|
||||
; CHECK-NEXT: je LBB13_26
|
||||
; CHECK-NEXT: LBB13_25: ## %cond.store23
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 24(%rdi)
|
||||
; CHECK-NEXT: testl $8192, %eax ## imm = 0x2000
|
||||
; CHECK-NEXT: je LBB13_28
|
||||
; CHECK-NEXT: LBB13_27: ## %cond.store25
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 26(%rdi)
|
||||
; CHECK-NEXT: testl $16384, %eax ## imm = 0x4000
|
||||
; CHECK-NEXT: je LBB13_30
|
||||
; CHECK-NEXT: LBB13_29: ## %cond.store27
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %ecx
|
||||
; CHECK-NEXT: movw %cx, 28(%rdi)
|
||||
; CHECK-NEXT: testl $32768, %eax ## imm = 0x8000
|
||||
; CHECK-NEXT: je LBB13_32
|
||||
; CHECK-NEXT: ## %bb.31: ## %cond.store29
|
||||
; CHECK-NEXT: LBB13_31: ## %cond.store29
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %xmm0, %eax
|
||||
; CHECK-NEXT: movw %ax, 30(%rdi)
|
||||
; CHECK-NEXT: LBB13_32: ## %else30
|
||||
; CHECK-NEXT: retq
|
||||
call void @llvm.masked.store.v16f16.p0v16f16(<16 x half> %val, <16 x half>* %addr, i32 4, <16 x i1>%mask)
|
||||
ret void
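For orientation, the vpmovmskb value and the testb/testl-with-immediate pairs in the expectations above are the X86 lowering of IR of roughly this shape. This is a hand-written sketch of lanes 0 and 1 only, with the function and value names (@store_sketch, %bit0, %ptr0, ...) invented for illustration rather than taken from the pass's actual output:

; Lanes 0 and 1 of a scalarized <16 x half> masked store.
; %scalar_mask is the value vpmovmskb materializes; each and + icmp ne
; pair below corresponds to one testb/testl against a bit immediate.
define void @store_sketch(half* %base, <16 x i1> %mask, <16 x half> %val) {
entry:
  %scalar_mask = bitcast <16 x i1> %mask to i16
  %bit0 = and i16 %scalar_mask, 1
  %cond0 = icmp ne i16 %bit0, 0
  br i1 %cond0, label %cond.store, label %else
cond.store:
  %elt0 = extractelement <16 x half> %val, i64 0
  %ptr0 = getelementptr inbounds half, half* %base, i32 0
  store half %elt0, half* %ptr0, align 2
  br label %else
else:
  %bit1 = and i16 %scalar_mask, 2
  %cond1 = icmp ne i16 %bit1, 0
  br i1 %cond1, label %cond.store1, label %else2
cond.store1:
  %elt1 = extractelement <16 x half> %val, i64 1
  %ptr1 = getelementptr inbounds half, half* %base, i32 1
  store half %elt1, half* %ptr1, align 2
  br label %else2
else2:
  ret void
}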
File diff suppressed because it is too large
@ -69,8 +69,9 @@ declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> ,
; SCALAR-NEXT: br label %else
; SCALAR: else:
; SCALAR-NEXT: %res.phi.else = phi
; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i64 1
; SCALAR-NEXT: br i1 %Mask1, label %cond.load1, label %else2
; SCALAR-NEXT: and i16 %{{.*}}, 2
; SCALAR-NEXT: icmp ne i16 %{{.*}}, 0
; SCALAR-NEXT: br i1 %{{.*}}, label %cond.load1, label %else2
define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test2:
@ -211,16 +212,18 @@ define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
; SCALAR-LABEL: test5
; SCALAR: %Mask0 = extractelement <16 x i1> %imask, i64 0
; SCALAR-NEXT: br i1 %Mask0, label %cond.store, label %else
; SCALAR: and i16 %scalar_mask, 1
; SCALAR-NEXT: icmp ne i16 %{{.*}}, 0
; SCALAR-NEXT: br i1 %{{.*}}, label %cond.store, label %else
; SCALAR: cond.store:
; SCALAR-NEXT: %Elt0 = extractelement <16 x i32> %val, i64 0
; SCALAR-NEXT: %Ptr0 = extractelement <16 x i32*> %gep.random, i64 0
; SCALAR-NEXT: store i32 %Elt0, i32* %Ptr0, align 4
; SCALAR-NEXT: br label %else
; SCALAR: else:
; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i64 1
; SCALAR-NEXT: br i1 %Mask1, label %cond.store1, label %else2
; SCALAR-NEXT: and i16 %scalar_mask, 2
; SCALAR-NEXT: icmp ne i16 %{{.*}}, 0
; SCALAR-NEXT: br i1 %{{.*}}, label %cond.store1, label %else2
define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
; KNL_64-LABEL: test5:
@ -1660,33 +1663,47 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL_64-NEXT: kshiftrw $1, %k0, %k1
; KNL_64-NEXT: kshiftrw $2, %k0, %k2
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: andb $1, %al
; KNL_64-NEXT: kmovw %k1, %ecx
; KNL_64-NEXT: andb $1, %cl
; KNL_64-NEXT: addb %cl, %cl
; KNL_64-NEXT: orb %al, %cl
; KNL_64-NEXT: kmovw %k2, %eax
; KNL_64-NEXT: andb $1, %al
; KNL_64-NEXT: shlb $2, %al
; KNL_64-NEXT: orb %cl, %al
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB31_2
; KNL_64-NEXT: # %bb.1: # %cond.load
; KNL_64-NEXT: vmovq %xmm0, %rax
; KNL_64-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
; KNL_64-NEXT: .LBB31_2: # %else
; KNL_64-NEXT: kshiftrw $1, %k0, %k1
; KNL_64-NEXT: kmovw %k1, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB31_4
; KNL_64-NEXT: # %bb.3: # %cond.load1
; KNL_64-NEXT: vpextrq $1, %xmm0, %rax
; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
; KNL_64-NEXT: jne .LBB31_1
; KNL_64-NEXT: # %bb.2: # %else
; KNL_64-NEXT: testb $2, %al
; KNL_64-NEXT: jne .LBB31_3
; KNL_64-NEXT: .LBB31_4: # %else2
; KNL_64-NEXT: kshiftrw $2, %k0, %k0
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: testb $4, %al
; KNL_64-NEXT: jne .LBB31_5
; KNL_64-NEXT: .LBB31_6: # %else5
; KNL_64-NEXT: vmovdqa %xmm3, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
; KNL_64-NEXT: .LBB31_1: # %cond.load
; KNL_64-NEXT: vmovq %xmm0, %rcx
; KNL_64-NEXT: vpinsrd $0, (%rcx), %xmm3, %xmm3
; KNL_64-NEXT: testb $2, %al
; KNL_64-NEXT: je .LBB31_4
; KNL_64-NEXT: .LBB31_3: # %cond.load1
; KNL_64-NEXT: vpextrq $1, %xmm0, %rcx
; KNL_64-NEXT: vpinsrd $1, (%rcx), %xmm3, %xmm3
; KNL_64-NEXT: testb $4, %al
; KNL_64-NEXT: je .LBB31_6
; KNL_64-NEXT: # %bb.5: # %cond.load4
; KNL_64-NEXT: .LBB31_5: # %cond.load4
; KNL_64-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL_64-NEXT: vmovq %xmm0, %rax
; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
; KNL_64-NEXT: .LBB31_6: # %else5
; KNL_64-NEXT: vmovdqa %xmm3, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
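The kmovw/andb/addb/shlb/orb run at the top of this block is how the <3 x i1> mask reaches a general-purpose register: the pass emits a bitcast to i3, i3 is not a legal type, and type legalization therefore assembles the scalar mask one bit at a time before the usual testb sequence. A minimal sketch of the IR value being materialized (function name hypothetical; the zext exists only so the sketch returns a legal type):

define i8 @mask_bits_sketch(<3 x i1> %mask) {
  ; This bitcast is the scalar mask; the kmovw/shlb/orb sequence above
  ; is its legalized form.
  %scalar_mask = bitcast <3 x i1> %mask to i3
  %ext = zext i3 %scalar_mask to i8
  ret i8 %ext
}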
@ -1698,32 +1715,48 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; KNL_32-NEXT: vmovdqa %xmm0, %xmm3
; KNL_32-NEXT: vpslld $31, %xmm2, %xmm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL_32-NEXT: kshiftrw $1, %k0, %k1
; KNL_32-NEXT: kshiftrw $2, %k0, %k2
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: andb $1, %al
; KNL_32-NEXT: kmovw %k1, %ecx
; KNL_32-NEXT: andb $1, %cl
; KNL_32-NEXT: addb %cl, %cl
; KNL_32-NEXT: orb %al, %cl
; KNL_32-NEXT: kmovw %k2, %eax
; KNL_32-NEXT: andb $1, %al
; KNL_32-NEXT: shlb $2, %al
; KNL_32-NEXT: orb %cl, %al
; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB31_2
; KNL_32-NEXT: # %bb.1: # %cond.load
; KNL_32-NEXT: vmovd %xmm1, %eax
; KNL_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB31_2: # %else
; KNL_32-NEXT: kshiftrw $1, %k0, %k1
; KNL_32-NEXT: kmovw %k1, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB31_4
; KNL_32-NEXT: # %bb.3: # %cond.load1
; KNL_32-NEXT: vpextrd $1, %xmm1, %eax
; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: jne .LBB31_1
; KNL_32-NEXT: # %bb.2: # %else
; KNL_32-NEXT: testb $2, %al
; KNL_32-NEXT: jne .LBB31_3
; KNL_32-NEXT: .LBB31_4: # %else2
; KNL_32-NEXT: kshiftrw $2, %k0, %k0
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: testb $4, %al
; KNL_32-NEXT: jne .LBB31_5
; KNL_32-NEXT: .LBB31_6: # %else5
; KNL_32-NEXT: addl $12, %esp
; KNL_32-NEXT: .cfi_def_cfa_offset 4
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
; KNL_32-NEXT: .LBB31_1: # %cond.load
; KNL_32-NEXT: .cfi_def_cfa_offset 16
; KNL_32-NEXT: vmovd %xmm1, %ecx
; KNL_32-NEXT: vpinsrd $0, (%ecx), %xmm0, %xmm0
; KNL_32-NEXT: testb $2, %al
; KNL_32-NEXT: je .LBB31_4
; KNL_32-NEXT: .LBB31_3: # %cond.load1
; KNL_32-NEXT: vpextrd $1, %xmm1, %ecx
; KNL_32-NEXT: vpinsrd $1, (%ecx), %xmm0, %xmm0
; KNL_32-NEXT: testb $4, %al
; KNL_32-NEXT: je .LBB31_6
; KNL_32-NEXT: # %bb.5: # %cond.load4
; KNL_32-NEXT: .LBB31_5: # %cond.load4
; KNL_32-NEXT: vpextrd $2, %xmm1, %eax
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB31_6: # %else5
; KNL_32-NEXT: addl $12, %esp
; KNL_32-NEXT: .cfi_def_cfa_offset 4
; KNL_32-NEXT: vzeroupper
@ -1733,33 +1766,47 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
; SKX-NEXT: vpmovd2m %xmm2, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k1
; SKX-NEXT: kshiftrb $2, %k0, %k2
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: andb $1, %al
; SKX-NEXT: kmovw %k1, %ecx
; SKX-NEXT: andb $1, %cl
; SKX-NEXT: addb %cl, %cl
; SKX-NEXT: orb %al, %cl
; SKX-NEXT: kmovw %k2, %eax
; SKX-NEXT: andb $1, %al
; SKX-NEXT: shlb $2, %al
; SKX-NEXT: orb %cl, %al
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB31_2
; SKX-NEXT: # %bb.1: # %cond.load
; SKX-NEXT: vmovq %xmm0, %rax
; SKX-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
; SKX-NEXT: .LBB31_2: # %else
; SKX-NEXT: kshiftrb $1, %k0, %k1
; SKX-NEXT: kmovw %k1, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB31_4
; SKX-NEXT: # %bb.3: # %cond.load1
; SKX-NEXT: vpextrq $1, %xmm0, %rax
; SKX-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
; SKX-NEXT: jne .LBB31_1
; SKX-NEXT: # %bb.2: # %else
; SKX-NEXT: testb $2, %al
; SKX-NEXT: jne .LBB31_3
; SKX-NEXT: .LBB31_4: # %else2
; SKX-NEXT: kshiftrb $2, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: testb $4, %al
; SKX-NEXT: jne .LBB31_5
; SKX-NEXT: .LBB31_6: # %else5
; SKX-NEXT: vmovdqa %xmm3, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: .LBB31_1: # %cond.load
; SKX-NEXT: vmovq %xmm0, %rcx
; SKX-NEXT: vpinsrd $0, (%rcx), %xmm3, %xmm3
; SKX-NEXT: testb $2, %al
; SKX-NEXT: je .LBB31_4
; SKX-NEXT: .LBB31_3: # %cond.load1
; SKX-NEXT: vpextrq $1, %xmm0, %rcx
; SKX-NEXT: vpinsrd $1, (%rcx), %xmm3, %xmm3
; SKX-NEXT: testb $4, %al
; SKX-NEXT: je .LBB31_6
; SKX-NEXT: # %bb.5: # %cond.load4
; SKX-NEXT: .LBB31_5: # %cond.load4
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vmovq %xmm0, %rax
; SKX-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
; SKX-NEXT: .LBB31_6: # %else5
; SKX-NEXT: vmovdqa %xmm3, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@ -1771,32 +1818,47 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX_32-NEXT: vmovdqa %xmm0, %xmm3
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm0
; SKX_32-NEXT: vpmovd2m %xmm0, %k0
; SKX_32-NEXT: kshiftrb $1, %k0, %k1
; SKX_32-NEXT: kshiftrb $2, %k0, %k2
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: andb $1, %al
; SKX_32-NEXT: kmovw %k1, %ecx
; SKX_32-NEXT: andb $1, %cl
; SKX_32-NEXT: addb %cl, %cl
; SKX_32-NEXT: orb %al, %cl
; SKX_32-NEXT: kmovw %k2, %eax
; SKX_32-NEXT: andb $1, %al
; SKX_32-NEXT: shlb $2, %al
; SKX_32-NEXT: orb %cl, %al
; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
; SKX_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: je .LBB31_2
; SKX_32-NEXT: # %bb.1: # %cond.load
; SKX_32-NEXT: vmovd %xmm1, %eax
; SKX_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB31_2: # %else
; SKX_32-NEXT: kshiftrb $1, %k0, %k1
; SKX_32-NEXT: kmovw %k1, %eax
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: je .LBB31_4
; SKX_32-NEXT: # %bb.3: # %cond.load1
; SKX_32-NEXT: vpextrd $1, %xmm1, %eax
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: jne .LBB31_1
; SKX_32-NEXT: # %bb.2: # %else
; SKX_32-NEXT: testb $2, %al
; SKX_32-NEXT: jne .LBB31_3
; SKX_32-NEXT: .LBB31_4: # %else2
; SKX_32-NEXT: kshiftrb $2, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: testb $4, %al
; SKX_32-NEXT: jne .LBB31_5
; SKX_32-NEXT: .LBB31_6: # %else5
; SKX_32-NEXT: addl $12, %esp
; SKX_32-NEXT: .cfi_def_cfa_offset 4
; SKX_32-NEXT: retl
; SKX_32-NEXT: .LBB31_1: # %cond.load
; SKX_32-NEXT: .cfi_def_cfa_offset 16
; SKX_32-NEXT: vmovd %xmm1, %ecx
; SKX_32-NEXT: vpinsrd $0, (%ecx), %xmm0, %xmm0
; SKX_32-NEXT: testb $2, %al
; SKX_32-NEXT: je .LBB31_4
; SKX_32-NEXT: .LBB31_3: # %cond.load1
; SKX_32-NEXT: vpextrd $1, %xmm1, %ecx
; SKX_32-NEXT: vpinsrd $1, (%ecx), %xmm0, %xmm0
; SKX_32-NEXT: testb $4, %al
; SKX_32-NEXT: je .LBB31_6
; SKX_32-NEXT: # %bb.5: # %cond.load4
; SKX_32-NEXT: .LBB31_5: # %cond.load4
; SKX_32-NEXT: vpextrd $2, %xmm1, %eax
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB31_6: # %else5
; SKX_32-NEXT: addl $12, %esp
; SKX_32-NEXT: .cfi_def_cfa_offset 4
; SKX_32-NEXT: retl
@ -122,20 +122,23 @@ define void @test_scatter_v2i32_index(<2 x double> %a1, double* %base, <2 x i32>
; WIDEN_AVX2-NEXT: vmovq %rdi, %xmm3
; WIDEN_AVX2-NEXT: vpbroadcastq %xmm3, %xmm3
; WIDEN_AVX2-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; WIDEN_AVX2-NEXT: vpextrb $0, %xmm2, %eax
; WIDEN_AVX2-NEXT: testb $1, %al
; WIDEN_AVX2-NEXT: je .LBB1_2
; WIDEN_AVX2-NEXT: # %bb.1: # %cond.store
; WIDEN_AVX2-NEXT: vmovq %xmm1, %rax
; WIDEN_AVX2-NEXT: vmovlps %xmm0, (%rax)
; WIDEN_AVX2-NEXT: .LBB1_2: # %else
; WIDEN_AVX2-NEXT: vpextrb $8, %xmm2, %eax
; WIDEN_AVX2-NEXT: vpsllq $63, %xmm2, %xmm2
; WIDEN_AVX2-NEXT: vmovmskpd %xmm2, %eax
; WIDEN_AVX2-NEXT: testb $1, %al
; WIDEN_AVX2-NEXT: jne .LBB1_1
; WIDEN_AVX2-NEXT: # %bb.2: # %else
; WIDEN_AVX2-NEXT: testb $2, %al
; WIDEN_AVX2-NEXT: jne .LBB1_3
; WIDEN_AVX2-NEXT: .LBB1_4: # %else2
; WIDEN_AVX2-NEXT: retq
; WIDEN_AVX2-NEXT: .LBB1_1: # %cond.store
; WIDEN_AVX2-NEXT: vmovq %xmm1, %rcx
; WIDEN_AVX2-NEXT: vmovlps %xmm0, (%rcx)
; WIDEN_AVX2-NEXT: testb $2, %al
; WIDEN_AVX2-NEXT: je .LBB1_4
; WIDEN_AVX2-NEXT: # %bb.3: # %cond.store1
; WIDEN_AVX2-NEXT: .LBB1_3: # %cond.store1
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
; WIDEN_AVX2-NEXT: vmovhps %xmm0, (%rax)
; WIDEN_AVX2-NEXT: .LBB1_4: # %else2
; WIDEN_AVX2-NEXT: retq
;
; PROMOTE_AVX2-LABEL: test_scatter_v2i32_index:
@ -147,20 +150,23 @@ define void @test_scatter_v2i32_index(<2 x double> %a1, double* %base, <2 x i32>
; PROMOTE_AVX2-NEXT: vmovq %rdi, %xmm3
; PROMOTE_AVX2-NEXT: vpbroadcastq %xmm3, %xmm3
; PROMOTE_AVX2-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; PROMOTE_AVX2-NEXT: vpextrb $0, %xmm2, %eax
; PROMOTE_AVX2-NEXT: testb $1, %al
; PROMOTE_AVX2-NEXT: je .LBB1_2
; PROMOTE_AVX2-NEXT: # %bb.1: # %cond.store
; PROMOTE_AVX2-NEXT: vmovq %xmm1, %rax
; PROMOTE_AVX2-NEXT: vmovlps %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: .LBB1_2: # %else
; PROMOTE_AVX2-NEXT: vpextrb $8, %xmm2, %eax
; PROMOTE_AVX2-NEXT: vpsllq $63, %xmm2, %xmm2
; PROMOTE_AVX2-NEXT: vmovmskpd %xmm2, %eax
; PROMOTE_AVX2-NEXT: testb $1, %al
; PROMOTE_AVX2-NEXT: jne .LBB1_1
; PROMOTE_AVX2-NEXT: # %bb.2: # %else
; PROMOTE_AVX2-NEXT: testb $2, %al
; PROMOTE_AVX2-NEXT: jne .LBB1_3
; PROMOTE_AVX2-NEXT: .LBB1_4: # %else2
; PROMOTE_AVX2-NEXT: retq
; PROMOTE_AVX2-NEXT: .LBB1_1: # %cond.store
; PROMOTE_AVX2-NEXT: vmovq %xmm1, %rcx
; PROMOTE_AVX2-NEXT: vmovlps %xmm0, (%rcx)
; PROMOTE_AVX2-NEXT: testb $2, %al
; PROMOTE_AVX2-NEXT: je .LBB1_4
; PROMOTE_AVX2-NEXT: # %bb.3: # %cond.store1
; PROMOTE_AVX2-NEXT: .LBB1_3: # %cond.store1
; PROMOTE_AVX2-NEXT: vpextrq $1, %xmm1, %rax
; PROMOTE_AVX2-NEXT: vmovhps %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: .LBB1_4: # %else2
; PROMOTE_AVX2-NEXT: retq
%gep = getelementptr double, double *%base, <2 x i32> %ind
call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %gep, i32 4, <2 x i1> %mask)
@ -273,38 +279,44 @@ define void @test_scatter_v2i32_data(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mas
;
; WIDEN_AVX2-LABEL: test_scatter_v2i32_data:
; WIDEN_AVX2: # %bb.0:
; WIDEN_AVX2-NEXT: vpextrb $0, %xmm2, %eax
; WIDEN_AVX2-NEXT: testb $1, %al
; WIDEN_AVX2-NEXT: je .LBB3_2
; WIDEN_AVX2-NEXT: # %bb.1: # %cond.store
; WIDEN_AVX2-NEXT: vmovq %xmm1, %rax
; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax)
; WIDEN_AVX2-NEXT: .LBB3_2: # %else
; WIDEN_AVX2-NEXT: vpextrb $8, %xmm2, %eax
; WIDEN_AVX2-NEXT: vpsllq $63, %xmm2, %xmm2
; WIDEN_AVX2-NEXT: vmovmskpd %xmm2, %eax
; WIDEN_AVX2-NEXT: testb $1, %al
; WIDEN_AVX2-NEXT: jne .LBB3_1
; WIDEN_AVX2-NEXT: # %bb.2: # %else
; WIDEN_AVX2-NEXT: testb $2, %al
; WIDEN_AVX2-NEXT: jne .LBB3_3
; WIDEN_AVX2-NEXT: .LBB3_4: # %else2
; WIDEN_AVX2-NEXT: retq
; WIDEN_AVX2-NEXT: .LBB3_1: # %cond.store
; WIDEN_AVX2-NEXT: vmovq %xmm1, %rcx
; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rcx)
; WIDEN_AVX2-NEXT: testb $2, %al
; WIDEN_AVX2-NEXT: je .LBB3_4
; WIDEN_AVX2-NEXT: # %bb.3: # %cond.store1
; WIDEN_AVX2-NEXT: .LBB3_3: # %cond.store1
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
; WIDEN_AVX2-NEXT: vextractps $1, %xmm0, (%rax)
; WIDEN_AVX2-NEXT: .LBB3_4: # %else2
; WIDEN_AVX2-NEXT: retq
;
; PROMOTE_AVX2-LABEL: test_scatter_v2i32_data:
; PROMOTE_AVX2: # %bb.0:
; PROMOTE_AVX2-NEXT: vpextrb $0, %xmm2, %eax
; PROMOTE_AVX2-NEXT: testb $1, %al
; PROMOTE_AVX2-NEXT: je .LBB3_2
; PROMOTE_AVX2-NEXT: # %bb.1: # %cond.store
; PROMOTE_AVX2-NEXT: vmovq %xmm1, %rax
; PROMOTE_AVX2-NEXT: vmovss %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: .LBB3_2: # %else
; PROMOTE_AVX2-NEXT: vpextrb $8, %xmm2, %eax
; PROMOTE_AVX2-NEXT: vpsllq $63, %xmm2, %xmm2
; PROMOTE_AVX2-NEXT: vmovmskpd %xmm2, %eax
; PROMOTE_AVX2-NEXT: testb $1, %al
; PROMOTE_AVX2-NEXT: jne .LBB3_1
; PROMOTE_AVX2-NEXT: # %bb.2: # %else
; PROMOTE_AVX2-NEXT: testb $2, %al
; PROMOTE_AVX2-NEXT: jne .LBB3_3
; PROMOTE_AVX2-NEXT: .LBB3_4: # %else2
; PROMOTE_AVX2-NEXT: retq
; PROMOTE_AVX2-NEXT: .LBB3_1: # %cond.store
; PROMOTE_AVX2-NEXT: vmovq %xmm1, %rcx
; PROMOTE_AVX2-NEXT: vmovss %xmm0, (%rcx)
; PROMOTE_AVX2-NEXT: testb $2, %al
; PROMOTE_AVX2-NEXT: je .LBB3_4
; PROMOTE_AVX2-NEXT: # %bb.3: # %cond.store1
; PROMOTE_AVX2-NEXT: .LBB3_3: # %cond.store1
; PROMOTE_AVX2-NEXT: vpextrq $1, %xmm1, %rax
; PROMOTE_AVX2-NEXT: vextractps $2, %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: .LBB3_4: # %else2
; PROMOTE_AVX2-NEXT: retq
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
ret void
@ -425,20 +437,23 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32>
; WIDEN_AVX2-NEXT: vmovq %rdi, %xmm3
; WIDEN_AVX2-NEXT: vpbroadcastq %xmm3, %xmm3
; WIDEN_AVX2-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; WIDEN_AVX2-NEXT: vpextrb $0, %xmm2, %eax
; WIDEN_AVX2-NEXT: testb $1, %al
; WIDEN_AVX2-NEXT: je .LBB5_2
; WIDEN_AVX2-NEXT: # %bb.1: # %cond.store
; WIDEN_AVX2-NEXT: vmovq %xmm1, %rax
; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax)
; WIDEN_AVX2-NEXT: .LBB5_2: # %else
; WIDEN_AVX2-NEXT: vpextrb $8, %xmm2, %eax
; WIDEN_AVX2-NEXT: vpsllq $63, %xmm2, %xmm2
; WIDEN_AVX2-NEXT: vmovmskpd %xmm2, %eax
; WIDEN_AVX2-NEXT: testb $1, %al
; WIDEN_AVX2-NEXT: jne .LBB5_1
; WIDEN_AVX2-NEXT: # %bb.2: # %else
; WIDEN_AVX2-NEXT: testb $2, %al
; WIDEN_AVX2-NEXT: jne .LBB5_3
; WIDEN_AVX2-NEXT: .LBB5_4: # %else2
; WIDEN_AVX2-NEXT: retq
; WIDEN_AVX2-NEXT: .LBB5_1: # %cond.store
; WIDEN_AVX2-NEXT: vmovq %xmm1, %rcx
; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rcx)
; WIDEN_AVX2-NEXT: testb $2, %al
; WIDEN_AVX2-NEXT: je .LBB5_4
; WIDEN_AVX2-NEXT: # %bb.3: # %cond.store1
; WIDEN_AVX2-NEXT: .LBB5_3: # %cond.store1
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
; WIDEN_AVX2-NEXT: vextractps $1, %xmm0, (%rax)
; WIDEN_AVX2-NEXT: .LBB5_4: # %else2
; WIDEN_AVX2-NEXT: retq
;
; PROMOTE_AVX2-LABEL: test_scatter_v2i32_data_index:
@ -450,20 +465,23 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32>
; PROMOTE_AVX2-NEXT: vmovq %rdi, %xmm3
; PROMOTE_AVX2-NEXT: vpbroadcastq %xmm3, %xmm3
; PROMOTE_AVX2-NEXT: vpaddq %xmm1, %xmm3, %xmm1
; PROMOTE_AVX2-NEXT: vpextrb $0, %xmm2, %eax
; PROMOTE_AVX2-NEXT: testb $1, %al
; PROMOTE_AVX2-NEXT: je .LBB5_2
; PROMOTE_AVX2-NEXT: # %bb.1: # %cond.store
; PROMOTE_AVX2-NEXT: vmovq %xmm1, %rax
; PROMOTE_AVX2-NEXT: vmovss %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: .LBB5_2: # %else
; PROMOTE_AVX2-NEXT: vpextrb $8, %xmm2, %eax
; PROMOTE_AVX2-NEXT: vpsllq $63, %xmm2, %xmm2
; PROMOTE_AVX2-NEXT: vmovmskpd %xmm2, %eax
; PROMOTE_AVX2-NEXT: testb $1, %al
; PROMOTE_AVX2-NEXT: jne .LBB5_1
; PROMOTE_AVX2-NEXT: # %bb.2: # %else
; PROMOTE_AVX2-NEXT: testb $2, %al
; PROMOTE_AVX2-NEXT: jne .LBB5_3
; PROMOTE_AVX2-NEXT: .LBB5_4: # %else2
; PROMOTE_AVX2-NEXT: retq
; PROMOTE_AVX2-NEXT: .LBB5_1: # %cond.store
; PROMOTE_AVX2-NEXT: vmovq %xmm1, %rcx
; PROMOTE_AVX2-NEXT: vmovss %xmm0, (%rcx)
; PROMOTE_AVX2-NEXT: testb $2, %al
; PROMOTE_AVX2-NEXT: je .LBB5_4
; PROMOTE_AVX2-NEXT: # %bb.3: # %cond.store1
; PROMOTE_AVX2-NEXT: .LBB5_3: # %cond.store1
; PROMOTE_AVX2-NEXT: vpextrq $1, %xmm1, %rax
; PROMOTE_AVX2-NEXT: vextractps $2, %xmm0, (%rax)
; PROMOTE_AVX2-NEXT: .LBB5_4: # %else2
; PROMOTE_AVX2-NEXT: retq
%gep = getelementptr i32, i32 *%base, <2 x i32> %ind
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %gep, i32 4, <2 x i1> %mask)
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -3,8 +3,10 @@
define <2 x i64> @scalarize_v2i64(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64(
; CHECK-NEXT: [[MASK0:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
; CHECK-NEXT: br i1 [[MASK0]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
; CHECK: cond.load:
; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i64 0
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8
@ -12,8 +14,9 @@ define <2 x i64> @scalarize_v2i64(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %pass
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[MASK1:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
; CHECK-NEXT: br i1 [[MASK1]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i2 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK: cond.load1:
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x i64*> [[P]], i64 1
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
@ -4,24 +4,27 @@
define <2 x i64> @scalarize_v2i64(<2 x i64>* %p, <2 x i1> %mask, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
; CHECK-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i2 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
; CHECK: cond.load:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP4]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP5]], i64 0
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
; CHECK-NEXT: br i1 [[TMP6]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP6]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP7:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i2 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK: cond.load1:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP8]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP10]], i64 1
; CHECK-NEXT: br label [[ELSE2]]
; CHECK: else2:
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP11]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 128, <2 x i1> %mask, <2 x i64> %passthru)
@ -62,24 +65,27 @@ define <2 x i64> @scalarize_v2i64_const_mask(<2 x i64>* %p, <2 x i64> %passthru)
define <2 x i24> @scalarize_v2i24(<2 x i24>* %p, <2 x i1> %mask, <2 x i24> %passthru) {
; CHECK-LABEL: @scalarize_v2i24(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i24>* [[P:%.*]] to i24*
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
; CHECK-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i2 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
; CHECK: cond.load:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i24, i24* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = load i24, i24* [[TMP3]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i24, i24* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = load i24, i24* [[TMP4]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP5]], i64 0
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
; CHECK-NEXT: br i1 [[TMP6]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP6]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP7:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i2 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK: cond.load1:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i24, i24* [[TMP1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = load i24, i24* [[TMP7]], align 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP8]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i24, i24* [[TMP1]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = load i24, i24* [[TMP9]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP10]], i64 1
; CHECK-NEXT: br label [[ELSE2]]
; CHECK: else2:
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP11]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
; CHECK-NEXT: ret <2 x i24> [[RES_PHI_ELSE3]]
;
%ret = call <2 x i24> @llvm.masked.load.v2i24.p0v2i24(<2 x i24>* %p, i32 8, <2 x i1> %mask, <2 x i24> %passthru)
@ -90,24 +96,27 @@ define <2 x i48> @scalarize_v2i48(<2 x i48>* %p, <2 x i1> %mask, <2 x i48> %pass
define <2 x i48> @scalarize_v2i48(<2 x i48>* %p, <2 x i1> %mask, <2 x i48> %passthru) {
; CHECK-LABEL: @scalarize_v2i48(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i48>* [[P:%.*]] to i48*
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
; CHECK-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i2 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
; CHECK: cond.load:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i48, i48* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = load i48, i48* [[TMP3]], align 2
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i48, i48* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = load i48, i48* [[TMP4]], align 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP5]], i64 0
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
; CHECK-NEXT: br i1 [[TMP6]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP6]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP7:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i2 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK: cond.load1:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i48, i48* [[TMP1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = load i48, i48* [[TMP7]], align 2
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP8]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i48, i48* [[TMP1]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = load i48, i48* [[TMP9]], align 2
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP10]], i64 1
; CHECK-NEXT: br label [[ELSE2]]
; CHECK: else2:
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP11]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
; CHECK-NEXT: ret <2 x i48> [[RES_PHI_ELSE3]]
;
%ret = call <2 x i48> @llvm.masked.load.v2i48.p0v2i48(<2 x i48>* %p, i32 16, <2 x i1> %mask, <2 x i48> %passthru)
@ -4,20 +4,23 @@
define void @scalarize_v2i64(<2 x i64>* %p, <2 x i1> %mask, <2 x i64> %data) {
; CHECK-LABEL: @scalarize_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
; CHECK-NEXT: br i1 [[TMP2]], label [[COND_STORE:%.*]], label [[ELSE:%.*]]
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i2 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[COND_STORE:%.*]], label [[ELSE:%.*]]
; CHECK: cond.store:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
; CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP4]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
; CHECK-NEXT: store i64 [[TMP4]], i64* [[TMP5]], align 8
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
; CHECK-NEXT: br i1 [[TMP5]], label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
; CHECK-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
; CHECK: cond.store1:
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
; CHECK-NEXT: store i64 [[TMP6]], i64* [[TMP7]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
; CHECK-NEXT: store i64 [[TMP8]], i64* [[TMP9]], align 8
; CHECK-NEXT: br label [[ELSE2]]
; CHECK: else2:
; CHECK-NEXT: ret void
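The IR-level expectations in these last tests come from running the pass by itself on the input functions. A RUN line along these lines reproduces them, assuming the pass's command-line name matches its registration (check the RUN header of the checked-in tests before relying on the exact spelling; %s is the usual lit substitution for the test file):

opt -S -scalarize-masked-mem-intrin %s | FileCheck %s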