mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
[X86][SSE] Add general 32-bit LOAD + VZEXT_MOVL support to EltsFromConsecutiveLoads
This patch adds support for combining consecutive 32-bit loads (load/undef elements), followed by trailing undef/zero elements, into a single MOVD load. Differential Revision: http://reviews.llvm.org/D16729. llvm-svn: 259796
This commit is contained in:
parent
fcca55983b
commit
da26d272a9
@ -5601,6 +5601,24 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
|
||||
}
|
||||
}
|
||||
|
||||
auto CreateLoad = [&DAG, &DL](EVT VT, LoadSDNode *LDBase) {
|
||||
SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(),
|
||||
LDBase->getBasePtr(), LDBase->getPointerInfo(),
|
||||
LDBase->isVolatile(), LDBase->isNonTemporal(),
|
||||
LDBase->isInvariant(), LDBase->getAlignment());
|
||||
|
||||
if (LDBase->hasAnyUseOfValue(1)) {
|
||||
SDValue NewChain =
|
||||
DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1),
|
||||
SDValue(NewLd.getNode(), 1));
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
|
||||
DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
|
||||
SDValue(NewLd.getNode(), 1));
|
||||
}
|
||||
|
||||
return NewLd;
|
||||
};
|
||||
|
||||
// LOAD - all consecutive load/undefs (must start/end with a load).
|
||||
// If we have found an entire vector of loads and undefs, then return a large
|
||||
// load of the entire vector width starting at the base pointer.
|
||||
@ -5616,23 +5634,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
|
||||
if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
|
||||
return SDValue();
|
||||
|
||||
SDValue NewLd = SDValue();
|
||||
|
||||
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
|
||||
LDBase->getPointerInfo(), LDBase->isVolatile(),
|
||||
LDBase->isNonTemporal(), LDBase->isInvariant(),
|
||||
LDBase->getAlignment());
|
||||
|
||||
if (LDBase->hasAnyUseOfValue(1)) {
|
||||
SDValue NewChain =
|
||||
DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1),
|
||||
SDValue(NewLd.getNode(), 1));
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
|
||||
DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
|
||||
SDValue(NewLd.getNode(), 1));
|
||||
}
|
||||
|
||||
return NewLd;
|
||||
return CreateLoad(VT, LDBase);
|
||||
}
|
||||
|
||||
int LoadSize =
|
||||
@ -5667,6 +5669,19 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
|
||||
|
||||
return DAG.getBitcast(VT, ResNode);
|
||||
}
|
||||
|
||||
// VZEXT_MOVL - consecutive 32-bit load/undefs followed by zeros/undefs.
|
||||
if (IsConsecutiveLoad && FirstLoadedElt == 0 && LoadSize == 32 &&
|
||||
((VT.is128BitVector() && TLI.isTypeLegal(MVT::v4i32)) ||
|
||||
(VT.is256BitVector() && TLI.isTypeLegal(MVT::v8i32)) ||
|
||||
(VT.is512BitVector() && TLI.isTypeLegal(MVT::v16i32)))) {
|
||||
MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
|
||||
SDValue V = CreateLoad(MVT::i32, LDBase);
|
||||
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, V);
|
||||
V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, V);
|
||||
return DAG.getBitcast(VT, V);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -3046,6 +3046,18 @@ let Predicates = [HasAVX512] in {
|
||||
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
|
||||
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
|
||||
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
|
||||
|
||||
// Represent the same patterns above but in the form they appear for
|
||||
// 512-bit types
|
||||
def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
|
||||
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
|
||||
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
|
||||
def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
|
||||
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
|
||||
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
|
||||
def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
|
||||
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
|
||||
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
|
||||
}
|
||||
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
|
||||
(v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
|
||||
|
@ -418,21 +418,18 @@ define <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline s
|
||||
define <8 x i16> @merge_8i16_i16_34uuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
|
||||
; SSE-LABEL: merge_8i16_i16_34uuuuuu:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: pinsrw $0, 6(%rdi), %xmm0
|
||||
; SSE-NEXT: pinsrw $1, 8(%rdi), %xmm0
|
||||
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: merge_8i16_i16_34uuuuuu:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpinsrw $0, 6(%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrw $1, 8(%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: merge_8i16_i16_34uuuuuu:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE-NEXT: pinsrw $0, 6(%eax), %xmm0
|
||||
; X32-SSE-NEXT: pinsrw $1, 8(%eax), %xmm0
|
||||
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 3
|
||||
%ptr1 = getelementptr inbounds i16, i16* %ptr, i64 4
|
||||
@ -537,42 +534,20 @@ define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(i8* %ptr) nounwind uwtable noin
|
||||
}
|
||||
|
||||
define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
|
||||
; SSE2-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movzbl (%rdi), %eax
|
||||
; SSE2-NEXT: movzbl 1(%rdi), %ecx
|
||||
; SSE2-NEXT: shll $8, %ecx
|
||||
; SSE2-NEXT: orl %eax, %ecx
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE2-NEXT: pinsrw $0, %ecx, %xmm0
|
||||
; SSE2-NEXT: movzbl 3(%rdi), %eax
|
||||
; SSE2-NEXT: shll $8, %eax
|
||||
; SSE2-NEXT: pinsrw $1, %eax, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pxor %xmm0, %xmm0
|
||||
; SSE41-NEXT: pinsrb $0, (%rdi), %xmm0
|
||||
; SSE41-NEXT: pinsrb $1, 1(%rdi), %xmm0
|
||||
; SSE41-NEXT: pinsrb $3, 3(%rdi), %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrb $0, (%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrb $1, 1(%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrb $3, 3(%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
|
||||
; X32-SSE: # BB#0:
|
||||
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-SSE-NEXT: pxor %xmm0, %xmm0
|
||||
; X32-SSE-NEXT: pinsrb $0, (%eax), %xmm0
|
||||
; X32-SSE-NEXT: pinsrb $1, 1(%eax), %xmm0
|
||||
; X32-SSE-NEXT: pinsrb $3, 3(%eax), %xmm0
|
||||
; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-SSE-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
|
||||
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1
|
||||
|
@ -542,37 +542,15 @@ define <8 x i32> @merge_8i32_i32_1u3u5zu8(i32* %ptr) nounwind uwtable noinline s
|
||||
}
|
||||
|
||||
define <16 x i16> @merge_16i16_i16_89zzzuuuuuuuuuuuz(i16* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX1-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrw $0, 16(%rdi), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpinsrw $1, 18(%rdi), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpinsrw $0, 16(%rdi), %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpinsrw $1, 18(%rdi), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpinsrw $0, 16(%rdi), %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vpinsrw $1, 18(%rdi), %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
; AVX-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-AVX-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz:
|
||||
; X32-AVX: # BB#0:
|
||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; X32-AVX-NEXT: vpinsrw $0, 16(%eax), %xmm0, %xmm1
|
||||
; X32-AVX-NEXT: vpinsrw $1, 18(%eax), %xmm1, %xmm1
|
||||
; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X32-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-AVX-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 8
|
||||
%ptr1 = getelementptr inbounds i16, i16* %ptr, i64 9
|
||||
@ -704,17 +682,13 @@ define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF(i16* %ptr) nounwind uwtable
|
||||
define <32 x i8> @merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i8* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX-LABEL: merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpinsrb $0, 4(%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrb $1, 5(%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrb $3, 7(%rdi), %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-AVX-LABEL: merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
|
||||
; X32-AVX: # BB#0:
|
||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX-NEXT: vpinsrb $0, 4(%eax), %xmm0, %xmm0
|
||||
; X32-AVX-NEXT: vpinsrb $1, 5(%eax), %xmm0, %xmm0
|
||||
; X32-AVX-NEXT: vpinsrb $3, 7(%eax), %xmm0, %xmm0
|
||||
; X32-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-AVX-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 4
|
||||
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 5
|
||||
@ -729,41 +703,15 @@ define <32 x i8> @merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i8* %ptr) nounw
|
||||
}
|
||||
|
||||
define <32 x i8> @merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu(i8* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX1-LABEL: merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrb $0, 2(%rdi), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpinsrb $1, 3(%rdi), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpinsrb $3, 5(%rdi), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpinsrb $0, 2(%rdi), %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpinsrb $1, 3(%rdi), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpinsrb $3, 5(%rdi), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpinsrb $0, 2(%rdi), %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vpinsrb $1, 3(%rdi), %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpinsrb $3, 5(%rdi), %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
; AVX-LABEL: merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-AVX-LABEL: merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu:
|
||||
; X32-AVX: # BB#0:
|
||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; X32-AVX-NEXT: vpinsrb $0, 2(%eax), %xmm0, %xmm1
|
||||
; X32-AVX-NEXT: vpinsrb $1, 3(%eax), %xmm1, %xmm1
|
||||
; X32-AVX-NEXT: vpinsrb $3, 5(%eax), %xmm1, %xmm1
|
||||
; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X32-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-AVX-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 2
|
||||
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 3
|
||||
|
@ -570,31 +570,20 @@ define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) n
|
||||
define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpinsrw $0, 4(%rdi), %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vpinsrw $1, 6(%rdi), %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpinsrw $0, 4(%rdi), %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpinsrw $1, 6(%rdi), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
|
||||
; X32-AVX512F: # BB#0:
|
||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; X32-AVX512F-NEXT: vpinsrw $0, 4(%eax), %xmm0, %xmm1
|
||||
; X32-AVX512F-NEXT: vpinsrw $1, 6(%eax), %xmm1, %xmm1
|
||||
; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X32-AVX512F-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; X32-AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; X32-AVX512F-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
|
||||
%ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
|
||||
@ -651,34 +640,20 @@ define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
|
||||
define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpinsrb $0, 1(%rdi), %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vpinsrb $1, 2(%rdi), %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpinsrb $3, 4(%rdi), %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpinsrb $0, 1(%rdi), %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpinsrb $1, 2(%rdi), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpinsrb $3, 4(%rdi), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
|
||||
; X32-AVX512F: # BB#0:
|
||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
; X32-AVX512F-NEXT: vpinsrb $0, 1(%eax), %xmm0, %xmm1
|
||||
; X32-AVX512F-NEXT: vpinsrb $1, 2(%eax), %xmm1, %xmm1
|
||||
; X32-AVX512F-NEXT: vpinsrb $3, 4(%eax), %xmm1, %xmm1
|
||||
; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X32-AVX512F-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; X32-AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-AVX512F-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; X32-AVX512F-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
|
||||
%ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
|
||||
|
Loading…
Reference in New Issue
Block a user