1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] Add isel patterns for selecting masked SUBV_BROADCAST with bitcasts. Remove combineBitcastForMaskedOp.

Add test cases for the merge masked versions to make sure we have all those covered.

llvm-svn: 324210
This commit is contained in:
Craig Topper 2018-02-05 08:37:37 +00:00
parent e2c558f83f
commit 9d71e524e3
4 changed files with 302 additions and 79 deletions

View File

@ -31361,50 +31361,6 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
// If this is a bitcasted op that can be represented as another type, push the
// the bitcast to the inputs. This allows more opportunities for pattern
// matching masked instructions. This is called when we know that the operation
// is used as one of the inputs of a vselect.
static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// Make sure we have a bitcast.
if (OrigOp.getOpcode() != ISD::BITCAST)
return false;
SDValue Op = OrigOp.getOperand(0);
// If the operation is used by anything other than the bitcast, we shouldn't
// do this combine as that would replicate the operation.
if (!Op.hasOneUse())
return false;
MVT VT = OrigOp.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
SDLoc DL(Op.getNode());
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case X86ISD::SUBV_BROADCAST: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
// Only change element size, not type.
if (VT.isInteger() != Op.getSimpleValueType().isInteger())
return false;
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = MVT::getVectorVT(EltVT,
Op0.getSimpleValueType().getSizeInBits() / EltSize);
Op0 = DAG.getBitcast(Op0VT, Op.getOperand(0));
DCI.AddToWorklist(Op0.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0));
return true;
}
}
return false;
}
/// Do target-specific dag combines on SELECT and VSELECT nodes.
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@ -31770,17 +31726,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
// Look for vselects with LHS/RHS being bitcasted from an operation that
// can be executed on another type. Push the bitcast to the inputs of
// the operation. This exposes opportunities for using masking instructions.
if (N->getOpcode() == ISD::VSELECT && DCI.isAfterLegalizeVectorOps() &&
CondVT.getVectorElementType() == MVT::i1) {
if (combineBitcastForMaskedOp(LHS, DAG, DCI))
return SDValue(N, 0);
if (combineBitcastForMaskedOp(RHS, DAG, DCI))
return SDValue(N, 0);
}
// Custom action for SELECT MMX
if (VT == MVT::x86mmx) {
LHS = DAG.getBitcast(MVT::i64, LHS);

View File

@ -1487,6 +1487,41 @@ def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
(bc_v16f32 (v16i32 immAllZerosV))),
(VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
VR512:$src0),
(VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
(v16i32 immAllZerosV)),
(VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
VR512:$src0),
(VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
(bc_v8f64 (v16i32 immAllZerosV))),
(VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
VR512:$src0),
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
(bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
VR512:$src0),
(VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
@ -1506,6 +1541,25 @@ def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
(bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
(bc_v8f32 (v8i32 immAllZerosV))),
(VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
VR256X:$src0),
(VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
(v8i32 immAllZerosV)),
(VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
VR256X:$src0),
(VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
@ -1535,6 +1589,24 @@ defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2"
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
v4f64x_info, v2f64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
(bc_v4f64 (v8i32 immAllZerosV))),
(VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
VR256X:$src0),
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
(bc_v4i64 (v8i32 immAllZerosV))),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
VR256X:$src0),
(VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI] in {
@ -1550,6 +1622,41 @@ defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
v16f32_info, v8f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
(bc_v16f32 (v16i32 immAllZerosV))),
(VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
VR512:$src0),
(VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
(v16i32 immAllZerosV)),
(VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
VR512:$src0),
(VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
(bc_v8f64 (v16i32 immAllZerosV))),
(VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
VR512:$src0),
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
(bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
VR512:$src0),
(VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,

View File

@ -878,6 +878,7 @@ def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
def bc_v8f32 : PatFrag<(ops node:$in), (v8f32 (bitconvert node:$in))>;
def bc_v4f64 : PatFrag<(ops node:$in), (v4f64 (bitconvert node:$in))>;
// 512-bit bitconvert pattern fragments
def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>;

View File

@ -1511,8 +1511,22 @@ define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask)
ret <2 x double> %res
}
define <8 x float> @test_broadcast_2f64_8f32(<2 x double> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2f64_8f32:
define <8 x float> @test_broadcast_2f64_8f32_mask(<2 x double> *%p, i8 %mask, <8 x float> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_2f64_8f32_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <2 x double>, <2 x double> *%p
%2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%3 = bitcast <4 x double> %2 to <8 x float>
%mask.cast = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> %passthru
ret <8 x float> %res
}
define <8 x float> @test_broadcast_2f64_8f32_maskz(<2 x double> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2f64_8f32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
@ -1525,8 +1539,22 @@ define <8 x float> @test_broadcast_2f64_8f32(<2 x double> *%p, i8 %mask) nounwin
ret <8 x float> %res
}
define <8 x i32> @test_broadcast_2i64_8i32(<2 x i64> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2i64_8i32:
define <8 x i32> @test_broadcast_2i64_8i32_mask(<2 x i64> *%p, i8 %mask, <8 x i32> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_2i64_8i32_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <2 x i64>, <2 x i64> *%p
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%3 = bitcast <4 x i64> %2 to <8 x i32>
%mask.cast = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> %passthru
ret <8 x i32> %res
}
define <8 x i32> @test_broadcast_2i64_8i32_maskz(<2 x i64> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2i64_8i32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
@ -1539,8 +1567,22 @@ define <8 x i32> @test_broadcast_2i64_8i32(<2 x i64> *%p, i8 %mask) nounwind {
ret <8 x i32> %res
}
define <16 x float> @test_broadcast_2f64_16f32(<2 x double> *%p, i16 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2f64_16f32:
define <16 x float> @test_broadcast_2f64_16f32_mask(<2 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_2f64_16f32_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <2 x double>, <2 x double> *%p
%2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
%3 = bitcast <8 x double> %2 to <16 x float>
%mask.cast = bitcast i16 %mask to <16 x i1>
%res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
ret <16 x float> %res
}
define <16 x float> @test_broadcast_2f64_16f32_maskz(<2 x double> *%p, i16 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2f64_16f32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
@ -1553,8 +1595,22 @@ define <16 x float> @test_broadcast_2f64_16f32(<2 x double> *%p, i16 %mask) noun
ret <16 x float> %res
}
define <16 x i32> @test_broadcast_2i64_16i32(<2 x i64> *%p, i16 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2i64_16i32:
define <16 x i32> @test_broadcast_2i64_16i32_mask(<2 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_2i64_16i32_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <2 x i64>, <2 x i64> *%p
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
%3 = bitcast <8 x i64> %2 to <16 x i32>
%mask.cast = bitcast i16 %mask to <16 x i1>
%res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
ret <16 x i32> %res
}
define <16 x i32> @test_broadcast_2i64_16i32_maskz(<2 x i64> *%p, i16 %mask) nounwind {
; CHECK-LABEL: test_broadcast_2i64_16i32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
@ -1567,8 +1623,22 @@ define <16 x i32> @test_broadcast_2i64_16i32(<2 x i64> *%p, i16 %mask) nounwind
ret <16 x i32> %res
}
define <16 x float> @test_broadcast_4f64_16f32(<4 x double> *%p, i16 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4f64_16f32:
define <16 x float> @test_broadcast_4f64_16f32_mask(<4 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_4f64_16f32_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%1 = load <4 x double>, <4 x double> *%p
%2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%3 = bitcast <8 x double> %2 to <16 x float>
%mask.cast = bitcast i16 %mask to <16 x i1>
%res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
ret <16 x float> %res
}
define <16 x float> @test_broadcast_4f64_16f32_maskz(<4 x double> *%p, i16 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4f64_16f32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
@ -1581,8 +1651,22 @@ define <16 x float> @test_broadcast_4f64_16f32(<4 x double> *%p, i16 %mask) noun
ret <16 x float> %res
}
define <16 x i32> @test_broadcast_4i64_16i32(<4 x i64> *%p, i16 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4i64_16i32:
define <16 x i32> @test_broadcast_4i64_16i32_mask(<4 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_4i64_16i32_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%1 = load <4 x i64>, <4 x i64> *%p
%2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%3 = bitcast <8 x i64> %2 to <16 x i32>
%mask.cast = bitcast i16 %mask to <16 x i1>
%res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
ret <16 x i32> %res
}
define <16 x i32> @test_broadcast_4i64_16i32_maskz(<4 x i64> *%p, i16 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4i64_16i32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
@ -1595,8 +1679,23 @@ define <16 x i32> @test_broadcast_4i64_16i32(<4 x i64> *%p, i16 %mask) nounwind
ret <16 x i32> %res
}
define <4 x double> @test_broadcast_4f32_4f64(<4 x float> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4f32_4f64:
define <4 x double> @test_broadcast_4f32_4f64_mask(<4 x float> *%p, i8 %mask, <4 x double> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_4f32_4f64_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%1 = load <4 x float>, <4 x float> *%p
%2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%3 = bitcast <8 x float> %2 to <4 x double>
%mask.cast = bitcast i8 %mask to <8 x i1>
%mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> %passthru
ret <4 x double> %res
}
define <4 x double> @test_broadcast_4f32_4f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4f32_4f64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
@ -1610,8 +1709,23 @@ define <4 x double> @test_broadcast_4f32_4f64(<4 x float> *%p, i8 %mask) nounwin
ret <4 x double> %res
}
define <4 x i64> @test_broadcast_4i32_4i64(<4 x i32> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4i32_4i64:
define <4 x i64> @test_broadcast_4i32_4i64_mask(<4 x i32> *%p, i8 %mask, <4 x i64> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_4i32_4i64_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%1 = load <4 x i32>, <4 x i32> *%p
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%3 = bitcast <8 x i32> %2 to <4 x i64>
%mask.cast = bitcast i8 %mask to <8 x i1>
%mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> %passthru
ret <4 x i64> %res
}
define <4 x i64> @test_broadcast_4i32_4i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4i32_4i64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
@ -1625,8 +1739,22 @@ define <4 x i64> @test_broadcast_4i32_4i64(<4 x i32> *%p, i8 %mask) nounwind {
ret <4 x i64> %res
}
define <8 x double> @test_broadcast_4f32_8f64(<4 x float> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4f32_8f64:
define <8 x double> @test_broadcast_4f32_8f64_mask(<4 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_4f32_8f64_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%1 = load <4 x float>, <4 x float> *%p
%2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%3 = bitcast <16 x float> %2 to <8 x double>
%mask.cast = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
ret <8 x double> %res
}
define <8 x double> @test_broadcast_4f32_8f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4f32_8f64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
@ -1639,8 +1767,22 @@ define <8 x double> @test_broadcast_4f32_8f64(<4 x float> *%p, i8 %mask) nounwin
ret <8 x double> %res
}
define <8 x i64> @test_broadcast_4i32_8i64(<4 x i32> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4i32_8i64:
define <8 x i64> @test_broadcast_4i32_8i64_mask(<4 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_4i32_8i64_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%1 = load <4 x i32>, <4 x i32> *%p
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%3 = bitcast <16 x i32> %2 to <8 x i64>
%mask.cast = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
ret <8 x i64> %res
}
define <8 x i64> @test_broadcast_4i32_8i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_4i32_8i64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
@ -1653,8 +1795,22 @@ define <8 x i64> @test_broadcast_4i32_8i64(<4 x i32> *%p, i8 %mask) nounwind {
ret <8 x i64> %res
}
define <8 x double> @test_broadcast_8f32_8f64(<8 x float> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_8f32_8f64:
define <8 x double> @test_broadcast_8f32_8f64_mask(<8 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_8f32_8f64_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <8 x float>, <8 x float> *%p
%2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <16 x float> %2 to <8 x double>
%mask.cast = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
ret <8 x double> %res
}
define <8 x double> @test_broadcast_8f32_8f64_maskz(<8 x float> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_8f32_8f64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
@ -1667,8 +1823,22 @@ define <8 x double> @test_broadcast_8f32_8f64(<8 x float> *%p, i8 %mask) nounwin
ret <8 x double> %res
}
define <8 x i64> @test_broadcast_8i32_8i64(<8 x i32> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_8i32_8i64:
define <8 x i64> @test_broadcast_8i32_8i64_mask(<8 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
; CHECK-LABEL: test_broadcast_8i32_8i64_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%1 = load <8 x i32>, <8 x i32> *%p
%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <16 x i32> %2 to <8 x i64>
%mask.cast = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
ret <8 x i64> %res
}
define <8 x i64> @test_broadcast_8i32_8i64_maskz(<8 x i32> *%p, i8 %mask) nounwind {
; CHECK-LABEL: test_broadcast_8i32_8i64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]