diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 30405fcb627..4d23dc9c057 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17604,6 +17604,27 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts; Index = DAG.getConstant(Elt, DL, Index.getValueType()); } + } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && + !BCNumEltsChanged && VecVT.getVectorElementType() == ScalarVT) { + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0 + // -> extract_vector_elt a, 0 + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1 + // -> extract_vector_elt a, 1 + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2 + // -> extract_vector_elt b, 0 + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3 + // -> extract_vector_elt b, 1 + SDLoc SL(N); + EVT ConcatVT = VecOp.getOperand(0).getValueType(); + unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); + SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL, + Index.getValueType()); + + SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, + ConcatVT.getVectorElementType(), + ConcatOp, NewIdx); + return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt); } // Make sure we found a non-volatile load and the extractelement is diff --git a/test/CodeGen/AMDGPU/reduction.ll b/test/CodeGen/AMDGPU/reduction.ll index 0c605f79d98..ed9bc10abd5 100644 --- a/test/CodeGen/AMDGPU/reduction.ll +++ b/test/CodeGen/AMDGPU/reduction.ll @@ -438,10 +438,8 @@ entry: ; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1 ; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0 ; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}} +; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; FIXME: Extra canonicalize leftover -; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX9-NEXT: v_max_f16_e32 v0, [[MAX]], [[TMP]] ; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 @@ -466,11 +464,7 @@ entry: ; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1 ; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0 ; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}} - -; FIXME: Extra canonicalize leftover -; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX9-NEXT: v_min_f16_e32 v0, [[MIN]], [[TMP]] - +; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 @@ -507,10 +501,7 @@ entry: ; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1 ; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0 ; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}} - -; FIXME: Extra canonicalize leftover -; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX9-NEXT: v_max_f16_e32 v0, [[MAX]], [[TMP]] +; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 @@ -549,11 +540,7 @@ entry: ; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1 ; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0 ; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}} - -; FIXME: Extra canonicalize leftover -; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX9-NEXT: v_min_f16_e32 v0, [[MIN]], [[TMP]] - +; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1