mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 19:42:54 +02:00
Fix (movhps load) lowering / pattern to match more cases. rdar://10050549
llvm-svn: 138848
This commit is contained in:
parent
928959bc52
commit
bbabe9ff60
@ -6236,8 +6236,11 @@ SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
|
|||||||
if (HasSSE2 && VT == MVT::v2f64)
|
if (HasSSE2 && VT == MVT::v2f64)
|
||||||
return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
|
return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
|
||||||
|
|
||||||
// v4f32 or v4i32
|
// v4f32 or v4i32: canonicalized to v4f32 (which is legal for SSE1)
|
||||||
return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
|
return DAG.getNode(ISD::BITCAST, dl, VT,
|
||||||
|
getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
|
||||||
|
DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
|
||||||
|
DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG));
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
|
@ -691,11 +691,12 @@ let Predicates = [HasSSE1] in {
|
|||||||
// MOVHPS patterns
|
// MOVHPS patterns
|
||||||
def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
|
def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
|
||||||
(MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
|
(MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
|
||||||
|
|
||||||
def : Pat<(X86Movlhps VR128:$src1,
|
def : Pat<(X86Movlhps VR128:$src1,
|
||||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
|
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
|
||||||
(MOVHPSrm VR128:$src1, addr:$src2)>;
|
(MOVHPSrm VR128:$src1, addr:$src2)>;
|
||||||
def : Pat<(X86Movlhps VR128:$src1,
|
def : Pat<(X86Movlhps VR128:$src1,
|
||||||
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
|
(bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
|
||||||
(MOVHPSrm VR128:$src1, addr:$src2)>;
|
(MOVHPSrm VR128:$src1, addr:$src2)>;
|
||||||
|
|
||||||
// MOVLHPS patterns
|
// MOVLHPS patterns
|
||||||
|
@ -24,3 +24,22 @@ define <2 x i64> @hdi(<2 x i64> %p) nounwind optsize ssp {
|
|||||||
ret <2 x i64> %shuffle
|
ret <2 x i64> %shuffle
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; rdar://10050549
|
||||||
|
%struct.Float2 = type { float, float }
|
||||||
|
|
||||||
|
define <4 x float> @loadhpi(%struct.Float2* %vPtr, <4 x float> %vecin1) nounwind readonly ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: loadhpi
|
||||||
|
; CHECK-NOT: movq
|
||||||
|
; CHECK: movhps (
|
||||||
|
%tmp1 = bitcast %struct.Float2* %vPtr to <1 x i64>*
|
||||||
|
%addptr7 = getelementptr inbounds <1 x i64>* %tmp1, i64 0
|
||||||
|
%tmp2 = bitcast <1 x i64>* %addptr7 to float*
|
||||||
|
%tmp3 = load float* %tmp2, align 4
|
||||||
|
%vec = insertelement <4 x float> undef, float %tmp3, i32 0
|
||||||
|
%addptr.i12 = getelementptr inbounds float* %tmp2, i64 1
|
||||||
|
%tmp4 = load float* %addptr.i12, align 4
|
||||||
|
%vecin2 = insertelement <4 x float> %vec, float %tmp4, i32 1
|
||||||
|
%shuffle = shufflevector <4 x float> %vecin1, <4 x float> %vecin2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||||
|
ret <4 x float> %shuffle
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user