mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
Update insertps handling based on feedback. Move to a v4f32-style operand
so that both vector and scalar arguments are supported correctly. Update the lowering and fix the comment to refer to pinsr* instead of insertps. llvm-svn: 76921
This commit is contained in:
parent
62c8b40b66
commit
c205a8da9d
@ -4383,11 +4383,12 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
|
||||
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
|
||||
// combine either bitwise AND or insert of float 0.0 to set these bits.
|
||||
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
|
||||
// Create this as a scalar-to-vector node.
|
||||
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
|
||||
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
|
||||
} else if (EVT == MVT::i32) {
|
||||
// InsertPS works with constant index.
|
||||
if (isa<ConstantSDNode>(N2))
|
||||
return Op;
|
||||
} else if (EVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
|
||||
// PINSR* works with constant index.
|
||||
return Op;
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ def X86pinsrw : SDNode<"X86ISD::PINSRW",
|
||||
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
|
||||
def X86insrtps : SDNode<"X86ISD::INSERTPS",
|
||||
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
|
||||
SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
|
||||
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
|
||||
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
|
||||
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
|
||||
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
|
||||
@ -3596,32 +3596,28 @@ defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
|
||||
// in the target vector.
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
|
||||
def match_rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, FR32:$src2, i32i8imm:$src3),
|
||||
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(X86insrtps VR128:$src1, FR32:$src2, imm:$src3))]>, OpSize;
|
||||
def match_rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize;
|
||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(X86insrtps VR128:$src1, (loadf32 addr:$src2),
|
||||
(X86insrtps VR128:$src1,
|
||||
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
|
||||
imm:$src3))]>, OpSize;
|
||||
}
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
def INSERTPSrr : SS4AIi8<0x21, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||
"insertps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
[(set VR128:$dst, (int_x86_sse41_insertps VR128:$src1,
|
||||
VR128:$src2, imm:$src3))]>;
|
||||
}
|
||||
|
||||
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
|
||||
|
||||
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
|
||||
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
|
||||
|
||||
let Defs = [EFLAGS] in {
|
||||
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
|
||||
"ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
|
||||
|
Loading…
Reference in New Issue
Block a user