mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
- SSE4.1 extractps extracts an f32 into a GR32 register. Very useful! Not. Fix the instruction specification and teach the lowering code to use it only when the result's only use is a store instruction.
llvm-svn: 48746
This commit is contained in:
parent
3282759e08
commit
dbdf48276a
@ -699,7 +699,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
|
||||
|
||||
if (Subtarget->is64Bit()) {
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
|
||||
@ -3718,6 +3718,19 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
|
||||
SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
|
||||
DAG.getValueType(VT));
|
||||
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
|
||||
} else if (VT == MVT::f32) {
|
||||
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
|
||||
// the result back to FR32 register. It's only worth matching if the
|
||||
// result has a single use which is a store.
|
||||
if (!Op.hasOneUse())
|
||||
return SDOperand();
|
||||
SDNode *User = *Op.Val->use_begin();
|
||||
if (User->getOpcode() != ISD::STORE)
|
||||
return SDOperand();
|
||||
SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
|
||||
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
|
||||
Op.getOperand(1));
|
||||
return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract);
|
||||
}
|
||||
return SDOperand();
|
||||
}
|
||||
@ -3728,8 +3741,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
|
||||
if (!isa<ConstantSDNode>(Op.getOperand(1)))
|
||||
return SDOperand();
|
||||
|
||||
if (Subtarget->hasSSE41())
|
||||
return LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
|
||||
if (Subtarget->hasSSE41()) {
|
||||
SDOperand Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
|
||||
if (Res.Val)
|
||||
return Res;
|
||||
}
|
||||
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
// TODO: handle v16i8.
|
||||
|
@ -3380,19 +3380,22 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
|
||||
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
|
||||
|
||||
|
||||
/// SS41I_extractf32 - SSE 4.1 extract 32 bits to fp reg or memory destination
|
||||
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
|
||||
/// destination
|
||||
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
|
||||
def rr : SS4AIi8<opc, MRMSrcReg, (outs FR32:$dst),
|
||||
// Not worth matching to rr form of extractps since the result is in GPR32.
|
||||
def rr : SS4AIi8<opc, MRMSrcReg, (outs GR32:$dst),
|
||||
(ins VR128:$src1, i32i8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set FR32:$dst,
|
||||
(extractelt (v4f32 VR128:$src1), imm:$src2))]>, OpSize;
|
||||
[/*(set GR32:$dst,
|
||||
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))*/]>,
|
||||
OpSize;
|
||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(store (extractelt (v4f32 VR128:$src1), imm:$src2),
|
||||
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
|
||||
addr:$dst)]>, OpSize;
|
||||
}
|
||||
|
||||
|
30
test/CodeGen/X86/vec_extract-sse4.ll
Normal file
30
test/CodeGen/X86/vec_extract-sse4.ll
Normal file
@ -0,0 +1,30 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
|
||||
; RUN: grep extractps %t | count 1
|
||||
; RUN: grep pextrd %t | count 2
|
||||
; RUN: grep pshufd %t | count 1
|
||||
|
||||
define void @t1(float* %R, <4 x float>* %P1) {
|
||||
%X = load <4 x float>* %P1
|
||||
%tmp = extractelement <4 x float> %X, i32 3
|
||||
store float %tmp, float* %R
|
||||
ret void
|
||||
}
|
||||
|
||||
define float @t2(<4 x float>* %P1) {
|
||||
%X = load <4 x float>* %P1
|
||||
%tmp = extractelement <4 x float> %X, i32 2
|
||||
ret float %tmp
|
||||
}
|
||||
|
||||
define void @t3(i32* %R, <4 x i32>* %P1) {
|
||||
%X = load <4 x i32>* %P1
|
||||
%tmp = extractelement <4 x i32> %X, i32 3
|
||||
store i32 %tmp, i32* %R
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @t4(<4 x i32>* %P1) {
|
||||
%X = load <4 x i32>* %P1
|
||||
%tmp = extractelement <4 x i32> %X, i32 3
|
||||
ret i32 %tmp
|
||||
}
|
Loading…
Reference in New Issue
Block a user