mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
Remove intrinsic-specific instructions for SSE/SSE2/AVX floating-point max/min instructions. Lower the intrinsics to target-specific nodes and use those patterns instead. This also allows them to be commuted if UnsafeFPMath is enabled.
llvm-svn: 171227
This commit is contained in:
parent
f9fc85f71d
commit
93fdde7fff
@ -10400,6 +10400,35 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(X86ISD::SMIN, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
|
||||
// SSE/SSE2/AVX floating point max/min intrinsics.
|
||||
case Intrinsic::x86_sse_max_ps:
|
||||
case Intrinsic::x86_sse2_max_pd:
|
||||
case Intrinsic::x86_avx_max_ps_256:
|
||||
case Intrinsic::x86_avx_max_pd_256:
|
||||
case Intrinsic::x86_sse_min_ps:
|
||||
case Intrinsic::x86_sse2_min_pd:
|
||||
case Intrinsic::x86_avx_min_ps_256:
|
||||
case Intrinsic::x86_avx_min_pd_256: {
|
||||
unsigned Opcode;
|
||||
switch (IntNo) {
|
||||
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
|
||||
case Intrinsic::x86_sse_max_ps:
|
||||
case Intrinsic::x86_sse2_max_pd:
|
||||
case Intrinsic::x86_avx_max_ps_256:
|
||||
case Intrinsic::x86_avx_max_pd_256:
|
||||
Opcode = X86ISD::FMAX;
|
||||
break;
|
||||
case Intrinsic::x86_sse_min_ps:
|
||||
case Intrinsic::x86_sse2_min_pd:
|
||||
case Intrinsic::x86_avx_min_ps_256:
|
||||
case Intrinsic::x86_avx_min_pd_256:
|
||||
Opcode = X86ISD::FMIN;
|
||||
break;
|
||||
}
|
||||
return DAG.getNode(Opcode, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
}
|
||||
|
||||
// AVX2 variable shift intrinsics
|
||||
case Intrinsic::x86_avx2_psllv_d:
|
||||
case Intrinsic::x86_avx2_psllv_q:
|
||||
|
@ -710,21 +710,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
|
||||
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
|
||||
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
|
||||
{ X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
|
||||
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
|
||||
{ X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 },
|
||||
{ X86::MAXSDrr, X86::MAXSDrm, 0 },
|
||||
{ X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
|
||||
{ X86::MAXSSrr, X86::MAXSSrm, 0 },
|
||||
{ X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
|
||||
{ X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
|
||||
{ X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 },
|
||||
{ X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
|
||||
{ X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 },
|
||||
{ X86::MINSDrr, X86::MINSDrm, 0 },
|
||||
{ X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
|
||||
{ X86::MINSSrr, X86::MINSSrm, 0 },
|
||||
{ X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
|
||||
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
|
||||
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
|
||||
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
|
||||
@ -896,21 +888,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
|
||||
{ X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
|
||||
{ X86::VMAXPDrr, X86::VMAXPDrm, 0 },
|
||||
{ X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, 0 },
|
||||
{ X86::VMAXPSrr, X86::VMAXPSrm, 0 },
|
||||
{ X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, 0 },
|
||||
{ X86::VMAXSDrr, X86::VMAXSDrm, 0 },
|
||||
{ X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
|
||||
{ X86::VMAXSSrr, X86::VMAXSSrm, 0 },
|
||||
{ X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
|
||||
{ X86::VMINPDrr, X86::VMINPDrm, 0 },
|
||||
{ X86::VMINPDrr_Int, X86::VMINPDrm_Int, 0 },
|
||||
{ X86::VMINPSrr, X86::VMINPSrm, 0 },
|
||||
{ X86::VMINPSrr_Int, X86::VMINPSrm_Int, 0 },
|
||||
{ X86::VMINSDrr, X86::VMINSDrm, 0 },
|
||||
{ X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
|
||||
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
|
||||
{ X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
|
||||
{ X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
|
||||
{ X86::VMULPDrr, X86::VMULPDrm, 0 },
|
||||
{ X86::VMULPSrr, X86::VMULPSrm, 0 },
|
||||
@ -1037,13 +1021,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
|
||||
{ X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
|
||||
{ X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
|
||||
{ X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, 0 },
|
||||
{ X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
|
||||
{ X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, 0 },
|
||||
{ X86::VMINPDYrr, X86::VMINPDYrm, 0 },
|
||||
{ X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, 0 },
|
||||
{ X86::VMINPSYrr, X86::VMINPSYrm, 0 },
|
||||
{ X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, 0 },
|
||||
{ X86::VMULPDYrr, X86::VMULPDYrm, 0 },
|
||||
{ X86::VMULPSYrr, X86::VMULPSYrm, 0 },
|
||||
{ X86::VORPDYrr, X86::VORPDYrm, 0 },
|
||||
|
@ -217,27 +217,6 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
|
||||
pat_rm, IIC_DEFAULT, d>;
|
||||
}
|
||||
|
||||
/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
|
||||
multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
string asm, string SSEVer, string FPSizeStr,
|
||||
X86MemOperand x86memop, PatFrag mem_frag,
|
||||
Domain d, OpndItins itins, bit Is2Addr = 1> {
|
||||
def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (!cast<Intrinsic>(
|
||||
!strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
|
||||
RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
|
||||
def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (!cast<Intrinsic>(
|
||||
!strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
|
||||
RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Non-instruction patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2864,37 +2843,6 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
|
||||
itins.d, Is2Addr>, XD;
|
||||
}
|
||||
|
||||
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
|
||||
SizeItins itins> {
|
||||
let Predicates = [HasAVX] in {
|
||||
defm V#NAME#PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
|
||||
SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
|
||||
|
||||
defm V#NAME#PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem,
|
||||
memopv2f64, SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;
|
||||
|
||||
defm V#NAME#PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
|
||||
!strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem,
|
||||
memopv8f32, SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
|
||||
|
||||
defm V#NAME#PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
|
||||
!strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem,
|
||||
memopv4f64, SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem,
|
||||
memopv4f32, SSEPackedSingle, itins.s, 1>, TB;
|
||||
|
||||
defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem,
|
||||
memopv2f64, SSEPackedDouble, itins.d, 1>, TB, OpSize;
|
||||
}
|
||||
}
|
||||
|
||||
// Binary Arithmetic instructions
|
||||
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>;
|
||||
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>;
|
||||
@ -2902,9 +2850,7 @@ let isCommutable = 0 in {
|
||||
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>;
|
||||
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>;
|
||||
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>;
|
||||
defm MAX : basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
|
||||
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>;
|
||||
defm MIN : basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
|
||||
}
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
|
Loading…
Reference in New Issue
Block a user