1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 19:52:54 +01:00

Remove intrinsic specific instructions for SSE/SSE2/AVX floating point max/min instructions. Lower them to target specific nodes and use those patterns instead. This also allows them to be commuted if UnsafeFPMath is enabled.

llvm-svn: 171227
This commit is contained in:
Craig Topper 2012-12-29 16:44:25 +00:00
parent f9fc85f71d
commit 93fdde7fff
3 changed files with 29 additions and 74 deletions

View File

@ -10400,6 +10400,35 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::SMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
// SSE/SSE2/AVX floating point max/min intrinsics.
case Intrinsic::x86_sse_max_ps:
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_max_ps:
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
Opcode = X86ISD::FMAX;
break;
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256:
Opcode = X86ISD::FMIN;
break;
}
return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
// AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:

View File

@ -710,21 +710,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
{ X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
{ X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 },
{ X86::MAXSDrr, X86::MAXSDrm, 0 },
{ X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
{ X86::MAXSSrr, X86::MAXSSrm, 0 },
{ X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
{ X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
{ X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 },
{ X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
{ X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 },
{ X86::MINSDrr, X86::MINSDrm, 0 },
{ X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
{ X86::MINSSrr, X86::MINSSrm, 0 },
{ X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
@ -896,21 +888,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
{ X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
{ X86::VMAXPDrr, X86::VMAXPDrm, 0 },
{ X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, 0 },
{ X86::VMAXPSrr, X86::VMAXPSrm, 0 },
{ X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, 0 },
{ X86::VMAXSDrr, X86::VMAXSDrm, 0 },
{ X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
{ X86::VMAXSSrr, X86::VMAXSSrm, 0 },
{ X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
{ X86::VMINPDrr, X86::VMINPDrm, 0 },
{ X86::VMINPDrr_Int, X86::VMINPDrm_Int, 0 },
{ X86::VMINPSrr, X86::VMINPSrm, 0 },
{ X86::VMINPSrr_Int, X86::VMINPSrm_Int, 0 },
{ X86::VMINSDrr, X86::VMINSDrm, 0 },
{ X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
{ X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
{ X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
{ X86::VMULPDrr, X86::VMULPDrm, 0 },
{ X86::VMULPSrr, X86::VMULPSrm, 0 },
@ -1037,13 +1021,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
{ X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
{ X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
{ X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, 0 },
{ X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
{ X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, 0 },
{ X86::VMINPDYrr, X86::VMINPDYrm, 0 },
{ X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, 0 },
{ X86::VMINPSYrr, X86::VMINPSYrm, 0 },
{ X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, 0 },
{ X86::VMULPDYrr, X86::VMULPDYrm, 0 },
{ X86::VMULPSYrr, X86::VMULPSYrm, 0 },
{ X86::VORPDYrr, X86::VORPDYrm, 0 },

View File

@ -217,27 +217,6 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
pat_rm, IIC_DEFAULT, d>;
}
/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
string asm, string SSEVer, string FPSizeStr,
X86MemOperand x86memop, PatFrag mem_frag,
Domain d, OpndItins itins, bit Is2Addr = 1> {
def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
}
//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
@ -2864,37 +2843,6 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
itins.d, Is2Addr>, XD;
}
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
SizeItins itins> {
let Predicates = [HasAVX] in {
defm V#NAME#PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem,
memopv2f64, SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;
defm V#NAME#PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
!strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem,
memopv8f32, SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
defm V#NAME#PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
!strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem,
memopv4f64, SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem,
memopv4f32, SSEPackedSingle, itins.s, 1>, TB;
defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem,
memopv2f64, SSEPackedDouble, itins.d, 1>, TB, OpSize;
}
}
// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>;
@ -2902,9 +2850,7 @@ let isCommutable = 0 in {
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>;
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>;
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>;
defm MAX : basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>;
defm MIN : basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
}
let isCodeGenOnly = 1 in {