1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[FastISel][X86] Add support for the sqrt intrinsic.

llvm-svn: 210720
This commit is contained in:
Juergen Ributzka 2014-06-11 23:11:02 +00:00
parent e3f06ad8fe
commit f8b4c7a498
2 changed files with 78 additions and 0 deletions

View File

@ -1781,6 +1781,58 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
return true;
}
case Intrinsic::sqrt: {
if (!Subtarget->hasSSE1())
return false;
Type *RetTy = I.getCalledFunction()->getReturnType();
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
// Unfortunatelly we can't use FastEmit_r, because the AVX version of FSQRT
// is not generated by FastISel yet.
// FIXME: Update this code once tablegen can handle it.
static const unsigned SqrtOpc[2][2] = {
{X86::SQRTSSr, X86::VSQRTSSr},
{X86::SQRTSDr, X86::VSQRTSDr}
};
bool HasAVX = Subtarget->hasAVX();
unsigned Opc;
const TargetRegisterClass *RC;
switch (VT.SimpleTy) {
default: return false;
case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
}
const Value *SrcVal = I.getArgOperand(0);
unsigned SrcReg = getRegForValue(SrcVal);
if (SrcReg == 0)
return false;
unsigned ImplicitDefReg = 0;
if (HasAVX) {
ImplicitDefReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
}
unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg);
if (ImplicitDefReg)
MIB.addReg(ImplicitDefReg);
MIB.addReg(SrcReg);
UpdateValueMap(&I, ResultReg);
return true;
}
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:

26
test/CodeGen/X86/sqrt.ll Normal file
View File

@ -0,0 +1,26 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX
define float @test_sqrt_f32(float %a) {
; SSE2-LABEL: test_sqrt_f32
; SSE2: sqrtss %xmm0, %xmm0
; AVX-LABEL: test_sqrt_f32
; AVX: vsqrtss %xmm0, %xmm0
%res = call float @llvm.sqrt.f32(float %a)
ret float %res
}
declare float @llvm.sqrt.f32(float) nounwind readnone
define double @test_sqrt_f64(double %a) {
; SSE2-LABEL: test_sqrt_f64
; SSE2: sqrtsd %xmm0, %xmm0
; AVX-LABEL: test_sqrt_f64
; AVX: vsqrtsd %xmm0, %xmm0
%res = call double @llvm.sqrt.f64(double %a)
ret double %res
}
declare double @llvm.sqrt.f64(double) nounwind readnone