# NOTE(review): line breaks and indentation below were re-established from a
# whitespace-collapsed copy of this patch. Blank context lines and the exact
# original indentation are not recoverable from that copy -- verify against
# the upstream commit before applying.
#
# include/llvm/IntrinsicsX86.td: adds the int_x86_3dnow_* intrinsics
# (pavgusb ... pmulhrw) and the int_x86_3dnowa_* intrinsics (pf2iw, pfnacc,
# pfpnacc, pi2fw, pswapd). Each returns llvm_x86mmx_ty, takes one or two
# llvm_x86mmx_ty operands and is marked IntrNoMem. All of them carry a
# GCCBuiltin mapping except int_x86_3dnowa_pswapd.
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 06b482e2f5f..b44101a11c0 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -17,6 +17,83 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
 }
+//===----------------------------------------------------------------------===//
+// 3DNow!
+
+let TargetPrefix = "x86" in {
+  def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// 3DNow! extensions
+
+let TargetPrefix = "x86" in {
+  def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+                [IntrNoMem]>;
+  def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+  def int_x86_3dnowa_pswapd :
+      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE1
# lib/Target/X86/X86.td: Feature3DNow now lists FeatureMMX as an implied
# feature, and the explicit FeatureMMX is dropped from the k6-2, k6-3, athlon,
# athlon-tbird, winchip2 and c3 processor entries.
# NOTE(review): the unchanged "shanghai" context lists Feature3DNowA twice --
# looks like a pre-existing duplicate; confirm against the target file.
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 00ff97ea4e8..912dff0f1d0 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -50,7 +50,8 @@ def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
                                       "Enable SSE 4.2 instructions",
                                       [FeatureSSE41, FeaturePOPCNT]>;
 def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
-                                      "Enable 3DNow! instructions">;
+                                      "Enable 3DNow! instructions",
+                                      [FeatureMMX]>;
 def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                                       "Enable 3DNow! Athlon instructions",
                                       [Feature3DNow]>;
@@ -125,10 +126,10 @@ def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
                                FeatureAES, FeatureCLMUL]>;
 def : Proc<"k6", [FeatureMMX]>;
-def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
-def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
-def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k6-2", [Feature3DNow]>;
+def : Proc<"k6-3", [Feature3DNow]>;
+def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
 def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
 def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
 def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
@@ -156,8 +157,8 @@ def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
                                Feature3DNowA]>;
 def : Proc<"winchip-c6", [FeatureMMX]>;
-def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"winchip2", [Feature3DNow]>;
+def : Proc<"c3", [Feature3DNow]>;
 def : Proc<"c3-2", [FeatureSSE1]>;
 //===----------------------------------------------------------------------===//
# lib/Target/X86/X86Instr3DNow.td: removes the "no intrinsics yet" FIXME,
# moves the "$src1 = $dst" constraint from a surrounding `let` into class
# I3DNow_binop, adds class I3DNow_conv (same body, no constraint) and the
# I3DNow_binop_rm_int / I3DNow_conv_rm / I3DNow_conv_rm_int multiclasses.
# The *_int multiclasses build an intrinsic name with
# !strconcat("int_x86_3dnow", Ver, "_", Mn). Every defm switches to an *_int
# multiclass; pf2id, pfrcp, pfrsqrt and pi2fd use the conv form.
# NOTE(review): template argument lists appear to have been stripped from this
# section (e.g. "class I3DNow o, ...", "def rr : I3DNow_binop;", "list pat>")
# -- presumably by HTML-style tag removal. The text is kept exactly as found;
# restore the arguments from the upstream file. TODO confirm.
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 45d1c6bc9d2..dd4f6a5a85a 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -12,66 +12,91 @@
 //
 //===----------------------------------------------------------------------===//
-// FIXME: We don't support any intrinsics for these instructions yet.
-
-class I3DNow o, Format F, dag outs, dag ins, string asm,
-             list pattern>
-      : I, TB, Requires<[Has3DNow]> {
+class I3DNow o, Format F, dag outs, dag ins, string asm, list pat>
+      : I, TB, Requires<[Has3DNow]> {
 }
-class I3DNow_binop o, Format F, dag ins, string Mnemonic>
-      : I,
-        TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+class I3DNow_binop o, Format F, dag ins, string Mnemonic, list pat>
+      : I3DNow,
+        Has3DNow0F0FOpcode {
+  // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+  let isAsmParserOnly = 1;
+  let Constraints = "$src1 = $dst";
+}
+
+class I3DNow_conv o, Format F, dag ins, string Mnemonic, list pat>
+      : I3DNow,
+        Has3DNow0F0FOpcode {
   // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
   let isAsmParserOnly = 1;
 }
-
-let Constraints = "$src1 = $dst" in {
-  // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
-  // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
-  multiclass I3DNow_binop_rm opc, string Mn> {
-    def rr : I3DNow_binop;
-    def rm : I3DNow_binop;
-  }
+multiclass I3DNow_binop_rm opc, string Mn> {
+  def rr : I3DNow_binop;
+  def rm : I3DNow_binop;
 }
-defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
-defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
-defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
-defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
-defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
-defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
-defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
-defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
-defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
-defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
-defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
-defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
-defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
-defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
-defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
+multiclass I3DNow_binop_rm_int opc, string Mn, string Ver = ""> {
+  def rr : I3DNow_binop(
+      !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+  def rm : I3DNow_binop(
+      !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+        (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm opc, string Mn> {
+  def rr : I3DNow_conv;
+  def rm : I3DNow_conv;
+}
+
+multiclass I3DNow_conv_rm_int opc, string Mn, string Ver = ""> {
+  def rr : I3DNow_conv(
+      !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+  def rm : I3DNow_conv(
+      !strconcat("int_x86_3dnow", Ver, "_", Mn))
+        (bitconvert (load_mmx addr:$src))))]>;
+}
+
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
 def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
 def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
                       "prefetch $addr", []>;
-
+
 // FIXME: Diassembler gets a bogus decode conflict.
-let isAsmParserOnly = 1 in {
+let isAsmParserOnly = 1 in
 def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
                        "prefetchw $addr", []>;
-}
 // "3DNowA" instructions
-defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">;
-defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">;
-defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">;
-defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">;
-defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">;
+defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
# test/CodeGen/X86/3dnow-intrinsics.ll (new file): one function per intrinsic,
# plus two for pswapd (<2 x float> and <2 x i32>). Each function bitcasts its
# arguments to x86_mmx, calls the intrinsic, bitcasts the result back, and has
# a single CHECK line naming the expected mnemonic.
# NOTE(review): line breaks and indentation were re-established from a
# whitespace-collapsed copy; the added-line count matches the hunk header (297).
# NOTE(review): RUN enables only +3dnow although llvm.x86.3dnowa.* intrinsics
# are called. This relies on the 3DNowA defms deriving from class I3DNow,
# which is Requires<[Has3DNow]>; consider -mattr=+3dnowa -- confirm.
# NOTE(review): "CHECK: pfsub" and "CHECK: pfrcp" are prefixes of the next
# functions' mnemonics (pfsubr, pfrcpit1), so a missing instruction could
# presumably be masked by the following match -- verify FileCheck behaviour.
diff --git a/test/CodeGen/X86/3dnow-intrinsics.ll b/test/CodeGen/X86/3dnow-intrinsics.ll
new file mode 100644
index 00000000000..0b27bf2d185
--- /dev/null
+++ b/test/CodeGen/X86/3dnow-intrinsics.ll
@@ -0,0 +1,297 @@
+; RUN: llc < %s -march=x86 -mattr=+3dnow | FileCheck %s
+
+define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
+; CHECK: pavgusb
+entry:
+  %0 = bitcast x86_mmx %a.coerce to <8 x i8>
+  %1 = bitcast x86_mmx %b.coerce to <8 x i8>
+  %2 = bitcast <8 x i8> %0 to x86_mmx
+  %3 = bitcast <8 x i8> %1 to x86_mmx
+  %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
+  %5 = bitcast x86_mmx %4 to <8 x i8>
+  ret <8 x i8> %5
+}
+
+declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
+; CHECK: pf2id
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfacc
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfadd
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfcmpeq
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  ret <2 x i32> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfcmpge
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  ret <2 x i32> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfcmpgt
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x i32>
+  ret <2 x i32> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfmax
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfmin
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfmul
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
+; CHECK: pfrcp
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x float>
+  ret <2 x float> %2
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfrcpit1
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfrcpit2
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
+; CHECK: pfrsqrt
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x float>
+  ret <2 x float> %2
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfrsqit1
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfsub
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfsubr
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
+; CHECK: pi2fd
+entry:
+  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
+  %1 = bitcast <2 x i32> %0 to x86_mmx
+  %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
+
+define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
+; CHECK: pmulhrw
+entry:
+  %0 = bitcast x86_mmx %a.coerce to <4 x i16>
+  %1 = bitcast x86_mmx %b.coerce to <4 x i16>
+  %2 = bitcast <4 x i16> %0 to x86_mmx
+  %3 = bitcast <4 x i16> %1 to x86_mmx
+  %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
+  %5 = bitcast x86_mmx %4 to <4 x i16>
+  ret <4 x i16> %5
+}
+
+declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
+; CHECK: pf2iw
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfnacc
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
+; CHECK: pfpnacc
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = bitcast <2 x float> %b to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
+; CHECK: pi2fw
+entry:
+  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
+  %1 = bitcast <2 x i32> %0 to x86_mmx
+  %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
+  %3 = bitcast x86_mmx %2 to <2 x float>
+  ret <2 x float> %3
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
+
+define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
+; CHECK: pswapd
+entry:
+  %0 = bitcast <2 x float> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x float>
+  ret <2 x float> %2
+}
+
+define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
+; CHECK: pswapd
+entry:
+  %0 = bitcast <2 x i32> %a to x86_mmx
+  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
+  %2 = bitcast x86_mmx %1 to <2 x i32>
+  ret <2 x i32> %2
+}
+
+declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone