mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Support for encoding all FMA4 instructions and tablegen patterns for all
remaining FMA4 instructions and intrinsics with tests. llvm-svn: 145525
This commit is contained in:
parent
2d9e4aa665
commit
2dfb343ffa
@ -1825,10 +1825,138 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
// FMA4
//
// Every intrinsic in this section has the same shape: it is bound to a GCC
// builtin, takes three vector operands of one type, returns a vector of that
// same type, and is marked IntrNoMem.

// Common shape of the FMA4 intrinsics: Builtin is the GCC builtin name and Ty
// is the single vector type used for the result and all three operands.
class X86FMA4Intrinsic<string Builtin, LLVMType Ty>
  : GCCBuiltin<Builtin>, Intrinsic<[Ty], [Ty, Ty, Ty], [IntrNoMem]>;

let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // vfmadd
  def int_x86_fma4_vfmadd_ss :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddss", llvm_v4f32_ty>;
  def int_x86_fma4_vfmadd_sd :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddsd", llvm_v2f64_ty>;
  def int_x86_fma4_vfmadd_ps :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddps", llvm_v4f32_ty>;
  def int_x86_fma4_vfmadd_pd :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddpd", llvm_v2f64_ty>;
  def int_x86_fma4_vfmadd_ps_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddps256", llvm_v8f32_ty>;
  def int_x86_fma4_vfmadd_pd_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddpd256", llvm_v4f64_ty>;

  // vfmsub
  def int_x86_fma4_vfmsub_ss :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubss", llvm_v4f32_ty>;
  def int_x86_fma4_vfmsub_sd :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubsd", llvm_v2f64_ty>;
  def int_x86_fma4_vfmsub_ps :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubps", llvm_v4f32_ty>;
  def int_x86_fma4_vfmsub_pd :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubpd", llvm_v2f64_ty>;
  def int_x86_fma4_vfmsub_ps_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubps256", llvm_v8f32_ty>;
  def int_x86_fma4_vfmsub_pd_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubpd256", llvm_v4f64_ty>;

  // vfnmadd
  def int_x86_fma4_vfnmadd_ss :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmaddss", llvm_v4f32_ty>;
  def int_x86_fma4_vfnmadd_sd :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmaddsd", llvm_v2f64_ty>;
  def int_x86_fma4_vfnmadd_ps :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmaddps", llvm_v4f32_ty>;
  def int_x86_fma4_vfnmadd_pd :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmaddpd", llvm_v2f64_ty>;
  def int_x86_fma4_vfnmadd_ps_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmaddps256", llvm_v8f32_ty>;
  def int_x86_fma4_vfnmadd_pd_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmaddpd256", llvm_v4f64_ty>;

  // vfnmsub
  def int_x86_fma4_vfnmsub_ss :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmsubss", llvm_v4f32_ty>;
  def int_x86_fma4_vfnmsub_sd :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmsubsd", llvm_v2f64_ty>;
  def int_x86_fma4_vfnmsub_ps :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmsubps", llvm_v4f32_ty>;
  def int_x86_fma4_vfnmsub_pd :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmsubpd", llvm_v2f64_ty>;
  def int_x86_fma4_vfnmsub_ps_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmsubps256", llvm_v8f32_ty>;
  def int_x86_fma4_vfnmsub_pd_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfnmsubpd256", llvm_v4f64_ty>;

  // vfmaddsub (packed forms only)
  def int_x86_fma4_vfmaddsub_ps :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddsubps", llvm_v4f32_ty>;
  def int_x86_fma4_vfmaddsub_pd :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddsubpd", llvm_v2f64_ty>;
  def int_x86_fma4_vfmaddsub_ps_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddsubps256", llvm_v8f32_ty>;
  def int_x86_fma4_vfmaddsub_pd_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfmaddsubpd256", llvm_v4f64_ty>;

  // vfmsubadd (packed forms only)
  def int_x86_fma4_vfmsubadd_ps :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubaddps", llvm_v4f32_ty>;
  def int_x86_fma4_vfmsubadd_pd :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubaddpd", llvm_v2f64_ty>;
  def int_x86_fma4_vfmsubadd_ps_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubaddps256", llvm_v8f32_ty>;
  def int_x86_fma4_vfmsubadd_pd_256 :
      X86FMA4Intrinsic<"__builtin_ia32_vfmsubaddpd256", llvm_v4f64_ty>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -83,12 +83,74 @@ multiclass fma4s<bits<8> opc, string OpcodeStr> {
|
||||
|
||||
}
|
||||
|
||||
// fma4p - Packed FMA4 instruction forms for a single opcode.
//
// Parameters:
//   opc       - 8-bit opcode passed through to the FMA4 format class.
//   OpcodeStr - mnemonic; the operand list is appended with !strconcat.
//
// Six records are produced.  rr/rm/mr use VR128 registers (rm takes an
// f128mem operand as $src3, mr takes it as $src2); rrY/rmY/mrY are the
// VR256/f256mem counterparts.  No selection pattern is attached here ([]).
//
// Every form must print its operands in the same order:
// "$src3, $src2, $src1, $dst" in the first syntax alternative and
// "$dst, $src1, $src2, $src3" in the second.  The register-register forms
// previously listed $src2 and $src3 swapped relative to the memory forms,
// which would make the same instruction read differently depending on
// whether an operand happened to be in memory.
//
// NOTE(review): XOP_W is applied to the rr/rm forms but not to mr; presumably
// it selects which source occupies the r/m slot - confirm against the FMA4
// format class and the code emitter.
multiclass fma4p<bits<8> opc, string OpcodeStr> {
  // 128-bit, all sources in registers.
  def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>, XOP_W;
  // 128-bit, third source in memory.
  def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, f128mem:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>, XOP_W;
  // 128-bit, second source in memory.
  def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, f128mem:$src2, VR128:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>;
  // 256-bit, all sources in registers.
  def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
           (ins VR256:$src1, VR256:$src2, VR256:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>, XOP_W;
  // 256-bit, third source in memory.
  def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, VR256:$src2, f256mem:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>, XOP_W;
  // 256-bit, second source in memory.
  def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, f256mem:$src2, VR256:$src3),
           !strconcat(OpcodeStr,
           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
           []>;
}
|
||||
|
||||
// Instantiate the FMA4 instructions.  Mnemonics ending in ss/sd go through the
// fma4s multiclass, those ending in ps/pd through fma4p; each line supplies
// the opcode byte and the mnemonic string.
let isAsmParserOnly = 1 in {
  // vfmadd
  defm VFMADDSS4    : fma4s<0x6A, "vfmaddss">;
  defm VFMADDSD4    : fma4s<0x6B, "vfmaddsd">;
  defm VFMADDPS4    : fma4p<0x68, "vfmaddps">;
  defm VFMADDPD4    : fma4p<0x69, "vfmaddpd">;

  // vfmsub
  defm VFMSUBSS4    : fma4s<0x6E, "vfmsubss">;
  defm VFMSUBSD4    : fma4s<0x6F, "vfmsubsd">;
  defm VFMSUBPS4    : fma4p<0x6C, "vfmsubps">;
  defm VFMSUBPD4    : fma4p<0x6D, "vfmsubpd">;

  // vfnmadd
  defm VFNMADDSS4   : fma4s<0x7A, "vfnmaddss">;
  defm VFNMADDSD4   : fma4s<0x7B, "vfnmaddsd">;
  defm VFNMADDPS4   : fma4p<0x78, "vfnmaddps">;
  defm VFNMADDPD4   : fma4p<0x79, "vfnmaddpd">;

  // vfnmsub
  defm VFNMSUBSS4   : fma4s<0x7E, "vfnmsubss">;
  defm VFNMSUBSD4   : fma4s<0x7F, "vfnmsubsd">;
  defm VFNMSUBPS4   : fma4p<0x7C, "vfnmsubps">;
  defm VFNMSUBPD4   : fma4p<0x7D, "vfnmsubpd">;

  // vfmaddsub / vfmsubadd (packed only)
  defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps">;
  defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd">;
  defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps">;
  defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd">;
}
|
||||
|
||||
// FMA4 Intrinsics patterns
//
// Each intrinsic is mapped onto up to three instruction forms: all-register
// (rr), aligned load folded into the third source (rm), and aligned load
// folded into the second source (mr).

// VFMADD
def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1,
                                  VR128:$src2,
                                  (alignedloadv4f32 addr:$src3)),
          (VFMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1,
                                  (alignedloadv4f32 addr:$src2),
                                  VR128:$src3),
          (VFMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;

def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
          (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
|
||||
def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
|
||||
@ -97,3 +159,290 @@ def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
|
||||
// Helper classes for the FMA4 intrinsic patterns below.  Each one expands to
// a single Pat mapping intrinsic Int onto instruction Inst with register
// class RC for the register operands.
//
//   FMA4PatRR - all three sources are registers.
//   FMA4PatRM - the third source is an LdFrag load from addr:$src3.
//   FMA4PatMR - the second source is an LdFrag load from addr:$src2.
class FMA4PatRR<Intrinsic Int, Instruction Inst, RegisterClass RC>
  : Pat<(Int RC:$src1, RC:$src2, RC:$src3),
        (Inst RC:$src1, RC:$src2, RC:$src3)>;
class FMA4PatRM<Intrinsic Int, Instruction Inst, RegisterClass RC,
                PatFrag LdFrag>
  : Pat<(Int RC:$src1, RC:$src2, (LdFrag addr:$src3)),
        (Inst RC:$src1, RC:$src2, addr:$src3)>;
class FMA4PatMR<Intrinsic Int, Instruction Inst, RegisterClass RC,
                PatFrag LdFrag>
  : Pat<(Int RC:$src1, (LdFrag addr:$src2), RC:$src3),
        (Inst RC:$src1, addr:$src2, RC:$src3)>;

// VFMADD (continued)
def : FMA4PatMR<int_x86_fma4_vfmadd_sd, VFMADDSD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfmadd_ps, VFMADDPS4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmadd_ps, VFMADDPS4rm,
                VR128, alignedloadv4f32>;
def : FMA4PatMR<int_x86_fma4_vfmadd_ps, VFMADDPS4mr,
                VR128, alignedloadv4f32>;

def : FMA4PatRR<int_x86_fma4_vfmadd_pd, VFMADDPD4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmadd_pd, VFMADDPD4rm,
                VR128, alignedloadv2f64>;
def : FMA4PatMR<int_x86_fma4_vfmadd_pd, VFMADDPD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfmadd_ps_256, VFMADDPS4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfmadd_ps_256, VFMADDPS4rmY,
                VR256, alignedloadv8f32>;
def : FMA4PatMR<int_x86_fma4_vfmadd_ps_256, VFMADDPS4mrY,
                VR256, alignedloadv8f32>;

def : FMA4PatRR<int_x86_fma4_vfmadd_pd_256, VFMADDPD4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfmadd_pd_256, VFMADDPD4rmY,
                VR256, alignedloadv4f64>;
def : FMA4PatMR<int_x86_fma4_vfmadd_pd_256, VFMADDPD4mrY,
                VR256, alignedloadv4f64>;

// VFMSUB
def : FMA4PatRR<int_x86_fma4_vfmsub_ss, VFMSUBSS4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmsub_ss, VFMSUBSS4rm,
                VR128, alignedloadv4f32>;
def : FMA4PatMR<int_x86_fma4_vfmsub_ss, VFMSUBSS4mr,
                VR128, alignedloadv4f32>;

def : FMA4PatRR<int_x86_fma4_vfmsub_sd, VFMSUBSD4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmsub_sd, VFMSUBSD4rm,
                VR128, alignedloadv2f64>;
def : FMA4PatMR<int_x86_fma4_vfmsub_sd, VFMSUBSD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfmsub_ps, VFMSUBPS4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmsub_ps, VFMSUBPS4rm,
                VR128, alignedloadv4f32>;
def : FMA4PatMR<int_x86_fma4_vfmsub_ps, VFMSUBPS4mr,
                VR128, alignedloadv4f32>;

def : FMA4PatRR<int_x86_fma4_vfmsub_pd, VFMSUBPD4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmsub_pd, VFMSUBPD4rm,
                VR128, alignedloadv2f64>;
def : FMA4PatMR<int_x86_fma4_vfmsub_pd, VFMSUBPD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfmsub_ps_256, VFMSUBPS4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfmsub_ps_256, VFMSUBPS4rmY,
                VR256, alignedloadv8f32>;
def : FMA4PatMR<int_x86_fma4_vfmsub_ps_256, VFMSUBPS4mrY,
                VR256, alignedloadv8f32>;

def : FMA4PatRR<int_x86_fma4_vfmsub_pd_256, VFMSUBPD4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfmsub_pd_256, VFMSUBPD4rmY,
                VR256, alignedloadv4f64>;
def : FMA4PatMR<int_x86_fma4_vfmsub_pd_256, VFMSUBPD4mrY,
                VR256, alignedloadv4f64>;

// VFNMADD
def : FMA4PatRR<int_x86_fma4_vfnmadd_ss, VFNMADDSS4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfnmadd_ss, VFNMADDSS4rm,
                VR128, alignedloadv4f32>;
def : FMA4PatMR<int_x86_fma4_vfnmadd_ss, VFNMADDSS4mr,
                VR128, alignedloadv4f32>;

def : FMA4PatRR<int_x86_fma4_vfnmadd_sd, VFNMADDSD4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfnmadd_sd, VFNMADDSD4rm,
                VR128, alignedloadv2f64>;
def : FMA4PatMR<int_x86_fma4_vfnmadd_sd, VFNMADDSD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfnmadd_ps, VFNMADDPS4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfnmadd_ps, VFNMADDPS4rm,
                VR128, alignedloadv4f32>;
def : FMA4PatMR<int_x86_fma4_vfnmadd_ps, VFNMADDPS4mr,
                VR128, alignedloadv4f32>;

def : FMA4PatRR<int_x86_fma4_vfnmadd_pd, VFNMADDPD4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfnmadd_pd, VFNMADDPD4rm,
                VR128, alignedloadv2f64>;
def : FMA4PatMR<int_x86_fma4_vfnmadd_pd, VFNMADDPD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfnmadd_ps_256, VFNMADDPS4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfnmadd_ps_256, VFNMADDPS4rmY,
                VR256, alignedloadv8f32>;
def : FMA4PatMR<int_x86_fma4_vfnmadd_ps_256, VFNMADDPS4mrY,
                VR256, alignedloadv8f32>;

def : FMA4PatRR<int_x86_fma4_vfnmadd_pd_256, VFNMADDPD4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfnmadd_pd_256, VFNMADDPD4rmY,
                VR256, alignedloadv4f64>;
def : FMA4PatMR<int_x86_fma4_vfnmadd_pd_256, VFNMADDPD4mrY,
                VR256, alignedloadv4f64>;

// VFNMSUB
def : FMA4PatRR<int_x86_fma4_vfnmsub_ss, VFNMSUBSS4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfnmsub_ss, VFNMSUBSS4rm,
                VR128, alignedloadv4f32>;
def : FMA4PatMR<int_x86_fma4_vfnmsub_ss, VFNMSUBSS4mr,
                VR128, alignedloadv4f32>;

def : FMA4PatRR<int_x86_fma4_vfnmsub_sd, VFNMSUBSD4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfnmsub_sd, VFNMSUBSD4rm,
                VR128, alignedloadv2f64>;
def : FMA4PatMR<int_x86_fma4_vfnmsub_sd, VFNMSUBSD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfnmsub_ps, VFNMSUBPS4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfnmsub_ps, VFNMSUBPS4rm,
                VR128, alignedloadv4f32>;
def : FMA4PatMR<int_x86_fma4_vfnmsub_ps, VFNMSUBPS4mr,
                VR128, alignedloadv4f32>;

def : FMA4PatRR<int_x86_fma4_vfnmsub_pd, VFNMSUBPD4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfnmsub_pd, VFNMSUBPD4rm,
                VR128, alignedloadv2f64>;
def : FMA4PatMR<int_x86_fma4_vfnmsub_pd, VFNMSUBPD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfnmsub_ps_256, VFNMSUBPS4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfnmsub_ps_256, VFNMSUBPS4rmY,
                VR256, alignedloadv8f32>;
def : FMA4PatMR<int_x86_fma4_vfnmsub_ps_256, VFNMSUBPS4mrY,
                VR256, alignedloadv8f32>;

def : FMA4PatRR<int_x86_fma4_vfnmsub_pd_256, VFNMSUBPD4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfnmsub_pd_256, VFNMSUBPD4rmY,
                VR256, alignedloadv4f64>;
def : FMA4PatMR<int_x86_fma4_vfnmsub_pd_256, VFNMSUBPD4mrY,
                VR256, alignedloadv4f64>;

// VFMADDSUB
def : FMA4PatRR<int_x86_fma4_vfmaddsub_ps, VFMADDSUBPS4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmaddsub_ps, VFMADDSUBPS4rm,
                VR128, alignedloadv4f32>;
def : FMA4PatMR<int_x86_fma4_vfmaddsub_ps, VFMADDSUBPS4mr,
                VR128, alignedloadv4f32>;

def : FMA4PatRR<int_x86_fma4_vfmaddsub_pd, VFMADDSUBPD4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmaddsub_pd, VFMADDSUBPD4rm,
                VR128, alignedloadv2f64>;
def : FMA4PatMR<int_x86_fma4_vfmaddsub_pd, VFMADDSUBPD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfmaddsub_ps_256, VFMADDSUBPS4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfmaddsub_ps_256, VFMADDSUBPS4rmY,
                VR256, alignedloadv8f32>;
def : FMA4PatMR<int_x86_fma4_vfmaddsub_ps_256, VFMADDSUBPS4mrY,
                VR256, alignedloadv8f32>;

def : FMA4PatRR<int_x86_fma4_vfmaddsub_pd_256, VFMADDSUBPD4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfmaddsub_pd_256, VFMADDSUBPD4rmY,
                VR256, alignedloadv4f64>;
def : FMA4PatMR<int_x86_fma4_vfmaddsub_pd_256, VFMADDSUBPD4mrY,
                VR256, alignedloadv4f64>;

// VFMSUBADD
def : FMA4PatRR<int_x86_fma4_vfmsubadd_ps, VFMSUBADDPS4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmsubadd_ps, VFMSUBADDPS4rm,
                VR128, alignedloadv4f32>;
def : FMA4PatMR<int_x86_fma4_vfmsubadd_ps, VFMSUBADDPS4mr,
                VR128, alignedloadv4f32>;

def : FMA4PatRR<int_x86_fma4_vfmsubadd_pd, VFMSUBADDPD4rr, VR128>;
def : FMA4PatRM<int_x86_fma4_vfmsubadd_pd, VFMSUBADDPD4rm,
                VR128, alignedloadv2f64>;
def : FMA4PatMR<int_x86_fma4_vfmsubadd_pd, VFMSUBADDPD4mr,
                VR128, alignedloadv2f64>;

def : FMA4PatRR<int_x86_fma4_vfmsubadd_ps_256, VFMSUBADDPS4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfmsubadd_ps_256, VFMSUBADDPS4rmY,
                VR256, alignedloadv8f32>;
def : FMA4PatMR<int_x86_fma4_vfmsubadd_ps_256, VFMSUBADDPS4mrY,
                VR256, alignedloadv8f32>;

def : FMA4PatRR<int_x86_fma4_vfmsubadd_pd_256, VFMSUBADDPD4rrY, VR256>;
def : FMA4PatRM<int_x86_fma4_vfmsubadd_pd_256, VFMSUBADDPD4rmY,
                VR256, alignedloadv4f64>;
def : FMA4PatMR<int_x86_fma4_vfmsubadd_pd_256, VFMSUBADDPD4mrY,
                VR256, alignedloadv4f64>;
|
||||
|
@ -1,4 +1,12 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=fma4 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
|
||||
|
||||
; VFMADD
|
||||
; Calls llvm.x86.fma4.vfmadd.ss on the three arguments; the output must
; contain "vfmaddss".
define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
  ; CHECK: vfmaddss
  %fma = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %fma
}
declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfmaddsd
|
||||
@ -7,3 +15,229 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmadd.ps on three register arguments; the llc output
; is expected to contain "vfmaddps".
define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
|
||||
; CHECK: vfmaddps
|
||||
%res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
|
||||
ret < 4 x float > %res
|
||||
}
|
||||
declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmadd.pd on three register arguments; the llc output
; is expected to contain "vfmaddpd".
define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfmaddpd
|
||||
%res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
|
||||
ret < 2 x double > %res
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmadd.ps.256 on three register arguments; the llc
; output is expected to contain "vfmaddps" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
|
||||
; CHECK: vfmaddps
|
||||
; CHECK: ymm
|
||||
%res = call < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <<8 x float>> [#uses=1]
|
||||
ret < 8 x float > %res
|
||||
}
|
||||
declare < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmadd.pd.256 on three register arguments; the llc
; output is expected to contain "vfmaddpd" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 4 x double > @test_x86_fma4_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
|
||||
; CHECK: vfmaddpd
|
||||
; CHECK: ymm
|
||||
%res = call < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <<4 x double>> [#uses=1]
|
||||
ret < 4 x double > %res
|
||||
}
|
||||
declare < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
|
||||
|
||||
; VFMSUB
|
||||
; Calls llvm.x86.fma4.vfmsub.ss on three register arguments; the llc output
; is expected to contain "vfmsubss".
define < 4 x float > @test_x86_fma4_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
|
||||
; CHECK: vfmsubss
|
||||
%res = call < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
|
||||
ret < 4 x float > %res
|
||||
}
|
||||
declare < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmsub.sd on three register arguments; the llc output
; is expected to contain "vfmsubsd".
define < 2 x double > @test_x86_fma4_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfmsubsd
|
||||
%res = call < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
|
||||
ret < 2 x double > %res
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmsub.ps on three register arguments; the llc output
; is expected to contain "vfmsubps".
define < 4 x float > @test_x86_fma4_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
|
||||
; CHECK: vfmsubps
|
||||
%res = call < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
|
||||
ret < 4 x float > %res
|
||||
}
|
||||
declare < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmsub.pd on three register arguments; the llc output
; is expected to contain "vfmsubpd".
define < 2 x double > @test_x86_fma4_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfmsubpd
|
||||
%res = call < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
|
||||
ret < 2 x double > %res
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmsub.ps.256 on three register arguments; the llc
; output is expected to contain "vfmsubps" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 8 x float > @test_x86_fma4_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
|
||||
; CHECK: vfmsubps
|
||||
; CHECK: ymm
|
||||
%res = call < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <<8 x float>> [#uses=1]
|
||||
ret < 8 x float > %res
|
||||
}
|
||||
declare < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmsub.pd.256 on three register arguments; the llc
; output is expected to contain "vfmsubpd" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 4 x double > @test_x86_fma4_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
|
||||
; CHECK: vfmsubpd
|
||||
; CHECK: ymm
|
||||
%res = call < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <<4 x double>> [#uses=1]
|
||||
ret < 4 x double > %res
|
||||
}
|
||||
declare < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
|
||||
|
||||
; VFNMADD
|
||||
; Calls llvm.x86.fma4.vfnmadd.ss on three register arguments; the llc output
; is expected to contain "vfnmaddss".
define < 4 x float > @test_x86_fma4_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
|
||||
; CHECK: vfnmaddss
|
||||
%res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
|
||||
ret < 4 x float > %res
|
||||
}
|
||||
declare < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmadd.sd on three register arguments; the llc output
; is expected to contain "vfnmaddsd".
define < 2 x double > @test_x86_fma4_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfnmaddsd
|
||||
%res = call < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
|
||||
ret < 2 x double > %res
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmadd.ps on three register arguments; the llc output
; is expected to contain "vfnmaddps".
define < 4 x float > @test_x86_fma4_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
|
||||
; CHECK: vfnmaddps
|
||||
%res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
|
||||
ret < 4 x float > %res
|
||||
}
|
||||
declare < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmadd.pd on three register arguments; the llc output
; is expected to contain "vfnmaddpd".
define < 2 x double > @test_x86_fma4_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfnmaddpd
|
||||
%res = call < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
|
||||
ret < 2 x double > %res
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmadd.ps.256 on three register arguments; the llc
; output is expected to contain "vfnmaddps" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 8 x float > @test_x86_fma4_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
|
||||
; CHECK: vfnmaddps
|
||||
; CHECK: ymm
|
||||
%res = call < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <<8 x float>> [#uses=1]
|
||||
ret < 8 x float > %res
|
||||
}
|
||||
declare < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmadd.pd.256 on three register arguments; the llc
; output is expected to contain "vfnmaddpd" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 4 x double > @test_x86_fma4_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
|
||||
; CHECK: vfnmaddpd
|
||||
; CHECK: ymm
|
||||
%res = call < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <<4 x double>> [#uses=1]
|
||||
ret < 4 x double > %res
|
||||
}
|
||||
declare < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
|
||||
|
||||
; VFNMSUB
|
||||
; Calls llvm.x86.fma4.vfnmsub.ss on three register arguments; the llc output
; is expected to contain "vfnmsubss".
define < 4 x float > @test_x86_fma4_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
|
||||
; CHECK: vfnmsubss
|
||||
%res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
|
||||
ret < 4 x float > %res
|
||||
}
|
||||
declare < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmsub.sd on three register arguments; the llc output
; is expected to contain "vfnmsubsd".
define < 2 x double > @test_x86_fma4_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfnmsubsd
|
||||
%res = call < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
|
||||
ret < 2 x double > %res
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmsub.ps on three register arguments; the llc output
; is expected to contain "vfnmsubps".
define < 4 x float > @test_x86_fma4_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
|
||||
; CHECK: vfnmsubps
|
||||
%res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
|
||||
ret < 4 x float > %res
|
||||
}
|
||||
declare < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmsub.pd on three register arguments; the llc output
; is expected to contain "vfnmsubpd".
define < 2 x double > @test_x86_fma4_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfnmsubpd
|
||||
%res = call < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
|
||||
ret < 2 x double > %res
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmsub.ps.256 on three register arguments; the llc
; output is expected to contain "vfnmsubps" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 8 x float > @test_x86_fma4_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
|
||||
; CHECK: vfnmsubps
|
||||
; CHECK: ymm
|
||||
%res = call < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <<8 x float>> [#uses=1]
|
||||
ret < 8 x float > %res
|
||||
}
|
||||
declare < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfnmsub.pd.256 on three register arguments; the llc
; output is expected to contain "vfnmsubpd" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 4 x double > @test_x86_fma4_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
|
||||
; CHECK: vfnmsubpd
|
||||
; CHECK: ymm
|
||||
%res = call < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <<4 x double>> [#uses=1]
|
||||
ret < 4 x double > %res
|
||||
}
|
||||
declare < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
|
||||
|
||||
; VFMADDSUB
|
||||
; Calls llvm.x86.fma4.vfmaddsub.ps on three register arguments; the llc
; output is expected to contain "vfmaddsubps".
define < 4 x float > @test_x86_fma4_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
|
||||
; CHECK: vfmaddsubps
|
||||
%res = call < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
|
||||
ret < 4 x float > %res
|
||||
}
|
||||
declare < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmaddsub.pd on three register arguments; the llc
; output is expected to contain "vfmaddsubpd".
define < 2 x double > @test_x86_fma4_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfmaddsubpd
|
||||
%res = call < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
|
||||
ret < 2 x double > %res
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmaddsub.ps.256 on three register arguments; the llc
; output is expected to contain "vfmaddsubps" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 8 x float > @test_x86_fma4_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
|
||||
; CHECK: vfmaddsubps
|
||||
; CHECK: ymm
|
||||
%res = call < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <<8 x float>> [#uses=1]
|
||||
ret < 8 x float > %res
|
||||
}
|
||||
declare < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmaddsub.pd.256 on three register arguments; the llc
; output is expected to contain "vfmaddsubpd" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 4 x double > @test_x86_fma4_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
|
||||
; CHECK: vfmaddsubpd
|
||||
; CHECK: ymm
|
||||
%res = call < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <<4 x double>> [#uses=1]
|
||||
ret < 4 x double > %res
|
||||
}
|
||||
declare < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
|
||||
|
||||
; VFMSUBADD
|
||||
; Calls llvm.x86.fma4.vfmsubadd.ps on three register arguments; the llc
; output is expected to contain "vfmsubaddps".
define < 4 x float > @test_x86_fma4_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
|
||||
; CHECK: vfmsubaddps
|
||||
%res = call < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
|
||||
ret < 4 x float > %res
|
||||
}
|
||||
declare < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmsubadd.pd on three register arguments; the llc
; output is expected to contain "vfmsubaddpd".
define < 2 x double > @test_x86_fma4_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
|
||||
; CHECK: vfmsubaddpd
|
||||
%res = call < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
|
||||
ret < 2 x double > %res
|
||||
}
|
||||
declare < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmsubadd.ps.256 on three register arguments; the llc
; output is expected to contain "vfmsubaddps" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 8 x float > @test_x86_fma4_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
|
||||
; CHECK: vfmsubaddps
|
||||
; CHECK: ymm
|
||||
%res = call < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <<8 x float>> [#uses=1]
|
||||
ret < 8 x float > %res
|
||||
}
|
||||
declare < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
|
||||
|
||||
; Calls llvm.x86.fma4.vfmsubadd.pd.256 on three register arguments; the llc
; output is expected to contain "vfmsubaddpd" and, after it, "ymm".
; NOTE(review): the two directives are not tied to the same output line, so
; the "ymm" match could come from a later instruction - confirm this is intended.
define < 4 x double > @test_x86_fma4_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
|
||||
; CHECK: vfmsubaddpd
|
||||
; CHECK: ymm
|
||||
%res = call < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <<4 x double>> [#uses=1]
|
||||
ret < 4 x double > %res
|
||||
}
|
||||
declare < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
|
||||
|
@ -1,5 +1,18 @@
|
||||
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// vfmadd
|
||||
// CHECK: vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0x01,0x10]
|
||||
vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x6a,0x01,0x10]
|
||||
vfmaddss %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
|
||||
vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x01,0x10]
|
||||
vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
@ -11,3 +24,368 @@
|
||||
// CHECK: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
|
||||
vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x01,0x10]
|
||||
vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddps %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x68,0x01,0x10]
|
||||
vfmaddps %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
|
||||
vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0x01,0x10]
|
||||
vfmaddpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddpd %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x69,0x01,0x10]
|
||||
vfmaddpd %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
|
||||
vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0x01,0x10]
|
||||
vfmaddps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddps %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x68,0x01,0x10]
|
||||
vfmaddps %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
|
||||
vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0x01,0x10]
|
||||
vfmaddpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddpd %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x69,0x01,0x10]
|
||||
vfmaddpd %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
|
||||
vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// vfmsub
|
||||
// CHECK: vfmsubss (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0x01,0x10]
|
||||
vfmsubss (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubss %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x6e,0x01,0x10]
|
||||
vfmsubss %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0xc2,0x10]
|
||||
vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubsd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0x01,0x10]
|
||||
vfmsubsd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubsd %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x6f,0x01,0x10]
|
||||
vfmsubsd %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0xc2,0x10]
|
||||
vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0x01,0x10]
|
||||
vfmsubps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubps %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x6c,0x01,0x10]
|
||||
vfmsubps %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
|
||||
vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0x01,0x10]
|
||||
vfmsubpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubpd %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x6d,0x01,0x10]
|
||||
vfmsubpd %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
|
||||
vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0x01,0x10]
|
||||
vfmsubps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubps %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x6c,0x01,0x10]
|
||||
vfmsubps %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
|
||||
vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0x01,0x10]
|
||||
vfmsubpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubpd %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x6d,0x01,0x10]
|
||||
vfmsubpd %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
|
||||
vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// vfnmadd
|
||||
// CHECK: vfnmaddss (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0x01,0x10]
|
||||
vfnmaddss (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddss %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x7a,0x01,0x10]
|
||||
vfnmaddss %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0xc2,0x10]
|
||||
vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddsd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0x01,0x10]
|
||||
vfnmaddsd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddsd %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x7b,0x01,0x10]
|
||||
vfnmaddsd %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0xc2,0x10]
|
||||
vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0x01,0x10]
|
||||
vfnmaddps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddps %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x78,0x01,0x10]
|
||||
vfnmaddps %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
|
||||
vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0x01,0x10]
|
||||
vfnmaddpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddpd %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x79,0x01,0x10]
|
||||
vfnmaddpd %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
|
||||
vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmaddps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0x01,0x10]
|
||||
vfnmaddps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmaddps %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x78,0x01,0x10]
|
||||
vfnmaddps %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
|
||||
vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmaddpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0x01,0x10]
|
||||
vfnmaddpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmaddpd %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x79,0x01,0x10]
|
||||
vfnmaddpd %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
|
||||
vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// vfnmsub
|
||||
// CHECK: vfnmsubss (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0x01,0x10]
|
||||
vfnmsubss (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubss %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x7e,0x01,0x10]
|
||||
vfnmsubss %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0xc2,0x10]
|
||||
vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubsd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0x01,0x10]
|
||||
vfnmsubsd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubsd %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x7f,0x01,0x10]
|
||||
vfnmsubsd %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0xc2,0x10]
|
||||
vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0x01,0x10]
|
||||
vfnmsubps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubps %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x7c,0x01,0x10]
|
||||
vfnmsubps %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
|
||||
vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0x01,0x10]
|
||||
vfnmsubpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubpd %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x7d,0x01,0x10]
|
||||
vfnmsubpd %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
|
||||
vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfnmsubps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0x01,0x10]
|
||||
vfnmsubps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmsubps %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x7c,0x01,0x10]
|
||||
vfnmsubps %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
|
||||
vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmsubpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0x01,0x10]
|
||||
vfnmsubpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmsubpd %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x7d,0x01,0x10]
|
||||
vfnmsubpd %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
|
||||
vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// vfmaddsub
|
||||
// CHECK: vfmaddsubps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0x01,0x10]
|
||||
vfmaddsubps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddsubps %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x5c,0x01,0x10]
|
||||
vfmaddsubps %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
|
||||
vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddsubpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0x01,0x10]
|
||||
vfmaddsubpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddsubpd %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x5d,0x01,0x10]
|
||||
vfmaddsubpd %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
|
||||
vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmaddsubps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0x01,0x10]
|
||||
vfmaddsubps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddsubps %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x5c,0x01,0x10]
|
||||
vfmaddsubps %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
|
||||
vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddsubpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0x01,0x10]
|
||||
vfmaddsubpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddsubpd %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x5d,0x01,0x10]
|
||||
vfmaddsubpd %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
|
||||
vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// vfmsubadd
|
||||
// CHECK: vfmsubaddps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0x01,0x10]
|
||||
vfmsubaddps (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubaddps %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x5e,0x01,0x10]
|
||||
vfmsubaddps %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
|
||||
vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubaddpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0x01,0x10]
|
||||
vfmsubaddpd (%rcx), %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubaddpd %xmm1, (%rcx), %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x5f,0x01,0x10]
|
||||
vfmsubaddpd %xmm1, (%rcx),%xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
|
||||
vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
|
||||
// CHECK: vfmsubaddps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0x01,0x10]
|
||||
vfmsubaddps (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubaddps %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x5e,0x01,0x10]
|
||||
vfmsubaddps %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
|
||||
vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubaddpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0x01,0x10]
|
||||
vfmsubaddpd (%rcx), %ymm1, %ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubaddpd %ymm1, (%rcx), %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0x7d,0x5f,0x01,0x10]
|
||||
vfmsubaddpd %ymm1, (%rcx),%ymm0, %ymm0
|
||||
|
||||
// CHECK: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
|
||||
vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
|
Loading…
x
Reference in New Issue
Block a user