From ee0d5cd9520b3eddfed44e17efc60fe9173179ad Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 17 Jul 2017 17:42:48 +0000 Subject: [PATCH] [SystemZ] Add support for IBM z14 processor (2/3) This adds support for the new 32-bit vector float instructions of z14. This includes: - Enabling the instructions for the assembler/disassembler. - CodeGen for the instructions, including new LLVM intrinsics. - Scheduler description support for the instructions. - Update to the vector cost function calculations. In general, CodeGen support for the new v4f32 instructions closely matches support for the existing v2f64 instructions. llvm-svn: 308195 --- include/llvm/IR/IntrinsicsSystemZ.td | 16 + lib/Target/SystemZ/SystemZISelLowering.cpp | 51 +- lib/Target/SystemZ/SystemZISelLowering.h | 6 + lib/Target/SystemZ/SystemZInstrVector.td | 138 ++- lib/Target/SystemZ/SystemZRegisterInfo.td | 5 +- lib/Target/SystemZ/SystemZScheduleZ14.td | 23 + lib/Target/SystemZ/SystemZShortenInst.cpp | 40 + .../SystemZ/SystemZTargetTransformInfo.cpp | 3 + test/Analysis/CostModel/SystemZ/fp-arith.ll | 53 +- test/CodeGen/SystemZ/fp-abs-03.ll | 40 + test/CodeGen/SystemZ/fp-abs-04.ll | 43 + test/CodeGen/SystemZ/fp-add-01.ll | 6 +- test/CodeGen/SystemZ/fp-cmp-01.ll | 102 +- test/CodeGen/SystemZ/fp-div-01.ll | 6 +- test/CodeGen/SystemZ/fp-mul-01.ll | 6 +- test/CodeGen/SystemZ/fp-mul-06.ll | 31 +- test/CodeGen/SystemZ/fp-mul-08.ll | 31 +- test/CodeGen/SystemZ/fp-mul-10.ll | 20 + test/CodeGen/SystemZ/fp-neg-02.ll | 38 + test/CodeGen/SystemZ/fp-round-03.ll | 195 +++ test/CodeGen/SystemZ/fp-sqrt-01.ll | 8 +- test/CodeGen/SystemZ/fp-sub-01.ll | 6 +- test/CodeGen/SystemZ/vec-abs-06.ll | 47 + test/CodeGen/SystemZ/vec-add-02.ll | 24 + test/CodeGen/SystemZ/vec-cmp-07.ll | 349 ++++++ test/CodeGen/SystemZ/vec-div-02.ll | 24 + test/CodeGen/SystemZ/vec-intrinsics-02.ll | 229 ++++ test/CodeGen/SystemZ/vec-max-05.ll | 54 + test/CodeGen/SystemZ/vec-min-05.ll | 54 + test/CodeGen/SystemZ/vec-mul-03.ll | 24 + 
test/CodeGen/SystemZ/vec-mul-04.ll | 31 + test/CodeGen/SystemZ/vec-mul-05.ll | 31 + test/CodeGen/SystemZ/vec-neg-02.ll | 23 + test/CodeGen/SystemZ/vec-round-02.ll | 118 ++ test/CodeGen/SystemZ/vec-sqrt-02.ll | 23 + test/CodeGen/SystemZ/vec-sub-02.ll | 31 + test/MC/Disassembler/SystemZ/insns-z14.txt | 1095 +++++++++++++++++ test/MC/SystemZ/insn-bad-z13.s | 262 ++++ test/MC/SystemZ/insn-bad-z14.s | 138 +++ test/MC/SystemZ/insn-good-z14.s | 950 ++++++++++++++ 40 files changed, 4267 insertions(+), 107 deletions(-) create mode 100644 test/CodeGen/SystemZ/fp-abs-03.ll create mode 100644 test/CodeGen/SystemZ/fp-abs-04.ll create mode 100644 test/CodeGen/SystemZ/fp-neg-02.ll create mode 100644 test/CodeGen/SystemZ/fp-round-03.ll create mode 100644 test/CodeGen/SystemZ/vec-abs-06.ll create mode 100644 test/CodeGen/SystemZ/vec-add-02.ll create mode 100644 test/CodeGen/SystemZ/vec-cmp-07.ll create mode 100644 test/CodeGen/SystemZ/vec-div-02.ll create mode 100644 test/CodeGen/SystemZ/vec-mul-03.ll create mode 100644 test/CodeGen/SystemZ/vec-mul-04.ll create mode 100644 test/CodeGen/SystemZ/vec-neg-02.ll create mode 100644 test/CodeGen/SystemZ/vec-round-02.ll create mode 100644 test/CodeGen/SystemZ/vec-sqrt-02.ll create mode 100644 test/CodeGen/SystemZ/vec-sub-02.ll diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td index 89136ad9c68..98065bc51d9 100644 --- a/include/llvm/IR/IntrinsicsSystemZ.td +++ b/include/llvm/IR/IntrinsicsSystemZ.td @@ -389,6 +389,22 @@ let TargetPrefix = "s390" in { def int_s390_vfmindb : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_s390_vfmaxsb : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_s390_vfminsb : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_s390_vfcesbs : SystemZBinaryConvCC; + def int_s390_vfchsbs : SystemZBinaryConvCC; + def int_s390_vfchesbs : 
SystemZBinaryConvCC; + + def int_s390_vftcisb : SystemZBinaryConvIntCC; + + def int_s390_vfisb : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // Instructions from the Vector Packed Decimal Facility def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">, diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 40d1f6df820..a806c9b89ee 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -419,6 +419,21 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // The vector enhancements facility 1 has instructions for these. if (Subtarget.hasVectorEnhancements1()) { + setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMA, MVT::v4f32, Legal); + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FABS, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNAN, MVT::f64, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); @@ -428,6 +443,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal); setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f32, Legal); + 
setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::f32, Legal); + + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal); } // We have fused multiply-addition for f32 and f64 but not f128. @@ -1478,21 +1503,25 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { return true; case Intrinsic::s390_vfcedbs: + case Intrinsic::s390_vfcesbs: Opcode = SystemZISD::VFCMPES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchdbs: + case Intrinsic::s390_vfchsbs: Opcode = SystemZISD::VFCMPHS; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchedbs: + case Intrinsic::s390_vfchesbs: Opcode = SystemZISD::VFCMPHES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vftcidb: + case Intrinsic::s390_vftcisb: Opcode = SystemZISD::VFTCI; CCValid = SystemZ::CCMASK_VCMP; return true; @@ -2332,11 +2361,15 @@ static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, // producing a result of type VT. -static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, - EVT VT, SDValue CmpOp0, SDValue CmpOp1) { - // There is no hardware support for v4f32, so extend the vector into - // two v2f64s and compare those. - if (CmpOp0.getValueType() == MVT::v4f32) { +SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, + const SDLoc &DL, EVT VT, + SDValue CmpOp0, + SDValue CmpOp1) const { + // There is no hardware support for v4f32 (unless we have the vector + // enhancements facility 1), so extend the vector into two v2f64s + // and compare those. 
+ if (CmpOp0.getValueType() == MVT::v4f32 && + !Subtarget.hasVectorEnhancements1()) { SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); @@ -2350,9 +2383,11 @@ static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing // an integer mask of type VT. -static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, - ISD::CondCode CC, SDValue CmpOp0, - SDValue CmpOp1) { +SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, + const SDLoc &DL, EVT VT, + ISD::CondCode CC, + SDValue CmpOp0, + SDValue CmpOp1) const { bool IsFP = CmpOp0.getValueType().isFloatingPoint(); bool Invert = false; SDValue Cmp; diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 0be2721963e..abe8b7233e6 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -480,6 +480,12 @@ private: const SystemZSubtarget &Subtarget; // Implement LowerOperation for individual opcodes. + SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, + const SDLoc &DL, EVT VT, + SDValue CmpOp0, SDValue CmpOp1) const; + SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, + EVT VT, ISD::CondCode CC, + SDValue CmpOp0, SDValue CmpOp1) const; SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td index 3d76974cc4f..9d8f74cdd98 100644 --- a/lib/Target/SystemZ/SystemZInstrVector.td +++ b/lib/Target/SystemZ/SystemZInstrVector.td @@ -14,7 +14,7 @@ let Predicates = [FeatureVector] in { // Register move. 
def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>; - def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>; + def VLR32 : UnaryAliasVRR<null_frag, v32sb, v32sb>; def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>; // Load GR from VR element. @@ -141,7 +141,7 @@ let Predicates = [FeatureVector] in { // LEY and LDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. - def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>; + def VL32 : UnaryAliasVRX<load, v32sb, bdxaddr12pair>; def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>; // Load logical element and zero. @@ -231,7 +231,7 @@ let Predicates = [FeatureVector] in { // STEY and STDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. - def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>; + def VST32 : StoreAliasVRX<store, v32sb, bdxaddr12pair>; def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>; // Scatter element. @@ -935,6 +935,10 @@ let Predicates = [FeatureVector] in { def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>; def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>; + def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>; + } // Convert from fixed 64-bit. def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>; @@ -966,6 +970,10 @@ let Predicates = [FeatureVector] in { def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>; def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>; + def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>; + } // Load FP integer. 
def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>; @@ -973,18 +981,38 @@ let Predicates = [FeatureVector] in { def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; defm : VectorRounding; defm : VectorRounding; + let Predicates = [FeatureVectorEnhancements1] in { + def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>; + def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>; + defm : VectorRounding; + defm : VectorRounding; + } // Load lengthened. def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>; - def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>; - def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32eb, 2, 8>; + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>; + def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + let isAsmParserOnly = 1 in { + def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>; + def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>; + def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>; + } + } - // Load rounded, + // Load rounded. def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>; - def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>; - def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>; + def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; - def : FPConversion; + def : FPConversion; + let Predicates = [FeatureVectorEnhancements1] in { + let isAsmParserOnly = 1 in { + def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>; + def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; + def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; + } + } // Maximum. 
multiclass VectorMax { @@ -997,8 +1025,14 @@ let Predicates = [FeatureVector] in { v128db, v128db, 3, 0>; def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag, v64db, v64db, 3, 8>; + def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb, + v128sb, v128sb, 2, 0>; + def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag, + v32sb, v32sb, 2, 8>; defm : VectorMax; defm : VectorMax; + defm : VectorMax; + defm : VectorMax; } // Minimum. @@ -1012,30 +1046,50 @@ let Predicates = [FeatureVector] in { v128db, v128db, 3, 0>; def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag, v64db, v64db, 3, 8>; + def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb, + v128sb, v128sb, 2, 0>; + def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag, + v32sb, v32sb, 2, 8>; defm : VectorMin; defm : VectorMin; + defm : VectorMin; + defm : VectorMin; } // Multiply. def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>; def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>; + def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>; + } // Multiply and add. def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>; def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>; + def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>; + } // Multiply and subtract. 
def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>; def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>; + def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>; + } // Negative multiply and add. let Predicates = [FeatureVectorEnhancements1] in { def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>; def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>; def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>; + def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>; + def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>; } // Negative multiply and subtract. @@ -1043,40 +1097,70 @@ let Predicates = [FeatureVector] in { def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>; def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>; def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>; + def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>; + def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>; } // Perform sign operation. def VFPSO : BinaryVRRaFloatGeneric<"vfpso", 0xE7CC>; def VFPSODB : BinaryVRRa<"vfpsodb", 0xE7CC, null_frag, v128db, v128db, 3, 0>; def WFPSODB : BinaryVRRa<"wfpsodb", 0xE7CC, null_frag, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFPSOSB : BinaryVRRa<"vfpsosb", 0xE7CC, null_frag, v128sb, v128sb, 2, 0>; + def WFPSOSB : BinaryVRRa<"wfpsosb", 0xE7CC, null_frag, v32sb, v32sb, 2, 8>; + } // Load complement. 
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>; def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLCSB : UnaryVRRa<"vflcsb", 0xE7CC, fneg, v128sb, v128sb, 2, 0, 0>; + def WFLCSB : UnaryVRRa<"wflcsb", 0xE7CC, fneg, v32sb, v32sb, 2, 8, 0>; + } // Load negative. def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>; def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLNSB : UnaryVRRa<"vflnsb", 0xE7CC, fnabs, v128sb, v128sb, 2, 0, 1>; + def WFLNSB : UnaryVRRa<"wflnsb", 0xE7CC, fnabs, v32sb, v32sb, 2, 8, 1>; + } // Load positive. def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>; def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFLPSB : UnaryVRRa<"vflpsb", 0xE7CC, fabs, v128sb, v128sb, 2, 0, 2>; + def WFLPSB : UnaryVRRa<"wflpsb", 0xE7CC, fabs, v32sb, v32sb, 2, 8, 2>; + } // Square root. def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>; def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>; + def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>; + } // Subtract. def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>; def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>; + def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>; + } // Test data class immediate. 
let Defs = [CC] in { def VFTCI : BinaryVRIeFloatGeneric<"vftci", 0xE74A>; def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>; def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + def VFTCISB : BinaryVRIe<"vftcisb", 0xE74A, z_vftci, v128f, v128sb, 2, 0>; + def WFTCISB : BinaryVRIe<"wftcisb", 0xE74A, null_frag, v32f, v32sb, 2, 8>; + } } } @@ -1089,12 +1173,18 @@ let Predicates = [FeatureVector] in { let Defs = [CC] in { def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>; def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>; + } } // Compare and signal scalar. let Defs = [CC] in { def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>; def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; + let Predicates = [FeatureVectorEnhancements1] in { + def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>; + } } // Compare equal. @@ -1103,6 +1193,12 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 0>; defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes, + v128f, v128sb, 2, 0>; + defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 8>; + } // Compare and signal equal. let Predicates = [FeatureVectorEnhancements1] in { @@ -1110,6 +1206,10 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 4>; defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 12>; + defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag, + v128f, v128sb, 2, 4>; + defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag, + v32f, v32sb, 2, 12>; } // Compare high. 
@@ -1118,6 +1218,12 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 0>; defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs, + v128f, v128sb, 2, 0>; + defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 8>; + } // Compare and signal high. let Predicates = [FeatureVectorEnhancements1] in { @@ -1125,6 +1231,10 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 4>; defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 12>; + defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag, + v128f, v128sb, 2, 4>; + defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag, + v32f, v32sb, 2, 12>; } // Compare high or equal. @@ -1133,6 +1243,12 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 0>; defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 8>; + let Predicates = [FeatureVectorEnhancements1] in { + defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes, + v128f, v128sb, 2, 0>; + defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 8>; + } // Compare and signal high or equal. 
let Predicates = [FeatureVectorEnhancements1] in { @@ -1140,6 +1256,10 @@ let Predicates = [FeatureVector] in { v128g, v128db, 3, 4>; defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 12>; + defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag, + v128f, v128sb, 2, 4>; + defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag, + v32f, v32sb, 2, 12>; } } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 36809ea81dc..ef5a2642bd2 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -272,7 +272,8 @@ class TypedReg { RegisterOperand op = opin; } -def v32eb : TypedReg; +def v32f : TypedReg; +def v32sb : TypedReg; def v64g : TypedReg; def v64db : TypedReg; def v128b : TypedReg; @@ -280,7 +281,7 @@ def v128h : TypedReg; def v128f : TypedReg; def v128g : TypedReg; def v128q : TypedReg; -def v128eb : TypedReg; +def v128sb : TypedReg; def v128db : TypedReg; def v128any : TypedReg; diff --git a/lib/Target/SystemZ/SystemZScheduleZ14.td b/lib/Target/SystemZ/SystemZScheduleZ14.td index 1cc69fffe16..f9407eb179d 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -1316,42 +1316,60 @@ def : InstRW<[VecBF], (instregex "WC(L)?GDB$")>; def : InstRW<[VecBF], (instregex "VL(DE|ED)$")>; def : InstRW<[VecBF], (instregex "VL(DE|ED)B$")>; def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>; +def : InstRW<[VecBF], (instregex "VFL(L|R)$")>; +def : InstRW<[VecBF], (instregex "VFL(LS|RD)$")>; +def : InstRW<[VecBF], (instregex "WFL(LS|RD)$")>; def : InstRW<[VecBF2], (instregex "VFI$")>; def : InstRW<[VecBF], (instregex "VFIDB$")>; def : InstRW<[VecBF], (instregex "WFIDB$")>; +def : InstRW<[VecBF2], (instregex "VFISB$")>; +def : InstRW<[VecBF], (instregex "WFISB$")>; // Sign operations def : InstRW<[VecXsPm], (instregex "VFPSO$")>; def : InstRW<[VecXsPm], (instregex 
"(V|W)FPSODB$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FPSOSB$")>; def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)SB$")>; // Minimum / maximum def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)$")>; def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)DB$")>; def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)SB$")>; // Test data class def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>; def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCISB$")>; // Add / subtract def : InstRW<[VecBF2], (instregex "VF(A|S)$")>; def : InstRW<[VecBF], (instregex "VF(A|S)DB$")>; def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>; +def : InstRW<[VecBF2], (instregex "VF(A|S)SB$")>; +def : InstRW<[VecBF], (instregex "WF(A|S)SB$")>; // Multiply / multiply-and-add/subtract def : InstRW<[VecBF2], (instregex "VFM$")>; def : InstRW<[VecBF], (instregex "VFMDB$")>; def : InstRW<[VecBF], (instregex "WFMDB$")>; +def : InstRW<[VecBF2], (instregex "VFMSB$")>; +def : InstRW<[VecBF], (instregex "WFMSB$")>; def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)$")>; def : InstRW<[VecBF], (instregex "VF(N)?M(A|S)DB$")>; def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)SB$")>; // Divide / square root def : InstRW<[VecFPd], (instregex "VFD$")>; def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>; +def : InstRW<[VecFPd], (instregex "(V|W)FDSB$")>; def : InstRW<[VecFPd], (instregex "VFSQ$")>; def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>; +def : InstRW<[VecFPd], (instregex "(V|W)FSQSB$")>; //===----------------------------------------------------------------------===// // Vector: Floating-point comparison @@ -1360,10 +1378,15 @@ def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>; def : 
InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)$")>; def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)DB$")>; def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)DB$")>; +def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)SB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)DBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)SBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)SB$")>; //===----------------------------------------------------------------------===// // Vector: Floating-point insertion and extraction diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index 7391df8342e..13ceb371a42 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -200,14 +200,26 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); break; + case SystemZ::WFASB: + Changed |= shortenOn001AddCC(MI, SystemZ::AEBR); + break; + case SystemZ::WFDDB: Changed |= shortenOn001(MI, SystemZ::DDBR); break; + case SystemZ::WFDSB: + Changed |= shortenOn001(MI, SystemZ::DEBR); + break; + case SystemZ::WFIDB: Changed |= shortenFPConv(MI, SystemZ::FIDBRA); break; + case SystemZ::WFISB: + Changed |= shortenFPConv(MI, SystemZ::FIEBRA); + break; + case SystemZ::WLDEB: Changed |= shortenOn01(MI, SystemZ::LDEBR); break; @@ -220,30 +232,58 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenOn001(MI, SystemZ::MDBR); break; + case SystemZ::WFMSB: + Changed |= shortenOn001(MI, SystemZ::MEEBR); + break; + case SystemZ::WFLCDB: Changed |= shortenOn01(MI, SystemZ::LCDFR); break; + case 
SystemZ::WFLCSB: + Changed |= shortenOn01(MI, SystemZ::LCDFR_32); + break; + case SystemZ::WFLNDB: Changed |= shortenOn01(MI, SystemZ::LNDFR); break; + case SystemZ::WFLNSB: + Changed |= shortenOn01(MI, SystemZ::LNDFR_32); + break; + case SystemZ::WFLPDB: Changed |= shortenOn01(MI, SystemZ::LPDFR); break; + case SystemZ::WFLPSB: + Changed |= shortenOn01(MI, SystemZ::LPDFR_32); + break; + case SystemZ::WFSQDB: Changed |= shortenOn01(MI, SystemZ::SQDBR); break; + case SystemZ::WFSQSB: + Changed |= shortenOn01(MI, SystemZ::SQEBR); + break; + case SystemZ::WFSDB: Changed |= shortenOn001AddCC(MI, SystemZ::SDBR); break; + case SystemZ::WFSSB: + Changed |= shortenOn001AddCC(MI, SystemZ::SEBR); + break; + case SystemZ::WFCDB: Changed |= shortenOn01(MI, SystemZ::CDBR); break; + case SystemZ::WFCSB: + Changed |= shortenOn01(MI, SystemZ::CEBR); + break; + case SystemZ::VL32: // For z13 we prefer LDE over LE to avoid partial register dependencies. Changed |= shortenOn0(MI, SystemZ::LDE32); diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 9ac768b2189..506dc742799 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -372,6 +372,9 @@ int SystemZTTIImpl::getArithmeticInstrCost( Opcode == Instruction::FMul || Opcode == Instruction::FDiv) { switch (ScalarBits) { case 32: { + // The vector enhancements facility 1 provides v4f32 instructions. + if (ST->hasVectorEnhancements1()) + return NumVectors; // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. 
unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); diff --git a/test/Analysis/CostModel/SystemZ/fp-arith.ll b/test/Analysis/CostModel/SystemZ/fp-arith.ll index 08a7c291138..5f92db1abab 100644 --- a/test/Analysis/CostModel/SystemZ/fp-arith.ll +++ b/test/Analysis/CostModel/SystemZ/fp-arith.ll @@ -1,4 +1,7 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-Z13 %s +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-Z14 %s ; ; Note: The scalarized vector instructions cost is not including any ; extracts, due to the undef operands @@ -21,13 +24,17 @@ define void @fadd() { ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fadd float undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fadd double undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fadd fp128 undef, undef -; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fadd <2 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fadd <2 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res3 = fadd <2 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fadd <2 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fadd <4 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fadd <4 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res5 = fadd <4 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fadd <4 x 
double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fadd <8 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fadd <8 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 2 for instruction: %res7 = fadd <8 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fadd <8 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fadd <16 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fadd <16 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 4 for instruction: %res9 = fadd <16 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fadd <16 x double> undef, undef ret void; @@ -49,13 +56,17 @@ define void @fsub() { ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fsub float undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fsub double undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fsub fp128 undef, undef -; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fsub <2 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fsub <2 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res3 = fsub <2 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fsub <2 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fsub <4 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fsub <4 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res5 = fsub <4 x float> undef, 
undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fsub <4 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fsub <8 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fsub <8 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 2 for instruction: %res7 = fsub <8 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fsub <8 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fsub <16 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fsub <16 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 4 for instruction: %res9 = fsub <16 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fsub <16 x double> undef, undef ret void; @@ -77,13 +88,17 @@ define void @fmul() { ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fmul float undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fmul double undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fmul fp128 undef, undef -; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fmul <2 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fmul <2 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res3 = fmul <2 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fmul <2 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fmul <4 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fmul <4 x float> undef, undef +; CHECK-Z14: 
Cost Model: Found an estimated cost of 1 for instruction: %res5 = fmul <4 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fmul <4 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fmul <8 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fmul <8 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 2 for instruction: %res7 = fmul <8 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fmul <8 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fmul <16 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fmul <16 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 4 for instruction: %res9 = fmul <16 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fmul <16 x double> undef, undef ret void; @@ -105,13 +120,17 @@ define void @fdiv() { ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fdiv float undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fdiv double undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fdiv fp128 undef, undef -; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fdiv <2 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fdiv <2 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res3 = fdiv <2 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fdiv <2 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fdiv <4 x float> undef, undef +; CHECK-Z13: Cost Model: Found an 
estimated cost of 8 for instruction: %res5 = fdiv <4 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res5 = fdiv <4 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fdiv <4 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fdiv <8 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fdiv <8 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 2 for instruction: %res7 = fdiv <8 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fdiv <8 x double> undef, undef -; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fdiv <16 x float> undef, undef +; CHECK-Z13: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fdiv <16 x float> undef, undef +; CHECK-Z14: Cost Model: Found an estimated cost of 4 for instruction: %res9 = fdiv <16 x float> undef, undef ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fdiv <16 x double> undef, undef ret void; diff --git a/test/CodeGen/SystemZ/fp-abs-03.ll b/test/CodeGen/SystemZ/fp-abs-03.ll new file mode 100644 index 00000000000..ccb69642a2c --- /dev/null +++ b/test/CodeGen/SystemZ/fp-abs-03.ll @@ -0,0 +1,40 @@ +; Test floating-point absolute on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test f32. +declare float @llvm.fabs.f32(float %f) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: lpdfr %f0, %f0 +; CHECK: br %r14 + %res = call float @llvm.fabs.f32(float %f) + ret float %res +} + +; Test f64. +declare double @llvm.fabs.f64(double %f) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: lpdfr %f0, %f0 +; CHECK: br %r14 + %res = call double @llvm.fabs.f64(double %f) + ret double %res +} + +; Test f128. 
With the loads and stores, a pure absolute would probably +; be better implemented using an NI on the upper byte. Do some extra +; processing so that using FPRs is unequivocally better. +declare fp128 @llvm.fabs.f128(fp128 %f) +define void @f3(fp128 *%ptr, fp128 *%ptr2) { +; CHECK-LABEL: f3: +; CHECK: lpxbr +; CHECK: dxbr +; CHECK: br %r14 + %orig = load fp128 , fp128 *%ptr + %abs = call fp128 @llvm.fabs.f128(fp128 %orig) + %op2 = load fp128 , fp128 *%ptr2 + %res = fdiv fp128 %abs, %op2 + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-abs-04.ll b/test/CodeGen/SystemZ/fp-abs-04.ll new file mode 100644 index 00000000000..59064795b98 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-abs-04.ll @@ -0,0 +1,43 @@ +; Test negated floating-point absolute on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test f32. +declare float @llvm.fabs.f32(float %f) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: lndfr %f0, %f0 +; CHECK: br %r14 + %abs = call float @llvm.fabs.f32(float %f) + %res = fsub float -0.0, %abs + ret float %res +} + +; Test f64. +declare double @llvm.fabs.f64(double %f) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: lndfr %f0, %f0 +; CHECK: br %r14 + %abs = call double @llvm.fabs.f64(double %f) + %res = fsub double -0.0, %abs + ret double %res +} + +; Test f128. With the loads and stores, a pure negative-absolute would +; probably be better implemented using an OI on the upper byte. Do some +; extra processing so that using FPRs is unequivocally better. 
+declare fp128 @llvm.fabs.f128(fp128 %f) +define void @f3(fp128 *%ptr, fp128 *%ptr2) { +; CHECK-LABEL: f3: +; CHECK: lnxbr +; CHECK: dxbr +; CHECK: br %r14 + %orig = load fp128 , fp128 *%ptr + %abs = call fp128 @llvm.fabs.f128(fp128 %orig) + %negabs = fsub fp128 0xL00000000000000008000000000000000, %abs + %op2 = load fp128 , fp128 *%ptr2 + %res = fdiv fp128 %negabs, %op2 + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-add-01.ll b/test/CodeGen/SystemZ/fp-add-01.ll index 5b0ed0513a3..219607d628d 100644 --- a/test/CodeGen/SystemZ/fp-add-01.ll +++ b/test/CodeGen/SystemZ/fp-add-01.ll @@ -1,6 +1,8 @@ ; Test 32-bit floating-point addition. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() @@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: aeb %f0, 16{{[04]}}(%r15) +; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/fp-cmp-01.ll b/test/CodeGen/SystemZ/fp-cmp-01.ll index 075c7aa3dd8..146b16bc695 100644 --- a/test/CodeGen/SystemZ/fp-cmp-01.ll +++ b/test/CodeGen/SystemZ/fp-cmp-01.ll @@ -1,7 +1,10 @@ ; Test 32-bit floating-point comparison. The tests assume a z10 implementation ; of select, using conditional branches rather than LOCGR. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s declare float @foo() @@ -9,8 +12,9 @@ declare float @foo() define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) { ; CHECK-LABEL: f1: ; CHECK: cebr %f0, %f2 -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %cond = fcmp oeq float %f1, %f2 %res = select i1 %cond, i64 %a, i64 %b @@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) { define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) { ; CHECK-LABEL: f2: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %f2 = load float , float *%ptr %cond = fcmp oeq float %f1, %f2 @@ -34,8 +39,9 @@ define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) { define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) { ; CHECK-LABEL: f3: ; CHECK: ceb %f0, 4092(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1023 %f2 = load float , float *%ptr @@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) { ; CHECK-LABEL: f4: ; CHECK: aghi %r4, 4096 ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1024 %f2 = load float , float *%ptr @@ -65,8 +72,9 @@ define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) 
{ ; CHECK-LABEL: f5: ; CHECK: aghi %r4, -4 ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 -1 %f2 = load float , float *%ptr @@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) { ; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r5, 2 ; CHECK: ceb %f0, 400(%r1,%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr1 = getelementptr float, float *%base, i64 %index %ptr2 = getelementptr float, float *%ptr1, i64 100 @@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK-SCALAR: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 @@ -153,8 +162,9 @@ define float @f7(float *%ptr0) { define i64 @f8(i64 %a, i64 %b, float %f) { ; CHECK-LABEL: f8: ; CHECK: ltebr %f0, %f0 -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %cond = fcmp oeq float %f, 0.0 %res = select i1 %cond, i64 %a, i64 %b @@ -166,8 +176,9 @@ define i64 @f8(i64 %a, i64 %b, float %f) { define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f9: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp oeq float %f1, %f2 @@ -179,8 +190,9 @@ define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f10(i64 %a, i64 %b, 
float %f2, float *%ptr) { ; CHECK-LABEL: f10: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: blhr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: blhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnlh %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp one float %f1, %f2 @@ -192,8 +204,9 @@ define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f11: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bhr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnh %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp olt float %f1, %f2 @@ -205,8 +218,9 @@ define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f12: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bher %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnhe %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ole float %f1, %f2 @@ -218,8 +232,9 @@ define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f13: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bler %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnle %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp oge float %f1, %f2 @@ -231,8 +246,9 @@ define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f14: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: blr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: blr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnl %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ogt float %f1, %f2 @@ -244,8 +260,9 @@ define i64 @f14(i64 %a, i64 %b, float 
%f2, float *%ptr) { define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f15: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnlhr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bnlhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrlh %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ueq float %f1, %f2 @@ -257,8 +274,9 @@ define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f16: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bner %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bner %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgre %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp une float %f1, %f2 @@ -270,8 +288,9 @@ define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f17: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnler %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bnler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrle %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ult float %f1, %f2 @@ -283,8 +302,9 @@ define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f18: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnlr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bnlr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrl %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ule float %f1, %f2 @@ -296,8 +316,9 @@ define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f19: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnhr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bnhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrh %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp uge float %f1, %f2 @@ 
-309,8 +330,9 @@ define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f20: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnher %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bnher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrhe %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ugt float %f1, %f2 diff --git a/test/CodeGen/SystemZ/fp-div-01.ll b/test/CodeGen/SystemZ/fp-div-01.ll index 0791e8db93f..ee514dc474e 100644 --- a/test/CodeGen/SystemZ/fp-div-01.ll +++ b/test/CodeGen/SystemZ/fp-div-01.ll @@ -1,6 +1,8 @@ ; Test 32-bit floating-point division. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() @@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: deb %f0, 16{{[04]}}(%r15) +; CHECK-SCALAR: deb %f0, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/fp-mul-01.ll b/test/CodeGen/SystemZ/fp-mul-01.ll index 3b72d25e0b5..126567b218a 100644 --- a/test/CodeGen/SystemZ/fp-mul-01.ll +++ b/test/CodeGen/SystemZ/fp-mul-01.ll @@ -1,6 +1,8 @@ ; Test multiplication of two f32s, producing an f32 result. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() @@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: meeb %f0, 16{{[04]}}(%r15) +; CHECK-SCALAR: meeb %f0, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/fp-mul-06.ll b/test/CodeGen/SystemZ/fp-mul-06.ll index 896fafecbda..581e44eeaa2 100644 --- a/test/CodeGen/SystemZ/fp-mul-06.ll +++ b/test/CodeGen/SystemZ/fp-mul-06.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s declare float @llvm.fma.f32(float %f1, float %f2, float %f3) define float @f1(float %f1, float %f2, float %acc) { ; CHECK-LABEL: f1: -; CHECK: maebr %f4, %f0, %f2 -; CHECK: ler %f0, %f4 +; CHECK-SCALAR: maebr %f4, %f0, %f2 +; CHECK-SCALAR: ler %f0, %f4 +; CHECK-VECTOR: wfmasb %f0, %f0, %f2, %f4 ; CHECK: br %r14 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) ret float %res @@ -14,7 +18,8 @@ define float @f1(float %f1, float %f2, float %acc) { define float @f2(float %f1, float *%ptr, float %acc) { ; CHECK-LABEL: f2: ; CHECK: maeb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %f2 = load float , float *%ptr %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) @@ -24,7 +29,8 @@ define float @f2(float %f1, float *%ptr, float %acc) { define 
float @f3(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f3: ; CHECK: maeb %f2, %f0, 4092(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1023 %f2 = load float , float *%ptr @@ -39,7 +45,8 @@ define float @f4(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: maeb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1024 %f2 = load float , float *%ptr @@ -54,7 +61,8 @@ define float @f5(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: maeb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 -1 %f2 = load float , float *%ptr @@ -66,7 +74,8 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) { ; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: maeb %f2, %f0, 0(%r1,%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 %index %f2 = load float , float *%ptr @@ -78,7 +87,8 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) { ; CHECK-LABEL: f7: ; CHECK: sllg %r1, %r3, 2 ; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %index2 = add i64 %index, 1023 %ptr = getelementptr float, float *%base, i64 %index2 @@ -92,7 +102,8 @@ define float @f8(float %f1, float *%base, i64 %index, float %acc) { ; CHECK: sllg %r1, %r3, 2 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) ; CHECK: maeb %f2, %f0, 0(%r1) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %index2 = add i64 %index, 1024 %ptr = getelementptr 
float, float *%base, i64 %index2 diff --git a/test/CodeGen/SystemZ/fp-mul-08.ll b/test/CodeGen/SystemZ/fp-mul-08.ll index 5e5538bfacc..5b1f9b96c08 100644 --- a/test/CodeGen/SystemZ/fp-mul-08.ll +++ b/test/CodeGen/SystemZ/fp-mul-08.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s declare float @llvm.fma.f32(float %f1, float %f2, float %f3) define float @f1(float %f1, float %f2, float %acc) { ; CHECK-LABEL: f1: -; CHECK: msebr %f4, %f0, %f2 -; CHECK: ler %f0, %f4 +; CHECK-SCALAR: msebr %f4, %f0, %f2 +; CHECK-SCALAR: ler %f0, %f4 +; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4 ; CHECK: br %r14 %negacc = fsub float -0.0, %acc %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) @@ -15,7 +19,8 @@ define float @f1(float %f1, float %f2, float %acc) { define float @f2(float %f1, float *%ptr, float %acc) { ; CHECK-LABEL: f2: ; CHECK: mseb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %f2 = load float , float *%ptr %negacc = fsub float -0.0, %acc @@ -26,7 +31,8 @@ define float @f2(float %f1, float *%ptr, float %acc) { define float @f3(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f3: ; CHECK: mseb %f2, %f0, 4092(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1023 %f2 = load float , float *%ptr @@ -42,7 +48,8 @@ define float @f4(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: mseb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1024 %f2 = load float , float 
*%ptr @@ -58,7 +65,8 @@ define float @f5(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: mseb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 -1 %f2 = load float , float *%ptr @@ -71,7 +79,8 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) { ; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: mseb %f2, %f0, 0(%r1,%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 %index %f2 = load float , float *%ptr @@ -84,7 +93,8 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) { ; CHECK-LABEL: f7: ; CHECK: sllg %r1, %r3, 2 ; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %index2 = add i64 %index, 1023 %ptr = getelementptr float, float *%base, i64 %index2 @@ -99,7 +109,8 @@ define float @f8(float %f1, float *%base, i64 %index, float %acc) { ; CHECK: sllg %r1, %r3, 2 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) ; CHECK: mseb %f2, %f0, 0(%r1) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %index2 = add i64 %index, 1024 %ptr = getelementptr float, float *%base, i64 %index2 diff --git a/test/CodeGen/SystemZ/fp-mul-10.ll b/test/CodeGen/SystemZ/fp-mul-10.ll index 977e5c60e3a..c23a6a202ad 100644 --- a/test/CodeGen/SystemZ/fp-mul-10.ll +++ b/test/CodeGen/SystemZ/fp-mul-10.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare double @llvm.fma.f64(double %f1, double %f2, double %f3) +declare float @llvm.fma.f32(float %f1, float %f2, float %f3) define double @f1(double %f1, double %f2, double %acc) { ; CHECK-LABEL: f1: @@ -21,3 +22,22 @@ define double @f2(double %f1, double %f2, double %acc) { ret double 
%negres } +define float @f3(float %f1, float %f2, float %acc) { +; CHECK-LABEL: f3: +; CHECK: wfnmasb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + %negres = fsub float -0.0, %res + ret float %negres +} + +define float @f4(float %f1, float %f2, float %acc) { +; CHECK-LABEL: f4: +; CHECK: wfnmssb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %negacc = fsub float -0.0, %acc + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + %negres = fsub float -0.0, %res + ret float %negres +} + diff --git a/test/CodeGen/SystemZ/fp-neg-02.ll b/test/CodeGen/SystemZ/fp-neg-02.ll new file mode 100644 index 00000000000..c904d19947b --- /dev/null +++ b/test/CodeGen/SystemZ/fp-neg-02.ll @@ -0,0 +1,38 @@ +; Test floating-point negation on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test f32. +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: lcdfr %f0, %f0 +; CHECK: br %r14 + %res = fsub float -0.0, %f + ret float %res +} + +; Test f64. +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: lcdfr %f0, %f0 +; CHECK: br %r14 + %res = fsub double -0.0, %f + ret double %res +} + +; Test f128. With the loads and stores, a pure negation would probably +; be better implemented using an XI on the upper byte. Do some extra +; processing so that using FPRs is unequivocally better. 
+define void @f3(fp128 *%ptr, fp128 *%ptr2) { +; CHECK-LABEL: f3: +; CHECK: lcxbr +; CHECK: dxbr +; CHECK: br %r14 + %orig = load fp128 , fp128 *%ptr + %negzero = fpext float -0.0 to fp128 + %neg = fsub fp128 0xL00000000000000008000000000000000, %orig + %op2 = load fp128 , fp128 *%ptr2 + %res = fdiv fp128 %neg, %op2 + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-round-03.ll b/test/CodeGen/SystemZ/fp-round-03.ll new file mode 100644 index 00000000000..1a8296357bc --- /dev/null +++ b/test/CodeGen/SystemZ/fp-round-03.ll @@ -0,0 +1,195 @@ +; Test rounding functions for z14 and above. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test rint for f32. +declare float @llvm.rint.f32(float %f) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: fiebra %f0, 0, %f0, 0 +; CHECK: br %r14 + %res = call float @llvm.rint.f32(float %f) + ret float %res +} + +; Test rint for f64. +declare double @llvm.rint.f64(double %f) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: fidbra %f0, 0, %f0, 0 +; CHECK: br %r14 + %res = call double @llvm.rint.f64(double %f) + ret double %res +} + +; Test rint for f128. +declare fp128 @llvm.rint.f128(fp128 %f) +define void @f3(fp128 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: fixbr %f0, 0, %f0 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.rint.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test nearbyint for f32. +declare float @llvm.nearbyint.f32(float %f) +define float @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: fiebra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.nearbyint.f32(float %f) + ret float %res +} + +; Test nearbyint for f64. +declare double @llvm.nearbyint.f64(double %f) +define double @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: fidbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.nearbyint.f64(double %f) + ret double %res +} + +; Test nearbyint for f128. 
+declare fp128 @llvm.nearbyint.f128(fp128 %f) +define void @f6(fp128 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: fixbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.nearbyint.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test floor for f32. +declare float @llvm.floor.f32(float %f) +define float @f7(float %f) { +; CHECK-LABEL: f7: +; CHECK: fiebra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.floor.f32(float %f) + ret float %res +} + +; Test floor for f64. +declare double @llvm.floor.f64(double %f) +define double @f8(double %f) { +; CHECK-LABEL: f8: +; CHECK: fidbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.floor.f64(double %f) + ret double %res +} + +; Test floor for f128. +declare fp128 @llvm.floor.f128(fp128 %f) +define void @f9(fp128 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: fixbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.floor.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test ceil for f32. +declare float @llvm.ceil.f32(float %f) +define float @f10(float %f) { +; CHECK-LABEL: f10: +; CHECK: fiebra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.ceil.f32(float %f) + ret float %res +} + +; Test ceil for f64. +declare double @llvm.ceil.f64(double %f) +define double @f11(double %f) { +; CHECK-LABEL: f11: +; CHECK: fidbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.ceil.f64(double %f) + ret double %res +} + +; Test ceil for f128. +declare fp128 @llvm.ceil.f128(fp128 %f) +define void @f12(fp128 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: fixbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.ceil.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test trunc for f32. 
+declare float @llvm.trunc.f32(float %f) +define float @f13(float %f) { +; CHECK-LABEL: f13: +; CHECK: fiebra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.trunc.f32(float %f) + ret float %res +} + +; Test trunc for f64. +declare double @llvm.trunc.f64(double %f) +define double @f14(double %f) { +; CHECK-LABEL: f14: +; CHECK: fidbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.trunc.f64(double %f) + ret double %res +} + +; Test trunc for f128. +declare fp128 @llvm.trunc.f128(fp128 %f) +define void @f15(fp128 *%ptr) { +; CHECK-LABEL: f15: +; CHECK: fixbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.trunc.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test round for f32. +declare float @llvm.round.f32(float %f) +define float @f16(float %f) { +; CHECK-LABEL: f16: +; CHECK: fiebra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.round.f32(float %f) + ret float %res +} + +; Test round for f64. +declare double @llvm.round.f64(double %f) +define double @f17(double %f) { +; CHECK-LABEL: f17: +; CHECK: fidbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.round.f64(double %f) + ret double %res +} + +; Test round for f128. +declare fp128 @llvm.round.f128(fp128 %f) +define void @f18(fp128 *%ptr) { +; CHECK-LABEL: f18: +; CHECK: fixbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.round.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-01.ll b/test/CodeGen/SystemZ/fp-sqrt-01.ll index 3680207e7f2..85a46bc2d7f 100644 --- a/test/CodeGen/SystemZ/fp-sqrt-01.ll +++ b/test/CodeGen/SystemZ/fp-sqrt-01.ll @@ -1,6 +1,8 @@ ; Test 32-bit square root. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @llvm.sqrt.f32(float) declare float @sqrtf(float) @@ -77,7 +79,7 @@ define float @f6(float *%base, i64 %index) { ; to use SQEB if possible. define void @f7(float *%ptr) { ; CHECK-LABEL: f7: -; CHECK: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK-SCALAR: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15) ; CHECK: br %r14 %val0 = load volatile float , float *%ptr %val1 = load volatile float , float *%ptr @@ -160,7 +162,7 @@ define float @f8(float %dummy, float %val) { ; CHECK: sqebr %f0, %f2 ; CHECK: cebr %f0, %f0 ; CHECK: bnor %r14 -; CHECK: ler %f0, %f2 +; CHECK: {{ler|ldr}} %f0, %f2 ; CHECK: jg sqrtf@PLT %res = tail call float @sqrtf(float %val) ret float %res diff --git a/test/CodeGen/SystemZ/fp-sub-01.ll b/test/CodeGen/SystemZ/fp-sub-01.ll index f4185ca3108..41f72e1810e 100644 --- a/test/CodeGen/SystemZ/fp-sub-01.ll +++ b/test/CodeGen/SystemZ/fp-sub-01.ll @@ -1,6 +1,8 @@ ; Test 32-bit floating-point subtraction. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() @@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: seb %f0, 16{{[04]}}(%r15) +; CHECK-SCALAR: seb %f0, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/vec-abs-06.ll b/test/CodeGen/SystemZ/vec-abs-06.ll new file mode 100644 index 00000000000..8eee1d9d250 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-abs-06.ll @@ -0,0 +1,47 @@ +; Test f32 and v4f32 absolute on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.fabs.f32(float) +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) + +; Test a plain absolute. +define <4 x float> @f1(<4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vflpsb %v24, %v24 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.fabs.v4f32(<4 x float> %val) + ret <4 x float> %ret +} + +; Test a negative absolute. +define <4 x float> @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: vflnsb %v24, %v24 +; CHECK: br %r14 + %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %val) + %ret = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %abs + ret <4 x float> %ret +} + +; Test an f32 absolute that uses vector registers. +define float @f3(<4 x float> %val) { +; CHECK-LABEL: f3: +; CHECK: wflpsb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %ret = call float @llvm.fabs.f32(float %scalar) + ret float %ret +} + +; Test an f32 negative absolute that uses vector registers. 
+define float @f4(<4 x float> %val) { +; CHECK-LABEL: f4: +; CHECK: wflnsb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %abs = call float @llvm.fabs.f32(float %scalar) + %ret = fsub float -0.0, %abs + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-add-02.ll b/test/CodeGen/SystemZ/vec-add-02.ll new file mode 100644 index 00000000000..97a9b84a063 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-add-02.ll @@ -0,0 +1,24 @@ +; Test vector addition on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 addition. +define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfasb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fadd <4 x float> %val1, %val2 + ret <4 x float> %ret +} + +; Test an f32 addition that uses vector registers. +define float @f2(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK: wfasb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = fadd float %scalar1, %scalar2 + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-cmp-07.ll b/test/CodeGen/SystemZ/vec-cmp-07.ll new file mode 100644 index 00000000000..f272ba4bd75 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-cmp-07.ll @@ -0,0 +1,349 @@ +; Test f32 and v4f32 comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test oeq. +define <4 x i32> @f1(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfcesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = fcmp oeq <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test one. 
+define <4 x i32> @f2(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp one <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ogt. +define <4 x i32> @f3(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f3: +; CHECK: vfchsb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = fcmp ogt <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oge. +define <4 x i32> @f4(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f4: +; CHECK: vfchesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = fcmp oge <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ole. +define <4 x i32> @f5(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f5: +; CHECK: vfchesb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = fcmp ole <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test olt. +define <4 x i32> @f6(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f6: +; CHECK: vfchsb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = fcmp olt <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ueq. +define <4 x i32> @f7(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f7: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ueq <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test une. 
+define <4 x i32> @f8(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f8: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp une <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ugt. +define <4 x i32> @f9(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f9: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ugt <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uge. +define <4 x i32> @f10(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f10: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uge <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ule. +define <4 x i32> @f11(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f11: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ule <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ult. +define <4 x i32> @f12(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f12: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ult <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ord. 
+define <4 x i32> @f13(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f13: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ord <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uno. +define <4 x i32> @f14(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f14: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uno <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oeq selects. +define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f15: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp oeq <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test one selects. +define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f16: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp one <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ogt selects. 
+define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f17: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ogt <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test oge selects. +define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f18: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp oge <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ole selects. +define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f19: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ole <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test olt selects. +define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f20: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp olt <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ueq selects. 
+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f21: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ueq <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test une selects. +define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f22: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp une <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ugt selects. +define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f23: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ugt <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uge selects. +define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f24: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uge <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ule selects. 
+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f25: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ule <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ult selects. +define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f26: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ult <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ord selects. +define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f27: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ord <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uno selects. +define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f28: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uno <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test an f32 comparison that uses vector registers. 
+define i64 @f29(i64 %a, i64 %b, float %f1, <4 x float> %vec) { +; CHECK-LABEL: f29: +; CHECK: wfcsb %f0, %v24 +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = extractelement <4 x float> %vec, i32 0 + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/vec-div-02.ll b/test/CodeGen/SystemZ/vec-div-02.ll new file mode 100644 index 00000000000..74e3b5148ad --- /dev/null +++ b/test/CodeGen/SystemZ/vec-div-02.ll @@ -0,0 +1,24 @@ +; Test vector division on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 division. +define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfdsb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fdiv <4 x float> %val1, %val2 + ret <4 x float> %ret +} + +; Test an f32 division that uses vector registers. +define float @f2(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK: wfdsb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = fdiv float %scalar1, %scalar2 + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-intrinsics-02.ll b/test/CodeGen/SystemZ/vec-intrinsics-02.ll index 27ee83fc774..84c6a078403 100644 --- a/test/CodeGen/SystemZ/vec-intrinsics-02.ll +++ b/test/CodeGen/SystemZ/vec-intrinsics-02.ll @@ -6,8 +6,17 @@ declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>) declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32) declare <16 x i8> @llvm.s390.vlrl(i32, i8 *) declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *) + +declare {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float>, <4 x float>) +declare {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float>, <4 x float>) +declare {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float>, <4 x float>) +declare {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float>, i32) +declare <4 x float> 
@llvm.s390.vfisb(<4 x float>, i32, i32) + declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32) declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32) +declare <4 x float> @llvm.s390.vfmaxsb(<4 x float>, <4 x float>, i32) +declare <4 x float> @llvm.s390.vfminsb(<4 x float>, <4 x float>, i32) ; VBPERM. define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) { @@ -192,6 +201,208 @@ define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) { ret void } +; VFCESBS with no processing of the result. +define i32 @test_vfcesbs(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfcesbs: +; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VFCESBS, returning 1 if any elements are equal (CC != 3). +define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfcesbs_any_bool: +; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: afi %r2, -536870912 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp ne i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFCESBS, storing to %ptr if any elements are equal. 
+define <4 x i32> @test_vfcesbs_any_store(<4 x float> %a, <4 x float> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vfcesbs_any_store: +; CHECK-NOT: %r +; CHECK: vfcesbs %v24, %v24, %v26 +; CHECK-NEXT: {{bor|bnler}} %r14 +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp ule i32 %cc, 2 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VFCHSBS with no processing of the result. +define i32 @test_vfchsbs(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfchsbs: +; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VFCHSBS, returning 1 if not all elements are higher. +define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfchsbs_notall_bool: +; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 36 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp sge i32 %res, 1 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFCHSBS, storing to %ptr if not all elements are higher. 
+define <4 x i32> @test_vfchsbs_notall_store(<4 x float> %a, <4 x float> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vfchsbs_notall_store: +; CHECK-NOT: %r +; CHECK: vfchsbs %v24, %v24, %v26 +; CHECK-NEXT: {{bher|ber}} %r14 +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp ugt i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VFCHESBS with no processing of the result. +define i32 @test_vfchesbs(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfchesbs: +; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VFCHESBS, returning 1 if no element is higher or equal. +define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfchesbs_none_bool: +; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 35 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp eq i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFCHESBS, storing to %ptr if no element is higher or equal. 
+define <4 x i32> @test_vfchesbs_none_store(<4 x float> %a, <4 x float> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vfchesbs_none_store: +; CHECK-NOT: %r +; CHECK: vfchesbs %v24, %v24, %v26 +; CHECK-NEXT: {{bnor|bler}} %r14 +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp uge i32 %cc, 3 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VFTCISB with the lowest useful class selector and no processing of the result. +define i32 @test_vftcisb(<4 x float> %a) { +; CHECK-LABEL: test_vftcisb: +; CHECK: vftcisb {{%v[0-9]+}}, %v24, 1 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 1) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VFTCISB with the highest useful class selector, returning 1 if all elements +; have the right class (CC == 0). +define i32 @test_vftcisb_all_bool(<4 x float> %a) { +; CHECK-LABEL: test_vftcisb_all_bool: +; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094 +; CHECK: afi %r2, -268435456 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp eq i32 %res, 0 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFISB with a rounding mode not usable via standard intrinsics. +define <4 x float> @test_vfisb_0_4(<4 x float> %a) { +; CHECK-LABEL: test_vfisb_0_4: +; CHECK: vfisb %v24, %v24, 0, 4 +; CHECK: br %r14 + %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 0, i32 4) + ret <4 x float> %res +} + +; VFISB with IEEE-inexact exception suppressed. 
+define <4 x float> @test_vfisb_4_0(<4 x float> %a) { +; CHECK-LABEL: test_vfisb_4_0: +; CHECK: vfisb %v24, %v24, 4, 0 +; CHECK: br %r14 + %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 4, i32 0) + ret <4 x float> %res +} + ; VFMAXDB. define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vfmaxdb: @@ -210,3 +421,21 @@ define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) { ret <2 x double> %res } +; VFMAXSB. +define <4 x float> @test_vfmaxsb(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfmaxsb: +; CHECK: vfmaxsb %v24, %v24, %v26, 4 +; CHECK: br %r14 + %res = call <4 x float> @llvm.s390.vfmaxsb(<4 x float> %a, <4 x float> %b, i32 4) + ret <4 x float> %res +} + +; VFMINSB. +define <4 x float> @test_vfminsb(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfminsb: +; CHECK: vfminsb %v24, %v24, %v26, 4 +; CHECK: br %r14 + %res = call <4 x float> @llvm.s390.vfminsb(<4 x float> %a, <4 x float> %b, i32 4) + ret <4 x float> %res +} + diff --git a/test/CodeGen/SystemZ/vec-max-05.ll b/test/CodeGen/SystemZ/vec-max-05.ll index 44efac76423..47400b8c66b 100644 --- a/test/CodeGen/SystemZ/vec-max-05.ll +++ b/test/CodeGen/SystemZ/vec-max-05.ll @@ -6,6 +6,10 @@ declare double @fmax(double, double) declare double @llvm.maxnum.f64(double, double) declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) +declare float @fmaxf(float, float) +declare float @llvm.maxnum.f32(float, float) +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) + ; Test the fmax library function. define double @f1(double %dummy, double %val1, double %val2) { ; CHECK-LABEL: f1: @@ -56,3 +60,53 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, ret <2 x double> %ret } +; Test the fmaxf library function. 
+define float @f11(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f11: +; CHECK: wfmaxsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @fmaxf(float %val1, float %val2) readnone + ret float %ret +} + +; Test the f32 maxnum intrinsic. +define float @f12(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f12: +; CHECK: wfmaxsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @llvm.maxnum.f32(float %val1, float %val2) + ret float %ret +} + +; Test a f32 constant compare/select resulting in maxnum. +define float @f13(float %dummy, float %val) { +; CHECK-LABEL: f13: +; CHECK: lzer [[REG:%f[0-9]+]] +; CHECK: wfmaxsb %f0, %f2, [[REG]], 4 +; CHECK: br %r14 + %cmp = fcmp ogt float %val, 0.0 + %ret = select i1 %cmp, float %val, float 0.0 + ret float %ret +} + +; Test a f32 constant compare/select resulting in maxnan. +define float @f14(float %dummy, float %val) { +; CHECK-LABEL: f14: +; CHECK: lzer [[REG:%f[0-9]+]] +; CHECK: wfmaxsb %f0, %f2, [[REG]], 1 +; CHECK: br %r14 + %cmp = fcmp ugt float %val, 0.0 + %ret = select i1 %cmp, float %val, float 0.0 + ret float %ret +} + +; Test the v4f32 maxnum intrinsic. +define <4 x float> @f15(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f15: +; CHECK: vfmaxsb %v24, %v26, %v28, 4 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %val1, <4 x float> %val2) + ret <4 x float> %ret +} + diff --git a/test/CodeGen/SystemZ/vec-min-05.ll b/test/CodeGen/SystemZ/vec-min-05.ll index c2d8726addf..b84ea6b6b4f 100644 --- a/test/CodeGen/SystemZ/vec-min-05.ll +++ b/test/CodeGen/SystemZ/vec-min-05.ll @@ -6,6 +6,10 @@ declare double @fmin(double, double) declare double @llvm.minnum.f64(double, double) declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) +declare float @fminf(float, float) +declare float @llvm.minnum.f32(float, float) +declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) + ; Test the fmin library function. 
define double @f1(double %dummy, double %val1, double %val2) { ; CHECK-LABEL: f1: @@ -56,3 +60,53 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, ret <2 x double> %ret } +; Test the fminf library function. +define float @f11(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f11: +; CHECK: wfminsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @fminf(float %val1, float %val2) readnone + ret float %ret +} + +; Test the f32 minnum intrinsic. +define float @f12(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f12: +; CHECK: wfminsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @llvm.minnum.f32(float %val1, float %val2) + ret float %ret +} + +; Test a f32 constant compare/select resulting in minnum. +define float @f13(float %dummy, float %val) { +; CHECK-LABEL: f13: +; CHECK: lzer [[REG:%f[0-9]+]] +; CHECK: wfminsb %f0, %f2, [[REG]], 4 +; CHECK: br %r14 + %cmp = fcmp olt float %val, 0.0 + %ret = select i1 %cmp, float %val, float 0.0 + ret float %ret +} + +; Test a f32 constant compare/select resulting in minnan. +define float @f14(float %dummy, float %val) { +; CHECK-LABEL: f14: +; CHECK: lzer [[REG:%f[0-9]+]] +; CHECK: wfminsb %f0, %f2, [[REG]], 1 +; CHECK: br %r14 + %cmp = fcmp ult float %val, 0.0 + %ret = select i1 %cmp, float %val, float 0.0 + ret float %ret +} + +; Test the v4f32 minnum intrinsic. +define <4 x float> @f15(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f15: +; CHECK: vfminsb %v24, %v26, %v28, 4 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.minnum.v4f32(<4 x float> %val1, <4 x float> %val2) + ret <4 x float> %ret +} + diff --git a/test/CodeGen/SystemZ/vec-mul-03.ll b/test/CodeGen/SystemZ/vec-mul-03.ll new file mode 100644 index 00000000000..3733db9fb33 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-mul-03.ll @@ -0,0 +1,24 @@ +; Test vector multiplication on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 multiplication. 
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfmsb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fmul <4 x float> %val1, %val2 + ret <4 x float> %ret +} + +; Test an f32 multiplication that uses vector registers. +define float @f2(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK: wfmsb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = fmul float %scalar1, %scalar2 + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-mul-04.ll b/test/CodeGen/SystemZ/vec-mul-04.ll new file mode 100644 index 00000000000..d96f0b6a745 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-mul-04.ll @@ -0,0 +1,31 @@ +; Test vector multiply-and-add on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) + +; Test a v4f32 multiply-and-add. +define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2, <4 x float> %val3) { +; CHECK-LABEL: f1: +; CHECK: vfmasb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1, + <4 x float> %val2, + <4 x float> %val3) + ret <4 x float> %ret +} + +; Test a v4f32 multiply-and-subtract. 
+define <4 x float> @f2(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2, <4 x float> %val3) { +; CHECK-LABEL: f2: +; CHECK: vfmssb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %negval3 = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %val3 + %ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1, + <4 x float> %val2, + <4 x float> %negval3) + ret <4 x float> %ret +} diff --git a/test/CodeGen/SystemZ/vec-mul-05.ll b/test/CodeGen/SystemZ/vec-mul-05.ll index c05437d4923..90a1f7a7efd 100644 --- a/test/CodeGen/SystemZ/vec-mul-05.ll +++ b/test/CodeGen/SystemZ/vec-mul-05.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) ; Test a v2f64 negative multiply-and-add. define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1, @@ -30,3 +31,33 @@ define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1, %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret ret <2 x double> %negret } + +; Test a v4f32 negative multiply-and-add. +define <4 x float> @f3(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2, <4 x float> %val3) { +; CHECK-LABEL: f3: +; CHECK: vfnmasb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1, + <4 x float> %val2, + <4 x float> %val3) + %negret = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %ret + ret <4 x float> %negret +} + +; Test a v4f32 negative multiply-and-subtract.
+define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2, <4 x float> %val3) { +; CHECK-LABEL: f4: +; CHECK: vfnmssb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %negval3 = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %val3 + %ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1, + <4 x float> %val2, + <4 x float> %negval3) + %negret = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %ret + ret <4 x float> %negret +} diff --git a/test/CodeGen/SystemZ/vec-neg-02.ll b/test/CodeGen/SystemZ/vec-neg-02.ll new file mode 100644 index 00000000000..07ce037542f --- /dev/null +++ b/test/CodeGen/SystemZ/vec-neg-02.ll @@ -0,0 +1,23 @@ +; Test vector negation on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 negation. +define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vflcsb %v24, %v26 +; CHECK: br %r14 + %ret = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %val + ret <4 x float> %ret +} + +; Test an f32 negation that uses vector registers. +define float @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: wflcsb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %ret = fsub float -0.0, %scalar + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-round-02.ll b/test/CodeGen/SystemZ/vec-round-02.ll new file mode 100644 index 00000000000..bcd66ea803d --- /dev/null +++ b/test/CodeGen/SystemZ/vec-round-02.ll @@ -0,0 +1,118 @@ +; Test v4f32 rounding on z14.
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.rint.f32(float) +declare float @llvm.nearbyint.f32(float) +declare float @llvm.floor.f32(float) +declare float @llvm.ceil.f32(float) +declare float @llvm.trunc.f32(float) +declare float @llvm.round.f32(float) +declare <4 x float> @llvm.rint.v4f32(<4 x float>) +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) +declare <4 x float> @llvm.round.v4f32(<4 x float>) + +define <4 x float> @f1(<4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vfisb %v24, %v24, 0, 0 +; CHECK: br %r14 + %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: vfisb %v24, %v24, 4, 0 +; CHECK: br %r14 + %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f3(<4 x float> %val) { +; CHECK-LABEL: f3: +; CHECK: vfisb %v24, %v24, 4, 7 +; CHECK: br %r14 + %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f4(<4 x float> %val) { +; CHECK-LABEL: f4: +; CHECK: vfisb %v24, %v24, 4, 6 +; CHECK: br %r14 + %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f5(<4 x float> %val) { +; CHECK-LABEL: f5: +; CHECK: vfisb %v24, %v24, 4, 5 +; CHECK: br %r14 + %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f6(<4 x float> %val) { +; CHECK-LABEL: f6: +; CHECK: vfisb %v24, %v24, 4, 1 +; CHECK: br %r14 + %res = call <4 x float> @llvm.round.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define float @f7(<4 x float> %val) { +; CHECK-LABEL: f7: +; CHECK: wfisb %f0, %v24, 0, 0 +; CHECK: br %r14 + %scalar = extractelement <4 x float> 
%val, i32 0 + %res = call float @llvm.rint.f32(float %scalar) + ret float %res +} + +define float @f8(<4 x float> %val) { +; CHECK-LABEL: f8: +; CHECK: wfisb %f0, %v24, 4, 0 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.nearbyint.f32(float %scalar) + ret float %res +} + +define float @f9(<4 x float> %val) { +; CHECK-LABEL: f9: +; CHECK: wfisb %f0, %v24, 4, 7 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.floor.f32(float %scalar) + ret float %res +} + +define float @f10(<4 x float> %val) { +; CHECK-LABEL: f10: +; CHECK: wfisb %f0, %v24, 4, 6 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.ceil.f32(float %scalar) + ret float %res +} + +define float @f11(<4 x float> %val) { +; CHECK-LABEL: f11: +; CHECK: wfisb %f0, %v24, 4, 5 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.trunc.f32(float %scalar) + ret float %res +} + +define float @f12(<4 x float> %val) { +; CHECK-LABEL: f12: +; CHECK: wfisb %f0, %v24, 4, 1 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.round.f32(float %scalar) + ret float %res +} diff --git a/test/CodeGen/SystemZ/vec-sqrt-02.ll b/test/CodeGen/SystemZ/vec-sqrt-02.ll new file mode 100644 index 00000000000..6970d9db669 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-sqrt-02.ll @@ -0,0 +1,23 @@ +; Test f32 and v4f32 square root on z14. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) + +define <4 x float> @f1(<4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vfsqsb %v24, %v24 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %val) + ret <4 x float> %ret +} + +define float @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: wfsqsb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %ret = call float @llvm.sqrt.f32(float %scalar) + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-sub-02.ll b/test/CodeGen/SystemZ/vec-sub-02.ll new file mode 100644 index 00000000000..83c76b5d4aa --- /dev/null +++ b/test/CodeGen/SystemZ/vec-sub-02.ll @@ -0,0 +1,31 @@ +; Test vector subtraction on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 subtraction. +define <4 x float> @f6(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f6: +; CHECK: vfssb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fsub <4 x float> %val1, %val2 + ret <4 x float> %ret +} + +; Test an f32 subtraction that uses vector registers. +define float @f7(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f7: +; CHECK: wfssb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = fsub float %scalar1, %scalar2 + ret float %ret +} + +; Test a v2f32 subtraction, which gets promoted to v4f32. +define <2 x float> @f14(<2 x float> %val1, <2 x float> %val2) { +; No particular output expected, but must compile. 
+ %ret = fsub <2 x float> %val1, %val2 + ret <2 x float> %ret +} diff --git a/test/MC/Disassembler/SystemZ/insns-z14.txt b/test/MC/Disassembler/SystemZ/insns-z14.txt index 07d2f233f20..c6b55fd5a26 100644 --- a/test/MC/Disassembler/SystemZ/insns-z14.txt +++ b/test/MC/Disassembler/SystemZ/insns-z14.txt @@ -629,6 +629,147 @@ # CHECK: vdp %v13, %v17, %v21, 121, 11 0xe6 0xd1 0x50 0xb7 0x96 0x7a +# CHECK: vfasb %v0, %v0, %v0 +0xe7 0x00 0x00 0x00 0x20 0xe3 + +# CHECK: vfasb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x00 0x22 0xe3 + +# CHECK: vfasb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x00 0x24 0xe3 + +# CHECK: vfasb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x00 0x28 0xe3 + +# CHECK: vfasb %v18, %v3, %v20 +0xe7 0x23 0x40 0x00 0x2a 0xe3 + +# CHECK: vfcesb %v0, %v0, %v0 +0xe7 0x00 0x00 0x00 0x20 0xe8 + +# CHECK: vfcesb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x00 0x22 0xe8 + +# CHECK: vfcesb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x00 0x24 0xe8 + +# CHECK: vfcesb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x00 0x28 0xe8 + +# CHECK: vfcesb %v18, %v3, %v20 +0xe7 0x23 0x40 0x00 0x2a 0xe8 + +# CHECK: vfcesbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x10 0x20 0xe8 + +# CHECK: vfcesbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x10 0x22 0xe8 + +# CHECK: vfcesbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x10 0x24 0xe8 + +# CHECK: vfcesbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x10 0x28 0xe8 + +# CHECK: vfcesbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x10 0x2a 0xe8 + +# CHECK: vfchsb %v0, %v0, %v0 +0xe7 0x00 0x00 0x00 0x20 0xeb + +# CHECK: vfchsb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x00 0x22 0xeb + +# CHECK: vfchsb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x00 0x24 0xeb + +# CHECK: vfchsb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x00 0x28 0xeb + +# CHECK: vfchsb %v18, %v3, %v20 +0xe7 0x23 0x40 0x00 0x2a 0xeb + +# CHECK: vfchsbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x10 0x20 0xeb + +# CHECK: vfchsbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x10 0x22 0xeb + +# CHECK: vfchsbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x10 0x24 0xeb + +# CHECK: vfchsbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x10 0x28 0xeb + +# CHECK: vfchsbs %v18, %v3, %v20 +0xe7 
0x23 0x40 0x10 0x2a 0xeb + +# CHECK: vfchesb %v0, %v0, %v0 +0xe7 0x00 0x00 0x00 0x20 0xea + +# CHECK: vfchesb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x00 0x22 0xea + +# CHECK: vfchesb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x00 0x24 0xea + +# CHECK: vfchesb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x00 0x28 0xea + +# CHECK: vfchesb %v18, %v3, %v20 +0xe7 0x23 0x40 0x00 0x2a 0xea + +# CHECK: vfchesbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x10 0x20 0xea + +# CHECK: vfchesbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x10 0x22 0xea + +# CHECK: vfchesbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x10 0x24 0xea + +# CHECK: vfchesbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x10 0x28 0xea + +# CHECK: vfchesbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x10 0x2a 0xea + +# CHECK: vfdsb %v0, %v0, %v0 +0xe7 0x00 0x00 0x00 0x20 0xe5 + +# CHECK: vfdsb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x00 0x22 0xe5 + +# CHECK: vfdsb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x00 0x24 0xe5 + +# CHECK: vfdsb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x00 0x28 0xe5 + +# CHECK: vfdsb %v18, %v3, %v20 +0xe7 0x23 0x40 0x00 0x2a 0xe5 + +# CHECK: vfisb %v0, %v0, 0, 0 +0xe7 0x00 0x00 0x00 0x20 0xc7 + +# CHECK: vfisb %v0, %v0, 0, 15 +0xe7 0x00 0x00 0xf0 0x20 0xc7 + +# CHECK: vfisb %v0, %v0, 4, 0 +0xe7 0x00 0x00 0x04 0x20 0xc7 + +# CHECK: vfisb %v0, %v0, 7, 0 +0xe7 0x00 0x00 0x07 0x20 0xc7 + +# CHECK: vfisb %v0, %v31, 0, 0 +0xe7 0x0f 0x00 0x00 0x24 0xc7 + +# CHECK: vfisb %v31, %v0, 0, 0 +0xe7 0xf0 0x00 0x00 0x28 0xc7 + +# CHECK: vfisb %v14, %v17, 4, 10 +0xe7 0xe1 0x00 0xa4 0x24 0xc7 + # CHECK: vfkedb %v0, %v0, %v0 0xe7 0x00 0x00 0x04 0x30 0xe8 @@ -659,6 +800,36 @@ # CHECK: vfkedbs %v18, %v3, %v20 0xe7 0x23 0x40 0x14 0x3a 0xe8 +# CHECK: vfkesb %v0, %v0, %v0 +0xe7 0x00 0x00 0x04 0x20 0xe8 + +# CHECK: vfkesb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x04 0x22 0xe8 + +# CHECK: vfkesb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x04 0x24 0xe8 + +# CHECK: vfkesb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x04 0x28 0xe8 + +# CHECK: vfkesb %v18, %v3, %v20 +0xe7 0x23 0x40 0x04 0x2a 0xe8 + +# CHECK: vfkesbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x14 0x20 0xe8 + 
+# CHECK: vfkesbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x14 0x22 0xe8 + +# CHECK: vfkesbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x14 0x24 0xe8 + +# CHECK: vfkesbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x14 0x28 0xe8 + +# CHECK: vfkesbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x14 0x2a 0xe8 + # CHECK: vfkhdb %v0, %v0, %v0 0xe7 0x00 0x00 0x04 0x30 0xeb @@ -689,6 +860,36 @@ # CHECK: vfkhdbs %v18, %v3, %v20 0xe7 0x23 0x40 0x14 0x3a 0xeb +# CHECK: vfkhsb %v0, %v0, %v0 +0xe7 0x00 0x00 0x04 0x20 0xeb + +# CHECK: vfkhsb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x04 0x22 0xeb + +# CHECK: vfkhsb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x04 0x24 0xeb + +# CHECK: vfkhsb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x04 0x28 0xeb + +# CHECK: vfkhsb %v18, %v3, %v20 +0xe7 0x23 0x40 0x04 0x2a 0xeb + +# CHECK: vfkhsbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x14 0x20 0xeb + +# CHECK: vfkhsbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x14 0x22 0xeb + +# CHECK: vfkhsbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x14 0x24 0xeb + +# CHECK: vfkhsbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x14 0x28 0xeb + +# CHECK: vfkhsbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x14 0x2a 0xeb + # CHECK: vfkhedb %v0, %v0, %v0 0xe7 0x00 0x00 0x04 0x30 0xea @@ -719,6 +920,111 @@ # CHECK: vfkhedbs %v18, %v3, %v20 0xe7 0x23 0x40 0x14 0x3a 0xea +# CHECK: vfkhesb %v0, %v0, %v0 +0xe7 0x00 0x00 0x04 0x20 0xea + +# CHECK: vfkhesb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x04 0x22 0xea + +# CHECK: vfkhesb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x04 0x24 0xea + +# CHECK: vfkhesb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x04 0x28 0xea + +# CHECK: vfkhesb %v18, %v3, %v20 +0xe7 0x23 0x40 0x04 0x2a 0xea + +# CHECK: vfkhesbs %v0, %v0, %v0 +0xe7 0x00 0x00 0x14 0x20 0xea + +# CHECK: vfkhesbs %v0, %v0, %v31 +0xe7 0x00 0xf0 0x14 0x22 0xea + +# CHECK: vfkhesbs %v0, %v31, %v0 +0xe7 0x0f 0x00 0x14 0x24 0xea + +# CHECK: vfkhesbs %v31, %v0, %v0 +0xe7 0xf0 0x00 0x14 0x28 0xea + +# CHECK: vfkhesbs %v18, %v3, %v20 +0xe7 0x23 0x40 0x14 0x2a 0xea + +# CHECK: vfpsosb %v0, %v0, 3 +0xe7 0x00 0x00 0x30 0x20 0xcc + +# CHECK: vfpsosb %v0, %v0, 15 +0xe7 0x00 0x00 0xf0 0x20 
0xcc + +# CHECK: vfpsosb %v0, %v15, 3 +0xe7 0x0f 0x00 0x30 0x20 0xcc + +# CHECK: vfpsosb %v0, %v31, 3 +0xe7 0x0f 0x00 0x30 0x24 0xcc + +# CHECK: vfpsosb %v15, %v0, 3 +0xe7 0xf0 0x00 0x30 0x20 0xcc + +# CHECK: vfpsosb %v31, %v0, 3 +0xe7 0xf0 0x00 0x30 0x28 0xcc + +# CHECK: vfpsosb %v14, %v17, 7 +0xe7 0xe1 0x00 0x70 0x24 0xcc + +# CHECK: vflcsb %v0, %v0 +0xe7 0x00 0x00 0x00 0x20 0xcc + +# CHECK: vflcsb %v0, %v15 +0xe7 0x0f 0x00 0x00 0x20 0xcc + +# CHECK: vflcsb %v0, %v31 +0xe7 0x0f 0x00 0x00 0x24 0xcc + +# CHECK: vflcsb %v15, %v0 +0xe7 0xf0 0x00 0x00 0x20 0xcc + +# CHECK: vflcsb %v31, %v0 +0xe7 0xf0 0x00 0x00 0x28 0xcc + +# CHECK: vflcsb %v14, %v17 +0xe7 0xe1 0x00 0x00 0x24 0xcc + +# CHECK: vflnsb %v0, %v0 +0xe7 0x00 0x00 0x10 0x20 0xcc + +# CHECK: vflnsb %v0, %v15 +0xe7 0x0f 0x00 0x10 0x20 0xcc + +# CHECK: vflnsb %v0, %v31 +0xe7 0x0f 0x00 0x10 0x24 0xcc + +# CHECK: vflnsb %v15, %v0 +0xe7 0xf0 0x00 0x10 0x20 0xcc + +# CHECK: vflnsb %v31, %v0 +0xe7 0xf0 0x00 0x10 0x28 0xcc + +# CHECK: vflnsb %v14, %v17 +0xe7 0xe1 0x00 0x10 0x24 0xcc + +# CHECK: vflpsb %v0, %v0 +0xe7 0x00 0x00 0x20 0x20 0xcc + +# CHECK: vflpsb %v0, %v15 +0xe7 0x0f 0x00 0x20 0x20 0xcc + +# CHECK: vflpsb %v0, %v31 +0xe7 0x0f 0x00 0x20 0x24 0xcc + +# CHECK: vflpsb %v15, %v0 +0xe7 0xf0 0x00 0x20 0x20 0xcc + +# CHECK: vflpsb %v31, %v0 +0xe7 0xf0 0x00 0x20 0x28 0xcc + +# CHECK: vflpsb %v14, %v17 +0xe7 0xe1 0x00 0x20 0x24 0xcc + # CHECK: vfmax %v0, %v0, %v0, 0, 0, 0 0xe7 0x00 0x00 0x00 0x00 0xef @@ -761,6 +1067,24 @@ # CHECK: vfmaxdb %v18, %v3, %v20, 12 0xe7 0x23 0x40 0xc0 0x3a 0xef +# CHECK: vfmaxsb %v0, %v0, %v0, 0 +0xe7 0x00 0x00 0x00 0x20 0xef + +# CHECK: vfmaxsb %v0, %v0, %v0, 4 +0xe7 0x00 0x00 0x40 0x20 0xef + +# CHECK: vfmaxsb %v0, %v0, %v31, 0 +0xe7 0x00 0xf0 0x00 0x22 0xef + +# CHECK: vfmaxsb %v0, %v31, %v0, 0 +0xe7 0x0f 0x00 0x00 0x24 0xef + +# CHECK: vfmaxsb %v31, %v0, %v0, 0 +0xe7 0xf0 0x00 0x00 0x28 0xef + +# CHECK: vfmaxsb %v18, %v3, %v20, 12 +0xe7 0x23 0x40 0xc0 0x2a 0xef + # CHECK: vfmin %v0, 
%v0, %v0, 0, 0, 0 0xe7 0x00 0x00 0x00 0x00 0xee @@ -803,6 +1127,75 @@ # CHECK: vfmindb %v18, %v3, %v20, 12 0xe7 0x23 0x40 0xc0 0x3a 0xee +# CHECK: vfminsb %v0, %v0, %v0, 0 +0xe7 0x00 0x00 0x00 0x20 0xee + +# CHECK: vfminsb %v0, %v0, %v0, 4 +0xe7 0x00 0x00 0x40 0x20 0xee + +# CHECK: vfminsb %v0, %v0, %v31, 0 +0xe7 0x00 0xf0 0x00 0x22 0xee + +# CHECK: vfminsb %v0, %v31, %v0, 0 +0xe7 0x0f 0x00 0x00 0x24 0xee + +# CHECK: vfminsb %v31, %v0, %v0, 0 +0xe7 0xf0 0x00 0x00 0x28 0xee + +# CHECK: vfminsb %v18, %v3, %v20, 12 +0xe7 0x23 0x40 0xc0 0x2a 0xee + +# CHECK: vfmasb %v0, %v0, %v0, %v0 +0xe7 0x00 0x02 0x00 0x00 0x8f + +# CHECK: vfmasb %v0, %v0, %v0, %v31 +0xe7 0x00 0x02 0x00 0xf1 0x8f + +# CHECK: vfmasb %v0, %v0, %v31, %v0 +0xe7 0x00 0xf2 0x00 0x02 0x8f + +# CHECK: vfmasb %v0, %v31, %v0, %v0 +0xe7 0x0f 0x02 0x00 0x04 0x8f + +# CHECK: vfmasb %v31, %v0, %v0, %v0 +0xe7 0xf0 0x02 0x00 0x08 0x8f + +# CHECK: vfmasb %v13, %v17, %v21, %v25 +0xe7 0xd1 0x52 0x00 0x97 0x8f + +# CHECK: vfmsb %v0, %v0, %v0 +0xe7 0x00 0x00 0x00 0x20 0xe7 + +# CHECK: vfmsb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x00 0x22 0xe7 + +# CHECK: vfmsb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x00 0x24 0xe7 + +# CHECK: vfmsb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x00 0x28 0xe7 + +# CHECK: vfmsb %v18, %v3, %v20 +0xe7 0x23 0x40 0x00 0x2a 0xe7 + +# CHECK: vfmssb %v0, %v0, %v0, %v0 +0xe7 0x00 0x02 0x00 0x00 0x8e + +# CHECK: vfmssb %v0, %v0, %v0, %v31 +0xe7 0x00 0x02 0x00 0xf1 0x8e + +# CHECK: vfmssb %v0, %v0, %v31, %v0 +0xe7 0x00 0xf2 0x00 0x02 0x8e + +# CHECK: vfmssb %v0, %v31, %v0, %v0 +0xe7 0x0f 0x02 0x00 0x04 0x8e + +# CHECK: vfmssb %v31, %v0, %v0, %v0 +0xe7 0xf0 0x02 0x00 0x08 0x8e + +# CHECK: vfmssb %v13, %v17, %v21, %v25 +0xe7 0xd1 0x52 0x00 0x97 0x8e + # CHECK: vfnma %v0, %v0, %v0, %v0, 0, 0 0xe7 0x00 0x00 0x00 0x00 0x9f @@ -845,6 +1238,24 @@ # CHECK: vfnmadb %v13, %v17, %v21, %v25 0xe7 0xd1 0x53 0x00 0x97 0x9f +# CHECK: vfnmasb %v0, %v0, %v0, %v0 +0xe7 0x00 0x02 0x00 0x00 0x9f + +# CHECK: vfnmasb %v0, %v0, %v0, %v31 +0xe7 0x00 
0x02 0x00 0xf1 0x9f + +# CHECK: vfnmasb %v0, %v0, %v31, %v0 +0xe7 0x00 0xf2 0x00 0x02 0x9f + +# CHECK: vfnmasb %v0, %v31, %v0, %v0 +0xe7 0x0f 0x02 0x00 0x04 0x9f + +# CHECK: vfnmasb %v31, %v0, %v0, %v0 +0xe7 0xf0 0x02 0x00 0x08 0x9f + +# CHECK: vfnmasb %v13, %v17, %v21, %v25 +0xe7 0xd1 0x52 0x00 0x97 0x9f + # CHECK: vfnms %v0, %v0, %v0, %v0, 0, 0 0xe7 0x00 0x00 0x00 0x00 0x9e @@ -887,6 +1298,78 @@ # CHECK: vfnmsdb %v13, %v17, %v21, %v25 0xe7 0xd1 0x53 0x00 0x97 0x9e +# CHECK: vfnmssb %v0, %v0, %v0, %v0 +0xe7 0x00 0x02 0x00 0x00 0x9e + +# CHECK: vfnmssb %v0, %v0, %v0, %v31 +0xe7 0x00 0x02 0x00 0xf1 0x9e + +# CHECK: vfnmssb %v0, %v0, %v31, %v0 +0xe7 0x00 0xf2 0x00 0x02 0x9e + +# CHECK: vfnmssb %v0, %v31, %v0, %v0 +0xe7 0x0f 0x02 0x00 0x04 0x9e + +# CHECK: vfnmssb %v31, %v0, %v0, %v0 +0xe7 0xf0 0x02 0x00 0x08 0x9e + +# CHECK: vfnmssb %v13, %v17, %v21, %v25 +0xe7 0xd1 0x52 0x00 0x97 0x9e + +# CHECK: vfssb %v0, %v0, %v0 +0xe7 0x00 0x00 0x00 0x20 0xe2 + +# CHECK: vfssb %v0, %v0, %v31 +0xe7 0x00 0xf0 0x00 0x22 0xe2 + +# CHECK: vfssb %v0, %v31, %v0 +0xe7 0x0f 0x00 0x00 0x24 0xe2 + +# CHECK: vfssb %v31, %v0, %v0 +0xe7 0xf0 0x00 0x00 0x28 0xe2 + +# CHECK: vfssb %v18, %v3, %v20 +0xe7 0x23 0x40 0x00 0x2a 0xe2 + +# CHECK: vfsqsb %v0, %v0 +0xe7 0x00 0x00 0x00 0x20 0xce + +# CHECK: vfsqsb %v0, %v15 +0xe7 0x0f 0x00 0x00 0x20 0xce + +# CHECK: vfsqsb %v0, %v31 +0xe7 0x0f 0x00 0x00 0x24 0xce + +# CHECK: vfsqsb %v15, %v0 +0xe7 0xf0 0x00 0x00 0x20 0xce + +# CHECK: vfsqsb %v31, %v0 +0xe7 0xf0 0x00 0x00 0x28 0xce + +# CHECK: vfsqsb %v14, %v17 +0xe7 0xe1 0x00 0x00 0x24 0xce + +# CHECK: vftcisb %v0, %v0, 0 +0xe7 0x00 0x00 0x00 0x20 0x4a + +# CHECK: vftcisb %v0, %v0, 4095 +0xe7 0x00 0xff 0xf0 0x20 0x4a + +# CHECK: vftcisb %v0, %v15, 0 +0xe7 0x0f 0x00 0x00 0x20 0x4a + +# CHECK: vftcisb %v0, %v31, 0 +0xe7 0x0f 0x00 0x00 0x24 0x4a + +# CHECK: vftcisb %v15, %v0, 0 +0xe7 0xf0 0x00 0x00 0x20 0x4a + +# CHECK: vftcisb %v31, %v0, 0 +0xe7 0xf0 0x00 0x00 0x28 0x4a + +# CHECK: vftcisb %v4, %v21, 1656 
+0xe7 0x45 0x67 0x80 0x24 0x4a + # CHECK: vlip %v0, 0, 0 0xe6 0x00 0x00 0x00 0x00 0x49 @@ -1400,6 +1883,216 @@ # CHECK: vupkz %v18, 1383(%r4), 3 0xe6 0x03 0x45 0x67 0x21 0x3c +# CHECK: wfasb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe3 + +# CHECK: wfasb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe3 + +# CHECK: wfasb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x08 0x22 0xe3 + +# CHECK: wfasb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x08 0x24 0xe3 + +# CHECK: wfasb %v31, %f0, %f0 +0xe7 0xf0 0x00 0x08 0x28 0xe3 + +# CHECK: wfasb %v18, %f3, %v20 +0xe7 0x23 0x40 0x08 0x2a 0xe3 + +# CHECK: wfcsb %f0, %f0 +0xe7 0x00 0x00 0x00 0x20 0xcb + +# CHECK: wfcsb %f0, %f0 +0xe7 0x00 0x00 0x00 0x20 0xcb + +# CHECK: wfcsb %f0, %f15 +0xe7 0x0f 0x00 0x00 0x20 0xcb + +# CHECK: wfcsb %f0, %v31 +0xe7 0x0f 0x00 0x00 0x24 0xcb + +# CHECK: wfcsb %f15, %f0 +0xe7 0xf0 0x00 0x00 0x20 0xcb + +# CHECK: wfcsb %v31, %f0 +0xe7 0xf0 0x00 0x00 0x28 0xcb + +# CHECK: wfcsb %f14, %v17 +0xe7 0xe1 0x00 0x00 0x24 0xcb + +# CHECK: wfcesb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe8 + +# CHECK: wfcesb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe8 + +# CHECK: wfcesb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x08 0x22 0xe8 + +# CHECK: wfcesb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x08 0x24 0xe8 + +# CHECK: wfcesb %v31, %f0, %f0 +0xe7 0xf0 0x00 0x08 0x28 0xe8 + +# CHECK: wfcesb %v18, %f3, %v20 +0xe7 0x23 0x40 0x08 0x2a 0xe8 + +# CHECK: wfcesbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x18 0x20 0xe8 + +# CHECK: wfcesbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x18 0x20 0xe8 + +# CHECK: wfcesbs %f0, %f0, %v31 +0xe7 0x00 0xf0 0x18 0x22 0xe8 + +# CHECK: wfcesbs %f0, %v31, %f0 +0xe7 0x0f 0x00 0x18 0x24 0xe8 + +# CHECK: wfcesbs %v31, %f0, %f0 +0xe7 0xf0 0x00 0x18 0x28 0xe8 + +# CHECK: wfcesbs %v18, %f3, %v20 +0xe7 0x23 0x40 0x18 0x2a 0xe8 + +# CHECK: wfchsb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xeb + +# CHECK: wfchsb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xeb + +# CHECK: wfchsb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x08 0x22 0xeb + +# CHECK: wfchsb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x08 
0x24 0xeb + +# CHECK: wfchsb %v31, %f0, %f0 +0xe7 0xf0 0x00 0x08 0x28 0xeb + +# CHECK: wfchsb %v18, %f3, %v20 +0xe7 0x23 0x40 0x08 0x2a 0xeb + +# CHECK: wfchsbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x18 0x20 0xeb + +# CHECK: wfchsbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x18 0x20 0xeb + +# CHECK: wfchsbs %f0, %f0, %v31 +0xe7 0x00 0xf0 0x18 0x22 0xeb + +# CHECK: wfchsbs %f0, %v31, %f0 +0xe7 0x0f 0x00 0x18 0x24 0xeb + +# CHECK: wfchsbs %v31, %f0, %f0 +0xe7 0xf0 0x00 0x18 0x28 0xeb + +# CHECK: wfchsbs %v18, %f3, %v20 +0xe7 0x23 0x40 0x18 0x2a 0xeb + +# CHECK: wfchesb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xea + +# CHECK: wfchesb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xea + +# CHECK: wfchesb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x08 0x22 0xea + +# CHECK: wfchesb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x08 0x24 0xea + +# CHECK: wfchesb %v31, %f0, %f0 +0xe7 0xf0 0x00 0x08 0x28 0xea + +# CHECK: wfchesb %v18, %f3, %v20 +0xe7 0x23 0x40 0x08 0x2a 0xea + +# CHECK: wfchesbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x18 0x20 0xea + +# CHECK: wfchesbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x18 0x20 0xea + +# CHECK: wfchesbs %f0, %f0, %v31 +0xe7 0x00 0xf0 0x18 0x22 0xea + +# CHECK: wfchesbs %f0, %v31, %f0 +0xe7 0x0f 0x00 0x18 0x24 0xea + +# CHECK: wfchesbs %v31, %f0, %f0 +0xe7 0xf0 0x00 0x18 0x28 0xea + +# CHECK: wfchesbs %v18, %f3, %v20 +0xe7 0x23 0x40 0x18 0x2a 0xea + +# CHECK: wfdsb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe5 + +# CHECK: wfdsb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe5 + +# CHECK: wfdsb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x08 0x22 0xe5 + +# CHECK: wfdsb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x08 0x24 0xe5 + +# CHECK: wfdsb %v31, %f0, %f0 +0xe7 0xf0 0x00 0x08 0x28 0xe5 + +# CHECK: wfdsb %v18, %f3, %v20 +0xe7 0x23 0x40 0x08 0x2a 0xe5 + +# CHECK: wfisb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc7 + +# CHECK: wfisb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc7 + +# CHECK: wfisb %f0, %f0, 0, 15 +0xe7 0x00 0x00 0xf8 0x20 0xc7 + +# CHECK: wfisb %f0, %f0, 4, 0 +0xe7 0x00 0x00 0x0c 0x20 0xc7 + +# CHECK: wfisb %f0, %f0, 
7, 0 +0xe7 0x00 0x00 0x0f 0x20 0xc7 + +# CHECK: wfisb %f0, %v31, 0, 0 +0xe7 0x0f 0x00 0x08 0x24 0xc7 + +# CHECK: wfisb %v31, %f0, 0, 0 +0xe7 0xf0 0x00 0x08 0x28 0xc7 + +# CHECK: wfisb %f14, %v17, 4, 10 +0xe7 0xe1 0x00 0xac 0x24 0xc7 + +# CHECK: wfksb %f0, %f0 +0xe7 0x00 0x00 0x00 0x20 0xca + +# CHECK: wfksb %f0, %f0 +0xe7 0x00 0x00 0x00 0x20 0xca + +# CHECK: wfksb %f0, %f15 +0xe7 0x0f 0x00 0x00 0x20 0xca + +# CHECK: wfksb %f0, %v31 +0xe7 0x0f 0x00 0x00 0x24 0xca + +# CHECK: wfksb %f15, %f0 +0xe7 0xf0 0x00 0x00 0x20 0xca + +# CHECK: wfksb %v31, %f0 +0xe7 0xf0 0x00 0x00 0x28 0xca + +# CHECK: wfksb %f14, %v17 +0xe7 0xe1 0x00 0x00 0x24 0xca + # CHECK: wfkedb %f0, %f0, %f0 0xe7 0x00 0x00 0x0c 0x30 0xe8 @@ -1436,6 +2129,42 @@ # CHECK: wfkedbs %v18, %f3, %v20 0xe7 0x23 0x40 0x1c 0x3a 0xe8 +# CHECK: wfkesb %f0, %f0, %f0 +0xe7 0x00 0x00 0x0c 0x20 0xe8 + +# CHECK: wfkesb %f0, %f0, %f0 +0xe7 0x00 0x00 0x0c 0x20 0xe8 + +# CHECK: wfkesb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x0c 0x22 0xe8 + +# CHECK: wfkesb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x0c 0x24 0xe8 + +# CHECK: wfkesb %v31, %f0, %f0 +0xe7 0xf0 0x00 0x0c 0x28 0xe8 + +# CHECK: wfkesb %v18, %f3, %v20 +0xe7 0x23 0x40 0x0c 0x2a 0xe8 + +# CHECK: wfkesbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x1c 0x20 0xe8 + +# CHECK: wfkesbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x1c 0x20 0xe8 + +# CHECK: wfkesbs %f0, %f0, %v31 +0xe7 0x00 0xf0 0x1c 0x22 0xe8 + +# CHECK: wfkesbs %f0, %v31, %f0 +0xe7 0x0f 0x00 0x1c 0x24 0xe8 + +# CHECK: wfkesbs %v31, %f0, %f0 +0xe7 0xf0 0x00 0x1c 0x28 0xe8 + +# CHECK: wfkesbs %v18, %f3, %v20 +0xe7 0x23 0x40 0x1c 0x2a 0xe8 + # CHECK: wfkhdb %f0, %f0, %f0 0xe7 0x00 0x00 0x0c 0x30 0xeb @@ -1472,6 +2201,42 @@ # CHECK: wfkhdbs %v18, %f3, %v20 0xe7 0x23 0x40 0x1c 0x3a 0xeb +# CHECK: wfkhsb %f0, %f0, %f0 +0xe7 0x00 0x00 0x0c 0x20 0xeb + +# CHECK: wfkhsb %f0, %f0, %f0 +0xe7 0x00 0x00 0x0c 0x20 0xeb + +# CHECK: wfkhsb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x0c 0x22 0xeb + +# CHECK: wfkhsb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x0c 0x24 0xeb + +# CHECK: wfkhsb 
%v31, %f0, %f0 +0xe7 0xf0 0x00 0x0c 0x28 0xeb + +# CHECK: wfkhsb %v18, %f3, %v20 +0xe7 0x23 0x40 0x0c 0x2a 0xeb + +# CHECK: wfkhsbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x1c 0x20 0xeb + +# CHECK: wfkhsbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x1c 0x20 0xeb + +# CHECK: wfkhsbs %f0, %f0, %v31 +0xe7 0x00 0xf0 0x1c 0x22 0xeb + +# CHECK: wfkhsbs %f0, %v31, %f0 +0xe7 0x0f 0x00 0x1c 0x24 0xeb + +# CHECK: wfkhsbs %v31, %f0, %f0 +0xe7 0xf0 0x00 0x1c 0x28 0xeb + +# CHECK: wfkhsbs %v18, %f3, %v20 +0xe7 0x23 0x40 0x1c 0x2a 0xeb + # CHECK: wfkhedb %f0, %f0, %f0 0xe7 0x00 0x00 0x0c 0x30 0xea @@ -1508,6 +2273,129 @@ # CHECK: wfkhedbs %v18, %f3, %v20 0xe7 0x23 0x40 0x1c 0x3a 0xea +# CHECK: wfkhesb %f0, %f0, %f0 +0xe7 0x00 0x00 0x0c 0x20 0xea + +# CHECK: wfkhesb %f0, %f0, %f0 +0xe7 0x00 0x00 0x0c 0x20 0xea + +# CHECK: wfkhesb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x0c 0x22 0xea + +# CHECK: wfkhesb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x0c 0x24 0xea + +# CHECK: wfkhesb %v31, %f0, %f0 +0xe7 0xf0 0x00 0x0c 0x28 0xea + +# CHECK: wfkhesb %v18, %f3, %v20 +0xe7 0x23 0x40 0x0c 0x2a 0xea + +# CHECK: wfkhesbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x1c 0x20 0xea + +# CHECK: wfkhesbs %f0, %f0, %f0 +0xe7 0x00 0x00 0x1c 0x20 0xea + +# CHECK: wfkhesbs %f0, %f0, %v31 +0xe7 0x00 0xf0 0x1c 0x22 0xea + +# CHECK: wfkhesbs %f0, %v31, %f0 +0xe7 0x0f 0x00 0x1c 0x24 0xea + +# CHECK: wfkhesbs %v31, %f0, %f0 +0xe7 0xf0 0x00 0x1c 0x28 0xea + +# CHECK: wfkhesbs %v18, %f3, %v20 +0xe7 0x23 0x40 0x1c 0x2a 0xea + +# CHECK: wfpsosb %f0, %f0, 3 +0xe7 0x00 0x00 0x38 0x20 0xcc + +# CHECK: wfpsosb %f0, %f0, 3 +0xe7 0x00 0x00 0x38 0x20 0xcc + +# CHECK: wfpsosb %f0, %f0, 15 +0xe7 0x00 0x00 0xf8 0x20 0xcc + +# CHECK: wfpsosb %f0, %f15, 3 +0xe7 0x0f 0x00 0x38 0x20 0xcc + +# CHECK: wfpsosb %f0, %v31, 3 +0xe7 0x0f 0x00 0x38 0x24 0xcc + +# CHECK: wfpsosb %f15, %f0, 3 +0xe7 0xf0 0x00 0x38 0x20 0xcc + +# CHECK: wfpsosb %v31, %f0, 3 +0xe7 0xf0 0x00 0x38 0x28 0xcc + +# CHECK: wfpsosb %f14, %v17, 7 +0xe7 0xe1 0x00 0x78 0x24 0xcc + +# CHECK: wflcsb %f0, %f0 +0xe7 
0x00 0x00 0x08 0x20 0xcc + +# CHECK: wflcsb %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xcc + +# CHECK: wflcsb %f0, %f15 +0xe7 0x0f 0x00 0x08 0x20 0xcc + +# CHECK: wflcsb %f0, %v31 +0xe7 0x0f 0x00 0x08 0x24 0xcc + +# CHECK: wflcsb %f15, %f0 +0xe7 0xf0 0x00 0x08 0x20 0xcc + +# CHECK: wflcsb %v31, %f0 +0xe7 0xf0 0x00 0x08 0x28 0xcc + +# CHECK: wflcsb %f14, %v17 +0xe7 0xe1 0x00 0x08 0x24 0xcc + +# CHECK: wflnsb %f0, %f0 +0xe7 0x00 0x00 0x18 0x20 0xcc + +# CHECK: wflnsb %f0, %f0 +0xe7 0x00 0x00 0x18 0x20 0xcc + +# CHECK: wflnsb %f0, %f15 +0xe7 0x0f 0x00 0x18 0x20 0xcc + +# CHECK: wflnsb %f0, %v31 +0xe7 0x0f 0x00 0x18 0x24 0xcc + +# CHECK: wflnsb %f15, %f0 +0xe7 0xf0 0x00 0x18 0x20 0xcc + +# CHECK: wflnsb %v31, %f0 +0xe7 0xf0 0x00 0x18 0x28 0xcc + +# CHECK: wflnsb %f14, %v17 +0xe7 0xe1 0x00 0x18 0x24 0xcc + +# CHECK: wflpsb %f0, %f0 +0xe7 0x00 0x00 0x28 0x20 0xcc + +# CHECK: wflpsb %f0, %f0 +0xe7 0x00 0x00 0x28 0x20 0xcc + +# CHECK: wflpsb %f0, %f15 +0xe7 0x0f 0x00 0x28 0x20 0xcc + +# CHECK: wflpsb %f0, %v31 +0xe7 0x0f 0x00 0x28 0x24 0xcc + +# CHECK: wflpsb %f15, %f0 +0xe7 0xf0 0x00 0x28 0x20 0xcc + +# CHECK: wflpsb %v31, %f0 +0xe7 0xf0 0x00 0x28 0x28 0xcc + +# CHECK: wflpsb %f14, %v17 +0xe7 0xe1 0x00 0x28 0x24 0xcc + # CHECK: wfmaxdb %f0, %f0, %f0, 0 0xe7 0x00 0x00 0x08 0x30 0xef @@ -1529,6 +2417,27 @@ # CHECK: wfmaxdb %v18, %f3, %v20, 11 0xe7 0x23 0x40 0xb8 0x3a 0xef +# CHECK: wfmaxsb %f0, %f0, %f0, 0 +0xe7 0x00 0x00 0x08 0x20 0xef + +# CHECK: wfmaxsb %f0, %f0, %f0, 0 +0xe7 0x00 0x00 0x08 0x20 0xef + +# CHECK: wfmaxsb %f0, %f0, %f0, 4 +0xe7 0x00 0x00 0x48 0x20 0xef + +# CHECK: wfmaxsb %f0, %f0, %v31, 0 +0xe7 0x00 0xf0 0x08 0x22 0xef + +# CHECK: wfmaxsb %f0, %v31, %f0, 0 +0xe7 0x0f 0x00 0x08 0x24 0xef + +# CHECK: wfmaxsb %v31, %f0, %f0, 0 +0xe7 0xf0 0x00 0x08 0x28 0xef + +# CHECK: wfmaxsb %v18, %f3, %v20, 11 +0xe7 0x23 0x40 0xb8 0x2a 0xef + # CHECK: wfmindb %f0, %f0, %f0, 0 0xe7 0x00 0x00 0x08 0x30 0xee @@ -1550,6 +2459,87 @@ # CHECK: wfmindb %v18, %f3, %v20, 11 0xe7 0x23 0x40 
0xb8 0x3a 0xee +# CHECK: wfminsb %f0, %f0, %f0, 0 +0xe7 0x00 0x00 0x08 0x20 0xee + +# CHECK: wfminsb %f0, %f0, %f0, 0 +0xe7 0x00 0x00 0x08 0x20 0xee + +# CHECK: wfminsb %f0, %f0, %f0, 4 +0xe7 0x00 0x00 0x48 0x20 0xee + +# CHECK: wfminsb %f0, %f0, %v31, 0 +0xe7 0x00 0xf0 0x08 0x22 0xee + +# CHECK: wfminsb %f0, %v31, %f0, 0 +0xe7 0x0f 0x00 0x08 0x24 0xee + +# CHECK: wfminsb %v31, %f0, %f0, 0 +0xe7 0xf0 0x00 0x08 0x28 0xee + +# CHECK: wfminsb %v18, %f3, %v20, 11 +0xe7 0x23 0x40 0xb8 0x2a 0xee + +# CHECK: wfmasb %f0, %f0, %f0, %f0 +0xe7 0x00 0x02 0x08 0x00 0x8f + +# CHECK: wfmasb %f0, %f0, %f0, %f0 +0xe7 0x00 0x02 0x08 0x00 0x8f + +# CHECK: wfmasb %f0, %f0, %f0, %v31 +0xe7 0x00 0x02 0x08 0xf1 0x8f + +# CHECK: wfmasb %f0, %f0, %v31, %f0 +0xe7 0x00 0xf2 0x08 0x02 0x8f + +# CHECK: wfmasb %f0, %v31, %f0, %f0 +0xe7 0x0f 0x02 0x08 0x04 0x8f + +# CHECK: wfmasb %v31, %f0, %f0, %f0 +0xe7 0xf0 0x02 0x08 0x08 0x8f + +# CHECK: wfmasb %f13, %v17, %v21, %v25 +0xe7 0xd1 0x52 0x08 0x97 0x8f + +# CHECK: wfmsb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe7 + +# CHECK: wfmsb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe7 + +# CHECK: wfmsb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x08 0x22 0xe7 + +# CHECK: wfmsb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x08 0x24 0xe7 + +# CHECK: wfmsb %v31, %f0, %f0 +0xe7 0xf0 0x00 0x08 0x28 0xe7 + +# CHECK: wfmsb %v18, %f3, %v20 +0xe7 0x23 0x40 0x08 0x2a 0xe7 + +# CHECK: wfmssb %f0, %f0, %f0, %f0 +0xe7 0x00 0x02 0x08 0x00 0x8e + +# CHECK: wfmssb %f0, %f0, %f0, %f0 +0xe7 0x00 0x02 0x08 0x00 0x8e + +# CHECK: wfmssb %f0, %f0, %f0, %v31 +0xe7 0x00 0x02 0x08 0xf1 0x8e + +# CHECK: wfmssb %f0, %f0, %v31, %f0 +0xe7 0x00 0xf2 0x08 0x02 0x8e + +# CHECK: wfmssb %f0, %v31, %f0, %f0 +0xe7 0x0f 0x02 0x08 0x04 0x8e + +# CHECK: wfmssb %v31, %f0, %f0, %f0 +0xe7 0xf0 0x02 0x08 0x08 0x8e + +# CHECK: wfmssb %f13, %v17, %v21, %v25 +0xe7 0xd1 0x52 0x08 0x97 0x8e + # CHECK: wfnmadb %f0, %f0, %f0, %f0 0xe7 0x00 0x03 0x08 0x00 0x9f @@ -1571,6 +2561,27 @@ # CHECK: wfnmadb %f13, %v17, %v21, %v25 0xe7 
0xd1 0x53 0x08 0x97 0x9f +# CHECK: wfnmasb %f0, %f0, %f0, %f0 +0xe7 0x00 0x02 0x08 0x00 0x9f + +# CHECK: wfnmasb %f0, %f0, %f0, %f0 +0xe7 0x00 0x02 0x08 0x00 0x9f + +# CHECK: wfnmasb %f0, %f0, %f0, %v31 +0xe7 0x00 0x02 0x08 0xf1 0x9f + +# CHECK: wfnmasb %f0, %f0, %v31, %f0 +0xe7 0x00 0xf2 0x08 0x02 0x9f + +# CHECK: wfnmasb %f0, %v31, %f0, %f0 +0xe7 0x0f 0x02 0x08 0x04 0x9f + +# CHECK: wfnmasb %v31, %f0, %f0, %f0 +0xe7 0xf0 0x02 0x08 0x08 0x9f + +# CHECK: wfnmasb %f13, %v17, %v21, %v25 +0xe7 0xd1 0x52 0x08 0x97 0x9f + # CHECK: wfnmsdb %f0, %f0, %f0, %f0 0xe7 0x00 0x03 0x08 0x00 0x9e @@ -1592,3 +2603,87 @@ # CHECK: wfnmsdb %f13, %v17, %v21, %v25 0xe7 0xd1 0x53 0x08 0x97 0x9e +# CHECK: wfnmssb %f0, %f0, %f0, %f0 +0xe7 0x00 0x02 0x08 0x00 0x9e + +# CHECK: wfnmssb %f0, %f0, %f0, %f0 +0xe7 0x00 0x02 0x08 0x00 0x9e + +# CHECK: wfnmssb %f0, %f0, %f0, %v31 +0xe7 0x00 0x02 0x08 0xf1 0x9e + +# CHECK: wfnmssb %f0, %f0, %v31, %f0 +0xe7 0x00 0xf2 0x08 0x02 0x9e + +# CHECK: wfnmssb %f0, %v31, %f0, %f0 +0xe7 0x0f 0x02 0x08 0x04 0x9e + +# CHECK: wfnmssb %v31, %f0, %f0, %f0 +0xe7 0xf0 0x02 0x08 0x08 0x9e + +# CHECK: wfnmssb %f13, %v17, %v21, %v25 +0xe7 0xd1 0x52 0x08 0x97 0x9e + +# CHECK: wfssb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe2 + +# CHECK: wfssb %f0, %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xe2 + +# CHECK: wfssb %f0, %f0, %v31 +0xe7 0x00 0xf0 0x08 0x22 0xe2 + +# CHECK: wfssb %f0, %v31, %f0 +0xe7 0x0f 0x00 0x08 0x24 0xe2 + +# CHECK: wfssb %v31, %f0, %f0 +0xe7 0xf0 0x00 0x08 0x28 0xe2 + +# CHECK: wfssb %v18, %f3, %v20 +0xe7 0x23 0x40 0x08 0x2a 0xe2 + +# CHECK: wfsqsb %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xce + +# CHECK: wfsqsb %f0, %f0 +0xe7 0x00 0x00 0x08 0x20 0xce + +# CHECK: wfsqsb %f0, %f15 +0xe7 0x0f 0x00 0x08 0x20 0xce + +# CHECK: wfsqsb %f0, %v31 +0xe7 0x0f 0x00 0x08 0x24 0xce + +# CHECK: wfsqsb %f15, %f0 +0xe7 0xf0 0x00 0x08 0x20 0xce + +# CHECK: wfsqsb %v31, %f0 +0xe7 0xf0 0x00 0x08 0x28 0xce + +# CHECK: wfsqsb %f14, %v17 +0xe7 0xe1 0x00 0x08 0x24 0xce + +# CHECK: wftcisb 
%f0, %f0, 0 +0xe7 0x00 0x00 0x08 0x20 0x4a + +# CHECK: wftcisb %f0, %f0, 0 +0xe7 0x00 0x00 0x08 0x20 0x4a + +# CHECK: wftcisb %f0, %f0, 4095 +0xe7 0x00 0xff 0xf8 0x20 0x4a + +# CHECK: wftcisb %f0, %f15, 0 +0xe7 0x0f 0x00 0x08 0x20 0x4a + +# CHECK: wftcisb %f0, %v31, 0 +0xe7 0x0f 0x00 0x08 0x24 0x4a + +# CHECK: wftcisb %f15, %f0, 0 +0xe7 0xf0 0x00 0x08 0x20 0x4a + +# CHECK: wftcisb %v31, %f0, 0 +0xe7 0xf0 0x00 0x08 0x28 0x4a + +# CHECK: wftcisb %f4, %v21, 1656 +0xe7 0x45 0x67 0x88 0x24 0x4a + diff --git a/test/MC/SystemZ/insn-bad-z13.s b/test/MC/SystemZ/insn-bad-z13.s index 7bf5b5303dd..456567af2e2 100644 --- a/test/MC/SystemZ/insn-bad-z13.s +++ b/test/MC/SystemZ/insn-bad-z13.s @@ -955,6 +955,40 @@ vfaezhs %v0, %v0 vfaezhs %v0, %v0, %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfasb %v0, %v0, %v0 + + vfasb %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfcesb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfcesbs %v0, %v0, %v0 + + vfcesb %v0, %v0, %v0 + vfcesbs %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfchsb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfchsbs %v0, %v0, %v0 + + vfchsb %v0, %v0, %v0 + vfchsbs %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfchesb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfchesbs %v0, %v0, %v0 + + vfchesb %v0, %v0, %v0 + vfchesbs %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfdsb %v0, %v0, %v0 + + vfdsb %v0, %v0, %v0 + #CHECK: error: invalid operand #CHECK: vfee %v0, %v0, %v0, 0, -1 #CHECK: error: invalid operand @@ -1257,61 +1291,151 @@ vfidb %v0, %v0, -1, 0 vfidb %v0, %v0, 16, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfisb %v0, %v0, 0, 0 + + vfisb %v0, %v0, 0, 0 + #CHECK: error: instruction 
requires: vector-enhancements-1 #CHECK: vfkedb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfkedbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfkesb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfkesbs %v0, %v0, %v0 vfkedb %v0, %v0, %v0 vfkedbs %v0, %v0, %v0 + vfkesb %v0, %v0, %v0 + vfkesbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfkhdb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfkhdbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfkhsb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfkhsbs %v0, %v0, %v0 vfkhdb %v0, %v0, %v0 vfkhdbs %v0, %v0, %v0 + vfkhsb %v0, %v0, %v0 + vfkhsbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfkhedb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfkhedbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfkhesb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfkhesbs %v0, %v0, %v0 vfkhedb %v0, %v0, %v0 vfkhedbs %v0, %v0, %v0 + vfkhesb %v0, %v0, %v0 + vfkhesbs %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfpsosb %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vflcsb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vflnsb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vflpsb %v0, %v0 + + vfpsosb %v0, %v0, 0 + vflcsb %v0, %v0 + vflnsb %v0, %v0 + vflpsb %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfll %v0, %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vflls %v0, %v0 + + vfll %v0, %v0, 0, 0 + vflls %v0, %v0 + +#CHECK: error: instruction 
requires: vector-enhancements-1 +#CHECK: vflr %v0, %v0, 0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vflrd %v0, %v0, 0, 0 + + vflr %v0, %v0, 0, 0, 0 + vflrd %v0, %v0, 0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfmax %v0, %v0, %v0, 0, 0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfmaxdb %v0, %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfmaxsb %v0, %v0, %v0, 0 vfmax %v0, %v0, %v0, 0, 0, 0 vfmaxdb %v0, %v0, %v0, 0 + vfmaxsb %v0, %v0, %v0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfmin %v0, %v0, %v0, 0, 0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfmindb %v0, %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfminsb %v0, %v0, %v0, 0 vfmin %v0, %v0, %v0, 0, 0, 0 vfmindb %v0, %v0, %v0, 0 + vfminsb %v0, %v0, %v0, 0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfmasb %v0, %v0, %v0, %v0 + + vfmasb %v0, %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfmsb %v0, %v0, %v0 + + vfmsb %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfmssb %v0, %v0, %v0, %v0 + + vfmssb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfnma %v0, %v0, %v0, %v0, 0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfnmadb %v0, %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfnmasb %v0, %v0, %v0, %v0 vfnma %v0, %v0, %v0, %v0, 0, 0 vfnmadb %v0, %v0, %v0, %v0 + vfnmasb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfnms %v0, %v0, %v0, %v0, 0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: vfnmsdb %v0, %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfnmssb %v0, %v0, %v0, %v0 vfnms %v0, %v0, %v0, %v0, 0, 0 
vfnmsdb %v0, %v0, %v0, %v0 + vfnmssb %v0, %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfssb %v0, %v0, %v0 + + vfssb %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vfsqsb %v0, %v0 + + vfsqsb %v0, %v0 #CHECK: error: invalid operand #CHECK: vftci %v0, %v0, 0, 0, -1 @@ -1341,6 +1465,11 @@ vftcidb %v0, %v0, -1 vftcidb %v0, %v0, 4096 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: vftcisb %v0, %v0, 0 + + vftcisb %v0, %v0, 0 + #CHECK: error: invalid operand #CHECK: vgbm %v0, -1 #CHECK: error: invalid operand @@ -2607,6 +2736,45 @@ wclgdb %v0, %v0, -1, 0 wclgdb %v0, %v0, 16, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfasb %v0, %v0, %v0 + + wfasb %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfcsb %v0, %v0 + + wfcsb %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfcesb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfcesbs %v0, %v0, %v0 + + wfcesb %v0, %v0, %v0 + wfcesbs %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfchsb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfchsbs %v0, %v0, %v0 + + wfchsb %v0, %v0, %v0 + wfchsbs %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfchesb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfchesbs %v0, %v0, %v0 + + wfchesb %v0, %v0, %v0 + wfchesbs %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfdsb %v0, %v0, %v0 + + wfdsb %v0, %v0, %v0 + #CHECK: error: invalid operand #CHECK: wfidb %v0, %v0, 0, -1 #CHECK: error: invalid operand @@ -2621,49 +2789,138 @@ wfidb %v0, %v0, -1, 0 wfidb %v0, %v0, 16, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfisb %v0, %v0, 0, 0 + + wfisb %v0, %v0, 0, 0 + +#CHECK: 
error: instruction requires: vector-enhancements-1 +#CHECK: wfksb %v0, %v0 + + wfksb %v0, %v0 + #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkedb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkedbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkesb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkesbs %v0, %v0, %v0 wfkedb %v0, %v0, %v0 wfkedbs %v0, %v0, %v0 + wfkesb %v0, %v0, %v0 + wfkesbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkhdb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkhdbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkhsb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkhsbs %v0, %v0, %v0 wfkhdb %v0, %v0, %v0 wfkhdbs %v0, %v0, %v0 + wfkhsb %v0, %v0, %v0 + wfkhsbs %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkhedb %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfkhedbs %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkhesb %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfkhesbs %v0, %v0, %v0 wfkhedb %v0, %v0, %v0 wfkhedbs %v0, %v0, %v0 + wfkhesb %v0, %v0, %v0 + wfkhesbs %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfpsosb %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wflcsb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wflnsb %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wflpsb %v0, %v0 + + wfpsosb %v0, %v0, 0 + wflcsb %v0, %v0 + wflnsb %v0, %v0 + wflpsb %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wflls %v0, %v0 + + wflls %v0, %v0 + +#CHECK: error: 
instruction requires: vector-enhancements-1 +#CHECK: wflrd %v0, %v0, 0, 0 + + wflrd %v0, %v0, 0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfmaxdb %v0, %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfmaxsb %v0, %v0, %v0, 0 wfmaxdb %v0, %v0, %v0, 0 + wfmaxsb %v0, %v0, %v0, 0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfmindb %v0, %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfminsb %v0, %v0, %v0, 0 wfmindb %v0, %v0, %v0, 0 + wfminsb %v0, %v0, %v0, 0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfmasb %v0, %v0, %v0, %v0 + + wfmasb %v0, %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfmsb %v0, %v0, %v0 + + wfmsb %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfmssb %v0, %v0, %v0, %v0 + + wfmssb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfnmadb %v0, %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfnmasb %v0, %v0, %v0, %v0 wfnmadb %v0, %v0, %v0, %v0 + wfnmasb %v0, %v0, %v0, %v0 #CHECK: error: instruction requires: vector-enhancements-1 #CHECK: wfnmsdb %v0, %v0, %v0, %v0 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfnmssb %v0, %v0, %v0, %v0 wfnmsdb %v0, %v0, %v0, %v0 + wfnmssb %v0, %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfssb %v0, %v0, %v0 + + wfssb %v0, %v0, %v0 + +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wfsqsb %v0, %v0 + + wfsqsb %v0, %v0 #CHECK: error: invalid operand #CHECK: wftcidb %v0, %v0, -1 @@ -2673,6 +2930,11 @@ wftcidb %v0, %v0, -1 wftcidb %v0, %v0, 4096 +#CHECK: error: instruction requires: vector-enhancements-1 +#CHECK: wftcisb %v0, %v0, 0 + + wftcisb %v0, %v0, 0 + #CHECK: error: invalid operand #CHECK: wledb %v0, %v0, 0, -1 #CHECK: error: invalid operand diff 
--git a/test/MC/SystemZ/insn-bad-z14.s b/test/MC/SystemZ/insn-bad-z14.s index 4e0a250810a..0e96fd3a40b 100644 --- a/test/MC/SystemZ/insn-bad-z14.s +++ b/test/MC/SystemZ/insn-bad-z14.s @@ -213,6 +213,68 @@ vdp %v0, %v0, %v0, -1, 0 vdp %v0, %v0, %v0, 256, 0 +#CHECK: error: invalid operand +#CHECK: vfisb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vfisb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vfisb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vfisb %v0, %v0, 16, 0 + + vfisb %v0, %v0, 0, -1 + vfisb %v0, %v0, 0, 16 + vfisb %v0, %v0, -1, 0 + vfisb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand +#CHECK: vfll %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vfll %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vfll %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vfll %v0, %v0, 16, 0 + + vfll %v0, %v0, 0, -1 + vfll %v0, %v0, 0, 16 + vfll %v0, %v0, -1, 0 + vfll %v0, %v0, 16, 0 + +#CHECK: error: invalid operand +#CHECK: vflr %v0, %v0, 0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vflr %v0, %v0, 0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vflr %v0, %v0, 0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vflr %v0, %v0, 0, 16, 0 +#CHECK: error: invalid operand +#CHECK: vflr %v0, %v0, -1, 0, 0 +#CHECK: error: invalid operand +#CHECK: vflr %v0, %v0, 16, 0, 0 + + vflr %v0, %v0, 0, 0, -1 + vflr %v0, %v0, 0, 0, 16 + vflr %v0, %v0, 0, -1, 0 + vflr %v0, %v0, 0, 16, 0 + vflr %v0, %v0, -1, 0, 0 + vflr %v0, %v0, 16, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vflrd %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vflrd %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vflrd %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vflrd %v0, %v0, 16, 0 + + vflrd %v0, %v0, 0, -1 + vflrd %v0, %v0, 0, 16 + vflrd %v0, %v0, -1, 0 + vflrd %v0, %v0, 16, 0 + #CHECK: error: invalid operand #CHECK: vfmax %v0, %v0, %v0, 0, 0, -1 #CHECK: error: invalid operand @@ -241,6 +303,14 @@ vfmaxdb %v0, %v0, %v0, 
-1 vfmaxdb %v0, %v0, %v0, 16 +#CHECK: error: invalid operand +#CHECK: vfmaxsb %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vfmaxsb %v0, %v0, %v0, 16 + + vfmaxsb %v0, %v0, %v0, -1 + vfmaxsb %v0, %v0, %v0, 16 + #CHECK: error: invalid operand #CHECK: vfmin %v0, %v0, %v0, 0, 0, -1 #CHECK: error: invalid operand @@ -269,6 +339,14 @@ vfmindb %v0, %v0, %v0, -1 vfmindb %v0, %v0, %v0, 16 +#CHECK: error: invalid operand +#CHECK: vfminsb %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vfminsb %v0, %v0, %v0, 16 + + vfminsb %v0, %v0, %v0, -1 + vfminsb %v0, %v0, %v0, 16 + #CHECK: error: invalid operand #CHECK: vfnma %v0, %v0, %v0, %v0, 0, -1 #CHECK: error: invalid operand @@ -297,6 +375,14 @@ vfnms %v0, %v0, %v0, %v0, -1, 0 vfnms %v0, %v0, %v0, %v0, 16, 0 +#CHECK: error: invalid operand +#CHECK: vftcisb %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vftcisb %v0, %v0, 4096 + + vftcisb %v0, %v0, -1 + vftcisb %v0, %v0, 4096 + #CHECK: error: invalid operand #CHECK: vlip %v0, 0, -1 #CHECK: error: invalid operand @@ -544,6 +630,34 @@ vupkz %v0, 4096, 0 vupkz %v0, 0(%r0), 0 +#CHECK: error: invalid operand +#CHECK: wfisb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: wfisb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: wfisb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: wfisb %v0, %v0, 16, 0 + + wfisb %v0, %v0, 0, -1 + wfisb %v0, %v0, 0, 16 + wfisb %v0, %v0, -1, 0 + wfisb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand +#CHECK: wflrd %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: wflrd %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: wflrd %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: wflrd %v0, %v0, 16, 0 + + wflrd %v0, %v0, 0, -1 + wflrd %v0, %v0, 0, 16 + wflrd %v0, %v0, -1, 0 + wflrd %v0, %v0, 16, 0 + #CHECK: error: invalid operand #CHECK: wfmaxdb %v0, %v0, %v0, -1 #CHECK: error: invalid operand @@ -552,6 +666,14 @@ wfmaxdb %v0, %v0, %v0, -1 wfmaxdb %v0, %v0, %v0, 16 +#CHECK: error: invalid 
operand +#CHECK: wfmaxsb %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: wfmaxsb %v0, %v0, %v0, 16 + + wfmaxsb %v0, %v0, %v0, -1 + wfmaxsb %v0, %v0, %v0, 16 + #CHECK: error: invalid operand #CHECK: wfmindb %v0, %v0, %v0, -1 #CHECK: error: invalid operand @@ -560,3 +682,19 @@ wfmindb %v0, %v0, %v0, -1 wfmindb %v0, %v0, %v0, 16 +#CHECK: error: invalid operand +#CHECK: wfminsb %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: wfminsb %v0, %v0, %v0, 16 + + wfminsb %v0, %v0, %v0, -1 + wfminsb %v0, %v0, %v0, 16 + +#CHECK: error: invalid operand +#CHECK: wftcisb %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: wftcisb %v0, %v0, 4096 + + wftcisb %v0, %v0, -1 + wftcisb %v0, %v0, 4096 + diff --git a/test/MC/SystemZ/insn-good-z14.s b/test/MC/SystemZ/insn-good-z14.s index c247f252659..99fb495e40a 100644 --- a/test/MC/SystemZ/insn-good-z14.s +++ b/test/MC/SystemZ/insn-good-z14.s @@ -540,6 +540,118 @@ vdp %v31, %v0, %v0, 0, 0 vdp %v13, %v17, %v21, 0x79, 11 +#CHECK: vfasb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xe3] +#CHECK: vfasb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xe3] +#CHECK: vfasb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xe3] +#CHECK: vfasb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xe3] +#CHECK: vfasb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xe3] + + vfasb %v0, %v0, %v0 + vfasb %v0, %v0, %v31 + vfasb %v0, %v31, %v0 + vfasb %v31, %v0, %v0 + vfasb %v18, %v3, %v20 + +#CHECK: vfcesb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xe8] +#CHECK: vfcesb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xe8] +#CHECK: vfcesb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xe8] +#CHECK: vfcesb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xe8] +#CHECK: vfcesb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xe8] + + vfcesb %v0, %v0, %v0 + vfcesb %v0, %v0, %v31 + vfcesb %v0, %v31, %v0 + vfcesb %v31, %v0, %v0 + vfcesb %v18, %v3, %v20 + +#CHECK: vfcesbs %v0, %v0, 
%v0 # encoding: [0xe7,0x00,0x00,0x10,0x20,0xe8] +#CHECK: vfcesbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x10,0x22,0xe8] +#CHECK: vfcesbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x10,0x24,0xe8] +#CHECK: vfcesbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x28,0xe8] +#CHECK: vfcesbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x10,0x2a,0xe8] + + vfcesbs %v0, %v0, %v0 + vfcesbs %v0, %v0, %v31 + vfcesbs %v0, %v31, %v0 + vfcesbs %v31, %v0, %v0 + vfcesbs %v18, %v3, %v20 + +#CHECK: vfchsb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xeb] +#CHECK: vfchsb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xeb] +#CHECK: vfchsb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xeb] +#CHECK: vfchsb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xeb] +#CHECK: vfchsb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xeb] + + vfchsb %v0, %v0, %v0 + vfchsb %v0, %v0, %v31 + vfchsb %v0, %v31, %v0 + vfchsb %v31, %v0, %v0 + vfchsb %v18, %v3, %v20 + +#CHECK: vfchsbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x10,0x20,0xeb] +#CHECK: vfchsbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x10,0x22,0xeb] +#CHECK: vfchsbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x10,0x24,0xeb] +#CHECK: vfchsbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x28,0xeb] +#CHECK: vfchsbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x10,0x2a,0xeb] + + vfchsbs %v0, %v0, %v0 + vfchsbs %v0, %v0, %v31 + vfchsbs %v0, %v31, %v0 + vfchsbs %v31, %v0, %v0 + vfchsbs %v18, %v3, %v20 + +#CHECK: vfchesb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xea] +#CHECK: vfchesb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xea] +#CHECK: vfchesb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xea] +#CHECK: vfchesb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xea] +#CHECK: vfchesb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xea] + + vfchesb %v0, %v0, %v0 + vfchesb %v0, %v0, %v31 + vfchesb %v0, %v31, %v0 + vfchesb %v31, %v0, %v0 + vfchesb %v18, %v3, %v20 + 
+#CHECK: vfchesbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x10,0x20,0xea] +#CHECK: vfchesbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x10,0x22,0xea] +#CHECK: vfchesbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x10,0x24,0xea] +#CHECK: vfchesbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x28,0xea] +#CHECK: vfchesbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x10,0x2a,0xea] + + vfchesbs %v0, %v0, %v0 + vfchesbs %v0, %v0, %v31 + vfchesbs %v0, %v31, %v0 + vfchesbs %v31, %v0, %v0 + vfchesbs %v18, %v3, %v20 + +#CHECK: vfdsb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xe5] +#CHECK: vfdsb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xe5] +#CHECK: vfdsb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xe5] +#CHECK: vfdsb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xe5] +#CHECK: vfdsb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xe5] + + vfdsb %v0, %v0, %v0 + vfdsb %v0, %v0, %v31 + vfdsb %v0, %v31, %v0 + vfdsb %v31, %v0, %v0 + vfdsb %v18, %v3, %v20 + +#CHECK: vfisb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc7] +#CHECK: vfisb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xc7] +#CHECK: vfisb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xc7] +#CHECK: vfisb %v0, %v0, 7, 0 # encoding: [0xe7,0x00,0x00,0x07,0x20,0xc7] +#CHECK: vfisb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc7] +#CHECK: vfisb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc7] +#CHECK: vfisb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x24,0xc7] + + vfisb %v0, %v0, 0, 0 + vfisb %v0, %v0, 0, 15 + vfisb %v0, %v0, 4, 0 + vfisb %v0, %v0, 7, 0 + vfisb %v0, %v31, 0, 0 + vfisb %v31, %v0, 0, 0 + vfisb %v14, %v17, 4, 10 + #CHECK: vfkedb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xe8] #CHECK: vfkedb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x04,0x32,0xe8] #CHECK: vfkedb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x04,0x34,0xe8] @@ -564,6 +676,30 @@ vfkedbs %v31, %v0, %v0 vfkedbs %v18, %v3, %v20 
+#CHECK: vfkesb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xe8] +#CHECK: vfkesb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x04,0x22,0xe8] +#CHECK: vfkesb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x04,0x24,0xe8] +#CHECK: vfkesb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x04,0x28,0xe8] +#CHECK: vfkesb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x04,0x2a,0xe8] + + vfkesb %v0, %v0, %v0 + vfkesb %v0, %v0, %v31 + vfkesb %v0, %v31, %v0 + vfkesb %v31, %v0, %v0 + vfkesb %v18, %v3, %v20 + +#CHECK: vfkesbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x14,0x20,0xe8] +#CHECK: vfkesbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x14,0x22,0xe8] +#CHECK: vfkesbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x14,0x24,0xe8] +#CHECK: vfkesbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x14,0x28,0xe8] +#CHECK: vfkesbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x14,0x2a,0xe8] + + vfkesbs %v0, %v0, %v0 + vfkesbs %v0, %v0, %v31 + vfkesbs %v0, %v31, %v0 + vfkesbs %v31, %v0, %v0 + vfkesbs %v18, %v3, %v20 + #CHECK: vfkhdb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xeb] #CHECK: vfkhdb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x04,0x32,0xeb] #CHECK: vfkhdb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x04,0x34,0xeb] @@ -588,6 +724,30 @@ vfkhdbs %v31, %v0, %v0 vfkhdbs %v18, %v3, %v20 +#CHECK: vfkhsb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xeb] +#CHECK: vfkhsb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x04,0x22,0xeb] +#CHECK: vfkhsb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x04,0x24,0xeb] +#CHECK: vfkhsb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x04,0x28,0xeb] +#CHECK: vfkhsb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x04,0x2a,0xeb] + + vfkhsb %v0, %v0, %v0 + vfkhsb %v0, %v0, %v31 + vfkhsb %v0, %v31, %v0 + vfkhsb %v31, %v0, %v0 + vfkhsb %v18, %v3, %v20 + +#CHECK: vfkhsbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x14,0x20,0xeb] +#CHECK: vfkhsbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x14,0x22,0xeb] +#CHECK: vfkhsbs %v0, %v31, %v0 # encoding: 
[0xe7,0x0f,0x00,0x14,0x24,0xeb] +#CHECK: vfkhsbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x14,0x28,0xeb] +#CHECK: vfkhsbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x14,0x2a,0xeb] + + vfkhsbs %v0, %v0, %v0 + vfkhsbs %v0, %v0, %v31 + vfkhsbs %v0, %v31, %v0 + vfkhsbs %v31, %v0, %v0 + vfkhsbs %v18, %v3, %v20 + #CHECK: vfkhedb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xea] #CHECK: vfkhedb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x04,0x32,0xea] #CHECK: vfkhedb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x04,0x34,0xea] @@ -612,6 +772,154 @@ vfkhedbs %v31, %v0, %v0 vfkhedbs %v18, %v3, %v20 +#CHECK: vfkhesb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xea] +#CHECK: vfkhesb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x04,0x22,0xea] +#CHECK: vfkhesb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x04,0x24,0xea] +#CHECK: vfkhesb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x04,0x28,0xea] +#CHECK: vfkhesb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x04,0x2a,0xea] + + vfkhesb %v0, %v0, %v0 + vfkhesb %v0, %v0, %v31 + vfkhesb %v0, %v31, %v0 + vfkhesb %v31, %v0, %v0 + vfkhesb %v18, %v3, %v20 + +#CHECK: vfkhesbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x14,0x20,0xea] +#CHECK: vfkhesbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x14,0x22,0xea] +#CHECK: vfkhesbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x14,0x24,0xea] +#CHECK: vfkhesbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x14,0x28,0xea] +#CHECK: vfkhesbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x14,0x2a,0xea] + + vfkhesbs %v0, %v0, %v0 + vfkhesbs %v0, %v0, %v31 + vfkhesbs %v0, %v31, %v0 + vfkhesbs %v31, %v0, %v0 + vfkhesbs %v18, %v3, %v20 + +#CHECK: vfpsosb %v0, %v0, 3 # encoding: [0xe7,0x00,0x00,0x30,0x20,0xcc] +#CHECK: vfpsosb %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xcc] +#CHECK: vfpsosb %v0, %v15, 3 # encoding: [0xe7,0x0f,0x00,0x30,0x20,0xcc] +#CHECK: vfpsosb %v0, %v31, 3 # encoding: [0xe7,0x0f,0x00,0x30,0x24,0xcc] +#CHECK: vfpsosb %v15, %v0, 3 # encoding: 
[0xe7,0xf0,0x00,0x30,0x20,0xcc] +#CHECK: vfpsosb %v31, %v0, 3 # encoding: [0xe7,0xf0,0x00,0x30,0x28,0xcc] +#CHECK: vfpsosb %v14, %v17, 7 # encoding: [0xe7,0xe1,0x00,0x70,0x24,0xcc] + + vfpsosb %v0, %v0, 3 + vfpsosb %v0, %v0, 15 + vfpsosb %v0, %v15, 3 + vfpsosb %v0, %v31, 3 + vfpsosb %v15, %v0, 3 + vfpsosb %v31, %v0, 3 + vfpsosb %v14, %v17, 7 + +#CHECK: vflcsb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xcc] +#CHECK: vflcsb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xcc] +#CHECK: vflcsb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xcc] +#CHECK: vflcsb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xcc] +#CHECK: vflcsb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xcc] +#CHECK: vflcsb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xcc] + + vflcsb %v0, %v0 + vflcsb %v0, %v15 + vflcsb %v0, %v31 + vflcsb %v15, %v0 + vflcsb %v31, %v0 + vflcsb %v14, %v17 + +#CHECK: vflnsb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x10,0x20,0xcc] +#CHECK: vflnsb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x10,0x20,0xcc] +#CHECK: vflnsb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x10,0x24,0xcc] +#CHECK: vflnsb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x20,0xcc] +#CHECK: vflnsb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x28,0xcc] +#CHECK: vflnsb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x10,0x24,0xcc] + + vflnsb %v0, %v0 + vflnsb %v0, %v15 + vflnsb %v0, %v31 + vflnsb %v15, %v0 + vflnsb %v31, %v0 + vflnsb %v14, %v17 + +#CHECK: vflpsb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x20,0x20,0xcc] +#CHECK: vflpsb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x20,0x20,0xcc] +#CHECK: vflpsb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x20,0x24,0xcc] +#CHECK: vflpsb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x20,0x20,0xcc] +#CHECK: vflpsb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x20,0x28,0xcc] +#CHECK: vflpsb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x20,0x24,0xcc] + + vflpsb %v0, %v0 + vflpsb %v0, %v15 + vflpsb %v0, %v31 + vflpsb %v15, %v0 + vflpsb %v31, %v0 + vflpsb %v14, %v17 + +#CHECK: vfll %v0, %v0, 0, 0 # 
encoding: [0xe7,0x00,0x00,0x00,0x00,0xc4] +#CHECK: vfll %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc4] +#CHECK: vfll %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x00,0xc4] +#CHECK: vfll %v0, %v15, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xc4] +#CHECK: vfll %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc4] +#CHECK: vfll %v15, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xc4] +#CHECK: vfll %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc4] +#CHECK: vfll %v14, %v17, 11, 9 # encoding: [0xe7,0xe1,0x00,0x09,0xb4,0xc4] + + vfll %v0, %v0, 0, 0 + vfll %v0, %v0, 15, 0 + vfll %v0, %v0, 0, 15 + vfll %v0, %v15, 0, 0 + vfll %v0, %v31, 0, 0 + vfll %v15, %v0, 0, 0 + vfll %v31, %v0, 0, 0 + vfll %v14, %v17, 11, 9 + +#CHECK: vflls %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc4] +#CHECK: vflls %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xc4] +#CHECK: vflls %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc4] +#CHECK: vflls %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xc4] +#CHECK: vflls %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc4] +#CHECK: vflls %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xc4] + + vflls %v0, %v0 + vflls %v0, %v15 + vflls %v0, %v31 + vflls %v15, %v0 + vflls %v31, %v0 + vflls %v14, %v17 + +#CHECK: vflr %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xc5] +#CHECK: vflr %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc5] +#CHECK: vflr %v0, %v0, 0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x00,0xc5] +#CHECK: vflr %v0, %v0, 0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x00,0xc5] +#CHECK: vflr %v0, %v0, 0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x00,0xc5] +#CHECK: vflr %v0, %v31, 0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc5] +#CHECK: vflr %v31, %v0, 0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc5] +#CHECK: vflr %v14, %v17, 11, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0xb4,0xc5] + + vflr %v0, %v0, 0, 0, 0 + vflr %v0, %v0, 15, 0, 0 + vflr %v0, %v0, 0, 0, 15 + vflr %v0, %v0, 0, 4, 0 + 
vflr %v0, %v0, 0, 12, 0 + vflr %v0, %v31, 0, 0, 0 + vflr %v31, %v0, 0, 0, 0 + vflr %v14, %v17, 11, 4, 10 + +#CHECK: vflrd %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc5] +#CHECK: vflrd %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc5] +#CHECK: vflrd %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc5] +#CHECK: vflrd %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5] +#CHECK: vflrd %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc5] +#CHECK: vflrd %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc5] +#CHECK: vflrd %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc5] + + vflrd %v0, %v0, 0, 0 + vflrd %v0, %v0, 0, 15 + vflrd %v0, %v0, 4, 0 + vflrd %v0, %v0, 12, 0 + vflrd %v0, %v31, 0, 0 + vflrd %v31, %v0, 0, 0 + vflrd %v14, %v17, 4, 10 + #CHECK: vfmax %v0, %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xef] #CHECK: vfmax %v0, %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xef] #CHECK: vfmax %v0, %v0, %v0, 0, 15, 0 # encoding: [0xe7,0x00,0x00,0x0f,0x00,0xef] @@ -644,6 +952,20 @@ vfmaxdb %v31, %v0, %v0, 0 vfmaxdb %v18, %v3, %v20, 12 +#CHECK: vfmaxsb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xef] +#CHECK: vfmaxsb %v0, %v0, %v0, 4 # encoding: [0xe7,0x00,0x00,0x40,0x20,0xef] +#CHECK: vfmaxsb %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xef] +#CHECK: vfmaxsb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xef] +#CHECK: vfmaxsb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xef] +#CHECK: vfmaxsb %v18, %v3, %v20, 12 # encoding: [0xe7,0x23,0x40,0xc0,0x2a,0xef] + + vfmaxsb %v0, %v0, %v0, 0 + vfmaxsb %v0, %v0, %v0, 4 + vfmaxsb %v0, %v0, %v31, 0 + vfmaxsb %v0, %v31, %v0, 0 + vfmaxsb %v31, %v0, %v0, 0 + vfmaxsb %v18, %v3, %v20, 12 + #CHECK: vfmin %v0, %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xee] #CHECK: vfmin %v0, %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xee] #CHECK: vfmin %v0, %v0, %v0, 0, 15, 0 # encoding: 
[0xe7,0x00,0x00,0x0f,0x00,0xee] @@ -676,6 +998,60 @@ vfmindb %v31, %v0, %v0, 0 vfmindb %v18, %v3, %v20, 12 +#CHECK: vfminsb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xee] +#CHECK: vfminsb %v0, %v0, %v0, 4 # encoding: [0xe7,0x00,0x00,0x40,0x20,0xee] +#CHECK: vfminsb %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xee] +#CHECK: vfminsb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xee] +#CHECK: vfminsb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xee] +#CHECK: vfminsb %v18, %v3, %v20, 12 # encoding: [0xe7,0x23,0x40,0xc0,0x2a,0xee] + + vfminsb %v0, %v0, %v0, 0 + vfminsb %v0, %v0, %v0, 4 + vfminsb %v0, %v0, %v31, 0 + vfminsb %v0, %v31, %v0, 0 + vfminsb %v31, %v0, %v0, 0 + vfminsb %v18, %v3, %v20, 12 + +#CHECK: vfmasb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8f] +#CHECK: vfmasb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x8f] +#CHECK: vfmasb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0x8f] +#CHECK: vfmasb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x8f] +#CHECK: vfmasb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x8f] +#CHECK: vfmasb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0x8f] + + vfmasb %v0, %v0, %v0, %v0 + vfmasb %v0, %v0, %v0, %v31 + vfmasb %v0, %v0, %v31, %v0 + vfmasb %v0, %v31, %v0, %v0 + vfmasb %v31, %v0, %v0, %v0 + vfmasb %v13, %v17, %v21, %v25 + +#CHECK: vfmsb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xe7] +#CHECK: vfmsb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xe7] +#CHECK: vfmsb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xe7] +#CHECK: vfmsb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xe7] +#CHECK: vfmsb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xe7] + + vfmsb %v0, %v0, %v0 + vfmsb %v0, %v0, %v31 + vfmsb %v0, %v31, %v0 + vfmsb %v31, %v0, %v0 + vfmsb %v18, %v3, %v20 + +#CHECK: vfmssb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8e] +#CHECK: vfmssb 
%v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x8e] +#CHECK: vfmssb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0x8e] +#CHECK: vfmssb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x8e] +#CHECK: vfmssb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x8e] +#CHECK: vfmssb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0x8e] + + vfmssb %v0, %v0, %v0, %v0 + vfmssb %v0, %v0, %v0, %v31 + vfmssb %v0, %v0, %v31, %v0 + vfmssb %v0, %v31, %v0, %v0 + vfmssb %v31, %v0, %v0, %v0 + vfmssb %v13, %v17, %v21, %v25 + #CHECK: vfnma %v0, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x9f] #CHECK: vfnma %v0, %v0, %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x0f,0x00,0x00,0x9f] #CHECK: vfnma %v0, %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x00,0x0f,0x00,0x9f] @@ -708,6 +1084,20 @@ vfnmadb %v31, %v0, %v0, %v0 vfnmadb %v13, %v17, %v21, %v25 +#CHECK: vfnmasb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x9f] +#CHECK: vfnmasb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x9f] +#CHECK: vfnmasb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0x9f] +#CHECK: vfnmasb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x9f] +#CHECK: vfnmasb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x9f] +#CHECK: vfnmasb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0x9f] + + vfnmasb %v0, %v0, %v0, %v0 + vfnmasb %v0, %v0, %v0, %v31 + vfnmasb %v0, %v0, %v31, %v0 + vfnmasb %v0, %v31, %v0, %v0 + vfnmasb %v31, %v0, %v0, %v0 + vfnmasb %v13, %v17, %v21, %v25 + #CHECK: vfnms %v0, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x9e] #CHECK: vfnms %v0, %v0, %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x0f,0x00,0x00,0x9e] #CHECK: vfnms %v0, %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x00,0x0f,0x00,0x9e] @@ -740,6 +1130,62 @@ vfnmsdb %v31, %v0, %v0, %v0 vfnmsdb %v13, %v17, %v21, %v25 +#CHECK: vfnmssb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x9e] +#CHECK: 
vfnmssb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x9e] +#CHECK: vfnmssb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0x9e] +#CHECK: vfnmssb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x9e] +#CHECK: vfnmssb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x9e] +#CHECK: vfnmssb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0x9e] + + vfnmssb %v0, %v0, %v0, %v0 + vfnmssb %v0, %v0, %v0, %v31 + vfnmssb %v0, %v0, %v31, %v0 + vfnmssb %v0, %v31, %v0, %v0 + vfnmssb %v31, %v0, %v0, %v0 + vfnmssb %v13, %v17, %v21, %v25 + +#CHECK: vfssb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xe2] +#CHECK: vfssb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xe2] +#CHECK: vfssb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xe2] +#CHECK: vfssb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xe2] +#CHECK: vfssb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xe2] + + vfssb %v0, %v0, %v0 + vfssb %v0, %v0, %v31 + vfssb %v0, %v31, %v0 + vfssb %v31, %v0, %v0 + vfssb %v18, %v3, %v20 + +#CHECK: vfsqsb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xce] +#CHECK: vfsqsb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xce] +#CHECK: vfsqsb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xce] +#CHECK: vfsqsb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xce] +#CHECK: vfsqsb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xce] +#CHECK: vfsqsb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xce] + + vfsqsb %v0, %v0 + vfsqsb %v0, %v15 + vfsqsb %v0, %v31 + vfsqsb %v15, %v0 + vfsqsb %v31, %v0 + vfsqsb %v14, %v17 + +#CHECK: vftcisb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x4a] +#CHECK: vftcisb %v0, %v0, 4095 # encoding: [0xe7,0x00,0xff,0xf0,0x20,0x4a] +#CHECK: vftcisb %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x4a] +#CHECK: vftcisb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x4a] +#CHECK: vftcisb %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x4a] +#CHECK: vftcisb 
%v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x4a] +#CHECK: vftcisb %v4, %v21, 1656 # encoding: [0xe7,0x45,0x67,0x80,0x24,0x4a] + + vftcisb %v0, %v0, 0 + vftcisb %v0, %v0, 4095 + vftcisb %v0, %v15, 0 + vftcisb %v0, %v31, 0 + vftcisb %v15, %v0, 0 + vftcisb %v31, %v0, 0 + vftcisb %v4, %v21, 0x678 + #CHECK: vlip %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x49] #CHECK: vlip %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x49] #CHECK: vlip %v0, 65535, 0 # encoding: [0xe6,0x00,0xff,0xff,0x00,0x49] @@ -1132,6 +1578,168 @@ vupkz %v31, 0, 0 vupkz %v18, 1383(%r4), 3 +#CHECK: wfasb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe3] +#CHECK: wfasb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe3] +#CHECK: wfasb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xe3] +#CHECK: wfasb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xe3] +#CHECK: wfasb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xe3] +#CHECK: wfasb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x2a,0xe3] + + wfasb %v0, %v0, %v0 + wfasb %f0, %f0, %f0 + wfasb %v0, %v0, %v31 + wfasb %v0, %v31, %v0 + wfasb %v31, %v0, %v0 + wfasb %v18, %v3, %v20 + +#CHECK: wfcsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xcb] +#CHECK: wfcsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xcb] +#CHECK: wfcsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xcb] +#CHECK: wfcsb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xcb] +#CHECK: wfcsb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xcb] +#CHECK: wfcsb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xcb] +#CHECK: wfcsb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xcb] + + wfcsb %v0, %v0 + wfcsb %f0, %f0 + wfcsb %v0, %v15 + wfcsb %v0, %v31 + wfcsb %v15, %v0 + wfcsb %v31, %v0 + wfcsb %v14, %v17 + +#CHECK: wfcesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe8] +#CHECK: wfcesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe8] +#CHECK: wfcesb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xe8] 
+#CHECK: wfcesb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xe8] +#CHECK: wfcesb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xe8] +#CHECK: wfcesb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x2a,0xe8] + + wfcesb %v0, %v0, %v0 + wfcesb %f0, %f0, %f0 + wfcesb %v0, %v0, %v31 + wfcesb %v0, %v31, %v0 + wfcesb %v31, %v0, %v0 + wfcesb %v18, %v3, %v20 + +#CHECK: wfcesbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xe8] +#CHECK: wfcesbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xe8] +#CHECK: wfcesbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x22,0xe8] +#CHECK: wfcesbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x24,0xe8] +#CHECK: wfcesbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x28,0xe8] +#CHECK: wfcesbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x2a,0xe8] + + wfcesbs %v0, %v0, %v0 + wfcesbs %f0, %f0, %f0 + wfcesbs %v0, %v0, %v31 + wfcesbs %v0, %v31, %v0 + wfcesbs %v31, %v0, %v0 + wfcesbs %v18, %v3, %v20 + +#CHECK: wfchsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xeb] +#CHECK: wfchsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xeb] +#CHECK: wfchsb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xeb] +#CHECK: wfchsb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xeb] +#CHECK: wfchsb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xeb] +#CHECK: wfchsb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x2a,0xeb] + + wfchsb %v0, %v0, %v0 + wfchsb %f0, %f0, %f0 + wfchsb %v0, %v0, %v31 + wfchsb %v0, %v31, %v0 + wfchsb %v31, %v0, %v0 + wfchsb %v18, %v3, %v20 + +#CHECK: wfchsbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xeb] +#CHECK: wfchsbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xeb] +#CHECK: wfchsbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x22,0xeb] +#CHECK: wfchsbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x24,0xeb] +#CHECK: wfchsbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x28,0xeb] +#CHECK: wfchsbs %v18, %f3, %v20 # encoding: 
[0xe7,0x23,0x40,0x18,0x2a,0xeb] + + wfchsbs %v0, %v0, %v0 + wfchsbs %f0, %f0, %f0 + wfchsbs %v0, %v0, %v31 + wfchsbs %v0, %v31, %v0 + wfchsbs %v31, %v0, %v0 + wfchsbs %v18, %v3, %v20 + +#CHECK: wfchesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xea] +#CHECK: wfchesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xea] +#CHECK: wfchesb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xea] +#CHECK: wfchesb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xea] +#CHECK: wfchesb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xea] +#CHECK: wfchesb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x2a,0xea] + + wfchesb %v0, %v0, %v0 + wfchesb %f0, %f0, %f0 + wfchesb %v0, %v0, %v31 + wfchesb %v0, %v31, %v0 + wfchesb %v31, %v0, %v0 + wfchesb %v18, %v3, %v20 + +#CHECK: wfchesbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xea] +#CHECK: wfchesbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xea] +#CHECK: wfchesbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x22,0xea] +#CHECK: wfchesbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x24,0xea] +#CHECK: wfchesbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x28,0xea] +#CHECK: wfchesbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x2a,0xea] + + wfchesbs %v0, %v0, %v0 + wfchesbs %f0, %f0, %f0 + wfchesbs %v0, %v0, %v31 + wfchesbs %v0, %v31, %v0 + wfchesbs %v31, %v0, %v0 + wfchesbs %v18, %v3, %v20 + +#CHECK: wfdsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe5] +#CHECK: wfdsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe5] +#CHECK: wfdsb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xe5] +#CHECK: wfdsb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xe5] +#CHECK: wfdsb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xe5] +#CHECK: wfdsb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x2a,0xe5] + + wfdsb %v0, %v0, %v0 + wfdsb %f0, %f0, %f0 + wfdsb %v0, %v0, %v31 + wfdsb %v0, %v31, %v0 + wfdsb %v31, %v0, %v0 + wfdsb %v18, %v3, %v20 + 
+#CHECK: wfisb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc7] +#CHECK: wfisb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc7] +#CHECK: wfisb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc7] +#CHECK: wfisb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc7] +#CHECK: wfisb %f0, %f0, 7, 0 # encoding: [0xe7,0x00,0x00,0x0f,0x20,0xc7] +#CHECK: wfisb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc7] +#CHECK: wfisb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc7] +#CHECK: wfisb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x24,0xc7] + + wfisb %v0, %v0, 0, 0 + wfisb %f0, %f0, 0, 0 + wfisb %v0, %v0, 0, 15 + wfisb %v0, %v0, 4, 0 + wfisb %v0, %v0, 7, 0 + wfisb %v0, %v31, 0, 0 + wfisb %v31, %v0, 0, 0 + wfisb %v14, %v17, 4, 10 + +#CHECK: wfksb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xca] +#CHECK: wfksb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xca] +#CHECK: wfksb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xca] +#CHECK: wfksb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xca] +#CHECK: wfksb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xca] +#CHECK: wfksb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xca] +#CHECK: wfksb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xca] + + wfksb %v0, %v0 + wfksb %f0, %f0 + wfksb %v0, %v15 + wfksb %v0, %v31 + wfksb %v15, %v0 + wfksb %v31, %v0 + wfksb %v14, %v17 + #CHECK: wfkedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xe8] #CHECK: wfkedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xe8] #CHECK: wfkedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x32,0xe8] @@ -1160,6 +1768,34 @@ wfkedbs %v31, %v0, %v0 wfkedbs %v18, %v3, %v20 +#CHECK: wfkesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xe8] +#CHECK: wfkesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xe8] +#CHECK: wfkesb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x22,0xe8] +#CHECK: wfkesb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x0c,0x24,0xe8] +#CHECK: 
wfkesb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x0c,0x28,0xe8] +#CHECK: wfkesb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x0c,0x2a,0xe8] + + wfkesb %v0, %v0, %v0 + wfkesb %f0, %f0, %f0 + wfkesb %v0, %v0, %v31 + wfkesb %v0, %v31, %v0 + wfkesb %v31, %v0, %v0 + wfkesb %v18, %v3, %v20 + +#CHECK: wfkesbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x1c,0x20,0xe8] +#CHECK: wfkesbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x1c,0x20,0xe8] +#CHECK: wfkesbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x1c,0x22,0xe8] +#CHECK: wfkesbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x1c,0x24,0xe8] +#CHECK: wfkesbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x1c,0x28,0xe8] +#CHECK: wfkesbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x1c,0x2a,0xe8] + + wfkesbs %v0, %v0, %v0 + wfkesbs %f0, %f0, %f0 + wfkesbs %v0, %v0, %v31 + wfkesbs %v0, %v31, %v0 + wfkesbs %v31, %v0, %v0 + wfkesbs %v18, %v3, %v20 + #CHECK: wfkhdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xeb] #CHECK: wfkhdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xeb] #CHECK: wfkhdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x32,0xeb] @@ -1188,6 +1824,34 @@ wfkhdbs %v31, %v0, %v0 wfkhdbs %v18, %v3, %v20 +#CHECK: wfkhsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xeb] +#CHECK: wfkhsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xeb] +#CHECK: wfkhsb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x22,0xeb] +#CHECK: wfkhsb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x0c,0x24,0xeb] +#CHECK: wfkhsb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x0c,0x28,0xeb] +#CHECK: wfkhsb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x0c,0x2a,0xeb] + + wfkhsb %v0, %v0, %v0 + wfkhsb %f0, %f0, %f0 + wfkhsb %v0, %v0, %v31 + wfkhsb %v0, %v31, %v0 + wfkhsb %v31, %v0, %v0 + wfkhsb %v18, %v3, %v20 + +#CHECK: wfkhsbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x1c,0x20,0xeb] +#CHECK: wfkhsbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x1c,0x20,0xeb] +#CHECK: wfkhsbs %f0, %f0, %v31 # encoding: 
[0xe7,0x00,0xf0,0x1c,0x22,0xeb] +#CHECK: wfkhsbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x1c,0x24,0xeb] +#CHECK: wfkhsbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x1c,0x28,0xeb] +#CHECK: wfkhsbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x1c,0x2a,0xeb] + + wfkhsbs %v0, %v0, %v0 + wfkhsbs %f0, %f0, %f0 + wfkhsbs %v0, %v0, %v31 + wfkhsbs %v0, %v31, %v0 + wfkhsbs %v31, %v0, %v0 + wfkhsbs %v18, %v3, %v20 + #CHECK: wfkhedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xea] #CHECK: wfkhedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xea] #CHECK: wfkhedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x32,0xea] @@ -1216,6 +1880,134 @@ wfkhedbs %v31, %v0, %v0 wfkhedbs %v18, %v3, %v20 +#CHECK: wfkhesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xea] +#CHECK: wfkhesb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xea] +#CHECK: wfkhesb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x0c,0x22,0xea] +#CHECK: wfkhesb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x0c,0x24,0xea] +#CHECK: wfkhesb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x0c,0x28,0xea] +#CHECK: wfkhesb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x0c,0x2a,0xea] + + wfkhesb %v0, %v0, %v0 + wfkhesb %f0, %f0, %f0 + wfkhesb %v0, %v0, %v31 + wfkhesb %v0, %v31, %v0 + wfkhesb %v31, %v0, %v0 + wfkhesb %v18, %v3, %v20 + +#CHECK: wfkhesbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x1c,0x20,0xea] +#CHECK: wfkhesbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x1c,0x20,0xea] +#CHECK: wfkhesbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x1c,0x22,0xea] +#CHECK: wfkhesbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x1c,0x24,0xea] +#CHECK: wfkhesbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x1c,0x28,0xea] +#CHECK: wfkhesbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x1c,0x2a,0xea] + + wfkhesbs %v0, %v0, %v0 + wfkhesbs %f0, %f0, %f0 + wfkhesbs %v0, %v0, %v31 + wfkhesbs %v0, %v31, %v0 + wfkhesbs %v31, %v0, %v0 + wfkhesbs %v18, %v3, %v20 + +#CHECK: wfpsosb %f0, %f0, 3 # encoding: 
[0xe7,0x00,0x00,0x38,0x20,0xcc] +#CHECK: wfpsosb %f0, %f0, 3 # encoding: [0xe7,0x00,0x00,0x38,0x20,0xcc] +#CHECK: wfpsosb %f0, %f0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xcc] +#CHECK: wfpsosb %f0, %f15, 3 # encoding: [0xe7,0x0f,0x00,0x38,0x20,0xcc] +#CHECK: wfpsosb %f0, %v31, 3 # encoding: [0xe7,0x0f,0x00,0x38,0x24,0xcc] +#CHECK: wfpsosb %f15, %f0, 3 # encoding: [0xe7,0xf0,0x00,0x38,0x20,0xcc] +#CHECK: wfpsosb %v31, %f0, 3 # encoding: [0xe7,0xf0,0x00,0x38,0x28,0xcc] +#CHECK: wfpsosb %f14, %v17, 7 # encoding: [0xe7,0xe1,0x00,0x78,0x24,0xcc] + + wfpsosb %v0, %v0, 3 + wfpsosb %f0, %f0, 3 + wfpsosb %v0, %v0, 15 + wfpsosb %v0, %v15, 3 + wfpsosb %v0, %v31, 3 + wfpsosb %v15, %v0, 3 + wfpsosb %v31, %v0, 3 + wfpsosb %v14, %v17, 7 + +#CHECK: wflcsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xcc] +#CHECK: wflcsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xcc] +#CHECK: wflcsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xcc] +#CHECK: wflcsb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xcc] +#CHECK: wflcsb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x20,0xcc] +#CHECK: wflcsb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xcc] +#CHECK: wflcsb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x24,0xcc] + + wflcsb %v0, %v0 + wflcsb %f0, %f0 + wflcsb %v0, %v15 + wflcsb %v0, %v31 + wflcsb %v15, %v0 + wflcsb %v31, %v0 + wflcsb %v14, %v17 + +#CHECK: wflnsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xcc] +#CHECK: wflnsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x20,0xcc] +#CHECK: wflnsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x18,0x20,0xcc] +#CHECK: wflnsb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x18,0x24,0xcc] +#CHECK: wflnsb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x20,0xcc] +#CHECK: wflnsb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x28,0xcc] +#CHECK: wflnsb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x18,0x24,0xcc] + + wflnsb %v0, %v0 + wflnsb %f0, %f0 + wflnsb %v0, %v15 + wflnsb %v0, %v31 + wflnsb %v15, %v0 + wflnsb %v31, %v0 + wflnsb %v14, %v17 + +#CHECK: 
wflpsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x28,0x20,0xcc] +#CHECK: wflpsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x28,0x20,0xcc] +#CHECK: wflpsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x28,0x20,0xcc] +#CHECK: wflpsb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x28,0x24,0xcc] +#CHECK: wflpsb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x28,0x20,0xcc] +#CHECK: wflpsb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x28,0x28,0xcc] +#CHECK: wflpsb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x28,0x24,0xcc] + + wflpsb %v0, %v0 + wflpsb %f0, %f0 + wflpsb %v0, %v15 + wflpsb %v0, %v31 + wflpsb %v15, %v0 + wflpsb %v31, %v0 + wflpsb %v14, %v17 + +#CHECK: wflls %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc4] +#CHECK: wflls %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc4] +#CHECK: wflls %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xc4] +#CHECK: wflls %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc4] +#CHECK: wflls %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x20,0xc4] +#CHECK: wflls %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc4] +#CHECK: wflls %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x24,0xc4] + + wflls %v0, %v0 + wflls %f0, %f0 + wflls %v0, %v15 + wflls %v0, %v31 + wflls %v15, %v0 + wflls %v31, %v0 + wflls %v14, %v17 + +#CHECK: wflrd %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc5] +#CHECK: wflrd %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc5] +#CHECK: wflrd %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc5] +#CHECK: wflrd %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5] +#CHECK: wflrd %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5] +#CHECK: wflrd %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc5] +#CHECK: wflrd %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc5] +#CHECK: wflrd %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc5] + + wflrd %v0, %v0, 0, 0 + wflrd %f0, %f0, 0, 0 + wflrd %v0, %v0, 0, 15 + wflrd %v0, %v0, 4, 0 + wflrd %v0, %v0, 12, 0 + wflrd %v0, %v31, 0, 0 + wflrd %v31, %v0, 0, 0 + 
wflrd %v14, %v17, 4, 10 + #CHECK: wfmaxdb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xef] #CHECK: wfmaxdb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xef] #CHECK: wfmaxdb %f0, %f0, %f0, 4 # encoding: [0xe7,0x00,0x00,0x48,0x30,0xef] @@ -1232,6 +2024,22 @@ wfmaxdb %v31, %v0, %v0, 0 wfmaxdb %v18, %v3, %v20, 11 +#CHECK: wfmaxsb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xef] +#CHECK: wfmaxsb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xef] +#CHECK: wfmaxsb %f0, %f0, %f0, 4 # encoding: [0xe7,0x00,0x00,0x48,0x20,0xef] +#CHECK: wfmaxsb %f0, %f0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xef] +#CHECK: wfmaxsb %f0, %v31, %f0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xef] +#CHECK: wfmaxsb %v31, %f0, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xef] +#CHECK: wfmaxsb %v18, %f3, %v20, 11 # encoding: [0xe7,0x23,0x40,0xb8,0x2a,0xef] + + wfmaxsb %v0, %v0, %v0, 0 + wfmaxsb %f0, %f0, %f0, 0 + wfmaxsb %v0, %v0, %v0, 4 + wfmaxsb %v0, %v0, %v31, 0 + wfmaxsb %v0, %v31, %v0, 0 + wfmaxsb %v31, %v0, %v0, 0 + wfmaxsb %v18, %v3, %v20, 11 + #CHECK: wfmindb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xee] #CHECK: wfmindb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xee] #CHECK: wfmindb %f0, %f0, %f0, 4 # encoding: [0xe7,0x00,0x00,0x48,0x30,0xee] @@ -1248,6 +2056,68 @@ wfmindb %v31, %v0, %v0, 0 wfmindb %v18, %v3, %v20, 11 +#CHECK: wfminsb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xee] +#CHECK: wfminsb %f0, %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xee] +#CHECK: wfminsb %f0, %f0, %f0, 4 # encoding: [0xe7,0x00,0x00,0x48,0x20,0xee] +#CHECK: wfminsb %f0, %f0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xee] +#CHECK: wfminsb %f0, %v31, %f0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xee] +#CHECK: wfminsb %v31, %f0, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xee] +#CHECK: wfminsb %v18, %f3, %v20, 11 # encoding: [0xe7,0x23,0x40,0xb8,0x2a,0xee] + + wfminsb %v0, %v0, %v0, 0 + wfminsb %f0, %f0, %f0, 
0 + wfminsb %v0, %v0, %v0, 4 + wfminsb %v0, %v0, %v31, 0 + wfminsb %v0, %v31, %v0, 0 + wfminsb %v31, %v0, %v0, 0 + wfminsb %v18, %v3, %v20, 11 + +#CHECK: wfmasb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x8f] +#CHECK: wfmasb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x8f] +#CHECK: wfmasb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x02,0x08,0xf1,0x8f] +#CHECK: wfmasb %f0, %f0, %v31, %f0 # encoding: [0xe7,0x00,0xf2,0x08,0x02,0x8f] +#CHECK: wfmasb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x02,0x08,0x04,0x8f] +#CHECK: wfmasb %v31, %f0, %f0, %f0 # encoding: [0xe7,0xf0,0x02,0x08,0x08,0x8f] +#CHECK: wfmasb %f13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x08,0x97,0x8f] + + wfmasb %v0, %v0, %v0, %v0 + wfmasb %f0, %f0, %f0, %f0 + wfmasb %v0, %v0, %v0, %v31 + wfmasb %v0, %v0, %v31, %v0 + wfmasb %v0, %v31, %v0, %v0 + wfmasb %v31, %v0, %v0, %v0 + wfmasb %v13, %v17, %v21, %v25 + +#CHECK: wfmsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe7] +#CHECK: wfmsb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe7] +#CHECK: wfmsb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xe7] +#CHECK: wfmsb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xe7] +#CHECK: wfmsb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xe7] +#CHECK: wfmsb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x2a,0xe7] + + wfmsb %v0, %v0, %v0 + wfmsb %f0, %f0, %f0 + wfmsb %v0, %v0, %v31 + wfmsb %v0, %v31, %v0 + wfmsb %v31, %v0, %v0 + wfmsb %v18, %v3, %v20 + +#CHECK: wfmssb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x8e] +#CHECK: wfmssb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x8e] +#CHECK: wfmssb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x02,0x08,0xf1,0x8e] +#CHECK: wfmssb %f0, %f0, %v31, %f0 # encoding: [0xe7,0x00,0xf2,0x08,0x02,0x8e] +#CHECK: wfmssb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x02,0x08,0x04,0x8e] +#CHECK: wfmssb %v31, %f0, %f0, %f0 # encoding: [0xe7,0xf0,0x02,0x08,0x08,0x8e] +#CHECK: wfmssb %f13, 
%v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x08,0x97,0x8e] + + wfmssb %v0, %v0, %v0, %v0 + wfmssb %f0, %f0, %f0, %f0 + wfmssb %v0, %v0, %v0, %v31 + wfmssb %v0, %v0, %v31, %v0 + wfmssb %v0, %v31, %v0, %v0 + wfmssb %v31, %v0, %v0, %v0 + wfmssb %v13, %v17, %v21, %v25 + #CHECK: wfnmadb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x9f] #CHECK: wfnmadb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x9f] #CHECK: wfnmadb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x9f] @@ -1264,6 +2134,22 @@ wfnmadb %v31, %v0, %v0, %v0 wfnmadb %v13, %v17, %v21, %v25 +#CHECK: wfnmasb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x9f] +#CHECK: wfnmasb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x9f] +#CHECK: wfnmasb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x02,0x08,0xf1,0x9f] +#CHECK: wfnmasb %f0, %f0, %v31, %f0 # encoding: [0xe7,0x00,0xf2,0x08,0x02,0x9f] +#CHECK: wfnmasb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x02,0x08,0x04,0x9f] +#CHECK: wfnmasb %v31, %f0, %f0, %f0 # encoding: [0xe7,0xf0,0x02,0x08,0x08,0x9f] +#CHECK: wfnmasb %f13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x08,0x97,0x9f] + + wfnmasb %v0, %v0, %v0, %v0 + wfnmasb %f0, %f0, %f0, %f0 + wfnmasb %v0, %v0, %v0, %v31 + wfnmasb %v0, %v0, %v31, %v0 + wfnmasb %v0, %v31, %v0, %v0 + wfnmasb %v31, %v0, %v0, %v0 + wfnmasb %v13, %v17, %v21, %v25 + #CHECK: wfnmsdb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x9e] #CHECK: wfnmsdb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x9e] #CHECK: wfnmsdb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x9e] @@ -1280,3 +2166,67 @@ wfnmsdb %v31, %v0, %v0, %v0 wfnmsdb %v13, %v17, %v21, %v25 +#CHECK: wfnmssb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x9e] +#CHECK: wfnmssb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x02,0x08,0x00,0x9e] +#CHECK: wfnmssb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x02,0x08,0xf1,0x9e] +#CHECK: wfnmssb %f0, %f0, %v31, %f0 # encoding: 
[0xe7,0x00,0xf2,0x08,0x02,0x9e] +#CHECK: wfnmssb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x02,0x08,0x04,0x9e] +#CHECK: wfnmssb %v31, %f0, %f0, %f0 # encoding: [0xe7,0xf0,0x02,0x08,0x08,0x9e] +#CHECK: wfnmssb %f13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x08,0x97,0x9e] + + wfnmssb %v0, %v0, %v0, %v0 + wfnmssb %f0, %f0, %f0, %f0 + wfnmssb %v0, %v0, %v0, %v31 + wfnmssb %v0, %v0, %v31, %v0 + wfnmssb %v0, %v31, %v0, %v0 + wfnmssb %v31, %v0, %v0, %v0 + wfnmssb %v13, %v17, %v21, %v25 + +#CHECK: wfssb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe2] +#CHECK: wfssb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xe2] +#CHECK: wfssb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x22,0xe2] +#CHECK: wfssb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xe2] +#CHECK: wfssb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xe2] +#CHECK: wfssb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x2a,0xe2] + + wfssb %v0, %v0, %v0 + wfssb %f0, %f0, %f0 + wfssb %v0, %v0, %v31 + wfssb %v0, %v31, %v0 + wfssb %v31, %v0, %v0 + wfssb %v18, %v3, %v20 + +#CHECK: wfsqsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xce] +#CHECK: wfsqsb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xce] +#CHECK: wfsqsb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xce] +#CHECK: wfsqsb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xce] +#CHECK: wfsqsb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x20,0xce] +#CHECK: wfsqsb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xce] +#CHECK: wfsqsb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x24,0xce] + + wfsqsb %v0, %v0 + wfsqsb %f0, %f0 + wfsqsb %v0, %v15 + wfsqsb %v0, %v31 + wfsqsb %v15, %v0 + wfsqsb %v31, %v0 + wfsqsb %v14, %v17 + +#CHECK: wftcisb %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0x4a] +#CHECK: wftcisb %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0x4a] +#CHECK: wftcisb %f0, %f0, 4095 # encoding: [0xe7,0x00,0xff,0xf8,0x20,0x4a] +#CHECK: wftcisb %f0, %f15, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0x4a] 
+#CHECK: wftcisb %f0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0x4a] +#CHECK: wftcisb %f15, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x20,0x4a] +#CHECK: wftcisb %v31, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0x4a] +#CHECK: wftcisb %f4, %v21, 1656 # encoding: [0xe7,0x45,0x67,0x88,0x24,0x4a] + + wftcisb %v0, %v0, 0 + wftcisb %f0, %f0, 0 + wftcisb %v0, %v0, 4095 + wftcisb %v0, %v15, 0 + wftcisb %v0, %v31, 0 + wftcisb %v15, %v0, 0 + wftcisb %v31, %v0, 0 + wftcisb %v4, %v21, 0x678 +