Lower generic MASSV entries to PowerPC subtarget-specific entries

This patch (second of two patches) lowers the generic PowerPC vector entries to PowerPC subtarget-specific entries. For instance, the PowerPC generic entry '__cbrtd2_massv' is lowered to '__cbrtd2_P9' for the Power9 subtarget. The first patch enables the vectorizer to recognize the IBM MASS vector library routines. That first patch specifically adds support for recognizing the '-vector-library=MASSV' option, and defines mappings from IEEE standard scalar math functions to generic PowerPC MASS vector counterparts. For instance, the generic PowerPC MASS vector entry for the double-precision 'cbrt' function is '__cbrtd2_massv'. The overall support for the MASS vector library is presented in two patches for ease of review. Patch by pjeeva01 (Jeeva P.) Differential Revision: https://reviews.llvm.org/D59883
2025-01-31 12:41:49 +01:00 · 2019-11-04 16:27:23 +00:00 · 2019-11-04 16:27:23 +00:00 · 90c7b69420
commit 90c7b69420
parent 68fcd6e209
7 changed files with 816 additions and 3 deletions
--- a/include/llvm/Analysis/VecFuncs.def
+++ b/include/llvm/Analysis/VecFuncs.def
@ -8,7 +8,14 @@

 // This .def file will create mappings from scalar math functions to vector
 // functions along with their vectorization factor. The current support includes
-// such mappings for Accelerate framework, MASS vector library, and SVML library. 
+// such mappings for Accelerate framework, MASS vector library, and SVML library.
+// This .def file also allows creating an array of vector functions supported in
+// the specified framework or library.
+
+#if defined(TLI_DEFINE_MASSV_VECFUNCS_NAMES)
+#define TLI_DEFINE_MASSV_VECFUNCS
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) VEC,
+#endif

 #if !(defined(TLI_DEFINE_VECFUNC))
 #define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
@ -247,4 +254,4 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
 #undef TLI_DEFINE_ACCELERATE_VECFUNCS
 #undef TLI_DEFINE_MASSV_VECFUNCS
 #undef TLI_DEFINE_SVML_VECFUNCS
-
+#undef TLI_DEFINE_MASSV_VECFUNCS_NAMES
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@ -48,6 +48,7 @@ add_llvm_target(PowerPCCodeGen
  PPCVSXSwapRemoval.cpp
  PPCExpandISEL.cpp
  PPCPreEmitPeephole.cpp
+  PPCLowerMASSVEntries.cpp
  )

 add_subdirectory(AsmParser)
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@ -28,7 +28,8 @@ namespace llvm {
  class AsmPrinter;
  class MCInst;
  class MCOperand;
-
+  class ModulePass;
+  
  FunctionPass *createPPCCTRLoops();
 #ifndef NDEBUG
  FunctionPass *createPPCCTRLoopsVerify();
@ -77,6 +78,10 @@ namespace llvm {

  extern char &PPCVSXFMAMutateID;

+  ModulePass *createPPCLowerMASSVEntriesPass();
+  void initializePPCLowerMASSVEntriesPass(PassRegistry &);
+  extern char &PPCLowerMASSVEntriesID;
+  
  namespace PPCII {

  /// Target Operand Flag enum.
--- a/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
+++ b/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
@ -0,0 +1,164 @@
+//===-- PPCLowerMASSVEntries.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering of MASSV (SIMD) entries for specific PowerPC
+// subtargets.
+// Following is an example of a conversion specific to Power9 subtarget:
+// __sind2_massv ---> __sind2_P9
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "ppc-lower-massv-entries"
+
+using namespace llvm;
+
+namespace {
+
+// Length of the "massv" suffix that marks a generic IBM MASSV library entry.
+// createMASSVFuncName() drops this many trailing characters before appending
+// the subtarget suffix.
+const unsigned MASSVSuffixLength = 5;
+
+// Names of the generic MASSV vector entries. With
+// TLI_DEFINE_MASSV_VECFUNCS_NAMES defined, VecFuncs.def defines
+// TLI_DEFINE_VECFUNC(SCAL, VEC, VF) to expand to just "VEC,", so including it
+// here yields a list of vector function names.
+// The table is only ever searched, never modified, so it is const.
+const StringRef MASSVFuncs[] = {
+#define TLI_DEFINE_MASSV_VECFUNCS_NAMES
+#include "llvm/Analysis/VecFuncs.def"
+};
+
+/// Module pass that retargets calls to generic MASSV entries (names ending in
+/// "massv") to the entry matching the calling function's PowerPC subtarget,
+/// e.g. __sind2_massv --> __sind2_P9.
+class PPCLowerMASSVEntries : public ModulePass {
+public:
+  static char ID;
+
+  PPCLowerMASSVEntries() : ModulePass(ID) {}
+
+  StringRef getPassName() const override { return "PPC Lower MASS Entries"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+
+  /// Scans the declarations in \p M for generic MASSV entries and lowers
+  /// their call sites. Returns true if any call was changed.
+  bool runOnModule(Module &M) override;
+
+private:
+  /// True if \p Name is one of the generic MASSV entry names.
+  static bool isMASSVFunc(StringRef Name);
+
+  /// Suffix ("P8" or "P9") selected for \p Subtarget.
+  static StringRef getCPUSuffix(const PPCSubtarget *Subtarget);
+
+  /// Name of the subtarget-specific entry that replaces \p Func.
+  static std::string createMASSVFuncName(Function &Func,
+                                         const PPCSubtarget *Subtarget);
+
+  /// Retargets a single call \p CI of the generic entry \p Func.
+  bool lowerMASSVCall(CallInst *CI, Function &Func, Module &M,
+                      const PPCSubtarget *Subtarget);
+};
+
+} // namespace
+
+/// Returns true when \p Name exactly matches one of the generic MASSV entry
+/// names collected in MASSVFuncs.
+bool PPCLowerMASSVEntries::isMASSVFunc(StringRef Name) {
+  const auto TableEnd = std::end(MASSVFuncs);
+  return std::find(std::begin(MASSVFuncs), TableEnd, Name) != TableEnd;
+}
+
+// FIXME: Only the Power8 and Power9 vector features are recognized here; any
+// other subtarget is reported as a fatal error.
+/// Maps a PowerPC subtarget to the suffix used to form subtarget-specific
+/// MASSV entry names: "P9" when the subtarget has the Power9 vector feature,
+/// otherwise "P8" when it has the Power8 vector feature. The choice is made
+/// from the subtarget's vector features. A null \p Subtarget is treated as
+/// Power8.
+StringRef PPCLowerMASSVEntries::getCPUSuffix(const PPCSubtarget *Subtarget) {
+  if (Subtarget && Subtarget->hasP9Vector())
+    return "P9";
+  // A missing subtarget falls back to the Power8 entries.
+  if (!Subtarget || Subtarget->hasP8Vector())
+    return "P8";
+
+  report_fatal_error("Unsupported Subtarget: MASSV is supported only on "
+                     "Power8 and Power9 subtargets.");
+}
+
+/// Builds the subtarget-specific MASSV entry name for the generic entry
+/// \p Func: the trailing MASSVSuffixLength characters ("massv") of the
+/// function's name are removed and the suffix chosen by getCPUSuffix() for
+/// \p Subtarget is appended in their place.
+std::string
+PPCLowerMASSVEntries::createMASSVFuncName(Function &Func,
+                                          const PPCSubtarget *Subtarget) {
+  // Resolve the suffix first so an unsupported subtarget is reported before
+  // any name is formed.
+  const std::string CPUSuffix = getCPUSuffix(Subtarget).str();
+  std::string EntryName = Func.getName().drop_back(MASSVSuffixLength).str();
+  EntryName += CPUSuffix;
+  return EntryName;
+}
+
+/// Redirects one call of a generic MASSV entry to the matching
+/// subtarget-specific entry, e.g. __sind2_massv --> __sind2_P9 for a Power9
+/// subtarget. The subtarget-specific function is looked up in \p M, or
+/// declared there if absent, with the same function type and attributes as
+/// \p Func.
+///
+/// \returns true if \p CI was retargeted; false if it was left alone because
+/// its result has no users.
+bool PPCLowerMASSVEntries::lowerMASSVCall(CallInst *CI, Function &Func,
+                                          Module &M,
+                                          const PPCSubtarget *Subtarget) {
+  // NOTE(review): calls whose result is unused are skipped; presumably they
+  // are expected to be removed as dead code elsewhere -- confirm.
+  if (CI->use_empty())
+    return false;
+
+  std::string MASSVEntryName = createMASSVFuncName(Func, Subtarget);
+  FunctionCallee FCache = M.getOrInsertFunction(
+      MASSVEntryName, Func.getFunctionType(), Func.getAttributes());
+
+  CI->setCalledFunction(FCache);
+
+  return true;
+}
+
+/// Lowers every call of a generic MASSV entry in \p M to the entry for the
+/// calling function's subtarget.
+///
+/// Only declarations whose name is a known generic MASSV entry are
+/// considered, and only call instructions that actually call that
+/// declaration are rewritten; a call that merely passes the MASSV function as
+/// an argument is left alone.
+///
+/// \returns true if at least one call was retargeted.
+bool PPCLowerMASSVEntries::runOnModule(Module &M) {
+  // The target machine is obtained through TargetPassConfig; when that is not
+  // available the subtarget cannot be queried, so nothing is changed.
+  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+  if (!TPC)
+    return false;
+
+  auto &TM = TPC->getTM<PPCTargetMachine>();
+  bool Changed = false;
+
+  for (Function &Func : M) {
+    if (!Func.isDeclaration() || !isMASSVFunc(Func.getName()))
+      continue;
+
+    // Retargeting a call edits Func's use list, which would invalidate an
+    // iterator over Func.users(). Work from a snapshot taken up front.
+    SmallVector<User *, 4> MASSVUsers(Func.user_begin(), Func.user_end());
+
+    for (User *U : MASSVUsers) {
+      auto *CI = dyn_cast<CallInst>(U);
+      // Skip non-call users, and calls where Func appears only as an operand
+      // rather than as the callee: rewriting their callee would corrupt them.
+      if (!CI || CI->getCalledFunction() != &Func)
+        continue;
+
+      // The subtarget is a property of the function containing the call.
+      const PPCSubtarget *Subtarget =
+          &TM.getSubtarget<PPCSubtarget>(*CI->getParent()->getParent());
+      Changed |= lowerMASSVCall(CI, Func, M, Subtarget);
+    }
+  }
+
+  return Changed;
+}
+
+// Storage for the pass identifier declared in the class.
+char PPCLowerMASSVEntries::ID = 0;
+
+// Definition of the handle declared as 'extern' in PPC.h.
+char &llvm::PPCLowerMASSVEntriesID = PPCLowerMASSVEntries::ID;
+
+INITIALIZE_PASS(PPCLowerMASSVEntries, DEBUG_TYPE, "Lower MASSV entries", false,
+                false)
+
+/// Factory for the MASSV lowering pass; returns a newly allocated instance.
+ModulePass *llvm::createPPCLowerMASSVEntriesPass() {
+  return new PPCLowerMASSVEntries;
+}
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@ -119,6 +119,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
  initializePPCPreEmitPeepholePass(PR);
  initializePPCTLSDynamicCallPass(PR);
  initializePPCMIPeepholePass(PR);
+  initializePPCLowerMASSVEntriesPass(PR);
 }

 /// Return the datalayout string of a subtarget.
@ -401,6 +402,9 @@ void PPCPassConfig::addIRPasses() {
    addPass(createPPCBoolRetToIntPass());
  addPass(createAtomicExpandPass());

+  // Lower generic MASSV routines to PowerPC subtarget-specific entries.
+  addPass(createPPCLowerMASSVEntriesPass());
+  
  // For the BG/Q (or if explicitly requested), add explicit data prefetch
  // intrinsics.
  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
--- a/test/CodeGen/PowerPC/lower-massv-attr.ll
+++ b/test/CodeGen/PowerPC/lower-massv-attr.ll
@ -0,0 +1,29 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr9  < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=-power9-vector | FileCheck -check-prefixes=CHECK-PWR8,CHECK-ALL %s 
+; RUN: llc -verify-machineinstrs -mcpu=pwr8  < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+power9-vector | FileCheck -check-prefixes=CHECK-PWR9,CHECK-ALL %s 
+
+declare <2 x double> @__cbrtd2_massv(<2 x double>)
+declare <4 x float> @__cbrtf4_massv(<4 x float>)
+
+; cbrt without the power9-vector attribute on the caller
+; check massv calls are correctly targeted for Power8
+; The negative check uses the CHECK-ALL prefix: the RUN lines only enable
+; CHECK-PWR8/CHECK-PWR9 and CHECK-ALL, so a bare CHECK-NOT is never evaluated.
+define <2 x double>  @cbrt_f64_massv_nopwr9(<2 x double> %opnd) #0 {
+; CHECK-ALL-LABEL: @cbrt_f64_massv_nopwr9
+; CHECK-PWR8: bl __cbrtd2_P8
+; CHECK-ALL-NOT: bl __cbrtd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__cbrtd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+; cbrt with the power9-vector attribute on the caller
+; check massv calls are correctly targeted for Power9
+; The negative check uses the CHECK-ALL prefix: the RUN lines only enable
+; CHECK-PWR8/CHECK-PWR9 and CHECK-ALL, so a bare CHECK-NOT is never evaluated.
+define <2 x double>  @cbrt_f64_massv_pwr9(<2 x double> %opnd) #1 {
+; CHECK-ALL-LABEL: @cbrt_f64_massv_pwr9
+; CHECK-PWR9: bl __cbrtd2_P9
+; CHECK-ALL-NOT: bl __cbrtd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__cbrtd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
--- a/test/CodeGen/PowerPC/lower-massv.ll
+++ b/test/CodeGen/PowerPC/lower-massv.ll
@ -0,0 +1,603 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr9  < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefixes=CHECK-PWR9,CHECK-ALL %s 
+; RUN: llc -verify-machineinstrs -mcpu=pwr8  < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefixes=CHECK-PWR8,CHECK-ALL %s 
+; RUN: llc -verify-machineinstrs -mcpu=pwr8  < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefixes=CHECK-DFLT,CHECK-ALL %s 
+
+declare <2 x double> @__cbrtd2_massv(<2 x double>)
+declare <4 x float> @__cbrtf4_massv(<4 x float>)
+
+declare <2 x double> @__powd2_massv(<2 x double>, <2 x double>)
+declare <4 x float> @__powf4_massv(<4 x float>, <4 x float>)
+
+declare <2 x double> @__sqrtd2_massv(<2 x double>)
+declare <4 x float> @__sqrtf4_massv(<4 x float>)
+
+declare <2 x double> @__expd2_massv(<2 x double>)
+declare <4 x float> @__expf4_massv(<4 x float>)
+
+declare <2 x double> @__exp2d2_massv(<2 x double>)
+declare <4 x float> @__exp2f4_massv(<4 x float>)
+
+declare <2 x double> @__expm1d2_massv(<2 x double>)
+declare <4 x float> @__expm1f4_massv(<4 x float>)
+
+declare <2 x double> @__logd2_massv(<2 x double>)
+declare <4 x float> @__logf4_massv(<4 x float>)
+
+declare <2 x double> @__log1pd2_massv(<2 x double>)
+declare <4 x float> @__log1pf4_massv(<4 x float>)
+
+declare <2 x double> @__log10d2_massv(<2 x double>)
+declare <4 x float> @__log10f4_massv(<4 x float>)
+
+declare <2 x double> @__log2d2_massv(<2 x double>)
+declare <4 x float> @__log2f4_massv(<4 x float>)
+
+declare <2 x double> @__sind2_massv(<2 x double>)
+declare <4 x float> @__sinf4_massv(<4 x float>)
+
+declare <2 x double> @__cosd2_massv(<2 x double>)
+declare <4 x float> @__cosf4_massv(<4 x float>)
+
+declare <2 x double> @__tand2_massv(<2 x double>)
+declare <4 x float> @__tanf4_massv(<4 x float>)
+
+declare <2 x double> @__asind2_massv(<2 x double>)
+declare <4 x float> @__asinf4_massv(<4 x float>)
+
+declare <2 x double> @__acosd2_massv(<2 x double>)
+declare <4 x float> @__acosf4_massv(<4 x float>)
+
+declare <2 x double> @__atand2_massv(<2 x double>)
+declare <4 x float> @__atanf4_massv(<4 x float>)
+
+declare <2 x double> @__atan2d2_massv(<2 x double>)
+declare <4 x float> @__atan2f4_massv(<4 x float>)
+
+declare <2 x double> @__sinhd2_massv(<2 x double>)
+declare <4 x float> @__sinhf4_massv(<4 x float>)
+
+declare <2 x double> @__coshd2_massv(<2 x double>)
+declare <4 x float> @__coshf4_massv(<4 x float>)
+
+declare <2 x double> @__tanhd2_massv(<2 x double>)
+declare <4 x float> @__tanhf4_massv(<4 x float>)
+
+declare <2 x double> @__asinhd2_massv(<2 x double>)
+declare <4 x float> @__asinhf4_massv(<4 x float>)
+
+declare <2 x double> @__acoshd2_massv(<2 x double>)
+declare <4 x float> @__acoshf4_massv(<4 x float>)
+
+declare <2 x double> @__atanhd2_massv(<2 x double>)
+declare <4 x float> @__atanhf4_massv(<4 x float>)
+
+; following tests check generation of subtarget-specific calls
+; cbrt
+define <2 x double>  @cbrt_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @cbrt_f64_massv
+; CHECK-PWR9: bl __cbrtd2_P9
+; CHECK-PWR8: bl __cbrtd2_P8
+; CHECK-NOT: bl __cbrtd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__cbrtd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @cbrt_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @cbrt_f32_massv
+; CHECK-PWR9: bl __cbrtf4_P9
+; CHECK-PWR8: bl __cbrtf4_P8
+; CHECK-NOT: bl __cbrtf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__cbrtf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; pow
+define <2 x double>  @pow_f64_massv(<2 x double> %opnd1, <2 x double> %opnd2) {
+; CHECK-ALL-LABEL: @pow_f64_massv
+; CHECK-PWR9: bl __powd2_P9
+; CHECK-PWR8: bl __powd2_P8
+; CHECK-NOT: bl __powd2_massv
+; CHECK-ALL: blr
+;
+ %1 = call <2 x double> @__powd2_massv(<2 x double> %opnd1, <2 x double> %opnd2)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @pow_f32_massv(<4 x float> %opnd1, <4 x float> %opnd2) {
+; CHECK-ALL-LABEL: @pow_f32_massv
+; CHECK-PWR9: bl __powf4_P9
+; CHECK-PWR8: bl __powf4_P8
+; CHECK-NOT: bl __powf4_massv
+; CHECK-ALL: blr
+;
+ %1 = call <4 x float> @__powf4_massv(<4 x float> %opnd1, <4 x float> %opnd2)
+  ret <4 x float> %1 
+}
+
+; sqrt
+define <2 x double>  @sqrt_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @sqrt_f64_massv
+; CHECK-PWR9: bl __sqrtd2_P9
+; CHECK-PWR8: bl __sqrtd2_P8
+; CHECK-NOT: bl __sqrtd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__sqrtd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @sqrt_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @sqrt_f32_massv
+; CHECK-PWR9: bl __sqrtf4_P9
+; CHECK-PWR8: bl __sqrtf4_P8
+; CHECK-NOT: bl __sqrtf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__sqrtf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; exp
+define <2 x double>  @exp_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @exp_f64_massv
+; CHECK-PWR9: bl __expd2_P9
+; CHECK-PWR8: bl __expd2_P8
+; CHECK-NOT: bl __expd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__expd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @exp_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @exp_f32_massv
+; CHECK-PWR9: bl __expf4_P9
+; CHECK-PWR8: bl __expf4_P8
+; CHECK-NOT: bl __expf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__expf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; exp2
+define <2 x double>  @exp2_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @exp2_f64_massv
+; CHECK-PWR9: bl __exp2d2_P9
+; CHECK-PWR8: bl __exp2d2_P8
+; CHECK-NOT: bl __exp2d2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__exp2d2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @exp2_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @exp2_f32_massv
+; CHECK-PWR9: bl __exp2f4_P9
+; CHECK-PWR8: bl __exp2f4_P8
+; CHECK-NOT: bl __exp2f4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__exp2f4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; expm1
+define <2 x double>  @expm1_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @expm1_f64_massv
+; CHECK-PWR9: bl __expm1d2_P9
+; CHECK-PWR8: bl __expm1d2_P8
+; CHECK-NOT: bl __expm1d2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__expm1d2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @expm1_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @expm1_f32_massv
+; CHECK-PWR9: bl __expm1f4_P9
+; CHECK-PWR8: bl __expm1f4_P8
+; CHECK-NOT: bl __expm1f4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__expm1f4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; log
+define <2 x double>  @log_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @log_f64_massv
+; CHECK-PWR9: bl __logd2_P9
+; CHECK-PWR8: bl __logd2_P8
+; CHECK-NOT: bl __logd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__logd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @log_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @log_f32_massv
+; CHECK-PWR9: bl __logf4_P9
+; CHECK-PWR8: bl __logf4_P8
+; CHECK-NOT: bl __logf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__logf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; log1p
+define <2 x double>  @log1p_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @log1p_f64_massv
+; CHECK-PWR9: bl __log1pd2_P9
+; CHECK-PWR8: bl __log1pd2_P8
+; CHECK-NOT: bl __log1pd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__log1pd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @log1p_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @log1p_f32_massv
+; CHECK-PWR9: bl __log1pf4_P9
+; CHECK-PWR8: bl __log1pf4_P8
+; CHECK-NOT: bl __log1pf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__log1pf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; log10
+define <2 x double>  @log10_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @log10_f64_massv
+; CHECK-PWR9: bl __log10d2_P9
+; CHECK-PWR8: bl __log10d2_P8
+; CHECK-NOT: bl __log10d2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__log10d2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @log10_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @log10_f32_massv
+; CHECK-PWR9: bl __log10f4_P9
+; CHECK-PWR8: bl __log10f4_P8
+; CHECK-NOT: bl __log10f4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__log10f4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; log2
+define <2 x double>  @log2_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @log2_f64_massv
+; CHECK-PWR9: bl __log2d2_P9
+; CHECK-PWR8: bl __log2d2_P8
+; CHECK-NOT: bl __log2d2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__log2d2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @log2_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @log2_f32_massv
+; CHECK-PWR9: bl __log2f4_P9
+; CHECK-PWR8: bl __log2f4_P8
+; CHECK-NOT: bl __log2f4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__log2f4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; sin
+define <2 x double>  @sin_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @sin_f64_massv
+; CHECK-PWR9: bl __sind2_P9
+; CHECK-PWR8: bl __sind2_P8
+; CHECK-NOT: bl __sind2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__sind2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @sin_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @sin_f32_massv
+; CHECK-PWR9: bl __sinf4_P9
+; CHECK-PWR8: bl __sinf4_P8
+; CHECK-NOT: bl __sinf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__sinf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; cos
+define <2 x double>  @cos_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @cos_f64_massv
+; CHECK-PWR9: bl __cosd2_P9
+; CHECK-PWR8: bl __cosd2_P8
+; CHECK-NOT: bl __cosd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__cosd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @cos_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @cos_f32_massv
+; CHECK-PWR9: bl __cosf4_P9
+; CHECK-PWR8: bl __cosf4_P8
+; CHECK-NOT: bl __cosf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__cosf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; tan
+define <2 x double>  @tan_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @tan_f64_massv
+; CHECK-PWR9: bl __tand2_P9
+; CHECK-PWR8: bl __tand2_P8
+; CHECK-NOT: bl __tand2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__tand2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @tan_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @tan_f32_massv
+; CHECK-PWR9: bl __tanf4_P9
+; CHECK-PWR8: bl __tanf4_P8
+; CHECK-NOT: bl __tanf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__tanf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; asin
+define <2 x double>  @asin_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @asin_f64_massv
+; CHECK-PWR9: bl __asind2_P9
+; CHECK-PWR8: bl __asind2_P8
+; CHECK-NOT: bl __asind2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__asind2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @asin_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @asin_f32_massv
+; CHECK-PWR9: bl __asinf4_P9
+; CHECK-PWR8: bl __asinf4_P8
+; CHECK-NOT: bl __asinf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__asinf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; acos
+define <2 x double>  @acos_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @acos_f64_massv
+; CHECK-PWR9: bl __acosd2_P9
+; CHECK-PWR8: bl __acosd2_P8
+; CHECK-NOT: bl __acosd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__acosd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @acos_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @acos_f32_massv
+; CHECK-PWR9: bl __acosf4_P9
+; CHECK-PWR8: bl __acosf4_P8
+; CHECK-NOT: bl __acosf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__acosf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; atan
+define <2 x double>  @atan_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @atan_f64_massv
+; CHECK-PWR9: bl __atand2_P9
+; CHECK-PWR8: bl __atand2_P8
+; CHECK-NOT: bl __atand2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__atand2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @atan_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @atan_f32_massv
+; CHECK-PWR9: bl __atanf4_P9
+; CHECK-PWR8: bl __atanf4_P8
+; CHECK-NOT: bl __atanf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__atanf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; atan2
+define <2 x double>  @atan2_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @atan2_f64_massv
+; CHECK-PWR9: bl __atan2d2_P9
+; CHECK-PWR8: bl __atan2d2_P8
+; CHECK-NOT: bl __atan2d2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__atan2d2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @atan2_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @atan2_f32_massv
+; CHECK-PWR9: bl __atan2f4_P9
+; CHECK-PWR8: bl __atan2f4_P8
+; CHECK-NOT: bl __atan2f4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__atan2f4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; sinh
+define <2 x double>  @sinh_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @sinh_f64_massv
+; CHECK-PWR9: bl __sinhd2_P9
+; CHECK-PWR8: bl __sinhd2_P8
+; CHECK-NOT: bl __sinhd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__sinhd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @sinh_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @sinh_f32_massv
+; CHECK-PWR9: bl __sinhf4_P9
+; CHECK-PWR8: bl __sinhf4_P8
+; CHECK-NOT: bl __sinhf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__sinhf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; cosh
+define <2 x double>  @cosh_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @cosh_f64_massv
+; CHECK-PWR9: bl __coshd2_P9
+; CHECK-PWR8: bl __coshd2_P8
+; CHECK-NOT: bl __coshd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__coshd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @cosh_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @cosh_f32_massv
+; CHECK-PWR9: bl __coshf4_P9
+; CHECK-PWR8: bl __coshf4_P8
+; CHECK-NOT: bl __coshf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__coshf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; tanh
+define <2 x double>  @tanh_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @tanh_f64_massv
+; CHECK-PWR9: bl __tanhd2_P9
+; CHECK-PWR8: bl __tanhd2_P8
+; CHECK-NOT: bl __tanhd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__tanhd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @tanh_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @tanh_f32_massv
+; CHECK-PWR9: bl __tanhf4_P9
+; CHECK-PWR8: bl __tanhf4_P8
+; CHECK-NOT: bl __tanhf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__tanhf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; asinh
+define <2 x double>  @asinh_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @asinh_f64_massv
+; CHECK-PWR9: bl __asinhd2_P9
+; CHECK-PWR8: bl __asinhd2_P8
+; CHECK-NOT: bl __asinhd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__asinhd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @asinh_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @asinh_f32_massv
+; CHECK-PWR9: bl __asinhf4_P9
+; CHECK-PWR8: bl __asinhf4_P8
+; CHECK-NOT: bl __asinhf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__asinhf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; acosh
+define <2 x double>  @acosh_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @acosh_f64_massv
+; CHECK-PWR9: bl __acoshd2_P9
+; CHECK-PWR8: bl __acoshd2_P8
+; CHECK-NOT: bl __acoshd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__acoshd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @acosh_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @acosh_f32_massv
+; CHECK-PWR9: bl __acoshf4_P9
+; CHECK-PWR8: bl __acoshf4_P8
+; CHECK-NOT: bl __acoshf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__acoshf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+
+; atanh
+define <2 x double>  @atanh_f64_massv(<2 x double> %opnd) {
+; CHECK-ALL-LABEL: @atanh_f64_massv
+; CHECK-PWR9: bl __atanhd2_P9
+; CHECK-PWR8: bl __atanhd2_P8
+; CHECK-NOT: bl __atanhd2_massv
+; CHECK-ALL: blr
+;
+  %1 = call <2 x double> @__atanhd2_massv(<2 x double> %opnd)
+  ret <2 x double> %1 
+}
+
+define <4 x float>  @atanh_f32_massv(<4 x float> %opnd) {
+; CHECK-ALL-LABEL: @atanh_f32_massv
+; CHECK-PWR9: bl __atanhf4_P9
+; CHECK-PWR8: bl __atanhf4_P8
+; CHECK-NOT: bl __atanhf4_massv
+; CHECK-ALL: blr
+;
+  %1 = call <4 x float> @__atanhf4_massv(<4 x float> %opnd)
+  ret <4 x float> %1 
+}
+