Re-land "[PowerPC] Remove QPX/A2Q BGQ/BGP CNK support"

This reverts commit bf544fa1c3cb80f24d85e84559fb11193846259f. Fixed the typo in PPCInstrInfo.cpp.
2024-11-26 12:43:36 +01:00 · 2020-07-28 03:02:20 +00:00 · 2020-07-28 03:02:20 +00:00 · a3d207d6bc
commit a3d207d6bc
parent 9db223cc66
118 changed files with 153 additions and 6390 deletions
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@ -4310,14 +4310,9 @@ PowerPC:
 - ``r``: A 32 or 64-bit integer register.
 - ``b``: A 32 or 64-bit integer register, excluding ``R0`` (that is:
  ``R1-R31``).
- ``f``: A 32 or 64-bit float register (``F0-F31``), or when QPX is enabled, a
+- ``f``: A 32 or 64-bit float register (``F0-F31``).
-  128 or 256-bit QPX register (``Q0-Q31``; aliases the ``F`` registers).
+- ``v``: For ``4 x f32`` or ``4 x f64`` types, a 128-bit altivec vector
- ``v``: For ``4 x f32`` or ``4 x f64`` types, when QPX is enabled, a
+   register (``V0-V31``).
  128 or 256-bit QPX register (``Q0-Q31``), otherwise a 128-bit
  altivec vector register (``V0-V31``).
  .. FIXME: is this a bug that v accepts QPX registers? I think this
     is supposed to only use the altivec vector registers?
 - ``y``: Condition register (``CR0-CR7``).
 - ``wc``: An individual CR bit in a CR register.
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@ -142,8 +142,6 @@ public:
    Apple,
    PC,
    SCEI,
    BGP,
    BGQ,
    Freescale,
    IBM,
    ImaginationTechnologies,
@ -179,7 +177,6 @@ public:
    Minix,
    RTEMS,
    NaCl,       // Native Client
    CNK,        // BG/P Compute-Node Kernel
    AIX,
    CUDA,       // NVIDIA CUDA
    NVCL,       // NVIDIA OpenCL
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@ -1109,182 +1109,6 @@ def  int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">,
                 [IntrNoMem]>;
 }
 //===----------------------------------------------------------------------===//
 // PowerPC QPX Intrinsics.
 //
 let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
  /// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics.
  class PowerPC_QPX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
                              list<LLVMType> param_types,
                              list<IntrinsicProperty> properties>
    : GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>,
      Intrinsic<ret_types, param_types, properties>;
 }
 //===----------------------------------------------------------------------===//
 // PowerPC QPX Intrinsic Class Definitions.
 //
 /// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64
 /// vector and returns one.  These intrinsics have no side effects.
 class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix>
  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
                          [llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
 /// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64
 /// vectors and returns one.  These intrinsics have no side effects.
 class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix>
  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
                          [llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty],
                          [IntrNoMem]>;
 /// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64
 /// vectors and returns one.  These intrinsics have no side effects.
 class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix>
  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
                          [llvm_v4f64_ty],
                          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
                          [IntrNoMem]>;
 /// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer
 /// and returns a v4f64.
 class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix>
  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
                          [llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
 /// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer
 /// and returns a v4f64 permutation.
 class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix>
  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
                          [llvm_v4f64_ty], [llvm_ptr_ty], [IntrNoMem]>;
 /// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer
 /// and stores a v4f64.
 class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix>
  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
                          [], [llvm_v4f64_ty, llvm_ptr_ty],
                          [IntrWriteMem, IntrArgMemOnly]>;
 //===----------------------------------------------------------------------===//
 // PowerPC QPX Intrinsic Definitions.
 let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
  // Add Instructions
  def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">;
  def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">;
  def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">;
  def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">;
  // Estimate Instructions
  def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">;
  def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">;
  def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">;
  def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">;
  // Multiply Instructions
  def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">;
  def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">;
  def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">;
  def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">;
  // Multiply-add instructions
  def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">;
  def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">;
  def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">;
  def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">;
  def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">;
  def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">;
  def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">;
  def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">;
  def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">;
  def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">;
  def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">;
  def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">;
  def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">;
  def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">;
  def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">;
  def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">;
  // Select Instruction
  def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">;
  // Permute Instruction
  def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">;
  // Convert and Round Instructions
  def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">;
  def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">;
  def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">;
  def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">;
  def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">;
  def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">;
  def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">;
  def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">;
  def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">;
  def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">;
  def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">;
  def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">;
  def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">;
  def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">;
  def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">;
  def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">;
  def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">;
  // Move Instructions
  def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">;
  def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">;
  def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">;
  def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">;
  // Compare Instructions
  def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">;
  def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">;
  def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">;
  def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">;
  // Load instructions
  def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">;
  def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">;
  def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">;
  def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">;
  def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">;
  def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">;
  def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">;
  def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">;
  def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">;
  def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">;
  def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">;
  def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">;
  def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">;
  def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">;
  def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">;
  def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">;
  // Store instructions
  def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">;
  def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">;
  def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">;
  def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">;
  def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">;
  def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">;
  def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">;
  def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">;
  def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">;
  def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">;
  // Logical and permutation formation
  def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical",
                          [llvm_v4f64_ty],
                          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
                          [IntrNoMem]>;
  def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci",
                          [llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>;
 }
 //===----------------------------------------------------------------------===//
 // PowerPC HTM Intrinsic Definitions.
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@ -160,8 +160,6 @@ StringRef Triple::getVendorTypeName(VendorType Kind) {
  case AMD: return "amd";
  case Apple: return "apple";
  case BGP: return "bgp";
  case BGQ: return "bgq";
  case CSR: return "csr";
  case Freescale: return "fsl";
  case IBM: return "ibm";
@ -187,7 +185,6 @@ StringRef Triple::getOSTypeName(OSType Kind) {
  case AMDHSA: return "amdhsa";
  case AMDPAL: return "amdpal";
  case Ananas: return "ananas";
  case CNK: return "cnk";
  case CUDA: return "cuda";
  case CloudABI: return "cloudabi";
  case Contiki: return "contiki";
@ -470,8 +467,6 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
    .Case("apple", Triple::Apple)
    .Case("pc", Triple::PC)
    .Case("scei", Triple::SCEI)
    .Case("bgp", Triple::BGP)
    .Case("bgq", Triple::BGQ)
    .Case("fsl", Triple::Freescale)
    .Case("ibm", Triple::IBM)
    .Case("img", Triple::ImaginationTechnologies)
@ -508,7 +503,6 @@ static Triple::OSType parseOS(StringRef OSName) {
    .StartsWith("minix", Triple::Minix)
    .StartsWith("rtems", Triple::RTEMS)
    .StartsWith("nacl", Triple::NaCl)
    .StartsWith("cnk", Triple::CNK)
    .StartsWith("aix", Triple::AIX)
    .StartsWith("cuda", Triple::CUDA)
    .StartsWith("nvcl", Triple::NVCL)
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@ -492,21 +492,6 @@ public:
    Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()]));
  }
  void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
  }
  void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
  }
  void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
  }
  void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
@ -1207,9 +1192,6 @@ bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) {
    } else if (Name.startswith_lower("v") &&
               !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
      RegNo = VRegs[IntVal];
    } else if (Name.startswith_lower("q") &&
               !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
      RegNo = QFRegs[IntVal];
    } else if (Name.startswith_lower("cr") &&
               !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
      RegNo = CRRegs[IntVal];
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@ -36,7 +36,6 @@ add_llvm_target(PowerPCCodeGen
  PPCMacroFusion.cpp
  PPCMIPeephole.cpp
  PPCRegisterInfo.cpp
  PPCQPXLoadSplat.cpp
  PPCSubtarget.cpp
  PPCTargetMachine.cpp
  PPCTargetObjectFile.cpp
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@ -167,12 +167,6 @@ static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
 #define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
 #define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
 static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                            uint64_t Address,
                                            const void *Decoder) {
  return decodeRegisterClass(Inst, RegNo, QFRegs);
 }
 static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                            uint64_t Address,
                                            const void *Decoder) {
@ -401,12 +395,7 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
  // Read the instruction in the proper endianness.
  uint64_t Inst = ReadFunc(Bytes.data());
-  if (STI.getFeatureBits()[PPC::FeatureQPX]) {
+  if (STI.getFeatureBits()[PPC::FeatureSPE]) {
    DecodeStatus result =
      decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
    if (result != MCDisassembler::Fail)
      return result;
  } else if (STI.getFeatureBits()[PPC::FeatureSPE]) {
    DecodeStatus result =
        decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
    if (result != MCDisassembler::Fail)
--- a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@ -49,18 +49,6 @@ FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden,
 void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
  const char *RegName = getRegisterName(RegNo);
  if (RegName[0] == 'q' /* QPX */) {
    // The system toolchain on the BG/Q does not understand QPX register names
    // in .cfi_* directives, so print the name of the floating-point
    // subregister instead.
    std::string RN(RegName);
    RN[0] = 'f';
    OS << RN;
    return;
  }
  OS << RegName;
 }
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@ -159,7 +159,6 @@ using llvm::MCPhysReg;
  static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
  static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
  static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
  static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
  static const MCPhysReg RRegsNoR0[32] = \
    PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
  static const MCPhysReg XRegsNoX0[32] = \
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@ -44,7 +44,6 @@ namespace llvm {
  FunctionPass *createPPCMIPeepholePass();
  FunctionPass *createPPCBranchSelectionPass();
  FunctionPass *createPPCBranchCoalescingPass();
  FunctionPass *createPPCQPXLoadSplatPass();
  FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
  FunctionPass *createPPCTLSDynamicCallPass();
  FunctionPass *createPPCBoolRetToIntPass();
@ -68,7 +67,6 @@ namespace llvm {
  void initializePPCReduceCRLogicalsPass(PassRegistry&);
  void initializePPCBSelPass(PassRegistry&);
  void initializePPCBranchCoalescingPass(PassRegistry&);
  void initializePPCQPXLoadSplatPass(PassRegistry&);
  void initializePPCBoolRetToIntPass(PassRegistry&);
  void initializePPCExpandISELPass(PassRegistry &);
  void initializePPCPreEmitPeepholePass(PassRegistry &);
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@ -132,9 +132,6 @@ def FeaturePPC4xx    : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
                                        "Enable PPC 4xx instructions">;
 def FeaturePPC6xx    : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
                                        "Enable PPC 6xx instructions">;
 def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
                                        "Enable QPX instructions",
                                        [FeatureFPU]>;
 def FeatureVSX       : SubtargetFeature<"vsx","HasVSX", "true",
                                        "Enable VSX instructions",
                                        [FeatureAltivec]>;
@ -193,7 +190,7 @@ def FeatureFloat128 :
 def FeaturePOPCNTD   : SubtargetFeature<"popcntd","HasPOPCNTD",
                                        "POPCNTD_Fast",
                                        "Enable the popcnt[dw] instructions">;
-// Note that for the a2/a2q processor models we should not use popcnt[dw] by
+// Note that for the a2 processor models we should not use popcnt[dw] by
 // default. These processors do support the instructions, but they're
 // microcoded, and the software emulation is about twice as fast.
 def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
@ -514,15 +511,6 @@ def : ProcessorModel<"a2", PPCA2Model,
                   FeatureFPRND, FeatureFPCVT, FeatureISEL,
                   FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
                   Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>;
 def : ProcessorModel<"a2q", PPCA2Model,
                  [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
                   FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
                   FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
                   FeatureSTFIWX, FeatureLFIWAX,
                   FeatureFPRND, FeatureFPCVT, FeatureISEL,
                   FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
                   Feature64Bit /*, Feature64BitRegs */, FeatureQPX,
                   FeatureMFTB]>;
 def : ProcessorModel<"pwr3", G5Model,
                  [DirectivePwr3, FeatureAltivec,
                   FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@ -549,9 +549,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
        if (Subtarget->hasSPE()) {
          if (PPC::F4RCRegClass.contains(Reg) ||
              PPC::F8RCRegClass.contains(Reg) ||
              PPC::QBRCRegClass.contains(Reg) ||
              PPC::QFRCRegClass.contains(Reg) ||
              PPC::QSRCRegClass.contains(Reg) ||
              PPC::VFRCRegClass.contains(Reg) ||
              PPC::VRRCRegClass.contains(Reg) ||
              PPC::VSFRCRegClass.contains(Reg) ||
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@ -61,9 +61,6 @@ def RetCC_PPC_Cold : CallingConv<[
  CCIfType<[f64], CCAssignToReg<[F1]>>,
  CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>,
  CCIfType<[v4f64, v4f32, v4i1],
           CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1]>>>,
  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
           CCIfSubtarget<"hasAltivec()",
           CCAssignToReg<[V2]>>>
@ -98,10 +95,6 @@ def RetCC_PPC : CallingConv<[
           CCIfSubtarget<"hasP9Vector()",
           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
  // QPX vectors are returned in QF1 and QF2. 
  CCIfType<[v4f64, v4f32, v4i1],
           CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
  // Vector types returned as "direct" go into V2 .. V9; note that only the
  // ELFv2 ABI fully utilizes all these registers.
  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
@ -158,8 +151,6 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
  CCIfType<[f128],
           CCIfSubtarget<"hasP9Vector()",
           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
  CCIfType<[v4f64, v4f32, v4i1],
           CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
           CCIfSubtarget<"hasAltivec()",
           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
@ -223,9 +214,6 @@ def CC_PPC32_SVR4_Common : CallingConv<[
  CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToStack<4, 4>>>,
  CCIfType<[f64], CCIfSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,
  // QPX vectors that are stored in double precision need 32-byte alignment.
  CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
  // Vectors and float128 get 16-byte stack slots that are 16-byte aligned.
  CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>,
  CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToStack<16, 16>>>
@ -243,10 +231,6 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
 // put vector arguments in vector registers before putting them on the stack.
 let Entry = 1 in
 def CC_PPC32_SVR4 : CallingConv<[
  // QPX vectors mirror the scalar FP convention.
  CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
    CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
  // The first 12 Vector arguments are passed in AltiVec registers.
  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
           CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -4142,7 +4142,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  // Altivec Vector compare instructions do not set any CR register by default and
  // vector compare operations return the same type as the operands.
  if (LHS.getValueType().isVector()) {
-    if (Subtarget->hasQPX() || Subtarget->hasSPE())
+    if (Subtarget->hasSPE())
      return false;
    EVT VecVT = LHS.getValueType();
@ -4813,8 +4813,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
          case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
          case MVT::f64: Opcode = PPC::LFDUX; break;
          case MVT::f32: Opcode = PPC::LFSUX; break;
          case MVT::i32: Opcode = PPC::LWZUX; break;
@ -5095,12 +5093,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
      SelectCCOp = PPC::SELECT_CC_F16;
    else if (Subtarget->hasSPE())
      SelectCCOp = PPC::SELECT_CC_SPE;
    else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
      SelectCCOp = PPC::SELECT_CC_QFRC;
    else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
      SelectCCOp = PPC::SELECT_CC_QSRC;
    else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
      SelectCCOp = PPC::SELECT_CC_QBRC;
    else if (N->getValueType(0) == MVT::v2f64 ||
             N->getValueType(0) == MVT::v2i64)
      SelectCCOp = PPC::SELECT_CC_VSRC;
@ -5856,9 +5848,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_QFRC:
      case PPC::SELECT_QSRC:
      case PPC::SELECT_QBRC:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
@ -6177,9 +6166,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_QFRC:
      case PPC::SELECT_QSRC:
      case PPC::SELECT_QBRC:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@ -427,22 +427,6 @@ namespace llvm {
    ///               => VABSDUW((XVNEGSP a), (XVNEGSP b))
    VABSD,
    /// QVFPERM = This corresponds to the QPX qvfperm instruction.
    QVFPERM,
    /// QVGPCI = This corresponds to the QPX qvgpci instruction.
    QVGPCI,
    /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
    QVALIGNI,
    /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
    QVESPLATI,
    /// QBFLT = Access the underlying QPX floating-point boolean
    /// representation.
    QBFLT,
    /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
    /// lower (IDX=1) half of v4f32 to v2f64.
    FP_EXTEND_HALF,
@ -519,10 +503,6 @@ namespace llvm {
    /// Store scalar integers from VSR.
    ST_VSR_SCAL_INT,
    /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
    /// The 4xf32 load used for v4i1 constants.
    QVLFSb,
    /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
    /// except they ensure that the compare input is zero-extended for
    /// sub-word versions because the atomic loads zero-extend.
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@ -642,7 +642,6 @@ class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
  let FRA = 0;
 }
 // Used for QPX
 class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
         : I<opcode, OOL, IOL, asmstr, itin> {
@ -1781,14 +1780,6 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
  let Inst{31}    = 0;
 }
 // Used for QPX
 class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
              InstrItinClass itin, list<dag> pattern>
  : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
  let FRA = 0;
  let FRC = 0;
 }
 // 1.7.13 M-Form
 class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
              InstrItinClass itin, list<dag> pattern>
@ -2099,49 +2090,6 @@ class VX_RD5_RSp5_PS1_XO9<bits<9> xo, dag OOL, dag IOL, string asmstr,
  let Inst{23-31} = xo;
 }
 // Z23-Form (used by QPX)
 class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, 
              InstrItinClass itin, list<dag> pattern>
         : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> FRT;
  bits<5> FRA;
  bits<5> FRB;
  bits<2> idx;
  let Pattern = pattern;
  bit RC = 0;    // set by isRecordForm
  let Inst{6-10}  = FRT;
  let Inst{11-15} = FRA;
  let Inst{16-20} = FRB;
  let Inst{21-22} = idx;
  let Inst{23-30} = xo;
  let Inst{31}    = RC;
 }
 class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
              InstrItinClass itin, list<dag> pattern>
  : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
  let FRB = 0;
 }
 class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, 
              InstrItinClass itin, list<dag> pattern>
         : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> FRT;
  bits<12> idx;
  let Pattern = pattern;
  bit RC = 0;    // set by isRecordForm
  let Inst{6-10}  = FRT;
  let Inst{11-22} = idx;
  let Inst{23-30} = xo;
  let Inst{31}    = RC;
 }
 class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
              InstrItinClass itin, list<dag> pattern>
         : I<opcode, OOL, IOL, asmstr, itin> {
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@ -259,14 +259,6 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
  case PPC::XVMULDP:
  case PPC::XVMULSP:
  case PPC::XSMULSP:
  // QPX Add:
  case PPC::QVFADD:
  case PPC::QVFADDS:
  case PPC::QVFADDSs:
  // QPX Multiply:
  case PPC::QVFMUL:
  case PPC::QVFMULS:
  case PPC::QVFMULSs:
    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
           Inst.getFlag(MachineInstr::MIFlag::FmNsz);
  // Fixed point:
@ -300,9 +292,7 @@ static const uint16_t FMAOpIdxInfo[][5] = {
    {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
    {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
    {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
-    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
+    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1}};
    {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
    {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
 // Check if an opcode is a FMA instruction. If it is, return the index in array
 // FMAOpIdxInfo. Otherwise, return -1.
@ -666,7 +656,6 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
  case PPC::LI8:
  case PPC::LIS:
  case PPC::LIS8:
  case PPC::QVGPCI:
  case PPC::ADDIStocHA:
  case PPC::ADDIStocHA8:
  case PPC::ADDItocL:
@ -1343,12 +1332,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
  else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
           PPC::VSSRCRegClass.contains(DestReg, SrcReg))
    Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
  else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::QVFMR;
  else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::QVFMRs;
  else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::QVFMRb;
  else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::CROR;
  else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
@ -1393,12 +1376,6 @@ static unsigned getSpillIndex(const TargetRegisterClass *RC) {
    OpcodeIndex = SOK_VectorFloat4Spill;
  } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_VRSaveSpill;
  } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_QuadFloat8Spill;
  } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_QuadFloat4Spill;
  } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_QuadBitSpill;
  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_SpillToVSR;
  } else {
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@ -123,9 +123,6 @@ enum SpillOpcodeKey {
  SOK_VectorFloat8Spill,
  SOK_VectorFloat4Spill,
  SOK_VRSaveSpill,
  SOK_QuadFloat8Spill,
  SOK_QuadFloat4Spill,
  SOK_QuadBitSpill,
  SOK_SpillToVSR,
  SOK_SPESpill,
  SOK_LastOpcodeSpill // This must be last on the enum.
@ -136,32 +133,28 @@ enum SpillOpcodeKey {
  {                                                                            \
    PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,                    \
        PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX,    \
-        PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb,          \
+        PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD, PPC::EVLDD                    \
        PPC::SPILLTOVSR_LD, PPC::EVLDD                                         \
  }
 #define Pwr9LoadOpcodes                                                        \
  {                                                                            \
    PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,                    \
        PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64,                \
-        PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs,        \
+        PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD                \
        PPC::QVLFDXb, PPC::SPILLTOVSR_LD                                       \
  }
 #define Pwr8StoreOpcodes                                                       \
  {                                                                            \
    PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
        PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \
-        PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, PPC::SPILLTOVSR_ST,        \
+        PPC::SPILLTOVSR_ST, PPC::EVSTDD                                        \
        PPC::EVSTDD                                                            \
  }
 #define Pwr9StoreOpcodes                                                       \
  {                                                                            \
    PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
        PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32,                \
-        PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb,         \
+        PPC::SPILL_VRSAVE, PPC::SPILLTOVSR_ST                                  \
        PPC::SPILLTOVSR_ST                                                     \
  }
 // Initialize arrays for load and store spill opcodes on supported subtargets.
@ -273,10 +266,10 @@ public:
  }
  static bool isSameClassPhysRegCopy(unsigned Opcode) {
-    unsigned CopyOpcodes[] =
+    unsigned CopyOpcodes[] = {PPC::OR,        PPC::OR8,   PPC::FMR,
-      { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
+                              PPC::VOR,       PPC::XXLOR, PPC::XXLORf,
-        PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
+                              PPC::XSCPSGNDP, PPC::MCRF,  PPC::CROR,
-        PPC::CROR, PPC::EVOR, -1U };
+                              PPC::EVOR,      -1U};
    for (int i = 0; CopyOpcodes[i] != -1U; i++)
      if (Opcode == CopyOpcodes[i])
        return true;
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@ -203,16 +203,6 @@ def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
 def PPCxxpermdi  : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
 def PPCvecshl    : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
 def PPCqvfperm   : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
 def PPCqvgpci    : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
 def PPCqvaligni  : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
 def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
 def PPCqbflt     : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
 def PPCqvlfsb    : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
                          [SDNPHasChain, SDNPMayLoad]>;
 def PPCcmpb     : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
 // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@ -3467,7 +3457,6 @@ include "PPCInstrAltivec.td"
 include "PPCInstrSPE.td"
 include "PPCInstr64Bit.td"
 include "PPCInstrVSX.td"
 include "PPCInstrQPX.td"
 include "PPCInstrHTM.td"
 def crnot : OutPatFrag<(ops node:$in),
--- a/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/lib/Target/PowerPC/PPCInstrQPX.td
--- a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@ -1,161 +0,0 @@
 //===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // The QPX vector registers overlay the scalar floating-point registers, and
 // any scalar floating-point loads splat their value across all vector lanes.
 // Thus, if we have a scalar load followed by a splat, we can remove the splat
 // (i.e. replace the load with a load-and-splat pseudo instruction).
 //
 // This pass must run after anything that might do store-to-load forwarding.
 //
 //===----------------------------------------------------------------------===//
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 #define DEBUG_TYPE "ppc-qpx-load-splat"
 STATISTIC(NumSimplified, "Number of QPX load splats simplified");
 namespace {
  // Machine-function pass that folds a QPX splat into the scalar
  // floating-point load feeding it (see runOnMachineFunction).
  class PPCQPXLoadSplat : public MachineFunctionPass {
  public:
    // Pass identification; only the address of ID is significant.
    static char ID;
    // Human-readable name reported for this pass.
    StringRef getPassName() const override {
      return "PowerPC QPX Load Splat Simplification";
    }
    // Registers the pass with the global pass registry on construction.
    PPCQPXLoadSplat() : MachineFunctionPass(ID) {
      initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
    }
    bool runOnMachineFunction(MachineFunction &Fn) override;
  };
  char PPCQPXLoadSplat::ID = 0;
 }
 INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
                "PowerPC QPX Load Splat Simplification",
                false, false)
 // Factory for the QPX load-splat simplification pass. The returned object is
 // heap-allocated with new and handed to the caller.
 FunctionPass *llvm::createPPCQPXLoadSplatPass() {
  FunctionPass *LoadSplatPass = new PPCQPXLoadSplat();
  return LoadSplatPass;
 }
 // Walks every basic block of MF from its last instruction to its first,
 // remembering QVESPLATI/QVESPLATIs/QVESPLATIb instructions whose immediate
 // (operand 2) is 0 and whose source (operand 1) is killed. When an earlier
 // scalar floating-point load that modifies such a splat's source register is
 // reached, the load is retargeted to define the splat's destination and the
 // splat instruction is erased.
 //
 // Returns true if at least one splat instruction was erased, false otherwise
 // (including when skipFunction() asks for the function to be skipped).
 bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  bool MadeChange = false;
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
    MachineBasicBlock *MBB = &*MFI;
    // Candidate splats seen so far in this block (i.e. located *after* the
    // instruction currently being visited, since we iterate in reverse).
    SmallVector<MachineInstr *, 4> Splats;
    for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
      MachineInstr *MI = &*MBBI;
      // Calls and instructions with unmodeled side effects invalidate every
      // pending candidate: nothing is matched across them.
      if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
        Splats.clear();
        continue;
      }
      // We're looking for a sequence like this:
      // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
      // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
      // Test MI against each pending splat. SI is advanced manually because
      // entries are erased from Splats while iterating.
      for (auto SI = Splats.begin(); SI != Splats.end();) {
        MachineInstr *SMI = *SI;
        // Operand 0 of the splat is its result, operand 1 its source.
        Register SplatReg = SMI->getOperand(0).getReg();
        Register SrcReg = SMI->getOperand(1).getReg();
        if (MI->modifiesRegister(SrcReg, TRI)) {
          switch (MI->getOpcode()) {
          default:
            // The splat source is written by something other than one of the
            // scalar FP loads listed below; give up on this candidate.
            SI = Splats.erase(SI);
            continue;
          case PPC::LFS:
          case PPC::LFD:
          case PPC::LFSU:
          case PPC::LFDU:
          case PPC::LFSUX:
          case PPC::LFDUX:
          case PPC::LFSX:
          case PPC::LFDX:
          case PPC::LFIWAX:
          case PPC::LFIWZX:
            if (SplatReg != SrcReg) {
              // We need to change the load to define the scalar subregister of
              // the QPX splat source register.
              unsigned SubRegIndex =
                TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
              Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
              // Substitute both the explicit defined register, and also the
              // implicit def of the containing QPX register.
              MI->getOperand(0).setReg(SplatSubReg);
              MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
            }
            // The candidate is consumed: drop it from the list before the
            // instruction itself is deleted below.
            SI = Splats.erase(SI);
            // If SMI is directly after MI, then MBBI's base iterator is
            // pointing at SMI.  Adjust MBBI around the call to erase SMI to
            // avoid invalidating MBBI.
            ++MBBI;
            SMI->eraseFromParent();
            --MBBI;
            ++NumSimplified;
            MadeChange = true;
            continue;
          }
        }
        // If this instruction defines the splat register, then we cannot move
        // the previous definition above it. If it reads from the splat
        // register, then it must already be alive from some previous
        // definition, and if the splat register is different from the source
        // register, then this definition must not be the load for which we're
        // searching.
        if (MI->modifiesRegister(SplatReg, TRI) ||
            (SrcReg != SplatReg &&
             MI->readsRegister(SplatReg, TRI))) {
          SI = Splats.erase(SI);
          continue;
        }
        ++SI;
      }
      // Only the three QVESPLATI opcode variants can become new candidates.
      if (MI->getOpcode() != PPC::QVESPLATI &&
          MI->getOpcode() != PPC::QVESPLATIs &&
          MI->getOpcode() != PPC::QVESPLATIb)
        continue;
      // Only a splat with immediate 0 is handled (presumably the element that
      // a scalar load fills -- confirm against the QPX instruction
      // definitions).
      if (MI->getOperand(2).getImm() != 0)
        continue;
      // If there are other uses of the scalar value after this, replacing
      // those uses might be non-trivial.
      if (!MI->getOperand(1).isKill())
        continue;
      Splats.push_back(MI);
    }
  }
  return MadeChange;
 }
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@ -404,9 +404,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
  }
  case PPC::F8RCRegClassID:
  case PPC::F4RCRegClassID:
  case PPC::QFRCRegClassID:
  case PPC::QSRCRegClassID:
  case PPC::QBRCRegClassID:
  case PPC::VRRCRegClassID:
  case PPC::VFRCRegClassID:
  case PPC::VSLRCRegClassID:
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@ -153,7 +153,6 @@ public:
    switch (RegName[0]) {
      case 'r':
      case 'f':
      case 'q': // for QPX
      case 'v':
        if (RegName[1] == 's')
          return RegName + 2;
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@ -54,13 +54,6 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
  let HWEncoding{4-0} = num;
 }
 // QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
 class QFPR<FPR SubReg, string n> : PPCReg<n> {
  let HWEncoding = SubReg.HWEncoding;
  let SubRegs = [SubReg];
  let SubRegIndices = [sub_64];
 }
 // VF - One of the 32 64-bit floating-point subregisters of the vector
 // registers (used by VSX).
 class VF<bits<5> num, string n> : PPCReg<n> {
@ -132,12 +125,6 @@ foreach Index = 0-31 in {
                 DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
 }
 // QPX Floating-point registers
 foreach Index = 0-31 in {
  def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
                 DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
 }
 // Vector registers
 foreach Index = 0-31 in {
  def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
@ -343,16 +330,6 @@ def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, (sub VSFRC,
 // Register class for single precision scalars in VSX registers
 def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
 // For QPX
 def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
                                                (sequence "QF%u", 31, 14))>;
 def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
 def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
  // These are actually stored as floating-point values where a positive
  // number is true and anything else (including NaN) is false.
  let Size = 256;
 }
 def CRBITRC : RegisterClass<"PPC", [i1], 32,
  (add CR2LT, CR2GT, CR2EQ, CR2UN,
       CR3LT, CR3GT, CR3EQ, CR3UN,
--- a/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/lib/Target/PowerPC/PPCScheduleP9.td
@ -40,12 +40,9 @@ def P9Model : SchedMachineModel {
  let CompleteModel = 1;
-  // Do not support QPX (Quad Processing eXtension), SPE (Signal Processing
+  // Do not support SPE (Signal Processing Engine), prefixed instructions on
-  // Engine), prefixed instructions on Power 9, PC relative mem ops, or
+  // Power 9, PC relative mem ops, or instructions introduced in ISA 3.1.
-  // instructions introduced in ISA 3.1.
+  let UnsupportedFeatures = [HasSPE, PrefixInstrs, PCRelativeMemops, IsISA3_1];
  let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops,
                             IsISA3_1];
 }
 let SchedModel = P9Model in {
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@ -35,10 +35,6 @@ using namespace llvm;
 static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
 cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
 static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
  cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
  cl::Hidden);
 static cl::opt<bool>
    EnableMachinePipeliner("ppc-enable-pipeliner",
                           cl::desc("Enable Machine Pipeliner for PPC"),
@ -70,7 +66,6 @@ void PPCSubtarget::initializeEnvironment() {
  HasAltivec = false;
  HasSPE = false;
  HasFPU = false;
  HasQPX = false;
  HasVSX = false;
  NeedsTwoConstNR = false;
  HasP8Vector = false;
@ -109,7 +104,6 @@ void PPCSubtarget::initializeEnvironment() {
  HasInvariantFunctionDescriptors = false;
  HasPartwordAtomics = false;
  HasDirectMove = false;
  IsQPXStackUnaligned = false;
  HasHTM = false;
  HasFloat128 = false;
  HasFusion = false;
@ -158,7 +152,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
  if (HasSPE && IsPPC64)
    report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
-  if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
+  if (HasSPE && (HasAltivec || HasVSX || HasFPU))
    report_fatal_error(
        "SPE and traditional floating point cannot both be enabled.\n", false);
@ -166,10 +160,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
  if (!HasSPE)
    HasFPU = true;
  // QPX requires a 32-byte aligned stack. Note that we need to do this if
  // we're compiling for a BG/Q system regardless of whether or not QPX
  // is enabled because external functions will assume this alignment.
  IsQPXStackUnaligned = QPXStackUnaligned;
  StackAlignment = getPlatformStackAlignment();
  // Determine endianness.
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@ -97,7 +97,6 @@ protected:
  bool HasAltivec;
  bool HasFPU;
  bool HasSPE;
  bool HasQPX;
  bool HasVSX;
  bool NeedsTwoConstNR;
  bool HasP8Vector;
@ -150,11 +149,6 @@ protected:
  POPCNTDKind HasPOPCNTD;
  /// When targeting QPX running a stock PPC64 Linux kernel where the stack
  /// alignment has not been changed, we need to keep the 16-byte alignment
  /// of the stack.
  bool IsQPXStackUnaligned;
  const PPCTargetMachine &TM;
  PPCFrameLowering FrameLowering;
  PPCInstrInfo InstrInfo;
@ -255,7 +249,6 @@ public:
  bool hasAltivec() const { return HasAltivec; }
  bool hasSPE() const { return HasSPE; }
  bool hasFPU() const { return HasFPU; }
  bool hasQPX() const { return HasQPX; }
  bool hasVSX() const { return HasVSX; }
  bool needsTwoConstNR() const { return NeedsTwoConstNR; }
  bool hasP8Vector() const { return HasP8Vector; }
@ -291,11 +284,7 @@ public:
  bool hasPartwordAtomics() const { return HasPartwordAtomics; }
  bool hasDirectMove() const { return HasDirectMove; }
  bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
  Align getPlatformStackAlignment() const {
    if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
      return Align(32);
    return Align(16);
  }
@ -325,9 +314,6 @@ public:
  const Triple &getTargetTriple() const { return TargetTriple; }
  /// isBGQ - True if this is a BG/Q platform.
  bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@ -63,10 +63,6 @@ static cl::
 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
                                cl::desc("Disable VSX Swap Removal for PPC"));
 static cl::
 opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
                              cl::desc("Disable QPX load splat simplification"));
 static cl::
 opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
                            cl::desc("Disable machine peepholes for PPC"));
@ -114,7 +110,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
  initializePPCReduceCRLogicalsPass(PR);
  initializePPCBSelPass(PR);
  initializePPCBranchCoalescingPass(PR);
  initializePPCQPXLoadSplatPass(PR);
  initializePPCBoolRetToIntPass(PR);
  initializePPCExpandISELPass(PR);
  initializePPCPreEmitPeepholePass(PR);
@ -412,13 +407,8 @@ void PPCPassConfig::addIRPasses() {
  // Lower generic MASSV routines to PowerPC subtarget-specific entries.
  addPass(createPPCLowerMASSVEntriesPass());
-  // For the BG/Q (or if explicitly requested), add explicit data prefetch
+  // If explicitly requested, add explicit data prefetch intrinsics.
  // intrinsics.
  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
                        getOptLevel() != CodeGenOpt::None;
  if (EnablePrefetch.getNumOccurrences() > 0)
    UsePrefetching = EnablePrefetch;
  if (UsePrefetching)
    addPass(createLoopDataPrefetchPass());
  if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
@ -515,15 +505,8 @@ void PPCPassConfig::addPreRegAlloc() {
 }
 void PPCPassConfig::addPreSched2() {
-  if (getOptLevel() != CodeGenOpt::None) {
+  if (getOptLevel() != CodeGenOpt::None)
    addPass(&IfConverterID);
    // This optimization must happen after anything that might do store-to-load
    // forwarding. Here we're after RA (and, thus, when spills are inserted)
    // but before post-RA scheduling.
    if (!DisableQPXLoadSplat)
      addPass(createPPCQPXLoadSplatPass());
  }
 }
 void PPCPassConfig::addPreEmitPass() {
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@ -25,8 +25,7 @@ using namespace llvm;
 static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
 cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
-// This is currently only used for the data prefetch pass which is only enabled
+// This is currently only used for the data prefetch pass
 // for BG/Q by default.
 static cl::opt<unsigned>
 CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
              cl::desc("The loop prefetch cache line size"));
@ -104,55 +103,6 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
    Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
    return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
  }
  case Intrinsic::ppc_qpx_qvlfs:
    // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Type *VTy =
          VectorType::get(IC.Builder.getFloatTy(),
                          cast<VectorType>(II.getType())->getElementCount());
      Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
                                            PointerType::getUnqual(VTy));
      Value *Load = IC.Builder.CreateLoad(VTy, Ptr);
      return new FPExtInst(Load, II.getType());
    }
    break;
  case Intrinsic::ppc_qpx_qvlfd:
    // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(0), Align(32), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 32) {
      Value *Ptr = IC.Builder.CreateBitCast(
          II.getArgOperand(0), PointerType::getUnqual(II.getType()));
      return new LoadInst(II.getType(), Ptr, "", false, Align(32));
    }
    break;
  case Intrinsic::ppc_qpx_qvstfs:
    // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Type *VTy = VectorType::get(
          IC.Builder.getFloatTy(),
          cast<VectorType>(II.getArgOperand(0)->getType())->getElementCount());
      Value *TOp = IC.Builder.CreateFPTrunc(II.getArgOperand(0), VTy);
      Type *OpPtrTy = PointerType::getUnqual(VTy);
      Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
      return new StoreInst(TOp, Ptr, false, Align(16));
    }
    break;
  case Intrinsic::ppc_qpx_qvstfd:
    // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(1), Align(32), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 32) {
      Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
      Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
      return new StoreInst(II.getArgOperand(0), Ptr, false, Align(32));
    }
    break;
  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
@ -736,10 +686,7 @@ bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
 }
 bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
-  // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
+  // On the A2, always unroll aggressively.
  // on combining the loads generated for consecutive accesses, and failure to
  // do so is particularly expensive. This makes it much more likely (compared
  // to only using concatenation unrolling).
  if (ST->getCPUDirective() == PPC::DIR_A2)
    return true;
@ -799,7 +746,6 @@ const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
 unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
  if (Vector) {
    if (ST->hasQPX()) return 256;
    if (ST->hasAltivec()) return 128;
    return 0;
  }
@ -828,8 +774,6 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
 }
 unsigned PPCTTIImpl::getPrefetchDistance() const {
  // This seems like a reasonable default for the BG/Q (this pass is enabled, by
  // default, only on the BG/Q).
  return 300;
 }
@ -918,7 +862,7 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-  // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+  // PPC, for both Altivec and VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). We need one such shuffle instruction for each actual
  // register (this is not true for arbitrary shuffles, but is true for the
@ -974,13 +918,6 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return Cost;
  } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
    // Floating point scalars are already located in index #0.
    if (Index == 0)
      return 0;
    return Cost;
  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
    if (ST->hasP9Altivec()) {
      if (ISD == ISD::INSERT_VECTOR_ELT)
@ -1055,8 +992,6 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
  bool IsVSXType = ST->hasVSX() &&
                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
  bool IsQPXType = ST->hasQPX() &&
                   (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
  // VSX has 32b/64b load instructions. Legalization can handle loading of
  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
@ -1079,8 +1014,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
  // for Altivec types using the VSX instructions, but that's more expensive
  // than using the permutation-based load sequence. On the P8, that's no
  // longer true.
-  if (Opcode == Instruction::Load &&
+  if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
      ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
      *Alignment >= LT.second.getScalarType().getStoreSize())
    return Cost + LT.first; // Add the cost of the permutations.
@ -1133,7 +1067,7 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(
      getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
                      CostKind);
-  // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+  // PPC, for both Altivec and VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). For each result vector, we need one shuffle per incoming
  // vector (except that the first shuffle can take two incoming vectors
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@ -4751,15 +4751,14 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
    // For PowerPC, we need to deal with alignment of stack arguments -
    // they are mostly aligned to 8 bytes, but vectors and i128 arrays
    // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
-    // and QPX vectors are aligned to 32 bytes.  For that reason, we
+    // For that reason, we compute current offset from stack pointer (which is
-    // compute current offset from stack pointer (which is always properly
+    // always properly aligned), and offset for the first vararg, then subtract
-    // aligned), and offset for the first vararg, then subtract them.
+    // them.
    unsigned VAArgBase;
    Triple TargetTriple(F.getParent()->getTargetTriple());
    // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
    // and 32 bytes for ABIv2.  This is usually determined by target
    // endianness, but in theory could be overridden by function attribute.
    // For simplicity, we ignore it here (it'd only matter for QPX vectors).
    if (TargetTriple.getArch() == Triple::ppc64)
      VAArgBase = 48;
    else
--- a/test/Analysis/BasicAA/phi-spec-order.ll
+++ b/test/Analysis/BasicAA/phi-spec-order.ll
@ -1,5 +1,5 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-bgq-linux"
+target triple = "powerpc64le-unknown-linux"
 ; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
@X = external global [16000 x double], align 32
--- a/test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll
+++ b/test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll
@ -218,42 +218,6 @@ entry:
 ; CHECK: cost of 2 for instruction:   %r = load <4 x double>, <4 x double>* %p, align 8
 }
 define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
 entry:
  %r = load <4 x float>, <4 x float>* %p, align 4
  ret <4 x float> %r
 ; CHECK-LABEL: test_l_qv4float
 ; CHECK: cost of 2 for instruction:   %r = load <4 x float>, <4 x float>* %p, align 4
 }
 define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
 entry:
  %r = load <8 x float>, <8 x float>* %p, align 4
  ret <8 x float> %r
 ; CHECK-LABEL: test_l_qv8float
 ; CHECK: cost of 4 for instruction:   %r = load <8 x float>, <8 x float>* %p, align 4
 }
 define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
 entry:
  %r = load <4 x double>, <4 x double>* %p, align 8
  ret <4 x double> %r
 ; CHECK-LABEL: test_l_qv4double
 ; CHECK: cost of 2 for instruction:   %r = load <4 x double>, <4 x double>* %p, align 8
 }
 define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
 entry:
  %r = load <8 x double>, <8 x double>* %p, align 8
  ret <8 x double> %r
 ; CHECK-LABEL: test_l_qv8double
 ; CHECK: cost of 4 for instruction:   %r = load <8 x double>, <8 x double>* %p, align 8
 }
 define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
 entry:
  store <16 x i8> %v, <16 x i8>* %p, align 1
@ -362,43 +326,6 @@ entry:
 ; CHECK: cost of 2 for instruction:   store <4 x double> %v, <4 x double>* %p, align 8
 }
 define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
 entry:
  store <4 x float> %v, <4 x float>* %p, align 4
  ret void
 ; CHECK-LABEL: test_s_qv4float
 ; CHECK: cost of 7 for instruction:   store <4 x float> %v, <4 x float>* %p, align 4
 }
 define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
 entry:
  store <8 x float> %v, <8 x float>* %p, align 4
  ret void
 ; CHECK-LABEL: test_s_qv8float
 ; CHECK: cost of 15 for instruction:   store <8 x float> %v, <8 x float>* %p, align 4
 }
 define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
 entry:
  store <4 x double> %v, <4 x double>* %p, align 8
  ret void
 ; CHECK-LABEL: test_s_qv4double
 ; CHECK: cost of 7 for instruction:   store <4 x double> %v, <4 x double>* %p, align 8
 }
 define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
 entry:
  store <8 x double> %v, <8 x double>* %p, align 8
  ret void
 ; CHECK-LABEL: test_s_qv8double
 ; CHECK: cost of 15 for instruction:   store <8 x double> %v, <8 x double>* %p, align 8
 }
 attributes #0 = { nounwind "target-cpu"="pwr7" }
 attributes #1 = { nounwind "target-cpu"="a2q" }
 attributes #2 = { nounwind "target-cpu"="pwr8" }
--- a/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll
+++ b/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll
@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -enable-misched < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -enable-misched < %s | FileCheck %s
 ;
 ; PR14315: misched should not move the physreg copy of %t below the calls.
--- a/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir
+++ b/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir
@ -55,7 +55,7 @@
  ; Function Attrs: nounwind
  declare void @llvm.stackprotector(i8*, i8**) #1
-  attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind }
  !llvm.module.flags = !{!0, !1}
--- a/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
+++ b/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
@ -30,7 +30,7 @@
  ; Function Attrs: nounwind
  declare void @llvm.stackprotector(i8*, i8**) #1
-  attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind }
  !llvm.ident = !{!0}
--- a/test/CodeGen/PowerPC/a2q-stackalign.ll
+++ b/test/CodeGen/PowerPC/a2q-stackalign.ll
@ -1,23 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2q | FileCheck -check-prefix=CHECK-A2Q %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-bgq-linux -mcpu=a2 | FileCheck -check-prefix=CHECK-BGQ %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 declare i32 @bar(i8* %a) nounwind;
 define i32 @foo() nounwind {
  %p = alloca i8, i8 115
  store i8 0, i8* %p
  %r = call i32 @bar(i8* %p)
  ret i32 %r
 }
 ; Without QPX, the allocated stack frame is 240 bytes, but with QPX
 ; (because we require 32-byte alignment), it is 256 bytes.
 ; CHECK-A2: @foo
 ; CHECK-A2: stdu 1, -240(1)
 ; CHECK-A2Q: @foo
 ; CHECK-A2Q: stdu 1, -256(1)
 ; CHECK-BGQ: @foo
 ; CHECK-BGQ: stdu 1, -256(1)
--- a/test/CodeGen/PowerPC/a2q.ll
+++ b/test/CodeGen/PowerPC/a2q.ll
@ -1,10 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2q | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2 -mattr=+qpx | FileCheck %s
 define void @foo() {
 entry:
  ret void
 }
 ; CHECK: @foo
--- a/test/CodeGen/PowerPC/aantidep-inline-asm-use.ll
+++ b/test/CodeGen/PowerPC/aantidep-inline-asm-use.ll
@ -298,7 +298,7 @@ _ZN10SubProcess12SafeSyscalls5fcntlEiil.exit:     ; preds = %_ZN10SubProcess12Sa
 ; Function Attrs: nounwind argmemonly
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind argmemonly }
 attributes #2 = { nounwind }
--- a/test/CodeGen/PowerPC/asm-Zy.ll
+++ b/test/CodeGen/PowerPC/asm-Zy.ll
@ -1,6 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mcpu=a2 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-bgq-linux"
 define i32 @zytest(i32 %a) nounwind {
 entry:
--- a/test/CodeGen/PowerPC/asm-constraints.ll
+++ b/test/CodeGen/PowerPC/asm-constraints.ll
@ -65,7 +65,7 @@ entry:
 }
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind }
--- a/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir
+++ b/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir
@ -63,8 +63,8 @@
    ret i64 %2
  }
-  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
-  attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
  !llvm.module.flags = !{!0, !1}
  !llvm.ident = !{!2}
--- a/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
+++ b/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
@ -187,7 +187,7 @@
    ret i64 %cond
  }
-  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
  !llvm.module.flags = !{!0, !1}
  !llvm.ident = !{!2}
--- a/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@ -983,10 +983,10 @@
    ret i64 %xor
  }
-  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-  attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-  attributes #2 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,-vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #2 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-  attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
  !llvm.module.flags = !{!0, !1}
  !llvm.ident = !{!2}
--- a/test/CodeGen/PowerPC/ctr-minmaxnum.ll
+++ b/test/CodeGen/PowerPC/ctr-minmaxnum.ll
@ -1,5 +1,4 @@
 ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s
 ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s --check-prefix=QPX
 declare float @fabsf(float)
@ -64,11 +63,6 @@ loop_exit:
 ; CHECK-NOT: xsmindp
 ; CHECK: blr
 ; QPX-LABEL: test1v:
 ; QPX: mtctr
 ; QPX-NOT: bl fminf
 ; QPX: blr
 define void @test1a(float %f, float* %fp) {
 entry:
  br label %loop_body
@ -139,11 +133,6 @@ loop_exit:
 ; CHECK-NOT: xsmaxdp
 ; CHECK: blr
 ; QPX-LABEL: test2v:
 ; QPX: mtctr
 ; QPX-NOT: bl fmax
 ; QPX: blr
 define void @test2a(float %f, float* %fp) {
 entry:
  br label %loop_body
--- a/test/CodeGen/PowerPC/ctrloop-shortLoops.ll
+++ b/test/CodeGen/PowerPC/ctrloop-shortLoops.ll
@ -1,5 +1,4 @@
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr8 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q | FileCheck %s --check-prefixes=CHECK,CHECK-A2Q
 ; Verify that we do NOT generate the mtctr instruction for loop trip counts < 4
 ; The latency of the mtctr is only justified if there are more than 4 comparisons that are removed as a result.
@ -86,11 +85,8 @@ for.body:                                         ; preds = %entry, %for.body
 }
 ; Function Attrs: norecurse nounwind
 ; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
 ; a2q should use mtctr, but pwr8 should not use mtctr.
 define signext i32 @testTripCount2NonSmallLoop() {
 ; CHECK-LABEL: testTripCount2NonSmallLoop:
 ; CHECK-A2Q: mtctr
 ; CHECK-PWR8-NOT: mtctr
 ; CHECK: blr
@ -121,12 +117,9 @@ for.end:                                          ; preds = %if.end
  ret i32 %conv
 }
 ; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
 ; a2q should use mtctr, but pwr8 should not use mtctr.
 define signext i32 @testTripCount5() {
 ; CHECK-LABEL: testTripCount5:
 ; CHECK-PWR8-NOT: mtctr
 ; CHECK-A2Q: mtctr
 entry:
  %.prea = load i32, i32* @a, align 4
--- a/test/CodeGen/PowerPC/ec-input.ll
+++ b/test/CodeGen/PowerPC/ec-input.ll
@ -5,7 +5,7 @@
 ; that were both inputs to the inline asm and also early-clobber outputs).
 target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-bgq-linux"
+target triple = "powerpc64le-unknown-linux"
 %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
 %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712 = type { %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32 }
--- a/test/CodeGen/PowerPC/extra-toc-reg-deps.ll
+++ b/test/CodeGen/PowerPC/extra-toc-reg-deps.ll
@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-bgq-linux"
+target triple = "powerpc64-unknown-linux"
 %"class.Foam::messageStream.6" = type <{ %"class.Foam::string.5", i32, i32, i32, [4 x i8] }>
 %"class.Foam::string.5" = type { %"class.std::basic_string.4" }
@ -419,8 +419,8 @@ declare void @_ZN4Foam11regIOobjectD2Ev() #0
 declare void @_ZN4Foam6reduceIiNS_5sumOpIiEEEEvRKNS_4ListINS_8UPstream11commsStructEEERT_RKT0_ii() #0
-attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { inlinehint "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { inlinehint "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 !llvm.module.flags = !{!0}
--- a/test/CodeGen/PowerPC/fast-isel-icmp-split.ll
+++ b/test/CodeGen/PowerPC/fast-isel-icmp-split.ll
@ -1,6 +1,6 @@
 ; RUN: llc -verify-machineinstrs -O0 -relocation-model=pic < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-bgq-linux"
+target triple = "powerpc64le-unknown-linux"
 %"class.std::__1::__tree_node.130.151" = type { %"class.std::__1::__tree_node_base.base.128.149", %"class.boost::serialization::extended_type_info.129.150"* }
 %"class.std::__1::__tree_node_base.base.128.149" = type <{ %"class.std::__1::__tree_end_node.127.148", %"class.std::__1::__tree_node_base.126.147"*, %"class.std::__1::__tree_node_base.126.147"*, i8 }>
--- a/test/CodeGen/PowerPC/fma-mutate-duplicate-vreg.ll
+++ b/test/CodeGen/PowerPC/fma-mutate-duplicate-vreg.ll
@ -33,4 +33,4 @@ define float @f(float %xf) #0 {
  ret float %25
 }
-attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
--- a/test/CodeGen/PowerPC/fp2int2fp-ppcfp128.ll
+++ b/test/CodeGen/PowerPC/fp2int2fp-ppcfp128.ll
@ -1,6 +1,5 @@
-; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-bgq-linux"
 define linkonce_odr double @test1(ppc_fp128 %input) {
 entry:
--- a/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
+++ b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -mcpu=a2 < %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-bgq-linux"
+target triple = "powerpc64le-unknown-linux"
 %"class.std::__1::__assoc_sub_state" = type { %"class.std::__1::__shared_count", %"class.std::__exception_ptr::exception_ptr", %"class.std::__1::mutex", %"class.std::__1::condition_variable", i32 }
 %"class.std::__1::__shared_count" = type { i32 (...)**, i64 }
--- a/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll
+++ b/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll
@ -33,5 +33,5 @@ declare i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)*, i8*)
 declare hidden void @_ZN11__sanitizer16BackgroundThreadEPv(i8* nocapture readnone) #5
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #7 = { nobuiltin nounwind }
--- a/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
+++ b/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -mcpu=a2 < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-bgq-linux"
+target triple = "powerpc64le-unknown-linux"
 %struct.BG_CoordinateMapping_t = type { [4 x i8] }
--- a/test/CodeGen/PowerPC/load-two-flts.ll
+++ b/test/CodeGen/PowerPC/load-two-flts.ll
@ -1,6 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-bgq-linux"
 define void @_Z4testSt7complexIfE(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
 entry:
--- a/test/CodeGen/PowerPC/loop-data-prefetch-inner.ll
+++ b/test/CodeGen/PowerPC/loop-data-prefetch-inner.ll
@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -enable-ppc-prefetching=true -verify-machineinstrs < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-bgq-linux"
+target triple = "powerpc64le-unknown-linux"
 ; Function Attrs: nounwind
 define void @foo(double* %x, double* nocapture readonly %y) #0 {
--- a/test/CodeGen/PowerPC/loop-data-prefetch.ll
+++ b/test/CodeGen/PowerPC/loop-data-prefetch.ll
@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -enable-ppc-prefetching=true -mcpu=a2 < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-bgq-linux"
+target triple = "powerpc64le-unknown-linux"
 ; Function Attrs: nounwind
 define void @foo(double* nocapture %a, double* nocapture readonly %b) #0 {
--- a/test/CodeGen/PowerPC/loop-prep-all.ll
+++ b/test/CodeGen/PowerPC/loop-prep-all.ll
@ -1,5 +1,4 @@
 ; RUN: llc -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BGQ
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@ -21,7 +20,6 @@ for.body:                                         ; preds = %for.body, %entry
 ; CHECK-LABEL: @foo
 ; CHECK-BGQ-DAG: dcbt 4, 5
 ; CHECK-DAG: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}})
 ; CHECK-DAG: fadd [[REG2:[0-9]+]], [[REG1]], 0
 ; CHECK-DAG: stfdu [[REG2]], 8({{[0-9]+}})
@ -34,15 +32,13 @@ for.cond.cleanup6:                                ; preds = %for.body7
 for.body7:                                        ; preds = %for.body, %for.body7
  %i3.017 = phi i32 [ %inc9, %for.body7 ], [ 0, %for.body ]
-  tail call void bitcast (void (...)* @bar to void ()*)() #2
+  tail call void bitcast (void (...)* @bar to void ()*)() #0
  %inc9 = add nuw nsw i32 %i3.017, 1
  %exitcond = icmp eq i32 %inc9, 1024
  br i1 %exitcond, label %for.cond.cleanup6, label %for.body7
 }
-declare void @bar(...) #1
+declare void @bar(...) 
-attributes #0 = { nounwind "target-cpu"="a2q" }
+attributes #0 = { nounwind }
 attributes #1 = { "target-cpu"="a2q" }
 attributes #2 = { nounwind }
--- a/test/CodeGen/PowerPC/lxv-aligned-stack-slots.ll
+++ b/test/CodeGen/PowerPC/lxv-aligned-stack-slots.ll
@ -41,6 +41,6 @@ define void @aligned_slot() #0 {
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
-attributes #0 = { nounwind "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
 attributes #2 = { nounwind }
--- a/test/CodeGen/PowerPC/machine-combiner.ll
+++ b/test/CodeGen/PowerPC/machine-combiner.ll
@ -1,5 +1,4 @@
 ; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck  %s -check-prefix=CHECK -check-prefix=CHECK-PWR
 ; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q < %s | FileCheck  %s -check-prefix=CHECK -check-prefix=CHECK-QPX
 ; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 < %s | FileCheck  %s -check-prefix=FIXPOINT
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@ -93,9 +92,6 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
 define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 ; CHECK-LABEL: vector_reassociate_adds1:
 ; CHECK:       # %bb.0:
 ; CHECK-QPX:       qvfadds [[REG0:[0-9]+]], 1, 2
 ; CHECK-QPX:       qvfadds [[REG1:[0-9]+]], 3, 4
 ; CHECK-QPX:       qvfadds 1, [[REG0]], [[REG1]]
 ; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
 ; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
@ -110,9 +106,6 @@ define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <
 define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 ; CHECK-LABEL: vector_reassociate_adds2:
 ; CHECK:       # %bb.0:
 ; CHECK-QPX:       qvfadds [[REG0:[0-9]+]], 1, 2
 ; CHECK-QPX:       qvfadds [[REG1:[0-9]+]], 3, 4
 ; CHECK-QPX:       qvfadds 1, [[REG0]], [[REG1]]
 ; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
 ; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
@ -127,9 +120,6 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <
 define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 ; CHECK-LABEL: vector_reassociate_adds3:
 ; CHECK:       # %bb.0:
 ; CHECK-QPX:       qvfadds [[REG0:[0-9]+]], 1, 2
 ; CHECK-QPX:       qvfadds [[REG1:[0-9]+]], 3, 4
 ; CHECK-QPX:       qvfadds 1, [[REG0]], [[REG1]]
 ; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
 ; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
@ -144,9 +134,6 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <
 define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 ; CHECK-LABEL: vector_reassociate_adds4:
 ; CHECK:       # %bb.0:
 ; CHECK-QPX:       qvfadds [[REG0:[0-9]+]], 1, 2
 ; CHECK-QPX:       qvfadds [[REG1:[0-9]+]], 3, 4
 ; CHECK-QPX:       qvfadds 1, [[REG0]], [[REG1]]
 ; CHECK-PWR:       xvaddsp [[REG0:[0-9]+]], 34, 35
 ; CHECK-PWR:       xvaddsp [[REG1:[0-9]+]], 36, 37
 ; CHECK-PWR:       xvaddsp 34, [[REG0]], [[REG1]]
@ -217,9 +204,6 @@ define i64 @reassociate_mulld(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
 define double @reassociate_mamaa_double(double %0, double %1, double %2, double %3, double %4, double %5) {
 ; CHECK-LABEL: reassociate_mamaa_double:
 ; CHECK:       # %bb.0:
 ; CHECK-QPX-DAG:   fmadd [[REG0:[0-9]+]], 4, 3, 2
 ; CHECK-QPX-DAG:   fmadd [[REG1:[0-9]+]], 6, 5, 1
 ; CHECK-QPX:       fadd 1, [[REG0]], [[REG1]]
 ; CHECK-PWR-DAG:   xsmaddadp 1, 6, 5
 ; CHECK-PWR-DAG:   xsmaddadp 2, 4, 3
 ; CHECK-PWR:       xsadddp 1, 2, 1
@ -250,9 +234,6 @@ define float @reassociate_mamaa_float(float %0, float %1, float %2, float %3, fl
 define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5) {
 ; CHECK-LABEL: reassociate_mamaa_vec:
 ; CHECK:       # %bb.0:
 ; CHECK-QPX-DAG:   qvfmadds [[REG0:[0-9]+]], 4, 3, 2
 ; CHECK-QPX-DAG:   qvfmadds [[REG1:[0-9]+]], 6, 5, 1
 ; CHECK-QPX:       qvfadds 1, [[REG0]], [[REG1]]
 ; CHECK-PWR-DAG:   xvmaddasp [[REG0:[0-9]+]], 39, 38
 ; CHECK-PWR-DAG:   xvmaddasp [[REG1:[0-9]+]], 37, 36
 ; CHECK-PWR:       xvaddsp 34, [[REG1]], [[REG0]]
@ -268,11 +249,6 @@ define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x f
 define double @reassociate_mamama_double(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8) {
 ; CHECK-LABEL: reassociate_mamama_double:
 ; CHECK:       # %bb.0:
 ; CHECK-QPX:       fmadd [[REG0:[0-9]+]], 2, 1, 7
 ; CHECK-QPX-DAG:   fmul [[REG1:[0-9]+]], 4, 3
 ; CHECK-QPX-DAG:   fmadd [[REG2:[0-9]+]], 6, 5, [[REG0]]
 ; CHECK-QPX-DAG:   fmadd [[REG3:[0-9]+]], 9, 8, [[REG1]]
 ; CHECK-QPX:       fadd 1, [[REG2]], [[REG3]]
 ; CHECK-PWR:       xsmaddadp 7, 2, 1
 ; CHECK-PWR-DAG:   xsmuldp [[REG0:[0-9]+]], 4, 3
 ; CHECK-PWR-DAG:   xsmaddadp 7, 6, 5
--- a/test/CodeGen/PowerPC/mc-instrlat.ll
+++ b/test/CodeGen/PowerPC/mc-instrlat.ll
@ -19,7 +19,7 @@ entry:
 declare void @bar(double) #1
-attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
 attributes #2 = { nounwind }
--- a/test/CodeGen/PowerPC/mcount-insertion.ll
+++ b/test/CodeGen/PowerPC/mcount-insertion.ll
@ -1,9 +1,8 @@
-; RUN: opt -ee-instrument < %s | opt -inline | llc | FileCheck %s
+; RUN: opt -ee-instrument < %s | opt -inline | llc -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
 ; The run-line mimics how Clang might run the instrumentation passes.
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-bgq-linux"
 define void @leaf_function() #0 {
--- a/test/CodeGen/PowerPC/memcpy-vec.ll
+++ b/test/CodeGen/PowerPC/memcpy-vec.ll
@ -1,6 +1,5 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck  %s -check-prefix=PWR7
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck  %s -check-prefix=PWR8
 ; RUN: llc -verify-machineinstrs -mcpu=a2q < %s | FileCheck  %s -check-prefix=A2Q
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@ -25,12 +24,6 @@ entry:
 ; PWR8: lxvw4x
 ; PWR8: stxvw4x
 ; PWR8: blr
 ; A2Q-LABEL: @foo1
 ; A2Q-NOT: bl memcpy
 ; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
 ; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
 ; A2Q: blr
 }
 ; Function Attrs: nounwind
@ -52,12 +45,6 @@ entry:
 ; PWR8: lxvw4x
 ; PWR8: stxvw4x
 ; PWR8: blr
 ; A2Q-LABEL: @foo2
 ; A2Q-NOT: bl memcpy
 ; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
 ; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
 ; A2Q: blr
 }
 ; Function Attrs: nounwind
@ -76,11 +63,6 @@ entry:
 ; PWR8-NOT: bl memset
 ; PWR8: stxvw4x
 ; PWR8: blr
 ; A2Q-LABEL: @bar1
 ; A2Q-NOT: bl memset
 ; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
 ; A2Q: blr
 }
 ; Function Attrs: nounwind
@ -99,11 +81,6 @@ entry:
 ; PWR8-NOT: bl memset
 ; PWR8: stxvw4x
 ; PWR8: blr
 ; A2Q-LABEL: @bar2
 ; A2Q-NOT: bl memset
 ; A2Q: qvstfdx
 ; A2Q: blr
 }
 ; Function Attrs: nounwind
--- a/test/CodeGen/PowerPC/memset-nc.ll
+++ b/test/CodeGen/PowerPC/memset-nc.ll
@ -1,48 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -O0 < %s | FileCheck %s -check-prefix=CHECK-O0
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-bgq-linux"
 ; Function Attrs: nounwind
 define void @test_qpx() unnamed_addr #0 align 2 {
 entry:
  %0 = load i32, i32* undef, align 4
  %1 = trunc i32 %0 to i8
  call void @llvm.memset.p0i8.i64(i8* align 32 null, i8 %1, i64 64, i1 false)
  ret void
 ; CHECK-LABEL: @test_qpx
 ; CHECK: qvstfdx
 ; CHECK: qvstfdx
 ; CHECK: blr
 ; CHECK-O0-LABEL: @test_qpx
 ; CHECK-O0-NOT: qvstfdx
 ; CHECK-O0: blr
 }
 ; Function Attrs: nounwind
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
 ; Function Attrs: nounwind
 define void @test_vsx() unnamed_addr #2 align 2 {
 entry:
  %0 = load i32, i32* undef, align 4
  %1 = trunc i32 %0 to i8
  call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 32, i1 false)
  ret void
 ; CHECK-LABEL: @test_vsx
 ; CHECK: stxvw4x
 ; CHECK: stxvw4x
 ; CHECK: blr
 ; CHECK-O0-LABEL: @test_vsx
 ; CHECK-O0-NOT: stxvw4x
 ; CHECK-O0: blr
 }
 attributes #0 = { nounwind "target-cpu"="a2q" }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind "target-cpu"="pwr7" }
--- a/test/CodeGen/PowerPC/misched-inorder-latency.ll
+++ b/test/CodeGen/PowerPC/misched-inorder-latency.ll
@ -1,8 +1,7 @@
 ; RUN: llc -verify-machineinstrs < %s -enable-misched -pre-RA-sched=source -scheditins=false \
-; RUN:          -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
+; RUN:          -disable-ifcvt-triangle-false -disable-post-ra -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
 ;
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-bgq-linux"
 ; %val1 is a load live out of %entry. It should be hoisted
 ; above the add.
--- a/test/CodeGen/PowerPC/misched.ll
+++ b/test/CodeGen/PowerPC/misched.ll
@ -1,7 +1,6 @@
 ; RUN: llc < %s -enable-misched -verify-machineinstrs
 ; PR14302
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-bgq-linux"
@b = external global [16000 x double], align 32
--- a/test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll
+++ b/test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll
@ -1,6 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-bgq-linux"
 declare zeroext i1 @ri1()
 declare void @se1()
--- a/test/CodeGen/PowerPC/pcrel-local-caller-toc.ll
+++ b/test/CodeGen/PowerPC/pcrel-local-caller-toc.ll
@ -92,7 +92,7 @@ entry:
 ; Left the target features in this test because it is important that caller has
 ; -pcrelative-memops while callee has +pcrelative-memops
-attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
+attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-spe" }
-attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
+attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-spe" }
-attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-qpx,-spe" }
+attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-spe" }
 attributes #3 = { nounwind }
--- a/test/CodeGen/PowerPC/popcnt.ll
+++ b/test/CodeGen/PowerPC/popcnt.ll
@ -1,8 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mattr=+popcntd < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOWPC
 ; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=pwr7 < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q < %s | FileCheck %s --check-prefix=SLOWPC
 ; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q -mattr=+popcntd < %s | FileCheck %s
 define i64 @_cntb64(i64 %x) nounwind readnone {
  %cnt = tail call i64 @llvm.ppc.popcntb(i64 %x)
--- a/test/CodeGen/PowerPC/ppc-passname.ll
+++ b/test/CodeGen/PowerPC/ppc-passname.ll
@ -105,14 +105,3 @@
 ; STOP-AFTER-BRANCH-COALESCING-NOT: "ppc-branch-coalescing" pass is not registered.
 ; STOP-AFTER-BRANCH-COALESCING: Branch Coalescing 
 ; Test pass name: ppc-qpx-load-splat.
 ; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-before=ppc-qpx-load-splat -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-BEFORE-QPX-LOAD-SPLAT
 ; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: -ppc-qpx-load-splat
 ; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: "ppc-qpx-load-splat" pass is not registered.
 ; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: PowerPC QPX Load Splat Simplification
 ; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-after=ppc-qpx-load-splat -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-AFTER-QPX-LOAD-SPLAT
 ; STOP-AFTER-QPX-LOAD-SPLAT: -ppc-qpx-load-splat
 ; STOP-AFTER-QPX-LOAD-SPLAT-NOT: "ppc-qpx-load-splat" pass is not registered.
 ; STOP-AFTER-QPX-LOAD-SPLAT: PowerPC QPX Load Splat Simplification
--- a/test/CodeGen/PowerPC/ppc64-sibcall.ll
+++ b/test/CodeGen/PowerPC/ppc64-sibcall.ll
@ -1,6 +1,6 @@
 ; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-SCO
-; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
+; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO
-; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
+; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO
 ; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -code-model=small | FileCheck %s -check-prefix=SCM
 ; No combination of "powerpc64le-unknown-linux-gnu" + "CHECK-SCO", because
@ -117,23 +117,6 @@ define void @caller_local_sret_32(%S_32* %a) #1 {
 attributes #0 = { noinline nounwind  }
 attributes #1 = { nounwind }
 ; vector <4 x i1> test
 define void @callee_v4i1(i8 %a, <4 x i1> %b, <4 x i1> %c) { ret void }
 define void @caller_v4i1_reorder(i8 %a, <4 x i1> %b, <4 x i1> %c) {
  tail call void @callee_v4i1(i8 %a, <4 x i1> %c, <4 x i1> %b)
  ret void
 ; <4 x i1> is 32 bytes aligned, if subtarget doesn't support qpx, then we can't
 ; place b, c to qpx register, so we can't do sco on caller_v4i1_reorder
 ; CHECK-SCO-LABEL: caller_v4i1_reorder:
 ; CHECK-SCO: bl callee_v4i1
 ; CHECK-SCO-HASQPX-LABEL: caller_v4i1_reorder:
 ; CHECK-SCO-HASQPX: b callee_v4i1
 }
 define void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) { ret void }
 define void @f128_caller(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) {
  tail call void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b)
--- a/test/CodeGen/PowerPC/pr24546.ll
+++ b/test/CodeGen/PowerPC/pr24546.ll
@ -47,8 +47,8 @@ declare double @pow(double, double) #0
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
--- a/test/CodeGen/PowerPC/pr27350.ll
+++ b/test/CodeGen/PowerPC/pr27350.ll
@ -18,7 +18,7 @@ entry:
 declare fastcc void @bar([2 x i64], [2 x i64]) unnamed_addr #1 align 2
 attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #2 = { nounwind }
 !llvm.ident = !{!0}
--- a/test/CodeGen/PowerPC/pr28130.ll
+++ b/test/CodeGen/PowerPC/pr28130.ll
@ -67,4 +67,4 @@ bb:
  ret void
 }
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
--- a/test/CodeGen/PowerPC/preinc-ld-sel-crash.ll
+++ b/test/CodeGen/PowerPC/preinc-ld-sel-crash.ll
@ -1,6 +1,6 @@
 ; RUN: llc -verify-machineinstrs < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-bgq-linux"
+target triple = "powerpc64le-unknown-linux"
 %t1 = type { %t2*, %t3* }
 %t2 = type <{ %t3*, i32, [4 x i8] }>
--- a/test/CodeGen/PowerPC/qpx-bv-sint.ll
+++ b/test/CodeGen/PowerPC/qpx-bv-sint.ll
@ -1,33 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-bgq-linux"
 define void @s452(i32 %inp1) nounwind {
 entry:
  br label %for.body4
 for.body4:                                        ; preds = %for.body4, %entry
  %conv.4 = sitofp i32 %inp1 to double
  %conv.5 = sitofp i32 %inp1 to double
  %mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0
  %v = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1
  %vv = fmul <2 x double> %v, %v
  %add7.4 = fadd <2 x double> %vv, %vv
  store <2 x double> %add7.4, <2 x double>* undef, align 16
  br i1 undef, label %for.end, label %for.body4
 for.end:                                          ; preds = %for.body4
  unreachable
 ; CHECK-LABEL: @s452
 ; CHECK: lfiwax [[REG1:[0-9]+]],
 ; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]]
 ; FIXME: We could 'promote' this to a vector earlier and remove this splat.
 ; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0
 ; CHECK: qvfmul
 ; CHECK: qvfadd
 ; CHECK: qvesplati {{[0-9]+}},
 ; FIXME: We can use qvstfcdx here instead of two stores.
 ; CHECK: stfd
 ; CHECK: stfd
 }
--- a/test/CodeGen/PowerPC/qpx-bv.ll
+++ b/test/CodeGen/PowerPC/qpx-bv.ll
@ -1,37 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-bgq-linux"
 define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) {
  %v1 = insertelement <4 x double> undef, double %f1, i32 0
  %v2 = insertelement <4 x double> %v1,   double %f2, i32 1
  %v3 = insertelement <4 x double> %v2,   double %f3, i32 2
  %v4 = insertelement <4 x double> %v3,   double %f4, i32 3
  ret <4 x double> %v4
 ; CHECK-LABEL: @foo
 ; CHECK: qvgpci [[REG1:[0-9]+]], 275
 ; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
 ; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
 ; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
 ; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
 ; CHECK: blr
 }
 define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) {
  %v1 = insertelement <4 x float> undef, float %f1, i32 0
  %v2 = insertelement <4 x float> %v1,   float %f2, i32 1
  %v3 = insertelement <4 x float> %v2,   float %f3, i32 2
  %v4 = insertelement <4 x float> %v3,   float %f4, i32 3
  ret <4 x float> %v4
 ; CHECK-LABEL: @goo
 ; CHECK: qvgpci [[REG1:[0-9]+]], 275
 ; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
 ; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
 ; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
 ; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
 ; CHECK: blr
 }
--- a/test/CodeGen/PowerPC/qpx-func-clobber.ll
+++ b/test/CodeGen/PowerPC/qpx-func-clobber.ll
@ -1,22 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target triple = "powerpc64-bgq-linux"
 declare <4 x double> @foo(<4 x double> %p)
 define <4 x double> @bar(<4 x double> %p, <4 x double> %q) {
 entry:
  %v = call <4 x double> @foo(<4 x double> %p)
  %w = call <4 x double> @foo(<4 x double> %q)
  %x = fadd <4 x double> %v, %w
  ret <4 x double> %x
 ; CHECK-LABEL: @bar
 ; CHECK: qvstfdx 2,
 ; CHECK: bl foo
 ; CHECK: qvstfdx 1,
 ; CHECK: qvlfdx 1,
 ; CHECK: bl foo
 ; CHECK: qvlfdx [[REG:[0-9]+]],
 ; CHECK: qvfadd 1, [[REG]], 1
 }
--- a/test/CodeGen/PowerPC/qpx-load-splat.ll
+++ b/test/CodeGen/PowerPC/qpx-load-splat.ll
@ -1,80 +0,0 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
 ; RUN:   -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
 ; Function Attrs: norecurse nounwind readonly
 define <4 x double> @foo(double* nocapture readonly %a) #0 {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxvdsx v2, 0, r3
 ; CHECK-NEXT:    vmr v3, v2
 ; CHECK-NEXT:    blr
 entry:
  %0 = load double, double* %a, align 8
  %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
  %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %shuffle.i
 }
 define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 {
 ; CHECK-LABEL: foox:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sldi r4, r4, 3
 ; CHECK-NEXT:    lxvdsx v2, r3, r4
 ; CHECK-NEXT:    vmr v3, v2
 ; CHECK-NEXT:    blr
 entry:
  %p = getelementptr double, double* %a, i64 %idx
  %0 = load double, double* %p, align 8
  %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
  %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %shuffle.i
 }
 define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
 ; CHECK-LABEL: fooxu:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sldi r4, r4, 3
 ; CHECK-NEXT:    add r6, r3, r4
 ; CHECK-NEXT:    std r6, 0(r5)
 ; CHECK-NEXT:    lxvdsx v2, r3, r4
 ; CHECK-NEXT:    vmr v3, v2
 ; CHECK-NEXT:    blr
 entry:
  %p = getelementptr double, double* %a, i64 %idx
  %0 = load double, double* %p, align 8
  %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
  %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
  store double* %p, double** %pptr, align 8
  ret <4 x double> %shuffle.i
 }
 define <4 x float> @foof(float* nocapture readonly %a) #0 {
 ; CHECK-LABEL: foof:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-NEXT:    blr
 entry:
  %0 = load float, float* %a, align 4
  %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
  %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %shuffle.i
 }
 define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
 ; CHECK-LABEL: foofx:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sldi r4, r4, 2
 ; CHECK-NEXT:    lfiwzx f0, r3, r4
 ; CHECK-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-NEXT:    blr
 entry:
  %p = getelementptr float, float* %a, i64 %idx
  %0 = load float, float* %p, align 4
  %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
  %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %shuffle.i
 }
--- a/test/CodeGen/PowerPC/qpx-load.ll
+++ b/test/CodeGen/PowerPC/qpx-load.ll
@ -1,26 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target triple = "powerpc64-bgq-linux"
 define <4 x double> @foo(<4 x double>* %p) {
 entry:
  %v = load <4 x double>, <4 x double>* %p, align 8
  ret <4 x double> %v
 }
 ; CHECK: @foo
 ; CHECK-DAG: li [[REG1:[0-9]+]], 31
 ; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
 ; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
 ; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
 ; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
 ; CHECK: blr
 define <4 x double> @bar(<4 x double>* %p) {
 entry:
  %v = load <4 x double>, <4 x double>* %p, align 32
  ret <4 x double> %v
 }
 ; CHECK: @bar
 ; CHECK: qvlfdx
--- a/test/CodeGen/PowerPC/qpx-qvfmadd.ll
+++ b/test/CodeGen/PowerPC/qpx-qvfmadd.ll
@ -1,79 +0,0 @@
 ; RUN: llc -verify-machineinstrs -stop-after=finalize-isel < %s -mcpu=a2q | FileCheck %s
 target triple = "powerpc64-bgq-linux"
 define <2 x double> @test_qvfmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
 ; CHECK: test_qvfmadd
 ; CHECK: QVFMADD %2, %1, %0, implicit $rm
 ;
  %4 = fmul reassoc nsz <2 x double> %2, %1
  %5 = fadd reassoc nsz <2 x double> %4, %0
  ret <2 x double> %5
 }
 define <4 x float> @test_qvfmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
 ; CHECK: test_qvfmadds
 ; CHECK: QVFMADDSs %2, %1, %0, implicit $rm
 ;
  %4 = fmul reassoc nsz <4 x float> %2, %1
  %5 = fadd reassoc nsz <4 x float> %4, %0
  ret <4 x float> %5
 }
 define <2 x double> @test_qvfnmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
 ; CHECK: test_qvfnmadd
 ; CHECK: QVFNMADD %2, %1, %0, implicit $rm
 ;
  %4 = fmul reassoc nsz <2 x double> %2, %1
  %5 = fadd reassoc nsz <2 x double> %4, %0
  %6 = fneg reassoc nsz <2 x double> %5
  ret <2 x double> %6
 }
 define <4 x float> @test_qvfnmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
 ; CHECK: test_qvfnmadds
 ; CHECK: QVFNMADDSs %2, %1, %0, implicit $rm
 ;
  %4 = fmul reassoc nsz <4 x float> %2, %1
  %5 = fadd reassoc nsz <4 x float> %4, %0
  %6 = fneg reassoc nsz <4 x float> %5
  ret <4 x float> %6
 }
 define <2 x double> @test_qvfmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
 ; CHECK: test_qvfmsub
 ; CHECK: QVFMSUB %2, %1, %0, implicit $rm
 ;
  %4 = fmul reassoc nsz <2 x double> %2, %1
  %5 = fsub reassoc nsz <2 x double> %4, %0
  ret <2 x double> %5
 }
 define <4 x float> @test_qvfmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
 ; CHECK: test_qvfmsubs
 ; CHECK: QVFMSUBSs %2, %1, %0, implicit $rm
 ;
  %4 = fmul reassoc nsz <4 x float> %2, %1
  %5 = fsub reassoc nsz <4 x float> %4, %0
  ret <4 x float> %5
 }
 define <2 x double> @test_qvfnmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
 ; CHECK: test_qvfnmsub
 ; CHECK: QVFNMSUB %2, %1, %0, implicit $rm
 ;
  %4 = fmul reassoc nsz <2 x double> %2, %1
  %5 = fsub reassoc nsz <2 x double> %4, %0
  %6 = fneg reassoc nsz <2 x double> %5
  ret <2 x double> %6
 }
 define <4 x float> @test_qvfnmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
 ; CHECK: test_qvfnmsubs
 ; CHECK: QVFNMSUBSs %2, %1, %0, implicit $rm
 ;
  %4 = fmul reassoc nsz <4 x float> %2, %1
  %5 = fsub reassoc nsz <4 x float> %4, %0
  %6 = fneg reassoc nsz <4 x float> %5
  ret <4 x float> %6
 }
--- a/test/CodeGen/PowerPC/qpx-recipest.ll
+++ b/test/CodeGen/PowerPC/qpx-recipest.ll
@ -1,473 +0,0 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
 define <4 x double> @foo_fmf(<4 x double> %a, <4 x double> %b) nounwind {
 ; CHECK-LABEL: foo_fmf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    qvfrsqrte 3, 2
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
 ; CHECK-NEXT:    qvlfdx 0, 0, 3
 ; CHECK-NEXT:    qvfmul 4, 3, 3
 ; CHECK-NEXT:    qvfmsub 2, 2, 0, 2
 ; CHECK-NEXT:    qvfnmsub 4, 2, 4, 0
 ; CHECK-NEXT:    qvfmul 3, 3, 4
 ; CHECK-NEXT:    qvfmul 4, 3, 3
 ; CHECK-NEXT:    qvfnmsub 0, 2, 4, 0
 ; CHECK-NEXT:    qvfmul 0, 3, 0
 ; CHECK-NEXT:    qvfmul 1, 1, 0
 ; CHECK-NEXT:    blr
 entry:
  %x = call ninf afn reassoc <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
  %r = fdiv arcp reassoc <4 x double> %a, %x
  ret <4 x double> %r
 }
 define <4 x double> @foo_safe(<4 x double> %a, <4 x double> %b) nounwind {
 ; CHECK-LABEL: foo_safe:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 5, 2, 3
 ; CHECK-NEXT:    qvesplati 3, 2, 1
 ; CHECK-NEXT:    qvesplati 4, 2, 2
 ; CHECK-NEXT:    fsqrt 2, 2
 ; CHECK-NEXT:    fsqrt 5, 5
 ; CHECK-NEXT:    fsqrt 4, 4
 ; CHECK-NEXT:    fsqrt 3, 3
 ; CHECK-NEXT:    qvesplati 6, 1, 3
 ; CHECK-NEXT:    qvgpci 0, 275
 ; CHECK-NEXT:    fdiv 2, 1, 2
 ; CHECK-NEXT:    fdiv 5, 6, 5
 ; CHECK-NEXT:    qvesplati 6, 1, 2
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    fdiv 4, 6, 4
 ; CHECK-NEXT:    fdiv 1, 1, 3
 ; CHECK-NEXT:    qvfperm 3, 4, 5, 0
 ; CHECK-NEXT:    qvfperm 0, 2, 1, 0
 ; CHECK-NEXT:    qvgpci 1, 101
 ; CHECK-NEXT:    qvfperm 1, 0, 3, 1
 ; CHECK-NEXT:    blr
 entry:
  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
  %r = fdiv <4 x double> %a, %x
  ret <4 x double> %r
 }
 define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {
 ; CHECK-LABEL: foof_fmf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
 ; CHECK-NEXT:    qvfrsqrtes 3, 2
 ; CHECK-NEXT:    addi 3, 3, .LCPI2_0@toc@l
 ; CHECK-NEXT:    qvlfsx 0, 0, 3
 ; CHECK-NEXT:    qvfmuls 4, 3, 3
 ; CHECK-NEXT:    qvfmsubs 2, 2, 0, 2
 ; CHECK-NEXT:    qvfnmsubs 0, 2, 4, 0
 ; CHECK-NEXT:    qvfmuls 0, 3, 0
 ; CHECK-NEXT:    qvfmul 1, 1, 0
 ; CHECK-NEXT:    blr
 entry:
  %x = call afn ninf reassoc <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
  %y = fpext <4 x float> %x to <4 x double>
  %r = fdiv arcp reassoc nsz <4 x double> %a, %y
  ret <4 x double> %r
 }
 define <4 x double> @foof_safe(<4 x double> %a, <4 x float> %b) nounwind {
 ; CHECK-LABEL: foof_safe:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 0, 2, 3
 ; CHECK-NEXT:    qvesplati 3, 2, 2
 ; CHECK-NEXT:    fsqrts 4, 2
 ; CHECK-NEXT:    qvesplati 2, 2, 1
 ; CHECK-NEXT:    fsqrts 0, 0
 ; CHECK-NEXT:    fsqrts 3, 3
 ; CHECK-NEXT:    fsqrts 2, 2
 ; CHECK-NEXT:    qvgpci 5, 275
 ; CHECK-NEXT:    qvgpci 6, 101
 ; CHECK-NEXT:    qvfperm 0, 3, 0, 5
 ; CHECK-NEXT:    qvesplati 3, 1, 2
 ; CHECK-NEXT:    qvfperm 2, 4, 2, 5
 ; CHECK-NEXT:    qvfperm 0, 2, 0, 6
 ; CHECK-NEXT:    qvesplati 2, 1, 3
 ; CHECK-NEXT:    qvesplati 4, 0, 3
 ; CHECK-NEXT:    fdiv 2, 2, 4
 ; CHECK-NEXT:    qvesplati 4, 0, 2
 ; CHECK-NEXT:    fdiv 3, 3, 4
 ; CHECK-NEXT:    qvesplati 4, 1, 1
 ; CHECK-NEXT:    fdiv 1, 1, 0
 ; CHECK-NEXT:    qvesplati 0, 0, 1
 ; CHECK-NEXT:    fdiv 0, 4, 0
 ; CHECK-NEXT:    qvfperm 2, 3, 2, 5
 ; CHECK-NEXT:    qvfperm 0, 1, 0, 5
 ; CHECK-NEXT:    qvfperm 1, 0, 2, 6
 ; CHECK-NEXT:    blr
 entry:
  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
  %y = fpext <4 x float> %x to <4 x double>
  %r = fdiv <4 x double> %a, %y
  ret <4 x double> %r
 }
 define <4 x float> @food_fmf(<4 x float> %a, <4 x double> %b) nounwind {
 ; CHECK-LABEL: food_fmf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
 ; CHECK-NEXT:    qvfrsqrte 3, 2
 ; CHECK-NEXT:    addi 3, 3, .LCPI4_0@toc@l
 ; CHECK-NEXT:    qvlfdx 0, 0, 3
 ; CHECK-NEXT:    qvfmul 4, 3, 3
 ; CHECK-NEXT:    qvfmsub 2, 2, 0, 2
 ; CHECK-NEXT:    qvfnmsub 4, 2, 4, 0
 ; CHECK-NEXT:    qvfmul 3, 3, 4
 ; CHECK-NEXT:    qvfmul 4, 3, 3
 ; CHECK-NEXT:    qvfnmsub 0, 2, 4, 0
 ; CHECK-NEXT:    qvfmul 0, 3, 0
 ; CHECK-NEXT:    qvfrsp 0, 0
 ; CHECK-NEXT:    qvfmuls 1, 1, 0
 ; CHECK-NEXT:    blr
 entry:
  %x = call afn ninf reassoc <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
  %y = fptrunc <4 x double> %x to <4 x float>
  %r = fdiv arcp reassoc <4 x float> %a, %y
  ret <4 x float> %r
 }
 define <4 x float> @food_safe(<4 x float> %a, <4 x double> %b) nounwind {
 ; CHECK-LABEL: food_safe:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 0, 2, 3
 ; CHECK-NEXT:    qvesplati 3, 2, 2
 ; CHECK-NEXT:    fsqrt 4, 2
 ; CHECK-NEXT:    qvesplati 2, 2, 1
 ; CHECK-NEXT:    fsqrt 0, 0
 ; CHECK-NEXT:    fsqrt 3, 3
 ; CHECK-NEXT:    fsqrt 2, 2
 ; CHECK-NEXT:    qvgpci 5, 275
 ; CHECK-NEXT:    qvgpci 6, 101
 ; CHECK-NEXT:    qvfperm 0, 3, 0, 5
 ; CHECK-NEXT:    qvesplati 3, 1, 2
 ; CHECK-NEXT:    qvfperm 2, 4, 2, 5
 ; CHECK-NEXT:    qvfperm 0, 2, 0, 6
 ; CHECK-NEXT:    qvesplati 2, 1, 3
 ; CHECK-NEXT:    qvfrsp 0, 0
 ; CHECK-NEXT:    qvesplati 4, 0, 3
 ; CHECK-NEXT:    fdivs 2, 2, 4
 ; CHECK-NEXT:    qvesplati 4, 0, 2
 ; CHECK-NEXT:    fdivs 3, 3, 4
 ; CHECK-NEXT:    qvesplati 4, 1, 1
 ; CHECK-NEXT:    fdivs 1, 1, 0
 ; CHECK-NEXT:    qvesplati 0, 0, 1
 ; CHECK-NEXT:    fdivs 0, 4, 0
 ; CHECK-NEXT:    qvfperm 2, 3, 2, 5
 ; CHECK-NEXT:    qvfperm 0, 1, 0, 5
 ; CHECK-NEXT:    qvfperm 1, 0, 2, 6
 ; CHECK-NEXT:    blr
 entry:
  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
  %y = fptrunc <4 x double> %x to <4 x float>
  %r = fdiv <4 x float> %a, %y
  ret <4 x float> %r
 }
 define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-LABEL: goo_fmf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
 ; CHECK-NEXT:    qvfrsqrtes 3, 2
 ; CHECK-NEXT:    addi 3, 3, .LCPI6_0@toc@l
 ; CHECK-NEXT:    qvlfsx 0, 0, 3
 ; CHECK-NEXT:    qvfmuls 4, 3, 3
 ; CHECK-NEXT:    qvfmsubs 2, 2, 0, 2
 ; CHECK-NEXT:    qvfnmsubs 0, 2, 4, 0
 ; CHECK-NEXT:    qvfmuls 0, 3, 0
 ; CHECK-NEXT:    qvfmuls 1, 1, 0
 ; CHECK-NEXT:    blr
 entry:
  %x = call afn ninf reassoc <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
  %r = fdiv arcp reassoc nsz <4 x float> %a, %x
  ret <4 x float> %r
 }
 define <4 x float> @goo_safe(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-LABEL: goo_safe:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 5, 2, 3
 ; CHECK-NEXT:    qvesplati 3, 2, 1
 ; CHECK-NEXT:    qvesplati 4, 2, 2
 ; CHECK-NEXT:    fsqrts 2, 2
 ; CHECK-NEXT:    fsqrts 5, 5
 ; CHECK-NEXT:    fsqrts 4, 4
 ; CHECK-NEXT:    fsqrts 3, 3
 ; CHECK-NEXT:    qvesplati 6, 1, 3
 ; CHECK-NEXT:    qvgpci 0, 275
 ; CHECK-NEXT:    fdivs 2, 1, 2
 ; CHECK-NEXT:    fdivs 5, 6, 5
 ; CHECK-NEXT:    qvesplati 6, 1, 2
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    fdivs 4, 6, 4
 ; CHECK-NEXT:    fdivs 1, 1, 3
 ; CHECK-NEXT:    qvfperm 3, 4, 5, 0
 ; CHECK-NEXT:    qvfperm 0, 2, 1, 0
 ; CHECK-NEXT:    qvgpci 1, 101
 ; CHECK-NEXT:    qvfperm 1, 0, 3, 1
 ; CHECK-NEXT:    blr
 entry:
  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
  %r = fdiv <4 x float> %a, %x
  ret <4 x float> %r
 }
 define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
 ; CHECK-LABEL: foo2_fmf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
 ; CHECK-NEXT:    qvfre 3, 2
 ; CHECK-NEXT:    addi 3, 3, .LCPI8_0@toc@l
 ; CHECK-NEXT:    qvlfdx 0, 0, 3
 ; CHECK-NEXT:    qvfmadd 0, 2, 3, 0
 ; CHECK-NEXT:    qvfnmsub 0, 3, 0, 3
 ; CHECK-NEXT:    qvfmul 3, 1, 0
 ; CHECK-NEXT:    qvfnmsub 1, 2, 3, 1
 ; CHECK-NEXT:    qvfmadd 1, 0, 1, 3
 ; CHECK-NEXT:    blr
 entry:
  %r = fdiv arcp reassoc nsz ninf <4 x double> %a, %b
  ret <4 x double> %r
 }
 ; foo2_safe: the same <4 x double> division as foo2_fmf but without any
 ; fast-math flags. The expected code performs four scalar fdiv instructions
 ; and rebuilds the vector with qvgpci/qvfperm; no estimate instruction appears.
 ; The function body has no named entry block, which is why the %bb.0 line
 ; below carries no "# %entry" suffix.
 define <4 x double> @foo2_safe(<4 x double> %a, <4 x double> %b) nounwind {
 ; CHECK-LABEL: foo2_safe:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    qvesplati 3, 2, 3
 ; CHECK-NEXT:    qvesplati 4, 1, 3
 ; CHECK-NEXT:    qvesplati 5, 2, 2
 ; CHECK-NEXT:    qvgpci 0, 275
 ; CHECK-NEXT:    fdiv 3, 4, 3
 ; CHECK-NEXT:    qvesplati 4, 1, 2
 ; CHECK-NEXT:    fdiv 4, 4, 5
 ; CHECK-NEXT:    fdiv 5, 1, 2
 ; CHECK-NEXT:    qvesplati 2, 2, 1
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    fdiv 1, 1, 2
 ; CHECK-NEXT:    qvfperm 2, 4, 3, 0
 ; CHECK-NEXT:    qvfperm 0, 5, 1, 0
 ; CHECK-NEXT:    qvgpci 1, 101
 ; CHECK-NEXT:    qvfperm 1, 0, 2, 1
 ; CHECK-NEXT:    blr
  %r = fdiv <4 x double> %a, %b
  ret <4 x double> %r
 }
 ; goo2_fmf: <4 x float> division carrying the arcp, reassoc and ninf
 ; fast-math flags. The expected code uses the single-precision estimate
 ; qvfres followed by qvfmuls/qvfnmsubs/qvfmadds; no fdivs is expected.
 define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-LABEL: goo2_fmf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvfres 0, 2
 ; CHECK-NEXT:    qvfmuls 3, 1, 0
 ; CHECK-NEXT:    qvfnmsubs 1, 2, 3, 1
 ; CHECK-NEXT:    qvfmadds 1, 0, 1, 3
 ; CHECK-NEXT:    blr
 entry:
  %r = fdiv arcp reassoc ninf <4 x float> %a, %b
  ret <4 x float> %r
 }
 ; goo2_safe: <4 x float> division without fast-math flags. The expected code
 ; performs four scalar fdivs instructions and rebuilds the vector with
 ; qvgpci/qvfperm; no estimate instruction appears.
 define <4 x float> @goo2_safe(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-LABEL: goo2_safe:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 3, 2, 3
 ; CHECK-NEXT:    qvesplati 4, 1, 3
 ; CHECK-NEXT:    qvesplati 5, 2, 2
 ; CHECK-NEXT:    qvgpci 0, 275
 ; CHECK-NEXT:    fdivs 3, 4, 3
 ; CHECK-NEXT:    qvesplati 4, 1, 2
 ; CHECK-NEXT:    fdivs 4, 4, 5
 ; CHECK-NEXT:    fdivs 5, 1, 2
 ; CHECK-NEXT:    qvesplati 2, 2, 1
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    fdivs 1, 1, 2
 ; CHECK-NEXT:    qvfperm 2, 4, 3, 0
 ; CHECK-NEXT:    qvfperm 0, 5, 1, 0
 ; CHECK-NEXT:    qvgpci 1, 101
 ; CHECK-NEXT:    qvfperm 1, 0, 2, 1
 ; CHECK-NEXT:    blr
 entry:
  %r = fdiv <4 x float> %a, %b
  ret <4 x float> %r
 }
 ; foo3_fmf_denorm_on: <4 x double> sqrt with the reassoc, ninf and afn flags,
 ; compiled with attribute group #0 (declared at the end of this test as
 ; nounwind with "denormal-fp-math"="ieee,ieee"). The expected code starts from
 ; the qvfrsqrte estimate and ends with a qvfabs / qvfcmplt / qvfsel sequence
 ; fed by two extra constant-pool loads; compare with foo3_fmf_denorm_off,
 ; whose tail is a plain qvfcmpeq / qvfsel.
 define <4 x double> @foo3_fmf_denorm_on(<4 x double> %a) #0 {
 ; CHECK-LABEL: foo3_fmf_denorm_on:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
 ; CHECK-NEXT:    qvfrsqrte 0, 1
 ; CHECK-NEXT:    addi 3, 3, .LCPI12_0@toc@l
 ; CHECK-NEXT:    qvlfdx 2, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI12_1@toc@l
 ; CHECK-NEXT:    qvfmul 3, 0, 0
 ; CHECK-NEXT:    qvfmsub 4, 1, 2, 1
 ; CHECK-NEXT:    qvfnmsub 3, 4, 3, 2
 ; CHECK-NEXT:    qvfmul 0, 0, 3
 ; CHECK-NEXT:    qvfmul 3, 0, 0
 ; CHECK-NEXT:    qvfnmsub 2, 4, 3, 2
 ; CHECK-NEXT:    qvfmul 0, 0, 2
 ; CHECK-NEXT:    qvlfdx 2, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI12_2@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI12_2@toc@l
 ; CHECK-NEXT:    qvlfdx 3, 0, 3
 ; CHECK-NEXT:    qvfmul 0, 0, 1
 ; CHECK-NEXT:    qvfabs 1, 1
 ; CHECK-NEXT:    qvfcmplt 1, 1, 2
 ; CHECK-NEXT:    qvfsel 1, 1, 3, 0
 ; CHECK-NEXT:    blr
 entry:
  %r = call reassoc ninf afn <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  ret <4 x double> %r
 }
 ; foo3_fmf_denorm_off: <4 x double> sqrt with the afn, reassoc and ninf flags,
 ; compiled with attribute group #1 (declared at the end of this test as
 ; nounwind with "denormal-fp-math"="preserve-sign,preserve-sign"). The
 ; expected code starts from the qvfrsqrte estimate and finishes with a
 ; qvfcmpeq / qvfsel pair; unlike the denorm_on variant there is no qvfabs,
 ; no qvfcmplt and no third constant-pool load.
 define <4 x double> @foo3_fmf_denorm_off(<4 x double> %a) #1 {
 ; CHECK-LABEL: foo3_fmf_denorm_off:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
 ; CHECK-NEXT:    qvfrsqrte 0, 1
 ; CHECK-NEXT:    addi 3, 3, .LCPI13_0@toc@l
 ; CHECK-NEXT:    qvlfdx 2, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI13_1@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI13_1@toc@l
 ; CHECK-NEXT:    qvfmul 3, 0, 0
 ; CHECK-NEXT:    qvfmsub 4, 1, 2, 1
 ; CHECK-NEXT:    qvfnmsub 3, 4, 3, 2
 ; CHECK-NEXT:    qvfmul 0, 0, 3
 ; CHECK-NEXT:    qvfmul 3, 0, 0
 ; CHECK-NEXT:    qvfnmsub 2, 4, 3, 2
 ; CHECK-NEXT:    qvfmul 0, 0, 2
 ; CHECK-NEXT:    qvlfdx 2, 0, 3
 ; CHECK-NEXT:    qvfmul 0, 0, 1
 ; CHECK-NEXT:    qvfcmpeq 1, 1, 2
 ; CHECK-NEXT:    qvfsel 1, 1, 2, 0
 ; CHECK-NEXT:    blr
 entry:
  %r = call afn reassoc ninf <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  ret <4 x double> %r
 }
 ; foo3_safe_denorm_on: <4 x double> sqrt with no fast-math flags under
 ; attribute group #0 ("denormal-fp-math"="ieee,ieee"). The expected code is
 ; four scalar fsqrt instructions followed by qvgpci/qvfperm to rebuild the
 ; vector; the expectations are identical to foo3_safe_denorm_off.
 define <4 x double> @foo3_safe_denorm_on(<4 x double> %a) #0 {
 ; CHECK-LABEL: foo3_safe_denorm_on:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 2, 1, 3
 ; CHECK-NEXT:    qvesplati 3, 1, 2
 ; CHECK-NEXT:    fsqrt 4, 1
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    fsqrt 2, 2
 ; CHECK-NEXT:    fsqrt 3, 3
 ; CHECK-NEXT:    fsqrt 1, 1
 ; CHECK-NEXT:    qvgpci 0, 275
 ; CHECK-NEXT:    qvfperm 2, 3, 2, 0
 ; CHECK-NEXT:    qvfperm 0, 4, 1, 0
 ; CHECK-NEXT:    qvgpci 1, 101
 ; CHECK-NEXT:    qvfperm 1, 0, 2, 1
 ; CHECK-NEXT:    blr
 entry:
  %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  ret <4 x double> %r
 }
 ; foo3_safe_denorm_off: <4 x double> sqrt with no fast-math flags under
 ; attribute group #1 ("denormal-fp-math"="preserve-sign,preserve-sign"). The
 ; expected code is four scalar fsqrt instructions followed by qvgpci/qvfperm;
 ; the expectations are identical to foo3_safe_denorm_on.
 define <4 x double> @foo3_safe_denorm_off(<4 x double> %a) #1 {
 ; CHECK-LABEL: foo3_safe_denorm_off:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 2, 1, 3
 ; CHECK-NEXT:    qvesplati 3, 1, 2
 ; CHECK-NEXT:    fsqrt 4, 1
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    fsqrt 2, 2
 ; CHECK-NEXT:    fsqrt 3, 3
 ; CHECK-NEXT:    fsqrt 1, 1
 ; CHECK-NEXT:    qvgpci 0, 275
 ; CHECK-NEXT:    qvfperm 2, 3, 2, 0
 ; CHECK-NEXT:    qvfperm 0, 4, 1, 0
 ; CHECK-NEXT:    qvgpci 1, 101
 ; CHECK-NEXT:    qvfperm 1, 0, 2, 1
 ; CHECK-NEXT:    blr
 entry:
  %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  ret <4 x double> %r
 }
 ; goo3_fmf_denorm_on: <4 x float> sqrt with the reassoc, afn, ninf and nsz
 ; flags under attribute group #0 ("denormal-fp-math"="ieee,ieee"). The
 ; expected code starts from the single-precision estimate qvfrsqrtes and ends
 ; with a qvfabs / qvfcmplt / qvfsel sequence fed by three constant-pool loads.
 define <4 x float> @goo3_fmf_denorm_on(<4 x float> %a) #0 {
 ; CHECK-LABEL: goo3_fmf_denorm_on:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI16_1@toc@ha
 ; CHECK-NEXT:    qvfrsqrtes 2, 1
 ; CHECK-NEXT:    addi 3, 3, .LCPI16_1@toc@l
 ; CHECK-NEXT:    qvlfsx 0, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI16_0@toc@l
 ; CHECK-NEXT:    qvfmuls 4, 2, 2
 ; CHECK-NEXT:    qvfmsubs 3, 1, 0, 1
 ; CHECK-NEXT:    qvfnmsubs 0, 3, 4, 0
 ; CHECK-NEXT:    qvlfsx 3, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI16_2@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI16_2@toc@l
 ; CHECK-NEXT:    qvlfsx 4, 0, 3
 ; CHECK-NEXT:    qvfmuls 0, 2, 0
 ; CHECK-NEXT:    qvfabs 2, 1
 ; CHECK-NEXT:    qvfmuls 0, 0, 1
 ; CHECK-NEXT:    qvfcmplt 1, 2, 3
 ; CHECK-NEXT:    qvfsel 1, 1, 4, 0
 ; CHECK-NEXT:    blr
 entry:
  %r = call reassoc afn ninf nsz <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %r
 }
 ; goo3_fmf_denorm_off: <4 x float> sqrt with the reassoc, ninf, afn and nsz
 ; flags under attribute group #1
 ; ("denormal-fp-math"="preserve-sign,preserve-sign"). The expected code starts
 ; from qvfrsqrtes and finishes with qvfcmpeq / qvfsel; unlike the denorm_on
 ; variant there is no qvfabs, no qvfcmplt and only two constant-pool loads.
 define <4 x float> @goo3_fmf_denorm_off(<4 x float> %a) #1 {
 ; CHECK-LABEL: goo3_fmf_denorm_off:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI17_1@toc@ha
 ; CHECK-NEXT:    qvfrsqrtes 2, 1
 ; CHECK-NEXT:    addi 3, 3, .LCPI17_1@toc@l
 ; CHECK-NEXT:    qvlfsx 0, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI17_0@toc@l
 ; CHECK-NEXT:    qvfmuls 4, 2, 2
 ; CHECK-NEXT:    qvfmsubs 3, 1, 0, 1
 ; CHECK-NEXT:    qvfnmsubs 0, 3, 4, 0
 ; CHECK-NEXT:    qvlfsx 3, 0, 3
 ; CHECK-NEXT:    qvfmuls 0, 2, 0
 ; CHECK-NEXT:    qvfmuls 0, 0, 1
 ; CHECK-NEXT:    qvfcmpeq 1, 1, 3
 ; CHECK-NEXT:    qvfsel 1, 1, 3, 0
 ; CHECK-NEXT:    blr
 entry:
  %r = call reassoc ninf afn nsz <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %r
 }
 ; goo3_safe: <4 x float> sqrt with no fast-math flags and a plain nounwind
 ; attribute (no denormal-fp-math attribute group). The expected code is four
 ; scalar fsqrts instructions followed by qvgpci/qvfperm to rebuild the vector.
 define <4 x float> @goo3_safe(<4 x float> %a) nounwind {
 ; CHECK-LABEL: goo3_safe:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 2, 1, 3
 ; CHECK-NEXT:    qvesplati 3, 1, 2
 ; CHECK-NEXT:    fsqrts 4, 1
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    fsqrts 2, 2
 ; CHECK-NEXT:    fsqrts 3, 3
 ; CHECK-NEXT:    fsqrts 1, 1
 ; CHECK-NEXT:    qvgpci 0, 275
 ; CHECK-NEXT:    qvfperm 2, 3, 2, 0
 ; CHECK-NEXT:    qvfperm 0, 4, 1, 0
 ; CHECK-NEXT:    qvgpci 1, 101
 ; CHECK-NEXT:    qvfperm 1, 0, 2, 1
 ; CHECK-NEXT:    blr
 entry:
  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %r
 }
 attributes #0 = { nounwind "denormal-fp-math"="ieee,ieee" }
 attributes #1 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
--- a/test/CodeGen/PowerPC/qpx-rounding-ops.ll
+++ b/test/CodeGen/PowerPC/qpx-rounding-ops.ll
@ -1,109 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 ; test1: llvm.floor on <4 x float>. Both RUN configurations (default and
 ; -enable-unsafe-fp-math) expect a single "qvfrim 1, 1".
 define <4 x float> @test1(<4 x float> %x) nounwind  {
  %call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
  ret <4 x float> %call
 ; CHECK: test1:
 ; CHECK: qvfrim 1, 1
 ; CHECK-FM: test1:
 ; CHECK-FM: qvfrim 1, 1
 }
 declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
 ; test2: llvm.floor on <4 x double>. Both RUN configurations expect
 ; "qvfrim 1, 1", the same instruction as the <4 x float> case in test1.
 define <4 x double> @test2(<4 x double> %x) nounwind  {
  %call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
  ret <4 x double> %call
 ; CHECK: test2:
 ; CHECK: qvfrim 1, 1
 ; CHECK-FM: test2:
 ; CHECK-FM: qvfrim 1, 1
 }
 declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
 ; test3: llvm.nearbyint on <4 x float>. This is a negative test: in both RUN
 ; configurations the output after the test3 label must NOT contain qvfrin
 ; (the exclusion runs until the next positive match in the file).
 define <4 x float> @test3(<4 x float> %x) nounwind  {
  %call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
  ret <4 x float> %call
 ; CHECK: test3:
 ; CHECK-NOT: qvfrin
 ; CHECK-FM: test3:
 ; CHECK-FM-NOT: qvfrin
 }
 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
 ; test4: llvm.nearbyint on <4 x double>. Negative test, mirroring test3: in
 ; both RUN configurations qvfrin must NOT appear after the test4 label.
 define <4 x double> @test4(<4 x double> %x) nounwind  {
  %call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
  ret <4 x double> %call
 ; CHECK: test4:
 ; CHECK-NOT: qvfrin
 ; CHECK-FM: test4:
 ; CHECK-FM-NOT: qvfrin
 }
 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone
 ; test5: llvm.ceil on <4 x float>. Both RUN configurations expect
 ; "qvfrip 1, 1".
 define <4 x float> @test5(<4 x float> %x) nounwind  {
  %call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
  ret <4 x float> %call
 ; CHECK: test5:
 ; CHECK: qvfrip 1, 1
 ; CHECK-FM: test5:
 ; CHECK-FM: qvfrip 1, 1
 }
 declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
 ; test6: llvm.ceil on <4 x double>. Both RUN configurations expect
 ; "qvfrip 1, 1", the same instruction as the <4 x float> case in test5.
 define <4 x double> @test6(<4 x double> %x) nounwind  {
  %call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
  ret <4 x double> %call
 ; CHECK: test6:
 ; CHECK: qvfrip 1, 1
 ; CHECK-FM: test6:
 ; CHECK-FM: qvfrip 1, 1
 }
 declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
 ; test9: llvm.trunc on <4 x float>. Both RUN configurations expect
 ; "qvfriz 1, 1".
 define <4 x float> @test9(<4 x float> %x) nounwind  {
  %call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
  ret <4 x float> %call
 ; CHECK: test9:
 ; CHECK: qvfriz 1, 1
 ; CHECK-FM: test9:
 ; CHECK-FM: qvfriz 1, 1
 }
 declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
 ; test10: llvm.trunc on <4 x double>. Both RUN configurations expect
 ; "qvfriz 1, 1", the same instruction as the <4 x float> case in test9.
 define <4 x double> @test10(<4 x double> %x) nounwind  {
  %call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
  ret <4 x double> %call
 ; CHECK: test10:
 ; CHECK: qvfriz 1, 1
 ; CHECK-FM: test10:
 ; CHECK-FM: qvfriz 1, 1
 }
 declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
--- a/test/CodeGen/PowerPC/qpx-s-load.ll
+++ b/test/CodeGen/PowerPC/qpx-s-load.ll
@ -1,26 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target triple = "powerpc64-bgq-linux"
 ; foo: loads a <4 x float> with only 4-byte alignment. The expectations
 ; after the function require, in any order, an "li" of 15, two qvlfsx loads
 ; (one at the base pointer, one indexed by the li result), a qvlpclsx, and a
 ; qvfperm that combines both loaded values into the return register.
 define <4 x float> @foo(<4 x float>* %p) {
 entry:
  %v = load <4 x float>, <4 x float>* %p, align 4
  ret <4 x float> %v
 }
 ; CHECK: @foo
 ; CHECK-DAG: li [[REG1:[0-9]+]], 15
 ; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
 ; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
 ; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
 ; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
 ; CHECK: blr
 ; bar: loads a <4 x float> with 16-byte alignment. The only expectation is
 ; that a qvlfsx appears after the @bar label; operands are not constrained.
 define <4 x float> @bar(<4 x float>* %p) {
 entry:
  %v = load <4 x float>, <4 x float>* %p, align 16
  ret <4 x float> %v
 }
 ; CHECK: @bar
 ; CHECK: qvlfsx
--- a/test/CodeGen/PowerPC/qpx-s-sel.ll
+++ b/test/CodeGen/PowerPC/qpx-s-sel.ll
@ -1,143 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target triple = "powerpc64-bgq-linux"
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
 ; test1: vector select between two <4 x float> arguments using a <4 x i1>
 ; argument as the mask. Expected to become a single "qvfsel 1, 3, 1, 2".
 define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
 entry:
  %r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
  ret <4 x float> %r
 ; CHECK-LABEL: @test1
 ; CHECK: qvfsel 1, 3, 1, 2
 ; CHECK: blr
 }
 ; test2: builds the <4 x i1> select mask from four scalar i1 arguments with
 ; insertelement, then selects between two <4 x float> values. Expected: a
 ; stw, then qvlfiwzx / qvlfdx / qvfcfidu (in any order), a qvfcmpeq of the
 ; converted value against the qvlfdx result, and a qvfsel on that compare.
 define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
 entry:
  %v = insertelement <4 x i1> undef, i1 %c1, i32 0
  %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
  %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
  %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
  %r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
  ret <4 x float> %r
 ; CHECK-LABEL: @test2
 ; CHECK: stw
 ; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
 ; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
 ; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
 ; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
 ; CHECK: qvfsel 1, [[REG4]], 1, 2
 ; CHECK: blr
 }
 ; test3: ANDs a <4 x i1> argument with a constant mask whose second lane is
 ; undef. The only active expectation is a qvlfsx after the label.
 ; NOTE(review): the final two comment lines in the body (qvflogical and blr)
 ; carry no FileCheck prefix, so FileCheck ignores them -- confirm whether
 ; that is intentional before relying on them.
 define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
 entry:
  %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
  ret <4 x i1> %v
 ; CHECK-LABEL: @test3
 ; CHECK: qvlfsx [[REG:[0-9]+]],
 ; qvflogical 1, 1, [[REG]], 1
 ; blr
 }
 ; test4: loads a <4 x i1> from memory and ANDs it with a <4 x i1> argument.
 ; Expected: lbz, qvlfdx, stw, qvlfiwzx and qvfcfidu in any order, then a
 ; qvfcmpeq of the converted value against the qvlfdx result, and finally
 ; "qvfand 1, 1, <compare result>".
 define <4 x i1> @test4(<4 x i1> %a, <4 x i1>* %t) nounwind {
 entry:
  %q = load <4 x i1>, <4 x i1>* %t, align 16
  %v = and <4 x i1> %a, %q
  ret <4 x i1> %v
 ; CHECK-LABEL: @test4
 ; CHECK-DAG: lbz
 ; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
 ; CHECK-DAG: stw
 ; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
 ; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
 ; CHECK: qvfand 1, 1, [[REG4]]
 ; CHECK: blr
 }
 ; test5: stores a <4 x i1> argument to the global @R. Expected, in order:
 ; qvlfdx, a qvfmadd that uses the loaded value as both of its last two
 ; operands, qvfctiwu, qvstfiwx of the converted value, then lwz and stb.
 define void @test5(<4 x i1> %a) nounwind {
 entry:
  store <4 x i1> %a, <4 x i1>* @R
  ret void
 ; CHECK-LABEL: @test5
 ; CHECK: qvlfdx [[REG1:[0-9]+]],
 ; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
 ; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvstfiwx [[REG3]],
 ; CHECK: lwz
 ; CHECK: stb
 ; CHECK: blr
 }
 ; test6: extracts element 2 of a <4 x i1> argument as a scalar i1. Expected,
 ; in order: qvlfdx, qvfmadd, qvfctiwu, a qvstfiwx of the converted vector,
 ; and an lwz that reads the scalar back.
 define i1 @test6(<4 x i1> %a) nounwind {
 entry:
  %r = extractelement <4 x i1> %a, i32 2
  ret i1 %r
 ; CHECK-LABEL: @test6
 ; CHECK: qvlfdx [[REG1:[0-9]+]],
 ; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
 ; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvstfiwx [[REG3]],
 ; CHECK: lwz
 ; CHECK: blr
 }
 ; test7: extracts elements 2 and 3 of a <4 x i1> argument and ANDs them.
 ; The expectations record the current (acknowledged as suboptimal, see the
 ; FIXME below) code: the converted vector is stored with qvstfiwx twice,
 ; each store followed by an lwz, before the final scalar "and" into r3.
 define i1 @test7(<4 x i1> %a) nounwind {
 entry:
  %r = extractelement <4 x i1> %a, i32 2
  %s = extractelement <4 x i1> %a, i32 3
  %q = and i1 %r, %s
  ret i1 %q
 ; CHECK-LABEL: @test7
 ; CHECK: qvlfdx [[REG1:[0-9]+]],
 ; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
 ; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvstfiwx [[REG3]],
 ; CHECK-DAG: lwz [[REG4:[0-9]+]],
 ; FIXME: We're storing the vector twice, and that's silly.
 ; CHECK-DAG: qvstfiwx [[REG3]],
 ; CHECK: lwz [[REG5:[0-9]+]],
 ; CHECK: and 3,
 ; CHECK: blr
 }
 ; test8: extracts element 2 from a three-element <3 x i1> argument. The
 ; expected instruction sequence is the same as for the <4 x i1> case in
 ; test6: qvlfdx, qvfmadd, qvfctiwu, qvstfiwx, lwz.
 define i1 @test8(<3 x i1> %a) nounwind {
 entry:
  %r = extractelement <3 x i1> %a, i32 2
  ret i1 %r
 ; CHECK-LABEL: @test8
 ; CHECK: qvlfdx [[REG1:[0-9]+]],
 ; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
 ; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvstfiwx [[REG3]],
 ; CHECK: lwz
 ; CHECK: blr
 }
 ; test9: three-element variant of test2. Builds a <3 x i1> mask from three
 ; scalar i1 arguments and selects between two <3 x float> values. The
 ; expected sequence matches test2: stw, qvlfiwzx / qvlfdx / qvfcfidu,
 ; qvfcmpeq, then qvfsel.
 define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
 entry:
  %v = insertelement <3 x i1> undef, i1 %c1, i32 0
  %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
  %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
  %r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
 ; CHECK-LABEL: @test9
 ; CHECK: stw
 ; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
 ; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
 ; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
 ; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
 ; CHECK: qvfsel 1, [[REG4]], 1, 2
 ; CHECK: blr
 }
--- a/test/CodeGen/PowerPC/qpx-s-store.ll
+++ b/test/CodeGen/PowerPC/qpx-s-store.ll
@ -1,25 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target triple = "powerpc64-bgq-linux"
 ; foo: stores a <4 x float> with only 4-byte alignment. The expectations
 ; after the function require four scalar stfs stores (one per lane) before
 ; the return, rather than a single vector store.
 define void @foo(<4 x float> %v, <4 x float>* %p) {
 entry:
  store <4 x float> %v, <4 x float>* %p, align 4
  ret void
 }
 ; CHECK: @foo
 ; CHECK: stfs
 ; CHECK: stfs
 ; CHECK: stfs
 ; CHECK: stfs
 ; CHECK: blr
 ; bar: stores a <4 x float> with 16-byte alignment. The only expectation is
 ; that a vector store qvstfsx appears after the @bar label.
 define void @bar(<4 x float> %v, <4 x float>* %p) {
 entry:
  store <4 x float> %v, <4 x float>* %p, align 16
  ret void
 }
 ; CHECK: @bar
 ; CHECK: qvstfsx
--- a/test/CodeGen/PowerPC/qpx-sel.ll
+++ b/test/CodeGen/PowerPC/qpx-sel.ll
@ -1,151 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target triple = "powerpc64-bgq-linux"
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
 ; test1: vector select between two <4 x double> arguments using a <4 x i1>
 ; argument as the mask. Expected to become a single "qvfsel 1, 3, 1, 2".
 define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
 entry:
  %r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
  ret <4 x double> %r
 ; CHECK-LABEL: @test1
 ; CHECK: qvfsel 1, 3, 1, 2
 ; CHECK: blr
 }
 ; test2: builds the <4 x i1> select mask from four scalar i1 arguments with
 ; insertelement, then selects between two <4 x double> values. Expected:
 ; lbz, stw, qvlfiwzx, qvlfdx and qvfcfidu in any order (the lbz/stw pair is
 ; flagged as unnecessary by the FIXME below), then qvfcmpeq and qvfsel.
 define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
 entry:
  %v = insertelement <4 x i1> undef, i1 %c1, i32 0
  %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
  %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
  %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
  %r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
  ret <4 x double> %r
 ; CHECK-LABEL: @test2
 ; FIXME: This load/store sequence is unnecessary.
 ; CHECK-DAG: lbz
 ; CHECK-DAG: stw
 ; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
 ; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
 ; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
 ; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
 ; CHECK: qvfsel 1, [[REG4]], 1, 2
 ; CHECK: blr
 }
 ; test3: ANDs a <4 x i1> argument with a constant mask whose second lane is
 ; undef. The only active expectation is a qvlfsx after the label.
 ; NOTE(review): the final two comment lines in the body (qvflogical and blr)
 ; carry no FileCheck prefix, so FileCheck ignores them -- confirm whether
 ; that is intentional before relying on them.
 define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
 entry:
  %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
  ret <4 x i1> %v
 ; CHECK-LABEL: @test3
 ; CHECK: qvlfsx [[REG:[0-9]+]],
 ; qvflogical 1, 1, [[REG]], 1
 ; blr
 }
 ; test4: loads a <4 x i1> from memory and ANDs it with a <4 x i1> argument.
 ; Expected: lbz, qvlfdx, stw, qvlfiwzx and qvfcfidu in any order, then a
 ; qvfcmpeq of the converted value against the qvlfdx result, and finally
 ; "qvfand 1, 1, <compare result>".
 define <4 x i1> @test4(<4 x i1> %a, <4 x i1>* %t) nounwind {
 entry:
  %q = load <4 x i1>, <4 x i1>* %t, align 16
  %v = and <4 x i1> %a, %q
  ret <4 x i1> %v
 ; CHECK-LABEL: @test4
 ; CHECK-DAG: lbz
 ; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
 ; CHECK-DAG: stw
 ; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
 ; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
 ; CHECK: qvfand 1, 1, [[REG4]]
 ; CHECK: blr
 }
 ; test5: stores a <4 x i1> argument to the global @R. Expected, in order:
 ; qvlfdx, a qvfmadd that uses the loaded value as both of its last two
 ; operands, qvfctiwu, qvstfiwx of the converted value, then lwz and stb.
 define void @test5(<4 x i1> %a) nounwind {
 entry:
  store <4 x i1> %a, <4 x i1>* @R
  ret void
 ; CHECK-LABEL: @test5
 ; CHECK: qvlfdx [[REG1:[0-9]+]],
 ; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
 ; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvstfiwx [[REG3]],
 ; CHECK: lwz
 ; CHECK: stb
 ; CHECK: blr
 }
 ; test6: extracts element 2 of a <4 x i1> argument as a scalar i1. Expected,
 ; in order: qvlfdx, qvfmadd, qvfctiwu, a qvstfiwx of the converted vector,
 ; and an lwz that reads the scalar back.
 define i1 @test6(<4 x i1> %a) nounwind {
 entry:
  %r = extractelement <4 x i1> %a, i32 2
  ret i1 %r
 ; CHECK-LABEL: @test6
 ; CHECK: qvlfdx [[REG1:[0-9]+]],
 ; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
 ; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvstfiwx [[REG3]],
 ; CHECK: lwz
 ; CHECK: blr
 }
 ; test7: extracts elements 2 and 3 of a <4 x i1> argument and ANDs them.
 ; The expectations record the current (acknowledged as suboptimal, see the
 ; FIXME below) code: the converted vector is stored with qvstfiwx twice and
 ; read back with two lwz loads before the final scalar "and" into r3.
 define i1 @test7(<4 x i1> %a) nounwind {
 entry:
  %r = extractelement <4 x i1> %a, i32 2
  %s = extractelement <4 x i1> %a, i32 3
  %q = and i1 %r, %s
  ret i1 %q
 ; CHECK-LABEL: @test7
 ; CHECK: qvlfdx [[REG1:[0-9]+]],
 ; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
 ; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvstfiwx [[REG3]],
 ; CHECK-DAG: lwz [[REG4:[0-9]+]],
 ; FIXME: We're storing the vector twice, and that's silly.
 ; CHECK-DAG: qvstfiwx [[REG3]],
 ; CHECK-DAG: lwz [[REG5:[0-9]+]],
 ; CHECK: and 3,
 ; CHECK: blr
 }
 ; test8: extracts element 2 from a three-element <3 x i1> argument. The
 ; expected instruction sequence is the same as for the <4 x i1> case in
 ; test6: qvlfdx, qvfmadd, qvfctiwu, qvstfiwx, lwz.
 define i1 @test8(<3 x i1> %a) nounwind {
 entry:
  %r = extractelement <3 x i1> %a, i32 2
  ret i1 %r
 ; CHECK-LABEL: @test8
 ; CHECK: qvlfdx [[REG1:[0-9]+]],
 ; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
 ; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
 ; CHECK: qvstfiwx [[REG3]],
 ; CHECK: lwz
 ; CHECK: blr
 }
 ; test9: three-element variant of test2. Builds a <3 x i1> mask from three
 ; scalar i1 arguments and selects between two <3 x double> values. The
 ; expected sequence matches test2, including the lbz/stw pair that the FIXME
 ; below flags as unnecessary.
 define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
 entry:
  %v = insertelement <3 x i1> undef, i1 %c1, i32 0
  %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
  %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
  %r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
  ret <3 x double> %r
 ; CHECK-LABEL: @test9
 ; FIXME: This load/store sequence is unnecessary.
 ; CHECK-DAG: lbz
 ; CHECK-DAG: stw
 ; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
 ; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
 ; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
 ; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
 ; CHECK: qvfsel 1, [[REG4]], 1, 2
 ; CHECK: blr
 }
--- a/test/CodeGen/PowerPC/qpx-split-vsetcc.ll
+++ b/test/CodeGen/PowerPC/qpx-split-vsetcc.ll
@ -1,31 +0,0 @@
 ; RUN: llc -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-bgq-linux"
 ; Function Attrs: nounwind
 ; gsl_sf_legendre_Pl_deriv_array: compares a <4 x i32> argument against zero
 ; and uses the resulting <4 x i1> to select between splat 0.5 and splat -0.5
 ; <4 x double> constants, then multiplies by %inp2 three times and stores the
 ; product through an undef pointer. The expectations only require that
 ; qvlfiwzx, qvfcfidu, qvfcmpeq, qvfsel and qvfmul appear in that order.
 define void @gsl_sf_legendre_Pl_deriv_array(<4 x i32> %inp1, <4 x double> %inp2) #0 {
 entry:
  br label %vector.body198
 vector.body198:                                   ; preds = %entry
  %0 = icmp ne <4 x i32> %inp1, zeroinitializer
  %1 = select <4 x i1> %0, <4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double> <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
  %2 = fmul <4 x double> %inp2, %1
  %3 = fmul <4 x double> %inp2, %2
  %4 = fmul <4 x double> %3, %inp2
  store <4 x double> %4, <4 x double>* undef, align 8
  br label %return
 ; CHECK-LABEL: @gsl_sf_legendre_Pl_deriv_array
 ; CHECK: qvlfiwzx
 ; CHECK: qvfcfidu
 ; CHECK: qvfcmpeq
 ; CHECK: qvfsel
 ; CHECK: qvfmul
 return:                                           ; preds = %vector.body198
  ret void
 }
 attributes #0 = { nounwind }
--- a/test/CodeGen/PowerPC/qpx-store.ll
+++ b/test/CodeGen/PowerPC/qpx-store.ll
@ -1,25 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target triple = "powerpc64-bgq-linux"
 ; foo: stores a <4 x double> with only 8-byte alignment. The expectations
 ; after the function require four scalar stfd stores (one per lane) before
 ; the return, rather than a single vector store.
 define void @foo(<4 x double> %v, <4 x double>* %p) {
 entry:
  store <4 x double> %v, <4 x double>* %p, align 8
  ret void
 }
 ; CHECK: @foo
 ; CHECK: stfd
 ; CHECK: stfd
 ; CHECK: stfd
 ; CHECK: stfd
 ; CHECK: blr
 ; bar: stores a <4 x double> with 32-byte alignment. The only expectation is
 ; that a vector store qvstfdx appears after the @bar label.
 define void @bar(<4 x double> %v, <4 x double>* %p) {
 entry:
  store <4 x double> %v, <4 x double>* %p, align 32
  ret void
 }
 ; CHECK: @bar
 ; CHECK: qvstfdx
--- a/test/CodeGen/PowerPC/qpx-unal-cons-lds.ll
+++ b/test/CodeGen/PowerPC/qpx-unal-cons-lds.ll
@ -1,217 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 ; Function Attrs: nounwind
 ; foo: a loop body unrolled 16 times. Each copy loads an <8 x double> from %b
 ; (8-byte aligned, at element offset 2 * index), keeps elements 0, 2, 4 and 6
 ; with a shufflevector, adds 1.0 to every lane and stores the <4 x double>
 ; result to %a (8-byte aligned). The index advances by 64 per iteration and
 ; the loop exits when it reaches 1600. The target CPU comes from attribute
 ; group #0 ("target-cpu"="a2q"), not from the RUN line. The expectations
 ; constrain only the constants materialized with "li".
 define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
 entry:
  br label %vector.body
 ; CHECK-LABEL: @foo
 ; Make sure that the offset constants we use are all even (only the last should be odd).
 ; CHECK-DAG: li {{[0-9]+}}, 1056
 ; CHECK-DAG: li {{[0-9]+}}, 1088
 ; CHECK-DAG: li {{[0-9]+}}, 1152
 ; CHECK-DAG: li {{[0-9]+}}, 1216
 ; CHECK-DAG: li {{[0-9]+}}, 1280
 ; CHECK-DAG: li {{[0-9]+}}, 1344
 ; CHECK-DAG: li {{[0-9]+}}, 1408
 ; CHECK-DAG: li {{[0-9]+}}, 1472
 ; CHECK-DAG: li {{[0-9]+}}, 1536
 ; CHECK-DAG: li {{[0-9]+}}, 1600
 ; CHECK-DAG: li {{[0-9]+}}, 1568
 ; CHECK-DAG: li {{[0-9]+}}, 1664
 ; CHECK-DAG: li {{[0-9]+}}, 1632
 ; CHECK-DAG: li {{[0-9]+}}, 1728
 ; CHECK-DAG: li {{[0-9]+}}, 1696
 ; CHECK-DAG: li {{[0-9]+}}, 1792
 ; CHECK-DAG: li {{[0-9]+}}, 1760
 ; CHECK-DAG: li {{[0-9]+}}, 1856
 ; CHECK-DAG: li {{[0-9]+}}, 1824
 ; CHECK-DAG: li {{[0-9]+}}, 1920
 ; CHECK-DAG: li {{[0-9]+}}, 1888
 ; CHECK-DAG: li {{[0-9]+}}, 1984
 ; CHECK-DAG: li {{[0-9]+}}, 1952
 ; CHECK-DAG: li {{[0-9]+}}, 2016
 ; CHECK-DAG: li {{[0-9]+}}, 1024
 ; CHECK-DAG: li {{[0-9]+}}, 1120
 ; CHECK-DAG: li {{[0-9]+}}, 1184
 ; CHECK-DAG: li {{[0-9]+}}, 1248
 ; CHECK-DAG: li {{[0-9]+}}, 1312
 ; CHECK-DAG: li {{[0-9]+}}, 1376
 ; CHECK-DAG: li {{[0-9]+}}, 1440
 ; CHECK-DAG: li {{[0-9]+}}, 1504
 ; CHECK-DAG: li {{[0-9]+}}, 2047
 ; CHECK: blr
 vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
  %0 = shl i64 %index, 1
  %1 = getelementptr inbounds double, double* %b, i64 %0
  %2 = bitcast double* %1 to <8 x double>*
  %wide.vec = load <8 x double>, <8 x double>* %2, align 8
  %strided.vec = shufflevector <8 x double> %wide.vec, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %3 = fadd <4 x double> %strided.vec, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %4 = getelementptr inbounds double, double* %a, i64 %index
  %5 = bitcast double* %4 to <4 x double>*
  store <4 x double> %3, <4 x double>* %5, align 8
  %index.next = or i64 %index, 4
  %6 = shl i64 %index.next, 1
  %7 = getelementptr inbounds double, double* %b, i64 %6
  %8 = bitcast double* %7 to <8 x double>*
  %wide.vec.1 = load <8 x double>, <8 x double>* %8, align 8
  %strided.vec.1 = shufflevector <8 x double> %wide.vec.1, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %9 = fadd <4 x double> %strided.vec.1, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %10 = getelementptr inbounds double, double* %a, i64 %index.next
  %11 = bitcast double* %10 to <4 x double>*
  store <4 x double> %9, <4 x double>* %11, align 8
  %index.next.1 = or i64 %index, 8
  %12 = shl i64 %index.next.1, 1
  %13 = getelementptr inbounds double, double* %b, i64 %12
  %14 = bitcast double* %13 to <8 x double>*
  %wide.vec.2 = load <8 x double>, <8 x double>* %14, align 8
  %strided.vec.2 = shufflevector <8 x double> %wide.vec.2, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %15 = fadd <4 x double> %strided.vec.2, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %16 = getelementptr inbounds double, double* %a, i64 %index.next.1
  %17 = bitcast double* %16 to <4 x double>*
  store <4 x double> %15, <4 x double>* %17, align 8
  %index.next.2 = or i64 %index, 12
  %18 = shl i64 %index.next.2, 1
  %19 = getelementptr inbounds double, double* %b, i64 %18
  %20 = bitcast double* %19 to <8 x double>*
  %wide.vec.3 = load <8 x double>, <8 x double>* %20, align 8
  %strided.vec.3 = shufflevector <8 x double> %wide.vec.3, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %21 = fadd <4 x double> %strided.vec.3, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %22 = getelementptr inbounds double, double* %a, i64 %index.next.2
  %23 = bitcast double* %22 to <4 x double>*
  store <4 x double> %21, <4 x double>* %23, align 8
  %index.next.3 = or i64 %index, 16
  %24 = shl i64 %index.next.3, 1
  %25 = getelementptr inbounds double, double* %b, i64 %24
  %26 = bitcast double* %25 to <8 x double>*
  %wide.vec.4 = load <8 x double>, <8 x double>* %26, align 8
  %strided.vec.4 = shufflevector <8 x double> %wide.vec.4, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %27 = fadd <4 x double> %strided.vec.4, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %28 = getelementptr inbounds double, double* %a, i64 %index.next.3
  %29 = bitcast double* %28 to <4 x double>*
  store <4 x double> %27, <4 x double>* %29, align 8
  %index.next.4 = or i64 %index, 20
  %30 = shl i64 %index.next.4, 1
  %31 = getelementptr inbounds double, double* %b, i64 %30
  %32 = bitcast double* %31 to <8 x double>*
  %wide.vec.5 = load <8 x double>, <8 x double>* %32, align 8
  %strided.vec.5 = shufflevector <8 x double> %wide.vec.5, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %33 = fadd <4 x double> %strided.vec.5, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %34 = getelementptr inbounds double, double* %a, i64 %index.next.4
  %35 = bitcast double* %34 to <4 x double>*
  store <4 x double> %33, <4 x double>* %35, align 8
  %index.next.5 = or i64 %index, 24
  %36 = shl i64 %index.next.5, 1
  %37 = getelementptr inbounds double, double* %b, i64 %36
  %38 = bitcast double* %37 to <8 x double>*
  %wide.vec.6 = load <8 x double>, <8 x double>* %38, align 8
  %strided.vec.6 = shufflevector <8 x double> %wide.vec.6, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %39 = fadd <4 x double> %strided.vec.6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %40 = getelementptr inbounds double, double* %a, i64 %index.next.5
  %41 = bitcast double* %40 to <4 x double>*
  store <4 x double> %39, <4 x double>* %41, align 8
  %index.next.6 = or i64 %index, 28
  %42 = shl i64 %index.next.6, 1
  %43 = getelementptr inbounds double, double* %b, i64 %42
  %44 = bitcast double* %43 to <8 x double>*
  %wide.vec.7 = load <8 x double>, <8 x double>* %44, align 8
  %strided.vec.7 = shufflevector <8 x double> %wide.vec.7, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %45 = fadd <4 x double> %strided.vec.7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %46 = getelementptr inbounds double, double* %a, i64 %index.next.6
  %47 = bitcast double* %46 to <4 x double>*
  store <4 x double> %45, <4 x double>* %47, align 8
  %index.next.7 = or i64 %index, 32
  %48 = shl i64 %index.next.7, 1
  %49 = getelementptr inbounds double, double* %b, i64 %48
  %50 = bitcast double* %49 to <8 x double>*
  %wide.vec.8 = load <8 x double>, <8 x double>* %50, align 8
  %strided.vec.8 = shufflevector <8 x double> %wide.vec.8, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %51 = fadd <4 x double> %strided.vec.8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %52 = getelementptr inbounds double, double* %a, i64 %index.next.7
  %53 = bitcast double* %52 to <4 x double>*
  store <4 x double> %51, <4 x double>* %53, align 8
  %index.next.8 = or i64 %index, 36
  %54 = shl i64 %index.next.8, 1
  %55 = getelementptr inbounds double, double* %b, i64 %54
  %56 = bitcast double* %55 to <8 x double>*
  %wide.vec.9 = load <8 x double>, <8 x double>* %56, align 8
  %strided.vec.9 = shufflevector <8 x double> %wide.vec.9, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %57 = fadd <4 x double> %strided.vec.9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %58 = getelementptr inbounds double, double* %a, i64 %index.next.8
  %59 = bitcast double* %58 to <4 x double>*
  store <4 x double> %57, <4 x double>* %59, align 8
  %index.next.9 = or i64 %index, 40
  %60 = shl i64 %index.next.9, 1
  %61 = getelementptr inbounds double, double* %b, i64 %60
  %62 = bitcast double* %61 to <8 x double>*
  %wide.vec.10 = load <8 x double>, <8 x double>* %62, align 8
  %strided.vec.10 = shufflevector <8 x double> %wide.vec.10, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %63 = fadd <4 x double> %strided.vec.10, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %64 = getelementptr inbounds double, double* %a, i64 %index.next.9
  %65 = bitcast double* %64 to <4 x double>*
  store <4 x double> %63, <4 x double>* %65, align 8
  %index.next.10 = or i64 %index, 44
  %66 = shl i64 %index.next.10, 1
  %67 = getelementptr inbounds double, double* %b, i64 %66
  %68 = bitcast double* %67 to <8 x double>*
  %wide.vec.11 = load <8 x double>, <8 x double>* %68, align 8
  %strided.vec.11 = shufflevector <8 x double> %wide.vec.11, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %69 = fadd <4 x double> %strided.vec.11, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %70 = getelementptr inbounds double, double* %a, i64 %index.next.10
  %71 = bitcast double* %70 to <4 x double>*
  store <4 x double> %69, <4 x double>* %71, align 8
  %index.next.11 = or i64 %index, 48
  %72 = shl i64 %index.next.11, 1
  %73 = getelementptr inbounds double, double* %b, i64 %72
  %74 = bitcast double* %73 to <8 x double>*
  %wide.vec.12 = load <8 x double>, <8 x double>* %74, align 8
  %strided.vec.12 = shufflevector <8 x double> %wide.vec.12, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %75 = fadd <4 x double> %strided.vec.12, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %76 = getelementptr inbounds double, double* %a, i64 %index.next.11
  %77 = bitcast double* %76 to <4 x double>*
  store <4 x double> %75, <4 x double>* %77, align 8
  %index.next.12 = or i64 %index, 52
  %78 = shl i64 %index.next.12, 1
  %79 = getelementptr inbounds double, double* %b, i64 %78
  %80 = bitcast double* %79 to <8 x double>*
  %wide.vec.13 = load <8 x double>, <8 x double>* %80, align 8
  %strided.vec.13 = shufflevector <8 x double> %wide.vec.13, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %81 = fadd <4 x double> %strided.vec.13, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %82 = getelementptr inbounds double, double* %a, i64 %index.next.12
  %83 = bitcast double* %82 to <4 x double>*
  store <4 x double> %81, <4 x double>* %83, align 8
  %index.next.13 = or i64 %index, 56
  %84 = shl i64 %index.next.13, 1
  %85 = getelementptr inbounds double, double* %b, i64 %84
  %86 = bitcast double* %85 to <8 x double>*
  %wide.vec.14 = load <8 x double>, <8 x double>* %86, align 8
  %strided.vec.14 = shufflevector <8 x double> %wide.vec.14, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %87 = fadd <4 x double> %strided.vec.14, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %88 = getelementptr inbounds double, double* %a, i64 %index.next.13
  %89 = bitcast double* %88 to <4 x double>*
  store <4 x double> %87, <4 x double>* %89, align 8
  %index.next.14 = or i64 %index, 60
  %90 = shl i64 %index.next.14, 1
  %91 = getelementptr inbounds double, double* %b, i64 %90
  %92 = bitcast double* %91 to <8 x double>*
  %wide.vec.15 = load <8 x double>, <8 x double>* %92, align 8
  %strided.vec.15 = shufflevector <8 x double> %wide.vec.15, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %93 = fadd <4 x double> %strided.vec.15, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  %94 = getelementptr inbounds double, double* %a, i64 %index.next.14
  %95 = bitcast double* %94 to <4 x double>*
  store <4 x double> %93, <4 x double>* %95, align 8
  %index.next.15 = add nsw i64 %index, 64
  %96 = icmp eq i64 %index.next.15, 1600
  br i1 %96, label %for.cond.cleanup, label %vector.body
 for.cond.cleanup:                                 ; preds = %vector.body
  ret void
 }
 attributes #0 = { nounwind "target-cpu"="a2q" }
--- a/test/CodeGen/PowerPC/qpx-unalperm.ll
+++ b/test/CodeGen/PowerPC/qpx-unalperm.ll
@ -1,64 +0,0 @@
 ; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-bgq-linux"
 define <4 x double> @foo(<4 x double>* %a) {
 entry:
  %r = load <4 x double>, <4 x double>* %a, align 32
  ret <4 x double> %r
 ; CHECK: qvlfdx
 ; CHECK: blr
 }
 define <4 x double> @bar(<4 x double>* %a) {
 entry:
  %r = load <4 x double>, <4 x double>* %a, align 8
  %b = getelementptr <4 x double>, <4 x double>* %a, i32 16
  %s = load <4 x double>, <4 x double>* %b, align 32
  %t = fadd <4 x double> %r, %s
  ret <4 x double> %t
 ; CHECK: qvlpcldx
 ; CHECK: qvlfdx
 ; CHECK: qvfperm
 ; CHECK: blr
 }
 define <4 x double> @bar1(<4 x double>* %a) {
 entry:
  %r = load <4 x double>, <4 x double>* %a, align 8
  %b = getelementptr <4 x double>, <4 x double>* %a, i32 16
  %s = load <4 x double>, <4 x double>* %b, align 8
  %t = fadd <4 x double> %r, %s
  ret <4 x double> %t
 }
 define <4 x double> @bar2(<4 x double>* %a) {
 entry:
  %r = load <4 x double>, <4 x double>* %a, align 8
  %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
  %s = load <4 x double>, <4 x double>* %b, align 32
  %t = fadd <4 x double> %r, %s
  ret <4 x double> %t
 }
 define <4 x double> @bar3(<4 x double>* %a) {
 entry:
  %r = load <4 x double>, <4 x double>* %a, align 8
  %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
  %s = load <4 x double>, <4 x double>* %b, align 8
  %t = fadd <4 x double> %r, %s
  ret <4 x double> %t
 }
 define <4 x double> @bar4(<4 x double>* %a) {
 entry:
  %r = load <4 x double>, <4 x double>* %a, align 8
  %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
  %s = load <4 x double>, <4 x double>* %b, align 8
  %c = getelementptr <4 x double>, <4 x double>* %b, i32 1
  %t = load <4 x double>, <4 x double>* %c, align 8
  %u = fadd <4 x double> %r, %s
  %v = fadd <4 x double> %u, %t
  ret <4 x double> %v
 }
--- a/test/CodeGen/PowerPC/rlwimi-and.ll
+++ b/test/CodeGen/PowerPC/rlwimi-and.ll
@ -1,6 +1,4 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-crbits < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-crbits -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-bgq-linux"
 define void @test() align 2 {
 entry:
--- a/test/CodeGen/PowerPC/rlwinm_rldicl_to_andi.mir
+++ b/test/CodeGen/PowerPC/rlwinm_rldicl_to_andi.mir
@ -60,7 +60,7 @@
    ret i64 %cond
  }
-  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
  !llvm.module.flags = !{!0, !1}
  !llvm.ident = !{!2}
--- a/test/CodeGen/PowerPC/s000-alias-misched.ll
+++ b/test/CodeGen/PowerPC/s000-alias-misched.ll
@ -1,7 +1,6 @@
-; RUN: llc -verify-machineinstrs < %s -enable-misched -mcpu=a2 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -enable-misched -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -enable-misched -enable-aa-sched-mi -mcpu=a2 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -enable-misched -enable-aa-sched-mi -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-bgq-linux"
@aa = external global [256 x [256 x double]], align 32
@bb = external global [256 x [256 x double]], align 32
--- a/test/CodeGen/PowerPC/select-i1-vs-i1.ll
+++ b/test/CodeGen/PowerPC/select-i1-vs-i1.ll
@ -1225,576 +1225,5 @@ entry:
 ; CHECK: blr
 }
 define <4 x double> @testqv4doubleslt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doubleslt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x double> @testqv4doubleult(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doubleult
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x double> @testqv4doublesle(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doublesle
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x double> @testqv4doubleule(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doubleule
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x double> @testqv4doubleeq(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doubleeq
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: bclr 12, [[REG1]], 0
 ; CHECK: qvfmr 1, 6
 ; CHECK: blr
 }
 define <4 x double> @testqv4doublesge(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doublesge
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x double> @testqv4doubleuge(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doubleuge
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x double> @testqv4doublesgt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doublesgt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x double> @testqv4doubleugt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doubleugt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x double> @testqv4doublene(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
  ret <4 x double> %cond
 ; CHECK-LABEL: @testqv4doublene
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: bclr 12, [[REG1]], 0
 ; CHECK: qvfmr 1, 6
 ; CHECK: blr
 }
 define <4 x float> @testqv4floatslt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floatslt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x float> @testqv4floatult(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floatult
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x float> @testqv4floatsle(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floatsle
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x float> @testqv4floatule(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floatule
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x float> @testqv4floateq(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floateq
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: bclr 12, [[REG1]], 0
 ; CHECK: qvfmr 1, 6
 ; CHECK: blr
 }
 define <4 x float> @testqv4floatsge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floatsge
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x float> @testqv4floatuge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floatuge
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x float> @testqv4floatsgt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floatsgt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x float> @testqv4floatugt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floatugt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x float> @testqv4floatne(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
  ret <4 x float> %cond
 ; CHECK-LABEL: @testqv4floatne
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: bclr 12, [[REG1]], 0
 ; CHECK: qvfmr 1, 6
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1slt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1slt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1ult(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1ult
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1sle(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1sle
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1ule(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1ule
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1eq(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1eq
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: bclr 12, [[REG1]], 0
 ; CHECK: qvfmr 1, 6
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1sge(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1sge
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1uge(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1uge
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB]]
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1sgt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1sgt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 4, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 4, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1ugt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1ugt
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK: bc 12, 2, .LBB[[BB1:[0-9_]+]]
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: bc 12, 2, .LBB[[BB2:[0-9_]+]]
 ; CHECK: .LBB[[BB1]]:
 ; CHECK: qvfmr 5, 6
 ; CHECK: .LBB[[BB2]]:
 ; CHECK: qvfmr 1, 5
 ; CHECK: blr
 }
 define <4 x i1> @testqv4i1ne(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
 entry:
  %cmp1 = fcmp oeq float %c3, %c4
  %cmp3tmp = fcmp oeq float %c1, %c2
  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
  ret <4 x i1> %cond
 ; CHECK-LABEL: @testqv4i1ne
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: bclr 12, [[REG1]], 0
 ; CHECK: qvfmr 1, 6
 ; CHECK: blr
 }
 attributes #0 = { nounwind readnone "target-cpu"="pwr7" }
 attributes #1 = { nounwind readnone "target-cpu"="a2q" }
--- a/test/CodeGen/PowerPC/selectiondag-extload-computeknownbits.ll
+++ b/test/CodeGen/PowerPC/selectiondag-extload-computeknownbits.ll
@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux < %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux < %s
 ; Check that llc does not crash due to an illegal APInt operation
--- a/test/CodeGen/PowerPC/setcr_bc.mir
+++ b/test/CodeGen/PowerPC/setcr_bc.mir
@ -32,8 +32,8 @@
    ret i32 %call2.i.sink
  }
-  attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
-  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
 ...
 ---
--- a/test/CodeGen/PowerPC/setcr_bc2.mir
+++ b/test/CodeGen/PowerPC/setcr_bc2.mir
@ -32,8 +32,8 @@
    ret i32 %call2.i.sink
  }
-  attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
-  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
 ...
 ---
--- a/test/CodeGen/PowerPC/stwu-sched.ll
+++ b/test/CodeGen/PowerPC/stwu-sched.ll
@ -58,7 +58,7 @@ define void @initCombList(%0* nocapture, i32 signext) local_unnamed_addr #0 {
  ret void
 }
-attributes #0 = { norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
 !llvm.ident = !{!0}
--- a/test/CodeGen/PowerPC/unal-vec-ldst.ll
+++ b/test/CodeGen/PowerPC/unal-vec-ldst.ll
@ -327,72 +327,6 @@ entry:
 }
 define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
 ; CHECK-LABEL: test_l_qv4float:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li 4, 15
 ; CHECK-NEXT:    qvlpclsx 0, 0, 3
 ; CHECK-NEXT:    qvlfsx 1, 3, 4
 ; CHECK-NEXT:    qvlfsx 2, 0, 3
 ; CHECK-NEXT:    qvfperm 1, 2, 1, 0
 ; CHECK-NEXT:    blr
 entry:
  %r = load <4 x float>, <4 x float>* %p, align 4
  ret <4 x float> %r
 }
 define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
 ; CHECK-LABEL: test_l_qv8float:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li 4, 31
 ; CHECK-NEXT:    qvlpclsx 1, 0, 3
 ; CHECK-NEXT:    qvlfsx 0, 3, 4
 ; CHECK-NEXT:    li 4, 16
 ; CHECK-NEXT:    qvlfsx 3, 3, 4
 ; CHECK-NEXT:    qvlfsx 4, 0, 3
 ; CHECK-NEXT:    qvfperm 2, 3, 0, 1
 ; CHECK-NEXT:    qvfperm 1, 4, 3, 1
 ; CHECK-NEXT:    blr
 entry:
  %r = load <8 x float>, <8 x float>* %p, align 4
  ret <8 x float> %r
 }
 define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
 ; CHECK-LABEL: test_l_qv4double:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li 4, 31
 ; CHECK-NEXT:    qvlpcldx 0, 0, 3
 ; CHECK-NEXT:    qvlfdx 1, 3, 4
 ; CHECK-NEXT:    qvlfdx 2, 0, 3
 ; CHECK-NEXT:    qvfperm 1, 2, 1, 0
 ; CHECK-NEXT:    blr
 entry:
  %r = load <4 x double>, <4 x double>* %p, align 8
  ret <4 x double> %r
 }
 define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
 ; CHECK-LABEL: test_l_qv8double:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li 4, 63
 ; CHECK-NEXT:    qvlpcldx 1, 0, 3
 ; CHECK-NEXT:    qvlfdx 0, 3, 4
 ; CHECK-NEXT:    li 4, 32
 ; CHECK-NEXT:    qvlfdx 3, 3, 4
 ; CHECK-NEXT:    qvlfdx 4, 0, 3
 ; CHECK-NEXT:    qvfperm 2, 3, 0, 1
 ; CHECK-NEXT:    qvfperm 1, 4, 3, 1
 ; CHECK-NEXT:    blr
 entry:
  %r = load <8 x double>, <8 x double>* %p, align 8
  ret <8 x double> %r
 }
 define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
 ; CHECK-LABEL: test_s_v16i8:
 ; CHECK:       # %bb.0: # %entry
@ -537,89 +471,6 @@ entry:
 }
 ; Store test: writes the <4 x float> argument %v through %p with only 4-byte
 ; alignment. Compiled with attribute group #1 (target-cpu a2q).
 ; The expected assembly stores element by element: four scalar stfs stores at
 ; byte offsets 0, 4, 8 and 12, interleaved with three qvesplati instructions.
 ; NOTE(review): qvesplati presumably moves lanes 1-3 into the position stfs
 ; reads from -- confirm against the QPX ISA documentation.
 define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
 ; CHECK-LABEL: test_s_qv4float:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 0, 1, 3
 ; CHECK-NEXT:    stfs 1, 0(3)
 ; CHECK-NEXT:    stfs 0, 12(3)
 ; CHECK-NEXT:    qvesplati 0, 1, 2
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    stfs 0, 8(3)
 ; CHECK-NEXT:    stfs 1, 4(3)
 ; CHECK-NEXT:    blr
 entry:
  store <4 x float> %v, <4 x float>* %p, align 4
  ret void
 }
 ; Store test: writes the <8 x float> argument %v through %p with only 4-byte
 ; alignment. Compiled with attribute group #1 (target-cpu a2q).
 ; The expected assembly stores element by element: eight scalar stfs stores
 ; covering byte offsets 0 through 28 in steps of 4, interleaved with six
 ; qvesplati instructions.
 ; NOTE(review): %v presumably arrives split across two vector registers
 ; (1 and 2 in the expected output) -- confirm against the a2q calling convention.
 define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
 ; CHECK-LABEL: test_s_qv8float:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 0, 2, 3
 ; CHECK-NEXT:    stfs 2, 16(3)
 ; CHECK-NEXT:    stfs 0, 28(3)
 ; CHECK-NEXT:    qvesplati 0, 2, 2
 ; CHECK-NEXT:    qvesplati 2, 2, 1
 ; CHECK-NEXT:    stfs 1, 0(3)
 ; CHECK-NEXT:    stfs 0, 24(3)
 ; CHECK-NEXT:    qvesplati 0, 1, 3
 ; CHECK-NEXT:    stfs 2, 20(3)
 ; CHECK-NEXT:    qvesplati 2, 1, 2
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    stfs 0, 12(3)
 ; CHECK-NEXT:    stfs 2, 8(3)
 ; CHECK-NEXT:    stfs 1, 4(3)
 ; CHECK-NEXT:    blr
 entry:
  store <8 x float> %v, <8 x float>* %p, align 4
  ret void
 }
 ; Store test: writes the <4 x double> argument %v through %p with only 8-byte
 ; alignment. Compiled with attribute group #1 (target-cpu a2q).
 ; The expected assembly stores element by element: four scalar stfd stores at
 ; byte offsets 0, 8, 16 and 24, interleaved with three qvesplati instructions.
 ; NOTE(review): qvesplati presumably moves lanes 1-3 into the position stfd
 ; reads from -- confirm against the QPX ISA documentation.
 define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
 ; CHECK-LABEL: test_s_qv4double:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 0, 1, 3
 ; CHECK-NEXT:    stfd 1, 0(3)
 ; CHECK-NEXT:    stfd 0, 24(3)
 ; CHECK-NEXT:    qvesplati 0, 1, 2
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    stfd 0, 16(3)
 ; CHECK-NEXT:    stfd 1, 8(3)
 ; CHECK-NEXT:    blr
 entry:
  store <4 x double> %v, <4 x double>* %p, align 8
  ret void
 }
 ; Store test: writes the <8 x double> argument %v through %p with only 8-byte
 ; alignment. Compiled with attribute group #1 (target-cpu a2q).
 ; The expected assembly stores element by element: eight scalar stfd stores
 ; covering byte offsets 0 through 56 in steps of 8, interleaved with six
 ; qvesplati instructions.
 ; NOTE(review): %v presumably arrives split across two vector registers
 ; (1 and 2 in the expected output) -- confirm against the a2q calling convention.
 define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
 ; CHECK-LABEL: test_s_qv8double:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    qvesplati 0, 2, 3
 ; CHECK-NEXT:    stfd 2, 32(3)
 ; CHECK-NEXT:    stfd 0, 56(3)
 ; CHECK-NEXT:    qvesplati 0, 2, 2
 ; CHECK-NEXT:    qvesplati 2, 2, 1
 ; CHECK-NEXT:    stfd 1, 0(3)
 ; CHECK-NEXT:    stfd 0, 48(3)
 ; CHECK-NEXT:    qvesplati 0, 1, 3
 ; CHECK-NEXT:    stfd 2, 40(3)
 ; CHECK-NEXT:    qvesplati 2, 1, 2
 ; CHECK-NEXT:    qvesplati 1, 1, 1
 ; CHECK-NEXT:    stfd 0, 24(3)
 ; CHECK-NEXT:    stfd 2, 16(3)
 ; CHECK-NEXT:    stfd 1, 8(3)
 ; CHECK-NEXT:    blr
 entry:
  store <8 x double> %v, <8 x double>* %p, align 8
  ret void
 }
 ; Attribute groups referenced by the test functions. Each marks the function
 ; nounwind and selects the CPU it is compiled for: #0 = pwr7, #1 = a2q,
 ; #2 = pwr8. The test_l_qv* / test_s_qv* functions use #1.
 attributes #0 = { nounwind "target-cpu"="pwr7" }
 attributes #1 = { nounwind "target-cpu"="a2q" }
 attributes #2 = { nounwind "target-cpu"="pwr8" }
--- a/Show More
+++ b/Show More
`@ -1,4 +1,4 @@`
	`; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux < %s`	`; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux < %s`

	`; Check that llc does not crash due to an illegal APInt operation`	`; Check that llc does not crash due to an illegal APInt operation`