llvm-mirror/lib/Target/M68k/M68kISelLowering.cpp

//===-- M68kISelLowering.cpp - M68k DAG Lowering Impl ------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the interfaces that M68k uses to lower LLVM code into a
/// selection DAG.
///
//===----------------------------------------------------------------------===//

#include "M68kISelLowering.h"
#include "M68kCallingConv.h"
#include "M68kMachineFunction.h"
#include "M68kSubtarget.h"
#include "M68kTargetMachine.h"
#include "M68kTargetObjectFile.h"

#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "M68k-isel"

STATISTIC(NumTailCalls, "Number of tail calls");

M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM,
                                       const M68kSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI), TM(TM) {

  MVT PtrVT = MVT::i32;

  setBooleanContents(ZeroOrOneBooleanContent);

  auto *RegInfo = Subtarget.getRegisterInfo();
  setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());

  // Set up the register classes.
  addRegisterClass(MVT::i8, &M68k::DR8RegClass);
  addRegisterClass(MVT::i16, &M68k::XR16RegClass);
  addRegisterClass(MVT::i32, &M68k::XR32RegClass);

  for (auto VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8, Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8, Expand);
  setTruncStoreAction(MVT::i16, MVT::i8, Expand);

  setOperationAction(ISD::MUL, MVT::i8, Promote);
  setOperationAction(ISD::MUL, MVT::i16, Legal);
  if (Subtarget.atLeastM68020())
    setOperationAction(ISD::MUL, MVT::i32, Legal);
  else
    setOperationAction(ISD::MUL, MVT::i32, LibCall);
  setOperationAction(ISD::MUL, MVT::i64, LibCall);

  for (auto OP :
       {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::UDIVREM, ISD::SDIVREM,
        ISD::MULHS, ISD::MULHU, ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
    setOperationAction(OP, MVT::i8, Promote);
    setOperationAction(OP, MVT::i16, Legal);
    setOperationAction(OP, MVT::i32, LibCall);
  }

  for (auto OP : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
    setOperationAction(OP, MVT::i8, Expand);
    setOperationAction(OP, MVT::i16, Expand);
  }

  // FIXME It would be better to use a custom lowering
  for (auto OP : {ISD::SMULO, ISD::UMULO}) {
    setOperationAction(OP, MVT::i8, Expand);
    setOperationAction(OP, MVT::i16, Expand);
    setOperationAction(OP, MVT::i32, Expand);
  }

  // Add/Sub overflow ops with MVT::Glues are lowered to CCR dependences.
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    setOperationAction(ISD::ADDC, VT, Custom);
    setOperationAction(ISD::ADDE, VT, Custom);
    setOperationAction(ISD::SUBC, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Custom);
  }

  // SADDO and friends are legal with this setup, i hope
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    setOperationAction(ISD::SADDO, VT, Custom);
    setOperationAction(ISD::UADDO, VT, Custom);
    setOperationAction(ISD::SSUBO, VT, Custom);
    setOperationAction(ISD::USUBO, VT, Custom);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    setOperationAction(ISD::BR_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::SETCCCARRY, VT, Custom);
  }

  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    setOperationAction(ISD::BSWAP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
  }

  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  computeRegisterProperties(STI.getRegisterInfo());

  // 2^2 bytes
  // FIXME can it be just 2^1?
  setMinFunctionAlignment(Align::Constant<2>());
}

EVT M68kTargetLowering::getSetCCResultType(const DataLayout &DL,
                                           LLVMContext &Context, EVT VT) const {
  // M68k SETcc producess either 0x00 or 0xFF
  return MVT::i8;
}

MVT M68kTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                               EVT Ty) const {
  if (Ty.isSimple()) {
    return Ty.getSimpleVT();
  }
  return MVT::getIntegerVT(8 * DL.getPointerSize(0));
}

#include "M68kGenCallingConv.inc"

enum StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn };

static StructReturnType
callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  if (Outs.empty())
    return NotStructReturn;

  const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
  if (!Flags.isSRet())
    return NotStructReturn;
  if (Flags.isInReg())
    return RegStructReturn;
  return StackStructReturn;
}

/// Determines whether a function uses struct return semantics.
static StructReturnType
argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
  if (Ins.empty())
    return NotStructReturn;

  const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
  if (!Flags.isSRet())
    return NotStructReturn;
  if (Flags.isInReg())
    return RegStructReturn;
  return StackStructReturn;
}

/// Make a copy of an aggregate at address specified by "Src" to address
/// "Dst" with size and alignment information specified by the specific
/// parameter attribute. The copy will be passed as a byval function parameter.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &DL) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), DL, MVT::i32);

  return DAG.getMemcpy(
      Chain, DL, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
      /*isVolatile=*/false, /*AlwaysInline=*/true,
      /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
}

/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) { return false; }

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  // C calling conventions:
  case CallingConv::C:
    return true;
  default:
    return canGuaranteeTCO(CC);
  }
}

/// Return true if the function is being made into a tailcall target by
/// changing its ABI.
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
  return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
}

/// Return true if the given stack call argument is already available in the
/// same position (relatively) of the caller's incoming argument stack.
static bool MatchingStackOffset(SDValue Arg, unsigned Offset,
                                ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI,
                                const MachineRegisterInfo *MRI,
                                const M68kInstrInfo *TII,
                                const CCValAssign &VA) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;

  for (;;) {
    // Look through nodes that don't alter the bits of the incoming value.
    unsigned Op = Arg.getOpcode();
    if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
      Arg = Arg.getOperand(0);
      continue;
    }
    if (Op == ISD::TRUNCATE) {
      const SDValue &TruncInput = Arg.getOperand(0);
      if (TruncInput.getOpcode() == ISD::AssertZext &&
          cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
              Arg.getValueType()) {
        Arg = TruncInput.getOperand(0);
        continue;
      }
    }
    break;
  }

  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!Register::isVirtualRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(*Def, FI))
        return false;
    } else {
      unsigned Opcode = Def->getOpcode();
      if ((Opcode == M68k::LEA32p || Opcode == M68k::LEA32f) &&
          Def->getOperand(1).isFI()) {
        FI = Def->getOperand(1).getIndex();
        Bytes = Flags.getByValSize();
      } else
        return false;
    }
  } else if (auto *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
    FI = FINode->getIndex();
    Bytes = Flags.getByValSize();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI.isFixedObjectIndex(FI))
    return false;

  if (Offset != MFI.getObjectOffset(FI))
    return false;

  if (VA.getLocVT().getSizeInBits() > Arg.getValueType().getSizeInBits()) {
    // If the argument location is wider than the argument type, check that any
    // extension flags match.
    if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
        Flags.isSExt() != MFI.isObjectSExt(FI)) {
      return false;
    }
  }

  return Bytes == MFI.getObjectSize(FI);
}

SDValue
M68kTargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  M68kMachineFunctionInfo *FuncInfo = MF.getInfo<M68kMachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    unsigned SlotSize = Subtarget.getSlotSize();
    ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(
        SlotSize, -(int64_t)SlotSize, false);
    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
}

SDValue M68kTargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
                                                    SDValue &OutRetAddr,
                                                    SDValue Chain,
                                                    bool IsTailCall, int FPDiff,
                                                    const SDLoc &DL) const {
  EVT VT = getPointerTy(DAG.getDataLayout());
  OutRetAddr = getReturnAddressFrameIndex(DAG);

  // Load the "old" Return address.
  OutRetAddr = DAG.getLoad(VT, DL, Chain, OutRetAddr, MachinePointerInfo());
  return SDValue(OutRetAddr.getNode(), 1);
}

SDValue M68kTargetLowering::EmitTailCallStoreRetAddr(
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetFI,
    EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &DL) const {
  if (!FPDiff)
    return Chain;

  // Calculate the new stack slot for the return address.
  int NewFO = MF.getFrameInfo().CreateFixedObject(
      SlotSize, (int64_t)FPDiff - SlotSize, false);

  SDValue NewFI = DAG.getFrameIndex(NewFO, PtrVT);
  // Store the return address to the appropriate stack slot.
  Chain = DAG.getStore(
      Chain, DL, RetFI, NewFI,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFO));
  return Chain;
}

SDValue
M68kTargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                     const SDLoc &DL, SelectionDAG &DAG,
                                     const CCValAssign &VA,
                                     MachineFrameInfo &MFI,
                                     unsigned ArgIdx) const {
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[ArgIdx].Flags;
  EVT ValVT;

  // If value is passed by pointer we have address passed instead of the value
  // itself.
  if (VA.getLocInfo() == CCValAssign::Indirect)
    ValVT = VA.getLocVT();
  else
    ValVT = VA.getValVT();

  // Because we are dealing with BE architecture we need to offset loading of
  // partial types
  int Offset = VA.getLocMemOffset();
  if (VA.getValVT() == MVT::i8) {
    Offset += 3;
  } else if (VA.getValVT() == MVT::i16) {
    Offset += 2;
  }

  // TODO Interrupt handlers
  // Calculate SP offset of interrupt parameter, re-arrange the slot normally
  // taken by a return address.

  // FIXME For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis. In case of tail call optimization mark all
  // arguments mutable. Since they could be overwritten by lowering of arguments
  // in case of a tail call.
  bool AlwaysUseMutable = shouldGuaranteeTCO(
      CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
  bool IsImmutable = !AlwaysUseMutable && !Flags.isByVal();

  if (Flags.isByVal()) {
    unsigned Bytes = Flags.getByValSize();
    if (Bytes == 0)
      Bytes = 1; // Don't create zero-sized stack objects.
    int FI = MFI.CreateFixedObject(Bytes, Offset, IsImmutable);
    // TODO Interrupt handlers
    // Adjust SP offset of interrupt parameter.
    return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
  } else {
    int FI =
        MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, Offset, IsImmutable);

    // Set SExt or ZExt flag.
    if (VA.getLocInfo() == CCValAssign::ZExt) {
      MFI.setObjectZExt(FI, true);
    } else if (VA.getLocInfo() == CCValAssign::SExt) {
      MFI.setObjectSExt(FI, true);
    }

    // TODO Interrupt handlers
    // Adjust SP offset of interrupt parameter.

    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue Val = DAG.getLoad(
        ValVT, DL, Chain, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
    return VA.isExtInLoc() ? DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val)
                           : Val;
  }
}

SDValue M68kTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
                                             SDValue Arg, const SDLoc &DL,
                                             SelectionDAG &DAG,
                                             const CCValAssign &VA,
                                             ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, DL);
  PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),
                       StackPtr, PtrOff);
  if (Flags.isByVal())
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, DL);

  return DAG.getStore(
      Chain, DL, Arg, PtrOff,
      MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
}

//===----------------------------------------------------------------------===//
//                                   Call
//===----------------------------------------------------------------------===//

SDValue M68kTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool &IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;

  MachineFunction &MF = DAG.getMachineFunction();
  StructReturnType SR = callIsStructReturn(Outs);
  bool IsSibcall = false;
  M68kMachineFunctionInfo *MFI = MF.getInfo<M68kMachineFunctionInfo>();
  // const M68kRegisterInfo *TRI = Subtarget.getRegisterInfo();

  if (CallConv == CallingConv::M68k_INTR)
    report_fatal_error("M68k interrupts may not be called directly");

  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsBool())
    IsTailCall = false;

  // FIXME Add tailcalls support

  bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
  if (IsMustTail) {
    // Force this to be a tail call.  The verifier rules are enough to ensure
    // that we can lower this successfully without moving the return address
    // around.
    IsTailCall = true;
  } else if (IsTailCall) {
    // Check if it's really possible to do a tail call.
    IsTailCall = IsEligibleForTailCallOptimization(
        Callee, CallConv, IsVarArg, SR != NotStructReturn,
        MF.getFunction().hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins,
        DAG);

    // Sibcalls are automatically detected tailcalls which do not require
    // ABI changes.
    if (!MF.getTarget().Options.GuaranteedTailCallOpt && IsTailCall)
      IsSibcall = true;

    if (IsTailCall)
      ++NumTailCalls;
  }

  assert(!(IsVarArg && canGuaranteeTCO(CallConv)) &&
         "Var args not supported with calling convention fastcc");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  // It is empty for LibCall
  const Function *CalleeFunc = CLI.CB ? CLI.CB->getCalledFunction() : nullptr;
  M68kCCState CCInfo(*CalleeFunc, CallConv, IsVarArg, MF, ArgLocs,
                     *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs, CC_M68k);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
  if (IsSibcall) {
    // This is a sibcall. The memory operands are available in caller's
    // own caller's stack.
    NumBytes = 0;
  } else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
             canGuaranteeTCO(CallConv)) {
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
  }

  int FPDiff = 0;
  if (IsTailCall && !IsSibcall && !IsMustTail) {
    // Lower arguments at fp - stackoffset + fpdiff.
    unsigned NumBytesCallerPushed = MFI->getBytesToPopOnReturn();

    FPDiff = NumBytesCallerPushed - NumBytes;

    // Set the delta of movement of the returnaddr stackslot.
    // But only set if delta is greater than previous delta.
    if (FPDiff < MFI->getTCReturnAddrDelta())
      MFI->setTCReturnAddrDelta(FPDiff);
  }

  unsigned NumBytesToPush = NumBytes;
  unsigned NumBytesToPop = NumBytes;

  // If we have an inalloca argument, all stack space has already been allocated
  // for us and be right at the top of the stack.  We don't support multiple
  // arguments passed in memory when using inalloca.
  if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
    NumBytesToPush = 0;
    if (!ArgLocs.back().isMemLoc())
      report_fatal_error("cannot use inalloca attribute on a register "
                         "parameter");
    if (ArgLocs.back().getLocMemOffset() != 0)
      report_fatal_error("any parameter with the inalloca attribute must be "
                         "the only memory argument");
  }

  if (!IsSibcall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
                                 NumBytes - NumBytesToPush, DL);

  SDValue RetFI;
  // Load return address for tail calls.
  if (IsTailCall && FPDiff)
    Chain = EmitTailCallLoadRetAddr(DAG, RetFI, Chain, IsTailCall, FPDiff, DL);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization arguments are handle later.
  const M68kRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Skip inalloca arguments, they have already been written.
    if (Flags.isInAlloca())
      continue;

    CCValAssign &VA = ArgLocs[i];
    EVT RegVT = VA.getLocVT();
    SDValue Arg = OutVals[i];
    bool IsByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, RegVT, Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, RegVT, Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, RegVT, Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getBitcast(RegVT, Arg);
      break;
    case CCValAssign::Indirect: {
      // Store the argument.
      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      Chain = DAG.getStore(
          Chain, DL, Arg, SpillSlot,
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
      Arg = SpillSlot;
      break;
    }
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (!IsSibcall && (!IsTailCall || IsByVal)) {
      assert(VA.isMemLoc());
      if (!StackPtr.getNode()) {
        StackPtr = DAG.getCopyFromReg(Chain, DL, RegInfo->getStackRegister(),
                                      getPointerTy(DAG.getDataLayout()));
      }
      MemOpChains.push_back(
          LowerMemOpCallTo(Chain, StackPtr, Arg, DL, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // FIXME Make sure PIC style GOT works as expected
  // The only time GOT is really needed is for Medium-PIC static data
  // otherwise we are happy with pc-rel or static references

  if (IsVarArg && IsMustTail) {
    const auto &Forwards = MFI->getForwardedMustTailRegParms();
    for (const auto &F : Forwards) {
      SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
      RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
    }
  }

  // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
  // don't need this because the eligibility check rejects calls that require
  // shuffling arguments passed in memory.
  if (!IsSibcall && IsTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.
    SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);

    SmallVector<SDValue, 8> MemOpChains2;
    SDValue FIN;
    int FI = 0;
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (VA.isRegLoc())
        continue;
      assert(VA.isMemLoc());
      SDValue Arg = OutVals[i];
      ISD::ArgFlagsTy Flags = Outs[i].Flags;
      // Skip inalloca arguments.  They don't require any work.
      if (Flags.isInAlloca())
        continue;
      // Create frame index.
      int32_t Offset = VA.getLocMemOffset() + FPDiff;
      uint32_t OpSize = (VA.getLocVT().getSizeInBits() + 7) / 8;
      FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
      FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));

      if (Flags.isByVal()) {
        // Copy relative to framepointer.
        SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), DL);
        if (!StackPtr.getNode()) {
          StackPtr = DAG.getCopyFromReg(Chain, DL, RegInfo->getStackRegister(),
                                        getPointerTy(DAG.getDataLayout()));
        }
        Source = DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),
                             StackPtr, Source);

        MemOpChains2.push_back(
            CreateCopyOfByValArgument(Source, FIN, ArgChain, Flags, DAG, DL));
      } else {
        // Store relative to framepointer.
        MemOpChains2.push_back(DAG.getStore(
            ArgChain, DL, Arg, FIN,
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
      }
    }

    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains2);

    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetFI,
                                     getPointerTy(DAG.getDataLayout()),
                                     Subtarget.getSlotSize(), FPDiff, DL);
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
    // it.
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);

    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    const GlobalValue *GV = G->getGlobal();
    if (!GV->hasDLLImportStorageClass()) {
      unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);

      Callee = DAG.getTargetGlobalAddress(
          GV, DL, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);

      if (OpFlags == M68kII::MO_GOTPCREL) {

        // Add a wrapper.
        Callee = DAG.getNode(M68kISD::WrapperPC, DL,
                             getPointerTy(DAG.getDataLayout()), Callee);

        // Add extra indirection
        Callee = DAG.getLoad(
            getPointerTy(DAG.getDataLayout()), DL, DAG.getEntryNode(), Callee,
            MachinePointerInfo::getGOT(DAG.getMachineFunction()));
      }
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
    unsigned char OpFlags =
        Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);

    Callee = DAG.getTargetExternalSymbol(
        S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;

  if (!IsSibcall && IsTailCall) {
    Chain = DAG.getCALLSEQ_END(Chain,
                               DAG.getIntPtrConstant(NumBytesToPop, DL, true),
                               DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
    InFlag = Chain.getValue(1);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (IsTailCall)
    Ops.push_back(DAG.getConstant(FPDiff, DL, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");

  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(M68kISD::TC_RETURN, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(M68kISD::CALL, DL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPop;
  if (M68k::isCalleePop(CallConv, IsVarArg,
                        DAG.getTarget().Options.GuaranteedTailCallOpt)) {
    NumBytesForCalleeToPop = NumBytes; // Callee pops everything
  } else if (!canGuaranteeTCO(CallConv) && SR == StackStructReturn) {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    NumBytesForCalleeToPop = 4;
  } else {
    NumBytesForCalleeToPop = 0; // Callee pops nothing.
  }

  if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
    // No need to reset the stack after the call if the call doesn't return. To
    // make the MI verify, we'll pretend the callee does it for us.
    NumBytesForCalleeToPop = NumBytes;
  }

  // Returns a flag for retval copy to use.
  if (!IsSibcall) {
    Chain = DAG.getCALLSEQ_END(
        Chain, DAG.getIntPtrConstant(NumBytesToPop, DL, true),
        DAG.getIntPtrConstant(NumBytesForCalleeToPop, DL, true), InFlag, DL);
    InFlag = Chain.getValue(1);
  }

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
                         InVals);
}

SDValue M68kTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_M68k);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    EVT CopyVT = VA.getLocVT();

    /// ??? is this correct?
    Chain = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), CopyVT, InFlag)
                .getValue(1);
    SDValue Val = Chain.getValue(0);

    if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);

    InFlag = Chain.getValue(2);
    InVals.push_back(Val);
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
//            Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//

SDValue M68kTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CCID, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  M68kMachineFunctionInfo *MMFI = MF.getInfo<M68kMachineFunctionInfo>();
  // const TargetFrameLowering &TFL = *Subtarget.getFrameLowering();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  M68kCCState CCInfo(MF.getFunction(), CCID, IsVarArg, MF, ArgLocs,
                     *DAG.getContext());

  CCInfo.AnalyzeFormalArguments(Ins, CC_M68k);

  unsigned LastVal = ~0U;
  SDValue ArgValue;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    assert(VA.getValNo() != LastVal && "Same value in different locations");

    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = &M68k::XR32RegClass;
      else
        llvm_unreachable("Unknown argument type!");

      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::BCvt) {
        ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
      }

      if (VA.isExtInLoc()) {
        ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
      }
    } else {
      assert(VA.isMemLoc());
      ArgValue = LowerMemArgument(Chain, CCID, Ins, DL, DAG, VA, MFI, i);
    }

    // If value is passed via pointer - do a load.
    // TODO Make sure this handling on indirect arguments is correct
    if (VA.getLocInfo() == CCValAssign::Indirect)
      ArgValue =
          DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo());

    InVals.push_back(ArgValue);
  }

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    // Swift calling convention does not require we copy the sret argument
    // into %D0 for the return. We don't set SRetReturnReg for Swift.
    if (CCID == CallingConv::Swift)
      continue;

    // ABI require that for returning structs by value we copy the sret argument
    // into %D0 for the return. Save the argument into a virtual register so
    // that we can access it from the return points.
    if (Ins[i].Flags.isSRet()) {
      unsigned Reg = MMFI->getSRetReturnReg();
      if (!Reg) {
        MVT PtrTy = getPointerTy(DAG.getDataLayout());
        Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
        MMFI->setSRetReturnReg(Reg);
      }
      SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]);
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
      break;
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // Align stack specially for tail calls.
  if (shouldGuaranteeTCO(CCID, MF.getTarget().Options.GuaranteedTailCallOpt))
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start. We
  // can skip this if there are no va_start calls.
  if (MFI.hasVAStart()) {
    MMFI->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
  }

  if (IsVarArg && MFI.hasMustTailInVarArgFunc()) {
    // We forward some GPRs and some vector types.
    SmallVector<MVT, 2> RegParmTypes;
    MVT IntVT = MVT::i32;
    RegParmTypes.push_back(IntVT);

    // Compute the set of forwarded registers. The rest are scratch.
    // ??? what is this for?
    SmallVectorImpl<ForwardedRegister> &Forwards =
        MMFI->getForwardedMustTailRegParms();
    CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_M68k);

    // Copy all forwards from physical to virtual registers.
    for (ForwardedRegister &F : Forwards) {
      // FIXME Can we use a less constrained schedule?
      SDValue RegVal = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
      F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
      Chain = DAG.getCopyToReg(Chain, DL, F.VReg, RegVal);
    }
  }

  // Some CCs need callee pop.
  if (M68k::isCalleePop(CCID, IsVarArg,
                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
    MMFI->setBytesToPopOnReturn(StackSize); // Callee pops everything.
  } else {
    MMFI->setBytesToPopOnReturn(0); // Callee pops nothing.
    // If this is an sret function, the return should pop the hidden pointer.
    if (!canGuaranteeTCO(CCID) && argsAreStructReturn(Ins) == StackStructReturn)
      MMFI->setBytesToPopOnReturn(4);
  }

  MMFI->setArgumentStackSize(StackSize);

  return Chain;
}

//===----------------------------------------------------------------------===//
//              Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

SDValue
M68kTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CCID,
                                bool IsVarArg,
                                const SmallVectorImpl<ISD::OutputArg> &Outs,
                                const SmallVectorImpl<SDValue> &OutVals,
                                const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  M68kMachineFunctionInfo *MFI = MF.getInfo<M68kMachineFunctionInfo>();

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CCID, IsVarArg, MF, RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_M68k);

  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  // Operand #0 = Chain (updated below)
  RetOps.push_back(Chain);
  // Operand #1 = Bytes To Pop
  RetOps.push_back(
      DAG.getTargetConstant(MFI->getBytesToPopOnReturn(), DL, MVT::i32));

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue ValToCopy = OutVals[i];
    EVT ValVT = ValToCopy.getValueType();

    // Promote values to the appropriate types.
    if (VA.getLocInfo() == CCValAssign::SExt)
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::ZExt)
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::AExt) {
      if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
        ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), ValToCopy);
      else
        ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), ValToCopy);
    } else if (VA.getLocInfo() == CCValAssign::BCvt)
      ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), ValToCopy, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // Swift calling convention does not require we copy the sret argument
  // into %d0 for the return, and SRetReturnReg is not set for Swift.

  // ABI require that for returning structs by value we copy the sret argument
  // into %D0 for the return. Save the argument into a virtual register so that
  // we can access it from the return points.
  //
  // Checking Function.hasStructRetAttr() here is insufficient because the IR
  // may not have an explicit sret argument. If MFI.CanLowerReturn is
  // false, then an sret argument may be implicitly inserted in the SelDAG. In
  // either case MFI->setSRetReturnReg() will have been called.
  if (unsigned SRetReg = MFI->getSRetReturnReg()) {
    // ??? Can i just move this to the top and escape this explanation?
    // When we have both sret and another return value, we should use the
    // original Chain stored in RetOps[0], instead of the current Chain updated
    // in the above loop. If we only have sret, RetOps[0] equals to Chain.

    // For the case of sret and another return value, we have
    //   Chain_0 at the function entry
    //   Chain_1 = getCopyToReg(Chain_0) in the above loop
    // If we use Chain_1 in getCopyFromReg, we will have
    //   Val = getCopyFromReg(Chain_1)
    //   Chain_2 = getCopyToReg(Chain_1, Val) from below

    // getCopyToReg(Chain_0) will be glued together with
    // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
    // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
    //   Data dependency from Unit B to Unit A due to usage of Val in
    //     getCopyToReg(Chain_1, Val)
    //   Chain dependency from Unit A to Unit B

    // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
    SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
                                     getPointerTy(MF.getDataLayout()));

    // ??? How will this work if CC does not use registers for args passing?
    // ??? What if I return multiple structs?
    unsigned RetValReg = M68k::D0;
    Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
    Flag = Chain.getValue(1);

    RetOps.push_back(
        DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(M68kISD::RET, DL, MVT::Other, RetOps);
}

//===----------------------------------------------------------------------===//
//                Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

//  Like std call, callee cleans arguments, convention except that ECX is
//  reserved for storing the tail called function address. Only 2 registers are
//  free for argument passing (inreg). Tail call optimization is performed
//  provided:
//                * tailcallopt is enabled
//                * caller/callee are fastcc
//  On M68k_64 architecture with GOT-style position independent code only
//  local (within module) calls are supported at the moment. To keep the stack
//  aligned according to platform abi the function GetAlignedArgumentStackSize
//  ensures that argument delta is always multiples of stack alignment. (Dynamic
//  linkers need this - darwin's dyld for example) If a tail called function
//  callee has more arguments than the caller the caller needs to make sure that
//  there is room to move the RETADDR to. This is achieved by reserving an area
//  the size of the argument delta right after the original RETADDR, but before
//  the saved framepointer or the spilled registers e.g. caller(arg1, arg2)
//  calls callee(arg1, arg2,arg3,arg4) stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
/// requirement.
unsigned
M68kTargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                SelectionDAG &DAG) const {
  const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
  unsigned StackAlignment = TFI.getStackAlignment();
  uint64_t AlignMask = StackAlignment - 1;
  int64_t Offset = StackSize;
  unsigned SlotSize = Subtarget.getSlotSize();
  if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
    // Number smaller than 12 so just add the difference.
    Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
  } else {
    // Mask out lower bits, add stackalignment once plus the 12 bytes.
    Offset =
        ((~AlignMask) & Offset) + StackAlignment + (StackAlignment - SlotSize);
  }
  return Offset;
}

/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool M68kTargetLowering::IsEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool IsVarArg,
    bool IsCalleeStructRet, bool IsCallerStructRet, Type *RetTy,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  if (!mayTailCallThisCC(CalleeCC))
    return false;

  // If -tailcallopt is specified, make fastcc functions tail-callable.
  MachineFunction &MF = DAG.getMachineFunction();
  const auto &CallerF = MF.getFunction();

  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
    if (canGuaranteeTCO(CalleeCC) && CCMatch)
      return true;
    return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
  // emit a special epilogue.
  const M68kRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasStackRealignment(MF))
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (IsCalleeStructRet || IsCallerStructRet)
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
  LLVMContext &C = *DAG.getContext();
  if (IsVarArg && !Outs.empty()) {

    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);

    CCInfo.AnalyzeCallOperands(Outs, CC_M68k);
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
      if (!ArgLocs[i].isRegLoc())
        return false;
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, RetCC_M68k,
                                  RetCC_M68k))
    return false;

  // The callee has to preserve all registers the caller needs to preserve.
  const M68kRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  unsigned StackArgsSize = 0;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);

    CCInfo.AnalyzeCallOperands(Outs, CC_M68k);
    StackArgsSize = CCInfo.getNextStackOffset();

    if (CCInfo.getNextStackOffset()) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const M68kInstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
        CCValAssign &VA = ArgLocs[i];
        SDValue Arg = OutVals[i];
        ISD::ArgFlagsTy Flags = Outs[i].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
                                   TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. The call address can
    // only target %A0 or %A1 since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments so watch out for those.
    if ((!isa<GlobalAddressSDNode>(Callee) &&
         !isa<ExternalSymbolSDNode>(Callee)) ||
        PositionIndependent) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 1 : 2;

      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
        CCValAssign &VA = ArgLocs[i];
        if (!VA.isRegLoc())
          continue;
        unsigned Reg = VA.getLocReg();
        switch (Reg) {
        default:
          break;
        case M68k::A0:
        case M68k::A1:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop = M68k::isCalleePop(
      CalleeCC, IsVarArg, MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<M68kMachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}

//===----------------------------------------------------------------------===//
// Custom Lower
//===----------------------------------------------------------------------===//

SDValue M68kTargetLowering::LowerOperation(SDValue Op,
                                           SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Should not custom lower this!");
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
    return LowerXALUO(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SETCCCARRY:
    return LowerSETCCCARRY(Op, DAG);
  case ISD::SELECT:
    return LowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return LowerBRCOND(Op, DAG);
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE:
    return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
  }
}

bool M68kTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                SDValue C) const {
  // Shifts and add instructions in M68000 and M68010 support
  // up to 32 bits, but mul only has 16-bit variant. So it's almost
  // certainly beneficial to lower 8/16/32-bit mul to their
  // add / shifts counterparts. But for 64-bits mul, it might be
  // safer to just leave it to compiler runtime implementations.
  return VT.bitsLE(MVT::i32) || Subtarget.atLeastM68020();
}

SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
  // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
  // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
  // looks for this combo and may remove the "setcc" instruction if the "setcc"
  // has only one use.
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned BaseOp = 0;
  unsigned Cond = 0;
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown ovf instruction!");
  case ISD::SADDO:
    BaseOp = M68kISD::ADD;
    Cond = M68k::COND_VS;
    break;
  case ISD::UADDO:
    BaseOp = M68kISD::ADD;
    Cond = M68k::COND_CS;
    break;
  case ISD::SSUBO:
    BaseOp = M68kISD::SUB;
    Cond = M68k::COND_VS;
    break;
  case ISD::USUBO:
    BaseOp = M68kISD::SUB;
    Cond = M68k::COND_CS;
    break;
  }

  // Also sets CCR.
  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i8);
  SDValue Arith = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
  SDValue SetCC = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1),
                              DAG.getConstant(Cond, DL, MVT::i8),
                              SDValue(Arith.getNode(), 1));

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Arith, SetCC);
}

/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition
/// according to equal/not-equal condition code \p CC.
static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
                                   const SDLoc &DL, SelectionDAG &DAG) {
  // If Src is i8, promote it to i32 with any_extend.  There is no i8 BT
  // instruction.  Since the shift amount is in-range-or-undefined, we know
  // that doing a bittest on the i32 value is ok.
  if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
    Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);

  // If the operand types disagree, extend the shift amount to match.  Since
  // BT ignores high bits (like shifts) we can use anyextend.
  if (Src.getValueType() != BitNo.getValueType())
    BitNo = DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(), BitNo);

  SDValue BT = DAG.getNode(M68kISD::BT, DL, MVT::i32, Src, BitNo);

  // NOTE BTST sets CCR.Z flag
  M68k::CondCode Cond = CC == ISD::SETEQ ? M68k::COND_NE : M68k::COND_EQ;
  return DAG.getNode(M68kISD::SETCC, DL, MVT::i8,
                     DAG.getConstant(Cond, DL, MVT::i8), BT);
}

/// Result of 'and' is compared against zero. Change to a BT node if possible.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &DL,
                            SelectionDAG &DAG) {
  SDValue Op0 = And.getOperand(0);
  SDValue Op1 = And.getOperand(1);
  if (Op0.getOpcode() == ISD::TRUNCATE)
    Op0 = Op0.getOperand(0);
  if (Op1.getOpcode() == ISD::TRUNCATE)
    Op1 = Op1.getOperand(0);

  SDValue LHS, RHS;
  if (Op1.getOpcode() == ISD::SHL)
    std::swap(Op0, Op1);
  if (Op0.getOpcode() == ISD::SHL) {
    if (isOneConstant(Op0.getOperand(0))) {
      // If we looked past a truncate, check that it's only truncating away
      // known zeros.
      unsigned BitWidth = Op0.getValueSizeInBits();
      unsigned AndBitWidth = And.getValueSizeInBits();
      if (BitWidth > AndBitWidth) {
        auto Known = DAG.computeKnownBits(Op0);
        if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
          return SDValue();
      }
      LHS = Op1;
      RHS = Op0.getOperand(1);
    }
  } else if (auto *AndRHS = dyn_cast<ConstantSDNode>(Op1)) {
    uint64_t AndRHSVal = AndRHS->getZExtValue();
    SDValue AndLHS = Op0;

    if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
      LHS = AndLHS.getOperand(0);
      RHS = AndLHS.getOperand(1);
    }

    // Use BT if the immediate can't be encoded in a TEST instruction.
    if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
      LHS = AndLHS;
      RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), DL, LHS.getValueType());
    }
  }

  if (LHS.getNode())
    return getBitTestCondition(LHS, RHS, CC, DL, DAG);

  return SDValue();
}

static M68k::CondCode TranslateIntegerM68kCC(ISD::CondCode SetCCOpcode) {
  switch (SetCCOpcode) {
  default:
    llvm_unreachable("Invalid integer condition!");
  case ISD::SETEQ:
    return M68k::COND_EQ;
  case ISD::SETGT:
    return M68k::COND_GT;
  case ISD::SETGE:
    return M68k::COND_GE;
  case ISD::SETLT:
    return M68k::COND_LT;
  case ISD::SETLE:
    return M68k::COND_LE;
  case ISD::SETNE:
    return M68k::COND_NE;
  case ISD::SETULT:
    return M68k::COND_CS;
  case ISD::SETUGE:
    return M68k::COND_CC;
  case ISD::SETUGT:
    return M68k::COND_HI;
  case ISD::SETULE:
    return M68k::COND_LS;
  }
}

/// Do a one-to-one translation of a ISD::CondCode to the M68k-specific
/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static unsigned TranslateM68kCC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
                                bool IsFP, SDValue &LHS, SDValue &RHS,
                                SelectionDAG &DAG) {
  if (!IsFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        return M68k::COND_PL;
      }
      if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        return M68k::COND_MI;
      }
      if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        return M68k::COND_LE;
      }
    }

    return TranslateIntegerM68kCC(SetCCOpcode);
  }

  // First determine if it is required or is profitable to flip the operands.

  // If LHS is a foldable load, but RHS is not, flip the condition.
  if (ISD::isNON_EXTLoad(LHS.getNode()) && !ISD::isNON_EXTLoad(RHS.getNode())) {
    SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
    std::swap(LHS, RHS);
  }

  switch (SetCCOpcode) {
  default:
    break;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    std::swap(LHS, RHS);
    break;
  }

  // On a floating point condition, the flags are set as follows:
  // ZF  PF  CF   op
  //  0 | 0 | 0 | X > Y
  //  0 | 0 | 1 | X < Y
  //  1 | 0 | 0 | X == Y
  //  1 | 1 | 1 | unordered
  switch (SetCCOpcode) {
  default:
    llvm_unreachable("Condcode should be pre-legalized away");
  case ISD::SETUEQ:
  case ISD::SETEQ:
    return M68k::COND_EQ;
  case ISD::SETOLT: // flipped
  case ISD::SETOGT:
  case ISD::SETGT:
    return M68k::COND_HI;
  case ISD::SETOLE: // flipped
  case ISD::SETOGE:
  case ISD::SETGE:
    return M68k::COND_CC;
  case ISD::SETUGT: // flipped
  case ISD::SETULT:
  case ISD::SETLT:
    return M68k::COND_CS;
  case ISD::SETUGE: // flipped
  case ISD::SETULE:
  case ISD::SETLE:
    return M68k::COND_LS;
  case ISD::SETONE:
  case ISD::SETNE:
    return M68k::COND_NE;
  case ISD::SETOEQ:
  case ISD::SETUNE:
    return M68k::COND_INVALID;
  }
}

// Convert (truncate (srl X, N) to i1) to (bt X, N)
static SDValue LowerTruncateToBT(SDValue Op, ISD::CondCode CC, const SDLoc &DL,
                                 SelectionDAG &DAG) {

  assert(Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1 &&
         "Expected TRUNCATE to i1 node");

  if (Op.getOperand(0).getOpcode() != ISD::SRL)
    return SDValue();

  SDValue ShiftRight = Op.getOperand(0);
  return getBitTestCondition(ShiftRight.getOperand(0), ShiftRight.getOperand(1),
                             CC, DL, DAG);
}

/// \brief return true if \c Op has a use that doesn't just read flags.
static bool hasNonFlagsUse(SDValue Op) {
  for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE;
       ++UI) {
    SDNode *User = *UI;
    unsigned UOpNo = UI.getOperandNo();
    if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
      // Look pass truncate.
      UOpNo = User->use_begin().getOperandNo();
      User = *User->use_begin();
    }

    if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC &&
        !(User->getOpcode() == ISD::SELECT && UOpNo == 0))
      return true;
  }
  return false;
}

SDValue M68kTargetLowering::EmitTest(SDValue Op, unsigned M68kCC,
                                     const SDLoc &DL, SelectionDAG &DAG) const {

  // CF and OF aren't always set the way we want. Determine which
  // of these we need.
  bool NeedCF = false;
  bool NeedOF = false;
  switch (M68kCC) {
  default:
    break;
  case M68k::COND_HI:
  case M68k::COND_CC:
  case M68k::COND_CS:
  case M68k::COND_LS:
    NeedCF = true;
    break;
  case M68k::COND_GT:
  case M68k::COND_GE:
  case M68k::COND_LT:
  case M68k::COND_LE:
  case M68k::COND_VS:
  case M68k::COND_VC: {
    // Check if we really need to set the
    // Overflow flag. If NoSignedWrap is present
    // that is not actually needed.
    switch (Op->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::SHL: {
      if (Op.getNode()->getFlags().hasNoSignedWrap())
        break;
      LLVM_FALLTHROUGH;
    }
    default:
      NeedOF = true;
      break;
    }
    break;
  }
  }
  // See if we can use the CCR value from the operand instead of
  // doing a separate TEST. TEST always sets OF and CF to 0, so unless
  // we prove that the arithmetic won't overflow, we can't use OF or CF.
  if (Op.getResNo() != 0 || NeedOF || NeedCF) {
    // Emit a CMP with 0, which is the TEST pattern.
    return DAG.getNode(M68kISD::CMP, DL, MVT::i8,
                       DAG.getConstant(0, DL, Op.getValueType()), Op);
  }
  unsigned Opcode = 0;
  unsigned NumOperands = 0;

  // Truncate operations may prevent the merge of the SETCC instruction
  // and the arithmetic instruction before it. Attempt to truncate the operands
  // of the arithmetic instruction and use a reduced bit-width instruction.
  bool NeedTruncation = false;
  SDValue ArithOp = Op;
  if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
    SDValue Arith = Op->getOperand(0);
    // Both the trunc and the arithmetic op need to have one user each.
    if (Arith->hasOneUse())
      switch (Arith.getOpcode()) {
      default:
        break;
      case ISD::ADD:
      case ISD::SUB:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR: {
        NeedTruncation = true;
        ArithOp = Arith;
      }
      }
  }

  // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
  // which may be the result of a CAST.  We use the variable 'Op', which is the
  // non-casted variable when we check for possible users.
  switch (ArithOp.getOpcode()) {
  case ISD::ADD:
    Opcode = M68kISD::ADD;
    NumOperands = 2;
    break;
  case ISD::SHL:
  case ISD::SRL:
    // If we have a constant logical shift that's only used in a comparison
    // against zero turn it into an equivalent AND. This allows turning it into
    // a TEST instruction later.
    if ((M68kCC == M68k::COND_EQ || M68kCC == M68k::COND_NE) &&
        Op->hasOneUse() && isa<ConstantSDNode>(Op->getOperand(1)) &&
        !hasNonFlagsUse(Op)) {
      EVT VT = Op.getValueType();
      unsigned BitWidth = VT.getSizeInBits();
      unsigned ShAmt = Op->getConstantOperandVal(1);
      if (ShAmt >= BitWidth) // Avoid undefined shifts.
        break;
      APInt Mask = ArithOp.getOpcode() == ISD::SRL
                       ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
                       : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
      if (!Mask.isSignedIntN(32)) // Avoid large immediates.
        break;
      Op = DAG.getNode(ISD::AND, DL, VT, Op->getOperand(0),
                       DAG.getConstant(Mask, DL, VT));
    }
    break;

  case ISD::AND:
    // If the primary 'and' result isn't used, don't bother using
    // M68kISD::AND, because a TEST instruction will be better.
    if (!hasNonFlagsUse(Op)) {
      SDValue Op0 = ArithOp->getOperand(0);
      SDValue Op1 = ArithOp->getOperand(1);
      EVT VT = ArithOp.getValueType();
      bool IsAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
      bool IsLegalAndnType = VT == MVT::i32 || VT == MVT::i64;

      // But if we can combine this into an ANDN operation, then create an AND
      // now and allow it to be pattern matched into an ANDN.
      if (/*!Subtarget.hasBMI() ||*/ !IsAndn || !IsLegalAndnType)
        break;
    }
    LLVM_FALLTHROUGH;
  case ISD::SUB:
  case ISD::OR:
  case ISD::XOR:
    // Due to the ISEL shortcoming noted above, be conservative if this op is
    // likely to be selected as part of a load-modify-store instruction.
    for (const auto *U : Op.getNode()->uses())
      if (U->getOpcode() == ISD::STORE)
        goto default_case;

    // Otherwise use a regular CCR-setting instruction.
    switch (ArithOp.getOpcode()) {
    default:
      llvm_unreachable("unexpected operator!");
    case ISD::SUB:
      Opcode = M68kISD::SUB;
      break;
    case ISD::XOR:
      Opcode = M68kISD::XOR;
      break;
    case ISD::AND:
      Opcode = M68kISD::AND;
      break;
    case ISD::OR:
      Opcode = M68kISD::OR;
      break;
    }

    NumOperands = 2;
    break;
  case M68kISD::ADD:
  case M68kISD::SUB:
  case M68kISD::OR:
  case M68kISD::XOR:
  case M68kISD::AND:
    return SDValue(Op.getNode(), 1);
  default:
  default_case:
    break;
  }

  // If we found that truncation is beneficial, perform the truncation and
  // update 'Op'.
  if (NeedTruncation) {
    EVT VT = Op.getValueType();
    SDValue WideVal = Op->getOperand(0);
    EVT WideVT = WideVal.getValueType();
    unsigned ConvertedOp = 0;
    // Use a target machine opcode to prevent further DAGCombine
    // optimizations that may separate the arithmetic operations
    // from the setcc node.
    switch (WideVal.getOpcode()) {
    default:
      break;
    case ISD::ADD:
      ConvertedOp = M68kISD::ADD;
      break;
    case ISD::SUB:
      ConvertedOp = M68kISD::SUB;
      break;
    case ISD::AND:
      ConvertedOp = M68kISD::AND;
      break;
    case ISD::OR:
      ConvertedOp = M68kISD::OR;
      break;
    case ISD::XOR:
      ConvertedOp = M68kISD::XOR;
      break;
    }

    if (ConvertedOp) {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
        SDValue V0 = DAG.getNode(ISD::TRUNCATE, DL, VT, WideVal.getOperand(0));
        SDValue V1 = DAG.getNode(ISD::TRUNCATE, DL, VT, WideVal.getOperand(1));
        Op = DAG.getNode(ConvertedOp, DL, VT, V0, V1);
      }
    }
  }

  if (Opcode == 0) {
    // Emit a CMP with 0, which is the TEST pattern.
    return DAG.getNode(M68kISD::CMP, DL, MVT::i8,
                       DAG.getConstant(0, DL, Op.getValueType()), Op);
  }
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i8);
  SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);

  SDValue New = DAG.getNode(Opcode, DL, VTs, Ops);
  DAG.ReplaceAllUsesWith(Op, New);
  return SDValue(New.getNode(), 1);
}

/// \brief Return true if the condition is an unsigned comparison operation.
static bool isM68kCCUnsigned(unsigned M68kCC) {
  switch (M68kCC) {
  default:
    llvm_unreachable("Invalid integer condition!");
  case M68k::COND_EQ:
  case M68k::COND_NE:
  case M68k::COND_CS:
  case M68k::COND_HI:
  case M68k::COND_LS:
  case M68k::COND_CC:
    return true;
  case M68k::COND_GT:
  case M68k::COND_GE:
  case M68k::COND_LT:
  case M68k::COND_LE:
    return false;
  }
}

SDValue M68kTargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned M68kCC,
                                    const SDLoc &DL, SelectionDAG &DAG) const {
  if (isNullConstant(Op1))
    return EmitTest(Op0, M68kCC, DL, DAG);

  assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&
         "Unexpected comparison operation for MVT::i1 operands");

  if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
       Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
    // Only promote the compare up to I32 if it is a 16 bit operation
    // with an immediate.  16 bit immediates are to be avoided.
    if ((Op0.getValueType() == MVT::i16 &&
         (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1))) &&
        !DAG.getMachineFunction().getFunction().hasMinSize()) {
      unsigned ExtendOp =
          isM68kCCUnsigned(M68kCC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
      Op0 = DAG.getNode(ExtendOp, DL, MVT::i32, Op0);
      Op1 = DAG.getNode(ExtendOp, DL, MVT::i32, Op1);
    }
    // Use SUB instead of CMP to enable CSE between SUB and CMP.
    SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i8);
    SDValue Sub = DAG.getNode(M68kISD::SUB, DL, VTs, Op0, Op1);
    return SDValue(Sub.getNode(), 1);
  }
  return DAG.getNode(M68kISD::CMP, DL, MVT::i8, Op0, Op1);
}

/// Result of 'and' or 'trunc to i1' is compared against zero.
/// Change to a BT node if possible.
SDValue M68kTargetLowering::LowerToBT(SDValue Op, ISD::CondCode CC,
                                      const SDLoc &DL,
                                      SelectionDAG &DAG) const {
  if (Op.getOpcode() == ISD::AND)
    return LowerAndToBT(Op, CC, DL, DAG);
  if (Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1)
    return LowerTruncateToBT(Op, CC, DL, DAG);
  return SDValue();
}

SDValue M68kTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDLoc DL(Op);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // Optimize to BT if possible.
  // Lower (X & (1 << N)) == 0 to BT(X, N).
  // Lower ((X >>u N) & 1) != 0 to BT(X, N).
  // Lower ((X >>s N) & 1) != 0 to BT(X, N).
  // Lower (trunc (X >> N) to i1) to BT(X, N).
  if (Op0.hasOneUse() && isNullConstant(Op1) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    if (SDValue NewSetCC = LowerToBT(Op0, CC, DL, DAG)) {
      if (VT == MVT::i1)
        return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, NewSetCC);
      return NewSetCC;
    }
  }

  // Look for X == 0, X == 1, X != 0, or X != 1.  We can simplify some forms of
  // these.
  if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {

    // If the input is a setcc, then reuse the input setcc or use a new one with
    // the inverted condition.
    if (Op0.getOpcode() == M68kISD::SETCC) {
      M68k::CondCode CCode = (M68k::CondCode)Op0.getConstantOperandVal(0);
      bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
      if (!Invert)
        return Op0;

      CCode = M68k::GetOppositeBranchCondition(CCode);
      SDValue SetCC =
          DAG.getNode(M68kISD::SETCC, DL, MVT::i8,
                      DAG.getConstant(CCode, DL, MVT::i8), Op0.getOperand(1));
      if (VT == MVT::i1)
        return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
      return SetCC;
    }
  }
  if (Op0.getValueType() == MVT::i1 && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    if (isOneConstant(Op1)) {
      ISD::CondCode NewCC = ISD::GlobalISel::getSetCCInverse(CC, true);
      return DAG.getSetCC(DL, VT, Op0, DAG.getConstant(0, DL, MVT::i1), NewCC);
    }
    if (!isNullConstant(Op1)) {
      SDValue Xor = DAG.getNode(ISD::XOR, DL, MVT::i1, Op0, Op1);
      return DAG.getSetCC(DL, VT, Xor, DAG.getConstant(0, DL, MVT::i1), CC);
    }
  }

  bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
  unsigned M68kCC = TranslateM68kCC(CC, DL, IsFP, Op0, Op1, DAG);
  if (M68kCC == M68k::COND_INVALID)
    return SDValue();

  SDValue CCR = EmitCmp(Op0, Op1, M68kCC, DL, DAG);
  return DAG.getNode(M68kISD::SETCC, DL, MVT::i8,
                     DAG.getConstant(M68kCC, DL, MVT::i8), CCR);
}

SDValue M68kTargetLowering::LowerSETCCCARRY(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue Carry = Op.getOperand(2);
  SDValue Cond = Op.getOperand(3);
  SDLoc DL(Op);

  assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
  M68k::CondCode CC = TranslateIntegerM68kCC(cast<CondCodeSDNode>(Cond)->get());

  EVT CarryVT = Carry.getValueType();
  APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
  Carry = DAG.getNode(M68kISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry,
                      DAG.getConstant(NegOne, DL, CarryVT));

  SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
  SDValue Cmp =
      DAG.getNode(M68kISD::SUBX, DL, VTs, LHS, RHS, Carry.getValue(1));

  return DAG.getNode(M68kISD::SETCC, DL, MVT::i8,
                     DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1));
}

/// Return true if opcode is a M68k logical comparison.
static bool isM68kLogicalCmp(SDValue Op) {
  unsigned Opc = Op.getNode()->getOpcode();
  if (Opc == M68kISD::CMP)
    return true;
  if (Op.getResNo() == 1 &&
      (Opc == M68kISD::ADD || Opc == M68kISD::SUB || Opc == M68kISD::ADDX ||
       Opc == M68kISD::SUBX || Opc == M68kISD::SMUL || Opc == M68kISD::UMUL ||
       Opc == M68kISD::OR || Opc == M68kISD::XOR || Opc == M68kISD::AND))
    return true;

  if (Op.getResNo() == 2 && Opc == M68kISD::UMUL)
    return true;

  return false;
}

static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
  if (V.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue VOp0 = V.getOperand(0);
  unsigned InBits = VOp0.getValueSizeInBits();
  unsigned Bits = V.getValueSizeInBits();
  return DAG.MaskedValueIsZero(VOp0,
                               APInt::getHighBitsSet(InBits, InBits - Bits));
}

SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  bool addTest = true;
  SDValue Cond = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  SDLoc DL(Op);
  SDValue CC;

  if (Cond.getOpcode() == ISD::SETCC) {
    if (SDValue NewCond = LowerSETCC(Cond, DAG))
      Cond = NewCond;
  }

  // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
  // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
  // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
  // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
  if (Cond.getOpcode() == M68kISD::SETCC &&
      Cond.getOperand(1).getOpcode() == M68kISD::CMP &&
      isNullConstant(Cond.getOperand(1).getOperand(0))) {
    SDValue Cmp = Cond.getOperand(1);

    unsigned CondCode =
        cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();

    if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
        (CondCode == M68k::COND_EQ || CondCode == M68k::COND_NE)) {
      SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;

      SDValue CmpOp0 = Cmp.getOperand(1);
      // Apply further optimizations for special cases
      // (select (x != 0), -1, 0) -> neg & sbb
      // (select (x == 0), 0, -1) -> neg & sbb
      if (isNullConstant(Y) &&
          (isAllOnesConstant(Op1) == (CondCode == M68k::COND_NE))) {

        SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);

        SDValue Neg =
            DAG.getNode(M68kISD::SUB, DL, VTs,
                        DAG.getConstant(0, DL, CmpOp0.getValueType()), CmpOp0);

        SDValue Res = DAG.getNode(M68kISD::SETCC_CARRY, DL, Op.getValueType(),
                                  DAG.getConstant(M68k::COND_CS, DL, MVT::i8),
                                  SDValue(Neg.getNode(), 1));
        return Res;
      }

      Cmp = DAG.getNode(M68kISD::CMP, DL, MVT::i8,
                        DAG.getConstant(1, DL, CmpOp0.getValueType()), CmpOp0);

      SDValue Res = // Res = 0 or -1.
          DAG.getNode(M68kISD::SETCC_CARRY, DL, Op.getValueType(),
                      DAG.getConstant(M68k::COND_CS, DL, MVT::i8), Cmp);

      if (isAllOnesConstant(Op1) != (CondCode == M68k::COND_EQ))
        Res = DAG.getNOT(DL, Res, Res.getValueType());

      if (!isNullConstant(Op2))
        Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
      return Res;
    }
  }

  // Look past (and (setcc_carry (cmp ...)), 1).
  if (Cond.getOpcode() == ISD::AND &&
      Cond.getOperand(0).getOpcode() == M68kISD::SETCC_CARRY &&
      isOneConstant(Cond.getOperand(1)))
    Cond = Cond.getOperand(0);

  // If condition flag is set by a M68kISD::CMP, then use it as the condition
  // setting operand in place of the M68kISD::SETCC.
  unsigned CondOpcode = Cond.getOpcode();
  if (CondOpcode == M68kISD::SETCC || CondOpcode == M68kISD::SETCC_CARRY) {
    CC = Cond.getOperand(0);

    SDValue Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();

    bool IllegalFPCMov = false;

    if ((isM68kLogicalCmp(Cmp) && !IllegalFPCMov) || Opc == M68kISD::BT) {
      Cond = Cmp;
      addTest = false;
    }
  } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
             CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
             CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
    SDValue LHS = Cond.getOperand(0);
    SDValue RHS = Cond.getOperand(1);
    unsigned MxOpcode;
    unsigned MxCond;
    SDVTList VTs;
    switch (CondOpcode) {
    case ISD::UADDO:
      MxOpcode = M68kISD::ADD;
      MxCond = M68k::COND_CS;
      break;
    case ISD::SADDO:
      MxOpcode = M68kISD::ADD;
      MxCond = M68k::COND_VS;
      break;
    case ISD::USUBO:
      MxOpcode = M68kISD::SUB;
      MxCond = M68k::COND_CS;
      break;
    case ISD::SSUBO:
      MxOpcode = M68kISD::SUB;
      MxCond = M68k::COND_VS;
      break;
    case ISD::UMULO:
      MxOpcode = M68kISD::UMUL;
      MxCond = M68k::COND_VS;
      break;
    case ISD::SMULO:
      MxOpcode = M68kISD::SMUL;
      MxCond = M68k::COND_VS;
      break;
    default:
      llvm_unreachable("unexpected overflowing operator");
    }
    if (CondOpcode == ISD::UMULO)
      VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i32);
    else
      VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);

    SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS);

    if (CondOpcode == ISD::UMULO)
      Cond = MxOp.getValue(2);
    else
      Cond = MxOp.getValue(1);

    CC = DAG.getConstant(MxCond, DL, MVT::i8);
    addTest = false;
  }

  if (addTest) {
    // Look past the truncate if the high bits are known zero.
    if (isTruncWithZeroHighBitsInput(Cond, DAG))
      Cond = Cond.getOperand(0);

    // We know the result of AND is compared against zero. Try to match
    // it to BT.
    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
      if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) {
        CC = NewSetCC.getOperand(0);
        Cond = NewSetCC.getOperand(1);
        addTest = false;
      }
    }
  }

  if (addTest) {
    CC = DAG.getConstant(M68k::COND_NE, DL, MVT::i8);
    Cond = EmitTest(Cond, M68k::COND_NE, DL, DAG);
  }

  // a <  b ? -1 :  0 -> RES = ~setcc_carry
  // a <  b ?  0 : -1 -> RES = setcc_carry
  // a >= b ? -1 :  0 -> RES = setcc_carry
  // a >= b ?  0 : -1 -> RES = ~setcc_carry
  if (Cond.getOpcode() == M68kISD::SUB) {
    unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();

    if ((CondCode == M68k::COND_CC || CondCode == M68k::COND_CS) &&
        (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
        (isNullConstant(Op1) || isNullConstant(Op2))) {
      SDValue Res =
          DAG.getNode(M68kISD::SETCC_CARRY, DL, Op.getValueType(),
                      DAG.getConstant(M68k::COND_CS, DL, MVT::i8), Cond);
      if (isAllOnesConstant(Op1) != (CondCode == M68k::COND_CS))
        return DAG.getNOT(DL, Res, Res.getValueType());
      return Res;
    }
  }

  // M68k doesn't have an i8 cmov. If both operands are the result of a
  // truncate widen the cmov and push the truncate through. This avoids
  // introducing a new branch during isel and doesn't add any extensions.
  if (Op.getValueType() == MVT::i8 && Op1.getOpcode() == ISD::TRUNCATE &&
      Op2.getOpcode() == ISD::TRUNCATE) {
    SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
    if (T1.getValueType() == T2.getValueType() &&
        // Blacklist CopyFromReg to avoid partial register stalls.
        T1.getOpcode() != ISD::CopyFromReg &&
        T2.getOpcode() != ISD::CopyFromReg) {
      SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
      SDValue Cmov = DAG.getNode(M68kISD::CMOV, DL, VTs, T2, T1, CC, Cond);
      return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
    }
  }

  // M68kISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true.
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {Op2, Op1, CC, Cond};
  return DAG.getNode(M68kISD::CMOV, DL, VTs, Ops);
}

/// Return true if node is an ISD::AND or ISD::OR of two M68k::SETcc nodes
/// each of which has no other use apart from the AND / OR.
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
  Opc = Op.getOpcode();
  if (Opc != ISD::OR && Opc != ISD::AND)
    return false;
  return (M68k::IsSETCC(Op.getOperand(0).getOpcode()) &&
          Op.getOperand(0).hasOneUse() &&
          M68k::IsSETCC(Op.getOperand(1).getOpcode()) &&
          Op.getOperand(1).hasOneUse());
}

/// Return true if node is an ISD::XOR of a M68kISD::SETCC and 1 and that the
/// SETCC node has a single use.
static bool isXor1OfSetCC(SDValue Op) {
  if (Op.getOpcode() != ISD::XOR)
    return false;
  if (isOneConstant(Op.getOperand(1)))
    return Op.getOperand(0).getOpcode() == M68kISD::SETCC &&
           Op.getOperand(0).hasOneUse();
  return false;
}

SDValue M68kTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  bool AddTest = true;
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Dest = Op.getOperand(2);
  SDLoc DL(Op);
  SDValue CC;
  bool Inverted = false;

  if (Cond.getOpcode() == ISD::SETCC) {
    // Check for setcc([su]{add,sub}o == 0).
    if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
        isNullConstant(Cond.getOperand(1)) &&
        Cond.getOperand(0).getResNo() == 1 &&
        (Cond.getOperand(0).getOpcode() == ISD::SADDO ||
         Cond.getOperand(0).getOpcode() == ISD::UADDO ||
         Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
         Cond.getOperand(0).getOpcode() == ISD::USUBO)) {
      Inverted = true;
      Cond = Cond.getOperand(0);
    } else {
      if (SDValue NewCond = LowerSETCC(Cond, DAG))
        Cond = NewCond;
    }
  }

  // Look pass (and (setcc_carry (cmp ...)), 1).
  if (Cond.getOpcode() == ISD::AND &&
      Cond.getOperand(0).getOpcode() == M68kISD::SETCC_CARRY &&
      isOneConstant(Cond.getOperand(1)))
    Cond = Cond.getOperand(0);

  // If condition flag is set by a M68kISD::CMP, then use it as the condition
  // setting operand in place of the M68kISD::SETCC.
  unsigned CondOpcode = Cond.getOpcode();
  if (CondOpcode == M68kISD::SETCC || CondOpcode == M68kISD::SETCC_CARRY) {
    CC = Cond.getOperand(0);

    SDValue Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();

    if (isM68kLogicalCmp(Cmp) || Opc == M68kISD::BT) {
      Cond = Cmp;
      AddTest = false;
    } else {
      switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
      default:
        break;
      case M68k::COND_VS:
      case M68k::COND_CS:
        // These can only come from an arithmetic instruction with overflow,
        // e.g. SADDO, UADDO.
        Cond = Cond.getNode()->getOperand(1);
        AddTest = false;
        break;
      }
    }
  }
  CondOpcode = Cond.getOpcode();
  if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
      CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO) {
    SDValue LHS = Cond.getOperand(0);
    SDValue RHS = Cond.getOperand(1);
    unsigned MxOpcode;
    unsigned MxCond;
    SDVTList VTs;
    // Keep this in sync with LowerXALUO, otherwise we might create redundant
    // instructions that can't be removed afterwards (i.e. M68kISD::ADD and
    // M68kISD::INC).
    switch (CondOpcode) {
    case ISD::UADDO:
      MxOpcode = M68kISD::ADD;
      MxCond = M68k::COND_CS;
      break;
    case ISD::SADDO:
      MxOpcode = M68kISD::ADD;
      MxCond = M68k::COND_VS;
      break;
    case ISD::USUBO:
      MxOpcode = M68kISD::SUB;
      MxCond = M68k::COND_CS;
      break;
    case ISD::SSUBO:
      MxOpcode = M68kISD::SUB;
      MxCond = M68k::COND_VS;
      break;
    case ISD::UMULO:
      MxOpcode = M68kISD::UMUL;
      MxCond = M68k::COND_VS;
      break;
    case ISD::SMULO:
      MxOpcode = M68kISD::SMUL;
      MxCond = M68k::COND_VS;
      break;
    default:
      llvm_unreachable("unexpected overflowing operator");
    }

    if (Inverted)
      MxCond = M68k::GetOppositeBranchCondition((M68k::CondCode)MxCond);

    if (CondOpcode == ISD::UMULO)
      VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i8);
    else
      VTs = DAG.getVTList(LHS.getValueType(), MVT::i8);

    SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS);

    if (CondOpcode == ISD::UMULO)
      Cond = MxOp.getValue(2);
    else
      Cond = MxOp.getValue(1);

    CC = DAG.getConstant(MxCond, DL, MVT::i8);
    AddTest = false;
  } else {
    unsigned CondOpc;
    if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
      SDValue Cmp = Cond.getOperand(0).getOperand(1);
      if (CondOpc == ISD::OR) {
        // Also, recognize the pattern generated by an FCMP_UNE. We can emit
        // two branches instead of an explicit OR instruction with a
        // separate test.
        if (Cmp == Cond.getOperand(1).getOperand(1) && isM68kLogicalCmp(Cmp)) {
          CC = Cond.getOperand(0).getOperand(0);
          Chain = DAG.getNode(M68kISD::BRCOND, DL, Op.getValueType(), Chain,
                              Dest, CC, Cmp);
          CC = Cond.getOperand(1).getOperand(0);
          Cond = Cmp;
          AddTest = false;
        }
      } else { // ISD::AND
        // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
        // two branches instead of an explicit AND instruction with a
        // separate test. However, we only do this if this block doesn't
        // have a fall-through edge, because this requires an explicit
        // jmp when the condition is false.
        if (Cmp == Cond.getOperand(1).getOperand(1) && isM68kLogicalCmp(Cmp) &&
            Op.getNode()->hasOneUse()) {
          M68k::CondCode CCode =
              (M68k::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
          CCode = M68k::GetOppositeBranchCondition(CCode);
          CC = DAG.getConstant(CCode, DL, MVT::i8);
          SDNode *User = *Op.getNode()->use_begin();
          // Look for an unconditional branch following this conditional branch.
          // We need this because we need to reverse the successors in order
          // to implement FCMP_OEQ.
          if (User->getOpcode() == ISD::BR) {
            SDValue FalseBB = User->getOperand(1);
            SDNode *NewBR =
                DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
            assert(NewBR == User);
            (void)NewBR;
            Dest = FalseBB;

            Chain = DAG.getNode(M68kISD::BRCOND, DL, Op.getValueType(), Chain,
                                Dest, CC, Cmp);
            M68k::CondCode CCode =
                (M68k::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
            CCode = M68k::GetOppositeBranchCondition(CCode);
            CC = DAG.getConstant(CCode, DL, MVT::i8);
            Cond = Cmp;
            AddTest = false;
          }
        }
      }
    } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
      // Recognize for xorb (setcc), 1 patterns. The xor inverts the condition.
      // It should be transformed during dag combiner except when the condition
      // is set by a arithmetics with overflow node.
      M68k::CondCode CCode =
          (M68k::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
      CCode = M68k::GetOppositeBranchCondition(CCode);
      CC = DAG.getConstant(CCode, DL, MVT::i8);
      Cond = Cond.getOperand(0).getOperand(1);
      AddTest = false;
    }
  }

  if (AddTest) {
    // Look pass the truncate if the high bits are known zero.
    if (isTruncWithZeroHighBitsInput(Cond, DAG))
      Cond = Cond.getOperand(0);

    // We know the result is compared against zero. Try to match it to BT.
    if (Cond.hasOneUse()) {
      if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) {
        CC = NewSetCC.getOperand(0);
        Cond = NewSetCC.getOperand(1);
        AddTest = false;
      }
    }
  }

  if (AddTest) {
    M68k::CondCode MxCond = Inverted ? M68k::COND_EQ : M68k::COND_NE;
    CC = DAG.getConstant(MxCond, DL, MVT::i8);
    Cond = EmitTest(Cond, MxCond, DL, DAG);
  }
  return DAG.getNode(M68kISD::BRCOND, DL, Op.getValueType(), Chain, Dest, CC,
                     Cond);
}

SDValue M68kTargetLowering::LowerADDC_ADDE_SUBC_SUBE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  MVT VT = Op.getNode()->getSimpleValueType(0);

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDVTList VTs = DAG.getVTList(VT, MVT::i8);

  unsigned Opc;
  bool ExtraOp = false;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Invalid code");
  case ISD::ADDC:
    Opc = M68kISD::ADD;
    break;
  case ISD::ADDE:
    Opc = M68kISD::ADDX;
    ExtraOp = true;
    break;
  case ISD::SUBC:
    Opc = M68kISD::SUB;
    break;
  case ISD::SUBE:
    Opc = M68kISD::SUBX;
    ExtraOp = true;
    break;
  }

  if (!ExtraOp)
    return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
                     Op.getOperand(2));
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target countpart wrapped in the M68kISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue M68kTargetLowering::LowerConstantPool(SDValue Op,
                                              SelectionDAG &DAG) const {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);

  // In PIC mode (unless we're in PCRel PIC mode) we add an offset to the
  // global base reg.
  unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);

  unsigned WrapperKind = M68kISD::Wrapper;
  if (M68kII::isPCRelGlobalReference(OpFlag)) {
    WrapperKind = M68kISD::WrapperPC;
  }

  MVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetConstantPool(
      CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag);

  SDLoc DL(CP);
  Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);

  // With PIC, the address is actually $g + Offset.
  if (M68kII::isGlobalRelativeToPICBase(OpFlag)) {
    Result = DAG.getNode(ISD::ADD, DL, PtrVT,
                         DAG.getNode(M68kISD::GLOBAL_BASE_REG, SDLoc(), PtrVT),
                         Result);
  }

  return Result;
}

SDValue M68kTargetLowering::LowerExternalSymbol(SDValue Op,
                                                SelectionDAG &DAG) const {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();

  // In PIC mode (unless we're in PCRel PIC mode) we add an offset to the
  // global base reg.
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
  unsigned char OpFlag = Subtarget.classifyExternalReference(*Mod);

  unsigned WrapperKind = M68kISD::Wrapper;
  if (M68kII::isPCRelGlobalReference(OpFlag)) {
    WrapperKind = M68kISD::WrapperPC;
  }

  auto PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag);

  SDLoc DL(Op);
  Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);

  // With PIC, the address is actually $g + Offset.
  if (M68kII::isGlobalRelativeToPICBase(OpFlag)) {
    Result = DAG.getNode(ISD::ADD, DL, PtrVT,
                         DAG.getNode(M68kISD::GLOBAL_BASE_REG, SDLoc(), PtrVT),
                         Result);
  }

  // For symbols that require a load from a stub to get the address, emit the
  // load.
  if (M68kII::isGlobalStubReference(OpFlag)) {
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  return Result;
}

SDValue M68kTargetLowering::LowerBlockAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  unsigned char OpFlags = Subtarget.classifyBlockAddressReference();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
  SDLoc DL(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Create the TargetBlockAddressAddress node.
  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);

  if (M68kII::isPCRelBlockReference(OpFlags)) {
    Result = DAG.getNode(M68kISD::WrapperPC, DL, PtrVT, Result);
  } else {
    Result = DAG.getNode(M68kISD::Wrapper, DL, PtrVT, Result);
  }

  // With PIC, the address is actually $g + Offset.
  if (M68kII::isGlobalRelativeToPICBase(OpFlags)) {
    Result =
        DAG.getNode(ISD::ADD, DL, PtrVT,
                    DAG.getNode(M68kISD::GLOBAL_BASE_REG, DL, PtrVT), Result);
  }

  return Result;
}

SDValue M68kTargetLowering::LowerGlobalAddress(const GlobalValue *GV,
                                               const SDLoc &DL, int64_t Offset,
                                               SelectionDAG &DAG) const {
  unsigned char OpFlags = Subtarget.classifyGlobalReference(GV);
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Create the TargetGlobalAddress node, folding in the constant
  // offset if it is legal.
  SDValue Result;
  if (M68kII::isDirectGlobalReference(OpFlags)) {
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset);
    Offset = 0;
  } else {
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
  }

  if (M68kII::isPCRelGlobalReference(OpFlags))
    Result = DAG.getNode(M68kISD::WrapperPC, DL, PtrVT, Result);
  else
    Result = DAG.getNode(M68kISD::Wrapper, DL, PtrVT, Result);

  // With PIC, the address is actually $g + Offset.
  if (M68kII::isGlobalRelativeToPICBase(OpFlags)) {
    Result =
        DAG.getNode(ISD::ADD, DL, PtrVT,
                    DAG.getNode(M68kISD::GLOBAL_BASE_REG, DL, PtrVT), Result);
  }

  // For globals that require a load from a stub to get the address, emit the
  // load.
  if (M68kII::isGlobalStubReference(OpFlags)) {
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0) {
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, DL, PtrVT));
  }

  return Result;
}

SDValue M68kTargetLowering::LowerGlobalAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
  return LowerGlobalAddress(GV, SDLoc(Op), Offset, DAG);
}

//===----------------------------------------------------------------------===//
// Custom Lower Jump Table
//===----------------------------------------------------------------------===//

SDValue M68kTargetLowering::LowerJumpTable(SDValue Op,
                                           SelectionDAG &DAG) const {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // In PIC mode (unless we're in PCRel PIC mode) we add an offset to the
  // global base reg.
  unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);

  unsigned WrapperKind = M68kISD::Wrapper;
  if (M68kII::isPCRelGlobalReference(OpFlag)) {
    WrapperKind = M68kISD::WrapperPC;
  }

  auto PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
  SDLoc DL(JT);
  Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);

  // With PIC, the address is actually $g + Offset.
  if (M68kII::isGlobalRelativeToPICBase(OpFlag)) {
    Result = DAG.getNode(ISD::ADD, DL, PtrVT,
                         DAG.getNode(M68kISD::GLOBAL_BASE_REG, SDLoc(), PtrVT),
                         Result);
  }

  return Result;
}

unsigned M68kTargetLowering::getJumpTableEncoding() const {
  return Subtarget.getJumpTableEncoding();
}

const MCExpr *M68kTargetLowering::LowerCustomJumpTableEntry(
    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
    unsigned uid, MCContext &Ctx) const {
  return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF,
                                 Ctx);
}

SDValue M68kTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                     SelectionDAG &DAG) const {
  if (getJumpTableEncoding() == MachineJumpTableInfo::EK_Custom32)
    return DAG.getNode(M68kISD::GLOBAL_BASE_REG, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));

  // MachineJumpTableInfo::EK_LabelDifference32 entry
  return Table;
}

// NOTE This only used for MachineJumpTableInfo::EK_LabelDifference32 entries
const MCExpr *M68kTargetLowering::getPICJumpTableRelocBaseExpr(
    const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const {
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

M68kTargetLowering::ConstraintType
M68kTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() > 0) {
    switch (Constraint[0]) {
    case 'a':
    case 'd':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
      return C_Immediate;
    case 'C':
      if (Constraint.size() == 2)
        switch (Constraint[1]) {
        case '0':
        case 'i':
        case 'j':
          return C_Immediate;
        default:
          break;
        }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

void M68kTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                      std::string &Constraint,
                                                      std::vector<SDValue> &Ops,
                                                      SelectionDAG &DAG) const {
  SDValue Result;

  if (Constraint.size() == 1) {
    // Constant constraints
    switch (Constraint[0]) {
    case 'I':
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P': {
      auto *C = dyn_cast<ConstantSDNode>(Op);
      if (!C)
        return;

      int64_t Val = C->getSExtValue();
      switch (Constraint[0]) {
      case 'I': // constant integer in the range [1,8]
        if (Val > 0 && Val <= 8)
          break;
        return;
      case 'J': // constant signed 16-bit integer
        if (isInt<16>(Val))
          break;
        return;
      case 'K': // constant that is NOT in the range of [-0x80, 0x80)
        if (Val < -0x80 || Val >= 0x80)
          break;
        return;
      case 'L': // constant integer in the range [-8,-1]
        if (Val < 0 && Val >= -8)
          break;
        return;
      case 'M': // constant that is NOT in the range of [-0x100, 0x100]
        if (Val < -0x100 || Val >= 0x100)
          break;
        return;
      case 'N': // constant integer in the range [24,31]
        if (Val >= 24 && Val <= 31)
          break;
        return;
      case 'O': // constant integer 16
        if (Val == 16)
          break;
        return;
      case 'P': // constant integer in the range [8,15]
        if (Val >= 8 && Val <= 15)
          break;
        return;
      default:
        llvm_unreachable("Unhandled constant constraint");
      }

      Result = DAG.getTargetConstant(Val, SDLoc(Op), Op.getValueType());
      break;
    }
    default:
      break;
    }
  }

  if (Constraint.size() == 2) {
    switch (Constraint[0]) {
    case 'C':
      // Constant constraints start with 'C'
      switch (Constraint[1]) {
      case '0':
      case 'i':
      case 'j': {
        auto *C = dyn_cast<ConstantSDNode>(Op);
        if (!C)
          break;

        int64_t Val = C->getSExtValue();
        switch (Constraint[1]) {
        case '0': // constant integer 0
          if (!Val)
            break;
          return;
        case 'i': // constant integer
          break;
        case 'j': // integer constant that doesn't fit in 16 bits
          if (!isInt<16>(C->getSExtValue()))
            break;
          return;
        default:
          llvm_unreachable("Unhandled constant constraint");
        }

        Result = DAG.getTargetConstant(Val, SDLoc(Op), Op.getValueType());
        break;
      }
      default:
        break;
      }
      break;
    default:
      break;
    }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

std::pair<unsigned, const TargetRegisterClass *>
M68kTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                 StringRef Constraint,
                                                 MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
    case 'd':
      switch (VT.SimpleTy) {
      case MVT::i8:
        return std::make_pair(0U, &M68k::DR8RegClass);
      case MVT::i16:
        return std::make_pair(0U, &M68k::DR16RegClass);
      case MVT::i32:
        return std::make_pair(0U, &M68k::DR32RegClass);
      default:
        break;
      }
      break;
    case 'a':
      switch (VT.SimpleTy) {
      case MVT::i16:
        return std::make_pair(0U, &M68k::AR16RegClass);
      case MVT::i32:
        return std::make_pair(0U, &M68k::AR32RegClass);
      default:
        break;
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool M68k::isCalleePop(CallingConv::ID CallingConv, bool IsVarArg,
                       bool GuaranteeTCO) {
  return false;
}

// Return true if it is OK for this CMOV pseudo-opcode to be cascaded
// together with other CMOV pseudo-opcodes into a single basic-block with
// conditional jump around it.
static bool isCMOVPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case M68k::CMOV8d:
  case M68k::CMOV16d:
  case M68k::CMOV32r:
    return true;

  default:
    return false;
  }
}

// The CCR operand of SelectItr might be missing a kill marker
// because there were multiple uses of CCR, and ISel didn't know
// which to mark. Figure out whether SelectItr should have had a
// kill marker, and set it if it should. Returns the correct kill
// marker value.
static bool checkAndUpdateCCRKill(MachineBasicBlock::iterator SelectItr,
                                  MachineBasicBlock *BB,
                                  const TargetRegisterInfo *TRI) {
  // Scan forward through BB for a use/def of CCR.
  MachineBasicBlock::iterator miI(std::next(SelectItr));
  for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
    const MachineInstr &mi = *miI;
    if (mi.readsRegister(M68k::CCR))
      return false;
    if (mi.definesRegister(M68k::CCR))
      break; // Should have kill-flag - update below.
  }

  // If we hit the end of the block, check whether CCR is live into a
  // successor.
  if (miI == BB->end())
    for (const auto *SBB : BB->successors())
      if (SBB->isLiveIn(M68k::CCR))
        return false;

  // We found a def, or hit the end of the basic block and CCR wasn't live
  // out. SelectMI should have a kill flag on CCR.
  SelectItr->addRegisterKilled(M68k::CCR, TRI);
  return true;
}

MachineBasicBlock *
M68kTargetLowering::EmitLoweredSelect(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  // To "insert" a SELECT_CC instruction, we actually have to insert the
  // diamond control-flow pattern.  The incoming instruction knows the
  // destination vreg to set, the condition code register to branch on, the
  // true/false values to select between, and a branch opcode to use.
  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator It = ++MBB->getIterator();

  //  ThisMBB:
  //  ...
  //   TrueVal = ...
  //   cmp ccX, r1, r2
  //   bcc Copy1MBB
  //   fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = MBB;
  MachineFunction *F = MBB->getParent();

  // This code lowers all pseudo-CMOV instructions. Generally it lowers these
  // as described above, by inserting a MBB, and then making a PHI at the join
  // point to select the true and false operands of the CMOV in the PHI.
  //
  // The code also handles two different cases of multiple CMOV opcodes
  // in a row.
  //
  // Case 1:
  // In this case, there are multiple CMOVs in a row, all which are based on
  // the same condition setting (or the exact opposite condition setting).
  // In this case we can lower all the CMOVs using a single inserted MBB, and
  // then make a number of PHIs at the join point to model the CMOVs. The only
  // trickiness here, is that in a case like:
  //
  // t2 = CMOV cond1 t1, f1
  // t3 = CMOV cond1 t2, f2
  //
  // when rewriting this into PHIs, we have to perform some renaming on the
  // temps since you cannot have a PHI operand refer to a PHI result earlier
  // in the same block.  The "simple" but wrong lowering would be:
  //
  // t2 = PHI t1(BB1), f1(BB2)
  // t3 = PHI t2(BB1), f2(BB2)
  //
  // but clearly t2 is not defined in BB1, so that is incorrect. The proper
  // renaming is to note that on the path through BB1, t2 is really just a
  // copy of t1, and do that renaming, properly generating:
  //
  // t2 = PHI t1(BB1), f1(BB2)
  // t3 = PHI t1(BB1), f2(BB2)
  //
  // Case 2, we lower cascaded CMOVs such as
  //
  //   (CMOV (CMOV F, T, cc1), T, cc2)
  //
  // to two successives branches.
  MachineInstr *CascadedCMOV = nullptr;
  MachineInstr *LastCMOV = &MI;
  M68k::CondCode CC = M68k::CondCode(MI.getOperand(3).getImm());
  M68k::CondCode OppCC = M68k::GetOppositeBranchCondition(CC);
  MachineBasicBlock::iterator NextMIIt =
      std::next(MachineBasicBlock::iterator(MI));

  // Check for case 1, where there are multiple CMOVs with the same condition
  // first.  Of the two cases of multiple CMOV lowerings, case 1 reduces the
  // number of jumps the most.

  if (isCMOVPseudo(MI)) {
    // See if we have a string of CMOVS with the same condition.
    while (NextMIIt != MBB->end() && isCMOVPseudo(*NextMIIt) &&
           (NextMIIt->getOperand(3).getImm() == CC ||
            NextMIIt->getOperand(3).getImm() == OppCC)) {
      LastCMOV = &*NextMIIt;
      ++NextMIIt;
    }
  }

  // This checks for case 2, but only do this if we didn't already find
  // case 1, as indicated by LastCMOV == MI.
  if (LastCMOV == &MI && NextMIIt != MBB->end() &&
      NextMIIt->getOpcode() == MI.getOpcode() &&
      NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() &&
      NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() &&
      NextMIIt->getOperand(1).isKill()) {
    CascadedCMOV = &*NextMIIt;
  }

  MachineBasicBlock *Jcc1MBB = nullptr;

  // If we have a cascaded CMOV, we lower it to two successive branches to
  // the same block.  CCR is used by both, so mark it as live in the second.
  if (CascadedCMOV) {
    Jcc1MBB = F->CreateMachineBasicBlock(BB);
    F->insert(It, Jcc1MBB);
    Jcc1MBB->addLiveIn(M68k::CCR);
  }

  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, Copy0MBB);
  F->insert(It, SinkMBB);

  // If the CCR register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineInstr *LastCCRSUser = CascadedCMOV ? CascadedCMOV : LastCMOV;
  if (!LastCCRSUser->killsRegister(M68k::CCR) &&
      !checkAndUpdateCCRKill(LastCCRSUser, MBB, TRI)) {
    Copy0MBB->addLiveIn(M68k::CCR);
    SinkMBB->addLiveIn(M68k::CCR);
  }

  // Transfer the remainder of MBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the true and fallthrough blocks as its successors.
  if (CascadedCMOV) {
    // The fallthrough block may be Jcc1MBB, if we have a cascaded CMOV.
    MBB->addSuccessor(Jcc1MBB);

    // In that case, Jcc1MBB will itself fallthrough the Copy0MBB, and
    // jump to the SinkMBB.
    Jcc1MBB->addSuccessor(Copy0MBB);
    Jcc1MBB->addSuccessor(SinkMBB);
  } else {
    MBB->addSuccessor(Copy0MBB);
  }

  // The true block target of the first (or only) branch is always SinkMBB.
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  unsigned Opc = M68k::GetCondBranchFromCond(CC);
  BuildMI(MBB, DL, TII->get(Opc)).addMBB(SinkMBB);

  if (CascadedCMOV) {
    unsigned Opc2 = M68k::GetCondBranchFromCond(
        (M68k::CondCode)CascadedCMOV->getOperand(3).getImm());
    BuildMI(Jcc1MBB, DL, TII->get(Opc2)).addMBB(SinkMBB);
  }

  //  Copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to SinkMBB
  Copy0MBB->addSuccessor(SinkMBB);

  //  SinkMBB:
  //   %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  //  ...
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
  MachineInstrBuilder MIB;

  // As we are creating the PHIs, we have to be careful if there is more than
  // one.  Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from earlier PHI's
  // destination registers, and the registers that went into the PHI.

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    unsigned DestReg = MIIt->getOperand(0).getReg();
    unsigned Op1Reg = MIIt->getOperand(1).getReg();
    unsigned Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV we are generating is the opposite condition from
    // the jump we generated, then we have to swap the operands for the
    // PHI that is going to be generated.
    if (MIIt->getOperand(3).getImm() == OppCC)
      std::swap(Op1Reg, Op2Reg);

    if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
      Op1Reg = RegRewriteTable[Op1Reg].first;

    if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
      Op2Reg = RegRewriteTable[Op2Reg].second;

    MIB =
        BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(M68k::PHI), DestReg)
            .addReg(Op1Reg)
            .addMBB(Copy0MBB)
            .addReg(Op2Reg)
            .addMBB(ThisMBB);

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // If we have a cascaded CMOV, the second Jcc provides the same incoming
  // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
  if (CascadedCMOV) {
    MIB.addReg(MI.getOperand(2).getReg()).addMBB(Jcc1MBB);
    // Copy the PHI result to the register defined by the second CMOV.
    BuildMI(*SinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
            DL, TII->get(TargetOpcode::COPY),
            CascadedCMOV->getOperand(0).getReg())
        .addReg(MI.getOperand(0).getReg());
    CascadedCMOV->eraseFromParent();
  }

  // Now remove the CMOV(s).
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;)
    (MIIt++)->eraseFromParent();

  return SinkMBB;
}

MachineBasicBlock *
M68kTargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
                                         MachineBasicBlock *BB) const {
  llvm_unreachable("Cannot lower Segmented Stack Alloca with stack-split on");
}

MachineBasicBlock *
M68kTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case M68k::CMOV8d:
  case M68k::CMOV16d:
  case M68k::CMOV32r:
    return EmitLoweredSelect(MI, BB);
  case M68k::SALLOCA:
    return EmitLoweredSegAlloca(MI, BB);
  }
}

SDValue M68kTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto PtrVT = getPointerTy(MF.getDataLayout());
  M68kMachineFunctionInfo *FuncInfo = MF.getInfo<M68kMachineFunctionInfo>();

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
SDValue M68kTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                    SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool SplitStack = MF.shouldSplitStack();

  SDLoc DL(Op);

  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);

  SDValue Result;
  if (SplitStack) {
    auto &MRI = MF.getRegInfo();
    auto SPTy = getPointerTy(DAG.getDataLayout());
    auto *ARClass = getRegClassFor(SPTy);
    unsigned Vreg = MRI.createVirtualRegister(ARClass);
    Chain = DAG.getCopyToReg(Chain, DL, Vreg, Size);
    Result = DAG.getNode(M68kISD::SEG_ALLOCA, DL, SPTy, Chain,
                         DAG.getRegister(Vreg, SPTy));
  } else {
    auto &TLI = DAG.getTargetLoweringInfo();
    unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
    assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
                    " not tell us which reg is the stack pointer!");

    SDValue SP = DAG.getCopyFromReg(Chain, DL, SPReg, VT);
    Chain = SP.getValue(1);
    const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
    unsigned StackAlign = TFI.getStackAlignment();
    Result = DAG.getNode(ISD::SUB, DL, VT, SP, Size); // Value
    if (Align > StackAlign)
      Result = DAG.getNode(ISD::AND, DL, VT, Result,
                           DAG.getConstant(-(uint64_t)Align, DL, VT));
    Chain = DAG.getCopyToReg(Chain, DL, SPReg, Result); // Output chain
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, DL);
}

//===----------------------------------------------------------------------===//
// DAG Combine
//===----------------------------------------------------------------------===//

static SDValue getSETCC(M68k::CondCode Cond, SDValue CCR, const SDLoc &dl,
                        SelectionDAG &DAG) {
  return DAG.getNode(M68kISD::SETCC, dl, MVT::i8,
                     DAG.getConstant(Cond, dl, MVT::i8), CCR);
}
// When legalizing carry, we create carries via add X, -1
// If that comes from an actual carry, via setcc, we use the
// carry directly.
static SDValue combineCarryThroughADD(SDValue CCR) {
  if (CCR.getOpcode() == M68kISD::ADD) {
    if (isAllOnesConstant(CCR.getOperand(1))) {
      SDValue Carry = CCR.getOperand(0);
      while (Carry.getOpcode() == ISD::TRUNCATE ||
             Carry.getOpcode() == ISD::ZERO_EXTEND ||
             Carry.getOpcode() == ISD::SIGN_EXTEND ||
             Carry.getOpcode() == ISD::ANY_EXTEND ||
             (Carry.getOpcode() == ISD::AND &&
              isOneConstant(Carry.getOperand(1))))
        Carry = Carry.getOperand(0);
      if (Carry.getOpcode() == M68kISD::SETCC ||
          Carry.getOpcode() == M68kISD::SETCC_CARRY) {
        if (Carry.getConstantOperandVal(0) == M68k::COND_CS)
          return Carry.getOperand(1);
      }
    }
  }

  return SDValue();
}

/// Optimize a CCR definition used according to the condition code \p CC into
/// a simpler CCR value, potentially returning a new \p CC and replacing uses
/// of chain values.
static SDValue combineSetCCCCR(SDValue CCR, M68k::CondCode &CC,
                               SelectionDAG &DAG,
                               const M68kSubtarget &Subtarget) {
  if (CC == M68k::COND_CS)
    if (SDValue Flags = combineCarryThroughADD(CCR))
      return Flags;

  return SDValue();
}

// Optimize  RES = M68kISD::SETCC CONDCODE, CCR_INPUT
static SDValue combineM68kSetCC(SDNode *N, SelectionDAG &DAG,
                                const M68kSubtarget &Subtarget) {
  SDLoc DL(N);
  M68k::CondCode CC = M68k::CondCode(N->getConstantOperandVal(0));
  SDValue CCR = N->getOperand(1);

  // Try to simplify the CCR and condition code operands.
  if (SDValue Flags = combineSetCCCCR(CCR, CC, DAG, Subtarget))
    return getSETCC(CC, Flags, DL, DAG);

  return SDValue();
}
static SDValue combineM68kBrCond(SDNode *N, SelectionDAG &DAG,
                                 const M68kSubtarget &Subtarget) {
  SDLoc DL(N);
  M68k::CondCode CC = M68k::CondCode(N->getConstantOperandVal(2));
  SDValue CCR = N->getOperand(3);

  // Try to simplify the CCR and condition code operands.
  // Make sure to not keep references to operands, as combineSetCCCCR can
  // RAUW them under us.
  if (SDValue Flags = combineSetCCCCR(CCR, CC, DAG, Subtarget)) {
    SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
    return DAG.getNode(M68kISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
                       N->getOperand(1), Cond, Flags);
  }

  return SDValue();
}

static SDValue combineSUBX(SDNode *N, SelectionDAG &DAG) {
  if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
    MVT VT = N->getSimpleValueType(0);
    SDVTList VTs = DAG.getVTList(VT, MVT::i32);
    return DAG.getNode(M68kISD::SUBX, SDLoc(N), VTs, N->getOperand(0),
                       N->getOperand(1), Flags);
  }

  return SDValue();
}

// Optimize RES, CCR = M68kISD::ADDX LHS, RHS, CCR
static SDValue combineADDX(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI) {
  if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
    MVT VT = N->getSimpleValueType(0);
    SDVTList VTs = DAG.getVTList(VT, MVT::i32);
    return DAG.getNode(M68kISD::ADDX, SDLoc(N), VTs, N->getOperand(0),
                       N->getOperand(1), Flags);
  }

  return SDValue();
}

SDValue M68kTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  case M68kISD::SUBX:
    return combineSUBX(N, DAG);
  case M68kISD::ADDX:
    return combineADDX(N, DAG, DCI);
  case M68kISD::SETCC:
    return combineM68kSetCC(N, DAG, Subtarget);
  case M68kISD::BRCOND:
    return combineM68kBrCond(N, DAG, Subtarget);
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// M68kISD Node Names
//===----------------------------------------------------------------------===//
const char *M68kTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  case M68kISD::CALL:
    return "M68kISD::CALL";
  case M68kISD::TAIL_CALL:
    return "M68kISD::TAIL_CALL";
  case M68kISD::RET:
    return "M68kISD::RET";
  case M68kISD::TC_RETURN:
    return "M68kISD::TC_RETURN";
  case M68kISD::ADD:
    return "M68kISD::ADD";
  case M68kISD::SUB:
    return "M68kISD::SUB";
  case M68kISD::ADDX:
    return "M68kISD::ADDX";
  case M68kISD::SUBX:
    return "M68kISD::SUBX";
  case M68kISD::SMUL:
    return "M68kISD::SMUL";
  case M68kISD::UMUL:
    return "M68kISD::UMUL";
  case M68kISD::OR:
    return "M68kISD::OR";
  case M68kISD::XOR:
    return "M68kISD::XOR";
  case M68kISD::AND:
    return "M68kISD::AND";
  case M68kISD::CMP:
    return "M68kISD::CMP";
  case M68kISD::BT:
    return "M68kISD::BT";
  case M68kISD::SELECT:
    return "M68kISD::SELECT";
  case M68kISD::CMOV:
    return "M68kISD::CMOV";
  case M68kISD::BRCOND:
    return "M68kISD::BRCOND";
  case M68kISD::SETCC:
    return "M68kISD::SETCC";
  case M68kISD::SETCC_CARRY:
    return "M68kISD::SETCC_CARRY";
  case M68kISD::GLOBAL_BASE_REG:
    return "M68kISD::GLOBAL_BASE_REG";
  case M68kISD::Wrapper:
    return "M68kISD::Wrapper";
  case M68kISD::WrapperPC:
    return "M68kISD::WrapperPC";
  case M68kISD::SEG_ALLOCA:
    return "M68kISD::SEG_ALLOCA";
  default:
    return NULL;
  }
}

CCAssignFn *M68kTargetLowering::getCCAssignFn(CallingConv::ID CC, bool Return,
                                              bool IsVarArg) const {
  if (Return)
    return RetCC_M68k_C;
  else
    return CC_M68k_C;
}