[Hexagon] Reorganize and update instruction patterns

llvm-svn: 316228
2024-10-18 18:42:46 +02:00 · 2017-10-20 19:33:12 +00:00 · 2017-10-20 19:33:12 +00:00 · 8d03b9529a
commit 8d03b9529a
parent 4da2de6a49
25 changed files with 2621 additions and 2832 deletions
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@ -68,3 +68,4 @@ add_subdirectory(AsmParser)
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
 add_subdirectory(Disassembler)
+
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@ -282,7 +282,6 @@ include "HexagonPseudo.td"
 include "HexagonPatterns.td"
 include "HexagonDepMappings.td"
 include "HexagonIntrinsics.td"
-include "HexagonIntrinsicsDerived.td"
 include "HexagonMapAsm2IntrinV62.gen.td"

 def HexagonInstrInfo : InstrInfo;
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@ -511,8 +511,8 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
                                        int64_t IVBump) const {
  Comparison::Kind Cmp = (Comparison::Kind)0;
  switch (CondOpc) {
-  case Hexagon::C2_cmpeqi:
  case Hexagon::C2_cmpeq:
+  case Hexagon::C2_cmpeqi:
  case Hexagon::C2_cmpeqp:
    Cmp = Comparison::EQ;
    break;
@ -520,21 +520,35 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
  case Hexagon::C4_cmpneqi:
    Cmp = Comparison::NE;
    break;
+  case Hexagon::C2_cmplt:
+    Cmp = Comparison::LTs;
+    break;
+  case Hexagon::C2_cmpltu:
+    Cmp = Comparison::LTu;
+    break;
  case Hexagon::C4_cmplte:
+  case Hexagon::C4_cmpltei:
    Cmp = Comparison::LEs;
    break;
  case Hexagon::C4_cmplteu:
+  case Hexagon::C4_cmplteui:
    Cmp = Comparison::LEu;
    break;
-  case Hexagon::C2_cmpgtui:
+  case Hexagon::C2_cmpgt:
+  case Hexagon::C2_cmpgti:
+  case Hexagon::C2_cmpgtp:
+    Cmp = Comparison::GTs;
+    break;
  case Hexagon::C2_cmpgtu:
+  case Hexagon::C2_cmpgtui:
  case Hexagon::C2_cmpgtup:
    Cmp = Comparison::GTu;
    break;
-  case Hexagon::C2_cmpgti:
-  case Hexagon::C2_cmpgt:
-  case Hexagon::C2_cmpgtp:
-    Cmp = Comparison::GTs;
+  case Hexagon::C2_cmpgei:
+    Cmp = Comparison::GEs;
+    break;
+  case Hexagon::C2_cmpgeui:
+    // Unsigned compare: use the unsigned kind, consistent with how
+    // C2_cmpgtu/C2_cmpgtui map to GTu and C4_cmplteu/C4_cmplteui map to LEu
+    // in this switch. Mapping it to GEs would classify it as signed.
+    Cmp = Comparison::GEu;
    break;
  default:
    return (Comparison::Kind)0;
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@ -43,6 +43,9 @@ cl::opt<bool>
 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
  cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));

+static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
+  cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
+
 //===----------------------------------------------------------------------===//
 // Instruction Selector Implementation
 //===----------------------------------------------------------------------===//
@ -82,10 +85,19 @@ public:
  // Complex Pattern Selectors.
  inline bool SelectAddrGA(SDValue &N, SDValue &R);
  inline bool SelectAddrGP(SDValue &N, SDValue &R);
-  bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP);
+  inline bool SelectAnyImm(SDValue &N, SDValue &R);
+  inline bool SelectAnyInt(SDValue &N, SDValue &R);
+  bool SelectAnyImmediate(SDValue &N, SDValue &R, uint32_t LogAlign);
+  bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP,
+                           uint32_t LogAlign);
  bool SelectAddrFI(SDValue &N, SDValue &R);
  bool DetectUseSxtw(SDValue &N, SDValue &R);

+  inline bool SelectAnyImm0(SDValue &N, SDValue &R);
+  inline bool SelectAnyImm1(SDValue &N, SDValue &R);
+  inline bool SelectAnyImm2(SDValue &N, SDValue &R);
+  inline bool SelectAnyImm3(SDValue &N, SDValue &R);
+
  StringRef getPassName() const override {
    return "Hexagon DAG->DAG Pattern Instruction Selection";
  }
@ -126,6 +138,7 @@ private:
  bool isAlignedMemNode(const MemSDNode *N) const;
  bool isSmallStackStore(const StoreSDNode *N) const;
  bool isPositiveHalfWord(const SDNode *N) const;
+  bool hasOneUse(const SDNode *N) const;

  // DAG preprocessing functions.
  void ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes);
@ -1250,15 +1263,88 @@ bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) {
 }

 inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) {
-  return SelectGlobalAddress(N, R, false);
+  return SelectGlobalAddress(N, R, false, 0);
 }

 inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) {
-  return SelectGlobalAddress(N, R, true);
+  return SelectGlobalAddress(N, R, true, 0);
+}
+
+// Forwards to SelectAnyImmediate with LogAlign = 0.
+inline bool HexagonDAGToDAGISel::SelectAnyImm(SDValue &N, SDValue &R) {
+  return SelectAnyImmediate(N, R, /*LogAlign=*/0);
+}
+
+// SelectAnyImm<K> forwards to SelectAnyImmediate with LogAlign = K.
+inline bool HexagonDAGToDAGISel::SelectAnyImm0(SDValue &N, SDValue &R) {
+  return SelectAnyImmediate(N, R, /*LogAlign=*/0);
+}
+inline bool HexagonDAGToDAGISel::SelectAnyImm1(SDValue &N, SDValue &R) {
+  return SelectAnyImmediate(N, R, /*LogAlign=*/1);
+}
+inline bool HexagonDAGToDAGISel::SelectAnyImm2(SDValue &N, SDValue &R) {
+  return SelectAnyImmediate(N, R, /*LogAlign=*/2);
+}
+inline bool HexagonDAGToDAGISel::SelectAnyImm3(SDValue &N, SDValue &R) {
+  return SelectAnyImmediate(N, R, /*LogAlign=*/3);
+}
+
+// Accepts N unchanged (R = N) when it is a ConstantSDNode whose value type
+// is a 32-bit integer; rejects everything else.
+inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) {
+  const EVT VT = N.getValueType();
+  const bool Is32BitInt = VT.isInteger() && VT.getSizeInBits() == 32;
+  if (Is32BitInt && isa<ConstantSDNode>(N)) {
+    R = N;
+    return true;
+  }
+  return false;
+}
+
+// Try to match N as an immediate-like operand that satisfies an alignment of
+// (1 << LogAlign). On success the operand to use is written to R and true is
+// returned. Nodes not handled by the switch are passed on to
+// SelectGlobalAddress (first without, then with UseGP).
+bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R,
+                                             uint32_t LogAlign) {
+  // True when Value is already a multiple of (1 << LogAlign).
+  auto IsAligned = [LogAlign] (uint64_t Value) -> bool {
+    return alignTo(Value, 1u << LogAlign) == Value;
+  };
+
+  switch (N.getOpcode()) {
+  case ISD::Constant: {
+    // Only i32 constants with a suitably aligned value are accepted; they
+    // are re-emitted as target constants.
+    const EVT VT = N.getValueType();
+    if (VT != MVT::i32)
+      return false;
+    const int32_t Imm = cast<const ConstantSDNode>(N)->getZExtValue();
+    if (!IsAligned(Imm))
+      return false;
+    R = CurDAG->getTargetConstant(Imm, SDLoc(N), VT);
+    return true;
+  }
+  case HexagonISD::JT:
+  case HexagonISD::CP:
+    // Jump tables and constant pools are assumed to be aligned to at least
+    // an 8-byte boundary, so any LogAlign up to 3 is acceptable.
+    if (LogAlign <= 3) {
+      R = N.getOperand(0);
+      return true;
+    }
+    return false;
+  case ISD::ExternalSymbol:
+    // A symbol may sit at any address, so only LogAlign == 0 can be
+    // guaranteed.
+    if (LogAlign == 0) {
+      R = N;
+      return true;
+    }
+    return false;
+  case ISD::BlockAddress:
+    // Block addresses are aligned to at least a 4-byte boundary; the offset
+    // must additionally keep the requested alignment.
+    if (LogAlign <= 2 && IsAligned(cast<BlockAddressSDNode>(N)->getOffset())) {
+      R = N;
+      return true;
+    }
+    return false;
+  default:
+    break;
+  }
+
+  return SelectGlobalAddress(N, R, false, LogAlign) ||
+         SelectGlobalAddress(N, R, true, LogAlign);
 }

 bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
-                                              bool UseGP) {
+                                              bool UseGP, uint32_t LogAlign) {
+  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
+    return alignTo(V, 1u << LogAlign) == V;
+  };
+
  switch (N.getOpcode()) {
  case ISD::ADD: {
    SDValue N0 = N.getOperand(0);
@ -1270,6 +1356,9 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
      return false;
    if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
      SDValue Addr = N0.getOperand(0);
+      // For the purpose of alignment, sextvalue and zextvalue are the same.
+      if (!IsAligned(Const->getZExtValue()))
+        return false;
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
        if (GA->getOpcode() == ISD::TargetGlobalAddress) {
          uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
@ -1281,6 +1370,8 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
    }
    break;
  }
+  case HexagonISD::CP:
+  case HexagonISD::JT:
  case HexagonISD::CONST32:
    // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
    // want in the instruction.
@ -1434,7 +1525,8 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
 bool HexagonDAGToDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
  assert(N->getOpcode() == ISD::OR);
  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  assert(C);
+  if (!C)
+    return false;

  // Detect when "or" is used to add an offset to a stack object.
  if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
@ -1480,6 +1572,10 @@ bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
  return false;
 }

+// Single-use query gated by the CheckSingleUse option: when the option is
+// off every node is reported as single-use, otherwise the answer comes from
+// N->hasOneUse().
+bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
+  if (!CheckSingleUse)
+    return true;
+  return N->hasOneUse();
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Rebalancing of address calculation trees

--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@ -1967,6 +1967,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
    setOperationAction(ISD::SRL, VT, Custom);
  }

+  // Extending loads from (native) vectors of i8 into (native) vectors of i16
+  // are legal.
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
+
  // Types natively supported:
  for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1,
                       MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32,
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@ -1590,10 +1590,14 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
    case Hexagon::A4_cmpbgtui:
    case Hexagon::A4_cmpheqi:
    case Hexagon::A4_cmphgti:
-    case Hexagon::A4_cmphgtui:
+    case Hexagon::A4_cmphgtui: {
      SrcReg2 = 0;
+      const MachineOperand &Op2 = MI.getOperand(2);
+      if (!Op2.isImm())
+        return false;
      Value = MI.getOperand(2).getImm();
      return true;
+    }
  }

  return false;
--- a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@ -1,40 +0,0 @@
-//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Multiply 64-bit and use lower result
-//
-// Optimized with intrinisics accumulates
-//
-def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
-      (i64
-       (A2_combinew
-        (M2_maci
-         (M2_maci
-          (i32
-           (EXTRACT_SUBREG
-            (i64
-             (M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
-                                          isub_lo)),
-                     (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
-                                          isub_lo)))),
-            isub_hi)),
-          (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)),
-          (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_hi))),
-         (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_lo)),
-         (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_hi))),
-        (i32
-         (EXTRACT_SUBREG
-          (i64
-           (M2_dpmpyuu_s0 
-             (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)),
-                   (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
-                                        isub_lo)))), isub_lo))))>;
-
-
-
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@ -228,7 +228,11 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
  // If the second operand of the compare is an imm, make sure it's in the
  // range specified by the arch.
  if (!secondReg) {
-    int64_t v = MI.getOperand(2).getImm();
+    const MachineOperand &Op2 = MI.getOperand(2);
+    if (!Op2.isImm())
+      return false;
+
+    int64_t v = Op2.getImm();
    bool Valid = false;

    switch (MI.getOpcode()) {
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@ -29,17 +29,5 @@ def u64_0Imm : Operand<i64> { let ParserMatchClass = u64_0ImmOperand; }
 def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; }
 def n1Const : Operand<i32> { let ParserMatchClass = n1ConstOperand; }

-// This complex pattern exists only to create a machine instruction operand
-// of type "frame index". There doesn't seem to be a way to do that directly
-// in the patterns.
-def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
-
-// These complex patterns are not strictly necessary, since global address
-// folding will happen during DAG combining. For distinguishing between GA
-// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used.
-def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
-def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
-
-
 def bblabel : Operand<i32>;
 def bbl     : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
--- a/test/CodeGen/Hexagon/PR33749.ll
+++ b/test/CodeGen/Hexagon/PR33749.ll
@ -0,0 +1,50 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; This testcase used to fail with "cannot select 'i1 = add x, y'".
+; Check for some sane output:
+; CHECK: xor(p{{[0-3]}},p{{[0-3]}})
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define void @foo(i32* nocapture %a0) local_unnamed_addr #0 {
+b1:
+  %v2 = getelementptr inbounds i32, i32* %a0, i32 26
+  %v3 = load i32, i32* %v2, align 4
+  %v4 = add nsw i32 %v3, 1
+  %v5 = load i32, i32* %a0, align 4
+  br label %b6
+
+b6:                                               ; preds = %b28, %b1
+  %v7 = phi i32 [ %v29, %b28 ], [ %v5, %b1 ]
+  %v8 = mul nsw i32 %v4, %v7
+  %v9 = add nsw i32 %v8, %v7
+  %v10 = mul i32 %v7, %v7
+  %v11 = mul i32 %v10, %v9
+  %v12 = add nsw i32 %v11, 1
+  %v13 = mul nsw i32 %v12, %v7
+  %v14 = add nsw i32 %v13, %v7
+  %v15 = mul i32 %v10, %v14
+  %v16 = and i32 %v15, 1
+  %v17 = add nsw i32 %v16, -1
+  %v18 = mul i32 %v10, %v7
+  %v19 = mul i32 %v18, %v11
+  %v20 = mul i32 %v19, %v17
+  %v21 = and i32 %v20, 1
+  %v22 = add nsw i32 %v21, -1
+  %v23 = mul nsw i32 %v22, %v3
+  %v24 = sub nsw i32 %v7, %v23
+  %v25 = mul i32 %v10, %v24
+  %v26 = sub i32 0, %v7
+  %v27 = icmp eq i32 %v25, %v26
+  br i1 %v27, label %b30, label %b28
+
+b28:                                              ; preds = %b6
+  %v29 = add nsw i32 %v3, %v7
+  store i32 %v29, i32* %a0, align 4
+  br label %b6
+
+b30:                                              ; preds = %b6
+  ret void
+}
+
+attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" }
--- a/test/CodeGen/Hexagon/addrmode-indoff.ll
+++ b/test/CodeGen/Hexagon/addrmode-indoff.ll
@ -3,72 +3,90 @@
 ; Bug 6840. Use absolute+index addressing.

@ga = common global [1024 x i8] zeroinitializer, align 8
-@gb = common global [1024 x i8] zeroinitializer, align 8

-; CHECK: memub(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##ga)
-define zeroext i8 @lf2(i32 %i) nounwind readonly {
+; CHECK-LABEL: test0
+; CHECK: memub(r{{[0-9]+}}+##ga)
+define zeroext i8 @test0(i32 %i) nounwind readonly {
 entry:
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i
-  %0 = load i8, i8* %arrayidx, align 1
+  %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i
+  %0 = load i8, i8* %t, align 1
  ret i8 %0
 }

-; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##gb)
-define signext i8 @lf2s(i32 %i) nounwind readonly {
+; CHECK-LABEL: test1
+; CHECK: memb(r{{[0-9]+}}+##ga)
+define signext i8 @test1(i32 %i) nounwind readonly {
 entry:
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %i
-  %0 = load i8, i8* %arrayidx, align 1
+  %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i
+  %0 = load i8, i8* %t, align 1
  ret i8 %0
 }

-; CHECK: memub(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##ga)
-define zeroext i8 @lf3(i32 %i) nounwind readonly {
+; CHECK-LABEL: test2
+; CHECK: memub(r{{[0-9]+}}<<#1+##ga)
+define zeroext i8 @test2(i32 %i) nounwind readonly {
 entry:
-  %mul = shl nsw i32 %i, 2
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
+  %j = shl nsw i32 %i, 1
+  %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j
+  %0 = load i8, i8* %t, align 1
  ret i8 %0
 }

-; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##gb)
-define signext i8 @lf3s(i32 %i) nounwind readonly {
+; CHECK-LABEL: test3
+; CHECK: memb(r{{[0-9]+}}<<#1+##ga)
+define signext i8 @test3(i32 %i) nounwind readonly {
 entry:
-  %mul = shl nsw i32 %i, 2
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %mul
-  %0 = load i8, i8* %arrayidx, align 1
+  %j = shl nsw i32 %i, 1
+  %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j
+  %0 = load i8, i8* %t, align 1
  ret i8 %0
 }

-; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##ga)
-define void @sf4(i32 %i, i8 zeroext %j) nounwind {
+; CHECK-LABEL: test4
+; CHECK: memub(r{{[0-9]+}}<<#2+##ga)
+define zeroext i8 @test4(i32 %i) nounwind readonly {
 entry:
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i
-  store i8 %j, i8* %arrayidx, align 1
+  %j = shl nsw i32 %i, 2
+  %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j
+  %0 = load i8, i8* %t, align 1
+  ret i8 %0
+}
+
+; CHECK-LABEL: test5
+; CHECK: memb(r{{[0-9]+}}<<#2+##ga)
+define signext i8 @test5(i32 %i) nounwind readonly {
+entry:
+  %j = shl nsw i32 %i, 2
+  %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j
+  %0 = load i8, i8* %t, align 1
+  ret i8 %0
+}
+
+; CHECK-LABEL: test10
+; CHECK: memb(r{{[0-9]+}}+##ga)
+define void @test10(i32 %i, i8 zeroext %v) nounwind {
+entry:
+  %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i
+  store i8 %v, i8* %t, align 1
  ret void
 }

-; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##gb)
-define void @sf4s(i32 %i, i8 signext %j) nounwind {
+; CHECK-LABEL: test11
+; CHECK: memb(r{{[0-9]+}}<<#1+##ga)
+define void @test11(i32 %i, i8 signext %v) nounwind {
 entry:
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %i
-  store i8 %j, i8* %arrayidx, align 1
+  %j = shl nsw i32 %i, 1
+  %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j
+  store i8 %v, i8* %t, align 1
  ret void
 }

-; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##ga)
-define void @sf5(i32 %i, i8 zeroext %j) nounwind {
+; CHECK-LABEL: test12
+; CHECK: memb(r{{[0-9]+}}<<#2+##ga)
+define void @test12(i32 %i, i8 zeroext %v) nounwind {
 entry:
-  %mul = shl nsw i32 %i, 2
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %mul
-  store i8 %j, i8* %arrayidx, align 1
-  ret void
-}
-
-; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##gb)
-define void @sf5s(i32 %i, i8 signext %j) nounwind {
-entry:
-  %mul = shl nsw i32 %i, 2
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %mul
-  store i8 %j, i8* %arrayidx, align 1
+  %j = shl nsw i32 %i, 2
+  %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j
+  store i8 %v, i8* %t, align 1
  ret void
 }
--- a/test/CodeGen/Hexagon/block-addr.ll
+++ b/test/CodeGen/Hexagon/block-addr.ll
@ -1,7 +1,6 @@
 ; RUN: llc -march=hexagon < %s | FileCheck %s

-; CHECK: .LJTI
-; CHECK-DAG: r[[REG:[0-9]+]] = memw(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+<<#[0-9]+}})
+; CHECK-DAG: r[[REG:[0-9]+]] = memw(r{{[0-9]+<<#[0-9]+}}+##.LJTI{{.*}})
 ; CHECK-DAG: jumpr r[[REG]]

 define void @main() #0 {
--- a/test/CodeGen/Hexagon/hwloop-loop1.ll
+++ b/test/CodeGen/Hexagon/hwloop-loop1.ll
@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner=0 < %s | FileCheck %s
 ;
 ; Generate loop1 instruction for double loop sequence.

--- a/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll
+++ b/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll
@ -1,31 +1,34 @@
-; RUN: llc -march=hexagon -hexagon-eif=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-eif=0 -disable-machine-sink < %s | FileCheck %s
 target triple = "hexagon"

 %struct.0 = type { i16, i16 }

@t = external local_unnamed_addr global %struct.0, align 2

-define void @foo(i32 %p) local_unnamed_addr #0 {
+define void @foo(i32 %p, i16 %x, i16 %y, i16 %z) local_unnamed_addr #0 {
 entry:
  %conv90 = trunc i32 %p to i16
  %call105 = call signext i16 @bar(i16 signext 16384, i16 signext undef) #0
  %call175 = call signext i16 @bar(i16 signext %conv90, i16 signext 4) #0
  %call197 = call signext i16 @bar(i16 signext %conv90, i16 signext 4) #0
+  %x1 = add i16 %x, 1
+  %z1 = add i16 %z, 1
  %cmp199 = icmp eq i16 %call197, 0
  br i1 %cmp199, label %if.then200, label %if.else201

-; CHECK-DAG: [[R4:r[0-9]+]] = #4
+; CHECK-DAG: [[R4:r[0-9]+]] = add
 ; CHECK: p0 = cmp.eq(r0,#0)
-; CHECK: if (!p0.new) [[R3:r[0-9]+]] = #3
+; CHECK: if (!p0) [[R3:r[0-9]+]] = add(r{{[0-9]+}},#3)
 ; CHECK-DAG: if (!p0) memh(##t) = [[R3]]
 ; CHECK-DAG: if (p0) memh(##t) = [[R4]]
 if.then200:                                       ; preds = %entry
-  store i16 4, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2
-  store i16 0, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 1), align 2
+  store i16 %x1, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2
+  store i16 %z1, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 1), align 2
  br label %if.end202

 if.else201:                                       ; preds = %entry
-  store i16 3, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2
+  %y1 = add i16 %y, 3
+  store i16 %y1, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2
  br label %if.end202

 if.end202:                                        ; preds = %if.else201, %if.then200
@ -34,4 +37,4 @@ if.end202:                                        ; preds = %if.else201, %if.the

 declare signext i16 @bar(i16 signext, i16 signext) local_unnamed_addr #0

-attributes #0 = { optsize "target-cpu"="hexagonv55" }
+attributes #0 = { "target-cpu"="hexagonv55" }
--- a/test/CodeGen/Hexagon/sdata-array.ll
+++ b/test/CodeGen/Hexagon/sdata-array.ll
@ -5,9 +5,9 @@

@foo = common global [4 x i8] zeroinitializer, align 1

-define void @set() nounwind {
+define void @set(i8 %x) nounwind {
 entry:
-  store i8 0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @foo, i32 0, i32 0), align 1
+  store i8 %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @foo, i32 0, i32 0), align 1
  ret void
 }

--- a/test/CodeGen/Hexagon/store-imm-amode.ll
+++ b/test/CodeGen/Hexagon/store-imm-amode.ll
@ -0,0 +1,97 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that a store with a proper addressing mode is selected for various
+; cases of storing an immediate value.
+
+
+@var_i8 = global [10 x i8] zeroinitializer, align 8
+
+; CHECK-LABEL: store_imm_i8:
+; CHECK: memb(r0+#0) = #-1
+define void @store_imm_i8(i8* %p) nounwind {
+  store i8 255, i8* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_rr_i8:
+; CHECK: [[RV:r[0-9]+]] = #255
+; CHECK: memb(r0+r1<<#0) = [[RV]]
+define void @store_rr_i8(i8* %p, i32 %x) nounwind {
+  %t0 = getelementptr i8, i8* %p, i32 %x
+  store i8 255, i8* %t0, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_io_i8:
+; CHECK: [[RV:r[0-9]+]] = #255
+; CHECK: memb(r0+##var_i8) = [[RV]]
+define void @store_io_i8(i32 %x) nounwind {
+  %t0 = getelementptr [10 x i8], [10 x i8]* @var_i8, i32 0, i32 %x
+  store i8 255, i8* %t0, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_ur_i8:
+; CHECK: [[RV:r[0-9]+]] = #255
+; CHECK: memb(r0<<#2+##var_i8) = [[RV]]
+define void @store_ur_i8(i32 %x) nounwind {
+  %t0 = shl i32 %x, 2
+  %t1 = getelementptr [10 x i8], [10 x i8]* @var_i8, i32 0, i32 %t0
+  store i8 255, i8* %t1, align 4
+  ret void
+}
+
+@var_i16 = global [10 x i16] zeroinitializer, align 8
+
+; CHECK-LABEL: store_imm_i16:
+; CHECK: memh(r0+#0) = #-1
+define void @store_imm_i16(i16* %p) nounwind {
+  store i16 65535, i16* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_rr_i16:
+; CHECK: [[RV:r[0-9]+]] = ##65535
+; CHECK: memh(r0+r1<<#1) = [[RV]]
+define void @store_rr_i16(i16* %p, i32 %x) nounwind {
+  %t0 = getelementptr i16, i16* %p, i32 %x
+  store i16 65535, i16* %t0, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_ur_i16:
+; CHECK: [[RV:r[0-9]+]] = ##65535
+; CHECK: memh(r0<<#1+##var_i16) = [[RV]]
+define void @store_ur_i16(i32 %x) nounwind {
+  %t0 = getelementptr [10 x i16], [10 x i16]* @var_i16, i32 0, i32 %x
+  store i16 65535, i16* %t0, align 4
+  ret void
+}
+
+@var_i32 = global [10 x i32] zeroinitializer, align 8
+
+; CHECK-LABEL: store_imm_i32:
+; CHECK: memw(r0+#0) = #-1
+define void @store_imm_i32(i32* %p) nounwind {
+  store i32 4294967295, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_rr_i32:
+; CHECK: [[RV:r[0-9]+]] = #-1
+; CHECK: memw(r0+r1<<#2) = [[RV]]
+define void @store_rr_i32(i32* %p, i32 %x) nounwind {
+  %t0 = getelementptr i32, i32* %p, i32 %x
+  store i32 4294967295, i32* %t0, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_ur_i32:
+; CHECK: [[RV:r[0-9]+]] = #-1
+; CHECK: memw(r0<<#2+##var_i32) = [[RV]]
+define void @store_ur_i32(i32 %x) nounwind {
+  %t0 = getelementptr [10 x i32], [10 x i32]* @var_i32, i32 0, i32 %x
+  store i32 4294967295, i32* %t0, align 4
+  ret void
+}
+
--- a/test/CodeGen/Hexagon/store-imm-stack-object.ll
+++ b/test/CodeGen/Hexagon/store-imm-stack-object.ll
@ -3,8 +3,7 @@
 target triple = "hexagon"

 ; CHECK-LABEL: test1:
-; CHECK: [[REG1:(r[0-9]+)]] = ##875770417
-; CHECK-DAG: memw(r29+#4) = [[REG1]]
+; CHECK-DAG: memw(r29+#4) = ##875770417
 ; CHECK-DAG: memw(r29+#8) = #51
 ; CHECK-DAG: memh(r29+#12) = #50
 ; CHECK-DAG: memb(r29+#15) = #49
--- a/test/CodeGen/Hexagon/store-shift.ll
+++ b/test/CodeGen/Hexagon/store-shift.ll
@ -1,6 +1,6 @@
 ; RUN: llc -march=hexagon < %s | FileCheck %s

-; CHECK-DAG: r[[BASE:[0-9]+]] += add
+; CHECK-DAG: r[[BASE:[0-9]+]] = add(r1,#1000)
 ; CHECK-DAG: r[[IDX0:[0-9]+]] = add(r2,#5)
 ; CHECK-DAG: r[[IDX1:[0-9]+]] = add(r2,#6)
 ; CHECK-DAG: memw(r0+r[[IDX0]]<<#2) = r3
--- a/test/CodeGen/Hexagon/tfr-to-combine.ll
+++ b/test/CodeGen/Hexagon/tfr-to-combine.ll
@ -6,30 +6,33 @@
@b = external global i16
@c = external global i16

-; Function Attrs: nounwind
-define i64 @test1() #0 {
+declare void @test0a(i32, i32) #0
+declare void @test0b(i32, i32, i32, i32) #0
+
+; CHECK-LABEL: test1:
 ; CHECK: combine(#10,#0)
+define i32 @test1() #0 {
 entry:
-  store i16 0, i16* @a, align 2
-  store i16 10, i16* @b, align 2
-  ret i64 10
+  call void @test0a(i32 0, i32 10) #0
+  ret i32 10
 }

-; Function Attrs: nounwind
-define i64 @test2() #0 {
+; CHECK-LABEL: test2:
 ; CHECK: combine(#0,r{{[0-9]+}})
+define i32 @test2() #0 {
 entry:
-  store i16 0, i16* @a, align 2
-  %0 = load i16, i16* @c, align 2
-  %conv2 = zext i16 %0 to i64
-  ret i64 %conv2
+  %t0 = load i16, i16* @c, align 2
+  %t1 = zext i16 %t0 to i32
+  call void @test0b(i32 %t1, i32 0, i32 %t1, i32 0)
+  ret i32 0
 }

-; Function Attrs: nounwind
-define i64 @test4() #0 {
+; CHECK-LABEL: test3:
 ; CHECK: combine(#0,#100)
+define i32 @test3() #0 {
 entry:
-  store i16 100, i16* @b, align 2
-  store i16 0, i16* @a, align 2
-  ret i64 0
+  call void @test0a(i32 100, i32 0)
+  ret i32 0
 }
+
+attributes #0 = { nounwind }
--- a/test/CodeGen/Hexagon/tls_pic.ll
+++ b/test/CodeGen/Hexagon/tls_pic.ll
@ -5,8 +5,8 @@

 ; CHECK-LABEL:    test_initial_exec
 ; CHECK-DAG:      = add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL)
-; CHECK-DAG:      = ##src_ie@IEGOT
-; CHECK-DAG:      = ##dst_ie@IEGOT
+; CHECK-DAG:      ##src_ie@IEGOT
+; CHECK-DAG:      ##dst_ie@IEGOT
 ; CHECK-NOT:  call
 define i32 @test_initial_exec() nounwind {
 entry:
@ -23,8 +23,8 @@ entry:

 ; CHECK-LABEL: test_dynamic
 ; CHECK-DAG:   = add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL)
-; CHECK-DAG:   = ##src_gd@GDGOT
-; CHECK-DAG:   = ##dst_gd@GDGOT
+; CHECK-DAG:   ##src_gd@GDGOT
+; CHECK-DAG:   ##dst_gd@GDGOT
 ; CHECK-DAG:   call src_gd@GDPLT
 ; CHECK-DAG:   call dst_gd@GDPLT

--- a/test/CodeGen/Hexagon/tls_static.ll
+++ b/test/CodeGen/Hexagon/tls_static.ll
@ -4,8 +4,8 @@
@src_le = thread_local global i32 0, align 4

 ; CHECK-LABEL: test_local_exec
-; CHECK-DAG:   = ##src_le@TPREL
-; CHECK-DAG:   = ##dst_le@TPREL
+; CHECK-DAG:   ##src_le@TPREL
+; CHECK-DAG:   ##dst_le@TPREL
 define i32 @test_local_exec() nounwind {
 entry:
  %0 = load i32, i32* @src_le, align 4
--- a/test/CodeGen/Hexagon/vect/vect-load-1.ll
+++ b/test/CodeGen/Hexagon/vect/vect-load-1.ll
@ -1,11 +1,10 @@
 ; RUN: llc -march=hexagon < %s
-; Used to fail with "Cannot select: v2i32,ch = load 0x16c5890, 0x16f76e0, 0x16f76e0<LD2[undef](align=8), sext from v2i8>", 0x16c5890, 0x16f76e0, 0x16f76e0<LD2[undef](align=8), sext from v2i8>"
+;
+; Used to fail with "Cannot select: v2i32,ch = load 0x16c5890, 0x16f76e0, 0x16f76e0<LD2[undef](align=8), sext from v2i8>"

-; ModuleID = 'bugpoint-reduced-simplified.bc'
-target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
 target triple = "hexagon-unknown-linux-gnu"

-define void @foo() nounwind {
+define void @foo(<2 x i8>* %p) nounwind {
 entry:
  br label %polly.loop_header

@ -17,7 +16,7 @@ polly.loop_header:                                ; preds = %polly.loop_body, %e
  br i1 %0, label %polly.loop_body, label %polly.loop_after

 polly.loop_body:                                  ; preds = %polly.loop_header
-  %_p_vec_full = load <2 x i8>, <2 x i8>* undef, align 8
+  %_p_vec_full = load <2 x i8>, <2 x i8>* %p, align 8
  %1 = sext <2 x i8> %_p_vec_full to <2 x i32>
  %p_vec = mul <2 x i32> %1, <i32 3, i32 3>
  %mulp_vec = add <2 x i32> %p_vec, <i32 21, i32 21>
--- a/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll
+++ b/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll
@ -1,5 +1,5 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
-; CHECK: vmpybsu
+; CHECK: vmpybu
 ; CHECK: vtrunehb

 define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind {
--- a/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll
+++ b/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll
@ -1,6 +1,6 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
-; CHECK: vmpybsu
-; CHECK: vmpybsu
+; CHECK: vmpybu
+; CHECK: vmpybu

 define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
 entry: