Add AArch64 as an experimental target.
This patch adds support for AArch64 (ARM's 64-bit architecture) to LLVM in the "experimental" category. Currently, it won't be built unless requested explicitly.

This initial commit should have support for:

+ Assembly of all scalar (i.e. non-NEON, non-Crypto) instructions (except the late addition CRC instructions).
+ CodeGen features required for C++03 and C99.
+ Compilation for the "small" memory model: code + static data < 4GB.
+ Absolute and position-independent code.
+ GNU-style (i.e. "__thread") TLS.
+ Debugging information.

The principal omission, currently, is performance tuning.

This patch excludes the NEON support also reviewed due to an outbreak of batshit insanity in our legal department. That will be committed soon, bringing the changes to precisely what has been approved.

Further reviews would be gratefully received.

llvm-svn: 174054
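A rough illustration of two of the points above (not from the patch itself): the backend stays off by default and is requested through the new configure option added below, e.g. ./configure --enable-experimental-targets=AArch64; and GNU-style TLS means plain __thread C code should lower through the TLS models the backend implements. A minimal C sketch, with purely illustrative names:

    /* Minimal sketch, assuming a toolchain built with the experimental
     * AArch64 target enabled; "counter" and "next" are hypothetical names. */
    __thread int counter = 0;   /* GNU-style TLS: one instance per thread */

    int next(void) {
      return ++counter;         /* access lowered via the chosen TLS model */
    }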
parent: 433a41f8f3
commit: e2b0519ed8
autoconf/config.sub (vendored, 4 changes):
@@ -251,7 +251,8 @@ case $basic_machine in
	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
	| am33_2.0 \
	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+	| aarch64 \
	| be32 | be64 \
	| bfin \
	| c4x | clipper \
	| d10v | d30v | dlx | dsp16xx \
@@ -359,6 +360,7 @@ case $basic_machine in
	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
	| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
+	| aarch64-* \
	| avr-* | avr32-* \
	| be32-* | be64-* \
	| bfin-* | bs2000-* \
autoconf/configure.ac:

@@ -389,6 +389,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
  sparc*-*)               llvm_cv_target_arch="Sparc" ;;
  powerpc*-*)             llvm_cv_target_arch="PowerPC" ;;
  arm*-*)                 llvm_cv_target_arch="ARM" ;;
+ aarch64*-*)             llvm_cv_target_arch="AArch64" ;;
  mips-* | mips64-*)      llvm_cv_target_arch="Mips" ;;
  mipsel-* | mips64el-*)  llvm_cv_target_arch="Mips" ;;
  xcore-*)                llvm_cv_target_arch="XCore" ;;
@@ -422,6 +423,7 @@ case $host in
  sparc*-*)               host_arch="Sparc" ;;
  powerpc*-*)             host_arch="PowerPC" ;;
  arm*-*)                 host_arch="ARM" ;;
+ aarch64*-*)             host_arch="AArch64" ;;
  mips-* | mips64-*)      host_arch="Mips" ;;
  mipsel-* | mips64el-*)  host_arch="Mips" ;;
  xcore-*)                host_arch="XCore" ;;
@@ -640,6 +642,7 @@ else
    PowerPC)     AC_SUBST(TARGET_HAS_JIT,1) ;;
    x86_64)      AC_SUBST(TARGET_HAS_JIT,1) ;;
    ARM)         AC_SUBST(TARGET_HAS_JIT,1) ;;
+   AArch64)     AC_SUBST(TARGET_HAS_JIT,0) ;;
    Mips)        AC_SUBST(TARGET_HAS_JIT,1) ;;
    XCore)       AC_SUBST(TARGET_HAS_JIT,0) ;;
    MSP430)      AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -771,7 +774,7 @@ dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
    [Build specific host targets: all or target1,target2,... Valid targets are:
-     host, x86, x86_64, sparc, powerpc, arm, mips, hexagon,
+     host, x86, x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
      xcore, msp430, nvptx, and cpp (default=all)]),,
    enableval=all)
if test "$enableval" = host-only ; then
configure (vendored, 10 changes):
@@ -1438,8 +1438,8 @@ Optional Features:
                          YES)
  --enable-targets        Build specific host targets: all or
                          target1,target2,... Valid targets are: host, x86,
-                         x86_64, sparc, powerpc, arm, mips, hexagon, xcore,
-                         msp430, nvptx, and cpp (default=all)
+                         x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
+                         xcore, msp430, nvptx, and cpp (default=all)
  --enable-experimental-targets
                          Build experimental host targets: disable or
                          target1,target2,... (default=disable)
@@ -4008,6 +4008,7 @@ else
  sparc*-*)               llvm_cv_target_arch="Sparc" ;;
  powerpc*-*)             llvm_cv_target_arch="PowerPC" ;;
  arm*-*)                 llvm_cv_target_arch="ARM" ;;
+ aarch64*-*)             llvm_cv_target_arch="AArch64" ;;
  mips-* | mips64-*)      llvm_cv_target_arch="Mips" ;;
  mipsel-* | mips64el-*)  llvm_cv_target_arch="Mips" ;;
  xcore-*)                llvm_cv_target_arch="XCore" ;;
@@ -4041,6 +4042,7 @@ case $host in
  sparc*-*)               host_arch="Sparc" ;;
  powerpc*-*)             host_arch="PowerPC" ;;
  arm*-*)                 host_arch="ARM" ;;
+ aarch64*-*)             host_arch="AArch64" ;;
  mips-* | mips64-*)      host_arch="Mips" ;;
  mipsel-* | mips64el-*)  host_arch="Mips" ;;
  xcore-*)                host_arch="XCore" ;;
@@ -5372,6 +5374,8 @@ else
    x86_64) TARGET_HAS_JIT=1
      ;;
    ARM) TARGET_HAS_JIT=1
      ;;
+   AArch64) TARGET_HAS_JIT=0
+     ;;
    Mips) TARGET_HAS_JIT=1
      ;;
@@ -10489,7 +10493,7 @@ else
  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
  lt_status=$lt_dlunknown
  cat > conftest.$ac_ext <<EOF
-#line 10492 "configure"
+#line 10496 "configure"
#include "confdefs.h"

#if HAVE_DLFCN_H
docs/CompilerWriterInfo.rst:

@@ -22,6 +22,11 @@ ARM

* `ABI <http://www.arm.com/products/DevTools/ABI.html>`_

+AArch64
+-------
+
+* `ARMv8 Instruction Set Overview <http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.genc010197a/index.html>`_
+
Itanium (ia64)
--------------

@@ -99,6 +104,8 @@ Linux
-----

* `PowerPC 64-bit ELF ABI Supplement <http://www.linuxbase.org/spec/ELF/ppc64/>`_
+* `Procedure Call Standard for the AArch64 Architecture <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055a/IHI0055A_aapcs64.pdf>`_
+* `ELF for the ARM 64-bit Architecture (AArch64) <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf>`_

OS X
----
include/llvm/ADT/Triple.h:

@@ -44,6 +44,7 @@ public:
    UnknownArch,

    arm,     // ARM; arm, armv.*, xscale
+   aarch64, // AArch64: aarch64
    hexagon, // Hexagon: hexagon
    mips,    // MIPS: mips, mipsallegrex
    mipsel,  // MIPSEL: mipsel, mipsallegrexel
include/llvm/MC/MCExpr.h:

@@ -472,6 +472,8 @@ public:
  virtual void AddValueSymbols(MCAssembler *) const = 0;
  virtual const MCSection *FindAssociatedSection() const = 0;

+ virtual void fixELFSymbolsInTLSFixups(MCAssembler &) const = 0;
+
  static bool classof(const MCExpr *E) {
    return E->getKind() == MCExpr::Target;
  }
include/llvm/MC/MCObjectWriter.h:

@@ -10,6 +10,7 @@
#ifndef LLVM_MC_MCOBJECTWRITER_H
#define LLVM_MC_MCOBJECTWRITER_H

+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/raw_ostream.h"
include/llvm/Object/ELF.h:

@@ -1624,6 +1624,86 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
      res = "Unknown";
    }
    break;
+ case ELF::EM_AARCH64:
+   switch (type) {
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS64);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS32);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS16);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL64);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL32);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL16);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G3);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G0);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G1);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G2);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD_PREL_LO19);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_LO21);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_PG_HI21);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADD_ABS_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST8_ABS_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TSTBR14);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CONDBR19);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_JUMP26);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CALL26);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST16_ABS_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST32_ABS_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST64_ABS_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST128_ABS_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_GOT_PAGE);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD64_GOT_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G2);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_HI12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G2);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_HI12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADR_PAGE);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC);
+     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL);
+
+   default:
+     res = "Unknown";
+   }
+   break;
  case ELF::EM_ARM:
    switch (type) {
      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_NONE);
@@ -1937,6 +2017,7 @@ error_code ELFObjectFile<ELFT>::getRelocationValueString(
      res = "Unknown";
    }
    break;
+ case ELF::EM_AARCH64:
  case ELF::EM_ARM:
  case ELF::EM_HEXAGON:
    res = symname;
@@ -2356,6 +2437,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
    return "ELF64-i386";
  case ELF::EM_X86_64:
    return "ELF64-x86-64";
+ case ELF::EM_AARCH64:
+   return "ELF64-aarch64";
  case ELF::EM_PPC64:
    return "ELF64-ppc64";
  default:
@@ -2374,6 +2457,8 @@ unsigned ELFObjectFile<ELFT>::getArch() const {
    return Triple::x86;
  case ELF::EM_X86_64:
    return Triple::x86_64;
+ case ELF::EM_AARCH64:
+   return Triple::aarch64;
  case ELF::EM_ARM:
    return Triple::arm;
  case ELF::EM_HEXAGON:
include/llvm/Support/ELF.h:

@@ -271,6 +271,7 @@ enum {
  EM_SLE9X   = 179, // Infineon Technologies SLE9X core
  EM_L10M    = 180, // Intel L10M
  EM_K10M    = 181, // Intel K10M
+ EM_AARCH64 = 183, // ARM AArch64
  EM_AVR32   = 185, // Atmel Corporation 32-bit microprocessor family
  EM_STM8    = 186, // STMicroeletronics STM8 8-bit microcontroller
  EM_TILE64  = 187, // Tilera TILE64 multicore architecture family
@@ -494,6 +495,96 @@ enum {
  R_PPC64_TLSLD = 108
};

+// ELF Relocation types for AArch64
+
+enum {
+ R_AARCH64_NONE = 0x100,
+
+ R_AARCH64_ABS64 = 0x101,
+ R_AARCH64_ABS32 = 0x102,
+ R_AARCH64_ABS16 = 0x103,
+ R_AARCH64_PREL64 = 0x104,
+ R_AARCH64_PREL32 = 0x105,
+ R_AARCH64_PREL16 = 0x106,
+
+ R_AARCH64_MOVW_UABS_G0 = 0x107,
+ R_AARCH64_MOVW_UABS_G0_NC = 0x108,
+ R_AARCH64_MOVW_UABS_G1 = 0x109,
+ R_AARCH64_MOVW_UABS_G1_NC = 0x10a,
+ R_AARCH64_MOVW_UABS_G2 = 0x10b,
+ R_AARCH64_MOVW_UABS_G2_NC = 0x10c,
+ R_AARCH64_MOVW_UABS_G3 = 0x10d,
+ R_AARCH64_MOVW_SABS_G0 = 0x10e,
+ R_AARCH64_MOVW_SABS_G1 = 0x10f,
+ R_AARCH64_MOVW_SABS_G2 = 0x110,
+
+ R_AARCH64_LD_PREL_LO19 = 0x111,
+ R_AARCH64_ADR_PREL_LO21 = 0x112,
+ R_AARCH64_ADR_PREL_PG_HI21 = 0x113,
+ R_AARCH64_ADD_ABS_LO12_NC = 0x115,
+ R_AARCH64_LDST8_ABS_LO12_NC = 0x116,
+
+ R_AARCH64_TSTBR14 = 0x117,
+ R_AARCH64_CONDBR19 = 0x118,
+ R_AARCH64_JUMP26 = 0x11a,
+ R_AARCH64_CALL26 = 0x11b,
+
+ R_AARCH64_LDST16_ABS_LO12_NC = 0x11c,
+ R_AARCH64_LDST32_ABS_LO12_NC = 0x11d,
+ R_AARCH64_LDST64_ABS_LO12_NC = 0x11e,
+
+ R_AARCH64_LDST128_ABS_LO12_NC = 0x12b,
+
+ R_AARCH64_ADR_GOT_PAGE = 0x137,
+ R_AARCH64_LD64_GOT_LO12_NC = 0x138,
+
+ R_AARCH64_TLSLD_MOVW_DTPREL_G2 = 0x20b,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G1 = 0x20c,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC = 0x20d,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G0 = 0x20e,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC = 0x20f,
+ R_AARCH64_TLSLD_ADD_DTPREL_HI12 = 0x210,
+ R_AARCH64_TLSLD_ADD_DTPREL_LO12 = 0x211,
+ R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC = 0x212,
+ R_AARCH64_TLSLD_LDST8_DTPREL_LO12 = 0x213,
+ R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC = 0x214,
+ R_AARCH64_TLSLD_LDST16_DTPREL_LO12 = 0x215,
+ R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC = 0x216,
+ R_AARCH64_TLSLD_LDST32_DTPREL_LO12 = 0x217,
+ R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC = 0x218,
+ R_AARCH64_TLSLD_LDST64_DTPREL_LO12 = 0x219,
+ R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC = 0x21a,
+
+ R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 = 0x21b,
+ R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC = 0x21c,
+ R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 = 0x21d,
+ R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 0x21e,
+ R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 = 0x21f,
+
+ R_AARCH64_TLSLE_MOVW_TPREL_G2 = 0x220,
+ R_AARCH64_TLSLE_MOVW_TPREL_G1 = 0x221,
+ R_AARCH64_TLSLE_MOVW_TPREL_G1_NC = 0x222,
+ R_AARCH64_TLSLE_MOVW_TPREL_G0 = 0x223,
+ R_AARCH64_TLSLE_MOVW_TPREL_G0_NC = 0x224,
+ R_AARCH64_TLSLE_ADD_TPREL_HI12 = 0x225,
+ R_AARCH64_TLSLE_ADD_TPREL_LO12 = 0x226,
+ R_AARCH64_TLSLE_ADD_TPREL_LO12_NC = 0x227,
+ R_AARCH64_TLSLE_LDST8_TPREL_LO12 = 0x228,
+ R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC = 0x229,
+ R_AARCH64_TLSLE_LDST16_TPREL_LO12 = 0x22a,
+ R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC = 0x22b,
+ R_AARCH64_TLSLE_LDST32_TPREL_LO12 = 0x22c,
+ R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC = 0x22d,
+ R_AARCH64_TLSLE_LDST64_TPREL_LO12 = 0x22e,
+ R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC = 0x22f,
+
+ R_AARCH64_TLSDESC_ADR_PAGE = 0x232,
+ R_AARCH64_TLSDESC_LD64_LO12_NC = 0x233,
+ R_AARCH64_TLSDESC_ADD_LO12_NC = 0x234,
+
+ R_AARCH64_TLSDESC_CALL = 0x239
+};
+
// ARM Specific e_flags
enum {
  EF_ARM_EABI_UNKNOWN = 0x00000000U,
lib/MC/MCELFStreamer.cpp:

@@ -300,7 +300,9 @@ void MCELFStreamer::EmitFileDirective(StringRef Filename) {

void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
  switch (expr->getKind()) {
- case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+ case MCExpr::Target:
+   cast<MCTargetExpr>(expr)->fixELFSymbolsInTLSFixups(getAssembler());
+   break;
  case MCExpr::Constant:
    break;
lib/MC/MCObjectFileInfo.cpp:

@@ -256,6 +256,25 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
      TTypeEncoding = (CMModel == CodeModel::Small)
        ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
    }
+ } else if (T.getArch() == Triple::aarch64) {
+   FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
+   // The small model guarantees static code/data size < 4GB, but not where it
+   // will be in memory. Most of these could end up >2GB away so even a signed
+   // pc-relative 32-bit address is insufficient, theoretically.
+   if (RelocM == Reloc::PIC_) {
+     PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+       dwarf::DW_EH_PE_sdata8;
+     LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
+     FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+       dwarf::DW_EH_PE_sdata8;
+   } else {
+     PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+     LSDAEncoding = dwarf::DW_EH_PE_absptr;
+     FDEEncoding = dwarf::DW_EH_PE_udata4;
+     TTypeEncoding = dwarf::DW_EH_PE_absptr;
+   }
  } else if (T.getArch() == Triple::ppc64) {
    PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
      dwarf::DW_EH_PE_udata8;
lib/Support/Triple.cpp:

@@ -19,6 +19,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
  switch (Kind) {
  case UnknownArch: return "unknown";

+ case aarch64: return "aarch64";
  case arm:     return "arm";
  case hexagon: return "hexagon";
  case mips:    return "mips";
@@ -53,6 +54,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
  default:
    return 0;

+ case aarch64: return "aarch64";
+
  case arm:
  case thumb:   return "arm";

@@ -152,6 +155,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {

Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
  return StringSwitch<Triple::ArchType>(Name)
+   .Case("aarch64", aarch64)
    .Case("arm", arm)
    .Case("mips", mips)
    .Case("mipsel", mipsel)
@@ -215,6 +219,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
    .Case("powerpc", Triple::ppc)
    .Cases("powerpc64", "ppu", Triple::ppc64)
    .Case("mblaze", Triple::mblaze)
+   .Case("aarch64", Triple::aarch64)
    .Cases("arm", "xscale", Triple::arm)
    // FIXME: It would be good to replace these with explicit names for all the
    // various suffixes supported.
@@ -676,6 +681,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
  case llvm::Triple::spir:
    return 32;

+ case llvm::Triple::aarch64:
  case llvm::Triple::mips64:
  case llvm::Triple::mips64el:
  case llvm::Triple::nvptx64:
@@ -704,6 +710,7 @@ Triple Triple::get32BitArchVariant() const {
  Triple T(*this);
  switch (getArch()) {
  case Triple::UnknownArch:
+ case Triple::aarch64:
  case Triple::msp430:
    T.setArch(UnknownArch);
    break;
@@ -755,6 +762,7 @@ Triple Triple::get64BitArchVariant() const {
    T.setArch(UnknownArch);
    break;

+ case Triple::aarch64:
  case Triple::spir64:
  case Triple::mips64:
  case Triple::mips64el:
lib/Target/AArch64/AArch64.h (new file, 42 lines):
//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the entry points for global functions defined in the LLVM
// AArch64 back-end.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_AARCH64_H
#define LLVM_TARGET_AARCH64_H

#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"

namespace llvm {

class AArch64AsmPrinter;
class FunctionPass;
class AArch64TargetMachine;
class MachineInstr;
class MCInst;

FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
                                   CodeGenOpt::Level OptLevel);

FunctionPass *createAArch64ConstantIslandPass();

FunctionPass *createAArch64CleanupLocalDynamicTLSPass();

void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                      AArch64AsmPrinter &AP);

}

#endif
lib/Target/AArch64/AArch64.td (new file, 68 lines):
//===- AArch64.td - Describe the AArch64 Target Machine ---------*- tblgen -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This is the top level entry point for the AArch64 target.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Target-independent interfaces
//===----------------------------------------------------------------------===//

include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// AArch64 Subtarget features.
//

def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
                                   "Enable Advanced SIMD instructions">;

def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
                                     "Enable cryptographic instructions">;

//===----------------------------------------------------------------------===//
// AArch64 Processors
//

include "AArch64Schedule.td"

def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>;
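// Illustrative note, not part of the original file: the feature strings
// above ("neon", "crypto") surface as backend subtarget attributes, so an
// invocation along the lines of "llc -mattr=+neon,+crypto" would toggle
// them once the target is actually built.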

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

include "AArch64RegisterInfo.td"

include "AArch64CallingConv.td"

//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//

include "AArch64InstrInfo.td"

def AArch64InstrInfo : InstrInfo;

//===----------------------------------------------------------------------===//
// Assembly printer
//===----------------------------------------------------------------------===//

def A64InstPrinter : AsmWriter {
  string AsmWriterClassName = "InstPrinter";
  bit isMCAsmWriter = 1;
}

//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//

def AArch64 : Target {
  let InstructionSet = AArch64InstrInfo;
  let AssemblyWriters = [A64InstPrinter];
}
lib/Target/AArch64/AArch64AsmPrinter.cpp (new file, 361 lines):
//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
// of machine-dependent LLVM code to GAS-format AArch64 assembly language.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "asm-printer"
#include "AArch64AsmPrinter.h"
#include "InstPrinter/AArch64InstPrinter.h"
#include "llvm/DebugInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/Mangler.h"

using namespace llvm;

MachineLocation
AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
  // See emitFrameIndexDebugValue in InstrInfo for where this instruction is
  // expected to be created.
  assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg()
         && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE");
  return MachineLocation(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
}

/// Try to print a floating-point register as if it belonged to a specified
/// register-class. For example the inline asm operand modifier "b" requires its
/// argument to be printed as "bN".
static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
                                       const TargetRegisterInfo *TRI,
                                       const TargetRegisterClass &RegClass,
                                       raw_ostream &O) {
  if (!MO.isReg())
    return true;

  for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
    if (RegClass.contains(*AR)) {
      O << AArch64InstPrinter::getRegisterName(*AR);
      return false;
    }
  }
  return true;
}

/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
/// with the obvious type and an immediate 0 as either wzr or xzr.
static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
                                       const TargetRegisterInfo *TRI,
                                       const TargetRegisterClass &RegClass,
                                       raw_ostream &O) {
  char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';

  if (MO.isImm() && MO.getImm() == 0) {
    O << Prefix << "zr";
    return false;
  } else if (MO.isReg()) {
    if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
      O << (Prefix == 'x' ? "sp" : "wsp");
      return false;
    }

    for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
      if (RegClass.contains(*AR)) {
        O << AArch64InstPrinter::getRegisterName(*AR);
        return false;
      }
    }
  }

  return true;
}

bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
                                             bool PrintImmediatePrefix,
                                             StringRef Suffix, raw_ostream &O) {
  StringRef Name;
  StringRef Modifier;
  switch (MO.getType()) {
  default: llvm_unreachable("Unexpected operand for symbolic address constraint");
  case MachineOperand::MO_GlobalAddress:
    Name = Mang->getSymbol(MO.getGlobal())->getName();

    // Global variables may be accessed either via a GOT or in various fun and
    // interesting TLS-model specific ways. Set the prefix modifier as
    // appropriate here.
    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
      Reloc::Model RelocM = TM.getRelocationModel();
      if (GV->isThreadLocal()) {
        switch (TM.getTLSModel(GV)) {
        case TLSModel::GeneralDynamic:
          Modifier = "tlsdesc";
          break;
        case TLSModel::LocalDynamic:
          Modifier = "dtprel";
          break;
        case TLSModel::InitialExec:
          Modifier = "gottprel";
          break;
        case TLSModel::LocalExec:
          Modifier = "tprel";
          break;
        }
      } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
        Modifier = "got";
      }
    }
    break;
  case MachineOperand::MO_BlockAddress:
    Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
    break;
  case MachineOperand::MO_ExternalSymbol:
    Name = MO.getSymbolName();
    break;
  case MachineOperand::MO_ConstantPoolIndex:
    Name = GetCPISymbol(MO.getIndex())->getName();
    break;
  }

  // Some instructions (notably ADRP) don't take the # prefix for
  // immediates. Only print it if asked to.
  if (PrintImmediatePrefix)
    O << '#';

  // Only need the joining "_" if both the prefix and the suffix are
  // non-null. This little block simply takes care of the four possible
  // combinations involved there.
  if (Modifier == "" && Suffix == "")
    O << Name;
  else if (Modifier == "" && Suffix != "")
    O << ":" << Suffix << ':' << Name;
  else if (Modifier != "" && Suffix == "")
    O << ":" << Modifier << ':' << Name;
  else
    O << ":" << Modifier << '_' << Suffix << ':' << Name;
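  // Illustrative outputs for the four cases above (hypothetical symbol
  // "var"): "var", ":lo12:var", ":got:var" and ":got_lo12:var".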

  return false;
}

bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
                                        unsigned AsmVariant,
                                        const char *ExtraCode, raw_ostream &O) {
  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
  if (!ExtraCode || !ExtraCode[0]) {
    // There's actually no operand modifier, which leads to a slightly eclectic
    // set of behaviour which we have to handle here.
    const MachineOperand &MO = MI->getOperand(OpNum);
    switch (MO.getType()) {
    default:
      llvm_unreachable("Unexpected operand for inline assembly");
    case MachineOperand::MO_Register:
      // GCC prints the unmodified operand of a 'w' constraint as the vector
      // register. Technically, we could allocate the argument as a VPR128, but
      // that leads to extremely dodgy copies being generated to get the data
      // there.
      if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
        O << AArch64InstPrinter::getRegisterName(MO.getReg());
      break;
    case MachineOperand::MO_Immediate:
      O << '#' << MO.getImm();
      break;
    case MachineOperand::MO_FPImmediate:
      assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
      O << "#0.0";
      break;
    case MachineOperand::MO_BlockAddress:
    case MachineOperand::MO_ConstantPoolIndex:
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      return printSymbolicAddress(MO, false, "", O);
    }
    return false;
  }

  // We have a real modifier to handle.
  switch(ExtraCode[0]) {
  default:
    // See if this is a generic operand
    return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
  case 'c': // Don't print "#" before an immediate operand.
    if (!MI->getOperand(OpNum).isImm())
      return true;
    O << MI->getOperand(OpNum).getImm();
    return false;
  case 'w':
    // Output 32-bit general register operand, constant zero as wzr, or stack
    // pointer as wsp. Ignored when used with other operand types.
    return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
                                      AArch64::GPR32RegClass, O);
  case 'x':
    // Output 64-bit general register operand, constant zero as xzr, or stack
    // pointer as sp. Ignored when used with other operand types.
    return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
                                      AArch64::GPR64RegClass, O);
  case 'H':
    // Output higher numbered of a 64-bit general register pair
  case 'Q':
    // Output least significant register of a 64-bit general register pair
  case 'R':
    // Output most significant register of a 64-bit general register pair

    // FIXME note: these three operand modifiers will require, to some extent,
    // adding a paired GPR64 register class. Initial investigation suggests that
    // assertions are hit unless it has a type and is made legal for that type
    // in ISelLowering. After that step is made, the number of modifications
    // needed explodes (operation legality, calling conventions, stores, reg
    // copies ...).
    llvm_unreachable("FIXME: Unimplemented register pairs");
  case 'b':
    // Output 8-bit FP/SIMD scalar register operand, prefixed with b.
    return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
                                      AArch64::FPR8RegClass, O);
  case 'h':
    // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
    return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
                                      AArch64::FPR16RegClass, O);
  case 's':
    // Output 32-bit FP/SIMD scalar register operand, prefixed with s.
    return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
                                      AArch64::FPR32RegClass, O);
  case 'd':
    // Output 64-bit FP/SIMD scalar register operand, prefixed with d.
    return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
                                      AArch64::FPR64RegClass, O);
  case 'q':
    // Output 128-bit FP/SIMD scalar register operand, prefixed with q.
    return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
                                      AArch64::FPR128RegClass, O);
  case 'A':
    // Output symbolic address with appropriate relocation modifier (also
    // suitable for ADRP).
    return printSymbolicAddress(MI->getOperand(OpNum), false, "", O);
  case 'L':
    // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
    // modifier.
    return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O);
  case 'G':
    // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
    // modifier (currently only for TLS local exec).
    return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O);
  }

}
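// Illustrative note, not part of the original file: given the modifiers
// handled above, C-level inline assembly such as
//   asm("add %x0, %x1, %x2" : "=r"(res) : "r"(a), "r"(b));
// would print its operands with their 64-bit "x" names, while "%w0" would
// use the 32-bit "w" name and "%s0"/"%d0" the FP/SIMD scalar forms.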
bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                              unsigned OpNum,
                                              unsigned AsmVariant,
                                              const char *ExtraCode,
                                              raw_ostream &O) {
  // Currently both the memory constraints (m and Q) behave the same and amount
  // to the address as a single register. In future, we may allow "m" to provide
  // both a base and an offset.
  const MachineOperand &MO = MI->getOperand(OpNum);
  assert(MO.isReg() && "unexpected inline assembly memory operand");
  O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
  return false;
}

void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
                                               raw_ostream &OS) {
  unsigned NOps = MI->getNumOperands();
  assert(NOps==4);
  OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
  // cast away const; DIetc do not take const operands for some reason.
  DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
  OS << V.getName();
  OS << " <- ";
  // Frame address. Currently handles register +- offset only.
  assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
  OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg());
  OS << '+' << MI->getOperand(1).getImm();
  OS << ']';
  OS << "+" << MI->getOperand(NOps - 2).getImm();
}


#include "AArch64GenMCPseudoLowering.inc"

void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
  // Do any auto-generated pseudo lowerings.
  if (emitPseudoExpansionLowering(OutStreamer, MI))
    return;

  switch (MI->getOpcode()) {
  case AArch64::CONSTPOOL_ENTRY: {
    unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
    unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();

    OutStreamer.EmitLabel(GetCPISymbol(LabelId));

    const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
    if (MCPE.isMachineConstantPoolEntry())
      EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
    else
      EmitGlobalConstant(MCPE.Val.ConstVal);

    return;
  }
  case AArch64::DBG_VALUE: {
    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
      SmallString<128> TmpStr;
      raw_svector_ostream OS(TmpStr);
      PrintDebugValueComment(MI, OS);
      OutStreamer.EmitRawText(StringRef(OS.str()));
    }
    return;
  }
  }

  MCInst TmpInst;
  LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
  OutStreamer.EmitInstruction(TmpInst);
}

void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
  if (Subtarget->isTargetELF()) {
    const TargetLoweringObjectFileELF &TLOFELF =
      static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());

    MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();

    // Output stubs for external and common global variables.
    MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
    if (!Stubs.empty()) {
      OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
      const DataLayout *TD = TM.getDataLayout();

      for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
        OutStreamer.EmitLabel(Stubs[i].first);
        OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
                                    TD->getPointerSize(0), 0);
      }
      Stubs.clear();
    }
  }
}

bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
  MCP = MF.getConstantPool();
  return AsmPrinter::runOnMachineFunction(MF);
}

// Force static initialization.
extern "C" void LLVMInitializeAArch64AsmPrinter() {
  RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target);
}
lib/Target/AArch64/AArch64AsmPrinter.h (new file, 85 lines):
// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AArch64 Assembly printer class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64ASMPRINTER_H
#define LLVM_AARCH64ASMPRINTER_H

#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Compiler.h"

namespace llvm {

class MCOperand;

class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when printing asm code for different targets.
  const AArch64Subtarget *Subtarget;
  const MachineConstantPool *MCP;

  // emitPseudoExpansionLowering - tblgen'erated.
  bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
                                   const MachineInstr *MI);

public:
  explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
    : AsmPrinter(TM, Streamer) {
    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
  }

  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;

  MCOperand lowerSymbolOperand(const MachineOperand &MO,
                               const MCSymbol *Sym) const;

  void EmitInstruction(const MachineInstr *MI);
  void EmitEndOfAsmFile(Module &M);

  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
                       unsigned AsmVariant, const char *ExtraCode,
                       raw_ostream &O);
  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
                             unsigned AsmVariant, const char *ExtraCode,
                             raw_ostream &O);

  void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);

  /// printSymbolicAddress - Given some kind of reasonably bare symbolic
  /// reference, print out the appropriate asm string to represent it. If
  /// appropriate, a relocation-specifier will be produced, composed of a
  /// general class derived from the MO parameter and an instruction-specific
  /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
  /// given.
  bool printSymbolicAddress(const MachineOperand &MO,
                            bool PrintImmediatePrefix,
                            StringRef Suffix, raw_ostream &O);

  MachineLocation getDebugValueLocation(const MachineInstr *MI) const;

  virtual const char *getPassName() const {
    return "AArch64 Assembly Printer";
  }

  /// A no-op on AArch64 because we emit our constant pool entries inline with
  /// the function.
  virtual void EmitConstantPool() {}

  virtual bool runOnMachineFunction(MachineFunction &MF);
};
} // end namespace llvm

#endif
lib/Target/AArch64/AArch64CallingConv.td (new file, 196 lines):
//==-- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tblgen -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for the AArch64 architecture.
//===----------------------------------------------------------------------===//


// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
// higher level of abstraction than LLVM's target interface presents. In
// particular, it refers (like other ABIs, in fact) directly to
// structs. However, generic LLVM code takes the liberty of lowering structure
// arguments to the component fields before we see them.
//
// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
// implemented, so the goals of this calling convention are, in decreasing
// priority order:
//   1. Expose *some* way to express the concepts required to implement the
//      generic PCS from a front-end.
//   2. Provide a sane ABI for pure LLVM.
//   3. Follow the generic PCS as closely as is naturally possible.
//
// The suggested front-end implementation of PCS features is:
//   * Integer, float and vector arguments of all sizes which end up in
//     registers are passed and returned via the natural LLVM type.
//   * Structure arguments with size <= 16 bytes are passed and returned in
//     registers as similar integer or composite types. For example:
//     [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
//   * HFAs in registers follow rules similar to small structs: appropriate
//     composite types.
//   * Structure arguments with size > 16 bytes are passed via a pointer,
//     handled completely by the front-end.
//   * Structure return values > 16 bytes via an sret pointer argument.
//   * Other stack-based arguments (not large structs) are passed using byval
//     pointers. Padding arguments are added beforehand to guarantee a large
//     struct doesn't later use integer registers.
//
// N.b. this means that it is the front-end's responsibility (if it cares about
// PCS compliance) to check whether enough registers are available for an
// argument when deciding how to pass it.

class CCIfAlign<int Align, CCAction A>:
  CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;

def CC_A64_APCS : CallingConv<[
  // SRet is an LLVM-specific concept, so it takes precedence over general ABI
  // concerns. However, this rule will be used by C/C++ frontends to implement
  // structure return.
  CCIfSRet<CCAssignToReg<[X8]>>,

  // Put ByVal arguments directly on the stack. Minimum size and alignment of a
  // slot is 64-bit.
  CCIfByVal<CCPassByVal<8, 8>>,

  // Canonicalise the various types that live in different floating-point
  // registers. This makes sense because the PCS does not distinguish Short
  // Vectors and Floating-point types.
  CCIfType<[v2i8], CCBitConvertToType<f16>>,
  CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
  CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
           CCBitConvertToType<f128>>,

  // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
  // Floating-point or Short Vector Type and the NSRN is less than 8, then the
  // argument is allocated to the least significant bits of register
  // v[NSRN]. The NSRN is incremented by one. The argument has now been
  // allocated."
  CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
  CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,

  // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
  // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
  // argument is allocated to SIMD and Floating-point registers (with one
  // register per element of the HFA). The NSRN is incremented by the number of
  // registers used. The argument has now been allocated."
  //
  // N.b. As above, this rule is the responsibility of the front-end.

  // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
  // the argument is rounded up to the nearest multiple of 8 bytes."
  //
  // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
  // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
  // Alignment of the Argument's type."
  //
  // It is expected that these will be satisfied by adding dummy arguments to
  // the prototype.

  // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
  // type then the size of the argument is set to 8 bytes. The effect is as if
  // the argument had been copied to the least significant bits of a 64-bit
  // register and the remaining bits filled with unspecified values."
  CCIfType<[f16, f32], CCPromoteToType<f64>>,

  // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
  // precision Floating-point or Short Vector Type, then the argument is copied
  // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
  // argument. The argument has now been allocated."
  CCIfType<[f64], CCAssignToStack<8, 8>>,
  CCIfType<[f128], CCAssignToStack<16, 16>>,

  // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
  // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
  // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
  // one. The argument has now been allocated."

  // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
  // represented as two i64s, the first one being split. If we delayed this
  // operation C.8 would never be reached.
  CCIfType<[i64],
           CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
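  // Worked example (illustrative, not in the original file): an i128
  // argument arrives as two i64 halves, the first marked as split. The
  // shadow list above pins that half to an even-numbered register, so the
  // first i128 lands in x0/x1, the next in x2/x3, and so on, as C.8/C.9
  // require.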

  // Note: the promotion also implements C.14.
  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,

  // And now the real implementation of C.7
  CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,

  // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
  // up to the next even number."
  //
  // "C.9: If the argument is an Integral Type, the size of the argument is
  // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
  // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
  // memory representation of the argument. The NGRN is incremented by two. The
  // argument has now been allocated."
  //
  // Subtlety here: what if alignment is 16 but it is not an integral type? All
  // floating-point types have been allocated already, which leaves composite
  // types: this is why a front-end may need to produce i128 for a struct <= 16
  // bytes.

  // PCS: "C.10 If the argument is a Composite Type and the size in double-words
  // of the argument is not more than 8 minus NGRN, then the argument is copied
  // into consecutive general-purpose registers, starting at x[NGRN]. The
  // argument is passed as though it had been loaded into the registers from a
  // double-word aligned address with an appropriate sequence of LDR
  // instructions loading consecutive registers from memory (the contents of any
  // unused parts of the registers are unspecified by this standard). The NGRN
  // is incremented by the number of registers used. The argument has now been
  // allocated."
  //
  // Another one that's the responsibility of the front-end (sigh).

  // PCS: "C.11: The NGRN is set to 8."
  CCCustom<"CC_AArch64NoMoreRegs">,

  // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
  // Alignment of the argument's type."
  //
  // PCS: "C.13: If the argument is a composite type then the argument is copied
  // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
  // argument. The argument has now been allocated."
  //
  // Note that the effect of this corresponds to a memcpy rather than register
  // stores so that the struct ends up correctly addressable at the adjusted
  // NSAA.

  // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
  // of the argument is set to 8 bytes. The effect is as if the argument was
  // copied to the least significant bits of a 64-bit register and the remaining
  // bits filled with unspecified values."
  //
  // Integer types were widened above. Floating-point and composite types have
  // already been allocated completely. Nothing to do.

  // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
  // is incremented by the size of the argument. The argument has now been
  // allocated."
  CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
  CCIfType<[i64], CCAssignToStack<8, 8>>

]>;

// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits
// of vector registers (8-15) are callee-saved. The order here is picked up
// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
// [sp-16], ...
def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
                                   (sequence "D%u", 15, 8))>;


// TLS descriptor calls are extremely restricted in their changes, to allow
// optimisations in the (hopefully) more common fast path where no real action
// is needed. They actually have to preserve all registers, except for the
// unavoidable X30 and the return register X0.
def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
                                   (sequence "Q%u", 31, 0))>;
lib/Target/AArch64/AArch64ConstantIslandPass.cpp (new file, 1420 lines): diff suppressed because it is too large.
lib/Target/AArch64/AArch64FrameLowering.cpp (new file, 644 lines):
//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64FrameLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
                                              uint64_t &Initial,
                                              uint64_t &Residual) const {
  // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
  // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
  // 0x1f8, but stack adjustment should always be a multiple of 16.
  if (Total <= 0x1f0) {
    Initial = Total;
    Residual = 0;
  } else {
    Initial = 0x1f0;
    Residual = Total - Initial;
  }
}
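// Worked example (illustrative): Total = 0x230 gives Initial = 0x1f0 and
// Residual = 0x40, while any total up to 0x1f0 is taken in one adjustment.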

void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();
  MachineBasicBlock &MBB = MF.front();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineModuleInfo &MMI = MF.getMMI();
  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  bool NeedsFrameMoves = MMI.hasDebugInfo()
    || MF.getFunction()->needsUnwindTableEntry();

  uint64_t NumInitialBytes, NumResidualBytes;

  // Currently we expect the stack to be laid out by
  //     sub sp, sp, #initial
  //     stp x29, x30, [sp, #offset]
  //     ...
  //     str xxx, [sp, #offset]
  //     sub sp, sp, #rest (possibly via extra instructions).
  if (MFI->getCalleeSavedInfo().size()) {
    // If there are callee-saved registers, we want to store them efficiently
    // as a block, and virtual base assignment happens too early to do it for
    // us, so we adjust the stack in two phases: first just for callee-saved
    // fiddling, then to allocate the rest of the frame.
    splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
  } else {
    // If there aren't any callee-saved registers, two-phase adjustment is
    // inefficient. It's more efficient to adjust with NumInitialBytes too
    // because when we're in a "callee pops argument space" situation, that pop
    // must be tacked onto Initial for correctness.
    NumInitialBytes = MFI->getStackSize();
    NumResidualBytes = 0;
  }

  // Tell everyone else how much adjustment we're expecting them to use. In
  // particular if an adjustment is required for a tail call the epilogue could
  // have a different view of things.
  FuncInfo->setInitialStackAdjust(NumInitialBytes);

  emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
               MachineInstr::FrameSetup);

  if (NeedsFrameMoves && NumInitialBytes) {
    // We emit this update even if the CFA is set from a frame pointer later so
    // that the CFA is valid in the interim.
    MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
      .addSym(SPLabel);

    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(AArch64::XSP, NumInitialBytes);
    Moves.push_back(MachineMove(SPLabel, Dst, Src));
  }

  // Otherwise we need to set the frame pointer and/or add a second stack
  // adjustment.

  bool FPNeedsSetting = hasFP(MF);
  for (; MBBI != MBB.end(); ++MBBI) {
    // Note that this search makes strong assumptions about the operation used
    // to store the frame pointer: it must be "STP x29, x30, ...". This could
    // change in future, but until then there's no point in implementing more
    // general but untestable cases.
    if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
        && MBBI->getOperand(0).getReg() == AArch64::X29) {
      int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
      FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));

      ++MBBI;
      emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
                    AArch64::X29,
                    NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
                    MachineInstr::FrameSetup);

      // The offset adjustment used when emitting debugging locations relative
      // to whatever frame base is set. AArch64 uses the default frame base (FP
      // or SP) and this adjusts the calculations to be correct.
      MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
                               - MFI->getStackSize());

      if (NeedsFrameMoves) {
        MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
        BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
          .addSym(FPLabel);
        MachineLocation Dst(MachineLocation::VirtualFP);
        MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
        Moves.push_back(MachineMove(FPLabel, Dst, Src));
      }

      FPNeedsSetting = false;
    }

    if (!MBBI->getFlag(MachineInstr::FrameSetup))
      break;
  }

  assert(!FPNeedsSetting && "Frame pointer couldn't be set");

  emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
               MachineInstr::FrameSetup);

  // Now we emit the rest of the frame setup information, if necessary: we've
  // already noted the FP and initial SP moves so we're left with the
  // prologue's final SP update and callee-saved register locations.
  if (!NeedsFrameMoves)
    return;

  // Reuse the label if appropriate, so create it in this outer scope.
  MCSymbol *CSLabel = 0;

  // The rest of the stack adjustment
  if (!hasFP(MF) && NumResidualBytes) {
    CSLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
      .addSym(CSLabel);

    MachineLocation Dst(MachineLocation::VirtualFP);
    MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
    Moves.push_back(MachineMove(CSLabel, Dst, Src));
  }

  // And any callee-saved registers (it's fine to leave them to the end here,
  // because the old values are still valid at this point).
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.size()) {
    if (!CSLabel) {
      CSLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
        .addSym(CSLabel);
    }

    for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
           E = CSI.end(); I != E; ++I) {
      MachineLocation Dst(MachineLocation::VirtualFP,
                          MFI->getObjectOffset(I->getFrameIdx()));
      MachineLocation Src(I->getReg());
      Moves.push_back(MachineMove(CSLabel, Dst, Src));
    }
  }
}
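
// Putting the pieces together: for the illustrative 0x230-byte frame above,
// with callee-saves and a frame pointer, the emitted prologue has roughly
// this shape (exact offsets depend on the actual frame layout):
//   sub sp, sp, #0x1f0          // initial (callee-save) adjustment
//   stp x29, x30, [sp, #0x1e0]  // frame record at the top of the frame
//   add x29, sp, #0x1e0         // set up the frame pointer
//   sub sp, sp, #0x40           // residual adjustment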

void
AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  DebugLoc DL = MBBI->getDebugLoc();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned RetOpcode = MBBI->getOpcode();

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
  uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
  uint64_t ArgumentPopSize = 0;
  if (RetOpcode == AArch64::TC_RETURNdi ||
      RetOpcode == AArch64::TC_RETURNxi) {
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    MachineInstrBuilder MIB;
    if (RetOpcode == AArch64::TC_RETURNdi) {
      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol() && "unexpected tail call destination");
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else {
      assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
             && "Unexpected tail call");

      MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
      MIB.addReg(JumpTarget.getReg(), RegState::Kill);
    }

    // Add the extra operands onto the new tail call instruction even though
    // they're not used directly (so that liveness is tracked properly etc).
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      MIB->addOperand(MBBI->getOperand(i));


    // Delete the pseudo instruction TC_RETURN.
    MachineInstr *NewMI = prior(MBBI);
    MBB.erase(MBBI);
    MBBI = NewMI;

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
  }

  assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
         && "refusing to adjust stack by misaligned amt");

  // We may need to address callee-saved registers differently, so find out the
  // bound on the frame indices.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int MinCSFI = 0;
  int MaxCSFI = -1;

  if (CSI.size()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
  }

  // The "residual" stack update comes first from this direction and guarantees
  // that SP is NumInitialBytes below its value on function entry, either by a
  // direct update or restoring it from the frame pointer.
  if (NumInitialBytes + ArgumentPopSize != 0) {
    emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
                 NumInitialBytes + ArgumentPopSize);
    --MBBI;
  }


  // MBBI now points to the instruction just past the last callee-saved
  // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
  // otherwise).

  // Now we need to find out where to put the bulk of the stack adjustment
  MachineBasicBlock::iterator FirstEpilogue = MBBI;
  while (MBBI != MBB.begin()) {
    --MBBI;

    unsigned FrameOp;
    for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
      if (MBBI->getOperand(FrameOp).isFI())
        break;
    }

    // If this instruction doesn't have a frame index we've reached the end of
    // the callee-save restoration.
    if (FrameOp == MBBI->getNumOperands())
      break;

    // Likewise if it *is* a local reference, but not to a callee-saved object.
    int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
    if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
      break;

    FirstEpilogue = MBBI;
  }

  if (MF.getFrameInfo()->hasVarSizedObjects()) {
    int64_t StaticFrameBase;
    StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
    emitRegUpdate(MBB, FirstEpilogue, DL, TII,
                  AArch64::XSP, AArch64::X29, AArch64::NoRegister,
                  StaticFrameBase);
  } else {
    emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
  }
}
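
// The matching epilogue for the frame sketched above unwinds in the opposite
// order, schematically:
//   add sp, sp, #0x40           // residual adjustment undone first
//   ldp x29, x30, [sp, #0x1e0]  // callee-saved restoration
//   add sp, sp, #0x1f0          // initial adjustment, plus any argument
//   ret                         // space the callee has to pop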

int64_t
AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
                                                 int FrameIndex,
                                                 unsigned &FrameReg,
                                                 int SPAdj,
                                                 bool IsCalleeSaveOp) const {
  AArch64MachineFunctionInfo *FuncInfo =
    MF.getInfo<AArch64MachineFunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);

  assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
         && "callee-saved register in unexpected place");

  // If the frame for this function is particularly large, we adjust the stack
  // in two phases which means the callee-save related operations see a
  // different (intermediate) stack size.
  int64_t FrameRegPos;
  if (IsCalleeSaveOp) {
    FrameReg = AArch64::XSP;
    FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
  } else if (useFPForAddressing(MF)) {
    // Have to use the frame pointer since we have no idea where SP is.
    FrameReg = AArch64::X29;
    FrameRegPos = FuncInfo->getFramePointerOffset();
  } else {
    FrameReg = AArch64::XSP;
    FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
  }

  return TopOfFrameOffset - FrameRegPos;
}
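
// Concretely: with InitialStackAdjust == 0x1f0, a callee-save slot at object
// offset -0x10 resolves to [XSP, #0x1e0] during the save/restore phase, since
// -0x10 - (-0x1f0) == 0x1e0.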

/// Estimate and return the size of the frame.
static unsigned estimateStackSize(MachineFunction &MF) {
  // FIXME: Make generic? Really consider after upstreaming. This code is now
  // shared between PEI, ARM *and* here.
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
  unsigned MaxAlign = MFI->getMaxAlignment();
  int Offset = 0;

  // This code is very, very similar to PEI::calculateFrameObjectOffsets().
  // It really should be refactored to share code. Until then, changes
  // should keep in mind that there's tight coupling between the two.

  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
    int FixedOff = -MFI->getObjectOffset(i);
    if (FixedOff > Offset) Offset = FixedOff;
  }
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
    if (MFI->isDeadObjectIndex(i))
      continue;
    Offset += MFI->getObjectSize(i);
    unsigned Align = MFI->getObjectAlignment(i);
    // Adjust to alignment boundary
    Offset = (Offset+Align-1)/Align*Align;

    MaxAlign = std::max(Align, MaxAlign);
  }

  if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
    Offset += MFI->getMaxCallFrameSize();

  // Round up the size to a multiple of the alignment. If the function has
  // any calls or alloca's, align to the target's StackAlignment value to
  // ensure that the callee's frame or the alloca data is suitably aligned;
  // otherwise, for leaf functions, align to the TransientStackAlignment
  // value.
  unsigned StackAlign;
  if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
      (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
    StackAlign = TFI->getStackAlignment();
  else
    StackAlign = TFI->getTransientStackAlignment();

  // If the frame pointer is eliminated, all frame offsets will be relative to
  // SP not FP. Align to MaxAlign so this works.
  StackAlign = std::max(StackAlign, MaxAlign);
  unsigned AlignMask = StackAlign - 1;
  Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);

  return (unsigned)Offset;
}
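
// N.b. the two rounding idioms above agree: e.g. Offset == 20 with Align == 8
// gives (20 + 7) / 8 * 8 == 24, exactly as (20 + 7) & ~7 == 24.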

void
AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  const AArch64RegisterInfo *RegInfo =
    static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64InstrInfo &TII =
    *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());

  if (hasFP(MF)) {
    MF.getRegInfo().setPhysRegUsed(AArch64::X29);
    MF.getRegInfo().setPhysRegUsed(AArch64::X30);
  }

  // If addressing of local variables is going to be more complicated than
  // shoving a base register and an offset into the instruction then we may
  // well need to scavenge registers. We should either specifically add a
  // callee-saved register for this purpose or allocate an extra spill slot.

  bool BigStack =
    (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
    || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
    || (MFI->adjustsStack() && !hasReservedCallFrame(MF));

  if (!BigStack)
    return;

  // We certainly need some slack space for the scavenger, preferably an extra
  // register.
  const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
  uint16_t ExtraReg = AArch64::NoRegister;

  for (unsigned i = 0; CSRegs[i]; ++i) {
    if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
        !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
      ExtraReg = CSRegs[i];
      break;
    }
  }

  if (ExtraReg != 0) {
    MF.getRegInfo().setPhysRegUsed(ExtraReg);
  } else {
    // Create a stack slot for scavenging purposes. PrologEpilogInserter
    // helpfully places it near either SP or FP for us to avoid an infinite
    // regress during scavenging.
    const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                       RC->getAlignment(),
                                                       false));
  }
}

bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
                                                  unsigned Reg) const {
  // If @llvm.returnaddress is called then it will refer to X30 by some means;
  // the prologue store does not kill the register.
  if (Reg == AArch64::X30) {
    if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
        && MBB.getParent()->getRegInfo().isLiveIn(Reg))
      return false;
  }

  // In all other cases, physical registers are dead after they've been saved
  // but live at the beginning of the prologue block.
  MBB.addLiveIn(Reg);
  return true;
}

void
AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      const std::vector<CalleeSavedInfo> &CSI,
                                      const TargetRegisterInfo *TRI,
                                      LoadStoreMethod PossClasses[],
                                      unsigned NumClasses) const {
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();

  // A certain amount of implicit contract is present here. The actual stack
  // offsets haven't been allocated officially yet, so for strictly correct
  // code we rely on the fact that the elements of CSI are allocated in order
  // starting at SP, purely as dictated by size and alignment. In practice
  // since this function handles the only accesses to those slots it's not
  // quite so important.
  //
  // We have also ordered the callee-saved register list in AArch64CallingConv
  // so that the above scheme puts registers in order: in particular we want
  // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
  for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // First we need to find out which register class the register belongs to
    // so that we can use the correct load/store instructions.
    unsigned ClassIdx;
    for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
      if (PossClasses[ClassIdx].RegClass->contains(Reg))
        break;
    }
    assert(ClassIdx != NumClasses
           && "Asked to store register in unexpected class");
    const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;

    // Now we need to decide whether it's possible to emit a paired
    // instruction: for this we want the next register to be in the same class.
    MachineInstrBuilder NewMI;
    bool Pair = false;
    if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
      Pair = true;
      unsigned StLow = 0, StHigh = 0;
      if (isPrologue) {
        // Most of these registers will be live-in to the MBB and killed by our
        // store, though there are exceptions (see determinePrologueDeath).
        StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
        StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
      } else {
        StLow = RegState::Define;
        StHigh = RegState::Define;
      }

      NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
        .addReg(CSI[i+1].getReg(), StLow)
        .addReg(CSI[i].getReg(), StHigh);

      // If it's a paired op, we've consumed two registers
      ++i;
    } else {
      unsigned State;
      if (isPrologue) {
        State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
      } else {
        State = RegState::Define;
      }

      NewMI = BuildMI(MBB, MBBI, DL,
                      TII.get(PossClasses[ClassIdx].SingleOpcode))
        .addReg(CSI[i].getReg(), State);
    }

    // Note that the FrameIdx refers to the second register in a pair: it will
    // be allocated the smaller numeric address and so is the one an LDP/STP
    // address must use.
    int FrameIdx = CSI[i].getFrameIdx();
    MachineMemOperand::MemOperandFlags Flags;
    Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
                             Flags,
                             Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
                             MFI.getObjectAlignment(FrameIdx));

    NewMI.addFrameIndex(FrameIdx)
      .addImm(0)          // address-register offset
      .addMemOperand(MMO);

    if (isPrologue)
      NewMI.setMIFlags(MachineInstr::FrameSetup);

    // For aesthetic reasons, during an epilogue we want to emit complementary
    // operations to the prologue, but in the opposite order. So we still
    // iterate through the CalleeSavedInfo list in order, but we put the
    // instructions successively earlier in the MBB.
    if (!isPrologue)
      --MBBI;
  }
}
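
// For instance, with CSI ordered {X22, X21, X20, D9, D8} the loop above emits
// (prologue direction, offsets schematic):
//   stp x21, x22, [sp, #...]  // consecutive GPR64 entries pair up
//   str x20, [sp, #...]       // no GPR64 partner left: single op
//   stp d8, d9, [sp, #...]    // pair from the FPR64 class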

bool
AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  static LoadStoreMethod PossibleClasses[] = {
    {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
    {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
  };
  unsigned NumClasses = llvm::array_lengthof(PossibleClasses);

  emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
                  PossibleClasses, NumClasses);

  return true;
}

bool
AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {

  if (CSI.empty())
    return false;

  static LoadStoreMethod PossibleClasses[] = {
    {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
    {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
  };
  unsigned NumClasses = llvm::array_lengthof(PossibleClasses);

  emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
                  PossibleClasses, NumClasses);

  return true;
}

bool
AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();

  // This is a decision of ABI compliance. The AArch64 PCS gives various
  // options for conformance, and even at the most stringent level more or less
  // permits elimination for leaf functions because there's no loss of
  // functionality (for debugging etc.).
  if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
    return true;

  // The following are hard limits: incorrect code will be generated if we try
  // to omit the frame pointer.
  return (RI->needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken());
}

bool
AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
  return MF.getFrameInfo()->hasVarSizedObjects();
}

bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Of the various reasons for having a frame pointer, it's actually only
  // variable-sized objects that prevent reservation of a call frame.
  return !(hasFP(MF) && MFI->hasVarSizedObjects());
}
103  lib/Target/AArch64/AArch64FrameLowering.h  Normal file
@ -0,0 +1,103 @@

//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64 implementation of TargetFrameLowering.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64_FRAMEINFO_H
#define LLVM_AARCH64_FRAMEINFO_H

#include "AArch64Subtarget.h"
#include "llvm/Target/TargetFrameLowering.h"

namespace llvm {
class AArch64Subtarget;

class AArch64FrameLowering : public TargetFrameLowering {
private:
  // In order to unify the spilling and restoring of callee-saved registers
  // into emitFrameMemOps, we need to be able to specify which instructions to
  // use for the relevant memory operations on each register class. An array of
  // the following struct is populated and passed in to achieve this.
  struct LoadStoreMethod {
    const TargetRegisterClass *RegClass; // E.g. GPR64RegClass

    // The preferred instruction.
    unsigned PairOpcode; // E.g. LSPair64_STR

    // Sometimes only a single register can be handled at once.
    unsigned SingleOpcode; // E.g. LS64_STR
  };
protected:
  const AArch64Subtarget &STI;

public:
  explicit AArch64FrameLowering(const AArch64Subtarget &sti)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
      STI(sti) {
  }

  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
  /// the function.
  virtual void emitPrologue(MachineFunction &MF) const;
  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;

  /// Decides how much stack adjustment to perform in each phase of the
  /// prologue and epilogue.
  void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
                          uint64_t &Residual) const;

  int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
                                     unsigned &FrameReg, int SPAdj,
                                     bool IsCalleeSaveOp) const;

  virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                    RegScavenger *RS) const;

  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const;
  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const;

  /// If the register is X30 (i.e. LR) and the return address is used in the
  /// function then the callee-save store doesn't actually kill the register,
  /// otherwise it does.
  bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;

  /// This function emits the loads or stores required during prologue and
  /// epilogue as efficiently as possible.
  ///
  /// The operations involved in setting up and tearing down the frame are
  /// similar enough to warrant a shared function, particularly as
  /// discrepancies between the two would be disastrous.
  void emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MI,
                       const std::vector<CalleeSavedInfo> &CSI,
                       const TargetRegisterInfo *TRI,
                       LoadStoreMethod PossibleClasses[],
                       unsigned NumClasses) const;


  virtual bool hasFP(const MachineFunction &MF) const;

  virtual bool useFPForAddressing(const MachineFunction &MF) const;

  /// On AArch64, only variable-sized objects prevent reserving a call frame.
  virtual bool hasReservedCallFrame(const MachineFunction &MF) const;

};

} // End llvm namespace

#endif
422  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp  Normal file
@ -0,0 +1,422 @@

//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "aarch64-isel"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64BaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

//===--------------------------------------------------------------------===//
/// AArch64 specific code to select AArch64 machine instructions for
/// SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {
  AArch64TargetMachine &TM;
  const AArch64InstrInfo *TII;

  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel), TM(tm),
      TII(static_cast<const AArch64InstrInfo*>(TM.getInstrInfo())),
      Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
  }

  virtual const char *getPassName() const {
    return "AArch64 Instruction Selection";
  }

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

  template<unsigned MemSize>
  bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
    if (!CN || CN->getZExtValue() % MemSize != 0
        || CN->getZExtValue() / MemSize > 0xfff)
      return false;

    UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
    return true;
  }
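
  // E.g. with MemSize == 8, an offset of 0x7ff8 encodes as UImm12 == 0xfff,
  // while 0x7ff9 (misaligned) or 0x8000 (beyond the 12-bit scaled range) fail
  // the checks and must be matched some other way.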

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                unsigned RegWidth);

  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    char ConstraintCode,
                                    std::vector<SDValue> &OutOps);

  bool SelectLogicalImm(SDValue N, SDValue &Imm);

  template<unsigned RegWidth>
  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
    return SelectTSTBOperand(N, FixedPos, RegWidth);
  }

  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);

  SDNode *TrySelectToMoveImm(SDNode *N);
  SDNode *SelectToLitPool(SDNode *N);
  SDNode *SelectToFPLitPool(SDNode *N);

  SDNode* Select(SDNode*);
private:
};
}

bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                              unsigned RegWidth) {
  const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
  if (!CN) return false;

  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
  // x-register.
  //
  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
  // integers.
  bool IsExact;

  // fbits is between 1 and 64 in the worst case, which means the fmul
  // could have 2^64 as an actual operand. Need 65 bits of precision.
  APSInt IntVal(65, true);
  CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);

  // N.b. isPowerOf2 also checks for > 0.
  if (!IsExact || !IntVal.isPowerOf2()) return false;
  unsigned FBits = IntVal.logBase2();

  // Checks above should have guaranteed that we haven't lost information in
  // finding FBits, but it must still be in range.
  if (FBits == 0 || FBits > RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
  return true;
}
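
// Illustration: (fp_to_sint (fmul Val, 65536.0)) yields IntVal == 2^16, so
// FBits == 16 and the operand is encoded as 64 - 16 == 48.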

bool
AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
                                                  char ConstraintCode,
                                                  std::vector<SDValue> &OutOps) {
  switch (ConstraintCode) {
  default: llvm_unreachable("Unrecognised AArch64 memory constraint");
  case 'm':
    // FIXME: more freedom is actually permitted for 'm'. We can go
    // hunting for a base and an offset if we want. Of course, since
    // we don't really know how the operand is going to be used we're
    // probably restricted to the load/store pair's simm7 as an offset
    // range anyway.
  case 'Q':
    OutOps.push_back(Op);
  }

  return false;
}

bool
AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
  ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
  if (!Imm || !Imm->getValueAPF().isPosZero())
    return false;

  // Doesn't actually carry any information, but keeps TableGen quiet.
  Dummy = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}

bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
  uint32_t Bits;
  uint32_t RegWidth = N.getValueType().getSizeInBits();

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
    return false;

  Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
  return true;
}
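
// A64 logical immediates are repeating patterns of rotated 1-runs, so e.g.
// 0x5555555555555555 and 0x00ff00ff00ff00ff are matched here, while an
// arbitrary constant such as 0x12345 is not and gets materialised another way.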

SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
  SDNode *ResNode;
  DebugLoc dl = Node->getDebugLoc();
  EVT DestType = Node->getValueType(0);
  unsigned DestWidth = DestType.getSizeInBits();

  unsigned MOVOpcode;
  EVT MOVType;
  int UImm16, Shift;
  uint32_t LogicalBits;

  uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
  if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
  } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
    MOVType = DestType;
    MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
  } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
    // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we
    // can use a 32-bit instruction: "movn w0, 0xedcb".
    MOVType = MVT::i32;
    MOVOpcode = AArch64::MOVNwii;
  } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
    MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
    uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;

    return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
                              CurDAG->getRegister(ZR, DestType),
                              CurDAG->getTargetConstant(LogicalBits, MVT::i32));
  } else {
    // Can't handle it in one instruction. There's scope for permitting two (or
    // more) instructions, but that'll need more thought.
    return NULL;
  }

  ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
                                   CurDAG->getTargetConstant(UImm16, MVT::i32),
                                   CurDAG->getTargetConstant(Shift, MVT::i32));

  if (MOVType != DestType) {
    ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
                          MVT::i64, MVT::i32, MVT::Other,
                          CurDAG->getTargetConstant(0, MVT::i64),
                          SDValue(ResNode, 0),
                          CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
  }

  return ResNode;
}
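
// Examples of the cases above: 0x00ab0000 selects to a single MOVZ (one
// non-zero 16-bit chunk), 0xffffffffffff1234 to MOVN, 0x0000_0000_ffff_1234 to
// the 32-bit MOVN plus SUBREG_TO_REG path, and 0x5555555555555555 to an ORR
// from the zero register with a logical immediate.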

SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
  DebugLoc dl = Node->getDebugLoc();
  uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
  int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
  EVT DestType = Node->getValueType(0);

  // Since we may end up loading a 64-bit constant from a 32-bit entry the
  // constant in the pool may have a different type to the eventual node.
  SDValue PoolEntry;
  EVT LoadType;
  unsigned LoadInst;

  assert((DestType == MVT::i64 || DestType == MVT::i32)
         && "Only expect integer constants at the moment");

  if (DestType == MVT::i32 || UnsignedVal <= UINT32_MAX) {
    // LDR w3, lbl
    LoadInst = AArch64::LDRw_lit;
    LoadType = MVT::i32;

    PoolEntry = CurDAG->getTargetConstantPool(
      ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), UnsignedVal),
      MVT::i32);
  } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
    // We can use a sign-extending 32-bit load: LDRSW x3, lbl
    LoadInst = AArch64::LDRSWx_lit;
    LoadType = MVT::i64;

    PoolEntry = CurDAG->getTargetConstantPool(
      ConstantInt::getSigned(Type::getInt32Ty(*CurDAG->getContext()),
                             SignedVal),
      MVT::i32);
  } else {
    // Full 64-bit load needed: LDR x3, lbl
    LoadInst = AArch64::LDRx_lit;
    LoadType = MVT::i64;

    PoolEntry = CurDAG->getTargetConstantPool(
      ConstantInt::get(Type::getInt64Ty(*CurDAG->getContext()), UnsignedVal),
      MVT::i64);
  }

  SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl,
                                           LoadType, MVT::Other,
                                           PoolEntry, CurDAG->getEntryNode());

  if (DestType != LoadType) {
    // We used the implicit zero-extension of "LDR w3, lbl", tell LLVM this
    // fact.
    assert(DestType == MVT::i64 && LoadType == MVT::i32
           && "Unexpected load combination");

    ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
                          MVT::i64, MVT::i32, MVT::Other,
                          CurDAG->getTargetConstant(0, MVT::i64),
                          SDValue(ResNode, 0),
                          CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
  }

  return ResNode;
}
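
// The three pool shapes in practice: 0x0000000012345678 fits the
// zero-extending "LDR w3, lbl" case, 0xffffffff80000000 sign-extends from a
// 32-bit entry via LDRSW, and 0x123456789abcdef0 needs a full 64-bit entry.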

SDNode *AArch64DAGToDAGISel::SelectToFPLitPool(SDNode *Node) {
  DebugLoc dl = Node->getDebugLoc();
  const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
  EVT DestType = Node->getValueType(0);

  unsigned LoadInst;
  switch (DestType.getSizeInBits()) {
  case 32:
    LoadInst = AArch64::LDRs_lit;
    break;
  case 64:
    LoadInst = AArch64::LDRd_lit;
    break;
  case 128:
    LoadInst = AArch64::LDRq_lit;
    break;
  default: llvm_unreachable("cannot select floating-point litpool");
  }

  SDValue PoolEntry = CurDAG->getTargetConstantPool(FV, DestType);
  SDNode *ResNode = CurDAG->getMachineNode(LoadInst, dl,
                                           DestType, MVT::Other,
                                           PoolEntry, CurDAG->getEntryNode());

  return ResNode;
}

bool
AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
                                       unsigned RegWidth) {
  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) return false;

  uint64_t Val = CN->getZExtValue();

  if (!isPowerOf2_64(Val)) return false;

  unsigned TestedBit = Log2_64(Val);
  // Checks above should have guaranteed that we haven't lost information in
  // finding TestedBit, but it must still be in range.
  if (TestedBit >= RegWidth) return false;

  FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
  return true;
}
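
// E.g. an (and x, 0x80) feeding a single-bit test (TBZ/TBNZ) selects with
// TestedBit == 7; a mask such as 0x81 has two bits set and is rejected.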

SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
  // Dump information about the Node being selected
  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");

  if (Node->isMachineOpcode()) {
    DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    return NULL;
  }

  switch (Node->getOpcode()) {
  case ISD::FrameIndex: {
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    EVT PtrTy = TLI.getPointerTy();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
    return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
                                TFI, CurDAG->getTargetConstant(0, PtrTy));
  }
  case ISD::ConstantPool: {
    // Constant pools are fine, just create a Target entry.
    ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
    const Constant *C = CN->getConstVal();
    SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));

    ReplaceUses(SDValue(Node, 0), CP);
    return NULL;
  }
  case ISD::Constant: {
    SDNode *ResNode = 0;
    if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
      // XZR and WZR are probably even better than an actual move: most of the
      // time they can be folded into another instruction with *no* cost.

      EVT Ty = Node->getValueType(0);
      assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
      uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
      ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                       Node->getDebugLoc(),
                                       Register, Ty).getNode();
    }

    // Next best option is a move-immediate, see if we can do that.
    if (!ResNode) {
      ResNode = TrySelectToMoveImm(Node);
    }

    // If even that fails we fall back to a lit-pool entry at the moment. Future
    // tuning or restrictions like non-readable code-sections may mandate a
    // sequence of MOVZ/MOVN/MOVK instructions.
    if (!ResNode) {
      ResNode = SelectToLitPool(Node);
    }

    assert(ResNode && "We need *some* way to materialise a constant");

    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
    return NULL;
  }
  case ISD::ConstantFP: {
    if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
      // FMOV will take care of it from TableGen
      break;
    }

    SDNode *ResNode = SelectToFPLitPool(Node);
    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
    return NULL;
  }
  default:
    break; // Let generic code handle it
  }

  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << "\n");

  return ResNode;
}

/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
/// instruction scheduling.
FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}
2957  lib/Target/AArch64/AArch64ISelLowering.cpp  Normal file
File diff suppressed because it is too large
247  lib/Target/AArch64/AArch64ISelLowering.h  Normal file
@ -0,0 +1,247 @@

//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
#define LLVM_TARGET_AARCH64_ISELLOWERING_H

#include "MCTargetDesc/AArch64BaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"


namespace llvm {
namespace AArch64ISD {
  enum NodeType {
    // Start the numbering from where ISD NodeType finishes.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    // This is a conditional branch which also notes the flag needed
    // (eq/sgt/...). A64 puts this information on the branches rather than
    // compares as LLVM does.
    BR_CC,

    // A node to be selected to an actual call operation: either BL or BLR in
    // the absence of tail calls.
    Call,

    // Indicates a floating-point immediate which fits into the format required
    // by the FMOV instructions. First (and only) operand is the 8-bit encoded
    // value of that immediate.
    FPMOV,

    // Corresponds directly to an EXTR instruction. Operands are an LHS, an RHS
    // and an LSB.
    EXTR,

    // Wraps a load from the GOT, which should always be performed with a
    // 64-bit load instruction. This prevents the DAG combiner folding a
    // truncate to form a smaller memory access.
    GOTLoad,

    // Performs a bitfield insert. Arguments are: the value being inserted
    // into; the value being inserted; least significant bit changed; width of
    // the field.
    BFI,

    // Simply a convenient node inserted during ISelLowering to represent
    // procedure return. Will almost certainly be selected to "RET".
    Ret,

    /// Extracts a field of contiguous bits from the source and sign extends
    /// them into a single register. Arguments are: source; immr; imms. Note
    /// these are pre-encoded since DAG matching can't cope with combining LSB
    /// and Width into these values itself.
    SBFX,

    /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
    /// main difference is that it only has the values and an A64 condition,
    /// which will be produced by a setcc instruction.
    SELECT_CC,

    /// This serves most of the functions of the LLVM SETCC instruction, for
    /// two purposes. First, it prevents optimisations from fiddling with the
    /// compare after we've moved the CondCode information onto the SELECT_CC
    /// or BR_CC instructions. Second, it gives a legal instruction for the
    /// actual comparison.
    ///
    /// It keeps a record of the condition flags asked for because certain
    /// instructions are only valid for a subset of condition codes.
    SETCC,

    // Designates a node which is a tail call: both a call and a return
    // instruction as far as selection is concerned. It should be selected to
    // an unconditional branch. Has the usual plethora of call operands, but:
    // 1st is callee, 2nd is stack adjustment required immediately before
    // branch.
    TC_RETURN,

    // Designates a call used to support the TLS descriptor ABI. The call
    // itself will be indirect ("BLR xN") but a relocation-specifier
    // (".tlsdesccall var") must be attached somehow during code generation. It
    // takes two operands: the callee and the symbol to be relocated against.
    TLSDESCCALL,

    // Leaf node which will be lowered to an appropriate MRS to obtain the
    // thread pointer: TPIDR_EL0.
    THREAD_POINTER,

    /// Extracts a field of contiguous bits from the source and zero extends
    /// them into a single register. Arguments are: source; immr; imms. Note
    /// these are pre-encoded since DAG matching can't cope with combining LSB
    /// and Width into these values itself.
    UBFX,

    // Wraps an address which the ISelLowering phase has decided should be
    // created using the small absolute memory model: i.e. adrp/add or
    // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
    // get selected.
    WrapperSmall
  };
}


class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(AArch64TargetMachine &TM);

  const char *getTargetNodeName(unsigned Opcode) const;

  CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;

  SDValue LowerFormalArguments(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               DebugLoc dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const;

  SDValue LowerReturn(SDValue Chain,
                      CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals,
                      DebugLoc dl, SelectionDAG &DAG) const;

  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool IsVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          DebugLoc dl, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals) const;

  void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
                           DebugLoc DL, SDValue &Chain) const;


  /// IsEligibleForTailCallOptimization - Check whether the call is eligible
  /// for tail call optimization. Targets which want to do tail call
  /// optimization should implement this function.
  bool IsEligibleForTailCallOptimization(SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    bool IsVarArg,
                                    bool IsCalleeStructRet,
                                    bool IsCallerStructRet,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo *MFI, int ClobberedFI) const;

  EVT getSetCCResultType(EVT VT) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  bool IsTailCallConvention(CallingConv::ID CallCC) const;

  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;

  bool isLegalICmpImmediate(int64_t Val) const;
  SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                         SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const;

  virtual MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;

  MachineBasicBlock *
  emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
                   unsigned Size, unsigned Opcode) const;

  MachineBasicBlock *
  emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
                         unsigned Size, unsigned CmpOp,
                         A64CC::CondCodes Cond) const;
  MachineBasicBlock *
  emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
                    unsigned Size) const;

  MachineBasicBlock *
  EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;

  SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
                          RTLIB::Libcall Call) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
  SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
                           SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;

  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
  /// a pair of mul and add instructions. fmuladd intrinsics will be expanded
  /// to FMAs when this method returns true (and FMAs are legal), otherwise
  /// fmuladd is expanded to mul + add.
  virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }

  ConstraintType getConstraintType(const std::string &Constraint) const;

  ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
                                                  const char *Constraint) const;
  void LowerAsmOperandForConstraint(SDValue Op,
                                    std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const;

  std::pair<unsigned, const TargetRegisterClass*>
  getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
private:
  const AArch64Subtarget *Subtarget;
  const TargetRegisterInfo *RegInfo;
  const InstrItineraryData *Itins;
};
} // namespace llvm

#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
1011  lib/Target/AArch64/AArch64InstrFormats.td  Normal file
File diff suppressed because it is too large
805  lib/Target/AArch64/AArch64InstrInfo.cpp  Normal file
@ -0,0 +1,805 @@
//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64BaseInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"

#include <algorithm>

#define GET_INSTRINFO_CTOR
#include "AArch64GenInstrInfo.inc"

using namespace llvm;

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
    RI(*this, STI), Subtarget(STI) {}

void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  unsigned Opc = 0;
  unsigned ZeroReg = 0;
  if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
    // E.g. ADD xDst, xsp, #0 (, lsl #0)
    BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
      .addReg(SrcReg)
      .addImm(0);
    return;
  } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
    // E.g. ADD wDST, wsp, #0 (, lsl #0)
    BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
      .addReg(SrcReg)
      .addImm(0);
    return;
  } else if (DestReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(SrcReg));
    // E.g. MSR NZCV, xDST
    BuildMI(MBB, I, DL, get(AArch64::MSRix))
      .addImm(A64SysReg::NZCV)
      .addReg(SrcReg);
  } else if (SrcReg == AArch64::NZCV) {
    assert(AArch64::GPR64RegClass.contains(DestReg));
    // E.g. MRS xDST, NZCV
    BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
      .addImm(A64SysReg::NZCV);
  } else if (AArch64::GPR64RegClass.contains(DestReg)) {
    assert(AArch64::GPR64RegClass.contains(SrcReg));
    Opc = AArch64::ORRxxx_lsl;
    ZeroReg = AArch64::XZR;
  } else if (AArch64::GPR32RegClass.contains(DestReg)) {
    assert(AArch64::GPR32RegClass.contains(SrcReg));
    Opc = AArch64::ORRwww_lsl;
    ZeroReg = AArch64::WZR;
  } else if (AArch64::FPR32RegClass.contains(DestReg)) {
    assert(AArch64::FPR32RegClass.contains(SrcReg));
    BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
      .addReg(SrcReg);
    return;
  } else if (AArch64::FPR64RegClass.contains(DestReg)) {
    assert(AArch64::FPR64RegClass.contains(SrcReg));
    BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
      .addReg(SrcReg);
    return;
  } else if (AArch64::FPR128RegClass.contains(DestReg)) {
    assert(AArch64::FPR128RegClass.contains(SrcReg));

    // FIXME: there's no good way to do this, at least without NEON:
    //   + There's no single move instruction for q-registers
    //   + We can't create a spill slot and use normal STR/LDR because stack
    //     allocation has already happened
    //   + We can't go via X-registers with FMOV because register allocation
    //     has already happened.
    // This may not be efficient, but at least it works.
    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
      .addReg(SrcReg)
      .addReg(AArch64::XSP)
      .addImm(0x1ff & -16);

    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
      .addReg(AArch64::XSP, RegState::Define)
      .addReg(AArch64::XSP)
      .addImm(16);
    return;
  } else {
    llvm_unreachable("Unknown register class in copyPhysReg");
  }

  // E.g. ORR xDst, xzr, xSrc, lsl #0
  BuildMI(MBB, I, DL, get(Opc), DestReg)
    .addReg(ZeroReg)
    .addReg(SrcReg)
    .addImm(0);
}

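To make the FPR128 workaround concrete: the two BuildMI calls in the branch above correspond to a 16-byte push/pop through the stack pointer. A sketch of the emitted sequence, with illustrative register names:

// Sketch of the instructions the FPR128 branch emits:
//   str qSrc, [xsp, #-16]!   // pre-indexed store: XSP -= 16; [XSP] = qSrc
//   ldr qDst, [xsp], #16     // post-indexed load: qDst = [XSP]; XSP += 16
// The "0x1ff & -16" immediate is -16 truncated to the 9-bit two's-complement
// field used by pre-indexed addressing.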
MachineInstr *
AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
                                           uint64_t Offset, const MDNode *MDPtr,
                                           DebugLoc DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
    .addFrameIndex(FrameIx).addImm(0)
    .addImm(Offset)
    .addMetadata(MDPtr);
  return &*MIB;
}

/// Does the Opcode represent a conditional branch that we can remove and
/// re-add at the end of a basic block?
static bool isCondBranch(unsigned Opc) {
  return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
         Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
         Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
         Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
}

/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
/// setting TBB to the destination basic block and populating the Cond vector
/// with data necessary to recreate the conditional branch at a later
/// date. First element will be the opcode, and subsequent ones define the
/// conditions being branched on in an instruction-specific manner.
static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
                               SmallVectorImpl<MachineOperand> &Cond) {
  switch(I->getOpcode()) {
  case AArch64::Bcc:
  case AArch64::CBZw:
  case AArch64::CBZx:
  case AArch64::CBNZw:
  case AArch64::CBNZx:
    // These instructions just have one predicate operand in position 0 (either
    // a condition code or a register being compared).
    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
    Cond.push_back(I->getOperand(0));
    TBB = I->getOperand(1).getMBB();
    return;
  case AArch64::TBZwii:
  case AArch64::TBZxii:
  case AArch64::TBNZwii:
  case AArch64::TBNZxii:
    // These have two predicate operands: a register and a bit position.
    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
    Cond.push_back(I->getOperand(0));
    Cond.push_back(I->getOperand(1));
    TBB = I->getOperand(2).getMBB();
    return;
  default:
    llvm_unreachable("Unknown conditional branch to classify");
  }
}

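As a concrete instance of this convention (operand values made up for illustration): for a branch "TBZ w3, #5, .LBB0_2", classifyCondBranch leaves the following behind, and InsertBranch (further down) replays it verbatim.

// Sketch: contents of Cond after classifying a test-and-branch.
//   Cond[0] = MachineOperand::CreateImm(AArch64::TBZwii); // opcode tag
//   Cond[1] = <register operand w3>;                      // value being tested
//   Cond[2] = <immediate operand 5>;                      // bit position
// and TBB = .LBB0_2. InsertBranch rebuilds the branch as:
//   BuildMI(&MBB, DL, get(Cond[0].getImm()))
//       .addOperand(Cond[1]).addOperand(Cond[2]).addMBB(TBB);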
bool
AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastOpc == AArch64::Bimm) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranch(LastOpc)) {
      classifyCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && LastOpc == AArch64::Bimm) {
    while (SecondLastOpc == AArch64::Bimm) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (LastOpc == AArch64::Bimm) {
    if (SecondLastOpc == AArch64::Bcc) {
      TBB = SecondLastInst->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
      Cond.push_back(SecondLastInst->getOperand(0));
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (isCondBranch(SecondLastOpc)) {
      classifyCondBranch(SecondLastInst, TBB, Cond);
      FBB = LastInst->getOperand(0).getMBB();
      return false;
    }
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::ReverseBranchCondition(
                                  SmallVectorImpl<MachineOperand> &Cond) const {
  switch (Cond[0].getImm()) {
  case AArch64::Bcc: {
    A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
    CC = A64InvertCondCode(CC);
    Cond[1].setImm(CC);
    return false;
  }
  case AArch64::CBZw:
    Cond[0].setImm(AArch64::CBNZw);
    return false;
  case AArch64::CBZx:
    Cond[0].setImm(AArch64::CBNZx);
    return false;
  case AArch64::CBNZw:
    Cond[0].setImm(AArch64::CBZw);
    return false;
  case AArch64::CBNZx:
    Cond[0].setImm(AArch64::CBZx);
    return false;
  case AArch64::TBZwii:
    Cond[0].setImm(AArch64::TBNZwii);
    return false;
  case AArch64::TBZxii:
    Cond[0].setImm(AArch64::TBNZxii);
    return false;
  case AArch64::TBNZwii:
    Cond[0].setImm(AArch64::TBZwii);
    return false;
  case AArch64::TBNZxii:
    Cond[0].setImm(AArch64::TBZxii);
    return false;
  default:
    llvm_unreachable("Unknown branch type");
  }
}

unsigned
AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  if (FBB == 0 && Cond.empty()) {
    BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
    return 1;
  } else if (FBB == 0) {
    MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
    for (int i = 1, e = Cond.size(); i != e; ++i)
      MIB.addOperand(Cond[i]);
    MIB.addMBB(TBB);
    return 1;
  }

  MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
  for (int i = 1, e = Cond.size(); i != e; ++i)
    MIB.addOperand(Cond[i]);
  MIB.addMBB(TBB);

  BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
  return 2;
}

unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranch(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

bool
AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();

  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case AArch64::TLSDESC_BLRx: {
    MachineInstr *NewMI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
        .addOperand(MI.getOperand(1));
    MI.setDesc(get(AArch64::BLRx));

    llvm::finalizeBundle(MBB, NewMI, *++MBBI);
    return true;
  }
  default:
    return false;
  }

  return false;
}

void
AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIdx,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FrameIdx);

  MachineMemOperand *MMO
    = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
                              MachineMemOperand::MOStore,
                              MFI.getObjectSize(FrameIdx),
                              Align);

  unsigned StoreOp = 0;
  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
    switch (RC->getSize()) {
    case 4: StoreOp = AArch64::LS32_STR; break;
    case 8: StoreOp = AArch64::LS64_STR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  } else {
    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
            RC->hasType(MVT::f128))
           && "Expected integer or floating type for store");
    switch (RC->getSize()) {
    case 4: StoreOp = AArch64::LSFP32_STR; break;
    case 8: StoreOp = AArch64::LSFP64_STR; break;
    case 16: StoreOp = AArch64::LSFP128_STR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  }

  MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
  NewMI.addReg(SrcReg, getKillRegState(isKill))
    .addFrameIndex(FrameIdx)
    .addImm(0)
    .addMemOperand(MMO);
}

void
AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned DestReg, int FrameIdx,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MBBI);
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FrameIdx);

  MachineMemOperand *MMO
    = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
                              MachineMemOperand::MOLoad,
                              MFI.getObjectSize(FrameIdx),
                              Align);

  unsigned LoadOp = 0;
  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
    switch (RC->getSize()) {
    case 4: LoadOp = AArch64::LS32_LDR; break;
    case 8: LoadOp = AArch64::LS64_LDR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  } else {
    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
            || RC->hasType(MVT::f128))
           && "Expected integer or floating type for load");
    switch (RC->getSize()) {
    case 4: LoadOp = AArch64::LSFP32_LDR; break;
    case 8: LoadOp = AArch64::LSFP64_LDR; break;
    case 16: LoadOp = AArch64::LSFP128_LDR; break;
    default:
      llvm_unreachable("Unknown size for regclass");
    }
  }

  MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
  NewMI.addFrameIndex(FrameIdx)
    .addImm(0)
    .addMemOperand(MMO);
}

unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
  unsigned Limit = (1 << 16) - 1;
  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) {
    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (!I->getOperand(i).isFI()) continue;

        // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
        // is the largest offset guaranteed to fit in the immediate offset.
        if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
          Limit = std::min(Limit, 0xfffu);
          break;
        }

        int AccessScale, MinOffset, MaxOffset;
        getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
        Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));

        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}

void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
                                             int &AccessScale, int &MinOffset,
                                             int &MaxOffset) const {
  switch (MI.getOpcode()) {
  default: llvm_unreachable("Unknown load/store kind");
  case TargetOpcode::DBG_VALUE:
    AccessScale = 1;
    MinOffset = INT_MIN;
    MaxOffset = INT_MAX;
    return;
  case AArch64::LS8_LDR: case AArch64::LS8_STR:
  case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
  case AArch64::LDRSBw:
  case AArch64::LDRSBx:
    AccessScale = 1;
    MinOffset = 0;
    MaxOffset = 0xfff;
    return;
  case AArch64::LS16_LDR: case AArch64::LS16_STR:
  case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
  case AArch64::LDRSHw:
  case AArch64::LDRSHx:
    AccessScale = 2;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS32_LDR: case AArch64::LS32_STR:
  case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
  case AArch64::LDRSWx:
  case AArch64::LDPSWx:
    AccessScale = 4;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LS64_LDR: case AArch64::LS64_STR:
  case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
  case AArch64::PRFM:
    AccessScale = 8;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
    AccessScale = 16;
    MinOffset = 0;
    MaxOffset = 0xfff * AccessScale;
    return;
  case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
  case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
    AccessScale = 4;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
  case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
    AccessScale = 8;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
    AccessScale = 16;
    MinOffset = -0x40 * AccessScale;
    MaxOffset = 0x3f * AccessScale;
    return;
  }
}

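As a worked example of the contract this implements (documented in AArch64InstrInfo.h: MinOffset <= imm <= MaxOffset and imm % AccessScale == 0): a 64-bit LS64_LDR scales by 8, so its byte offset must be a multiple of 8 in [0, 0xfff * 8] = [0, 32760]. A hypothetical helper using these outputs, not part of the commit, might look like:

// Hypothetical convenience wrapper: is a byte offset directly encodable in
// MI's addressing mode?
static bool isLegalAddressOffset(const AArch64InstrInfo &TII,
                                 const MachineInstr &MI, int Offset) {
  int AccessScale, MinOffset, MaxOffset;
  TII.getAddressConstraints(MI, AccessScale, MinOffset, MaxOffset);
  return Offset % AccessScale == 0 &&
         Offset >= MinOffset && Offset <= MaxOffset;
}

This is exactly the check eliminateFrameIndex in AArch64RegisterInfo.cpp performs inline before deciding whether to materialise a new base register.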
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MCInstrDesc &MCID = MI.getDesc();
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction &MF = *MBB.getParent();
  const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();

  if (MCID.getSize())
    return MCID.getSize();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);

  if (MI.isLabel())
    return 0;

  switch (MI.getOpcode()) {
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    return 0;
  case AArch64::CONSTPOOL_ENTRY:
    return MI.getOperand(2).getImm();
  case AArch64::TLSDESCCALL:
    return 0;
  default:
    llvm_unreachable("Unknown instruction class");
  }
}

unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const AArch64InstrInfo &TII) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();

  MFI.getObjectOffset(FrameRegIdx);
  llvm_unreachable("Unimplemented rewriteFrameIndex");
}

void llvm::emitRegUpdate(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         DebugLoc dl, const TargetInstrInfo &TII,
                         unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
                         int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
  if (NumBytes == 0 && DstReg == SrcReg)
    return;
  else if (abs(NumBytes) & ~0xffffff) {
    // Generically, we have to materialize the offset into a temporary register
    // and subtract it. There are a couple of ways this could be done, for now
    // we'll go for a literal-pool load.
    MachineFunction &MF = *MBB.getParent();
    MachineConstantPool *MCP = MF.getConstantPool();
    const Constant *C
      = ConstantInt::get(Type::getInt64Ty(MF.getFunction()->getContext()),
                         abs(NumBytes));
    unsigned CPI = MCP->getConstantPoolIndex(C, 8);

    // LDR xTMP, .LITPOOL
    BuildMI(MBB, MBBI, dl, TII.get(AArch64::LDRx_lit), ScratchReg)
      .addConstantPoolIndex(CPI)
      .setMIFlag(MIFlags);

    // ADD DST, SRC, xTMP (, lsl #0)
    unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
    BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
      .addReg(SrcReg, RegState::Kill)
      .addReg(ScratchReg, RegState::Kill)
      .addImm(0)
      .setMIFlag(MIFlags);
    return;
  }

  // Now we know that the adjustment can be done in at most two add/sub
  // (immediate) instructions, which is always more efficient than a
  // literal-pool load, or even a hypothetical movz/movk/add sequence.

  // Decide whether we're doing addition or subtraction.
  unsigned LowOp, HighOp;
  if (NumBytes >= 0) {
    LowOp = AArch64::ADDxxi_lsl0_s;
    HighOp = AArch64::ADDxxi_lsl12_s;
  } else {
    LowOp = AArch64::SUBxxi_lsl0_s;
    HighOp = AArch64::SUBxxi_lsl12_s;
    NumBytes = abs(NumBytes);
  }

  // If we're here, at the very least a move needs to be produced, which just
  // happens to be materializable by an ADD.
  if ((NumBytes & 0xfff) || NumBytes == 0) {
    BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
      .addReg(SrcReg, RegState::Kill)
      .addImm(NumBytes & 0xfff)
      .setMIFlag(MIFlags);

    // Next update should use the register we've just defined.
    SrcReg = DstReg;
  }

  if (NumBytes & 0xfff000) {
    BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
      .addReg(SrcReg, RegState::Kill)
      .addImm(NumBytes >> 12)
      .setMIFlag(MIFlags);
  }
}

void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                        DebugLoc dl, const TargetInstrInfo &TII,
                        unsigned ScratchReg, int64_t NumBytes,
                        MachineInstr::MIFlag MIFlags) {
  emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
                NumBytes, MIFlags);
}

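A concrete trace of the two-instruction fast path: for NumBytes = 0x12345 no bits above bit 23 are set, so the literal-pool branch is skipped and the split works out as follows (destination/source registers illustrative).

// NumBytes = 0x12345:
//   NumBytes & 0xfff  = 0x345  ->  add xDst, xSrc, #0x345
//   NumBytes >> 12    = 0x12   ->  add xDst, xDst, #0x12, lsl #12
// Total: xDst = xSrc + 0x12345 in two immediates. Only when
// abs(NumBytes) & ~0xffffff is non-zero does the slower literal-pool
// load into ScratchReg plus a register-register add/sub get emitted.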
namespace {
  struct LDTLSCleanup : public MachineFunctionPass {
    static char ID;
    LDTLSCleanup() : MachineFunctionPass(ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF) {
      AArch64MachineFunctionInfo* MFI = MF.getInfo<AArch64MachineFunctionInfo>();
      if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
        // No point folding accesses if there aren't at least two.
        return false;
      }

      MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
      return VisitNode(DT->getRootNode(), 0);
    }

    // Visit the dominator subtree rooted at Node in pre-order.
    // If TLSBaseAddrReg is non-null, then use that to replace any
    // TLS_base_addr instructions. Otherwise, create the register
    // when the first such instruction is seen, and then use it
    // as we encounter more instructions.
    bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
      MachineBasicBlock *BB = Node->getBlock();
      bool Changed = false;

      // Traverse the current block.
      for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
           ++I) {
        switch (I->getOpcode()) {
        case AArch64::TLSDESC_BLRx:
          // Make sure it's a local dynamic access.
          if (!I->getOperand(1).isSymbol() ||
              strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
            break;

          if (TLSBaseAddrReg)
            I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
          else
            I = SetRegister(I, &TLSBaseAddrReg);
          Changed = true;
          break;
        default:
          break;
        }
      }

      // Visit the children of this block in the dominator tree.
      for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
           I != E; ++I) {
        Changed |= VisitNode(*I, TLSBaseAddrReg);
      }

      return Changed;
    }

    // Replace the TLS_base_addr instruction I with a copy from
    // TLSBaseAddrReg, returning the new instruction.
    MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
                                         unsigned TLSBaseAddrReg) {
      MachineFunction *MF = I->getParent()->getParent();
      const AArch64TargetMachine *TM =
        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
      const AArch64InstrInfo *TII = TM->getInstrInfo();

      // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of
      // the code sequence assumes the address will be.
      MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY),
                                   AArch64::X0)
        .addReg(TLSBaseAddrReg);

      // Erase the TLS_base_addr instruction.
      I->eraseFromParent();

      return Copy;
    }

    // Create a virtual register in *TLSBaseAddrReg, and populate it by
    // inserting a copy instruction after I. Returns the new instruction.
    MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
      MachineFunction *MF = I->getParent()->getParent();
      const AArch64TargetMachine *TM =
        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
      const AArch64InstrInfo *TII = TM->getInstrInfo();

      // Create a virtual register for the TLS base address.
      MachineRegisterInfo &RegInfo = MF->getRegInfo();
      *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);

      // Insert a copy from X0 to TLSBaseAddrReg for later.
      MachineInstr *Next = I->getNextNode();
      MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY),
                                   *TLSBaseAddrReg)
        .addReg(AArch64::X0);

      return Copy;
    }

    virtual const char *getPassName() const {
      return "Local Dynamic TLS Access Clean-up";
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addRequired<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };
}

char LDTLSCleanup::ID = 0;
FunctionPass*
llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
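The effect of the pass, sketched on pseudo machine code (the symbol and register names follow the code above; the full TLSDESC call sequence is abbreviated):

// Before: two local-dynamic accesses each pay for a TLSDESC call that
// returns the module's TLS base in X0:
//   TLSDESC_BLRx ..., &_TLS_MODULE_BASE_   ; X0 = TLS base
//   ... use X0 ...
//   TLSDESC_BLRx ..., &_TLS_MODULE_BASE_   ; X0 = TLS base, again
//
// After: the first result is cached in a virtual register and every
// dominated call is replaced by a plain copy:
//   TLSDESC_BLRx ..., &_TLS_MODULE_BASE_   ; X0 = TLS base
//   %tlsbase = COPY X0                     ; inserted by SetRegister
//   ...
//   X0 = COPY %tlsbase                     ; ReplaceTLSBaseAddrCall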
110
lib/Target/AArch64/AArch64InstrInfo.h
Normal file
@ -0,0 +1,110 @@
//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
#define LLVM_TARGET_AARCH64INSTRINFO_H

#include "llvm/Target/TargetInstrInfo.h"
#include "AArch64RegisterInfo.h"

#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"

namespace llvm {

class AArch64Subtarget;

class AArch64InstrInfo : public AArch64GenInstrInfo {
  const AArch64RegisterInfo RI;
  const AArch64Subtarget &Subtarget;
public:
  explicit AArch64InstrInfo(const AArch64Subtarget &TM);

  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
  /// such, whenever a client has an instance of instruction info, it should
  /// always be able to get register info as well (through this method).
  ///
  const TargetRegisterInfo &getRegisterInfo() const { return RI; }

  const AArch64Subtarget &getSubTarget() const { return Subtarget; }

  void copyPhysReg(MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator I, DebugLoc DL,
                   unsigned DestReg, unsigned SrcReg,
                   bool KillSrc) const;

  MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
                                         uint64_t Offset, const MDNode *MDPtr,
                                         DebugLoc DL) const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI,
                           unsigned SrcReg, bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI) const;
  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            unsigned DestReg, int FrameIdx,
                            const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const;

  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const;
  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB,
                        const SmallVectorImpl<MachineOperand> &Cond,
                        DebugLoc DL) const;
  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;

  bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;

  /// Look through the instructions in this function and work out the largest
  /// the stack frame can be while maintaining the ability to address local
  /// slots with no complexities.
  unsigned estimateRSStackLimit(MachineFunction &MF) const;

  /// getAddressConstraints - For loads and stores (and PRFMs) taking an
  /// immediate offset, this function determines the constraints required for
  /// the immediate. It must satisfy:
  ///    + MinOffset <= imm <= MaxOffset
  ///    + imm % OffsetScale == 0
  void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
                             int &MinOffset, int &MaxOffset) const;

  unsigned getInstSizeInBytes(const MachineInstr &MI) const;

  unsigned getInstBundleLength(const MachineInstr &MI) const;
};

bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                          unsigned FrameReg, int &Offset,
                          const AArch64InstrInfo &TII);


void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   DebugLoc dl, const TargetInstrInfo &TII,
                   unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
                   int64_t NumBytes,
                   MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);

void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                  DebugLoc dl, const TargetInstrInfo &TII,
                  unsigned ScratchReg, int64_t NumBytes,
                  MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);

}

#endif
5298
lib/Target/AArch64/AArch64InstrInfo.td
Normal file
File diff suppressed because it is too large
140
lib/Target/AArch64/AArch64MCInstLower.cpp
Normal file
@ -0,0 +1,140 @@
//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower AArch64 MachineInstrs to their
// corresponding MCInst records.
//
//===----------------------------------------------------------------------===//

#include "AArch64AsmPrinter.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64BaseInfo.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"

using namespace llvm;

MCOperand
AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
                                      const MCSymbol *Sym) const {
  const MCExpr *Expr = 0;

  Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext);

  switch (MO.getTargetFlags()) {
  case AArch64II::MO_GOT:
    Expr = AArch64MCExpr::CreateGOT(Expr, OutContext);
    break;
  case AArch64II::MO_GOT_LO12:
    Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext);
    break;
  case AArch64II::MO_LO12:
    Expr = AArch64MCExpr::CreateLo12(Expr, OutContext);
    break;
  case AArch64II::MO_DTPREL_G1:
    Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext);
    break;
  case AArch64II::MO_DTPREL_G0_NC:
    Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext);
    break;
  case AArch64II::MO_GOTTPREL:
    Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext);
    break;
  case AArch64II::MO_GOTTPREL_LO12:
    Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext);
    break;
  case AArch64II::MO_TLSDESC:
    Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext);
    break;
  case AArch64II::MO_TLSDESC_LO12:
    Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext);
    break;
  case AArch64II::MO_TPREL_G1:
    Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext);
    break;
  case AArch64II::MO_TPREL_G0_NC:
    Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
    break;
  case AArch64II::MO_NO_FLAG:
    // Expr is already correct
    break;
  default:
    llvm_unreachable("Unexpected MachineOperand flag");
  }

  if (!MO.isJTI() && MO.getOffset())
    Expr = MCBinaryExpr::CreateAdd(Expr,
                                   MCConstantExpr::Create(MO.getOffset(),
                                                          OutContext),
                                   OutContext);

  return MCOperand::CreateExpr(Expr);
}

bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
                                     MCOperand &MCOp) const {
  switch (MO.getType()) {
  default: llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    if (MO.isImplicit())
      return false;
    assert(!MO.getSubReg() && "Subregs should be eliminated!");
    MCOp = MCOperand::CreateReg(MO.getReg());
    break;
  case MachineOperand::MO_Immediate:
    MCOp = MCOperand::CreateImm(MO.getImm());
    break;
  case MachineOperand::MO_BlockAddress:
    MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
    break;
  case MachineOperand::MO_ExternalSymbol:
    MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
    break;
  case MachineOperand::MO_GlobalAddress:
    MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()));
    break;
  case MachineOperand::MO_MachineBasicBlock:
    MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
                                   MO.getMBB()->getSymbol(), OutContext));
    break;
  case MachineOperand::MO_JumpTableIndex:
    MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex()));
    break;
  case MachineOperand::MO_ConstantPoolIndex:
    MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex()));
    break;
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers
    return false;
  }

  return true;
}

void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI,
                                            MCInst &OutMI,
                                            AArch64AsmPrinter &AP) {
  OutMI.setOpcode(MI->getOpcode());

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);

    MCOperand MCOp;
    if (AP.lowerOperand(MO, MCOp))
      OutMI.addOperand(MCOp);
  }
}
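In the printed assembly these MCExpr wrappers become AArch64's relocation-specifier syntax. For example (symbol name illustrative), an operand carrying MO_LO12 prints as #:lo12:var, as in the usual page + low-12-bits address materialisation:

//   adrp x0, var             // page address (operand has no target flag)
//   add  x0, x0, #:lo12:var  // AArch64MCExpr::CreateLo12 wrapping 'var'
// Likewise MO_GOT prints as :got:var and MO_TLSDESC as :tlsdesc:var; each
// specifier ultimately selects the matching ELF relocation type.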
14
lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
Normal file
@ -0,0 +1,14 @@
//===-- AArch64MachineFunctionInfo.cpp - AArch64 machine function info ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"

using namespace llvm;

void AArch64MachineFunctionInfo::anchor() { }
158
lib/Target/AArch64/AArch64MachineFunctionInfo.h
Normal file
@ -0,0 +1,158 @@
//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares AArch64-specific per-machine-function information.
//
//===----------------------------------------------------------------------===//

#ifndef AARCH64MACHINEFUNCTIONINFO_H
#define AARCH64MACHINEFUNCTIONINFO_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"

namespace llvm {

/// This class is derived from MachineFunctionInfo and contains private AArch64
/// target-specific information for each MachineFunction.
class AArch64MachineFunctionInfo : public MachineFunctionInfo {
  virtual void anchor();

  /// Number of bytes of arguments this function has on the stack. If the
  /// callee is expected to restore the argument stack this should be a
  /// multiple of 16, all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea;

  /// The number of bytes to restore to deallocate space for incoming
  /// arguments. Canonically 0 in the C calling convention, but non-zero when
  /// the callee is expected to pop the args.
  unsigned ArgumentStackToRestore;

  /// If the stack needs to be adjusted on frame entry in two stages, this
  /// records the size of the first adjustment just prior to storing
  /// callee-saved registers. The callee-saved slots are addressed assuming
  /// SP == <incoming-SP> - InitialStackAdjust.
  unsigned InitialStackAdjust;

  /// Number of local-dynamic TLS accesses.
  unsigned NumLocalDynamics;

  /// Keep track of the next label to be created within this function to
  /// represent a cloned constant pool entry. Used by constant islands pass.
  unsigned PICLabelUId;

  /// @see AArch64 Procedure Call Standard, B.3
  ///
  /// The Frame index of the area where LowerFormalArguments puts the
  /// general-purpose registers that might contain variadic parameters.
  int VariadicGPRIdx;

  /// @see AArch64 Procedure Call Standard, B.3
  ///
  /// The size of the frame object used to store the general-purpose registers
  /// which might contain variadic arguments. This is the offset from
  /// VariadicGPRIdx to what's stored in __gr_top.
  unsigned VariadicGPRSize;

  /// @see AArch64 Procedure Call Standard, B.3
  ///
  /// The Frame index of the area where LowerFormalArguments puts the
  /// floating-point registers that might contain variadic parameters.
  int VariadicFPRIdx;

  /// @see AArch64 Procedure Call Standard, B.3
  ///
  /// The size of the frame object used to store the floating-point registers
  /// which might contain variadic arguments. This is the offset from
  /// VariadicFPRIdx to what's stored in __vr_top.
  unsigned VariadicFPRSize;

  /// @see AArch64 Procedure Call Standard, B.3
  ///
  /// The Frame index of an object pointing just past the last known stacked
  /// argument on entry to a variadic function. This goes into the __stack
  /// field of the va_list type.
  int VariadicStackIdx;

  /// The offset of the frame pointer from the stack pointer on function
  /// entry. This is expected to be negative.
  int FramePointerOffset;

public:
  AArch64MachineFunctionInfo()
    : BytesInStackArgArea(0),
      ArgumentStackToRestore(0),
      InitialStackAdjust(0),
      NumLocalDynamics(0),
      PICLabelUId(0),
      VariadicGPRIdx(0),
      VariadicGPRSize(0),
      VariadicFPRIdx(0),
      VariadicFPRSize(0),
      VariadicStackIdx(0),
      FramePointerOffset(0) {}

  explicit AArch64MachineFunctionInfo(MachineFunction &MF)
    : BytesInStackArgArea(0),
      ArgumentStackToRestore(0),
      InitialStackAdjust(0),
      NumLocalDynamics(0),
      PICLabelUId(0),
      VariadicGPRIdx(0),
      VariadicGPRSize(0),
      VariadicFPRIdx(0),
      VariadicFPRSize(0),
      VariadicStackIdx(0),
      FramePointerOffset(0) {}

  unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
  void setBytesInStackArgArea(unsigned bytes) { BytesInStackArgArea = bytes; }

  unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; }
  void setArgumentStackToRestore(unsigned bytes) {
    ArgumentStackToRestore = bytes;
  }

  unsigned getInitialStackAdjust() const { return InitialStackAdjust; }
  void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; }

  unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
  void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }

  void initPICLabelUId(unsigned UId) { PICLabelUId = UId; }
  unsigned getNumPICLabels() const { return PICLabelUId; }
  unsigned createPICLabelUId() { return PICLabelUId++; }

  int getVariadicGPRIdx() const { return VariadicGPRIdx; }
  void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; }

  unsigned getVariadicGPRSize() const { return VariadicGPRSize; }
  void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; }

  int getVariadicFPRIdx() const { return VariadicFPRIdx; }
  void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; }

  unsigned getVariadicFPRSize() const { return VariadicFPRSize; }
  void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; }

  int getVariadicStackIdx() const { return VariadicStackIdx; }
  void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; }

  int getFramePointerOffset() const { return FramePointerOffset; }
  void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; }

};

} // End llvm namespace

#endif
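For reference, the __gr_top, __vr_top and __stack names in the comments above are fields of the AAPCS64 va_list that these frame objects ultimately populate. A C-level view of that layout, with field semantics summarised from the Procedure Call Standard (B.3):

// AAPCS64 va_list, as the variadic lowering above sees it:
struct va_list_aapcs64 {
  void *__stack;   // next stacked argument; derived from VariadicStackIdx
  void *__gr_top;  // one past the saved GP regs: VariadicGPRIdx area + VariadicGPRSize
  void *__vr_top;  // one past the saved FP/SIMD regs: VariadicFPRIdx area + VariadicFPRSize
  int   __gr_offs; // negative offset from __gr_top to the next GP save slot
  int   __vr_offs; // negative offset from __vr_top to the next FP/SIMD save slot
};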
211
lib/Target/AArch64/AArch64RegisterInfo.cpp
Normal file
@ -0,0 +1,211 @@
//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetRegisterInfo
// class.
//
//===----------------------------------------------------------------------===//


#include "AArch64RegisterInfo.h"
#include "AArch64FrameLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/ADT/BitVector.h"

#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"

using namespace llvm;

AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii,
                                         const AArch64Subtarget &sti)
  : AArch64GenRegisterInfo(AArch64::X30), TII(tii) {
}

const uint16_t *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  return CSR_PCS_SaveList;
}

const uint32_t*
AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const {
  return CSR_PCS_RegMask;
}

const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const {
  return TLSDesc_RegMask;
}

const TargetRegisterClass *
AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &AArch64::FlagClassRegClass)
    return &AArch64::GPR64RegClass;

  return RC;
}



BitVector
AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();

  Reserved.set(AArch64::XSP);
  Reserved.set(AArch64::WSP);

  Reserved.set(AArch64::XZR);
  Reserved.set(AArch64::WZR);

  if (TFI->hasFP(MF)) {
    Reserved.set(AArch64::X29);
    Reserved.set(AArch64::W29);
  }

  return Reserved;
}

void
AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
                                         int SPAdj, RegScavenger *RS) const {
  assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64FrameLowering *TFI =
    static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());

  unsigned i = 0;
  while (!MI.getOperand(i).isFI()) {
    ++i;
    assert(i < MI.getNumOperands() && "Instr doesn't have a FrameIndex Operand");
  }

  // In order to work out the base and offset for addressing, the FrameLowering
  // code needs to know (sometimes) whether the instruction is storing/loading
  // a callee-saved register, or whether it's a more generic
  // operation. Fortunately the frame indices are used *only* for that purpose
  // and are contiguous, so we can check here.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  int MinCSFI = 0;
  int MaxCSFI = -1;

  if (CSI.size()) {
    MinCSFI = CSI[0].getFrameIdx();
    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
  }

  int FrameIndex = MI.getOperand(i).getIndex();
  bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;

  unsigned FrameReg;
  int64_t Offset;
  Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
                                           IsCalleeSaveOp);

  Offset += MI.getOperand(i+1).getImm();

  // DBG_VALUE instructions have no real restrictions so they can be handled
  // easily.
  if (MI.isDebugValue()) {
    MI.getOperand(i).ChangeToRegister(FrameReg, /*isDef=*/ false);
    MI.getOperand(i+1).ChangeToImmediate(Offset);
    return;
  }

  int MinOffset, MaxOffset, OffsetScale;
  if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) {
    MinOffset = 0;
    MaxOffset = 0xfff;
    OffsetScale = 1;
  } else {
    // Load/store of a stack object
    TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset);
  }

  // The frame lowering has told us a base and offset it thinks we should use
  // to access this variable, but it's still up to us to make sure the values
  // are legal for the instruction in question.
  if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) {
    unsigned BaseReg =
      MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
    emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII,
                  BaseReg, FrameReg, BaseReg, Offset);
    FrameReg = BaseReg;
    Offset = 0;
  }

  // Negative offsets are expected if we address from FP, but for
  // now this checks nothing has gone horribly wrong.
  assert(Offset >= 0 && "Unexpected negative offset from SP");

  MI.getOperand(i).ChangeToRegister(FrameReg, false, false, true);
  MI.getOperand(i+1).ChangeToImmediate(Offset / OffsetScale);
}

void
AArch64RegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI) const {
  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
  DebugLoc dl = MI->getDebugLoc();
  int Opcode = MI->getOpcode();
  bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;

  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = TFI->getStackAlignment();

    uint64_t Amount = MI->getOperand(0).getImm();
    Amount = (Amount + Align - 1)/Align * Align;
    if (!IsDestroy) Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too
    // so this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 12-bits
      // because there's no guaranteed temporary register available. Mostly
      // call frames will be allocated at the start of a function so this is
      // OK, but it is a limitation that needs dealing with.
      assert(abs(Amount) < 0xfff && "call frame too large");
      emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from
    // the stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xfff && "call frame too large");
    emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
  }

  MBB.erase(MI);
}

unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();

  if (TFI->hasFP(MF))
    return AArch64::X29;
  else
    return AArch64::XSP;
}

bool
AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
  const AArch64FrameLowering *AFI
    = static_cast<const AArch64FrameLowering*>(TFI);
  return AFI->useFPForAddressing(MF);
}
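A worked example of the re-basing step in eliminateFrameIndex above: suppose a spill reload (LS64_LDR) ends up with Offset = 33000 from FrameReg. getAddressConstraints reports OffsetScale 8 and MaxOffset 32760; 33000 is a multiple of 8 but exceeds MaxOffset, so the legality check fails and the code materialises a fresh base instead (register name illustrative):

// Offset = 33000 > MaxOffset (32760) for LS64_LDR, so:
//   emitRegUpdate(..., BaseReg, FrameReg, BaseReg, /*NumBytes=*/33000);
//   // BaseReg = FrameReg + 33000, then the access is rewritten as
//   // operand(i)   = BaseReg
//   // operand(i+1) = 0        (0 / OffsetScale)
// i.e. the load becomes "ldr xN, [BaseReg, #0]".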
79
lib/Target/AArch64/AArch64RegisterInfo.h
Normal file
@ -0,0 +1,79 @@
//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the MRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
#define LLVM_TARGET_AARCH64REGISTERINFO_H

#include "llvm/Target/TargetRegisterInfo.h"

#define GET_REGINFO_HEADER
#include "AArch64GenRegisterInfo.inc"

namespace llvm {

class AArch64InstrInfo;
class AArch64Subtarget;

struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
private:
  const AArch64InstrInfo &TII;

public:
  AArch64RegisterInfo(const AArch64InstrInfo &tii,
                      const AArch64Subtarget &sti);

  const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
  const uint32_t *getCallPreservedMask(CallingConv::ID) const;

  const uint32_t *getTLSDescCallPreservedMask() const;

  BitVector getReservedRegs(const MachineFunction &MF) const;
  unsigned getFrameRegister(const MachineFunction &MF) const;

  void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                           RegScavenger *Rs = NULL) const;

  void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI) const;

  /// getCrossCopyRegClass - Returns a legal register class to copy a register
  /// in the specified class to or from. Returns the original class if it is
  /// possible to copy between two registers of the specified class.
  const TargetRegisterClass *
  getCrossCopyRegClass(const TargetRegisterClass *RC) const;

  /// getLargestLegalSuperClass - Returns the largest super class of RC that is
  /// legal to use in the current sub-target and has the same spill size.
  const TargetRegisterClass*
  getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
    if (RC == &AArch64::tcGPR64RegClass)
      return &AArch64::GPR64RegClass;

    return RC;
  }

  bool requiresRegisterScavenging(const MachineFunction &MF) const {
    return true;
  }

  bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
    return true;
  }

  bool useFPForScavengingIndex(const MachineFunction &MF) const;
};

} // end namespace llvm

#endif // LLVM_TARGET_AARCH64REGISTERINFO_H
lib/Target/AArch64/AArch64RegisterInfo.td (new file, 205 lines)
@@ -0,0 +1,205 @@
//===- AArch64RegisterInfo.td - AArch64 Register defs ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Declarations that describe the AArch64 register file
//===----------------------------------------------------------------------===//

let Namespace = "AArch64" in {
def sub_128 : SubRegIndex;
def sub_64 : SubRegIndex;
def sub_32 : SubRegIndex;
def sub_16 : SubRegIndex;
def sub_8  : SubRegIndex;

// The VPR registers are handled as sub-registers of FPR equivalents, but
// they're really the same thing. We give this concept a special index.
def sub_alias : SubRegIndex;
}

// Registers are identified with 5-bit ID numbers.
class AArch64Reg<bits<16> enc, string n> : Register<n> {
  let HWEncoding = enc;
  let Namespace = "AArch64";
}

class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [],
                         list<SubRegIndex> inds = []>
  : AArch64Reg<enc, n> {
  let SubRegs = subregs;
  let SubRegIndices = inds;
}

//===----------------------------------------------------------------------===//
//  Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp
//===----------------------------------------------------------------------===//

foreach Index = 0-30 in {
  def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>;
}

def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
def WZR : AArch64Reg<31, "wzr">;

// Could be combined with previous loop, but this way leaves w and x registers
// consecutive as LLVM register numbers, which makes for easier debugging.
foreach Index = 0-30 in {
  def X#Index : AArch64RegWithSubs<Index, "x"#Index,
                                   [!cast<Register>("W"#Index)], [sub_32]>,
                DwarfRegNum<[Index]>;
}

def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>;
def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>;

// Most instructions treat register 31 as zero for reads and a black-hole for
// writes.

// Note that the order of registers is important for the Disassembler here:
// tablegen uses it to form MCRegisterClass::getRegister, which we assume can
// take an encoding value.
def GPR32 : RegisterClass<"AArch64", [i32], 32,
                          (add (sequence "W%u", 0, 30), WZR)> {
}

def GPR64 : RegisterClass<"AArch64", [i64], 64,
                          (add (sequence "X%u", 0, 30), XZR)> {
}

def GPR32nowzr : RegisterClass<"AArch64", [i32], 32,
                               (sequence "W%u", 0, 30)> {
}

def GPR64noxzr : RegisterClass<"AArch64", [i64], 64,
                               (sequence "X%u", 0, 30)> {
}

// For tail calls, we can't use callee-saved registers or the structure-return
// register, as they are supposed to be live across function calls and may be
// clobbered by the epilogue.
def tcGPR64 : RegisterClass<"AArch64", [i64], 64,
                            (add (sequence "X%u", 0, 7),
                                 (sequence "X%u", 9, 18))> {
}


// Certain addressing-useful instructions accept sp directly. Again the order
// of registers is important to the Disassembler.
def GPR32wsp : RegisterClass<"AArch64", [i32], 32,
                             (add (sequence "W%u", 0, 30), WSP)> {
}

def GPR64xsp : RegisterClass<"AArch64", [i64], 64,
                             (add (sequence "X%u", 0, 30), XSP)> {
}

// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and
// non-SP variants). We can't use a bare register in those patterns because
// TableGen doesn't like it, so we need a class containing just stack registers
def Rxsp : RegisterClass<"AArch64", [i64], 64,
                         (add XSP)> {
}

def Rwsp : RegisterClass<"AArch64", [i32], 32,
                         (add WSP)> {
}

//===----------------------------------------------------------------------===//
//  Scalar registers in the vector unit:
//  b0-b31, h0-h31, s0-s31, d0-d31, q0-q31
//===----------------------------------------------------------------------===//

foreach Index = 0-31 in {
  def B # Index : AArch64Reg< Index, "b" # Index>,
                  DwarfRegNum<[!add(Index, 64)]>;

  def H # Index : AArch64RegWithSubs<Index, "h" # Index,
                                     [!cast<Register>("B" # Index)], [sub_8]>,
                  DwarfRegNum<[!add(Index, 64)]>;

  def S # Index : AArch64RegWithSubs<Index, "s" # Index,
                                     [!cast<Register>("H" # Index)], [sub_16]>,
                  DwarfRegNum<[!add(Index, 64)]>;

  def D # Index : AArch64RegWithSubs<Index, "d" # Index,
                                     [!cast<Register>("S" # Index)], [sub_32]>,
                  DwarfRegNum<[!add(Index, 64)]>;

  def Q # Index : AArch64RegWithSubs<Index, "q" # Index,
                                     [!cast<Register>("D" # Index)], [sub_64]>,
                  DwarfRegNum<[!add(Index, 64)]>;
}


def FPR8 : RegisterClass<"AArch64", [i8], 8,
                         (sequence "B%u", 0, 31)> {
}

def FPR16 : RegisterClass<"AArch64", [f16], 16,
                          (sequence "H%u", 0, 31)> {
}

def FPR32 : RegisterClass<"AArch64", [f32], 32,
                          (sequence "S%u", 0, 31)> {
}

def FPR64 : RegisterClass<"AArch64", [f64], 64,
                          (sequence "D%u", 0, 31)> {
}

def FPR128 : RegisterClass<"AArch64", [f128], 128,
                           (sequence "Q%u", 0, 31)> {
}


//===----------------------------------------------------------------------===//
//  Vector registers:
//===----------------------------------------------------------------------===//

// NEON registers simply specify the overall vector, and it's expected that
// Instructions will individually specify the acceptable data layout. In
// principle this leaves two approaches open:
//   + An operand, giving a single ADDvvv instruction (for example). This turns
//     out to be unworkable in the assembly parser (without every Instruction
//     having a "cvt" function, at least) because the constraints can't be
//     properly enforced. It also complicates specifying patterns since each
//     instruction will accept many types.
//   + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
//     details about NEON registers, but simplifies most other details.
//
// The second approach was taken.

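// For illustration (not part of the original commit): with the bare-token
// scheme an assembly line such as "add v0.2d, v1.2d, v2.2d" is tokenised as
// the register "v0" followed by the layout token ".2d", and the parser
// checks the layout tokens rather than the register operands themselves.
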
foreach Index = 0-31 in {
  def V # Index : AArch64RegWithSubs<Index, "v" # Index,
                                     [!cast<Register>("Q" # Index)],
                                     [sub_alias]>,
                  DwarfRegNum<[!add(Index, 64)]>;
}

// These two classes contain the same registers, which should be reasonably
// sensible for MC and allocation purposes, while allowing them to be treated
// separately for things like stack spilling.
def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
                          (sequence "V%u", 0, 31)>;

def VPR128 : RegisterClass<"AArch64",
                           [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
                           (sequence "V%u", 0, 31)>;

// Flags register
def NZCV : Register<"nzcv"> {
  let Namespace = "AArch64";
}

def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
  let CopyCost = -1;
  let isAllocatable = 0;
}
lib/Target/AArch64/AArch64Schedule.td (new file, 10 lines)
@@ -0,0 +1,10 @@
//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

def GenericItineraries : ProcessorItineraries<[], [], []>;
lib/Target/AArch64/AArch64SelectionDAGInfo.cpp (new file, 25 lines)
@@ -0,0 +1,25 @@
//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64SelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "aarch64-selectiondag-info"
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM)
  : TargetSelectionDAGInfo(TM),
    Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
}

AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {
}
lib/Target/AArch64/AArch64SelectionDAGInfo.h (new file, 32 lines)
@@ -0,0 +1,32 @@
//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64 subclass for TargetSelectionDAGInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64SELECTIONDAGINFO_H
#define LLVM_AARCH64SELECTIONDAGINFO_H

#include "llvm/Target/TargetSelectionDAGInfo.h"

namespace llvm {

class AArch64TargetMachine;

class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
  const AArch64Subtarget *Subtarget;
public:
  explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM);
  ~AArch64SelectionDAGInfo();
};

}

#endif
lib/Target/AArch64/AArch64Subtarget.cpp (new file, 43 lines)
@@ -0,0 +1,43 @@
//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallVector.h"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AArch64GenSubtargetInfo.inc"

using namespace llvm;

AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
  : AArch64GenSubtargetInfo(TT, CPU, FS)
  , HasNEON(true)
  , HasCrypto(true)
  , TargetTriple(TT) {

  ParseSubtargetFeatures(CPU, FS);
}

bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
                                          Reloc::Model RelocM) const {
  if (RelocM == Reloc::Static)
    return false;

  return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility();
}
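
// In other words (illustration, not from the original commit): under static
// relocation every global is addressed directly, while in PIC-style models a
// global that could be preempted at link time (neither local-linkage nor
// hidden visibility) has to be reached indirectly, e.g. through the GOT.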
lib/Target/AArch64/AArch64Subtarget.h (new file, 54 lines)
@@ -0,0 +1,54 @@
//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
#define LLVM_TARGET_AARCH64_SUBTARGET_H

#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtargetInfo.h"

#define GET_SUBTARGETINFO_HEADER
#include "AArch64GenSubtargetInfo.inc"

#include <string>

namespace llvm {
class StringRef;
class GlobalValue;

class AArch64Subtarget : public AArch64GenSubtargetInfo {
protected:
  bool HasNEON;
  bool HasCrypto;

  /// TargetTriple - What processor and OS we're targeting.
  Triple TargetTriple;
public:
  /// This constructor initializes the data members to match those
  /// of the specified triple.
  ///
  AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);

  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options. Definition of function is auto generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);

  bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;

  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }

};
} // End llvm namespace

#endif // LLVM_TARGET_AARCH64_SUBTARGET_H
lib/Target/AArch64/AArch64TargetMachine.cpp (new file, 78 lines)
@@ -0,0 +1,78 @@
//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetMachine.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;

extern "C" void LLVMInitializeAArch64Target() {
  RegisterTargetMachine<AArch64TargetMachine> X(TheAArch64Target);
}

AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Reloc::Model RM, CodeModel::Model CM,
                                           CodeGenOpt::Level OL)
  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
    Subtarget(TT, CPU, FS),
    InstrInfo(Subtarget),
    DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"),
    TLInfo(*this),
    TSInfo(*this),
    FrameLowering(Subtarget) {
}
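
// Reading of the datalayout string above (interpretation, not from the
// original commit): "e" = little-endian; "p:64:64" = 64-bit pointers with
// 64-bit alignment; i64, i128 and f128 are aligned to their own widths;
// "s0:32:32" appears to set default stack-object alignment to 32 bits;
// "n32:64" gives the native integer widths; "S128" requires the stack to be
// naturally aligned to 128 bits.
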
namespace {
/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
  AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  AArch64TargetMachine &getAArch64TargetMachine() const {
    return getTM<AArch64TargetMachine>();
  }

  const AArch64Subtarget &getAArch64Subtarget() const {
    return *getAArch64TargetMachine().getSubtargetImpl();
  }

  virtual bool addInstSelector();
  virtual bool addPreEmitPass();
};
} // namespace

TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new AArch64PassConfig(this, PM);
}

bool AArch64PassConfig::addPreEmitPass() {
  addPass(&UnpackMachineBundlesID);
  addPass(createAArch64ConstantIslandPass());
  return true;
}

bool AArch64PassConfig::addInstSelector() {
  addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));

  // For ELF, cleanup any local-dynamic TLS accesses.
  if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64CleanupLocalDynamicTLSPass());

  return false;
}
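
A usage sketch (illustration only, not part of this commit): with the AArch64
libraries linked in, a host tool reaches the new backend through the target
registry; the triple spelling below is an assumption.

  #include "llvm/Support/TargetRegistry.h"
  #include "llvm/Support/TargetSelect.h"
  #include <string>

  const llvm::Target *findAArch64() {
    llvm::InitializeAllTargetInfos(); // registers every configured backend,
    llvm::InitializeAllTargets();     // including AArch64 when it is built
    std::string Error;
    return llvm::TargetRegistry::lookupTarget("aarch64-none-linux-gnu", Error);
  }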
lib/Target/AArch64/AArch64TargetMachine.h (new file, 69 lines)
@@ -0,0 +1,69 @@
//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64 specific subclass of TargetMachine.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64TARGETMACHINE_H
#define LLVM_AARCH64TARGETMACHINE_H

#include "AArch64FrameLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"

namespace llvm {

class AArch64TargetMachine : public LLVMTargetMachine {
  AArch64Subtarget Subtarget;
  AArch64InstrInfo InstrInfo;
  const DataLayout DL;
  AArch64TargetLowering TLInfo;
  AArch64SelectionDAGInfo TSInfo;
  AArch64FrameLowering FrameLowering;

public:
  AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
                       StringRef FS, const TargetOptions &Options,
                       Reloc::Model RM, CodeModel::Model CM,
                       CodeGenOpt::Level OL);

  const AArch64InstrInfo *getInstrInfo() const {
    return &InstrInfo;
  }

  const AArch64FrameLowering *getFrameLowering() const {
    return &FrameLowering;
  }

  const AArch64TargetLowering *getTargetLowering() const {
    return &TLInfo;
  }

  const AArch64SelectionDAGInfo *getSelectionDAGInfo() const {
    return &TSInfo;
  }

  const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; }

  const DataLayout *getDataLayout() const { return &DL; }

  const TargetRegisterInfo *getRegisterInfo() const {
    return &InstrInfo.getRegisterInfo();
  }
  TargetPassConfig *createPassConfig(PassManagerBase &PM);
};

}

#endif
lib/Target/AArch64/AArch64TargetObjectFile.cpp (new file, 19 lines)
@@ -0,0 +1,19 @@
//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetObjectFile.h"

using namespace llvm;

void
AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
                                         const TargetMachine &TM) {
  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
  InitializeELF(TM.Options.UseInitArray);
}
lib/Target/AArch64/AArch64TargetObjectFile.h (new file, 27 lines)
@@ -0,0 +1,27 @@
//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H

#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"

namespace llvm {

/// AArch64LinuxTargetObjectFile - This implementation is used for linux
/// AArch64.
class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
  virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
};

} // end namespace llvm

#endif
lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp (new file, 2025 lines)
(diff suppressed because it is too large)
lib/Target/AArch64/AsmParser/CMakeLists.txt (new file, 7 lines)
@@ -0,0 +1,7 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )

add_llvm_library(LLVMAArch64AsmParser
  AArch64AsmParser.cpp
  )

add_dependencies(LLVMAArch64AsmParser AArch64CommonTableGen)
lib/Target/AArch64/AsmParser/LLVMBuild.txt (new file, 24 lines)
@@ -0,0 +1,24 @@
;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = AArch64AsmParser
parent = AArch64
required_libraries = AArch64Desc AArch64Info MC MCParser Support
add_to_library_groups = AArch64
lib/Target/AArch64/AsmParser/Makefile (new file, 15 lines)
@@ -0,0 +1,15 @@
##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMAArch64AsmParser

# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common
lib/Target/AArch64/CMakeLists.txt (new file, 35 lines)
@@ -0,0 +1,35 @@
set(LLVM_TARGET_DEFINITIONS AArch64.td)

tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info)
tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(AArch64CommonTableGen)

add_llvm_target(AArch64CodeGen
  AArch64AsmPrinter.cpp
  AArch64ConstantIslandPass.cpp
  AArch64FrameLowering.cpp
  AArch64ISelDAGToDAG.cpp
  AArch64ISelLowering.cpp
  AArch64InstrInfo.cpp
  AArch64MachineFunctionInfo.cpp
  AArch64MCInstLower.cpp
  AArch64RegisterInfo.cpp
  AArch64SelectionDAGInfo.cpp
  AArch64Subtarget.cpp
  AArch64TargetMachine.cpp
  AArch64TargetObjectFile.cpp
  )

add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
add_subdirectory(TargetInfo)
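
Because the target is experimental it is not built by default. At the time of
this commit you would opt in explicitly when configuring LLVM (flag spellings
assumed; check your tree):

  ./configure --enable-experimental-targets=AArch64

or, with the CMake build:

  cmake -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=AArch64 path/to/llvm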
lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp (new file, 791 lines)
@@ -0,0 +1,791 @@
//===- AArch64Disassembler.cpp - Disassembler for the AArch64 ISA ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "aarch64-disassembler"

#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64BaseInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

typedef MCDisassembler::DecodeStatus DecodeStatus;

namespace {
/// AArch64 disassembler for all AArch64 platforms.
class AArch64Disassembler : public MCDisassembler {
  const MCRegisterInfo *RegInfo;
public:
  /// Initializes the disassembler.
  ///
  AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info)
    : MCDisassembler(STI), RegInfo(Info) {
  }

  ~AArch64Disassembler() {
  }

  /// See MCDisassembler.
  DecodeStatus getInstruction(MCInst &instr,
                              uint64_t &size,
                              const MemoryObject &region,
                              uint64_t address,
                              raw_ostream &vStream,
                              raw_ostream &cStream) const;

  const MCRegisterInfo *getRegInfo() const { return RegInfo; }
};

}

// Forward-declarations used in the auto-generated files.
static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder);
static DecodeStatus
DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                            uint64_t Address, const void *Decoder);

static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder);
static DecodeStatus
DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                            uint64_t Address, const void *Decoder);

static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                            uint64_t Address,
                                            const void *Decoder);
static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder);
static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder);
static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder);
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
                                              unsigned RegNo, uint64_t Address,
                                              const void *Decoder);
static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder);
static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
                                              unsigned RegNo, uint64_t Address,
                                              const void *Decoder);

static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
                                               unsigned OptionHiS,
                                               uint64_t Address,
                                               const void *Decoder);


static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
                                               unsigned Imm6Bits,
                                               uint64_t Address,
                                               const void *Decoder);

static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
                                               unsigned Imm6Bits,
                                               uint64_t Address,
                                               const void *Decoder);

template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
                                             unsigned FullImm,
                                             uint64_t Address,
                                             const void *Decoder);

template<int RegWidth>
static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
                                            unsigned Bits,
                                            uint64_t Address,
                                            const void *Decoder);

static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
                                           unsigned ShiftAmount,
                                           uint64_t Address,
                                           const void *Decoder);

static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
                                            unsigned ShiftAmount,
                                            uint64_t Address,
                                            const void *Decoder);
static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
                                              uint64_t Address,
                                              const void *Decoder);

static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
                                              uint64_t Address,
                                              const void *Decoder);

static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
                                              unsigned Insn,
                                              uint64_t Address,
                                              const void *Decoder);

static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
                                                       unsigned Val,
                                                       uint64_t Address,
                                                       const void *Decoder);

template<typename SomeNamedImmMapper>
static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
                                          unsigned Val,
                                          uint64_t Address,
                                          const void *Decoder);

static DecodeStatus
DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper,
                    llvm::MCInst &Inst,
                    unsigned Val,
                    uint64_t Address,
                    const void *Decoder);

static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
                                     unsigned Val,
                                     uint64_t Address,
                                     const void *Decoder);

static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
                                     unsigned Val,
                                     uint64_t Address,
                                     const void *Decoder);


static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
                                                   unsigned Val,
                                                   uint64_t Address,
                                                   const void *Decoder);


static bool Check(DecodeStatus &Out, DecodeStatus In);

#include "AArch64GenDisassemblerTables.inc"
#include "AArch64GenInstrInfo.inc"

static bool Check(DecodeStatus &Out, DecodeStatus In) {
  switch (In) {
  case MCDisassembler::Success:
    // Out stays the same.
    return true;
  case MCDisassembler::SoftFail:
    Out = In;
    return true;
  case MCDisassembler::Fail:
    Out = In;
    return false;
  }
  llvm_unreachable("Invalid DecodeStatus!");
}

DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                 const MemoryObject &Region,
                                                 uint64_t Address,
                                                 raw_ostream &os,
                                                 raw_ostream &cs) const {
  CommentStream = &cs;

  uint8_t bytes[4];

  // We want to read exactly 4 bytes of data.
  if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
    Size = 0;
    return MCDisassembler::Fail;
  }

  // Encoded as a little-endian 32-bit word in the stream.
  uint32_t insn = (bytes[3] << 24) |
                  (bytes[2] << 16) |
                  (bytes[1] << 8) |
                  (bytes[0] << 0);
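
  // Worked example (illustration, not from the original commit): the byte
  // stream 20 00 02 8b reassembles to insn = 0x8b020020, which should
  // disassemble as "add x0, x1, x2".
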
  // Calling the auto-generated decoder function.
  DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address,
                                          this, STI);
  if (result != MCDisassembler::Fail) {
    Size = 4;
    return result;
  }

  MI.clear();
  Size = 0;
  return MCDisassembler::Fail;
}

static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
  const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D);
  return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo);
}

static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}

static DecodeStatus
DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                            uint64_t Address, const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}

static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}

static DecodeStatus
DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                            uint64_t Address, const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}

static DecodeStatus
DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                        uint64_t Address, const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}

static DecodeStatus
DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                         uint64_t Address, const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}


static DecodeStatus
DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                         uint64_t Address, const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}

static DecodeStatus
DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                         uint64_t Address, const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}


static DecodeStatus
DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                          uint64_t Address, const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}


static DecodeStatus
DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                         uint64_t Address, const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}

static DecodeStatus
DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                          uint64_t Address, const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail;

  uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
  Inst.addOperand(MCOperand::CreateReg(Register));
  return MCDisassembler::Success;
}

static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
                                               unsigned OptionHiS,
                                               uint64_t Address,
                                               const void *Decoder) {
  // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1},
  // S}. Hence we want to check bit 1.
  if (!(OptionHiS & 2))
    return MCDisassembler::Fail;

  Inst.addOperand(MCOperand::CreateImm(OptionHiS));
  return MCDisassembler::Success;
}

static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
                                               unsigned Imm6Bits,
                                               uint64_t Address,
                                               const void *Decoder) {
  // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be
  // between 0 and 31.
  if (Imm6Bits > 31)
    return MCDisassembler::Fail;

  Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
  return MCDisassembler::Success;
}

static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
                                               unsigned Imm6Bits,
                                               uint64_t Address,
                                               const void *Decoder) {
  // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32.
  if (Imm6Bits < 32)
    return MCDisassembler::Fail;

  Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
  return MCDisassembler::Success;
}


template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
                                             unsigned FullImm,
                                             uint64_t Address,
                                             const void *Decoder) {
  unsigned Imm16 = FullImm & 0xffff;
  unsigned Shift = FullImm >> 16;

  if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail;

  Inst.addOperand(MCOperand::CreateImm(Imm16));
  Inst.addOperand(MCOperand::CreateImm(Shift));
  return MCDisassembler::Success;
}

template<int RegWidth>
static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
                                            unsigned Bits,
                                            uint64_t Address,
                                            const void *Decoder) {
  uint64_t Imm;
  if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm))
    return MCDisassembler::Fail;

  Inst.addOperand(MCOperand::CreateImm(Bits));
  return MCDisassembler::Success;
}


static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
                                           unsigned ShiftAmount,
                                           uint64_t Address,
                                           const void *Decoder) {
  // Only values 0-4 are valid for this 3-bit field
  if (ShiftAmount > 4)
    return MCDisassembler::Fail;

  Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
  return MCDisassembler::Success;
}

static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
                                            unsigned ShiftAmount,
                                            uint64_t Address,
                                            const void *Decoder) {
  // Only values below 32 are valid for a 32-bit register
  if (ShiftAmount > 31)
    return MCDisassembler::Fail;

  Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
  return MCDisassembler::Success;
}

static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
                                              uint64_t Address,
                                              const void *Decoder) {
  unsigned Rd = fieldFromInstruction(Insn, 0, 5);
  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
  unsigned ImmS = fieldFromInstruction(Insn, 10, 6);
  unsigned ImmR = fieldFromInstruction(Insn, 16, 6);
  unsigned SF = fieldFromInstruction(Insn, 31, 1);

  // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise
  // out assertions that it thinks should never be hit.
  enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc;
  Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2);

  if (!SF) {
    // ImmR and ImmS must be between 0 and 31 for 32-bit instructions.
    if (ImmR > 31 || ImmS > 31)
      return MCDisassembler::Fail;
  }

  if (SF) {
    DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
    // BFM MCInsts use Rd as a source too.
    if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
    DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
  } else {
    DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
    // BFM MCInsts use Rd as a source too.
    if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
    DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder);
  }

  // ASR and LSR have more specific patterns so they won't get here:
  assert(!(ImmS == 31 && !SF && Opc != BFM)
         && "shift should have used auto decode");
  assert(!(ImmS == 63 && SF && Opc != BFM)
         && "shift should have used auto decode");

  // Extension instructions similarly:
  if (Opc == SBFM && ImmR == 0) {
    assert((ImmS != 7 && ImmS != 15) && "extension got here");
    assert((ImmS != 31 || SF == 0) && "extension got here");
  } else if (Opc == UBFM && ImmR == 0) {
    assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here");
  }

  if (Opc == UBFM) {
    // It might be a LSL instruction, which actually takes the shift amount
    // itself as an MCInst operand.
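    // For example (illustration, not from the original commit): "lsl x0, x1,
    // #3" is the alias of "ubfm x0, x1, #61, #60", so ImmR == (ImmS + 1) % 64
    // holds (61 == (60 + 1) % 64) and the printed shift amount is
    // 63 - ImmS == 3.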
    if (SF && (ImmS + 1) % 64 == ImmR) {
      Inst.setOpcode(AArch64::LSLxxi);
      Inst.addOperand(MCOperand::CreateImm(63 - ImmS));
      return MCDisassembler::Success;
    } else if (!SF && (ImmS + 1) % 32 == ImmR) {
      Inst.setOpcode(AArch64::LSLwwi);
      Inst.addOperand(MCOperand::CreateImm(31 - ImmS));
      return MCDisassembler::Success;
    }
  }

  // Otherwise it's definitely either an extract or an insert depending on
  // which of ImmR or ImmS is larger.
  unsigned ExtractOp, InsertOp;
  switch (Opc) {
  default: llvm_unreachable("unexpected instruction trying to decode bitfield");
  case SBFM:
    ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii;
    InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii;
    break;
  case BFM:
    ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii;
    InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii;
    break;
  case UBFM:
    ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii;
    InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii;
    break;
  }

  // Otherwise it's a boring insert or extract
  Inst.addOperand(MCOperand::CreateImm(ImmR));
  Inst.addOperand(MCOperand::CreateImm(ImmS));


  if (ImmS < ImmR)
    Inst.setOpcode(InsertOp);
  else
    Inst.setOpcode(ExtractOp);

  return MCDisassembler::Success;
}

static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
                                              uint64_t Address,
                                              const void *Decoder) {
  // This decoder exists to add the dummy Lane operand to the MCInst, which must
  // be 1 in assembly but has no other real manifestation.
  unsigned Rd = fieldFromInstruction(Insn, 0, 5);
  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
  unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);

  if (IsToVec) {
    DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder);
    DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
  } else {
    DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
    DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder);
  }

  // Add the lane
  Inst.addOperand(MCOperand::CreateImm(1));

  return MCDisassembler::Success;
}


static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
                                              unsigned Insn,
                                              uint64_t Address,
                                              const void *Decoder) {
  DecodeStatus Result = MCDisassembler::Success;
  unsigned Rt = fieldFromInstruction(Insn, 0, 5);
  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
  unsigned Rt2 = fieldFromInstruction(Insn, 10, 5);
  unsigned SImm7 = fieldFromInstruction(Insn, 15, 7);
  unsigned L = fieldFromInstruction(Insn, 22, 1);
  unsigned V = fieldFromInstruction(Insn, 26, 1);
  unsigned Opc = fieldFromInstruction(Insn, 30, 2);

  // Not an official name, but it turns out that bit 23 distinguishes indexed
  // from non-indexed operations.
  unsigned Indexed = fieldFromInstruction(Insn, 23, 1);

  if (Indexed && L == 0) {
    // The MCInst for an indexed store has an out operand and 4 ins:
    //    Rn_wb, Rt, Rt2, Rn, Imm
    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
  }

  // You shouldn't load to the same register twice in an instruction...
  if (L && Rt == Rt2)
    Result = MCDisassembler::SoftFail;

  // ... or do any operation that writes-back to a transfer register. But note
  // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different.
  if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn))
    Result = MCDisassembler::SoftFail;

  // Exactly how we decode the MCInst's registers depends on the Opc and V
  // fields of the instruction. These also obviously determine the size of the
  // operation so we can fill in that information while we're at it.
  if (V) {
    // The instruction operates on the FP/SIMD registers
    switch (Opc) {
    default: return MCDisassembler::Fail;
    case 0:
      DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
      DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder);
      break;
    case 1:
      DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
      DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder);
      break;
    case 2:
      DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
      DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder);
      break;
    }
  } else {
    switch (Opc) {
    default: return MCDisassembler::Fail;
    case 0:
      DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
      DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder);
      break;
    case 1:
      assert(L && "unexpected \"store signed\" attempt");
      DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
      DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
      break;
    case 2:
      DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
      DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
      break;
    }
  }

  if (Indexed && L == 1) {
    // The MCInst for an indexed load has 3 out operands and 3 ins:
    //    Rt, Rt2, Rn_wb, Rt2, Rn, Imm
    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
  }


  DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
  Inst.addOperand(MCOperand::CreateImm(SImm7));

  return Result;
}

static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
                                                       uint32_t Val,
                                                       uint64_t Address,
                                                       const void *Decoder) {
  unsigned Rt = fieldFromInstruction(Val, 0, 5);
  unsigned Rn = fieldFromInstruction(Val, 5, 5);
  unsigned Rt2 = fieldFromInstruction(Val, 10, 5);
  unsigned MemSize = fieldFromInstruction(Val, 30, 2);

  DecodeStatus S = MCDisassembler::Success;
  if (Rt == Rt2) S = MCDisassembler::SoftFail;

  switch (MemSize) {
  case 2:
    if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder)))
      return MCDisassembler::Fail;
    if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder)))
      return MCDisassembler::Fail;
    break;
  case 3:
    if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder)))
      return MCDisassembler::Fail;
    if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder)))
      return MCDisassembler::Fail;
    break;
  default:
    llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction");
  }

  if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder)))
    return MCDisassembler::Fail;

  return S;
}

template<typename SomeNamedImmMapper>
static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
                                          unsigned Val,
                                          uint64_t Address,
                                          const void *Decoder) {
  SomeNamedImmMapper Mapper;
  bool ValidNamed;
  Mapper.toString(Val, ValidNamed);
  if (ValidNamed || Mapper.validImm(Val)) {
    Inst.addOperand(MCOperand::CreateImm(Val));
    return MCDisassembler::Success;
  }

  return MCDisassembler::Fail;
}

static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
                                        llvm::MCInst &Inst,
                                        unsigned Val,
                                        uint64_t Address,
                                        const void *Decoder) {
  bool ValidNamed;
  Mapper.toString(Val, ValidNamed);

  Inst.addOperand(MCOperand::CreateImm(Val));

  return ValidNamed ? MCDisassembler::Success : MCDisassembler::Fail;
}

static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
                                     unsigned Val,
                                     uint64_t Address,
                                     const void *Decoder) {
  return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
                             Decoder);
}

static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
                                     unsigned Val,
                                     uint64_t Address,
                                     const void *Decoder) {
  return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
                             Decoder);
}

static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
                                                   unsigned Insn,
                                                   uint64_t Address,
                                                   const void *Decoder) {
  unsigned Rt = fieldFromInstruction(Insn, 0, 5);
  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
  unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);

  unsigned Opc = fieldFromInstruction(Insn, 22, 2);
  unsigned V = fieldFromInstruction(Insn, 26, 1);
  unsigned Size = fieldFromInstruction(Insn, 30, 2);

  if (Opc == 0 || (V == 1 && Opc == 2)) {
    // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
  }

  if (V == 0 && (Opc == 2 || Size == 3)) {
    DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
  } else if (V == 0) {
    DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
  } else if (V == 1 && (Opc & 2)) {
    DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
  } else {
    switch (Size) {
    case 0:
      DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
      break;
    case 1:
      DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
      break;
    case 2:
      DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
      break;
    case 3:
      DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
      break;
    }
  }

  if (Opc != 0 && (V != 1 || Opc != 2)) {
    // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
  }

  DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);

  Inst.addOperand(MCOperand::CreateImm(Imm9));

  // N.b. The official documentation says unpredictable if Rt == Rn, but this
  // takes place at the architectural rather than encoding level:
  //
  // "STR xzr, [sp], #4" is perfectly valid.
  if (V == 0 && Rt == Rn && Rn != 31)
    return MCDisassembler::SoftFail;
  else
    return MCDisassembler::Success;
}

static MCDisassembler *createAArch64Disassembler(const Target &T,
                                                 const MCSubtargetInfo &STI) {
  return new AArch64Disassembler(STI, T.createMCRegInfo(""));
}

extern "C" void LLVMInitializeAArch64Disassembler() {
  TargetRegistry::RegisterMCDisassembler(TheAArch64Target,
                                         createAArch64Disassembler);
}
7
lib/Target/AArch64/Disassembler/CMakeLists.txt
Normal file
7
lib/Target/AArch64/Disassembler/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
||||
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
|
||||
|
||||
add_llvm_library(LLVMAArch64Disassembler
|
||||
AArch64Disassembler.cpp
|
||||
)
|
||||
|
||||
add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
|
24
lib/Target/AArch64/Disassembler/LLVMBuild.txt
Normal file
24
lib/Target/AArch64/Disassembler/LLVMBuild.txt
Normal file
@ -0,0 +1,24 @@
;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
;
;                     The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = AArch64Disassembler
parent = AArch64
required_libraries = AArch64CodeGen AArch64Desc AArch64Info MC Support
add_to_library_groups = AArch64
16
lib/Target/AArch64/Disassembler/Makefile
Normal file
@@ -0,0 +1,16 @@
##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===##
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##

LEVEL = ../../../..
LIBRARYNAME = LLVMAArch64Disassembler

# Hack: we need to include 'main' target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common
408
lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
Normal file
@@ -0,0 +1,408 @@
//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class prints an AArch64 MCInst to a .s file.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "asm-printer"
#include "AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64BaseInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define GET_INSTRUCTION_NAME
#define PRINT_ALIAS_INSTR
#include "AArch64GenAsmWriter.inc"

static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
  assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit");
  if (Value & (1ULL << (BitWidth - 1)))
    return static_cast<int64_t>(Value) - (1LL << BitWidth);
  else
    return Value;
}
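unpackSignedImm sign-extends an N-bit field by subtracting 2^N when the top bit is set. A standalone sanity check of that arithmetic (the helper is reproduced here only for the example, it is not an addition to the patch):

#include <cassert>
#include <cstdint>

// Same logic as unpackSignedImm above, reproduced standalone.
static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
  assert(!(Value & ~((1ULL << BitWidth) - 1)) && "immediate not n-bit");
  if (Value & (1ULL << (BitWidth - 1)))
    return static_cast<int64_t>(Value) - (1LL << BitWidth);
  return Value;
}

int main() {
  assert(unpackSignedImm(9, 0x0ff) == 255);  // top bit clear: unchanged
  assert(unpackSignedImm(9, 0x1ff) == -1);   // 0b111111111 -> -1
  assert(unpackSignedImm(9, 0x100) == -256); // most negative 9-bit value
}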

AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
                                       const MCInstrInfo &MII,
                                       const MCRegisterInfo &MRI,
                                       const MCSubtargetInfo &STI) :
  MCInstPrinter(MAI, MII, MRI) {
  // Initialize the set of available features.
  setAvailableFeatures(STI.getFeatureBits());
}

void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
  OS << getRegisterName(RegNo);
}

void
AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI,
                                            unsigned OpNum, raw_ostream &O) {
  const MCOperand &MOImm = MI->getOperand(OpNum);
  int32_t Imm = unpackSignedImm(9, MOImm.getImm());

  O << '#' << Imm;
}

void
AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
                                              raw_ostream &O, unsigned MemSize,
                                              unsigned RmSize) {
  unsigned ExtImm = MI->getOperand(OpNum).getImm();
  unsigned OptionHi = ExtImm >> 1;
  unsigned S = ExtImm & 1;
  bool IsLSL = OptionHi == 1 && RmSize == 64;

  const char *Ext;
  switch (OptionHi) {
  case 1:
    Ext = (RmSize == 32) ? "uxtw" : "lsl";
    break;
  case 3:
    Ext = (RmSize == 32) ? "sxtw" : "sxtx";
    break;
  default:
    llvm_unreachable("Incorrect Option on load/store (reg offset)");
  }
  O << Ext;

  if (S) {
    unsigned ShiftAmt = Log2_32(MemSize);
    O << " #" << ShiftAmt;
  } else if (IsLSL) {
    O << " #0";
  }
}
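For the register-offset addressing modes handled above, the S bit selects a scaled index and the printed shift amount is log2 of the access size. A small illustrative sketch of that relationship (scaleShift is a hypothetical stand-in for LLVM's Log2_32):

#include <cassert>

// When S is set, the extended register is shifted left by log2(MemSize),
// so e.g. "ldr x0, [x1, w2, uxtw #3]" scales a 32-bit index by 8 bytes.
static unsigned scaleShift(unsigned MemSize) {
  unsigned Shift = 0;
  while ((1u << Shift) < MemSize)
    ++Shift;
  return Shift; // what Log2_32(MemSize) computes in LLVM's Support library
}

int main() {
  assert(scaleShift(1) == 0); // byte accesses print "#0" when S is set
  assert(scaleShift(4) == 2); // 32-bit accesses: uxtw/sxtw #2
  assert(scaleShift(8) == 3); // 64-bit accesses: lsl/uxtw #3
}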

void
AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI,
                                              unsigned OpNum, raw_ostream &O) {
  const MCOperand &Imm12Op = MI->getOperand(OpNum);

  if (Imm12Op.isImm()) {
    int64_t Imm12 = Imm12Op.getImm();
    assert(Imm12 >= 0 && "Invalid immediate for add/sub imm");
    O << "#" << Imm12;
  } else {
    assert(Imm12Op.isExpr() && "Unexpected shift operand type");
    O << "#" << *Imm12Op.getExpr();
  }
}

void
AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum,
                                               raw_ostream &O) {

  printAddSubImmLSL0Operand(MI, OpNum, O);

  O << ", lsl #12";
}

void
AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum,
                                        raw_ostream &O) {
  const MCOperand &MO = MI->getOperand(OpNum);
  O << MO.getImm();
}

template<unsigned RegWidth> void
AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum,
                                       raw_ostream &O) {
  const MCOperand &ImmROp = MI->getOperand(OpNum);
  unsigned LSB = ImmROp.getImm() == 0 ? 0 : RegWidth - ImmROp.getImm();

  O << '#' << LSB;
}

void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum,
                                              raw_ostream &O) {
  const MCOperand &ImmSOp = MI->getOperand(OpNum);
  unsigned Width = ImmSOp.getImm() + 1;

  O << '#' << Width;
}

void
AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum,
                                         raw_ostream &O) {
  const MCOperand &ImmSOp = MI->getOperand(OpNum);
  const MCOperand &ImmROp = MI->getOperand(OpNum - 1);

  unsigned ImmR = ImmROp.getImm();
  unsigned ImmS = ImmSOp.getImm();

  assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract");

  O << '#' << (ImmS - ImmR + 1);
}
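The printers above recover the assembly-level lsb/width pair from the hardware immr/imms fields: for BFI, lsb = RegWidth - immr (with immr == 0 meaning lsb 0) and width = imms + 1; for BFX, width = imms - immr + 1. A worked check of those formulas, using hand-picked example encodings that are not taken from the patch:

#include <cassert>

int main() {
  const unsigned RegWidth = 32;

  // "bfi w0, w1, #4, #8" is BFM with immr = 32 - 4 = 28, imms = 8 - 1 = 7.
  unsigned ImmR = 28, ImmS = 7;
  unsigned LSB = ImmR == 0 ? 0 : RegWidth - ImmR; // printBFILSBOperand
  unsigned Width = ImmS + 1;                      // printBFIWidthOperand
  assert(LSB == 4 && Width == 8);

  // "ubfx w0, w1, #4, #8" is UBFM with immr = 4, imms = 4 + 8 - 1 = 11.
  ImmR = 4; ImmS = 11;
  assert(ImmS - ImmR + 1 == 8);                   // printBFXWidthOperand
}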

void
AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O) {
  const MCOperand &CRx = MI->getOperand(OpNum);

  O << 'c' << CRx.getImm();
}


void
AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
                                            raw_ostream &O) {
  const MCOperand &ScaleOp = MI->getOperand(OpNum);

  O << '#' << (64 - ScaleOp.getImm());
}


void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
                                           raw_ostream &o) {
  const MCOperand &MOImm8 = MI->getOperand(OpNum);

  assert(MOImm8.isImm()
         && "Immediate operand required for floating-point immediate inst");

  uint32_t Imm8 = MOImm8.getImm();
  uint32_t Fraction = Imm8 & 0xf;
  uint32_t Exponent = (Imm8 >> 4) & 0x7;
  uint32_t Negative = (Imm8 >> 7) & 0x1;

  float Val = 1.0f + Fraction / 16.0f;

  // That is:
  // 000 -> 2^1,  001 -> 2^2,  010 -> 2^3,  011 -> 2^4,
  // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0
  if (Exponent & 0x4) {
    Val /= 1 << (7 - Exponent);
  } else {
    Val *= 1 << (Exponent + 1);
  }

  Val = Negative ? -Val : Val;

  o << '#' << format("%.8f", Val);
}
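The imm8 decoding above follows the AArch64 FMOV (immediate) format: a sign bit, a 3-bit biased exponent, and a 4-bit fraction. A standalone check that the printer's arithmetic lands on the expected values (the helper mirrors printFPImmOperand; the sample encodings are illustrative):

#include <cassert>
#include <cstdint>

// Mirrors printFPImmOperand's arithmetic for the 8-bit FP immediate.
static float decodeFPImm8(uint32_t Imm8) {
  uint32_t Fraction = Imm8 & 0xf;
  uint32_t Exponent = (Imm8 >> 4) & 0x7;
  uint32_t Negative = (Imm8 >> 7) & 0x1;

  float Val = 1.0f + Fraction / 16.0f;
  if (Exponent & 0x4)
    Val /= 1 << (7 - Exponent);   // exponents 100..111 -> 2^-3 .. 2^0
  else
    Val *= 1 << (Exponent + 1);   // exponents 000..011 -> 2^1 .. 2^4
  return Negative ? -Val : Val;
}

int main() {
  assert(decodeFPImm8(0x70) == 1.0f);  // "fmov d0, #1.0"
  assert(decodeFPImm8(0x00) == 2.0f);  // zero exponent field, zero fraction
  assert(decodeFPImm8(0xf0) == -1.0f); // sign bit set
}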

void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum,
                                            raw_ostream &o) {
  o << "#0.0";
}

void
AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum,
                                         raw_ostream &O) {
  const MCOperand &MO = MI->getOperand(OpNum);

  O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm()));
}

template <unsigned field_width, unsigned scale> void
AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum,
                                      raw_ostream &O) {
  const MCOperand &MO = MI->getOperand(OpNum);

  if (!MO.isImm()) {
    printOperand(MI, OpNum, O);
    return;
  }

  // The immediate of LDR (lit) instructions is a signed 19-bit immediate,
  // which is multiplied by 4 (because all A64 instructions are 32-bits wide).
  uint64_t UImm = MO.getImm();
  uint64_t Sign = UImm & (1LL << (field_width - 1));
  int64_t SImm = scale * ((UImm & ~Sign) - Sign);

  O << "#" << SImm;
}

template<unsigned RegWidth> void
AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum,
                                           raw_ostream &O) {
  const MCOperand &MO = MI->getOperand(OpNum);
  uint64_t Val;
  A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val);
  O << "#0x";
  O.write_hex(Val);
}

void
AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
                                             raw_ostream &O, int MemSize) {
  const MCOperand &MOImm = MI->getOperand(OpNum);

  if (MOImm.isImm()) {
    uint32_t Imm = MOImm.getImm() * MemSize;

    O << "#" << Imm;
  } else {
    O << "#" << *MOImm.getExpr();
  }
}

void
AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum,
                                      raw_ostream &O,
                                      A64SE::ShiftExtSpecifiers Shift) {
  const MCOperand &MO = MI->getOperand(OpNum);

  // LSL #0 is not printed
  if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0)
    return;

  switch (Shift) {
  case A64SE::LSL: O << "lsl"; break;
  case A64SE::LSR: O << "lsr"; break;
  case A64SE::ASR: O << "asr"; break;
  case A64SE::ROR: O << "ror"; break;
  default: llvm_unreachable("Invalid shift specifier in logical instruction");
  }

  O << " #" << MO.getImm();
}

void
AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
                                            raw_ostream &O) {
  const MCOperand &UImm16MO = MI->getOperand(OpNum);
  const MCOperand &ShiftMO = MI->getOperand(OpNum + 1);

  if (UImm16MO.isImm()) {
    O << '#' << UImm16MO.getImm();

    if (ShiftMO.getImm() != 0)
      O << ", lsl #" << (ShiftMO.getImm() * 16);

    return;
  }

  O << "#" << *UImm16MO.getExpr();
}

void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper,
                                              const MCInst *MI, unsigned OpNum,
                                              raw_ostream &O) {
  bool ValidName;
  const MCOperand &MO = MI->getOperand(OpNum);
  StringRef Name = Mapper.toString(MO.getImm(), ValidName);

  if (ValidName)
    O << Name;
  else
    O << '#' << MO.getImm();
}

void
AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
                                       const MCInst *MI, unsigned OpNum,
                                       raw_ostream &O) {
  const MCOperand &MO = MI->getOperand(OpNum);

  bool ValidName;
  std::string Name = Mapper.toString(MO.getImm(), ValidName);
  if (ValidName) {
    O << Name;
    return;
  }
}


void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI,
                                               unsigned OpNum,
                                               raw_ostream &O,
                                               A64SE::ShiftExtSpecifiers Ext) {
  // FIXME: In principle TableGen should be able to detect this itself far more
  // easily. We will only accumulate more of these hacks.
  unsigned Reg0 = MI->getOperand(0).getReg();
  unsigned Reg1 = MI->getOperand(1).getReg();

  if (isStackReg(Reg0) || isStackReg(Reg1)) {
    A64SE::ShiftExtSpecifiers LSLEquiv;

    if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP)
      LSLEquiv = A64SE::UXTX;
    else
      LSLEquiv = A64SE::UXTW;

    if (Ext == LSLEquiv) {
      O << "lsl #" << MI->getOperand(OpNum).getImm();
      return;
    }
  }

  switch (Ext) {
  case A64SE::UXTB: O << "uxtb"; break;
  case A64SE::UXTH: O << "uxth"; break;
  case A64SE::UXTW: O << "uxtw"; break;
  case A64SE::UXTX: O << "uxtx"; break;
  case A64SE::SXTB: O << "sxtb"; break;
  case A64SE::SXTH: O << "sxth"; break;
  case A64SE::SXTW: O << "sxtw"; break;
  case A64SE::SXTX: O << "sxtx"; break;
  default: llvm_unreachable("Unexpected shift type for printing");
  }

  const MCOperand &MO = MI->getOperand(OpNum);
  if (MO.getImm() != 0)
    O << " #" << MO.getImm();
}

template<int MemScale> void
AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
                                            raw_ostream &O) {
  const MCOperand &MOImm = MI->getOperand(OpNum);
  int32_t Imm = unpackSignedImm(7, MOImm.getImm());

  O << "#" << (Imm * MemScale);
}

void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
                                      raw_ostream &O) {
  const MCOperand &Op = MI->getOperand(OpNo);
  if (Op.isReg()) {
    unsigned Reg = Op.getReg();
    O << getRegisterName(Reg);
  } else if (Op.isImm()) {
    O << '#' << Op.getImm();
  } else {
    assert(Op.isExpr() && "unknown operand kind in printOperand");
    // If a symbolic branch target was added as a constant expression then
    // print that address in hex.
    const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
    int64_t Address;
    if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
      O << "0x";
      O.write_hex(Address);
    } else {
      // Otherwise, just print the expression.
      O << *Op.getExpr();
    }
  }
}


void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
                                   StringRef Annot) {
  if (MI->getOpcode() == AArch64::TLSDESCCALL) {
    // This is a special assembler directive which applies an
    // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a
    // fixed form outside the normal TableGenerated scheme.
    O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr();
  } else if (!printAliasInstr(MI, O))
    printInstruction(MI, O);

  printAnnotation(O, Annot);
}
171
lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
Normal file
@@ -0,0 +1,171 @@
//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class prints an AArch64 MCInst to a .s file.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64INSTPRINTER_H
#define LLVM_AARCH64INSTPRINTER_H

#include "MCTargetDesc/AArch64BaseInfo.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"

namespace llvm {

class MCOperand;

class AArch64InstPrinter : public MCInstPrinter {
public:
  AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
                     const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);

  // Autogenerated by tblgen
  void printInstruction(const MCInst *MI, raw_ostream &O);
  bool printAliasInstr(const MCInst *MI, raw_ostream &O);
  static const char *getRegisterName(unsigned RegNo);
  static const char *getInstructionName(unsigned Opcode);

  void printRegName(raw_ostream &O, unsigned RegNum) const;

  template<unsigned MemSize, unsigned RmSize>
  void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
                                 raw_ostream &O) {
    printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize);
  }


  void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
                                 raw_ostream &O, unsigned MemSize,
                                 unsigned RmSize);

  void printAddSubImmLSL0Operand(const MCInst *MI,
                                 unsigned OpNum, raw_ostream &O);
  void printAddSubImmLSL12Operand(const MCInst *MI,
                                  unsigned OpNum, raw_ostream &O);

  void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);

  template<unsigned RegWidth>
  void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
  void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
  void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);


  void printCondCodeOperand(const MCInst *MI, unsigned OpNum,
                            raw_ostream &O);

  void printCRxOperand(const MCInst *MI, unsigned OpNum,
                       raw_ostream &O);

  void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
                               raw_ostream &O);

  void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);

  void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);

  template<int MemScale>
  void printOffsetUImm12Operand(const MCInst *MI,
                                unsigned OpNum, raw_ostream &o) {
    printOffsetUImm12Operand(MI, OpNum, o, MemScale);
  }

  void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
                                raw_ostream &o, int MemScale);

  template<unsigned field_width, unsigned scale>
  void printLabelOperand(const MCInst *MI, unsigned OpNum,
                         raw_ostream &O);

  template<unsigned RegWidth>
  void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);

  template<typename SomeNamedImmMapper>
  void printNamedImmOperand(const MCInst *MI, unsigned OpNum,
                            raw_ostream &O) {
    printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O);
  }

  void printNamedImmOperand(const NamedImmMapper &Mapper,
                            const MCInst *MI, unsigned OpNum,
                            raw_ostream &O);

  void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
                          const MCInst *MI, unsigned OpNum,
                          raw_ostream &O);

  void printMRSOperand(const MCInst *MI, unsigned OpNum,
                       raw_ostream &O) {
    printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O);
  }

  void printMSROperand(const MCInst *MI, unsigned OpNum,
                       raw_ostream &O) {
    printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O);
  }

  void printShiftOperand(const char *name, const MCInst *MI,
                         unsigned OpIdx, raw_ostream &O);

  void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);

  void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
    printShiftOperand("lsr", MI, OpNum, O);
  }
  void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
    printShiftOperand("asr", MI, OpNum, O);
  }
  void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
    printShiftOperand("ror", MI, OpNum, O);
  }

  template<A64SE::ShiftExtSpecifiers Shift>
  void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
    printShiftOperand(MI, OpNum, O, Shift);
  }

  void printShiftOperand(const MCInst *MI, unsigned OpNum,
                         raw_ostream &O, A64SE::ShiftExtSpecifiers Sh);


  void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
                               raw_ostream &O);

  template<int MemSize> void
  printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);

  void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum,
                               raw_ostream &O);

  void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);

  template<A64SE::ShiftExtSpecifiers EXT>
  void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
                             raw_ostream &O) {
    printRegExtendOperand(MI, OpNum, O, EXT);
  }

  void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
                             raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);

  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
  virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);

  bool isStackReg(unsigned RegNo) {
    return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
  }


};

}

#endif
8
lib/Target/AArch64/InstPrinter/CMakeLists.txt
Normal file
@@ -0,0 +1,8 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )

add_llvm_library(LLVMAArch64AsmPrinter
  AArch64InstPrinter.cpp
  )

add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen)
24
lib/Target/AArch64/InstPrinter/LLVMBuild.txt
Normal file
@@ -0,0 +1,24 @@
;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
;
;                     The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = AArch64AsmPrinter
parent = AArch64
required_libraries = MC Support
add_to_library_groups = AArch64
15
lib/Target/AArch64/InstPrinter/Makefile
Normal file
@@ -0,0 +1,15 @@
##===- lib/Target/AArch64/InstPrinter/Makefile -------------*- Makefile -*-===##
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMAArch64AsmPrinter

# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common
36
lib/Target/AArch64/LLVMBuild.txt
Normal file
@@ -0,0 +1,36 @@
;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===;
;
;                     The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[common]
subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo

[component_0]
type = TargetGroup
name = AArch64
parent = Target
has_asmparser = 1
has_asmprinter = 1
has_disassembler = 1
;has_jit = 1

[component_1]
type = Library
name = AArch64CodeGen
parent = AArch64
required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AsmPrinter CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = AArch64
580
lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
Normal file
@@ -0,0 +1,580 @@
//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
class AArch64AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo* STI;
public:
  AArch64AsmBackend(const Target &T, const StringRef TT)
    : MCAsmBackend(),
      STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", ""))
    {}


  ~AArch64AsmBackend() {
    delete STI;
  }

  bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;

  virtual void processFixupValue(const MCAssembler &Asm,
                                 const MCAsmLayout &Layout,
                                 const MCFixup &Fixup, const MCFragment *DF,
                                 MCValue &Target, uint64_t &Value,
                                 bool &IsResolved);
};
} // end anonymous namespace

void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
                                          const MCAsmLayout &Layout,
                                          const MCFixup &Fixup,
                                          const MCFragment *DF,
                                          MCValue &Target, uint64_t &Value,
                                          bool &IsResolved) {
  // The ADRP instruction adds some multiple of 0x1000 to the current PC &
  // ~0xfff. This means that the required offset to reach a symbol can vary by
  // up to one step depending on where the ADRP is in memory. For example:
  //
  //     ADRP x0, there
  //  there:
  //
  // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
  // we'll need that as an offset. At any other address "there" will be in the
  // same page as the ADRP and the instruction should encode 0x0. Assuming the
  // section isn't 0x1000-aligned, we therefore need to delegate this decision
  // to the linker -- a relocation!
  if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page ||
      (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page ||
      (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page ||
      (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page)
    IsResolved = false;
}
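The comment above is the crux of why ADRP fixups are always handed to the linker: the immediate encodes a difference between 4KB pages, not between addresses. A minimal sketch of the page-delta computation the linker ultimately performs (illustrative only, not an LLVM API):

#include <cassert>
#include <cstdint>

// Page(X) is X with the low 12 bits cleared; ADRP materializes
// Page(target) - Page(pc), in units of 0x1000.
static int64_t adrpPageDelta(uint64_t PC, uint64_t Target) {
  return ((int64_t)(Target & ~0xfffULL) - (int64_t)(PC & ~0xfffULL)) >> 12;
}

int main() {
  // "ADRP x0, there" at 0xffc with "there:" at 0x1000 needs one page...
  assert(adrpPageDelta(0xffc, 0x1000) == 1);
  // ...but from any address in the same page the delta is zero.
  assert(adrpPageDelta(0x800, 0x900) == 0);
}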

static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value);

namespace {

class ELFAArch64AsmBackend : public AArch64AsmBackend {
public:
  uint8_t OSABI;
  ELFAArch64AsmBackend(const Target &T, const StringRef TT,
                       uint8_t _OSABI)
    : AArch64AsmBackend(T, TT), OSABI(_OSABI) { }

  bool fixupNeedsRelaxation(const MCFixup &Fixup,
                            uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const;

  unsigned int getNumFixupKinds() const {
    return AArch64::NumTargetFixupKinds;
  }

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
    const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
      // This table *must* be in the order that the fixup_* kinds are defined
      // in AArch64FixupKinds.h.
      //
      // Name                                Offset (bits)  Size (bits)  Flags
      { "fixup_a64_ld_prel",                 0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_adr_prel",                0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_adr_prel_page",           0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_add_lo12",                0, 32, 0 },
      { "fixup_a64_ldst8_lo12",              0, 32, 0 },
      { "fixup_a64_ldst16_lo12",             0, 32, 0 },
      { "fixup_a64_ldst32_lo12",             0, 32, 0 },
      { "fixup_a64_ldst64_lo12",             0, 32, 0 },
      { "fixup_a64_ldst128_lo12",            0, 32, 0 },
      { "fixup_a64_tstbr",                   0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_condbr",                  0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_uncondbr",                0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_call",                    0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_movw_uabs_g0",            0, 32, 0 },
      { "fixup_a64_movw_uabs_g0_nc",         0, 32, 0 },
      { "fixup_a64_movw_uabs_g1",            0, 32, 0 },
      { "fixup_a64_movw_uabs_g1_nc",         0, 32, 0 },
      { "fixup_a64_movw_uabs_g2",            0, 32, 0 },
      { "fixup_a64_movw_uabs_g2_nc",         0, 32, 0 },
      { "fixup_a64_movw_uabs_g3",            0, 32, 0 },
      { "fixup_a64_movw_sabs_g0",            0, 32, 0 },
      { "fixup_a64_movw_sabs_g1",            0, 32, 0 },
      { "fixup_a64_movw_sabs_g2",            0, 32, 0 },
      { "fixup_a64_adr_prel_got_page",       0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_ld64_got_lo12_nc",        0, 32, 0 },
      { "fixup_a64_movw_dtprel_g2",          0, 32, 0 },
      { "fixup_a64_movw_dtprel_g1",          0, 32, 0 },
      { "fixup_a64_movw_dtprel_g1_nc",       0, 32, 0 },
      { "fixup_a64_movw_dtprel_g0",          0, 32, 0 },
      { "fixup_a64_movw_dtprel_g0_nc",       0, 32, 0 },
      { "fixup_a64_add_dtprel_hi12",         0, 32, 0 },
      { "fixup_a64_add_dtprel_lo12",         0, 32, 0 },
      { "fixup_a64_add_dtprel_lo12_nc",      0, 32, 0 },
      { "fixup_a64_ldst8_dtprel_lo12",       0, 32, 0 },
      { "fixup_a64_ldst8_dtprel_lo12_nc",    0, 32, 0 },
      { "fixup_a64_ldst16_dtprel_lo12",      0, 32, 0 },
      { "fixup_a64_ldst16_dtprel_lo12_nc",   0, 32, 0 },
      { "fixup_a64_ldst32_dtprel_lo12",      0, 32, 0 },
      { "fixup_a64_ldst32_dtprel_lo12_nc",   0, 32, 0 },
      { "fixup_a64_ldst64_dtprel_lo12",      0, 32, 0 },
      { "fixup_a64_ldst64_dtprel_lo12_nc",   0, 32, 0 },
      { "fixup_a64_movw_gottprel_g1",        0, 32, 0 },
      { "fixup_a64_movw_gottprel_g0_nc",     0, 32, 0 },
      { "fixup_a64_adr_gottprel_page",       0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_ld64_gottprel_lo12_nc",   0, 32, 0 },
      { "fixup_a64_ld_gottprel_prel19",      0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_movw_tprel_g2",           0, 32, 0 },
      { "fixup_a64_movw_tprel_g1",           0, 32, 0 },
      { "fixup_a64_movw_tprel_g1_nc",        0, 32, 0 },
      { "fixup_a64_movw_tprel_g0",           0, 32, 0 },
      { "fixup_a64_movw_tprel_g0_nc",        0, 32, 0 },
      { "fixup_a64_add_tprel_hi12",          0, 32, 0 },
      { "fixup_a64_add_tprel_lo12",          0, 32, 0 },
      { "fixup_a64_add_tprel_lo12_nc",       0, 32, 0 },
      { "fixup_a64_ldst8_tprel_lo12",        0, 32, 0 },
      { "fixup_a64_ldst8_tprel_lo12_nc",     0, 32, 0 },
      { "fixup_a64_ldst16_tprel_lo12",       0, 32, 0 },
      { "fixup_a64_ldst16_tprel_lo12_nc",    0, 32, 0 },
      { "fixup_a64_ldst32_tprel_lo12",       0, 32, 0 },
      { "fixup_a64_ldst32_tprel_lo12_nc",    0, 32, 0 },
      { "fixup_a64_ldst64_tprel_lo12",       0, 32, 0 },
      { "fixup_a64_ldst64_tprel_lo12_nc",    0, 32, 0 },
      { "fixup_a64_tlsdesc_adr_page",        0, 32, MCFixupKindInfo::FKF_IsPCRel },
      { "fixup_a64_tlsdesc_ld64_lo12_nc",    0, 32, 0 },
      { "fixup_a64_tlsdesc_add_lo12_nc",     0, 32, 0 },
      { "fixup_a64_tlsdesc_call",            0,  0, 0 }
    };
    if (Kind < FirstTargetFixupKind)
      return MCAsmBackend::getFixupKindInfo(Kind);

    assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
           "Invalid kind!");
    return Infos[Kind - FirstTargetFixupKind];
  }

  void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
                  uint64_t Value) const {
    unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8;
    Value = adjustFixupValue(Fixup.getKind(), Value);
    if (!Value) return;           // Doesn't change encoding.

    unsigned Offset = Fixup.getOffset();
    assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");

    // For each byte of the fragment that the fixup touches, mask in the bits
    // from the fixup value.
    for (unsigned i = 0; i != NumBytes; ++i) {
      Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
    }
  }

  bool mayNeedRelaxation(const MCInst&) const {
    return false;
  }

  void relaxInstruction(const MCInst&, llvm::MCInst&) const {
    llvm_unreachable("Cannot relax instructions");
  }

  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
    return createAArch64ELFObjectWriter(OS, OSABI);
  }
};

} // end anonymous namespace

bool
ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                           uint64_t Value,
                                           const MCRelaxableFragment *DF,
                                           const MCAsmLayout &Layout) const {
  // Correct for now. With all instructions 32-bit only very low-level
  // considerations could make you select something which may fail.
  return false;
}


bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
  // Can't emit NOP with size not multiple of 32-bits
  if (Count % 4 != 0)
    return false;

  uint64_t NumNops = Count / 4;
  for (uint64_t i = 0; i != NumNops; ++i)
    OW->Write32(0xd503201f);

  return true;
}

static unsigned ADRImmBits(unsigned Value) {
  unsigned lo2 = Value & 0x3;
  unsigned hi19 = (Value & 0x1fffff) >> 2;

  return (hi19 << 5) | (lo2 << 29);
}
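ADRImmBits splits a 21-bit immediate into the ADR/ADRP immlo (bits 30-29) and immhi (bits 23-5) fields. A quick standalone check of that packing (the helper is a copy made only for the example):

#include <cassert>
#include <cstdint>

// Same packing as ADRImmBits above: low 2 bits to [30:29], next 19 to [23:5].
static unsigned adrImmBits(unsigned Value) {
  unsigned lo2 = Value & 0x3;
  unsigned hi19 = (Value & 0x1fffff) >> 2;
  return (hi19 << 5) | (lo2 << 29);
}

int main() {
  assert(adrImmBits(0) == 0);
  assert(adrImmBits(1) == 1u << 29);          // immlo only
  assert(adrImmBits(4) == 1u << 5);           // immhi only
  assert(adrImmBits(0x1fffff) ==
         ((0x7ffffu << 5) | (0x3u << 29)));   // all 21 bits set
}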

static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
  switch (Kind) {
  default:
    llvm_unreachable("Unknown fixup kind!");
  case FK_Data_2:
    assert((int64_t)Value >= -32768 &&
           (int64_t)Value <= 65536 &&
           "Out of range ABS16 fixup");
    return Value;
  case FK_Data_4:
    assert((int64_t)Value >= -(1LL << 31) &&
           (int64_t)Value <= (1LL << 32) - 1 &&
           "Out of range ABS32 fixup");
    return Value;
  case FK_Data_8:
    return Value;

  case AArch64::fixup_a64_ld_gottprel_prel19:
    // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F
    // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20.
  case AArch64::fixup_a64_ld_prel:
    // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits
    // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20.
    assert((int64_t)Value >= -(1LL << 20) &&
           (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup");
    return (Value & 0x1ffffc) << 3;

  case AArch64::fixup_a64_adr_prel:
    // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of
    // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20.
    assert((int64_t)Value >= -(1LL << 20) &&
           (int64_t)Value < (1LL << 20) && "Out of range ADR fixup");
    return ADRImmBits(Value & 0x1fffff);

  case AArch64::fixup_a64_adr_prel_page:
    // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF
    // F000 of the result of the operation, checking that -2^32 <= result <
    // 2^32.
    assert((int64_t)Value >= -(1LL << 32) &&
           (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
    return ADRImmBits((Value & 0x1fffff000ULL) >> 12);

  case AArch64::fixup_a64_add_dtprel_hi12:
    // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
    // FF F000 of DTPREL(S+A), check 0 <= X < 2^24.
  case AArch64::fixup_a64_add_tprel_hi12:
    // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits
    // FF F000 of TPREL(S+A), check 0 <= X < 2^24.
    assert((int64_t)Value >= 0 &&
           (int64_t)Value < (1LL << 24) && "Out of range ADD fixup");
    return (Value & 0xfff000) >> 2;

  case AArch64::fixup_a64_add_dtprel_lo12:
    // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
    // FFF of DTPREL(S+A), check 0 <= X < 2^12.
  case AArch64::fixup_a64_add_tprel_lo12:
    // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits
    // FFF of TPREL(S+A), check 0 <= X < 2^12.
    assert((int64_t)Value >= 0 &&
           (int64_t)Value < (1LL << 12) && "Out of range ADD fixup");
    // ... fallthrough to no-checking versions ...
  case AArch64::fixup_a64_add_dtprel_lo12_nc:
    // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits
    // FFF of DTPREL(S+A) with no overflow check.
  case AArch64::fixup_a64_add_tprel_lo12_nc:
    // R_AARCH64_TLSLD_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits
    // FFF of TPREL(S+A) with no overflow check.
  case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
    // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits
    // FFF of G(TLSDESC(S+A)), with no overflow check.
  case AArch64::fixup_a64_add_lo12:
    // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of
    // S+A, with no overflow check.
    return (Value & 0xfff) << 10;

  case AArch64::fixup_a64_ldst8_dtprel_lo12:
    // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
    // of DTPREL(S+A), check 0 <= X < 2^12.
  case AArch64::fixup_a64_ldst8_tprel_lo12:
    // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
    // of DTPREL(S+A), check 0 <= X < 2^12.
    assert((int64_t) Value >= 0 &&
           (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
    // ... fallthrough to no-checking versions ...
  case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
    // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
    // of DTPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
    // R_AARCH64_TLSLD_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
    // of TPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_ldst8_lo12:
    // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF
    // of S+A, with no overflow check.
    return (Value & 0xfff) << 10;

  case AArch64::fixup_a64_ldst16_dtprel_lo12:
    // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits
    // FFE of DTPREL(S+A), check 0 <= X < 2^12.
  case AArch64::fixup_a64_ldst16_tprel_lo12:
    // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits
    // FFE of DTPREL(S+A), check 0 <= X < 2^12.
    assert((int64_t) Value >= 0 &&
           (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
    // ... fallthrough to no-checking versions ...
  case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
    // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits
    // FFE of DTPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
    // R_AARCH64_TLSLD_LDST16_TPREL_LO12: Set an LD/ST offset field to bits
    // FFE of TPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_ldst16_lo12:
    // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE
    // of S+A, with no overflow check.
    return (Value & 0xffe) << 9;

  case AArch64::fixup_a64_ldst32_dtprel_lo12:
    // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits
    // FFC of DTPREL(S+A), check 0 <= X < 2^12.
  case AArch64::fixup_a64_ldst32_tprel_lo12:
    // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits
    // FFC of DTPREL(S+A), check 0 <= X < 2^12.
    assert((int64_t) Value >= 0 &&
           (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
    // ... fallthrough to no-checking versions ...
  case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
    // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits
    // FFC of DTPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
    // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits
    // FFC of TPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_ldst32_lo12:
    // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC
    // of S+A, with no overflow check.
    return (Value & 0xffc) << 8;

  case AArch64::fixup_a64_ldst64_dtprel_lo12:
    // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits
    // FF8 of DTPREL(S+A), check 0 <= X < 2^12.
  case AArch64::fixup_a64_ldst64_tprel_lo12:
    // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits
    // FF8 of DTPREL(S+A), check 0 <= X < 2^12.
    assert((int64_t) Value >= 0 &&
           (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
    // ... fallthrough to no-checking versions ...
  case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
    // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits
    // FF8 of DTPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
    // R_AARCH64_TLSLD_LDST64_TPREL_LO12: Set an LD/ST offset field to bits
    // FF8 of TPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_ldst64_lo12:
    // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8
    // of S+A, with no overflow check.
    return (Value & 0xff8) << 7;

  case AArch64::fixup_a64_ldst128_lo12:
    // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0
    // of S+A, with no overflow check.
    return (Value & 0xff0) << 6;

  case AArch64::fixup_a64_movw_uabs_g0:
    // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A
    // with a check that S+A < 2^16
    assert(Value <= 0xffff && "Out of range move wide fixup");
    return (Value & 0xffff) << 5;

  case AArch64::fixup_a64_movw_dtprel_g0_nc:
    // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits
    // FFFF of DTPREL(S+A) with no overflow check.
  case AArch64::fixup_a64_movw_gottprel_g0_nc:
    // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits
    // FFFF of G(TPREL(S+A)) - GOT with no overflow check.
  case AArch64::fixup_a64_movw_tprel_g0_nc:
    // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits
    // FFFF of TPREL(S+A) with no overflow check.
  case AArch64::fixup_a64_movw_uabs_g0_nc:
    // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of
    // S+A with no overflow check.
    return (Value & 0xffff) << 5;

  case AArch64::fixup_a64_movw_uabs_g1:
    // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of
    // S+A with a check that S+A < 2^32
    assert(Value <= 0xffffffffull && "Out of range move wide fixup");
    return ((Value >> 16) & 0xffff) << 5;

  case AArch64::fixup_a64_movw_dtprel_g1_nc:
    // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field
    // to bits FFFF0000 of DTPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_movw_tprel_g1_nc:
    // R_AARCH64_TLSLD_MOVW_TPREL_G1_NC: Set a MOVK immediate field
    // to bits FFFF0000 of TPREL(S+A), with no overflow check.
  case AArch64::fixup_a64_movw_uabs_g1_nc:
    // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits
    // FFFF0000 of S+A with no overflow check.
    return ((Value >> 16) & 0xffff) << 5;

  case AArch64::fixup_a64_movw_uabs_g2:
    // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000
    // 0000 of S+A with a check that S+A < 2^48
    assert(Value <= 0xffffffffffffull && "Out of range move wide fixup");
    return ((Value >> 32) & 0xffff) << 5;

  case AArch64::fixup_a64_movw_uabs_g2_nc:
    // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000
    // 0000 of S+A with no overflow check.
    return ((Value >> 32) & 0xffff) << 5;

  case AArch64::fixup_a64_movw_uabs_g3:
    // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000
    // 0000 0000 of S+A (no overflow check needed)
    return ((Value >> 48) & 0xffff) << 5;

  case AArch64::fixup_a64_movw_dtprel_g0:
    // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field
    // to bits FFFF of DTPREL(S+A).
  case AArch64::fixup_a64_movw_tprel_g0:
    // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to
    // bits FFFF of TPREL(S+A).
  case AArch64::fixup_a64_movw_sabs_g0: {
    // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of
    // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we
    // should convert between MOVN and MOVZ to achieve our goals).
    int64_t Signed = Value;
    assert(Signed >= -(1LL << 16) && Signed < (1LL << 16)
           && "Out of range move wide fixup");
    if (Signed >= 0) {
      Value = (Value & 0xffff) << 5;
      // Bit 30 converts the MOVN encoding into a MOVZ
      Value |= 1 << 30;
    } else {
      // MCCodeEmitter should have encoded a MOVN, which is fine.
      Value = (~Value & 0xffff) << 5;
    }
    return Value;
  }

  case AArch64::fixup_a64_movw_dtprel_g1:
    // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field
    // to bits FFFF0000 of DTPREL(S+A).
  case AArch64::fixup_a64_movw_gottprel_g1:
    // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field
    // to bits FFFF0000 of G(TPREL(S+A)) - GOT.
  case AArch64::fixup_a64_movw_tprel_g1:
    // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to
    // bits FFFF0000 of TPREL(S+A).
  case AArch64::fixup_a64_movw_sabs_g1: {
    // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF
    // 0000 of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say
    // that we should convert between MOVN and MOVZ to achieve our goals).
    int64_t Signed = Value;
    assert(Signed >= -(1LL << 32) && Signed < (1LL << 32)
           && "Out of range move wide fixup");
    if (Signed >= 0) {
      Value = ((Value >> 16) & 0xffff) << 5;
      // Bit 30 converts the MOVN encoding into a MOVZ
      Value |= 1 << 30;
    } else {
      Value = ((~Value >> 16) & 0xffff) << 5;
    }
    return Value;
  }

  case AArch64::fixup_a64_movw_dtprel_g2:
    // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field
    // to bits FFFF 0000 0000 of DTPREL(S+A).
  case AArch64::fixup_a64_movw_tprel_g2:
    // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to
    // bits FFFF 0000 0000 of TPREL(S+A).
  case AArch64::fixup_a64_movw_sabs_g2: {
    // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF
    // 0000 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes
    // say that we should convert between MOVN and MOVZ to achieve our goals).
    int64_t Signed = Value;
    assert(Signed >= -(1LL << 48) && Signed < (1LL << 48)
           && "Out of range move wide fixup");
    if (Signed >= 0) {
      Value = ((Value >> 32) & 0xffff) << 5;
      // Bit 30 converts the MOVN encoding into a MOVZ
      Value |= 1 << 30;
    } else {
      Value = ((~Value >> 32) & 0xffff) << 5;
    }
    return Value;
  }

  case AArch64::fixup_a64_tstbr:
    // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction
    // to bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15.
    assert((int64_t)Value >= -(1LL << 15) &&
           (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup");
    return (Value & 0xfffc) << (5 - 2);

  case AArch64::fixup_a64_condbr:
    // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch
    // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20.
    assert((int64_t)Value >= -(1LL << 20) &&
           (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup");
    return (Value & 0x1ffffc) << (5 - 2);

  case AArch64::fixup_a64_uncondbr:
    // R_AARCH64_JUMP26 same as below (except to a linker, possibly).
  case AArch64::fixup_a64_call:
    // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P,
    // checking that -2^27 <= S+A-P < 2^27.
    assert((int64_t)Value >= -(1LL << 27) &&
           (int64_t)Value < (1LL << 27) && "Out of range branch fixup");
    return (Value & 0xffffffc) >> 2;

  case AArch64::fixup_a64_adr_gottprel_page:
    // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to
    // bits 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X <
    // 2^32.
  case AArch64::fixup_a64_tlsdesc_adr_page:
    // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits
    // 1FFFFF000 of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32.
  case AArch64::fixup_a64_adr_prel_got_page:
    // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits
    // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S)) -
    // Page(GOT) < 2^32.
    assert((int64_t)Value >= -(1LL << 32) &&
           (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
    return ADRImmBits((Value & 0x1fffff000) >> 12);

  case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
    // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits
    // FF8 of X, with no overflow check. Check that X & 7 == 0.
  case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
    // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of
    // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0.
  case AArch64::fixup_a64_ld64_got_lo12_nc:
    // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8
    // of G(S) with no overflow check. Check X & 7 == 0
    assert(((int64_t)Value & 7) == 0 && "Misaligned fixup");
    return (Value & 0xff8) << 7;

  case AArch64::fixup_a64_tlsdesc_call:
    // R_AARCH64_TLSDESC_CALL: For relaxation only.
    return 0;
  }
}
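All the MOVZ/MOVK fixups above share one shape: select a 16-bit slice of the value and place it in the instruction's imm16 field, which starts at bit 5. A hedged standalone sketch of the G1 (bits 16-31) case:

#include <cassert>
#include <cstdint>

// fixup_a64_movw_uabs_g1: imm16 = bits [31:16] of the value, placed at bit 5.
static uint64_t movwUAbsG1(uint64_t Value) {
  assert(Value <= 0xffffffffull && "Out of range move wide fixup");
  return ((Value >> 16) & 0xffff) << 5;
}

int main() {
  // "movz x0, #0x1234, lsl #16" materializing address 0x12340000:
  assert(movwUAbsG1(0x12340000) == (0x1234ull << 5));
}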

MCAsmBackend *
llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
  Triple TheTriple(TT);

  return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS());
}
779
lib/Target/AArch64/MCTargetDesc/AArch64BaseInfo.h
Normal file
@@ -0,0 +1,779 @@
//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains small standalone helper functions and enum definitions
// for the AArch64 target useful for the compiler back-end and the MC
// libraries. As such, it deliberately does not include references to LLVM
// core code gen types, passes, etc..
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64_BASEINFO_H
#define LLVM_AARCH64_BASEINFO_H

#include "AArch64MCTargetDesc.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"

namespace llvm {

// Enums corresponding to AArch64 condition codes
namespace A64CC {
  // The CondCodes constants map directly to the 4-bit encoding of the
  // condition field for predicated instructions.
  enum CondCodes {   // Meaning (integer)          Meaning (floating-point)
    EQ = 0,          // Equal                      Equal
    NE,              // Not equal                  Not equal, or unordered
    HS,              // Unsigned higher or same    >, ==, or unordered
    LO,              // Unsigned lower or same     Less than
    MI,              // Minus, negative            Less than
    PL,              // Plus, positive or zero     >, ==, or unordered
    VS,              // Overflow                   Unordered
    VC,              // No overflow                Ordered
    HI,              // Unsigned higher            Greater than, or unordered
    LS,              // Unsigned lower or same     Less than or equal
    GE,              // Greater than or equal      Greater than or equal
    LT,              // Less than                  Less than, or unordered
    GT,              // Signed greater than        Greater than
    LE,              // Signed less than or equal  <, ==, or unordered
    AL,              // Always (unconditional)     Always (unconditional)
    NV,              // Always (unconditional)     Always (unconditional)
    // Note the NV exists purely to disassemble 0b1111. Execution is "always".
    Invalid
  };

} // namespace A64CC

inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code");
  case A64CC::EQ: return "eq";
  case A64CC::NE: return "ne";
  case A64CC::HS: return "hs";
  case A64CC::LO: return "lo";
  case A64CC::MI: return "mi";
  case A64CC::PL: return "pl";
  case A64CC::VS: return "vs";
  case A64CC::VC: return "vc";
  case A64CC::HI: return "hi";
  case A64CC::LS: return "ls";
  case A64CC::GE: return "ge";
  case A64CC::LT: return "lt";
  case A64CC::GT: return "gt";
  case A64CC::LE: return "le";
  case A64CC::AL: return "al";
  case A64CC::NV: return "nv";
  }
}

inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
  return StringSwitch<A64CC::CondCodes>(CondStr.lower())
             .Case("eq", A64CC::EQ)
             .Case("ne", A64CC::NE)
             .Case("hs", A64CC::HS)
             .Case("cs", A64CC::HS)
             .Case("lo", A64CC::LO)
             .Case("cc", A64CC::LO)
             .Case("mi", A64CC::MI)
             .Case("pl", A64CC::PL)
             .Case("vs", A64CC::VS)
             .Case("vc", A64CC::VC)
             .Case("hi", A64CC::HI)
             .Case("ls", A64CC::LS)
             .Case("ge", A64CC::GE)
             .Case("lt", A64CC::LT)
             .Case("gt", A64CC::GT)
             .Case("le", A64CC::LE)
             .Case("al", A64CC::AL)
             .Case("nv", A64CC::NV)
             .Default(A64CC::Invalid);
}

inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
  // It turns out that the condition codes have been designed so that in order
  // to reverse the intent of the condition you only have to invert the low
  // bit:

  return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
}
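The low-bit inversion trick works because the 4-bit encodings above pair each condition with its opposite in adjacent slots (EQ/NE = 0/1, GE/LT = 10/11, and so on). A quick standalone check:

#include <cassert>

// Flipping bit 0 of GE (0b1010) yields LT (0b1011), of EQ (0b0000) yields
// NE (0b0001); the values below are a subset of A64CC::CondCodes.
int main() {
  enum { EQ = 0, NE = 1, GE = 10, LT = 11 };
  assert((EQ ^ 0x1) == NE);
  assert((GE ^ 0x1) == LT);
  assert((LT ^ 0x1) == GE); // inversion is an involution
}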

/// Instances of this class can perform bidirectional mapping from random
/// identifier strings to operand encodings. For example "MSR" takes a named
/// system-register which must be encoded somehow and decoded for printing. This
/// central location means that the information for those transformations is not
/// duplicated and remains in sync.
///
/// FIXME: currently the algorithm is a completely unoptimised linear
/// search. Obviously this could be improved, but we would probably want to work
/// out just how often these instructions are emitted before working on it. It
/// might even be optimal to just reorder the tables for the common instructions
/// rather than changing the algorithm.
struct NamedImmMapper {
  struct Mapping {
    const char *Name;
    uint32_t Value;
  };

  template<int N>
  NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
    : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}

  StringRef toString(uint32_t Value, bool &Valid) const;
  uint32_t fromString(StringRef Name, bool &Valid) const;

  /// Many of the instructions allow an alternative assembly form consisting of
  /// a simple immediate. Currently the only valid forms are ranges [0, N),
  /// where N == 0 indicates that no immediate syntax-form is allowed.
  bool validImm(uint32_t Value) const;
protected:
  const Mapping *Pairs;
  size_t NumPairs;
  uint32_t TooBigImm;
};
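A rough usage sketch (the table and names here are hypothetical; the real tables, such as ATMapper::ATPairs, are defined in the target's .cpp files):

// Hypothetical two-entry table; TooBigImm bounds the plain-immediate form.
static const NamedImmMapper::Mapping DemoPairs[] = {
  {"first_op",  0x01},
  {"second_op", 0x02},
};
static const NamedImmMapper DemoMapper(DemoPairs, /*TooBigImm=*/0x10);

void demo() {
  bool Valid;
  uint32_t Bits = DemoMapper.fromString("first_op", Valid); // 0x01 if Valid
  StringRef Name = DemoMapper.toString(0x02, Valid);        // "second_op" if Valid
  bool OK = DemoMapper.validImm(0x0f); // immediates in [0, 0x10) are accepted
  (void)Bits; (void)Name; (void)OK;
}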

namespace A64AT {
enum ATValues {
  Invalid = -1,   // Op0 Op1 CRn CRm Op2
  S1E1R = 0x43c0, // 01 000 0111 1000 000
  S1E2R = 0x63c0, // 01 100 0111 1000 000
  S1E3R = 0x73c0, // 01 110 0111 1000 000
  S1E1W = 0x43c1, // 01 000 0111 1000 001
  S1E2W = 0x63c1, // 01 100 0111 1000 001
  S1E3W = 0x73c1, // 01 110 0111 1000 001
  S1E0R = 0x43c2, // 01 000 0111 1000 010
  S1E0W = 0x43c3, // 01 000 0111 1000 011
  S12E1R = 0x63c4, // 01 100 0111 1000 100
  S12E1W = 0x63c5, // 01 100 0111 1000 101
  S12E0R = 0x63c6, // 01 100 0111 1000 110
  S12E0W = 0x63c7 // 01 100 0111 1000 111
};

struct ATMapper : NamedImmMapper {
  const static Mapping ATPairs[];

  ATMapper();
};

}
namespace A64DB {
enum DBValues {
  Invalid = -1,
  OSHLD = 0x1,
  OSHST = 0x2,
  OSH = 0x3,
  NSHLD = 0x5,
  NSHST = 0x6,
  NSH = 0x7,
  ISHLD = 0x9,
  ISHST = 0xa,
  ISH = 0xb,
  LD = 0xd,
  ST = 0xe,
  SY = 0xf
};

struct DBarrierMapper : NamedImmMapper {
  const static Mapping DBarrierPairs[];

  DBarrierMapper();
};
}

namespace A64DC {
enum DCValues {
  Invalid = -1, // Op1 CRn CRm Op2
  ZVA = 0x5ba1, // 01 011 0111 0100 001
  IVAC = 0x43b1, // 01 000 0111 0110 001
  ISW = 0x43b2, // 01 000 0111 0110 010
  CVAC = 0x5bd1, // 01 011 0111 1010 001
  CSW = 0x43d2, // 01 000 0111 1010 010
  CVAU = 0x5bd9, // 01 011 0111 1011 001
  CIVAC = 0x5bf1, // 01 011 0111 1110 001
  CISW = 0x43f2 // 01 000 0111 1110 010
};

struct DCMapper : NamedImmMapper {
  const static Mapping DCPairs[];

  DCMapper();
};

}

namespace A64IC {
enum ICValues {
  Invalid = -1, // Op1 CRn CRm Op2
  IALLUIS = 0x0388, // 000 0111 0001 000
  IALLU = 0x03a8, // 000 0111 0101 000
  IVAU = 0x1ba9 // 011 0111 0101 001
};

struct ICMapper : NamedImmMapper {
  const static Mapping ICPairs[];

  ICMapper();
};

static inline bool NeedsRegister(ICValues Val) {
  return Val == IVAU;
}
}

namespace A64ISB {
enum ISBValues {
  Invalid = -1,
  SY = 0xf
};
struct ISBMapper : NamedImmMapper {
  const static Mapping ISBPairs[];

  ISBMapper();
};
}

namespace A64PRFM {
enum PRFMValues {
  Invalid = -1,
  PLDL1KEEP = 0x00,
  PLDL1STRM = 0x01,
  PLDL2KEEP = 0x02,
  PLDL2STRM = 0x03,
  PLDL3KEEP = 0x04,
  PLDL3STRM = 0x05,
  PSTL1KEEP = 0x10,
  PSTL1STRM = 0x11,
  PSTL2KEEP = 0x12,
  PSTL2STRM = 0x13,
  PSTL3KEEP = 0x14,
  PSTL3STRM = 0x15
};

struct PRFMMapper : NamedImmMapper {
  const static Mapping PRFMPairs[];

  PRFMMapper();
};
}

namespace A64PState {
enum PStateValues {
  Invalid = -1,
  SPSel = 0x05,
  DAIFSet = 0x1e,
  DAIFClr = 0x1f
};

struct PStateMapper : NamedImmMapper {
  const static Mapping PStatePairs[];

  PStateMapper();
};

}

namespace A64SE {
enum ShiftExtSpecifiers {
  Invalid = -1,
  LSL,
  LSR,
  ASR,
  ROR,

  UXTB,
  UXTH,
  UXTW,
  UXTX,

  SXTB,
  SXTH,
  SXTW,
  SXTX
};
}

namespace A64SysReg {
enum SysRegROValues {
  MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000
  DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000
  MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000
  OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100
  DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110
  PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110
  PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111
  MIDR_EL1 = 0xc000, // 11 000 0000 0000 000
  CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000
  CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001
  CTR_EL0 = 0xd801, // 11 011 0000 0000 001
  MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101
  REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110
  AIDR_EL1 = 0xc807, // 11 001 0000 0000 111
  DCZID_EL0 = 0xd807, // 11 011 0000 0000 111
  ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000
  ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001
  ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010
  ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011
  ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100
  ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101
  ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110
  ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111
  ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000
  ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001
  ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010
  ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011
  ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100
  ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101
  ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000
  ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001
  ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000
  ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001
  ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100
  ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101
  ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000
  ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
  ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
  ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
  MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000
  MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001
  MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010
  RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001
  RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001
  RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001
  ISR_EL1 = 0xc608, // 11 000 1100 0001 000
  CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001
  CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010
};

enum SysRegWOValues {
  DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000
  OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100
  PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100
};

enum SysRegValues {
  Invalid = -1, // Op0 Op1 CRn CRm Op2
  OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010
  OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010
  TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000
  MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000
  MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010
  DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000
  OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010
  DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000
  DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100
  DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100
  DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100
  DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100
  DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100
  DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100
  DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100
  DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100
  DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100
  DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100
  DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100
  DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100
  DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100
  DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100
  DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100
  DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100
  DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101
  DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101
  DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101
  DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101
  DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101
  DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101
  DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101
  DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101
  DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101
  DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101
  DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101
  DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101
  DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101
  DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101
  DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101
  DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101
  DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110
  DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110
  DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110
  DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110
  DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110
  DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110
  DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110
  DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110
  DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110
  DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110
  DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110
  DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110
  DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110
  DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110
  DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110
  DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110
  DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111
  DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111
  DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111
  DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111
  DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111
  DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111
  DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111
  DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111
  DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111
  DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111
  DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111
  DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111
  DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111
  DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111
  DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111
  DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111
  TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000
  OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100
  DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100
  DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110
  DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110
  CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000
  VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000
  VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101
  CPACR_EL1 = 0xc082, // 11 000 0001 0000 010
  SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000
  SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000
  SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000
  ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001
  ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001
  ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001
  HCR_EL2 = 0xe088, // 11 100 0001 0001 000
  SCR_EL3 = 0xf088, // 11 110 0001 0001 000
  MDCR_EL2 = 0xe089, // 11 100 0001 0001 001
  SDER32_EL3 = 0xf089, // 11 110 0001 0001 001
  CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010
  CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010
  HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011
  HACR_EL2 = 0xe08f, // 11 100 0001 0001 111
  MDCR_EL3 = 0xf099, // 11 110 0001 0011 001
  TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000
  TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000
  TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000
  TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001
  TCR_EL1 = 0xc102, // 11 000 0010 0000 010
  TCR_EL2 = 0xe102, // 11 100 0010 0000 010
  TCR_EL3 = 0xf102, // 11 110 0010 0000 010
  VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000
  VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010
  DACR32_EL2 = 0xe180, // 11 100 0011 0000 000
  SPSR_EL1 = 0xc200, // 11 000 0100 0000 000
  SPSR_EL2 = 0xe200, // 11 100 0100 0000 000
  SPSR_EL3 = 0xf200, // 11 110 0100 0000 000
  ELR_EL1 = 0xc201, // 11 000 0100 0000 001
  ELR_EL2 = 0xe201, // 11 100 0100 0000 001
  ELR_EL3 = 0xf201, // 11 110 0100 0000 001
  SP_EL0 = 0xc208, // 11 000 0100 0001 000
  SP_EL1 = 0xe208, // 11 100 0100 0001 000
  SP_EL2 = 0xf208, // 11 110 0100 0001 000
  SPSel = 0xc210, // 11 000 0100 0010 000
  NZCV = 0xda10, // 11 011 0100 0010 000
  DAIF = 0xda11, // 11 011 0100 0010 001
  CurrentEL = 0xc212, // 11 000 0100 0010 010
  SPSR_irq = 0xe218, // 11 100 0100 0011 000
  SPSR_abt = 0xe219, // 11 100 0100 0011 001
  SPSR_und = 0xe21a, // 11 100 0100 0011 010
  SPSR_fiq = 0xe21b, // 11 100 0100 0011 011
  FPCR = 0xda20, // 11 011 0100 0100 000
  FPSR = 0xda21, // 11 011 0100 0100 001
  DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000
  DLR_EL0 = 0xda29, // 11 011 0100 0101 001
  IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001
  AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000
  AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000
  AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000
  AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001
  AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001
  AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001
  ESR_EL1 = 0xc290, // 11 000 0101 0010 000
  ESR_EL2 = 0xe290, // 11 100 0101 0010 000
  ESR_EL3 = 0xf290, // 11 110 0101 0010 000
  FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000
  FAR_EL1 = 0xc300, // 11 000 0110 0000 000
  FAR_EL2 = 0xe300, // 11 100 0110 0000 000
  FAR_EL3 = 0xf300, // 11 110 0110 0000 000
  HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100
  PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000
  PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000
  PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001
  PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010
  PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011
  PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101
  PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000
  PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001
  PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010
  PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000
  PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001
  PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010
  PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011
  MAIR_EL1 = 0xc510, // 11 000 1010 0010 000
  MAIR_EL2 = 0xe510, // 11 100 1010 0010 000
  MAIR_EL3 = 0xf510, // 11 110 1010 0010 000
  AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000
  AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000
  AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000
  VBAR_EL1 = 0xc600, // 11 000 1100 0000 000
  VBAR_EL2 = 0xe600, // 11 100 1100 0000 000
  VBAR_EL3 = 0xf600, // 11 110 1100 0000 000
  RMR_EL1 = 0xc602, // 11 000 1100 0000 010
  RMR_EL2 = 0xe602, // 11 100 1100 0000 010
  RMR_EL3 = 0xf602, // 11 110 1100 0000 010
  CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001
  TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010
  TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010
  TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010
  TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011
  TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100
  CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000
  CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011
  CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000
  CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000
  CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000
  CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000
  CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000
  CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001
  CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001
  CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001
  CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010
  CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010
  CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010
  CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000
  CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001
  CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010
  PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000
  PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001
  PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010
  PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011
  PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100
  PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101
  PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110
  PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111
  PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000
  PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001
  PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010
  PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011
  PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100
  PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101
  PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110
  PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111
  PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000
  PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001
  PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010
  PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011
  PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100
  PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101
  PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110
  PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111
  PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000
  PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001
  PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010
  PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011
  PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100
  PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101
  PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110
  PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111
  PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000
  PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001
  PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010
  PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011
  PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100
  PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101
  PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110
  PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111
  PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000
  PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001
  PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010
  PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011
  PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100
  PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101
  PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110
  PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111
  PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000
  PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001
  PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010
  PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011
  PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100
  PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101
  PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110
  PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111
  PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000
  PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001
  PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010
  PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011
  PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100
  PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101
  PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110
};

// Note that these do not inherit from NamedImmMapper. This class is
// sufficiently different in its behaviour that I don't believe it's worth
// burdening the common NamedImmMapper with abstractions only needed in
// this one case.
struct SysRegMapper {
  static const NamedImmMapper::Mapping SysRegPairs[];

  const NamedImmMapper::Mapping *InstPairs;
  size_t NumInstPairs;

  SysRegMapper() {}
  uint32_t fromString(StringRef Name, bool &Valid) const;
  std::string toString(uint32_t Bits, bool &Valid) const;
};

struct MSRMapper : SysRegMapper {
  static const NamedImmMapper::Mapping MSRPairs[];
  MSRMapper();
};

struct MRSMapper : SysRegMapper {
  static const NamedImmMapper::Mapping MRSPairs[];
  MRSMapper();
};

uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
}
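For example, a hedged usage sketch of the declarations above (exact case handling is up to the mapper's implementation):

#include <string>

void sysRegDemo() {
  bool Valid;
  A64SysReg::MRSMapper Mapper;
  // Name -> encoding, as the assembly parser would for "mrs x0, midr_el1".
  uint32_t Bits = Mapper.fromString("midr_el1", Valid);
  // Encoding -> name, as the disassembler would need when printing MRS.
  std::string Name = Mapper.toString(A64SysReg::MIDR_EL1, Valid);
  (void)Bits; (void)Name;
}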

namespace A64TLBI {
enum TLBIValues {
  Invalid = -1, // Op0 Op1 CRn CRm Op2
  IPAS2E1IS = 0x6401, // 01 100 1000 0000 001
  IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101
  VMALLE1IS = 0x4418, // 01 000 1000 0011 000
  ALLE2IS = 0x6418, // 01 100 1000 0011 000
  ALLE3IS = 0x7418, // 01 110 1000 0011 000
  VAE1IS = 0x4419, // 01 000 1000 0011 001
  VAE2IS = 0x6419, // 01 100 1000 0011 001
  VAE3IS = 0x7419, // 01 110 1000 0011 001
  ASIDE1IS = 0x441a, // 01 000 1000 0011 010
  VAAE1IS = 0x441b, // 01 000 1000 0011 011
  ALLE1IS = 0x641c, // 01 100 1000 0011 100
  VALE1IS = 0x441d, // 01 000 1000 0011 101
  VALE2IS = 0x641d, // 01 100 1000 0011 101
  VALE3IS = 0x741d, // 01 110 1000 0011 101
  VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110
  VAALE1IS = 0x441f, // 01 000 1000 0011 111
  IPAS2E1 = 0x6421, // 01 100 1000 0100 001
  IPAS2LE1 = 0x6425, // 01 100 1000 0100 101
  VMALLE1 = 0x4438, // 01 000 1000 0111 000
  ALLE2 = 0x6438, // 01 100 1000 0111 000
  ALLE3 = 0x7438, // 01 110 1000 0111 000
  VAE1 = 0x4439, // 01 000 1000 0111 001
  VAE2 = 0x6439, // 01 100 1000 0111 001
  VAE3 = 0x7439, // 01 110 1000 0111 001
  ASIDE1 = 0x443a, // 01 000 1000 0111 010
  VAAE1 = 0x443b, // 01 000 1000 0111 011
  ALLE1 = 0x643c, // 01 100 1000 0111 100
  VALE1 = 0x443d, // 01 000 1000 0111 101
  VALE2 = 0x643d, // 01 100 1000 0111 101
  VALE3 = 0x743d, // 01 110 1000 0111 101
  VMALLS12E1 = 0x643e, // 01 100 1000 0111 110
  VAALE1 = 0x443f // 01 000 1000 0111 111
};

struct TLBIMapper : NamedImmMapper {
  const static Mapping TLBIPairs[];

  TLBIMapper();
};

static inline bool NeedsRegister(TLBIValues Val) {
  switch (Val) {
  case VMALLE1IS:
  case ALLE2IS:
  case ALLE3IS:
  case ALLE1IS:
  case VMALLS12E1IS:
  case VMALLE1:
  case ALLE2:
  case ALLE3:
  case ALLE1:
  case VMALLS12E1:
    return false;
  default:
    return true;
  }
}
}

namespace AArch64II {

enum TOF {
  //===--------------------------------------------------------------===//
  // AArch64 Specific MachineOperand flags.

  MO_NO_FLAG,

  // MO_GOT - Represents a relocation referring to the GOT entry of a given
  // symbol. Used in adrp.
  MO_GOT,

  // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the
  // GOT entry of a given symbol. Used in ldr only.
  MO_GOT_LO12,

  // MO_DTPREL_* - Represents a relocation referring to the offset from a
  // module's dynamic thread pointer. Used in the local-dynamic TLS access
  // model.
  MO_DTPREL_G1,
  MO_DTPREL_G0_NC,

  // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry
  // providing the offset of a variable from the thread-pointer. Used in the
  // initial-exec TLS model, where this offset is assigned in the static
  // thread block and thus known by the dynamic linker.
  MO_GOTTPREL,
  MO_GOTTPREL_LO12,

  // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing
  // a TLS descriptor chosen by the dynamic linker. Used for the
  // general-dynamic and local-dynamic TLS access models, where very little is
  // known at link-time.
  MO_TLSDESC,
  MO_TLSDESC_LO12,

  // MO_TPREL_* - Represents a relocation referring to the offset of a
  // variable from the thread pointer itself. Used in the local-exec TLS
  // access model.
  MO_TPREL_G1,
  MO_TPREL_G0_NC,

  // MO_LO12 - On a symbol operand, this represents a relocation containing
  // the lower 12 bits of the address. Used in add/sub/ldr/str.
  MO_LO12
};
}

class APFloat;

namespace A64Imms {
bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits);

inline bool isFPImm(const APFloat &Val) {
  uint32_t Imm8;
  return isFPImm(Val, Imm8);
}

bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits);
bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm);

bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);

// We sometimes want to know whether the immediate is representable with a
// MOVN but *not* with a MOVZ (because that would take priority).
bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);

}
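To illustrate what isMOVZImm is checking, a standalone sketch (my own, not this header's implementation): a value is MOVZ-representable exactly when all of its set bits fit in one 16-bit chunk at shift 0, 16, 32 or 48.

#include <cstdint>

// Try each legal shift; a 32-bit register permits only shifts 0 and 16.
static bool isMOVZImmSketch(int RegWidth, uint64_t Value, int &UImm16,
                            int &Shift) {
  for (int S = 0; S < RegWidth; S += 16) {
    if ((Value & ~(0xffffULL << S)) == 0) {
      UImm16 = static_cast<int>((Value >> S) & 0xffff);
      Shift = S;
      return true;
    }
  }
  return false;
}
// e.g. isMOVZImmSketch(64, 0x12340000, U, S) succeeds with U=0x1234, S=16.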

} // end namespace llvm

#endif
287
lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
Normal file
@ -0,0 +1,287 @@

//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

namespace {
class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
  AArch64ELFObjectWriter(uint8_t OSABI);

  virtual ~AArch64ELFObjectWriter();

protected:
  virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                                bool IsPCRel, bool IsRelocWithSymbol,
                                int64_t Addend) const;
private:
};
}

AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI)
  : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
                            /*HasRelocationAddend*/ true)
{}

AArch64ELFObjectWriter::~AArch64ELFObjectWriter()
{}

unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
                                              const MCFixup &Fixup,
                                              bool IsPCRel,
                                              bool IsRelocWithSymbol,
                                              int64_t Addend) const {
  unsigned Type;
  if (IsPCRel) {
    switch ((unsigned)Fixup.getKind()) {
    default:
      llvm_unreachable("Unimplemented fixup -> relocation");
    case FK_Data_8:
      return ELF::R_AARCH64_PREL64;
    case FK_Data_4:
      return ELF::R_AARCH64_PREL32;
    case FK_Data_2:
      return ELF::R_AARCH64_PREL16;
    case AArch64::fixup_a64_ld_prel:
      Type = ELF::R_AARCH64_LD_PREL_LO19;
      break;
    case AArch64::fixup_a64_adr_prel:
      Type = ELF::R_AARCH64_ADR_PREL_LO21;
      break;
    case AArch64::fixup_a64_adr_prel_page:
      Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
      break;
    case AArch64::fixup_a64_adr_prel_got_page:
      Type = ELF::R_AARCH64_ADR_GOT_PAGE;
      break;
    case AArch64::fixup_a64_tstbr:
      Type = ELF::R_AARCH64_TSTBR14;
      break;
    case AArch64::fixup_a64_condbr:
      Type = ELF::R_AARCH64_CONDBR19;
      break;
    case AArch64::fixup_a64_uncondbr:
      Type = ELF::R_AARCH64_JUMP26;
      break;
    case AArch64::fixup_a64_call:
      Type = ELF::R_AARCH64_CALL26;
      break;
    case AArch64::fixup_a64_adr_gottprel_page:
      Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
      break;
    case AArch64::fixup_a64_ld_gottprel_prel19:
      Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
      break;
    case AArch64::fixup_a64_tlsdesc_adr_page:
      Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
      break;
    }
  } else {
    switch ((unsigned)Fixup.getKind()) {
    default:
      llvm_unreachable("Unimplemented fixup -> relocation");
    case FK_Data_8:
      return ELF::R_AARCH64_ABS64;
    case FK_Data_4:
      return ELF::R_AARCH64_ABS32;
    case FK_Data_2:
      return ELF::R_AARCH64_ABS16;
    case AArch64::fixup_a64_add_lo12:
      Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
      break;
    case AArch64::fixup_a64_ld64_got_lo12_nc:
      Type = ELF::R_AARCH64_LD64_GOT_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst8_lo12:
      Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst16_lo12:
      Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst32_lo12:
      Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst64_lo12:
      Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst128_lo12:
      Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC;
      break;
    case AArch64::fixup_a64_movw_uabs_g0:
      Type = ELF::R_AARCH64_MOVW_UABS_G0;
      break;
    case AArch64::fixup_a64_movw_uabs_g0_nc:
      Type = ELF::R_AARCH64_MOVW_UABS_G0_NC;
      break;
    case AArch64::fixup_a64_movw_uabs_g1:
      Type = ELF::R_AARCH64_MOVW_UABS_G1;
      break;
    case AArch64::fixup_a64_movw_uabs_g1_nc:
      Type = ELF::R_AARCH64_MOVW_UABS_G1_NC;
      break;
    case AArch64::fixup_a64_movw_uabs_g2:
      Type = ELF::R_AARCH64_MOVW_UABS_G2;
      break;
    case AArch64::fixup_a64_movw_uabs_g2_nc:
      Type = ELF::R_AARCH64_MOVW_UABS_G2_NC;
      break;
    case AArch64::fixup_a64_movw_uabs_g3:
      Type = ELF::R_AARCH64_MOVW_UABS_G3;
      break;
    case AArch64::fixup_a64_movw_sabs_g0:
      Type = ELF::R_AARCH64_MOVW_SABS_G0;
      break;
    case AArch64::fixup_a64_movw_sabs_g1:
      Type = ELF::R_AARCH64_MOVW_SABS_G1;
      break;
    case AArch64::fixup_a64_movw_sabs_g2:
      Type = ELF::R_AARCH64_MOVW_SABS_G2;
      break;

    // TLS Local-dynamic block
    case AArch64::fixup_a64_movw_dtprel_g2:
      Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
      break;
    case AArch64::fixup_a64_movw_dtprel_g1:
      Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
      break;
    case AArch64::fixup_a64_movw_dtprel_g1_nc:
      Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
      break;
    case AArch64::fixup_a64_movw_dtprel_g0:
      Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
      break;
    case AArch64::fixup_a64_movw_dtprel_g0_nc:
      Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
      break;
    case AArch64::fixup_a64_add_dtprel_hi12:
      Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12;
      break;
    case AArch64::fixup_a64_add_dtprel_lo12:
      Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
      break;
    case AArch64::fixup_a64_add_dtprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst8_dtprel_lo12:
      Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
      break;
    case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst16_dtprel_lo12:
      Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
      break;
    case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst32_dtprel_lo12:
      Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
      break;
    case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst64_dtprel_lo12:
      Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
      break;
    case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
      break;

    // TLS initial-exec block
    case AArch64::fixup_a64_movw_gottprel_g1:
      Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
      break;
    case AArch64::fixup_a64_movw_gottprel_g0_nc:
      Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
      break;
    case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
      break;

    // TLS local-exec block
    case AArch64::fixup_a64_movw_tprel_g2:
      Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
      break;
    case AArch64::fixup_a64_movw_tprel_g1:
      Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
      break;
    case AArch64::fixup_a64_movw_tprel_g1_nc:
      Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
      break;
    case AArch64::fixup_a64_movw_tprel_g0:
      Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
      break;
    case AArch64::fixup_a64_movw_tprel_g0_nc:
      Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
      break;
    case AArch64::fixup_a64_add_tprel_hi12:
      Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12;
      break;
    case AArch64::fixup_a64_add_tprel_lo12:
      Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
      break;
    case AArch64::fixup_a64_add_tprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst8_tprel_lo12:
      Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
      break;
    case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst16_tprel_lo12:
      Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
      break;
    case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst32_tprel_lo12:
      Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
      break;
    case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
      break;
    case AArch64::fixup_a64_ldst64_tprel_lo12:
      Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
      break;
    case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
      Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
      break;

    // TLS general-dynamic block
    case AArch64::fixup_a64_tlsdesc_adr_page:
      Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
      break;
    case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
      Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
      break;
    case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
      Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
      break;
    case AArch64::fixup_a64_tlsdesc_call:
      Type = ELF::R_AARCH64_TLSDESC_CALL;
      break;
    }
  }

  return Type;
}

MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
                                                   uint8_t OSABI) {
  MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI);
  return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
}
160
lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
Normal file
@ -0,0 +1,160 @@

//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file assembles .s files and emits AArch64 ELF .o object files. It
// differs from the generic ELF streamer in that it emits mapping symbols
// ($x and $d) to delimit regions of data and code.
//
//===----------------------------------------------------------------------===//

#include "llvm/MC/MCELFStreamer.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {

/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// AArch64 ELF ABI:
/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf
///
/// In brief: $x or $d should be emitted at the start of each contiguous region
/// of A64 code or data in a section. In practice, this emission does not rely
/// on explicit assembler directives but on inherent properties of the
/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an
/// instruction).
///
/// As a result this system is orthogonal to the DataRegion infrastructure used
/// by MachO. Beware!
class AArch64ELFStreamer : public MCELFStreamer {
public:
  AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
                     raw_ostream &OS, MCCodeEmitter *Emitter)
    : MCELFStreamer(Context, TAB, OS, Emitter),
      MappingSymbolCounter(0), LastEMS(EMS_None) {
  }

  ~AArch64ELFStreamer() {}

  virtual void ChangeSection(const MCSection *Section) {
    // We have to keep track of the mapping symbol state of any sections we
    // use. Each one should start off as EMS_None, which is provided as the
    // default constructor by DenseMap::lookup.
    LastMappingSymbols[getPreviousSection()] = LastEMS;
    LastEMS = LastMappingSymbols.lookup(Section);

    MCELFStreamer::ChangeSection(Section);
  }

  /// This function is the one used to emit instruction data into the ELF
  /// streamer. We override it to add the appropriate mapping symbol ($x) if
  /// necessary.
  virtual void EmitInstruction(const MCInst& Inst) {
    EmitA64MappingSymbol();
    MCELFStreamer::EmitInstruction(Inst);
  }

  /// This is one of the functions used to emit data into an ELF section, so
  /// the AArch64 streamer overrides it to add the appropriate mapping symbol
  /// ($d) if necessary.
  virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
    EmitDataMappingSymbol();
    MCELFStreamer::EmitBytes(Data, AddrSpace);
  }

  /// This is one of the functions used to emit data into an ELF section, so
  /// the AArch64 streamer overrides it to add the appropriate mapping symbol
  /// ($d) if necessary.
  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
                             unsigned AddrSpace) {
    EmitDataMappingSymbol();
    MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
  }

private:
  enum ElfMappingSymbol {
    EMS_None,
    EMS_A64,
    EMS_Data
  };

  void EmitDataMappingSymbol() {
    if (LastEMS == EMS_Data) return;
    EmitMappingSymbol("$d");
    LastEMS = EMS_Data;
  }

  void EmitA64MappingSymbol() {
    if (LastEMS == EMS_A64) return;
    EmitMappingSymbol("$x");
    LastEMS = EMS_A64;
  }

  void EmitMappingSymbol(StringRef Name) {
    MCSymbol *Start = getContext().CreateTempSymbol();
    EmitLabel(Start);

    MCSymbol *Symbol =
      getContext().GetOrCreateSymbol(Name + "." +
                                     Twine(MappingSymbolCounter++));

    MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
    MCELF::SetType(SD, ELF::STT_NOTYPE);
    MCELF::SetBinding(SD, ELF::STB_LOCAL);
    SD.setExternal(false);
    Symbol->setSection(*getCurrentSection());

    const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
    Symbol->setVariableValue(Value);
  }

  int64_t MappingSymbolCounter;

  DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
  ElfMappingSymbol LastEMS;
};
}

namespace llvm {
MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
                                        raw_ostream &OS, MCCodeEmitter *Emitter,
                                        bool RelaxAll, bool NoExecStack) {
  AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
  if (RelaxAll)
    S->getAssembler().setRelaxAll(true);
  if (NoExecStack)
    S->getAssembler().setNoExecStack(true);
  return S;
}
}
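The effect of the mapping-symbol state machine can be seen in a standalone mirror of its logic (illustrative only, not part of this file): one $x opens each run of code, one $d each run of data, and nothing is emitted inside a run.

#include <cstdio>

enum ElfMappingSymbol { EMS_None, EMS_A64, EMS_Data };

static void noteEmission(ElfMappingSymbol &Last, ElfMappingSymbol Now) {
  if (Last == Now) return;               // still inside the same kind of run
  std::puts(Now == EMS_A64 ? "$x" : "$d");
  Last = Now;
}

int main() {
  ElfMappingSymbol Last = EMS_None;
  noteEmission(Last, EMS_A64);  // first instruction -> "$x"
  noteEmission(Last, EMS_A64);  // next instruction  -> (nothing)
  noteEmission(Last, EMS_Data); // .word directive   -> "$d"
  noteEmission(Last, EMS_A64);  // code again        -> "$x"
  return 0;
}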

27
lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
Normal file
@ -0,0 +1,27 @@

//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements ELF streamer information for the AArch64 backend.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64_ELF_STREAMER_H
#define LLVM_AARCH64_ELF_STREAMER_H

#include "llvm/MC/MCELFStreamer.h"

namespace llvm {

MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
                                        raw_ostream &OS,
                                        MCCodeEmitter *Emitter,
                                        bool RelaxAll, bool NoExecStack);
}

#endif // LLVM_AARCH64_ELF_STREAMER_H
108
lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
Normal file
@ -0,0 +1,108 @@

//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H
#define LLVM_AARCH64_AARCH64FIXUPKINDS_H

#include "llvm/MC/MCFixup.h"

namespace llvm {
namespace AArch64 {
enum Fixups {
  fixup_a64_ld_prel = FirstTargetFixupKind,
  fixup_a64_adr_prel,
  fixup_a64_adr_prel_page,

  fixup_a64_add_lo12,

  fixup_a64_ldst8_lo12,
  fixup_a64_ldst16_lo12,
  fixup_a64_ldst32_lo12,
  fixup_a64_ldst64_lo12,
  fixup_a64_ldst128_lo12,

  fixup_a64_tstbr,
  fixup_a64_condbr,
  fixup_a64_uncondbr,
  fixup_a64_call,

  fixup_a64_movw_uabs_g0,
  fixup_a64_movw_uabs_g0_nc,
  fixup_a64_movw_uabs_g1,
  fixup_a64_movw_uabs_g1_nc,
  fixup_a64_movw_uabs_g2,
  fixup_a64_movw_uabs_g2_nc,
  fixup_a64_movw_uabs_g3,

  fixup_a64_movw_sabs_g0,
  fixup_a64_movw_sabs_g1,
  fixup_a64_movw_sabs_g2,

  fixup_a64_adr_prel_got_page,
  fixup_a64_ld64_got_lo12_nc,

  // Produce offsets relative to the module's dynamic TLS area.
  fixup_a64_movw_dtprel_g2,
  fixup_a64_movw_dtprel_g1,
  fixup_a64_movw_dtprel_g1_nc,
  fixup_a64_movw_dtprel_g0,
  fixup_a64_movw_dtprel_g0_nc,
  fixup_a64_add_dtprel_hi12,
  fixup_a64_add_dtprel_lo12,
  fixup_a64_add_dtprel_lo12_nc,
  fixup_a64_ldst8_dtprel_lo12,
  fixup_a64_ldst8_dtprel_lo12_nc,
  fixup_a64_ldst16_dtprel_lo12,
  fixup_a64_ldst16_dtprel_lo12_nc,
  fixup_a64_ldst32_dtprel_lo12,
  fixup_a64_ldst32_dtprel_lo12_nc,
  fixup_a64_ldst64_dtprel_lo12,
  fixup_a64_ldst64_dtprel_lo12_nc,

  // Produce the GOT entry containing a variable's address in TLS's
  // initial-exec mode.
  fixup_a64_movw_gottprel_g1,
  fixup_a64_movw_gottprel_g0_nc,
  fixup_a64_adr_gottprel_page,
  fixup_a64_ld64_gottprel_lo12_nc,
  fixup_a64_ld_gottprel_prel19,

  // Produce offsets relative to the thread pointer: TPIDR_EL0.
  fixup_a64_movw_tprel_g2,
  fixup_a64_movw_tprel_g1,
  fixup_a64_movw_tprel_g1_nc,
  fixup_a64_movw_tprel_g0,
  fixup_a64_movw_tprel_g0_nc,
  fixup_a64_add_tprel_hi12,
  fixup_a64_add_tprel_lo12,
  fixup_a64_add_tprel_lo12_nc,
  fixup_a64_ldst8_tprel_lo12,
  fixup_a64_ldst8_tprel_lo12_nc,
  fixup_a64_ldst16_tprel_lo12,
  fixup_a64_ldst16_tprel_lo12_nc,
  fixup_a64_ldst32_tprel_lo12,
  fixup_a64_ldst32_tprel_lo12_nc,
  fixup_a64_ldst64_tprel_lo12,
  fixup_a64_ldst64_tprel_lo12_nc,

  // Produce the special fixups used by the general-dynamic TLS model.
  fixup_a64_tlsdesc_adr_page,
  fixup_a64_tlsdesc_ld64_lo12_nc,
  fixup_a64_tlsdesc_add_lo12_nc,
  fixup_a64_tlsdesc_call,

  // Marker
  LastTargetFixupKind,
  NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
}
}

#endif
41
lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
Normal file
@ -0,0 +1,41 @@

//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declarations of the AArch64MCAsmInfo properties.
//
//===----------------------------------------------------------------------===//

#include "AArch64MCAsmInfo.h"

using namespace llvm;

AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() {
  PointerSize = 8;

  // ".comm align is in bytes but .align is pow-2."
  AlignmentIsInBytes = false;

  CommentString = "//";
  PrivateGlobalPrefix = ".L";
  Code32Directive = ".code\t32";

  Data16bitsDirective = "\t.hword\t";
  Data32bitsDirective = "\t.word\t";
  Data64bitsDirective = "\t.xword\t";

  UseDataRegionDirectives = true;

  WeakRefDirective = "\t.weak\t";

  HasLEB128 = true;
  SupportsDebugInformation = true;

  // Exception handling
  ExceptionsType = ExceptionHandling::DwarfCFI;
}
27
lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
Normal file
@ -0,0 +1,27 @@

//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the AArch64MCAsmInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64TARGETASMINFO_H
#define LLVM_AARCH64TARGETASMINFO_H

#include "llvm/MC/MCAsmInfo.h"

namespace llvm {

struct AArch64ELFMCAsmInfo : public MCAsmInfo {
  explicit AArch64ELFMCAsmInfo();
};

} // namespace llvm

#endif
517
lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
Normal file
@ -0,0 +1,517 @@
|
||||
//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the AArch64MCCodeEmitter class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "mccodeemitter"
|
||||
#include "MCTargetDesc/AArch64BaseInfo.h"
|
||||
#include "MCTargetDesc/AArch64FixupKinds.h"
|
||||
#include "MCTargetDesc/AArch64MCExpr.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class AArch64MCCodeEmitter : public MCCodeEmitter {
|
||||
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
|
||||
void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
|
||||
const MCInstrInfo &MCII;
|
||||
const MCSubtargetInfo &STI;
|
||||
MCContext &Ctx;
|
||||
|
||||
public:
|
||||
AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
|
||||
MCContext &ctx)
|
||||
: MCII(mcii), STI(sti), Ctx(ctx) {
|
||||
}
|
||||
|
||||
~AArch64MCCodeEmitter() {}
|
||||
|
||||
unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
template<int MemSize>
|
||||
unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
||||
return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize);
|
||||
}
|
||||
|
||||
unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
int MemSize) const;
|
||||
|
||||
unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
|
||||
// Labels are handled mostly the same way: a symbol is needed, and
|
||||
// just gets some fixup attached.
|
||||
template<AArch64::Fixups fixupDesired>
|
||||
unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
|
||||
unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
|
||||
unsigned getAddressWithFixup(const MCOperand &MO,
|
||||
unsigned FixupKind,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
|
||||
// getBinaryCodeForInstr - TableGen'erated function for getting the
|
||||
// binary encoding for an instruction.
|
||||
uint64_t getBinaryCodeForInstr(const MCInst &MI,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
/// getMachineOpValue - Return binary encoding of operand. If the machine
|
||||
/// operand requires relocation, record the relocation and return zero.
|
||||
unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
|
||||
void EmitByte(unsigned char C, raw_ostream &OS) const {
|
||||
OS << (char)C;
|
||||
}
|
||||
|
||||
void EmitInstruction(uint32_t Val, raw_ostream &OS) const {
|
||||
// Output the constant in little endian byte order.
|
||||
for (unsigned i = 0; i != 4; ++i) {
|
||||
EmitByte(Val & 0xff, OS);
|
||||
Val >>= 8;
|
||||
}
|
||||
}

  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups) const;

  unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const;

  template<int hasRs, int hasRt2> unsigned
  fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;

  unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const;

  unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const;

};

} // end anonymous namespace

unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO,
                                       unsigned FixupKind,
                                       SmallVectorImpl<MCFixup> &Fixups) const {
  if (!MO.isExpr()) {
    // This can occur for manually decoded or constructed MCInsts, but neither
    // the assembly-parser nor instruction selection will currently produce an
    // MCInst that's not a symbol reference.
    assert(MO.isImm() && "Unexpected address requested");
    return MO.getImm();
  }

  const MCExpr *Expr = MO.getExpr();
  MCFixupKind Kind = MCFixupKind(FixupKind);
  Fixups.push_back(MCFixup::Create(0, Expr, Kind));

  return 0;
}

unsigned AArch64MCCodeEmitter::
getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
                       SmallVectorImpl<MCFixup> &Fixups,
                       int MemSize) const {
  const MCOperand &ImmOp = MI.getOperand(OpIdx);
  if (ImmOp.isImm())
    return ImmOp.getImm();

  assert(ImmOp.isExpr() && "Unexpected operand type");
  const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr());
  unsigned FixupKind;

  switch (Expr->getKind()) {
  default: llvm_unreachable("Unexpected operand modifier");
  case AArch64MCExpr::VK_AARCH64_LO12: {
    unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12,
                                AArch64::fixup_a64_ldst16_lo12,
                                AArch64::fixup_a64_ldst32_lo12,
                                AArch64::fixup_a64_ldst64_lo12,
                                AArch64::fixup_a64_ldst128_lo12 };
    assert(MemSize <= 16 && "Invalid fixup for operation");
    FixupKind = FixupsBySize[Log2_32(MemSize)];
    break;
  }
  case AArch64MCExpr::VK_AARCH64_GOT_LO12:
    assert(MemSize == 8 && "Invalid fixup for operation");
    FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc;
    break;
  case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: {
    unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12,
                                AArch64::fixup_a64_ldst16_dtprel_lo12,
                                AArch64::fixup_a64_ldst32_dtprel_lo12,
                                AArch64::fixup_a64_ldst64_dtprel_lo12 };
    assert(MemSize <= 8 && "Invalid fixup for operation");
    FixupKind = FixupsBySize[Log2_32(MemSize)];
    break;
  }
  case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: {
    unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc,
                                AArch64::fixup_a64_ldst16_dtprel_lo12_nc,
                                AArch64::fixup_a64_ldst32_dtprel_lo12_nc,
                                AArch64::fixup_a64_ldst64_dtprel_lo12_nc };
    assert(MemSize <= 8 && "Invalid fixup for operation");
    FixupKind = FixupsBySize[Log2_32(MemSize)];
    break;
  }
  case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12:
    assert(MemSize == 8 && "Invalid fixup for operation");
    FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc;
    break;
  case AArch64MCExpr::VK_AARCH64_TPREL_LO12: {
    unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12,
                                AArch64::fixup_a64_ldst16_tprel_lo12,
                                AArch64::fixup_a64_ldst32_tprel_lo12,
                                AArch64::fixup_a64_ldst64_tprel_lo12 };
    assert(MemSize <= 8 && "Invalid fixup for operation");
    FixupKind = FixupsBySize[Log2_32(MemSize)];
    break;
  }
  case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: {
    unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc,
                                AArch64::fixup_a64_ldst16_tprel_lo12_nc,
                                AArch64::fixup_a64_ldst32_tprel_lo12_nc,
                                AArch64::fixup_a64_ldst64_tprel_lo12_nc };
    assert(MemSize <= 8 && "Invalid fixup for operation");
    FixupKind = FixupsBySize[Log2_32(MemSize)];
    break;
  }
  case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
    assert(MemSize == 8 && "Invalid fixup for operation");
    FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc;
    break;
  }

  return getAddressWithFixup(ImmOp, FixupKind, Fixups);
}

unsigned
AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
                                       SmallVectorImpl<MCFixup> &Fixups) const {
  const MCOperand &MO = MI.getOperand(OpIdx);
  if (MO.isImm())
    return static_cast<unsigned>(MO.getImm());

  assert(MO.isExpr());

  unsigned FixupKind = 0;
  switch (cast<AArch64MCExpr>(MO.getExpr())->getKind()) {
  default: llvm_unreachable("Invalid expression modifier");
  case AArch64MCExpr::VK_AARCH64_LO12:
    FixupKind = AArch64::fixup_a64_add_lo12; break;
  case AArch64MCExpr::VK_AARCH64_DTPREL_HI12:
    FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break;
  case AArch64MCExpr::VK_AARCH64_DTPREL_LO12:
    FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break;
  case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC:
    FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break;
  case AArch64MCExpr::VK_AARCH64_TPREL_HI12:
    FixupKind = AArch64::fixup_a64_add_tprel_hi12; break;
  case AArch64MCExpr::VK_AARCH64_TPREL_LO12:
    FixupKind = AArch64::fixup_a64_add_tprel_lo12; break;
  case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC:
    FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break;
  case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
    FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break;
  }

  return getAddressWithFixup(MO, FixupKind, Fixups);
}

unsigned
AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
                                       SmallVectorImpl<MCFixup> &Fixups) const {

  const MCOperand &MO = MI.getOperand(OpIdx);
  if (MO.isImm())
    return static_cast<unsigned>(MO.getImm());

  assert(MO.isExpr());

  unsigned Modifier = AArch64MCExpr::VK_AARCH64_None;
  if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr()))
    Modifier = Expr->getKind();

  unsigned FixupKind = 0;
  switch (Modifier) {
  case AArch64MCExpr::VK_AARCH64_None:
    FixupKind = AArch64::fixup_a64_adr_prel_page;
    break;
  case AArch64MCExpr::VK_AARCH64_GOT:
    FixupKind = AArch64::fixup_a64_adr_prel_got_page;
    break;
  case AArch64MCExpr::VK_AARCH64_GOTTPREL:
    FixupKind = AArch64::fixup_a64_adr_gottprel_page;
    break;
  case AArch64MCExpr::VK_AARCH64_TLSDESC:
    FixupKind = AArch64::fixup_a64_tlsdesc_adr_page;
    break;
  default:
    llvm_unreachable("Unknown symbol reference kind for ADRP instruction");
  }

  return getAddressWithFixup(MO, FixupKind, Fixups);
}

unsigned
AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
                                       SmallVectorImpl<MCFixup> &Fixups) const {

  const MCOperand &MO = MI.getOperand(OpIdx);
  assert(MO.isImm() && "Only immediate expected for shift");

  return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6;
}

unsigned
AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
                                       SmallVectorImpl<MCFixup> &Fixups) const {

  const MCOperand &MO = MI.getOperand(OpIdx);
  assert(MO.isImm() && "Only immediate expected for shift");

  return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
}
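
// Note on the two functions above: "LSL Rd, Rn, #shift" is an alias of UBFM,
// whose operands for the 32-bit form are immr = (32 - shift) & 0x1f and
// imms = 31 - shift. That is exactly what getBitfield32LSLOpValue packs:
// (32 - imm) & 0x1f in the low six bits and (31 - imm) in the bits above
// them, e.g. LSL #4 yields 28 | (27 << 6).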

template<AArch64::Fixups fixupDesired> unsigned
AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
                                      unsigned OpIdx,
                                      SmallVectorImpl<MCFixup> &Fixups) const {
  const MCOperand &MO = MI.getOperand(OpIdx);

  if (MO.isExpr())
    return getAddressWithFixup(MO, fixupDesired, Fixups);

  assert(MO.isImm());
  return MO.getImm();
}

unsigned
AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI,
                                       unsigned OpIdx,
                                       SmallVectorImpl<MCFixup> &Fixups) const {
  const MCOperand &MO = MI.getOperand(OpIdx);

  if (MO.isImm())
    return MO.getImm();

  assert(MO.isExpr());

  unsigned FixupKind;
  if (isa<AArch64MCExpr>(MO.getExpr())) {
    assert(dyn_cast<AArch64MCExpr>(MO.getExpr())->getKind()
               == AArch64MCExpr::VK_AARCH64_GOTTPREL
           && "Invalid symbol modifier for literal load");
    FixupKind = AArch64::fixup_a64_ld_gottprel_prel19;
  } else {
    FixupKind = AArch64::fixup_a64_ld_prel;
  }

  return getAddressWithFixup(MO, FixupKind, Fixups);
}

unsigned
AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI,
                                        const MCOperand &MO,
                                        SmallVectorImpl<MCFixup> &Fixups) const {
  if (MO.isReg()) {
    return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
  } else if (MO.isImm()) {
    return static_cast<unsigned>(MO.getImm());
  }

  llvm_unreachable("Unable to encode MCOperand!");
  return 0;
}

unsigned
AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
                                       SmallVectorImpl<MCFixup> &Fixups) const {
  const MCOperand &UImm16MO = MI.getOperand(OpIdx);
  const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1);

  unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16;

  if (UImm16MO.isImm()) {
    Result |= UImm16MO.getImm();
    return Result;
  }

  const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
  AArch64::Fixups requestedFixup;
  switch (A64E->getKind()) {
  default: llvm_unreachable("unexpected expression modifier");
  case AArch64MCExpr::VK_AARCH64_ABS_G0:
    requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break;
  case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
    requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break;
  case AArch64MCExpr::VK_AARCH64_ABS_G1:
    requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break;
  case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
    requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break;
  case AArch64MCExpr::VK_AARCH64_ABS_G2:
    requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break;
  case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
    requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break;
  case AArch64MCExpr::VK_AARCH64_ABS_G3:
    requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break;
  case AArch64MCExpr::VK_AARCH64_SABS_G0:
    requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break;
  case AArch64MCExpr::VK_AARCH64_SABS_G1:
    requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break;
  case AArch64MCExpr::VK_AARCH64_SABS_G2:
    requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break;
  case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
    requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break;
  case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
    requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break;
  case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
    requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break;
  case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
    requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break;
  case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
    requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break;
  case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
    requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break;
  case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
    requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break;
  case AArch64MCExpr::VK_AARCH64_TPREL_G2:
    requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break;
  case AArch64MCExpr::VK_AARCH64_TPREL_G1:
    requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break;
  case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
    requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break;
  case AArch64MCExpr::VK_AARCH64_TPREL_G0:
    requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break;
  case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
    requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break;
  }

  return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
}

unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI,
                                          unsigned EncodedValue) const {
  // For FCMP[E] Rn, #0.0, the Rm field has a canonical representation
  // with 0s, but is architecturally ignored
  EncodedValue &= ~0x1f0000u;

  return EncodedValue;
}

template<int hasRs, int hasRt2> unsigned
AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
                                            unsigned EncodedValue) const {
  if (!hasRs) EncodedValue |= 0x001F0000;
  if (!hasRt2) EncodedValue |= 0x00007C00;

  return EncodedValue;
}

unsigned
AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const {
  // If one of the signed fixup kinds is applied to a MOVZ instruction, the
  // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
  // job to ensure that any bits possibly affected by this are 0. This means we
  // must zero out bit 30 (essentially emitting a MOVN).
  MCOperand UImm16MO = MI.getOperand(1);

  // Nothing to do if there's no fixup.
  if (UImm16MO.isImm())
    return EncodedValue;

  const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
  switch (A64E->getKind()) {
  case AArch64MCExpr::VK_AARCH64_SABS_G0:
  case AArch64MCExpr::VK_AARCH64_SABS_G1:
  case AArch64MCExpr::VK_AARCH64_SABS_G2:
  case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
  case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
  case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
  case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
  case AArch64MCExpr::VK_AARCH64_TPREL_G2:
  case AArch64MCExpr::VK_AARCH64_TPREL_G1:
  case AArch64MCExpr::VK_AARCH64_TPREL_G0:
    return EncodedValue & ~(1u << 30);
  default:
    // Nothing to do for an unsigned fixup.
    return EncodedValue;
  }

  llvm_unreachable("Should have returned by now");
}
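
// In practice this matters for sequences like "movz x0, #:dtprel_g0:sym"
// (sym being whichever TLS symbol is referenced): once the linker knows the
// offset, the instruction may have to become a MOVN, so bit 30 -- the bit
// that distinguishes MOVZ from MOVN -- is cleared up front and the fixup
// then only ever ORs bits in.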

unsigned
AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI,
                                 unsigned EncodedValue) const {
  // The Ra field of SMULH and UMULH is unused: it should be assembled as 31
  // (i.e. all bits 1) but is ignored by the processor.
  EncodedValue |= 0x1f << 10;
  return EncodedValue;
}

MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
                                                const MCRegisterInfo &MRI,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AArch64MCCodeEmitter(MCII, STI, Ctx);
}

void AArch64MCCodeEmitter::
EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                  SmallVectorImpl<MCFixup> &Fixups) const {
  if (MI.getOpcode() == AArch64::TLSDESCCALL) {
    // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
    // following (BLR) instruction. It doesn't emit any code itself so it
    // doesn't go through the normal TableGenerated channels.
    MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call);
    const MCExpr *Expr;
    Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx);
    Fixups.push_back(MCFixup::Create(0, Expr, Fixup));
    return;
  }

  uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);

  EmitInstruction(Binary, OS);
}
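
// For reference, the TLS-descriptor code this supports typically looks like
// (for some TLS symbol "var"):
//   adrp x0, :tlsdesc:var
//   ldr  x1, [x0, #:tlsdesc_lo12:var]
//   add  x0, x0, #:tlsdesc_lo12:var
//   .tlsdesccall var          // becomes the zero-size TLSDESCCALL node
//   blr  x1
// The R_AARCH64_TLSDESC_CALL relocation on the BLR is what lets a linker
// relax the whole sequence.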

#include "AArch64GenMCCodeEmitter.inc"

173  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp  Normal file
@@ -0,0 +1,173 @@
//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "aarch64mcexpr"
#include "AArch64MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELF.h"
#include "llvm/Object/ELF.h"

using namespace llvm;

const AArch64MCExpr*
AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr,
                      MCContext &Ctx) {
  return new (Ctx) AArch64MCExpr(Kind, Expr);
}

void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
  switch (Kind) {
  default: llvm_unreachable("Invalid kind!");
  case VK_AARCH64_GOT:              OS << ":got:"; break;
  case VK_AARCH64_GOT_LO12:         OS << ":got_lo12:"; break;
  case VK_AARCH64_LO12:             OS << ":lo12:"; break;
  case VK_AARCH64_ABS_G0:           OS << ":abs_g0:"; break;
  case VK_AARCH64_ABS_G0_NC:        OS << ":abs_g0_nc:"; break;
  case VK_AARCH64_ABS_G1:           OS << ":abs_g1:"; break;
  case VK_AARCH64_ABS_G1_NC:        OS << ":abs_g1_nc:"; break;
  case VK_AARCH64_ABS_G2:           OS << ":abs_g2:"; break;
  case VK_AARCH64_ABS_G2_NC:        OS << ":abs_g2_nc:"; break;
  case VK_AARCH64_ABS_G3:           OS << ":abs_g3:"; break;
  case VK_AARCH64_SABS_G0:          OS << ":abs_g0_s:"; break;
  case VK_AARCH64_SABS_G1:          OS << ":abs_g1_s:"; break;
  case VK_AARCH64_SABS_G2:          OS << ":abs_g2_s:"; break;
  case VK_AARCH64_DTPREL_G2:        OS << ":dtprel_g2:"; break;
  case VK_AARCH64_DTPREL_G1:        OS << ":dtprel_g1:"; break;
  case VK_AARCH64_DTPREL_G1_NC:     OS << ":dtprel_g1_nc:"; break;
  case VK_AARCH64_DTPREL_G0:        OS << ":dtprel_g0:"; break;
  case VK_AARCH64_DTPREL_G0_NC:     OS << ":dtprel_g0_nc:"; break;
  case VK_AARCH64_DTPREL_HI12:      OS << ":dtprel_hi12:"; break;
  case VK_AARCH64_DTPREL_LO12:      OS << ":dtprel_lo12:"; break;
  case VK_AARCH64_DTPREL_LO12_NC:   OS << ":dtprel_lo12_nc:"; break;
  case VK_AARCH64_GOTTPREL_G1:      OS << ":gottprel_g1:"; break;
  case VK_AARCH64_GOTTPREL_G0_NC:   OS << ":gottprel_g0_nc:"; break;
  case VK_AARCH64_GOTTPREL:         OS << ":gottprel:"; break;
  case VK_AARCH64_GOTTPREL_LO12:    OS << ":gottprel_lo12:"; break;
  case VK_AARCH64_TPREL_G2:         OS << ":tprel_g2:"; break;
  case VK_AARCH64_TPREL_G1:         OS << ":tprel_g1:"; break;
  case VK_AARCH64_TPREL_G1_NC:      OS << ":tprel_g1_nc:"; break;
  case VK_AARCH64_TPREL_G0:         OS << ":tprel_g0:"; break;
  case VK_AARCH64_TPREL_G0_NC:      OS << ":tprel_g0_nc:"; break;
  case VK_AARCH64_TPREL_HI12:       OS << ":tprel_hi12:"; break;
  case VK_AARCH64_TPREL_LO12:       OS << ":tprel_lo12:"; break;
  case VK_AARCH64_TPREL_LO12_NC:    OS << ":tprel_lo12_nc:"; break;
  case VK_AARCH64_TLSDESC:          OS << ":tlsdesc:"; break;
  case VK_AARCH64_TLSDESC_LO12:     OS << ":tlsdesc_lo12:"; break;

  }

  const MCExpr *Expr = getSubExpr();
  if (Expr->getKind() != MCExpr::SymbolRef)
    OS << '(';
  Expr->print(OS);
  if (Expr->getKind() != MCExpr::SymbolRef)
    OS << ')';
}
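
// As an illustration, an AArch64MCExpr wrapping the symbol "var" with kind
// VK_AARCH64_LO12 should print as ":lo12:var", while a compound
// subexpression gains parentheses, along the lines of ":lo12:(var+4)".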

bool
AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
                                         const MCAsmLayout *Layout) const {
  return getSubExpr()->EvaluateAsRelocatable(Res, *Layout);
}

static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
  switch (Expr->getKind()) {
  case MCExpr::Target:
    llvm_unreachable("Can't handle nested target expression");
    break;
  case MCExpr::Constant:
    break;

  case MCExpr::Binary: {
    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
    fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
    fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
    break;
  }

  case MCExpr::SymbolRef: {
    // We're known to be under a TLS fixup, so any symbol should be
    // modified. There should be only one.
    const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
    MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
    MCELF::SetType(SD, ELF::STT_TLS);
    break;
  }

  case MCExpr::Unary:
    fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
    break;
  }
}

void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
  switch (getKind()) {
  default:
    return;
  case VK_AARCH64_DTPREL_G2:
  case VK_AARCH64_DTPREL_G1:
  case VK_AARCH64_DTPREL_G1_NC:
  case VK_AARCH64_DTPREL_G0:
  case VK_AARCH64_DTPREL_G0_NC:
  case VK_AARCH64_DTPREL_HI12:
  case VK_AARCH64_DTPREL_LO12:
  case VK_AARCH64_DTPREL_LO12_NC:
  case VK_AARCH64_GOTTPREL_G1:
  case VK_AARCH64_GOTTPREL_G0_NC:
  case VK_AARCH64_GOTTPREL:
  case VK_AARCH64_GOTTPREL_LO12:
  case VK_AARCH64_TPREL_G2:
  case VK_AARCH64_TPREL_G1:
  case VK_AARCH64_TPREL_G1_NC:
  case VK_AARCH64_TPREL_G0:
  case VK_AARCH64_TPREL_G0_NC:
  case VK_AARCH64_TPREL_HI12:
  case VK_AARCH64_TPREL_LO12:
  case VK_AARCH64_TPREL_LO12_NC:
  case VK_AARCH64_TLSDESC:
  case VK_AARCH64_TLSDESC_LO12:
    break;
  }

  fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
}

// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
// that method should be made public?
// FIXME: really do above: now that two backends are using it.
static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
  switch (Value->getKind()) {
  case MCExpr::Target:
    llvm_unreachable("Can't handle nested target expr!");
    break;

  case MCExpr::Constant:
    break;

  case MCExpr::Binary: {
    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
    AddValueSymbolsImpl(BE->getLHS(), Asm);
    AddValueSymbolsImpl(BE->getRHS(), Asm);
    break;
  }

  case MCExpr::SymbolRef:
    Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
    break;

  case MCExpr::Unary:
    AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
    break;
  }
}

void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
  AddValueSymbolsImpl(getSubExpr(), Asm);
}

161  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h  Normal file
@@ -0,0 +1,161 @@
//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64MCEXPR_H
#define LLVM_AARCH64MCEXPR_H

#include "llvm/MC/MCExpr.h"

namespace llvm {

class AArch64MCExpr : public MCTargetExpr {
public:
  enum VariantKind {
    VK_AARCH64_None,
    VK_AARCH64_GOT,      // :got: modifier in assembly
    VK_AARCH64_GOT_LO12, // :got_lo12:
    VK_AARCH64_LO12,     // :lo12:

    VK_AARCH64_ABS_G0,    // :abs_g0:
    VK_AARCH64_ABS_G0_NC, // :abs_g0_nc:
    VK_AARCH64_ABS_G1,
    VK_AARCH64_ABS_G1_NC,
    VK_AARCH64_ABS_G2,
    VK_AARCH64_ABS_G2_NC,
    VK_AARCH64_ABS_G3,

    VK_AARCH64_SABS_G0, // :abs_g0_s:
    VK_AARCH64_SABS_G1,
    VK_AARCH64_SABS_G2,

    VK_AARCH64_DTPREL_G2, // :dtprel_g2:
    VK_AARCH64_DTPREL_G1,
    VK_AARCH64_DTPREL_G1_NC,
    VK_AARCH64_DTPREL_G0,
    VK_AARCH64_DTPREL_G0_NC,
    VK_AARCH64_DTPREL_HI12,
    VK_AARCH64_DTPREL_LO12,
    VK_AARCH64_DTPREL_LO12_NC,

    VK_AARCH64_GOTTPREL_G1, // :gottprel:
    VK_AARCH64_GOTTPREL_G0_NC,
    VK_AARCH64_GOTTPREL,
    VK_AARCH64_GOTTPREL_LO12,

    VK_AARCH64_TPREL_G2, // :tprel:
    VK_AARCH64_TPREL_G1,
    VK_AARCH64_TPREL_G1_NC,
    VK_AARCH64_TPREL_G0,
    VK_AARCH64_TPREL_G0_NC,
    VK_AARCH64_TPREL_HI12,
    VK_AARCH64_TPREL_LO12,
    VK_AARCH64_TPREL_LO12_NC,

    VK_AARCH64_TLSDESC, // :tlsdesc:
    VK_AARCH64_TLSDESC_LO12
  };

private:
  const VariantKind Kind;
  const MCExpr *Expr;

  explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr)
    : Kind(_Kind), Expr(_Expr) {}

public:
  /// @name Construction
  /// @{

  static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr,
                                     MCContext &Ctx);

  static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) {
    return Create(VK_AARCH64_LO12, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) {
    return Create(VK_AARCH64_GOT, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr,
                                            MCContext &Ctx) {
    return Create(VK_AARCH64_GOT_LO12, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr,
                                              MCContext &Ctx) {
    return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr,
                                                 MCContext &Ctx) {
    return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr,
                                             MCContext &Ctx) {
    return Create(VK_AARCH64_GOTTPREL, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr,
                                                 MCContext &Ctx) {
    return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr,
                                            MCContext &Ctx) {
    return Create(VK_AARCH64_TLSDESC, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr,
                                                MCContext &Ctx) {
    return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr,
                                             MCContext &Ctx) {
    return Create(VK_AARCH64_TPREL_G1, Expr, Ctx);
  }

  static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr,
                                                MCContext &Ctx) {
    return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
  }

  /// @}
  /// @name Accessors
  /// @{

  /// getKind - Get the kind of this expression.
  VariantKind getKind() const { return Kind; }

  /// getSubExpr - Get the child of this expression.
  const MCExpr *getSubExpr() const { return Expr; }

  /// @}

  void PrintImpl(raw_ostream &OS) const;
  bool EvaluateAsRelocatableImpl(MCValue &Res,
                                 const MCAsmLayout *Layout) const;
  void AddValueSymbols(MCAssembler *) const;
  const MCSection *FindAssociatedSection() const {
    return getSubExpr()->FindAssociatedSection();
  }

  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;

  static bool classof(const MCExpr *E) {
    return E->getKind() == MCExpr::Target;
  }

  static bool classof(const AArch64MCExpr *) { return true; }

};
} // end namespace llvm

#endif

991  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp  Normal file
@@ -0,0 +1,991 @@
//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides AArch64 specific target descriptions.
//
//===----------------------------------------------------------------------===//

#include "AArch64MCTargetDesc.h"
#include "AArch64BaseInfo.h"
#include "AArch64ELFStreamer.h"
#include "AArch64MCAsmInfo.h"
#include "InstPrinter/AArch64InstPrinter.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"

#define GET_REGINFO_MC_DESC
#include "AArch64GenRegisterInfo.inc"

#define GET_INSTRINFO_MC_DESC
#include "AArch64GenInstrInfo.inc"

#define GET_SUBTARGETINFO_MC_DESC
#include "AArch64GenSubtargetInfo.inc"

using namespace llvm;

StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
  for (unsigned i = 0; i < NumPairs; ++i) {
    if (Pairs[i].Value == Value) {
      Valid = true;
      return Pairs[i].Name;
    }
  }

  Valid = false;
  return StringRef();
}

uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
  std::string LowerCaseName = Name.lower();
  for (unsigned i = 0; i < NumPairs; ++i) {
    if (Pairs[i].Name == LowerCaseName) {
      Valid = true;
      return Pairs[i].Value;
    }
  }

  Valid = false;
  return -1;
}
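
// Usage sketch: with an A64DB::DBarrierMapper DBM (defined below),
// DBM.fromString("ish", Valid) yields A64DB::ISH with Valid == true, and
// DBM.toString(A64DB::ISH, Valid) maps back to "ish"; an unknown name sets
// Valid to false and returns -1.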

bool NamedImmMapper::validImm(uint32_t Value) const {
  return Value < TooBigImm;
}

const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = {
  {"s1e1r", S1E1R},
  {"s1e2r", S1E2R},
  {"s1e3r", S1E3R},
  {"s1e1w", S1E1W},
  {"s1e2w", S1E2W},
  {"s1e3w", S1E3W},
  {"s1e0r", S1E0R},
  {"s1e0w", S1E0W},
  {"s12e1r", S12E1R},
  {"s12e1w", S12E1W},
  {"s12e0r", S12E0R},
  {"s12e0w", S12E0W},
};

A64AT::ATMapper::ATMapper()
  : NamedImmMapper(ATPairs, 0) {}

const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = {
  {"oshld", OSHLD},
  {"oshst", OSHST},
  {"osh", OSH},
  {"nshld", NSHLD},
  {"nshst", NSHST},
  {"nsh", NSH},
  {"ishld", ISHLD},
  {"ishst", ISHST},
  {"ish", ISH},
  {"ld", LD},
  {"st", ST},
  {"sy", SY}
};

A64DB::DBarrierMapper::DBarrierMapper()
  : NamedImmMapper(DBarrierPairs, 16u) {}

const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = {
  {"zva", ZVA},
  {"ivac", IVAC},
  {"isw", ISW},
  {"cvac", CVAC},
  {"csw", CSW},
  {"cvau", CVAU},
  {"civac", CIVAC},
  {"cisw", CISW}
};

A64DC::DCMapper::DCMapper()
  : NamedImmMapper(DCPairs, 0) {}

const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = {
  {"ialluis", IALLUIS},
  {"iallu", IALLU},
  {"ivau", IVAU}
};

A64IC::ICMapper::ICMapper()
  : NamedImmMapper(ICPairs, 0) {}

const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = {
  {"sy", SY},
};

A64ISB::ISBMapper::ISBMapper()
  : NamedImmMapper(ISBPairs, 16) {}

const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = {
  {"pldl1keep", PLDL1KEEP},
  {"pldl1strm", PLDL1STRM},
  {"pldl2keep", PLDL2KEEP},
  {"pldl2strm", PLDL2STRM},
  {"pldl3keep", PLDL3KEEP},
  {"pldl3strm", PLDL3STRM},
  {"pstl1keep", PSTL1KEEP},
  {"pstl1strm", PSTL1STRM},
  {"pstl2keep", PSTL2KEEP},
  {"pstl2strm", PSTL2STRM},
  {"pstl3keep", PSTL3KEEP},
  {"pstl3strm", PSTL3STRM}
};

A64PRFM::PRFMMapper::PRFMMapper()
  : NamedImmMapper(PRFMPairs, 32) {}

const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = {
  {"spsel", SPSel},
  {"daifset", DAIFSet},
  {"daifclr", DAIFClr}
};

A64PState::PStateMapper::PStateMapper()
  : NamedImmMapper(PStatePairs, 0) {}

const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
  {"mdccsr_el0", MDCCSR_EL0},
  {"dbgdtrrx_el0", DBGDTRRX_EL0},
  {"mdrar_el1", MDRAR_EL1},
  {"oslsr_el1", OSLSR_EL1},
  {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
  {"pmceid0_el0", PMCEID0_EL0},
  {"pmceid1_el0", PMCEID1_EL0},
  {"midr_el1", MIDR_EL1},
  {"ccsidr_el1", CCSIDR_EL1},
  {"clidr_el1", CLIDR_EL1},
  {"ctr_el0", CTR_EL0},
  {"mpidr_el1", MPIDR_EL1},
  {"revidr_el1", REVIDR_EL1},
  {"aidr_el1", AIDR_EL1},
  {"dczid_el0", DCZID_EL0},
  {"id_pfr0_el1", ID_PFR0_EL1},
  {"id_pfr1_el1", ID_PFR1_EL1},
  {"id_dfr0_el1", ID_DFR0_EL1},
  {"id_afr0_el1", ID_AFR0_EL1},
  {"id_mmfr0_el1", ID_MMFR0_EL1},
  {"id_mmfr1_el1", ID_MMFR1_EL1},
  {"id_mmfr2_el1", ID_MMFR2_EL1},
  {"id_mmfr3_el1", ID_MMFR3_EL1},
  {"id_isar0_el1", ID_ISAR0_EL1},
  {"id_isar1_el1", ID_ISAR1_EL1},
  {"id_isar2_el1", ID_ISAR2_EL1},
  {"id_isar3_el1", ID_ISAR3_EL1},
  {"id_isar4_el1", ID_ISAR4_EL1},
  {"id_isar5_el1", ID_ISAR5_EL1},
  {"id_aa64pfr0_el1", ID_AA64PFR0_EL1},
  {"id_aa64pfr1_el1", ID_AA64PFR1_EL1},
  {"id_aa64dfr0_el1", ID_AA64DFR0_EL1},
  {"id_aa64dfr1_el1", ID_AA64DFR1_EL1},
  {"id_aa64afr0_el1", ID_AA64AFR0_EL1},
  {"id_aa64afr1_el1", ID_AA64AFR1_EL1},
  {"id_aa64isar0_el1", ID_AA64ISAR0_EL1},
  {"id_aa64isar1_el1", ID_AA64ISAR1_EL1},
  {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1},
  {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1},
  {"mvfr0_el1", MVFR0_EL1},
  {"mvfr1_el1", MVFR1_EL1},
  {"mvfr2_el1", MVFR2_EL1},
  {"rvbar_el1", RVBAR_EL1},
  {"rvbar_el2", RVBAR_EL2},
  {"rvbar_el3", RVBAR_EL3},
  {"isr_el1", ISR_EL1},
  {"cntpct_el0", CNTPCT_EL0},
  {"cntvct_el0", CNTVCT_EL0}
};

A64SysReg::MRSMapper::MRSMapper() {
  InstPairs = &MRSPairs[0];
  NumInstPairs = llvm::array_lengthof(MRSPairs);
}

const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
  {"dbgdtrtx_el0", DBGDTRTX_EL0},
  {"oslar_el1", OSLAR_EL1},
  {"pmswinc_el0", PMSWINC_EL0}
};

A64SysReg::MSRMapper::MSRMapper() {
  InstPairs = &MSRPairs[0];
  NumInstPairs = llvm::array_lengthof(MSRPairs);
}


const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
  {"osdtrrx_el1", OSDTRRX_EL1},
  {"osdtrtx_el1", OSDTRTX_EL1},
  {"teecr32_el1", TEECR32_EL1},
  {"mdccint_el1", MDCCINT_EL1},
  {"mdscr_el1", MDSCR_EL1},
  {"dbgdtr_el0", DBGDTR_EL0},
  {"oseccr_el1", OSECCR_EL1},
  {"dbgvcr32_el2", DBGVCR32_EL2},
  {"dbgbvr0_el1", DBGBVR0_EL1},
  {"dbgbvr1_el1", DBGBVR1_EL1},
  {"dbgbvr2_el1", DBGBVR2_EL1},
  {"dbgbvr3_el1", DBGBVR3_EL1},
  {"dbgbvr4_el1", DBGBVR4_EL1},
  {"dbgbvr5_el1", DBGBVR5_EL1},
  {"dbgbvr6_el1", DBGBVR6_EL1},
  {"dbgbvr7_el1", DBGBVR7_EL1},
  {"dbgbvr8_el1", DBGBVR8_EL1},
  {"dbgbvr9_el1", DBGBVR9_EL1},
  {"dbgbvr10_el1", DBGBVR10_EL1},
  {"dbgbvr11_el1", DBGBVR11_EL1},
  {"dbgbvr12_el1", DBGBVR12_EL1},
  {"dbgbvr13_el1", DBGBVR13_EL1},
  {"dbgbvr14_el1", DBGBVR14_EL1},
  {"dbgbvr15_el1", DBGBVR15_EL1},
  {"dbgbcr0_el1", DBGBCR0_EL1},
  {"dbgbcr1_el1", DBGBCR1_EL1},
  {"dbgbcr2_el1", DBGBCR2_EL1},
  {"dbgbcr3_el1", DBGBCR3_EL1},
  {"dbgbcr4_el1", DBGBCR4_EL1},
  {"dbgbcr5_el1", DBGBCR5_EL1},
  {"dbgbcr6_el1", DBGBCR6_EL1},
  {"dbgbcr7_el1", DBGBCR7_EL1},
  {"dbgbcr8_el1", DBGBCR8_EL1},
  {"dbgbcr9_el1", DBGBCR9_EL1},
  {"dbgbcr10_el1", DBGBCR10_EL1},
  {"dbgbcr11_el1", DBGBCR11_EL1},
  {"dbgbcr12_el1", DBGBCR12_EL1},
  {"dbgbcr13_el1", DBGBCR13_EL1},
  {"dbgbcr14_el1", DBGBCR14_EL1},
  {"dbgbcr15_el1", DBGBCR15_EL1},
  {"dbgwvr0_el1", DBGWVR0_EL1},
  {"dbgwvr1_el1", DBGWVR1_EL1},
  {"dbgwvr2_el1", DBGWVR2_EL1},
  {"dbgwvr3_el1", DBGWVR3_EL1},
  {"dbgwvr4_el1", DBGWVR4_EL1},
  {"dbgwvr5_el1", DBGWVR5_EL1},
  {"dbgwvr6_el1", DBGWVR6_EL1},
  {"dbgwvr7_el1", DBGWVR7_EL1},
  {"dbgwvr8_el1", DBGWVR8_EL1},
  {"dbgwvr9_el1", DBGWVR9_EL1},
  {"dbgwvr10_el1", DBGWVR10_EL1},
  {"dbgwvr11_el1", DBGWVR11_EL1},
  {"dbgwvr12_el1", DBGWVR12_EL1},
  {"dbgwvr13_el1", DBGWVR13_EL1},
  {"dbgwvr14_el1", DBGWVR14_EL1},
  {"dbgwvr15_el1", DBGWVR15_EL1},
  {"dbgwcr0_el1", DBGWCR0_EL1},
  {"dbgwcr1_el1", DBGWCR1_EL1},
  {"dbgwcr2_el1", DBGWCR2_EL1},
  {"dbgwcr3_el1", DBGWCR3_EL1},
  {"dbgwcr4_el1", DBGWCR4_EL1},
  {"dbgwcr5_el1", DBGWCR5_EL1},
  {"dbgwcr6_el1", DBGWCR6_EL1},
  {"dbgwcr7_el1", DBGWCR7_EL1},
  {"dbgwcr8_el1", DBGWCR8_EL1},
  {"dbgwcr9_el1", DBGWCR9_EL1},
  {"dbgwcr10_el1", DBGWCR10_EL1},
  {"dbgwcr11_el1", DBGWCR11_EL1},
  {"dbgwcr12_el1", DBGWCR12_EL1},
  {"dbgwcr13_el1", DBGWCR13_EL1},
  {"dbgwcr14_el1", DBGWCR14_EL1},
  {"dbgwcr15_el1", DBGWCR15_EL1},
  {"teehbr32_el1", TEEHBR32_EL1},
  {"osdlr_el1", OSDLR_EL1},
  {"dbgprcr_el1", DBGPRCR_EL1},
  {"dbgclaimset_el1", DBGCLAIMSET_EL1},
  {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
  {"csselr_el1", CSSELR_EL1},
  {"vpidr_el2", VPIDR_EL2},
  {"vmpidr_el2", VMPIDR_EL2},
  {"sctlr_el1", SCTLR_EL1},
  {"sctlr_el2", SCTLR_EL2},
  {"sctlr_el3", SCTLR_EL3},
  {"actlr_el1", ACTLR_EL1},
  {"actlr_el2", ACTLR_EL2},
  {"actlr_el3", ACTLR_EL3},
  {"cpacr_el1", CPACR_EL1},
  {"hcr_el2", HCR_EL2},
  {"scr_el3", SCR_EL3},
  {"mdcr_el2", MDCR_EL2},
  {"sder32_el3", SDER32_EL3},
  {"cptr_el2", CPTR_EL2},
  {"cptr_el3", CPTR_EL3},
  {"hstr_el2", HSTR_EL2},
  {"hacr_el2", HACR_EL2},
  {"mdcr_el3", MDCR_EL3},
  {"ttbr0_el1", TTBR0_EL1},
  {"ttbr0_el2", TTBR0_EL2},
  {"ttbr0_el3", TTBR0_EL3},
  {"ttbr1_el1", TTBR1_EL1},
  {"tcr_el1", TCR_EL1},
  {"tcr_el2", TCR_EL2},
  {"tcr_el3", TCR_EL3},
  {"vttbr_el2", VTTBR_EL2},
  {"vtcr_el2", VTCR_EL2},
  {"dacr32_el2", DACR32_EL2},
  {"spsr_el1", SPSR_EL1},
  {"spsr_el2", SPSR_EL2},
  {"spsr_el3", SPSR_EL3},
  {"elr_el1", ELR_EL1},
  {"elr_el2", ELR_EL2},
  {"elr_el3", ELR_EL3},
  {"sp_el0", SP_EL0},
  {"sp_el1", SP_EL1},
  {"sp_el2", SP_EL2},
  {"spsel", SPSel},
  {"nzcv", NZCV},
  {"daif", DAIF},
  {"currentel", CurrentEL},
  {"spsr_irq", SPSR_irq},
  {"spsr_abt", SPSR_abt},
  {"spsr_und", SPSR_und},
  {"spsr_fiq", SPSR_fiq},
  {"fpcr", FPCR},
  {"fpsr", FPSR},
  {"dspsr_el0", DSPSR_EL0},
  {"dlr_el0", DLR_EL0},
  {"ifsr32_el2", IFSR32_EL2},
  {"afsr0_el1", AFSR0_EL1},
  {"afsr0_el2", AFSR0_EL2},
  {"afsr0_el3", AFSR0_EL3},
  {"afsr1_el1", AFSR1_EL1},
  {"afsr1_el2", AFSR1_EL2},
  {"afsr1_el3", AFSR1_EL3},
  {"esr_el1", ESR_EL1},
  {"esr_el2", ESR_EL2},
  {"esr_el3", ESR_EL3},
  {"fpexc32_el2", FPEXC32_EL2},
  {"far_el1", FAR_EL1},
  {"far_el2", FAR_EL2},
  {"far_el3", FAR_EL3},
  {"hpfar_el2", HPFAR_EL2},
  {"par_el1", PAR_EL1},
  {"pmcr_el0", PMCR_EL0},
  {"pmcntenset_el0", PMCNTENSET_EL0},
  {"pmcntenclr_el0", PMCNTENCLR_EL0},
  {"pmovsclr_el0", PMOVSCLR_EL0},
  {"pmselr_el0", PMSELR_EL0},
  {"pmccntr_el0", PMCCNTR_EL0},
  {"pmxevtyper_el0", PMXEVTYPER_EL0},
  {"pmxevcntr_el0", PMXEVCNTR_EL0},
  {"pmuserenr_el0", PMUSERENR_EL0},
  {"pmintenset_el1", PMINTENSET_EL1},
  {"pmintenclr_el1", PMINTENCLR_EL1},
  {"pmovsset_el0", PMOVSSET_EL0},
  {"mair_el1", MAIR_EL1},
  {"mair_el2", MAIR_EL2},
  {"mair_el3", MAIR_EL3},
  {"amair_el1", AMAIR_EL1},
  {"amair_el2", AMAIR_EL2},
  {"amair_el3", AMAIR_EL3},
  {"vbar_el1", VBAR_EL1},
  {"vbar_el2", VBAR_EL2},
  {"vbar_el3", VBAR_EL3},
  {"rmr_el1", RMR_EL1},
  {"rmr_el2", RMR_EL2},
  {"rmr_el3", RMR_EL3},
  {"contextidr_el1", CONTEXTIDR_EL1},
  {"tpidr_el0", TPIDR_EL0},
  {"tpidr_el2", TPIDR_EL2},
  {"tpidr_el3", TPIDR_EL3},
  {"tpidrro_el0", TPIDRRO_EL0},
  {"tpidr_el1", TPIDR_EL1},
  {"cntfrq_el0", CNTFRQ_EL0},
  {"cntvoff_el2", CNTVOFF_EL2},
  {"cntkctl_el1", CNTKCTL_EL1},
  {"cnthctl_el2", CNTHCTL_EL2},
  {"cntp_tval_el0", CNTP_TVAL_EL0},
  {"cnthp_tval_el2", CNTHP_TVAL_EL2},
  {"cntps_tval_el1", CNTPS_TVAL_EL1},
  {"cntp_ctl_el0", CNTP_CTL_EL0},
  {"cnthp_ctl_el2", CNTHP_CTL_EL2},
  {"cntps_ctl_el1", CNTPS_CTL_EL1},
  {"cntp_cval_el0", CNTP_CVAL_EL0},
  {"cnthp_cval_el2", CNTHP_CVAL_EL2},
  {"cntps_cval_el1", CNTPS_CVAL_EL1},
  {"cntv_tval_el0", CNTV_TVAL_EL0},
  {"cntv_ctl_el0", CNTV_CTL_EL0},
  {"cntv_cval_el0", CNTV_CVAL_EL0},
  {"pmevcntr0_el0", PMEVCNTR0_EL0},
  {"pmevcntr1_el0", PMEVCNTR1_EL0},
  {"pmevcntr2_el0", PMEVCNTR2_EL0},
  {"pmevcntr3_el0", PMEVCNTR3_EL0},
  {"pmevcntr4_el0", PMEVCNTR4_EL0},
  {"pmevcntr5_el0", PMEVCNTR5_EL0},
  {"pmevcntr6_el0", PMEVCNTR6_EL0},
  {"pmevcntr7_el0", PMEVCNTR7_EL0},
  {"pmevcntr8_el0", PMEVCNTR8_EL0},
  {"pmevcntr9_el0", PMEVCNTR9_EL0},
  {"pmevcntr10_el0", PMEVCNTR10_EL0},
  {"pmevcntr11_el0", PMEVCNTR11_EL0},
  {"pmevcntr12_el0", PMEVCNTR12_EL0},
  {"pmevcntr13_el0", PMEVCNTR13_EL0},
  {"pmevcntr14_el0", PMEVCNTR14_EL0},
  {"pmevcntr15_el0", PMEVCNTR15_EL0},
  {"pmevcntr16_el0", PMEVCNTR16_EL0},
  {"pmevcntr17_el0", PMEVCNTR17_EL0},
  {"pmevcntr18_el0", PMEVCNTR18_EL0},
  {"pmevcntr19_el0", PMEVCNTR19_EL0},
  {"pmevcntr20_el0", PMEVCNTR20_EL0},
  {"pmevcntr21_el0", PMEVCNTR21_EL0},
  {"pmevcntr22_el0", PMEVCNTR22_EL0},
  {"pmevcntr23_el0", PMEVCNTR23_EL0},
  {"pmevcntr24_el0", PMEVCNTR24_EL0},
  {"pmevcntr25_el0", PMEVCNTR25_EL0},
  {"pmevcntr26_el0", PMEVCNTR26_EL0},
  {"pmevcntr27_el0", PMEVCNTR27_EL0},
  {"pmevcntr28_el0", PMEVCNTR28_EL0},
  {"pmevcntr29_el0", PMEVCNTR29_EL0},
  {"pmevcntr30_el0", PMEVCNTR30_EL0},
  {"pmccfiltr_el0", PMCCFILTR_EL0},
  {"pmevtyper0_el0", PMEVTYPER0_EL0},
  {"pmevtyper1_el0", PMEVTYPER1_EL0},
  {"pmevtyper2_el0", PMEVTYPER2_EL0},
  {"pmevtyper3_el0", PMEVTYPER3_EL0},
  {"pmevtyper4_el0", PMEVTYPER4_EL0},
  {"pmevtyper5_el0", PMEVTYPER5_EL0},
  {"pmevtyper6_el0", PMEVTYPER6_EL0},
  {"pmevtyper7_el0", PMEVTYPER7_EL0},
  {"pmevtyper8_el0", PMEVTYPER8_EL0},
  {"pmevtyper9_el0", PMEVTYPER9_EL0},
  {"pmevtyper10_el0", PMEVTYPER10_EL0},
  {"pmevtyper11_el0", PMEVTYPER11_EL0},
  {"pmevtyper12_el0", PMEVTYPER12_EL0},
  {"pmevtyper13_el0", PMEVTYPER13_EL0},
  {"pmevtyper14_el0", PMEVTYPER14_EL0},
  {"pmevtyper15_el0", PMEVTYPER15_EL0},
  {"pmevtyper16_el0", PMEVTYPER16_EL0},
  {"pmevtyper17_el0", PMEVTYPER17_EL0},
  {"pmevtyper18_el0", PMEVTYPER18_EL0},
  {"pmevtyper19_el0", PMEVTYPER19_EL0},
  {"pmevtyper20_el0", PMEVTYPER20_EL0},
  {"pmevtyper21_el0", PMEVTYPER21_EL0},
  {"pmevtyper22_el0", PMEVTYPER22_EL0},
  {"pmevtyper23_el0", PMEVTYPER23_EL0},
  {"pmevtyper24_el0", PMEVTYPER24_EL0},
  {"pmevtyper25_el0", PMEVTYPER25_EL0},
  {"pmevtyper26_el0", PMEVTYPER26_EL0},
  {"pmevtyper27_el0", PMEVTYPER27_EL0},
  {"pmevtyper28_el0", PMEVTYPER28_EL0},
  {"pmevtyper29_el0", PMEVTYPER29_EL0},
  {"pmevtyper30_el0", PMEVTYPER30_EL0},
};

uint32_t
A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
  // First search the registers shared by all
  std::string NameLower = Name.lower();
  for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
    if (SysRegPairs[i].Name == NameLower) {
      Valid = true;
      return SysRegPairs[i].Value;
    }
  }

  // Now try the instruction-specific registers (either read-only or
  // write-only).
  for (unsigned i = 0; i < NumInstPairs; ++i) {
    if (InstPairs[i].Name == NameLower) {
      Valid = true;
      return InstPairs[i].Value;
    }
  }

  // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits
  // are: 11 xxx 1x11 xxxx xxx
  Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");

  SmallVector<StringRef, 4> Ops;
  if (!GenericRegPattern.match(NameLower, &Ops)) {
    Valid = false;
    return -1;
  }

  uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
  uint32_t Bits;
  Ops[1].getAsInteger(10, Op1);
  Ops[2].getAsInteger(10, CRn);
  Ops[3].getAsInteger(10, CRm);
  Ops[4].getAsInteger(10, Op2);
  Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;

  Valid = true;
  return Bits;
}
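
// For example, the generic name "s3_2_c15_c6_4" matches the pattern with
// Op1 = 2, CRn = 15, CRm = 6, Op2 = 4, giving
// Bits = (3 << 14) | (2 << 11) | (15 << 7) | (6 << 3) | 4 = 0xd7b4.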

std::string
A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
  for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
    if (SysRegPairs[i].Value == Bits) {
      Valid = true;
      return SysRegPairs[i].Name;
    }
  }

  for (unsigned i = 0; i < NumInstPairs; ++i) {
    if (InstPairs[i].Value == Bits) {
      Valid = true;
      return InstPairs[i].Name;
    }
  }

  uint32_t Op0 = (Bits >> 14) & 0x3;
  uint32_t Op1 = (Bits >> 11) & 0x7;
  uint32_t CRn = (Bits >> 7) & 0xf;
  uint32_t CRm = (Bits >> 3) & 0xf;
  uint32_t Op2 = Bits & 0x7;

  // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
  // name.
  if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
    Valid = false;
    return "";
  }

  assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");

  Valid = true;
  return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
       + "_c" + utostr(CRm) + "_" + utostr(Op2);
}

const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
  {"ipas2e1is", IPAS2E1IS},
  {"ipas2le1is", IPAS2LE1IS},
  {"vmalle1is", VMALLE1IS},
  {"alle2is", ALLE2IS},
  {"alle3is", ALLE3IS},
  {"vae1is", VAE1IS},
  {"vae2is", VAE2IS},
  {"vae3is", VAE3IS},
  {"aside1is", ASIDE1IS},
  {"vaae1is", VAAE1IS},
  {"alle1is", ALLE1IS},
  {"vale1is", VALE1IS},
  {"vale2is", VALE2IS},
  {"vale3is", VALE3IS},
  {"vmalls12e1is", VMALLS12E1IS},
  {"vaale1is", VAALE1IS},
  {"ipas2e1", IPAS2E1},
  {"ipas2le1", IPAS2LE1},
  {"vmalle1", VMALLE1},
  {"alle2", ALLE2},
  {"alle3", ALLE3},
  {"vae1", VAE1},
  {"vae2", VAE2},
  {"vae3", VAE3},
  {"aside1", ASIDE1},
  {"vaae1", VAAE1},
  {"alle1", ALLE1},
  {"vale1", VALE1},
  {"vale2", VALE2},
  {"vale3", VALE3},
  {"vmalls12e1", VMALLS12E1},
  {"vaale1", VAALE1}
};

A64TLBI::TLBIMapper::TLBIMapper()
  : NamedImmMapper(TLBIPairs, 0) {}

bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) {
  const fltSemantics &Sem = Val.getSemantics();
  unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1;

  uint32_t ExpMask;
  switch (FracBits) {
  case 10: // IEEE half-precision
    ExpMask = 0x1f;
    break;
  case 23: // IEEE single-precision
    ExpMask = 0xff;
    break;
  case 52: // IEEE double-precision
    ExpMask = 0x7ff;
    break;
  case 112: // IEEE quad-precision
    // No immediates are valid for quad precision.
    return false;
  default:
    llvm_unreachable("Only half, single and double precision supported");
  }

  uint32_t ExpStart = FracBits;
  uint64_t FracMask = (1ULL << FracBits) - 1;

  uint32_t Sign = Val.isNegative();

  uint64_t Bits = Val.bitcastToAPInt().getLimitedValue();
  uint64_t Fraction = Bits & FracMask;
  int32_t Exponent = ((Bits >> ExpStart) & ExpMask);
  Exponent -= ExpMask >> 1;

  // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19)
  // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48)
  // This translates to: only 4 bits of fraction; -3 <= exp <= 4.
  uint64_t A64FracStart = FracBits - 4;
  uint64_t A64FracMask = 0xf;

  // Are there too many fraction bits?
  if (Fraction & ~(A64FracMask << A64FracStart))
    return false;

  if (Exponent < -3 || Exponent > 4)
    return false;

  uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask;
  uint32_t PackedExp = (Exponent + 7) & 0x7;

  Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction;
  return true;
}
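
// Worked example: for Val = 1.0 (double precision) the biased exponent field
// is 1023, so Exponent = 0 and Fraction = 0. PackedExp is then
// (0 + 7) & 0x7 = 0b111 and Imm8Bits = 0x70, the FMOV immediate encoding of
// +1.0.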

// Encoding of the immediate for logical (immediate) instructions:
//
// | N | imms   | immr   | size | R            | S            |
// |---+--------+--------+------+--------------+--------------|
// | 1 | ssssss | rrrrrr |   64 | UInt(rrrrrr) | UInt(ssssss) |
// | 0 | 0sssss | xrrrrr |   32 | UInt(rrrrr)  | UInt(sssss)  |
// | 0 | 10ssss | xxrrrr |   16 | UInt(rrrr)   | UInt(ssss)   |
// | 0 | 110sss | xxxrrr |    8 | UInt(rrr)    | UInt(sss)    |
// | 0 | 1110ss | xxxxrr |    4 | UInt(rr)     | UInt(ss)     |
// | 0 | 11110s | xxxxxr |    2 | UInt(r)      | UInt(s)      |
// | 0 | 11111x | -      |      | UNALLOCATED  |              |
//
// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in
// which the lower S+1 bits are ones and the remaining bits are zero, then
// rotated right by R bits, which is then replicated across the datapath.
//
// + Values of 'N', 'imms' and 'immr' which do not match the above table are
//   RESERVED.
// + If all 's' bits in the imms field are set then the instruction is
//   RESERVED.
// + The 'x' bits in the 'immr' field are IGNORED.

bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
  int RepeatWidth;
  int Rotation = 0;
  int Num1s = 0;

  // Because there are S+1 ones in the replicated mask, an immediate of all
  // zeros is not allowed. Filtering it here is probably more efficient.
  if (Imm == 0) return false;

  for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) {
    uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1;
    uint64_t ReplicatedMask = Imm & RepeatMask;

    if (ReplicatedMask == 0) continue;

    // First we have to make sure the mask is actually repeated in each slot
    // for this width-specifier.
    bool IsReplicatedMask = true;
    for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) {
      if (((Imm >> i) & RepeatMask) != ReplicatedMask) {
        IsReplicatedMask = false;
        break;
      }
    }
    if (!IsReplicatedMask) continue;

    // Now we have to work out the amount of rotation needed. The first part of
    // this calculation is actually independent of RepeatWidth, but the complex
    // case will depend on it.
    Rotation = CountTrailingZeros_64(Imm);
    if (Rotation == 0) {
      // There were no trailing zeros, which means it's either in place or
      // there are 1s at each end (e.g. 0x8003 needs rotating).
      Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm)
                                : CountLeadingOnes_32(Imm);
      Rotation = RepeatWidth - Rotation;
    }

    uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation)
        | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
    // Of course, they may not actually be ones, so we have to check that:
    if (!isMask_64(ReplicatedOnes))
      continue;

    Num1s = CountTrailingOnes_64(ReplicatedOnes);

    // We know we've got an almost valid encoding (certainly, if this is
    // invalid no other parameters would work).
    break;
  }

  // The encodings which would produce all 1s are RESERVED.
  if (RepeatWidth == 1 || Num1s == RepeatWidth) return false;

  uint32_t N = RepeatWidth == 64;
  uint32_t ImmR = RepeatWidth - Rotation;
  uint32_t ImmS = Num1s - 1;

  switch (RepeatWidth) {
  default: break;               // No action required for other valid rotations.
  case 16: ImmS |= 0x20; break; // 10ssss
  case 8: ImmS |= 0x30; break;  // 110sss
  case 4: ImmS |= 0x38; break;  // 1110ss
  case 2: ImmS |= 0x3c; break;  // 11110s
  }

  Bits = ImmS | (ImmR << 6) | (N << 12);

  return true;
}
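
// Worked example: Imm = 0x00ff00ff00ff00ff with RegWidth = 64 replicates a
// 16-bit element (0x00ff: eight ones, rotation 0), so N = 0, ImmR = 0 and
// ImmS = 0b100111 (the 10ssss form with S = 7), i.e. Bits = 0x027.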

bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm) {
  uint32_t N = Bits >> 12;
  uint32_t ImmR = (Bits >> 6) & 0x3f;
  uint32_t ImmS = Bits & 0x3f;

  // N=1 encodes a 64-bit replication and is invalid for the 32-bit
  // instructions.
  if (RegWidth == 32 && N != 0) return false;

  int Width = 0;
  if (N == 1)
    Width = 64;
  else if ((ImmS & 0x20) == 0)
    Width = 32;
  else if ((ImmS & 0x10) == 0)
    Width = 16;
  else if ((ImmS & 0x08) == 0)
    Width = 8;
  else if ((ImmS & 0x04) == 0)
    Width = 4;
  else if ((ImmS & 0x02) == 0)
    Width = 2;
  else {
    // ImmS is 0b11111x: UNALLOCATED
    return false;
  }

  int Num1s = (ImmS & (Width - 1)) + 1;

  // All encodings which would map to -1 (signed) are RESERVED.
  if (Num1s == Width) return false;

  int Rotation = (ImmR & (Width - 1));
  uint64_t Mask = (1ULL << Num1s) - 1;
  uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
  Mask = (Mask >> Rotation)
       | ((Mask << (Width - Rotation)) & WidthMask);

  Imm = 0;
  for (unsigned i = 0; i < RegWidth / Width; ++i) {
    Imm |= Mask;
    Mask <<= Width;
  }

  return true;
}
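
// Decoding the same encoding back: Bits = 0x027 gives N = 0, ImmR = 0 and
// ImmS = 0b100111, hence Width = 16 and Num1s = 8, and for RegWidth = 64 the
// reconstructed immediate is again 0x00ff00ff00ff00ff.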

bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
  // If high bits are set then a 32-bit MOVZ can't possibly work.
  if (RegWidth == 32 && (Value & ~0xffffffffULL))
    return false;

  for (int i = 0; i < RegWidth; i += 16) {
    // If the value is 0 when we mask out all the bits that could be set with
    // the current LSL value then it's representable.
    if ((Value & ~(0xffffULL << i)) == 0) {
      Shift = i / 16;
      UImm16 = (Value >> i) & 0xffff;
      return true;
    }
  }
  return false;
}
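
// For example, with RegWidth = 32 and Value = 0x12340000 the i = 16
// iteration succeeds, giving UImm16 = 0x1234 and Shift = 1, i.e. the
// instruction "movz w0, #0x1234, lsl #16".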
|
||||
|
||||
bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
|
||||
// MOVN is defined to set its register to NOT(LSL(imm16, shift)).
|
||||
|
||||
// We have to be a little careful about a 32-bit register: 0xffff_1234 *is*
|
||||
// representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not
|
||||
// a valid input for isMOVZImm.
|
||||
if (RegWidth == 32 && (Value & ~0xffffffffULL))
|
||||
return false;
|
||||
|
||||
uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value;
|
||||
|
||||
return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift);
|
||||
}
|
||||
|
||||
bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
                            int &UImm16, int &Shift) {
  if (isMOVZImm(RegWidth, Value, UImm16, Shift))
    return false;

  return isMOVNImm(RegWidth, Value, UImm16, Shift);
}

MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
                                                          StringRef CPU,
                                                          StringRef FS) {
  MCSubtargetInfo *X = new MCSubtargetInfo();
  InitAArch64MCSubtargetInfo(X, TT, CPU, "");
  return X;
}

static MCInstrInfo *createAArch64MCInstrInfo() {
  MCInstrInfo *X = new MCInstrInfo();
  InitAArch64MCInstrInfo(X);
  return X;
}

static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
  MCRegisterInfo *X = new MCRegisterInfo();
  InitAArch64MCRegisterInfo(X, AArch64::X30);
  return X;
}

static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) {
  Triple TheTriple(TT);

  MCAsmInfo *MAI = new AArch64ELFMCAsmInfo();
  MachineLocation Dst(MachineLocation::VirtualFP);
  MachineLocation Src(AArch64::XSP, 0);
  MAI->addInitialFrameState(0, Dst, Src);

  return MAI;
}

static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                                 CodeModel::Model CM,
                                                 CodeGenOpt::Level OL) {
  MCCodeGenInfo *X = new MCCodeGenInfo();
  if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) {
    // On ELF platforms the default static relocation model has a smart enough
    // linker to cope with referencing external symbols defined in a shared
    // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
    RM = Reloc::Static;
  }

  if (CM == CodeModel::Default)
    CM = CodeModel::Small;

  X->InitMCCodeGenInfo(RM, CM, OL);
  return X;
}

static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
                                    MCContext &Ctx, MCAsmBackend &MAB,
                                    raw_ostream &OS,
                                    MCCodeEmitter *Emitter,
                                    bool RelaxAll,
                                    bool NoExecStack) {
  Triple TheTriple(TT);

  return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
}

static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
                                                 unsigned SyntaxVariant,
                                                 const MCAsmInfo &MAI,
                                                 const MCInstrInfo &MII,
                                                 const MCRegisterInfo &MRI,
                                                 const MCSubtargetInfo &STI) {
  if (SyntaxVariant == 0)
    return new AArch64InstPrinter(MAI, MII, MRI, STI);
  return 0;
}

namespace {

class AArch64MCInstrAnalysis : public MCInstrAnalysis {
public:
  AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}

  virtual bool isUnconditionalBranch(const MCInst &Inst) const {
    if (Inst.getOpcode() == AArch64::Bcc
        && Inst.getOperand(0).getImm() == A64CC::AL)
      return true;
    return MCInstrAnalysis::isUnconditionalBranch(Inst);
  }

  virtual bool isConditionalBranch(const MCInst &Inst) const {
    if (Inst.getOpcode() == AArch64::Bcc
        && Inst.getOperand(0).getImm() == A64CC::AL)
      return false;
    return MCInstrAnalysis::isConditionalBranch(Inst);
  }

  uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
                          uint64_t Size) const {
    unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
    // FIXME: We only handle PCRel branches for now.
    if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType
        != MCOI::OPERAND_PCREL)
      return -1ULL;

    int64_t Imm = Inst.getOperand(LblOperand).getImm();

    return Addr + Imm;
  }
};

}

static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) {
  return new AArch64MCInstrAnalysis(Info);
}

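A disassembler-style consumer would use this analysis roughly as follows (a hedged sketch against the MC APIs of the time; the function name and the origin of Inst/Addr/Size are assumed, not part of the patch):

#include <cstdio>
#include "llvm/MC/MCInstrAnalysis.h"

using namespace llvm;

// Given a decoded MCInst and the address it was decoded at, print the
// branch target when the analysis can evaluate it.
void printBranchTarget(const MCInstrAnalysis &Analysis, const MCInst &Inst,
                       uint64_t Addr, uint64_t Size) {
  if (Analysis.isConditionalBranch(Inst) ||
      Analysis.isUnconditionalBranch(Inst)) {
    uint64_t Target = Analysis.evaluateBranch(Inst, Addr, Size);
    if (Target != -1ULL) // -1ULL signals "couldn't evaluate" above.
      printf("  -> branch to 0x%llx\n", (unsigned long long)Target);
  }
}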
extern "C" void LLVMInitializeAArch64TargetMC() {
|
||||
// Register the MC asm info.
|
||||
RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo);
|
||||
|
||||
// Register the MC codegen info.
|
||||
TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target,
|
||||
createAArch64MCCodeGenInfo);
|
||||
|
||||
// Register the MC instruction info.
|
||||
TargetRegistry::RegisterMCInstrInfo(TheAArch64Target,
|
||||
createAArch64MCInstrInfo);
|
||||
|
||||
// Register the MC register info.
|
||||
TargetRegistry::RegisterMCRegInfo(TheAArch64Target,
|
||||
createAArch64MCRegisterInfo);
|
||||
|
||||
// Register the MC subtarget info.
|
||||
using AArch64_MC::createAArch64MCSubtargetInfo;
|
||||
TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target,
|
||||
createAArch64MCSubtargetInfo);
|
||||
|
||||
// Register the MC instruction analyzer.
|
||||
TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target,
|
||||
createAArch64MCInstrAnalysis);
|
||||
|
||||
// Register the MC Code Emitter
|
||||
TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target,
|
||||
createAArch64MCCodeEmitter);
|
||||
|
||||
// Register the asm backend.
|
||||
TargetRegistry::RegisterMCAsmBackend(TheAArch64Target,
|
||||
createAArch64AsmBackend);
|
||||
|
||||
// Register the object streamer.
|
||||
TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target,
|
||||
createMCStreamer);
|
||||
|
||||
// Register the MCInstPrinter.
|
||||
TargetRegistry::RegisterMCInstPrinter(TheAArch64Target,
|
||||
createAArch64MCInstPrinter);
|
||||
}
|
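Once these hooks are registered, any MC-layer tool can construct the AArch64 components by name. A hedged sketch of the lookup side (standard TargetRegistry usage of this era; the helper name is illustrative, not code from this patch):

#include <string>
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;

// Look the target up by triple and build a subtarget info from it, the way
// llc or llvm-mc would after LLVMInitializeAArch64TargetMC() has run.
const MCSubtargetInfo *buildAArch64STI() {
  std::string Err;
  const Target *T = TargetRegistry::lookupTarget("aarch64-none-linux-gnu",
                                                 Err);
  if (!T)
    return 0; // Target wasn't registered (e.g. built without AArch64).
  return T->createMCSubtargetInfo("aarch64-none-linux-gnu",
                                  /*CPU=*/"", /*Features=*/"");
}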
65
lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
Normal file
@ -0,0 +1,65 @@
//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides AArch64 specific target descriptions.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_AARCH64MCTARGETDESC_H
#define LLVM_AARCH64MCTARGETDESC_H

#include "llvm/Support/DataTypes.h"

namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
class raw_ostream;

extern Target TheAArch64Target;

namespace AArch64_MC {
  MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU,
                                                StringRef FS);
}

MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
                                          const MCRegisterInfo &MRI,
                                          const MCSubtargetInfo &STI,
                                          MCContext &Ctx);

MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS,
                                             uint8_t OSABI);

MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT,
                                      StringRef CPU);

} // End llvm namespace

// Defines symbolic names for AArch64 registers. This defines a mapping from
// register name to register number.
//
#define GET_REGINFO_ENUM
#include "AArch64GenRegisterInfo.inc"

// Defines symbolic names for the AArch64 instructions.
//
#define GET_INSTRINFO_ENUM
#include "AArch64GenInstrInfo.inc"

#define GET_SUBTARGETINFO_ENUM
#include "AArch64GenSubtargetInfo.inc"

#endif
13
lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
Normal file
@ -0,0 +1,13 @@
add_llvm_library(LLVMAArch64Desc
  AArch64AsmBackend.cpp
  AArch64ELFObjectWriter.cpp
  AArch64ELFStreamer.cpp
  AArch64MCAsmInfo.cpp
  AArch64MCCodeEmitter.cpp
  AArch64MCExpr.cpp
  AArch64MCTargetDesc.cpp
  )
add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)

# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
24
lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
Normal file
@ -0,0 +1,24 @@
;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = AArch64Desc
parent = AArch64
required_libraries = AArch64AsmPrinter MC Support
add_to_library_groups = AArch64

16
lib/Target/AArch64/MCTargetDesc/Makefile
Normal file
@ -0,0 +1,16 @@
##===- lib/Target/AArch64/MCTargetDesc/Makefile ------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##

LEVEL = ../../../..
LIBRARYNAME = LLVMAArch64Desc

# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common
30
lib/Target/AArch64/Makefile
Normal file
@ -0,0 +1,30 @@
##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##

LEVEL = ../../..
LIBRARYNAME = LLVMAArch64CodeGen
TARGET = AArch64

# Make sure that tblgen is run, first thing.
BUILT_SOURCES = AArch64GenAsmMatcher.inc \
                AArch64GenAsmWriter.inc \
                AArch64GenCallingConv.inc \
                AArch64GenDAGISel.inc \
                AArch64GenDisassemblerTables.inc \
                AArch64GenInstrInfo.inc \
                AArch64GenMCCodeEmitter.inc \
                AArch64GenMCPseudoLowering.inc \
                AArch64GenRegisterInfo.inc \
                AArch64GenSubtargetInfo.inc

DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc

include $(LEVEL)/Makefile.common
2
lib/Target/AArch64/README.txt
Normal file
@ -0,0 +1,2 @@
This file will contain changes that need to be made before AArch64 can become an
officially supported target. Currently a placeholder.
20
lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
Normal file
@ -0,0 +1,20 @@
//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;

Target llvm::TheAArch64Target;

extern "C" void LLVMInitializeAArch64TargetInfo() {
  RegisterTarget<Triple::aarch64>
    X(TheAArch64Target, "aarch64", "AArch64");
}
7
lib/Target/AArch64/TargetInfo/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )

add_llvm_library(LLVMAArch64Info
  AArch64TargetInfo.cpp
  )

add_dependencies(LLVMAArch64Info AArch64CommonTableGen)
24
lib/Target/AArch64/TargetInfo/LLVMBuild.txt
Normal file
@ -0,0 +1,24 @@
;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
;   http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Library
name = AArch64Info
parent = AArch64
required_libraries = MC Support Target
add_to_library_groups = AArch64
15
lib/Target/AArch64/TargetInfo/Makefile
Normal file
@ -0,0 +1,15 @@
##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMAArch64Info

# Hack: we need to include 'main' target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..

include $(LEVEL)/Makefile.common
@ -64,6 +64,9 @@ public:
    return getSubExpr()->FindAssociatedSection();
  }

  // There are no TLS ARMMCExprs at the moment.
  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}

  static bool classof(const MCExpr *E) {
    return E->getKind() == MCExpr::Target;
  }
@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;

[common]
subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore

; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
4
projects/sample/autoconf/config.sub
vendored
@ -251,7 +251,8 @@ case $basic_machine in
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
| be32 | be64 \
| be32 | be64 \
| aarch64 \
| bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
@ -359,6 +360,7 @@ case $basic_machine in
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
| aarch64-* \
| avr-* | avr32-* \
| be32-* | be64-* \
| bfin-* | bs2000-* \
@ -304,6 +304,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@ -474,6 +475,7 @@ else
PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;;
Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
@ -596,7 +598,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -604,6 +606,7 @@ case "$enableval" in
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
@ -617,6 +620,7 @@ case "$enableval" in
Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
9
projects/sample/configure
vendored
@ -3844,6 +3844,7 @@ else
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@ -5100,6 +5101,8 @@ else
x86_64) TARGET_HAS_JIT=1
;;
ARM) TARGET_HAS_JIT=1
;;
AArch64) TARGET_HAS_JIT=0
;;
Mips) TARGET_HAS_JIT=1
;;
@ -5297,7 +5300,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -5305,6 +5308,7 @@ case "$enableval" in
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
@ -5318,6 +5322,7 @@ case "$enableval" in
Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
@ -10348,7 +10353,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
#line 10351 "configure"
#line 10356 "configure"
#include "confdefs.h"

#if HAVE_DLFCN_H
54
test/CodeGen/AArch64/adc.ll
Normal file
@ -0,0 +1,54 @@
; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s

define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
; CHECK: test_simple:

  %valadd = add i128 %a, %b
; CHECK: adds [[ADDLO:x[0-9]+]], x0, x2
; CHECK-NEXT: adcs [[ADDHI:x[0-9]+]], x1, x3

  %valsub = sub i128 %valadd, %c
; CHECK: subs x0, [[ADDLO]], x4
; CHECK: sbcs x1, [[ADDHI]], x5

  ret i128 %valsub
; CHECK: ret
}

define i128 @test_imm(i128 %a) {
; CHECK: test_imm:

  %val = add i128 %a, 12
; CHECK: adds x0, x0, #12
; CHECK: adcs x1, x1, {{x[0-9]|xzr}}

  ret i128 %val
; CHECK: ret
}

define i128 @test_shifted(i128 %a, i128 %b) {
; CHECK: test_shifted:

  %rhs = shl i128 %b, 45

  %val = add i128 %a, %rhs
; CHECK: adds x0, x0, x2, lsl #45
; CHECK: adcs x1, x1, {{x[0-9]}}

  ret i128 %val
; CHECK: ret
}

define i128 @test_extended(i128 %a, i16 %b) {
; CHECK: test_extended:

  %ext = sext i16 %b to i128
  %rhs = shl i128 %ext, 3

  %val = add i128 %a, %rhs
; CHECK: adds x0, x0, w2, sxth #3
; CHECK: adcs x1, x1, {{x[0-9]}}

  ret i128 %val
; CHECK: ret
}
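The first test pins down the expected lowering: an i128 add becomes an adds/adcs pair. The same split is easy to state in plain C++ (an illustrative sketch, not code from the patch):

#include <cassert>
#include <cstdint>

// 128-bit addition out of 64-bit halves, mirroring adds (set carry)
// followed by adcs (consume carry) in the CHECK lines above.
static void add128(uint64_t ALo, uint64_t AHi, uint64_t BLo, uint64_t BHi,
                   uint64_t &Lo, uint64_t &Hi) {
  Lo = ALo + BLo;            // adds: low halves, carry out
  uint64_t Carry = Lo < ALo; // carry flag from the low addition
  Hi = AHi + BHi + Carry;    // adcs: high halves plus carry in
}

int main() {
  uint64_t Lo, Hi;
  add128(~0ULL, 0, 1, 0, Lo, Hi); // all-ones + 1 carries into the high half
  assert(Lo == 0 && Hi == 1);
}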
295
test/CodeGen/AArch64/addsub-shifted.ll
Normal file
@ -0,0 +1,295 @@
; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s

@var32 = global i32 0
@var64 = global i64 0

define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
; CHECK: test_lsl_arith:

  %rhs1 = load volatile i32* @var32
  %shift1 = shl i32 %rhs1, 18
  %val1 = add i32 %lhs32, %shift1
  store volatile i32 %val1, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18

  %rhs2 = load volatile i32* @var32
  %shift2 = shl i32 %rhs2, 31
  %val2 = add i32 %shift2, %lhs32
  store volatile i32 %val2, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31

  %rhs3 = load volatile i32* @var32
  %shift3 = shl i32 %rhs3, 5
  %val3 = sub i32 %lhs32, %shift3
  store volatile i32 %val3, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5

; Subtraction is not commutative!
  %rhs4 = load volatile i32* @var32
  %shift4 = shl i32 %rhs4, 19
  %val4 = sub i32 %shift4, %lhs32
  store volatile i32 %val4, i32* @var32
; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19

  %lhs4a = load volatile i32* @var32
  %shift4a = shl i32 %lhs4a, 15
  %val4a = sub i32 0, %shift4a
  store volatile i32 %val4a, i32* @var32
; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsl #15

  %rhs5 = load volatile i64* @var64
  %shift5 = shl i64 %rhs5, 18
  %val5 = add i64 %lhs64, %shift5
  store volatile i64 %val5, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18

  %rhs6 = load volatile i64* @var64
  %shift6 = shl i64 %rhs6, 31
  %val6 = add i64 %shift6, %lhs64
  store volatile i64 %val6, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31

  %rhs7 = load volatile i64* @var64
  %shift7 = shl i64 %rhs7, 5
  %val7 = sub i64 %lhs64, %shift7
  store volatile i64 %val7, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5

; Subtraction is not commutative!
  %rhs8 = load volatile i64* @var64
  %shift8 = shl i64 %rhs8, 19
  %val8 = sub i64 %shift8, %lhs64
  store volatile i64 %val8, i64* @var64
; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19

  %lhs8a = load volatile i64* @var64
  %shift8a = shl i64 %lhs8a, 60
  %val8a = sub i64 0, %shift8a
  store volatile i64 %val8a, i64* @var64
; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsl #60

  ret void
; CHECK: ret
}

define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
; CHECK: test_lsr_arith:

  %shift1 = lshr i32 %rhs32, 18
  %val1 = add i32 %lhs32, %shift1
  store volatile i32 %val1, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #18

  %shift2 = lshr i32 %rhs32, 31
  %val2 = add i32 %shift2, %lhs32
  store volatile i32 %val2, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #31

  %shift3 = lshr i32 %rhs32, 5
  %val3 = sub i32 %lhs32, %shift3
  store volatile i32 %val3, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #5

; Subtraction is not commutative!
  %shift4 = lshr i32 %rhs32, 19
  %val4 = sub i32 %shift4, %lhs32
  store volatile i32 %val4, i32* @var32
; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #19

  %shift4a = lshr i32 %lhs32, 15
  %val4a = sub i32 0, %shift4a
  store volatile i32 %val4a, i32* @var32
; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsr #15

  %shift5 = lshr i64 %rhs64, 18
  %val5 = add i64 %lhs64, %shift5
  store volatile i64 %val5, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #18

  %shift6 = lshr i64 %rhs64, 31
  %val6 = add i64 %shift6, %lhs64
  store volatile i64 %val6, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #31

  %shift7 = lshr i64 %rhs64, 5
  %val7 = sub i64 %lhs64, %shift7
  store volatile i64 %val7, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #5

; Subtraction is not commutative!
  %shift8 = lshr i64 %rhs64, 19
  %val8 = sub i64 %shift8, %lhs64
  store volatile i64 %val8, i64* @var64
; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #19

  %shift8a = lshr i64 %lhs64, 45
  %val8a = sub i64 0, %shift8a
  store volatile i64 %val8a, i64* @var64
; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsr #45

  ret void
; CHECK: ret
}

define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
; CHECK: test_asr_arith:

  %shift1 = ashr i32 %rhs32, 18
  %val1 = add i32 %lhs32, %shift1
  store volatile i32 %val1, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #18

  %shift2 = ashr i32 %rhs32, 31
  %val2 = add i32 %shift2, %lhs32
  store volatile i32 %val2, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #31

  %shift3 = ashr i32 %rhs32, 5
  %val3 = sub i32 %lhs32, %shift3
  store volatile i32 %val3, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #5

; Subtraction is not commutative!
  %shift4 = ashr i32 %rhs32, 19
  %val4 = sub i32 %shift4, %lhs32
  store volatile i32 %val4, i32* @var32
; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #19

  %shift4a = ashr i32 %lhs32, 15
  %val4a = sub i32 0, %shift4a
  store volatile i32 %val4a, i32* @var32
; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, asr #15

  %shift5 = ashr i64 %rhs64, 18
  %val5 = add i64 %lhs64, %shift5
  store volatile i64 %val5, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #18

  %shift6 = ashr i64 %rhs64, 31
  %val6 = add i64 %shift6, %lhs64
  store volatile i64 %val6, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #31

  %shift7 = ashr i64 %rhs64, 5
  %val7 = sub i64 %lhs64, %shift7
  store volatile i64 %val7, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #5

; Subtraction is not commutative!
  %shift8 = ashr i64 %rhs64, 19
  %val8 = sub i64 %shift8, %lhs64
  store volatile i64 %val8, i64* @var64
; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #19

  %shift8a = ashr i64 %lhs64, 45
  %val8a = sub i64 0, %shift8a
  store volatile i64 %val8a, i64* @var64
; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, asr #45

  ret void
; CHECK: ret
}

define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
; CHECK: test_cmp:

  %shift1 = shl i32 %rhs32, 13
  %tst1 = icmp uge i32 %lhs32, %shift1
  br i1 %tst1, label %t2, label %end
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsl #13

t2:
  %shift2 = lshr i32 %rhs32, 20
  %tst2 = icmp ne i32 %lhs32, %shift2
  br i1 %tst2, label %t3, label %end
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsr #20

t3:
  %shift3 = ashr i32 %rhs32, 9
  %tst3 = icmp ne i32 %lhs32, %shift3
  br i1 %tst3, label %t4, label %end
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, asr #9

t4:
  %shift4 = shl i64 %rhs64, 43
  %tst4 = icmp uge i64 %lhs64, %shift4
  br i1 %tst4, label %t5, label %end
; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsl #43

t5:
  %shift5 = lshr i64 %rhs64, 20
  %tst5 = icmp ne i64 %lhs64, %shift5
  br i1 %tst5, label %t6, label %end
; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsr #20

t6:
  %shift6 = ashr i64 %rhs64, 59
  %tst6 = icmp ne i64 %lhs64, %shift6
  br i1 %tst6, label %t7, label %end
; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, asr #59

t7:
  ret i32 1
end:

  ret i32 0
; CHECK: ret
}

define i32 @test_cmn(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
; CHECK: test_cmn:

  %shift1 = shl i32 %rhs32, 13
  %val1 = sub i32 0, %shift1
  %tst1 = icmp uge i32 %lhs32, %val1
  br i1 %tst1, label %t2, label %end
; Important that this isn't lowered to a cmn instruction because if %rhs32 ==
; 0 then the results will differ.
; CHECK: sub [[RHS:w[0-9]+]], wzr, {{w[0-9]+}}, lsl #13
; CHECK: cmp {{w[0-9]+}}, [[RHS]]

t2:
  %shift2 = lshr i32 %rhs32, 20
  %val2 = sub i32 0, %shift2
  %tst2 = icmp ne i32 %lhs32, %val2
  br i1 %tst2, label %t3, label %end
; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, lsr #20

t3:
  %shift3 = ashr i32 %rhs32, 9
  %val3 = sub i32 0, %shift3
  %tst3 = icmp eq i32 %lhs32, %val3
  br i1 %tst3, label %t4, label %end
; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, asr #9

t4:
  %shift4 = shl i64 %rhs64, 43
  %val4 = sub i64 0, %shift4
  %tst4 = icmp slt i64 %lhs64, %val4
  br i1 %tst4, label %t5, label %end
; Again, it's important that cmn isn't used here in case %rhs64 == 0.
; CHECK: sub [[RHS:x[0-9]+]], xzr, {{x[0-9]+}}, lsl #43
; CHECK: cmp {{x[0-9]+}}, [[RHS]]

t5:
  %shift5 = lshr i64 %rhs64, 20
  %val5 = sub i64 0, %shift5
  %tst5 = icmp ne i64 %lhs64, %val5
  br i1 %tst5, label %t6, label %end
; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, lsr #20

t6:
  %shift6 = ashr i64 %rhs64, 59
  %val6 = sub i64 0, %shift6
  %tst6 = icmp ne i64 %lhs64, %val6
  br i1 %tst6, label %t7, label %end
; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, asr #59

t7:
  ret i32 1
end:

  ret i32 0
; CHECK: ret
}
127
test/CodeGen/AArch64/addsub.ll
Normal file
@ -0,0 +1,127 @@
; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s

; Note that this should be refactored (for efficiency if nothing else)
; when the PCS is implemented so we don't have to worry about the
; loads and stores.

@var_i32 = global i32 42
@var_i64 = global i64 0

; Add pure 12-bit immediates:
define void @add_small() {
; CHECK: add_small:

; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4095
  %val32 = load i32* @var_i32
  %newval32 = add i32 %val32, 4095
  store i32 %newval32, i32* @var_i32

; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #52
  %val64 = load i64* @var_i64
  %newval64 = add i64 %val64, 52
  store i64 %newval64, i64* @var_i64

  ret void
}

; Add 12-bit immediates, shifted left by 12 bits
define void @add_med() {
; CHECK: add_med:

; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
  %val32 = load i32* @var_i32
  %newval32 = add i32 %val32, 14610432 ; =0xdef000
  store i32 %newval32, i32* @var_i32

; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
  %val64 = load i64* @var_i64
  %newval64 = add i64 %val64, 16773120 ; =0xfff000
  store i64 %newval64, i64* @var_i64

  ret void
}

; Subtract 12-bit immediates
define void @sub_small() {
; CHECK: sub_small:

; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4095
  %val32 = load i32* @var_i32
  %newval32 = sub i32 %val32, 4095
  store i32 %newval32, i32* @var_i32

; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #52
  %val64 = load i64* @var_i64
  %newval64 = sub i64 %val64, 52
  store i64 %newval64, i64* @var_i64

  ret void
}

; Subtract 12-bit immediates, shifted left by 12 bits
define void @sub_med() {
; CHECK: sub_med:

; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
  %val32 = load i32* @var_i32
  %newval32 = sub i32 %val32, 14610432 ; =0xdef000
  store i32 %newval32, i32* @var_i32

; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
  %val64 = load i64* @var_i64
  %newval64 = sub i64 %val64, 16773120 ; =0xfff000
  store i64 %newval64, i64* @var_i64

  ret void
}

define void @testing() {
; CHECK: testing:
  %val = load i32* @var_i32

; CHECK: cmp {{w[0-9]+}}, #4095
; CHECK: b.ne .LBB4_6
  %cmp_pos_small = icmp ne i32 %val, 4095
  br i1 %cmp_pos_small, label %ret, label %test2

test2:
; CHECK: cmp {{w[0-9]+}}, #3567, lsl #12
; CHECK: b.lo .LBB4_6
  %newval2 = add i32 %val, 1
  store i32 %newval2, i32* @var_i32
  %cmp_pos_big = icmp ult i32 %val, 14610432
  br i1 %cmp_pos_big, label %ret, label %test3

test3:
; CHECK: cmp {{w[0-9]+}}, #123
; CHECK: b.lt .LBB4_6
  %newval3 = add i32 %val, 2
  store i32 %newval3, i32* @var_i32
  %cmp_pos_slt = icmp slt i32 %val, 123
  br i1 %cmp_pos_slt, label %ret, label %test4

test4:
; CHECK: cmp {{w[0-9]+}}, #321
; CHECK: b.gt .LBB4_6
  %newval4 = add i32 %val, 3
  store i32 %newval4, i32* @var_i32
  %cmp_pos_sgt = icmp sgt i32 %val, 321
  br i1 %cmp_pos_sgt, label %ret, label %test5

test5:
; CHECK: cmn {{w[0-9]+}}, #444
; CHECK: b.gt .LBB4_6
  %newval5 = add i32 %val, 4
  store i32 %newval5, i32* @var_i32
  %cmp_neg_uge = icmp sgt i32 %val, -444
  br i1 %cmp_neg_uge, label %ret, label %test6

test6:
  %newval6 = add i32 %val, 5
  store i32 %newval6, i32* @var_i32
  ret void

ret:
  ret void
}
; TODO: adds/subs
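These tests cover the two legal shapes of an arithmetic immediate: a 12-bit value, optionally shifted left by 12 bits. A hedged sketch of the corresponding predicate (an illustrative helper, not the backend's actual code):

#include <cassert>
#include <cstdint>

// An AArch64 add/sub immediate is an unsigned 12-bit value, optionally
// shifted left by 12 -- exactly the two forms the tests above exercise.
static bool isAddSubImm(uint64_t V) {
  return (V & ~0xfffULL) == 0 || (V & ~(0xfffULL << 12)) == 0;
}

int main() {
  assert(isAddSubImm(4095));      // add w0, w0, #4095
  assert(isAddSubImm(0xdef000));  // add w0, w0, #3567, lsl #12
  assert(!isAddSubImm(0xdef001)); // needs bits in both halves: not encodable
}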
189
test/CodeGen/AArch64/addsub_ext.ll
Normal file
@ -0,0 +1,189 @@
; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s

@var8 = global i8 0
@var16 = global i16 0
@var32 = global i32 0
@var64 = global i64 0

define void @addsub_i8rhs() {
; CHECK: addsub_i8rhs:
  %val8_tmp = load i8* @var8
  %lhs32 = load i32* @var32
  %lhs64 = load i64* @var64

; Need this to prevent extension upon load and give a vanilla i8 operand.
  %val8 = add i8 %val8_tmp, 123

; Zero-extending to 32-bits
  %rhs32_zext = zext i8 %val8 to i32
  %res32_zext = add i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3

; Zero-extending to 64-bits
  %rhs64_zext = zext i8 %val8 to i64
  %res64_zext = add i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i8 %val8 to i32
  %res32_sext = add i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i8 %val8 to i64
  %res64_sext = add i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4

; CMP variants
  %tst = icmp slt i32 %lhs32, %rhs32_zext
  br i1 %tst, label %end, label %test2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb

test2:
  %cmp_sext = sext i8 %val8 to i64
  %tst2 = icmp eq i64 %lhs64, %cmp_sext
  br i1 %tst2, label %other, label %end
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb

other:
  store volatile i32 %lhs32, i32* @var32
  ret void

end:
  ret void
}

define void @addsub_i16rhs() {
; CHECK: addsub_i16rhs:
  %val16_tmp = load i16* @var16
  %lhs32 = load i32* @var32
  %lhs64 = load i64* @var64

; Need this to prevent extension upon load and give a vanilla i16 operand.
  %val16 = add i16 %val16_tmp, 123

; Zero-extending to 32-bits
  %rhs32_zext = zext i16 %val16 to i32
  %res32_zext = add i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3

; Zero-extending to 64-bits
  %rhs64_zext = zext i16 %val16 to i64
  %res64_zext = add i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i16 %val16 to i32
  %res32_sext = add i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i16 %val16 to i64
  %res64_sext = add i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4

; CMP variants
  %tst = icmp slt i32 %lhs32, %rhs32_zext
  br i1 %tst, label %end, label %test2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth

test2:
  %cmp_sext = sext i16 %val16 to i64
  %tst2 = icmp eq i64 %lhs64, %cmp_sext
  br i1 %tst2, label %other, label %end
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth

other:
  store volatile i32 %lhs32, i32* @var32
  ret void

end:
  ret void
}

; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
; example), but the remaining instructions are probably not idiomatic
; in the face of "add/sub (shifted register)" so I don't intend to.
define void @addsub_i32rhs() {
; CHECK: addsub_i32rhs:
  %val32_tmp = load i32* @var32
  %lhs64 = load i64* @var64

  %val32 = add i32 %val32_tmp, 123

  %rhs64_zext = zext i32 %val32 to i64
  %res64_zext = add i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw

  %rhs64_zext_shift = shl i64 %rhs64_zext, 2
  %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2

  %rhs64_sext = sext i32 %val32 to i64
  %res64_sext = add i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw

  %rhs64_sext_shift = shl i64 %rhs64_sext, 2
  %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2

  ret void
}
35
test/CodeGen/AArch64/adrp-relocation.ll
Normal file
@ -0,0 +1,35 @@
; RUN: llc -march=aarch64 -verify-machineinstrs -filetype=obj < %s | elf-dump | FileCheck %s

define fp128 @testfn() nounwind {
entry:
  ret fp128 0xL00000000000000004004500000000000
}

define fp128 @foo() nounwind {
entry:
  %bar = alloca fp128 ()*, align 8
  store fp128 ()* @testfn, fp128 ()** %bar, align 8
  %call = call fp128 @testfn()
  ret fp128 %call
}

; The above should produce an ADRP/ADD pair to calculate the address of
; testfn. The important point is that LLVM shouldn't think it can deal with the
; relocation on the ADRP itself (even though it knows everything about the
; relative offsets of testfn and foo) because its value depends on where this
; object file's .text section gets relocated in memory.

; CHECK: .rela.text

; CHECK: # Relocation 0
; CHECK-NEXT: (('r_offset', 0x0000000000000028)
; CHECK-NEXT:  ('r_sym', 0x00000009)
; CHECK-NEXT:  ('r_type', 0x00000113)
; CHECK-NEXT:  ('r_addend', 0x0000000000000000)
; CHECK-NEXT: ),
; CHECK-NEXT:  Relocation 1
; CHECK-NEXT: (('r_offset', 0x000000000000002c)
; CHECK-NEXT:  ('r_sym', 0x00000009)
; CHECK-NEXT:  ('r_type', 0x00000115)
; CHECK-NEXT:  ('r_addend', 0x0000000000000000)
; CHECK-NEXT: ),
134
test/CodeGen/AArch64/alloca.ll
Normal file
@ -0,0 +1,134 @@
; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s

declare void @use_addr(i8*)

define void @test_simple_alloca(i64 %n) {
; CHECK: test_simple_alloca:

  %buf = alloca i8, i64 %n
; Make sure we align the stack change to 16 bytes:
; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0

; Make sure we change SP. It would be surprising if anything but x0 were used
; for the final sp, but it could be if it was then moved into x0.
; CHECK: mov [[TMP:x[0-9]+]], sp
; CHECK: sub x0, [[TMP]], [[SPDELTA]]
; CHECK: mov sp, x0

  call void @use_addr(i8* %buf)
; CHECK: bl use_addr

  ret void
; Make sure epilogue restores sp from fp
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: add sp, sp, #32
; CHECK: ret
}

declare void @use_addr_loc(i8*, i64*)

define i64 @test_alloca_with_local(i64 %n) {
; CHECK: test_alloca_with_local:
; CHECK: sub sp, sp, #32
; CHECK: stp x29, x30, [sp, #16]

  %loc = alloca i64
  %buf = alloca i8, i64 %n
; Make sure we align the stack change to 16 bytes:
; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0

; Make sure we change SP. It would be surprising if anything but x0 were used
; for the final sp, but it could be if it was then moved into x0.
; CHECK: mov [[TMP:x[0-9]+]], sp
; CHECK: sub x0, [[TMP]], [[SPDELTA]]
; CHECK: mov sp, x0

; Obviously suboptimal code here, but it has to get &local into x1
; CHECK: sub [[TMP:x[0-9]+]], x29, [[LOC_FROM_FP:#[0-9]+]]
; CHECK: add x1, [[TMP]], #0

  call void @use_addr_loc(i8* %buf, i64* %loc)
; CHECK: bl use_addr

  %val = load i64* %loc
; CHECK: sub x[[TMP:[0-9]+]], x29, [[LOC_FROM_FP]]
; CHECK: ldr x0, [x[[TMP]]]

  ret i64 %val
; Make sure epilogue restores sp from fp
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: add sp, sp, #32
; CHECK: ret
}

define void @test_variadic_alloca(i64 %n, ...) {
; CHECK: test_variadic_alloca:

; CHECK: sub sp, sp, #208
; CHECK: stp x29, x30, [sp, #192]
; CHECK: add x29, sp, #192
; CHECK: sub x9, x29, #192
; CHECK: add x8, x9, #0
; CHECK: str q7, [x8, #112]
; [...]
; CHECK: str q1, [x8, #16]

  %addr = alloca i8, i64 %n

  call void @use_addr(i8* %addr)
; CHECK: bl use_addr

  ret void
; CHECK: sub sp, x29, #192
; CHECK: ldp x29, x30, [sp, #192]
; CHECK: add sp, sp, #208
}

define void @test_alloca_large_frame(i64 %n) {
; CHECK: test_alloca_large_frame:

; CHECK: sub sp, sp, #496
; CHECK: stp x29, x30, [sp, #480]
; CHECK: add x29, sp, #480
; CHECK: sub sp, sp, #48
; CHECK: sub sp, sp, #1953, lsl #12

  %addr1 = alloca i8, i64 %n
  %addr2 = alloca i64, i64 1000000

  call void @use_addr_loc(i8* %addr1, i64* %addr2)

  ret void
; CHECK: sub sp, x29, #480
; CHECK: ldp x29, x30, [sp, #480]
; CHECK: add sp, sp, #496
}

declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)

define void @test_scoped_alloca(i64 %n) {
; CHECK: test_scoped_alloca
; CHECK: sub sp, sp, #32

  %sp = call i8* @llvm.stacksave()
; CHECK: mov [[SAVED_SP:x[0-9]+]], sp

  %addr = alloca i8, i64 %n
; CHECK: and [[SPDELTA:x[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0
; CHECK: mov [[OLDSP:x[0-9]+]], sp
; CHECK: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]]
; CHECK: mov sp, [[NEWSP]]

  call void @use_addr(i8* %addr)
; CHECK: bl use_addr

  call void @llvm.stackrestore(i8* %sp)
; CHECK: mov sp, [[SAVED_SP]]

  ret void
}
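The "add #15" / "and #0xfffffffffffffff0" pair that the first two tests check is the standard round-up-to-16 idiom for dynamic stack allocations. In plain C++ terms (a small illustrative sketch):

#include <cassert>
#include <cstdint>

// Round a byte count up to a 16-byte multiple: add 15, then clear the low
// four bits -- the same add/and pair the CHECK lines look for.
static uint64_t alignTo16(uint64_t N) {
  return (N + 15) & ~UINT64_C(15);
}

int main() {
  assert(alignTo16(0) == 0);
  assert(alignTo16(1) == 16);
  assert(alignTo16(16) == 16);
  assert(alignTo16(17) == 32);
}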
231
test/CodeGen/AArch64/analyze-branch.ll
Normal file
@ -0,0 +1,231 @@
; RUN: llc -march=aarch64 < %s | FileCheck %s

; This test checks that LLVM can do basic stripping and reapplying of branches
; to basic blocks.

declare void @test_true()
declare void @test_false()

; !0 corresponds to a branch being taken, !1 to not being taken.
!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}

define void @test_Bcc_fallthrough_taken(i32 %in) nounwind {
; CHECK: test_Bcc_fallthrough_taken:
  %tst = icmp eq i32 %in, 42
  br i1 %tst, label %true, label %false, !prof !0

; CHECK: cmp {{w[0-9]+}}, #42

; CHECK: b.ne [[FALSE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_true

; CHECK: [[FALSE]]:
; CHECK: bl test_false

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}

define void @test_Bcc_fallthrough_nottaken(i32 %in) nounwind {
; CHECK: test_Bcc_fallthrough_nottaken:
  %tst = icmp eq i32 %in, 42
  br i1 %tst, label %true, label %false, !prof !1

; CHECK: cmp {{w[0-9]+}}, #42

; CHECK: b.eq [[TRUE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_false

; CHECK: [[TRUE]]:
; CHECK: bl test_true

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}

define void @test_CBZ_fallthrough_taken(i32 %in) nounwind {
; CHECK: test_CBZ_fallthrough_taken:
  %tst = icmp eq i32 %in, 0
  br i1 %tst, label %true, label %false, !prof !0

; CHECK: cbnz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_true

; CHECK: [[FALSE]]:
; CHECK: bl test_false

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}

define void @test_CBZ_fallthrough_nottaken(i64 %in) nounwind {
; CHECK: test_CBZ_fallthrough_nottaken:
  %tst = icmp eq i64 %in, 0
  br i1 %tst, label %true, label %false, !prof !1

; CHECK: cbz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_false

; CHECK: [[TRUE]]:
; CHECK: bl test_true

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}

define void @test_CBNZ_fallthrough_taken(i32 %in) nounwind {
; CHECK: test_CBNZ_fallthrough_taken:
  %tst = icmp ne i32 %in, 0
  br i1 %tst, label %true, label %false, !prof !0

; CHECK: cbz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_true

; CHECK: [[FALSE]]:
; CHECK: bl test_false

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}

define void @test_CBNZ_fallthrough_nottaken(i64 %in) nounwind {
; CHECK: test_CBNZ_fallthrough_nottaken:
  %tst = icmp ne i64 %in, 0
  br i1 %tst, label %true, label %false, !prof !1

; CHECK: cbnz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_false

; CHECK: [[TRUE]]:
; CHECK: bl test_true

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}

define void @test_TBZ_fallthrough_taken(i32 %in) nounwind {
; CHECK: test_TBZ_fallthrough_taken:
  %bit = and i32 %in, 32768
  %tst = icmp eq i32 %bit, 0
  br i1 %tst, label %true, label %false, !prof !0

; CHECK: tbnz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_true

; CHECK: [[FALSE]]:
; CHECK: bl test_false

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}

define void @test_TBZ_fallthrough_nottaken(i64 %in) nounwind {
; CHECK: test_TBZ_fallthrough_nottaken:
  %bit = and i64 %in, 32768
  %tst = icmp eq i64 %bit, 0
  br i1 %tst, label %true, label %false, !prof !1

; CHECK: tbz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_false

; CHECK: [[TRUE]]:
; CHECK: bl test_true

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}

define void @test_TBNZ_fallthrough_taken(i32 %in) nounwind {
; CHECK: test_TBNZ_fallthrough_taken:
  %bit = and i32 %in, 32768
  %tst = icmp ne i32 %bit, 0
  br i1 %tst, label %true, label %false, !prof !0

; CHECK: tbz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_true

; CHECK: [[FALSE]]:
; CHECK: bl test_false

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}

define void @test_TBNZ_fallthrough_nottaken(i64 %in) nounwind {
; CHECK: test_TBNZ_fallthrough_nottaken:
  %bit = and i64 %in, 32768
  %tst = icmp ne i64 %bit, 0
  br i1 %tst, label %true, label %false, !prof !1

; CHECK: tbnz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_false

; CHECK: [[TRUE]]:
; CHECK: bl test_true

true:
  call void @test_true()
  ret void

false:
  call void @test_false()
  ret void
}
24
test/CodeGen/AArch64/atomic-ops-not-barriers.ll
Normal file
@ -0,0 +1,24 @@
; RUN: llc -march=aarch64 < %s | FileCheck %s

define i32 @foo(i32* %var, i1 %cond) {
; CHECK: foo:
  br i1 %cond, label %atomic_ver, label %simple_ver
simple_ver:
  %oldval = load i32* %var
  %newval = add nsw i32 %oldval, -1
  store i32 %newval, i32* %var
  br label %somewhere
atomic_ver:
  %val = atomicrmw add i32* %var, i32 -1 seq_cst
  br label %somewhere
; CHECK: dmb
; CHECK: ldxr
; CHECK: dmb
; The key point here is that the second dmb isn't immediately followed by the
; simple_ver basic block, which LLVM attempted to do when DMB had been marked
; with isBarrier. For now, look for something that looks like "somewhere".
; CHECK-NEXT: mov
somewhere:
  %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver]
  ret i32 %combined
}
1099
test/CodeGen/AArch64/atomic-ops.ll
Normal file
File diff suppressed because it is too large
70
test/CodeGen/AArch64/basic-pic.ll
Normal file
70
test/CodeGen/AArch64/basic-pic.ll
Normal file
@ -0,0 +1,70 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -filetype=obj %s -o - | llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s

@var = global i32 0

; CHECK-ELF: RELOCATION RECORDS FOR [.text]

define i32 @get_globalvar() {
; CHECK: get_globalvar:

  %val = load i32* @var
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], #:got_lo12:var]
; CHECK: ldr w0, [x[[GOTLOC]]]

; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
  ret i32 %val
}

define i32* @get_globalvaraddr() {
; CHECK: get_globalvaraddr:

  %val = load i32* @var
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:var]

; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
  ret i32* @var
}

@hiddenvar = hidden global i32 0

define i32 @get_hiddenvar() {
; CHECK: get_hiddenvar:

  %val = load i32* @hiddenvar
; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
; CHECK: ldr w0, [x[[HI]], #:lo12:hiddenvar]

; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
; CHECK-ELF: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar
  ret i32 %val
}

define i32* @get_hiddenvaraddr() {
; CHECK: get_hiddenvaraddr:

  %val = load i32* @hiddenvar
; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
; CHECK: add x0, [[HI]], #:lo12:hiddenvar

; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC hiddenvar
  ret i32* @hiddenvar
}

define void()* @get_func() {
; CHECK: get_func:

  ret void()* bitcast(void()*()* @get_func to void()*)
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func
; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:get_func]

; Particularly important that the ADRP gets a relocation: LLVM tends to think
; it can relax it because it knows where get_func is. It can't!
; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE get_func
; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC get_func
}
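For context, a C-level sketch of what this test exercises, with hypothetical names. Under -fPIC, a preemptible global must be reached through the GOT (adrp/ldr with :got: relocations), while a hidden global can be addressed directly (adrp/add with :lo12:), since no other DSO can interpose it:

extern int var;                                              /* GOT-indirect under PIC */
__attribute__((visibility("hidden"))) extern int hiddenvar;  /* direct adrp/:lo12: */

int get_globalvar_sketch(void) { return var; }
int get_hiddenvar_sketch(void) { return hiddenvar; }
int *get_addr_sketch(void)     { return &var; }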
19
test/CodeGen/AArch64/bitfield-insert-0.ll
Normal file
@ -0,0 +1,19 @@
; RUN: llc -march=aarch64 -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s

; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one
; point, in the edge case where lsb = 0. Just make sure.

define void @test_bfi0(i32* %existing, i32* %new) {
; CHECK: bfxil {{w[0-9]+}}, {{w[0-9]+}}, #0, #18

  %oldval = load volatile i32* %existing
  %oldval_keep = and i32 %oldval, 4294705152 ; = 0xfffc_0000

  %newval = load volatile i32* %new
  %newval_masked = and i32 %newval, 262143 ; = 0x0003_ffff

  %combined = or i32 %newval_masked, %oldval_keep
  store volatile i32 %combined, i32* %existing

  ret void
}
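For context, the lsb = 0 edge case corresponds to inserting the low bits of one word into another with no shift. A minimal C sketch (hypothetical name, not from the commit):

/* Insert bits [17:0] of *newval into *existing: bfxil ..., #0, #18. */
void insert_low18(unsigned *existing, const unsigned *newval) {
  *existing = (*existing & 0xfffc0000u) | (*newval & 0x0003ffffu);
}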
193
test/CodeGen/AArch64/bitfield-insert.ll
Normal file
@ -0,0 +1,193 @@
; RUN: llc -march=aarch64 < %s | FileCheck %s

; First, a simple example from Clang. The registers could plausibly be
; different, but probably won't be.

%struct.foo = type { i8, [2 x i8], i8 }

define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone {
; CHECK: from_clang:
; CHECK: bfi w0, w1, #3, #4
; CHECK-NEXT: ret

entry:
  %f.coerce.fca.0.extract = extractvalue [1 x i64] %f.coerce, 0
  %tmp.sroa.0.0.extract.trunc = trunc i64 %f.coerce.fca.0.extract to i32
  %bf.value = shl i32 %n, 3
  %0 = and i32 %bf.value, 120
  %f.sroa.0.0.insert.ext.masked = and i32 %tmp.sroa.0.0.extract.trunc, 135
  %1 = or i32 %f.sroa.0.0.insert.ext.masked, %0
  %f.sroa.0.0.extract.trunc = zext i32 %1 to i64
  %tmp1.sroa.1.1.insert.insert = and i64 %f.coerce.fca.0.extract, 4294967040
  %tmp1.sroa.0.0.insert.insert = or i64 %f.sroa.0.0.extract.trunc, %tmp1.sroa.1.1.insert.insert
  %.fca.0.insert = insertvalue [1 x i64] undef, i64 %tmp1.sroa.0.0.insert.insert, 0
  ret [1 x i64] %.fca.0.insert
}

define void @test_whole32(i32* %existing, i32* %new) {
; CHECK: test_whole32:
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5

  %oldval = load volatile i32* %existing
  %oldval_keep = and i32 %oldval, 2214592511 ; = 0x83ffffff

  %newval = load volatile i32* %new
  %newval_shifted = shl i32 %newval, 26
  %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing

  ret void
}

define void @test_whole64(i64* %existing, i64* %new) {
; CHECK: test_whole64:
; CHECK: bfi {{x[0-9]+}}, {{x[0-9]+}}, #26, #14
; CHECK-NOT: and
; CHECK: ret

  %oldval = load volatile i64* %existing
  %oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffff

  %newval = load volatile i64* %new
  %newval_shifted = shl i64 %newval, 26
  %newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000

  %combined = or i64 %oldval_keep, %newval_masked
  store volatile i64 %combined, i64* %existing

  ret void
}

define void @test_whole32_from64(i64* %existing, i64* %new) {
; CHECK: test_whole32_from64:
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16
; CHECK-NOT: and
; CHECK: ret

  %oldval = load volatile i64* %existing
  %oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000

  %newval = load volatile i64* %new
  %newval_masked = and i64 %newval, 65535 ; = 0xffff

  %combined = or i64 %oldval_keep, %newval_masked
  store volatile i64 %combined, i64* %existing

  ret void
}

define void @test_32bit_masked(i32 *%existing, i32 *%new) {
; CHECK: test_32bit_masked:
; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4
; CHECK: and {{w[0-9]+}}, [[INSERT]], #0xff

  %oldval = load volatile i32* %existing
  %oldval_keep = and i32 %oldval, 135 ; = 0x87

  %newval = load volatile i32* %new
  %newval_shifted = shl i32 %newval, 3
  %newval_masked = and i32 %newval_shifted, 120 ; = 0x78

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing

  ret void
}

define void @test_64bit_masked(i64 *%existing, i64 *%new) {
; CHECK: test_64bit_masked:
; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8
; CHECK: and {{x[0-9]+}}, [[INSERT]], #0xffff00000000

  %oldval = load volatile i64* %existing
  %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000

  %newval = load volatile i64* %new
  %newval_shifted = shl i64 %newval, 40
  %newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000

  %combined = or i64 %newval_masked, %oldval_keep
  store volatile i64 %combined, i64* %existing

  ret void
}

; Mask is too complicated for literal ANDwwi; make sure other avenues are tried.
define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
; CHECK: test_32bit_complexmask:
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}

  %oldval = load volatile i32* %existing
  %oldval_keep = and i32 %oldval, 647 ; = 0x287

  %newval = load volatile i32* %new
  %newval_shifted = shl i32 %newval, 3
  %newval_masked = and i32 %newval_shifted, 120 ; = 0x78

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing

  ret void
}

; Neither mask is a contiguous set of 1s. BFI can't be used
define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
; CHECK: test_32bit_badmask:
; CHECK-NOT: bfi
; CHECK: ret

  %oldval = load volatile i32* %existing
  %oldval_keep = and i32 %oldval, 135 ; = 0x87

  %newval = load volatile i32* %new
  %newval_shifted = shl i32 %newval, 3
  %newval_masked = and i32 %newval_shifted, 632 ; = 0x278

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing

  ret void
}

; Ditto
define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
; CHECK: test_64bit_badmask:
; CHECK-NOT: bfi
; CHECK: ret

  %oldval = load volatile i64* %existing
  %oldval_keep = and i64 %oldval, 135 ; = 0x87

  %newval = load volatile i64* %new
  %newval_shifted = shl i64 %newval, 3
  %newval_masked = and i64 %newval_shifted, 664 ; = 0x298

  %combined = or i64 %oldval_keep, %newval_masked
  store volatile i64 %combined, i64* %existing

  ret void
}

; Bitfield insert where there's a left-over shr needed at the beginning
; (e.g. result of str.bf1 = str.bf2)
define void @test_32bit_with_shr(i32* %existing, i32* %new) {
; CHECK: test_32bit_with_shr:

  %oldval = load volatile i32* %existing
  %oldval_keep = and i32 %oldval, 2214592511 ; = 0x83ffffff

  %newval = load i32* %new
  %newval_shifted = shl i32 %newval, 12
  %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing
; CHECK: lsr [[BIT:w[0-9]+]], {{w[0-9]+}}, #14
; CHECK: bfi {{w[0-9]}}, [[BIT]], #26, #5

  ret void
}
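For context, the and/shl/or pattern these tests feed the selector is what Clang emits for a C bitfield store. A minimal sketch with a hypothetical struct layout (a 5-bit field starting at bit 26, matching test_whole32):

struct rec {
  unsigned low   : 26;
  unsigned field : 5;   /* bits 26..30 */
  unsigned top   : 1;
};

/* Read-modify-write of one field: should select to a single bfi on AArch64. */
void set_field(struct rec *existing, unsigned newval) {
  existing->field = newval;
}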
218
test/CodeGen/AArch64/bitfield.ll
Normal file
@ -0,0 +1,218 @@
; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s

@var32 = global i32 0
@var64 = global i64 0

define void @test_extendb(i8 %var) {
; CHECK: test_extendb:

  %sxt32 = sext i8 %var to i32
  store volatile i32 %sxt32, i32* @var32
; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}}

  %sxt64 = sext i8 %var to i64
  store volatile i64 %sxt64, i64* @var64
; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}

; N.b. this doesn't actually produce a bitfield instruction at the
; moment, but it's still a good test to have and the semantics are
; correct.
  %uxt32 = zext i8 %var to i32
  store volatile i32 %uxt32, i32* @var32
; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff

  %uxt64 = zext i8 %var to i64
  store volatile i64 %uxt64, i64* @var64
; CHECK: uxtb {{x[0-9]+}}, {{w[0-9]+}}
  ret void
}

define void @test_extendh(i16 %var) {
; CHECK: test_extendh:

  %sxt32 = sext i16 %var to i32
  store volatile i32 %sxt32, i32* @var32
; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}}

  %sxt64 = sext i16 %var to i64
  store volatile i64 %sxt64, i64* @var64
; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}

; N.b. this doesn't actually produce a bitfield instruction at the
; moment, but it's still a good test to have and the semantics are
; correct.
  %uxt32 = zext i16 %var to i32
  store volatile i32 %uxt32, i32* @var32
; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff

  %uxt64 = zext i16 %var to i64
  store volatile i64 %uxt64, i64* @var64
; CHECK: uxth {{x[0-9]+}}, {{w[0-9]+}}
  ret void
}

define void @test_extendw(i32 %var) {
; CHECK: test_extendw:

  %sxt64 = sext i32 %var to i64
  store volatile i64 %sxt64, i64* @var64
; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}}

  %uxt64 = zext i32 %var to i64
  store volatile i64 %uxt64, i64* @var64
; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #0, #32
  ret void
}

define void @test_shifts(i32 %val32, i64 %val64) {
; CHECK: test_shifts:

  %shift1 = ashr i32 %val32, 31
  store volatile i32 %shift1, i32* @var32
; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, #31

  %shift2 = lshr i32 %val32, 8
  store volatile i32 %shift2, i32* @var32
; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #8

  %shift3 = shl i32 %val32, 1
  store volatile i32 %shift3, i32* @var32
; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #1

  %shift4 = ashr i64 %val64, 31
  store volatile i64 %shift4, i64* @var64
; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #31

  %shift5 = lshr i64 %val64, 8
  store volatile i64 %shift5, i64* @var64
; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #8

  %shift6 = shl i64 %val64, 63
  store volatile i64 %shift6, i64* @var64
; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, #63

  %shift7 = ashr i64 %val64, 63
  store volatile i64 %shift7, i64* @var64
; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #63

  %shift8 = lshr i64 %val64, 63
  store volatile i64 %shift8, i64* @var64
; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #63

  %shift9 = lshr i32 %val32, 31
  store volatile i32 %shift9, i32* @var32
; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #31

  %shift10 = shl i32 %val32, 31
  store volatile i32 %shift10, i32* @var32
; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #31

  ret void
}

; LLVM can produce in-register extensions taking place entirely with
; 64-bit registers too.
define void @test_sext_inreg_64(i64 %in) {
; CHECK: test_sext_inreg_64:

; i1 doesn't have an official alias, but crops up and is handled by
; the bitfield ops.
  %trunc_i1 = trunc i64 %in to i1
  %sext_i1 = sext i1 %trunc_i1 to i64
  store volatile i64 %sext_i1, i64* @var64
; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1

  %trunc_i8 = trunc i64 %in to i8
  %sext_i8 = sext i8 %trunc_i8 to i64
  store volatile i64 %sext_i8, i64* @var64
; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}

  %trunc_i16 = trunc i64 %in to i16
  %sext_i16 = sext i16 %trunc_i16 to i64
  store volatile i64 %sext_i16, i64* @var64
; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}

  %trunc_i32 = trunc i64 %in to i32
  %sext_i32 = sext i32 %trunc_i32 to i64
  store volatile i64 %sext_i32, i64* @var64
; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}}
  ret void
}

; These instructions don't actually select to official bitfield
; operations, but it's important that we select them somehow:
define void @test_zext_inreg_64(i64 %in) {
; CHECK: test_zext_inreg_64:

  %trunc_i8 = trunc i64 %in to i8
  %zext_i8 = zext i8 %trunc_i8 to i64
  store volatile i64 %zext_i8, i64* @var64
; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff

  %trunc_i16 = trunc i64 %in to i16
  %zext_i16 = zext i16 %trunc_i16 to i64
  store volatile i64 %zext_i16, i64* @var64
; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff

  %trunc_i32 = trunc i64 %in to i32
  %zext_i32 = zext i32 %trunc_i32 to i64
  store volatile i64 %zext_i32, i64* @var64
; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffffffff

  ret void
}

define i64 @test_sext_inreg_from_32(i32 %in) {
; CHECK: test_sext_inreg_from_32:

  %small = trunc i32 %in to i1
  %ext = sext i1 %small to i64

; Different registers are, of course, possible, though suboptimal. This is
; making sure that a 64-bit "(sext_inreg (anyext GPR32), i1)" uses the 64-bit
; sbfx rather than just 32 bits.
; CHECK: sbfx x0, x0, #0, #1
  ret i64 %ext
}

define i32 @test_ubfx32(i32* %addr) {
; CHECK: test_ubfx32:
; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #23, #3

  %fields = load i32* %addr
  %shifted = lshr i32 %fields, 23
  %masked = and i32 %shifted, 7
  ret i32 %masked
}

define i64 @test_ubfx64(i64* %addr) {
; CHECK: test_ubfx64:
; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10

  %fields = load i64* %addr
  %shifted = lshr i64 %fields, 25
  %masked = and i64 %shifted, 1023
  ret i64 %masked
}

define i32 @test_sbfx32(i32* %addr) {
; CHECK: test_sbfx32:
; CHECK: sbfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #3

  %fields = load i32* %addr
  %shifted = shl i32 %fields, 23
  %extended = ashr i32 %shifted, 29
  ret i32 %extended
}

define i64 @test_sbfx64(i64* %addr) {
; CHECK: test_sbfx64:
; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #63

  %fields = load i64* %addr
  %shifted = shl i64 %fields, 1
  %extended = ashr i64 %shifted, 1
  ret i64 %extended
}
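For context, a few C one-liners (hypothetical names, not from the commit) that produce the same extension and extract patterns these tests check:

long sext_byte(long in) { return (signed char)in; }    /* sxtb x, w */
unsigned long zext_half(unsigned long in) {
  return (unsigned short)in;                           /* and ..., #0xffff */
}
long extract_bits(long fields) {
  return ((unsigned long)fields >> 25) & 1023;         /* ubfx ..., #25, #10 */
}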
18
test/CodeGen/AArch64/blockaddress.ll
Normal file
@ -0,0 +1,18 @@
; RUN: llc -march=aarch64 -verify-machineinstrs < %s | FileCheck %s

@addr = global i8* null

define void @test_blockaddress() {
; CHECK: test_blockaddress:
  store volatile i8* blockaddress(@test_blockaddress, %block), i8** @addr
  %val = load volatile i8** @addr
  indirectbr i8* %val, [label %block]
; CHECK: adrp [[DEST_HI:x[0-9]+]], [[DEST_LBL:.Ltmp[0-9]+]]
; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], #:lo12:[[DEST_LBL]]
; CHECK: str [[DEST]],
; CHECK: ldr [[NEWDEST:x[0-9]+]]
; CHECK: br [[NEWDEST]]

block:
  ret void
}
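For context, blockaddress plus indirectbr is what GNU C's computed goto lowers to; a minimal sketch (hypothetical names):

static void *addr;   /* mirrors @addr in the test */

void test_blockaddress_sketch(void) {
  addr = &&block;    /* like blockaddress(@fn, %block): adrp/add + str */
  goto *addr;        /* like indirectbr: ldr + br */
block:
  return;
}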
55
test/CodeGen/AArch64/bool-loads.ll
Normal file
@ -0,0 +1,55 @@
; RUN: llc -march=aarch64 < %s | FileCheck %s

@var = global i1 0

define i32 @test_sextloadi32() {
; CHECK: test_sextloadi32

  %val = load i1* @var
  %ret = sext i1 %val to i32
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1

  ret i32 %ret
; CHECK: ret
}

define i64 @test_sextloadi64() {
; CHECK: test_sextloadi64

  %val = load i1* @var
  %ret = sext i1 %val to i64
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1

  ret i64 %ret
; CHECK: ret
}

define i32 @test_zextloadi32() {
; CHECK: test_zextloadi32

; It's not actually necessary that "ret" is next, but as far as LLVM
; is concerned only 0 or 1 should be loadable so no extension is
; necessary.
  %val = load i1* @var
  %ret = zext i1 %val to i32
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]

  ret i32 %ret
; CHECK-NEXT: ret
}

define i64 @test_zextloadi64() {
; CHECK: test_zextloadi64

; It's not actually necessary that "ret" is next, but as far as LLVM
; is concerned only 0 or 1 should be loadable so no extension is
; necessary.
  %val = load i1* @var
  %ret = zext i1 %val to i64
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]

  ret i64 %ret
; CHECK-NEXT: ret
}
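For context, a C sketch (hypothetical names) of the i1 loads above; since only 0 or 1 is ever stored in a _Bool, the zext cases need nothing beyond the ldrb, while the sext cases need an sbfx:

extern _Bool flag;

int  load_zext(void) { return flag; }             /* ldrb, then ret */
long load_sext(void) { return flag ? -1L : 0L; }  /* ldrb + sbfx #0, #1 */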
17
test/CodeGen/AArch64/breg.ll
Normal file
@ -0,0 +1,17 @@
; RUN: llc -verify-machineinstrs < %s -march=aarch64 | FileCheck %s

@stored_label = global i8* null

define void @foo() {
; CHECK: foo:
  %lab = load i8** @stored_label
  indirectbr i8* %lab, [label %otherlab, label %retlab]
; CHECK: adrp {{x[0-9]+}}, stored_label
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:stored_label]
; CHECK: br {{x[0-9]+}}

otherlab:
  ret void
retlab:
  ret void
}
Some files were not shown because too many files have changed in this diff