diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 9cb4eafa099..75fb937de9b 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -61,6 +61,12 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
 def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
                                         "Has zero-cycle zeroing instructions">;
 
+/// ... but the floating-point form of that zeroing misbehaves in rare cases on
+/// older CPUs; when set, FMOV-zero lowering skips the zero-cycle form.
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
+    "HasZeroCycleZeroingFPWorkaround", "true",
+    "The zero-cycle floating-point zeroing instruction has a bug">;
+
 def FeatureStrictAlign : SubtargetFeature<"strict-align",
                                           "StrictAlign", "true",
                                           "Disallow all unaligned memory "
@@ -290,7 +296,8 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
                                    FeaturePerfMon,
                                    FeatureSlowMisaligned128Store,
                                    FeatureZCRegMove,
-                                   FeatureZCZeroing
+                                   FeatureZCZeroing,
+                                   FeatureZCZeroingFPWorkaround
                                    ]>;
 
 def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 56fcff606aa..67138f41dda 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -523,7 +523,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
 
 void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
   unsigned DestReg = MI.getOperand(0).getReg();
-  if (STI->hasZeroCycleZeroing()) {
+  if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) {
     // Convert H/S/D register to corresponding Q register
     if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
       DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index a73ba887413..5d9759d363d 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -86,6 +86,7 @@ protected:
 
   // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
   bool HasZeroCycleZeroing = false;
+  bool HasZeroCycleZeroingFPWorkaround = false;
 
   // StrictAlign - Disallow unaligned memory accesses.
   bool StrictAlign = false;
@@ -197,6 +198,10 @@ public:
 
   bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
 
+  bool hasZeroCycleZeroingFPWorkaround() const {
+    return HasZeroCycleZeroingFPWorkaround;
+  }
+
   bool requiresStrictAlign() const { return StrictAlign; }
 
   bool isXRaySupported() const override { return true; }
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 2763a5b3a90..fd2ef18fbe0 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3796,6 +3796,31 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
       }
     }
   }
+
+  // The Cyclone CPU and early successors didn't execute the zero-cycle zeroing
+  // instruction for FP registers correctly in some rare circumstances. Rewrite
+  // "movi.2d vN, #0" / "movi vN.2d, #0" to the .16b form and warn (because
+  // silently changing someone's assembly is rude).
+  if (getSTI().getFeatureBits()[AArch64::FeatureZCZeroingFPWorkaround] &&
+      NumOperands == 4 && Tok == "movi") {
+    AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+    AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
+    AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+    if ((Op1.isToken() && Op2.isNeonVectorReg() && Op3.isImm()) ||
+        (Op1.isNeonVectorReg() && Op2.isToken() && Op3.isImm())) {
+      StringRef Suffix = Op1.isToken() ? Op1.getToken() : Op2.getToken();
+      // The immediate may be symbolic, so dyn_cast rather than cast.
+      const auto *CE = dyn_cast<MCConstantExpr>(Op3.getImm());
+      if (Suffix.lower() == ".2d" && CE && CE->getValue() == 0) {
+        Warning(IDLoc, "instruction movi.2d with immediate #0 may not function"
+                " correctly on this CPU, converting to equivalent movi.16b");
+        unsigned Idx = Op1.isToken() ? 1 : 2; // Slot holding the suffix token.
+        Operands[Idx] = AArch64Operand::CreateToken(".16b", false, IDLoc,
+                                                  getContext());
+      }
+    }
+  }
+
   // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands.
   //        InstAlias can't quite handle this since the reg classes aren't
   //        subclasses.
diff --git a/test/CodeGen/AArch64/arm64-fcmp-opt.ll b/test/CodeGen/AArch64/arm64-fcmp-opt.ll
index e8b1557bac6..5155d49cc3f 100644
--- a/test/CodeGen/AArch64/arm64-fcmp-opt.ll
+++ b/test/CodeGen/AArch64/arm64-fcmp-opt.ll
@@ -41,7 +41,7 @@ entry:
 define float @fcmp_oeq(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_oeq
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], eq
 
@@ -53,7 +53,7 @@ define float @fcmp_oeq(float %a, float %b) nounwind ssp {
 define float @fcmp_ogt(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ogt
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], gt
 
@@ -65,7 +65,7 @@ define float @fcmp_ogt(float %a, float %b) nounwind ssp {
 define float @fcmp_oge(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_oge
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ge
 
@@ -77,7 +77,7 @@ define float @fcmp_oge(float %a, float %b) nounwind ssp {
 define float @fcmp_olt(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_olt
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], mi
 
@@ -89,7 +89,7 @@ define float @fcmp_olt(float %a, float %b) nounwind ssp {
 define float @fcmp_ole(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ole
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ls
 
@@ -101,7 +101,7 @@ define float @fcmp_ole(float %a, float %b) nounwind ssp {
 define float @fcmp_ord(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ord
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vc
   %cmp = fcmp ord float %a, %b
@@ -112,7 +112,7 @@ define float @fcmp_ord(float %a, float %b) nounwind ssp {
 define float @fcmp_uno(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_uno
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vs
   %cmp = fcmp uno float %a, %b
@@ -123,7 +123,7 @@ define float @fcmp_uno(float %a, float %b) nounwind ssp {
 define float @fcmp_ugt(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ugt
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], hi
   %cmp = fcmp ugt float %a, %b
@@ -134,7 +134,7 @@ define float @fcmp_ugt(float %a, float %b) nounwind ssp {
 define float @fcmp_uge(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_uge
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], pl
   %cmp = fcmp uge float %a, %b
@@ -145,7 +145,7 @@ define float @fcmp_uge(float %a, float %b) nounwind ssp {
 define float @fcmp_ult(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ult
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], lt
   %cmp = fcmp ult float %a, %b
@@ -156,7 +156,7 @@ define float @fcmp_ult(float %a, float %b) nounwind ssp {
 define float @fcmp_ule(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ule
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], le
   %cmp = fcmp ule float %a, %b
@@ -167,7 +167,7 @@ define float @fcmp_ule(float %a, float %b) nounwind ssp {
 define float @fcmp_une(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_une
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ne
   %cmp = fcmp une float %a, %b
@@ -180,7 +180,7 @@ define float @fcmp_une(float %a, float %b) nounwind ssp {
 define float @fcmp_one(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_one
 ;	fcmp	s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], mi
 ; CHECK: fcsel s0, s[[ONE]], [[TMP]], gt
@@ -194,7 +194,7 @@ define float @fcmp_one(float %a, float %b) nounwind ssp {
 define float @fcmp_ueq(float %a, float %b) nounwind ssp {
 ; CHECK-LABEL: @fcmp_ueq
 ; CHECK: fcmp s0, s1
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
 ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], eq
 ; CHECK: fcsel s0, s[[ONE]], [[TMP]], vs
diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
index 412651c5567..453334dce60 100644
--- a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -9,10 +9,10 @@ define void @t1() nounwind ssp {
 entry:
 ; ALL-LABEL: t1:
 ; ALL-NOT: fmov
-; CYCLONE: movi.2d v0, #0000000000000000
-; CYCLONE: movi.2d v1, #0000000000000000
-; CYCLONE: movi.2d v2, #0000000000000000
-; CYCLONE: movi.2d v3, #0000000000000000
+; CYCLONE: fmov d0, xzr
+; CYCLONE: fmov d1, xzr
+; CYCLONE: fmov d2, xzr
+; CYCLONE: fmov d3, xzr
 ; KRYO: movi v0.2d, #0000000000000000
 ; KRYO: movi v1.2d, #0000000000000000
 ; KRYO: movi v2.2d, #0000000000000000
@@ -48,8 +48,8 @@ entry:
 define void @t4() nounwind ssp {
 ; ALL-LABEL: t4:
 ; ALL-NOT: fmov
-; CYCLONE: movi.2d v0, #0000000000000000
-; CYCLONE: movi.2d v1, #0000000000000000
+; CYCLONE: fmov s0, wzr
+; CYCLONE: fmov s1, wzr
 ; KRYO: movi v0.2d, #0000000000000000
 ; KRYO: movi v1.2d, #0000000000000000
 ; FALKOR: movi v0.2d, #0000000000000000
diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll
index 4d9cb21ddc3..f74e9c35094 100644
--- a/test/CodeGen/AArch64/fp-cond-sel.ll
+++ b/test/CodeGen/AArch64/fp-cond-sel.ll
@@ -12,7 +12,7 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
   %tst1 = icmp ugt i32 %lhs32, %rhs32
   %val1 = select i1 %tst1, float 0.0, float 1.0
   store float %val1, float* @varfloat
-; CHECK-DAG: movi v[[FLT0:[0-9]+]].2d, #0
+; CHECK-DAG: fmov s[[FLT0:[0-9]+]], wzr
 ; CHECK-DAG: fmov s[[FLT1:[0-9]+]], #1.0
 ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi
 
diff --git a/test/MC/AArch64/cyclone-movi-bug.s b/test/MC/AArch64/cyclone-movi-bug.s
new file mode 100644
index 00000000000..d49aea39269
--- /dev/null
+++ b/test/MC/AArch64/cyclone-movi-bug.s
@@ -0,0 +1,9 @@
+; RUN: llvm-mc -triple aarch64-apple-ios -mcpu=cyclone %s 2> %t.log | FileCheck %s
+; RUN: FileCheck %s --check-prefix=CHECK-ERR < %t.log
+
+    ; CHECK: movi v3.16b, #0
+    ; CHECK: movi v7.16b, #0
+    ; CHECK-ERR: warning: instruction movi.2d with immediate #0 may not function correctly on this CPU, converting to equivalent movi.16b
+    ; CHECK-ERR: warning: instruction movi.2d with immediate #0 may not function correctly on this CPU, converting to equivalent movi.16b
+    movi.2d v3, #0
+    movi v7.2d, #0