diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 9cb4eafa099..75fb937de9b 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -61,6 +61,12 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; +/// ... but the floating-point version doesn't quite work in rare cases on older +/// CPUs. +def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", + "HasZeroCycleZeroingFPWorkaround", "true", + "The zero-cycle floating-point zeroing instruction has a bug">; + def FeatureStrictAlign : SubtargetFeature<"strict-align", "StrictAlign", "true", "Disallow all unaligned memory " @@ -290,7 +296,8 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", FeaturePerfMon, FeatureSlowMisaligned128Store, FeatureZCRegMove, - FeatureZCZeroing + FeatureZCZeroing, + FeatureZCZeroingFPWorkaround ]>; def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 56fcff606aa..67138f41dda 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -523,7 +523,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) { unsigned DestReg = MI.getOperand(0).getReg(); - if (STI->hasZeroCycleZeroing()) { + if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) { // Convert H/S/D register to corresponding Q register if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31) DestReg = AArch64::Q0 + (DestReg - AArch64::H0); diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index a73ba887413..5d9759d363d 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ 
b/lib/Target/AArch64/AArch64Subtarget.h @@ -86,6 +86,7 @@ protected: // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. bool HasZeroCycleZeroing = false; + bool HasZeroCycleZeroingFPWorkaround = false; // StrictAlign - Disallow unaligned memory accesses. bool StrictAlign = false; @@ -197,6 +198,10 @@ public: bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } + bool hasZeroCycleZeroingFPWorkaround() const { + return HasZeroCycleZeroingFPWorkaround; + } + bool requiresStrictAlign() const { return StrictAlign; } bool isXRaySupported() const override { return true; } diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 2763a5b3a90..fd2ef18fbe0 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3796,6 +3796,31 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } } } + + // The Cyclone CPU and early successors didn't execute the zero-cycle zeroing + // instruction for FP registers correctly in some rare circumstances. Convert + // it to a safe instruction and warn (because silently changing someone's + // assembly is rude). + if (getSTI().getFeatureBits()[AArch64::FeatureZCZeroingFPWorkaround] && + NumOperands == 4 && Tok == "movi") { + AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]); + AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]); + AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]); + if ((Op1.isToken() && Op2.isNeonVectorReg() && Op3.isImm()) || + (Op1.isNeonVectorReg() && Op2.isToken() && Op3.isImm())) { + StringRef Suffix = Op1.isToken() ? Op1.getToken() : Op2.getToken(); + if (Suffix.lower() == ".2d" && + cast<MCConstantExpr>(Op3.getImm())->getValue() == 0) { + Warning(IDLoc, "instruction movi.2d with immediate #0 may not function" + " correctly on this CPU, converting to equivalent movi.16b"); + // Switch the suffix to .16b. + unsigned Idx = Op1.isToken() ? 
1 : 2; + Operands[Idx] = AArch64Operand::CreateToken(".16b", false, IDLoc, + getContext()); + } + } + } + // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands. // InstAlias can't quite handle this since the reg classes aren't // subclasses. diff --git a/test/CodeGen/AArch64/arm64-fcmp-opt.ll b/test/CodeGen/AArch64/arm64-fcmp-opt.ll index e8b1557bac6..5155d49cc3f 100644 --- a/test/CodeGen/AArch64/arm64-fcmp-opt.ll +++ b/test/CodeGen/AArch64/arm64-fcmp-opt.ll @@ -41,7 +41,7 @@ entry: define float @fcmp_oeq(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_oeq ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], eq @@ -53,7 +53,7 @@ define float @fcmp_oeq(float %a, float %b) nounwind ssp { define float @fcmp_ogt(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ogt ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], gt @@ -65,7 +65,7 @@ define float @fcmp_ogt(float %a, float %b) nounwind ssp { define float @fcmp_oge(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_oge ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ge @@ -77,7 +77,7 @@ define float @fcmp_oge(float %a, float %b) nounwind ssp { define float @fcmp_olt(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_olt ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], mi @@ -89,7 +89,7 @@ define float @fcmp_olt(float %a, float %b) nounwind ssp { define float @fcmp_ole(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ole ; CHECK: fcmp s0, s1 
-; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ls @@ -101,7 +101,7 @@ define float @fcmp_ole(float %a, float %b) nounwind ssp { define float @fcmp_ord(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ord ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vc %cmp = fcmp ord float %a, %b @@ -112,7 +112,7 @@ define float @fcmp_ord(float %a, float %b) nounwind ssp { define float @fcmp_uno(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_uno ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vs %cmp = fcmp uno float %a, %b @@ -123,7 +123,7 @@ define float @fcmp_uno(float %a, float %b) nounwind ssp { define float @fcmp_ugt(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ugt ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], hi %cmp = fcmp ugt float %a, %b @@ -134,7 +134,7 @@ define float @fcmp_ugt(float %a, float %b) nounwind ssp { define float @fcmp_uge(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_uge ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], pl %cmp = fcmp uge float %a, %b @@ -145,7 +145,7 @@ define float @fcmp_uge(float %a, float %b) nounwind ssp { define float @fcmp_ult(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ult ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel 
s0, s[[ONE]], s[[ZERO]], lt %cmp = fcmp ult float %a, %b @@ -156,7 +156,7 @@ define float @fcmp_ult(float %a, float %b) nounwind ssp { define float @fcmp_ule(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ule ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], le %cmp = fcmp ule float %a, %b @@ -167,7 +167,7 @@ define float @fcmp_ule(float %a, float %b) nounwind ssp { define float @fcmp_une(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_une ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ne %cmp = fcmp une float %a, %b @@ -180,7 +180,7 @@ define float @fcmp_une(float %a, float %b) nounwind ssp { define float @fcmp_one(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_one ; fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], mi ; CHECK: fcsel s0, s[[ONE]], [[TMP]], gt @@ -194,7 +194,7 @@ define float @fcmp_one(float %a, float %b) nounwind ssp { define float @fcmp_ueq(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ueq ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], eq ; CHECK: fcsel s0, s[[ONE]], [[TMP]], vs diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll index 412651c5567..453334dce60 100644 --- a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll +++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll @@ -9,10 +9,10 @@ define void @t1() nounwind ssp { entry: ; ALL-LABEL: t1: ; ALL-NOT: fmov -; CYCLONE: movi.2d v0, 
#0000000000000000 -; CYCLONE: movi.2d v1, #0000000000000000 -; CYCLONE: movi.2d v2, #0000000000000000 -; CYCLONE: movi.2d v3, #0000000000000000 +; CYCLONE: fmov d0, xzr +; CYCLONE: fmov d1, xzr +; CYCLONE: fmov d2, xzr +; CYCLONE: fmov d3, xzr ; KRYO: movi v0.2d, #0000000000000000 ; KRYO: movi v1.2d, #0000000000000000 ; KRYO: movi v2.2d, #0000000000000000 @@ -48,8 +48,8 @@ entry: define void @t4() nounwind ssp { ; ALL-LABEL: t4: ; ALL-NOT: fmov -; CYCLONE: movi.2d v0, #0000000000000000 -; CYCLONE: movi.2d v1, #0000000000000000 +; CYCLONE: fmov s0, wzr +; CYCLONE: fmov s1, wzr ; KRYO: movi v0.2d, #0000000000000000 ; KRYO: movi v1.2d, #0000000000000000 ; FALKOR: movi v0.2d, #0000000000000000 diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll index 4d9cb21ddc3..f74e9c35094 100644 --- a/test/CodeGen/AArch64/fp-cond-sel.ll +++ b/test/CodeGen/AArch64/fp-cond-sel.ll @@ -12,7 +12,7 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst1 = icmp ugt i32 %lhs32, %rhs32 %val1 = select i1 %tst1, float 0.0, float 1.0 store float %val1, float* @varfloat -; CHECK-DAG: movi v[[FLT0:[0-9]+]].2d, #0 +; CHECK-DAG: fmov s[[FLT0:[0-9]+]], wzr ; CHECK-DAG: fmov s[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi diff --git a/test/MC/AArch64/cyclone-movi-bug.s b/test/MC/AArch64/cyclone-movi-bug.s new file mode 100644 index 00000000000..d49aea39269 --- /dev/null +++ b/test/MC/AArch64/cyclone-movi-bug.s @@ -0,0 +1,9 @@ +; RUN: llvm-mc -triple aarch64-apple-ios -mcpu=cyclone %s 2> %t.log | FileCheck %s +; RUN: FileCheck %s --check-prefix=CHECK-ERR < %t.log + + ; CHECK: movi v3.16b, #0 + ; CHECK: movi v7.16b, #0 + ; CHECK-ERR: warning: instruction movi.2d with immediate #0 may not function correctly on this CPU, converting to equivalent movi.16b + ; CHECK-ERR: warning: instruction movi.2d with immediate #0 may not function correctly on this CPU, converting to equivalent movi.16b + movi.2d v3, #0 + movi v7.2d, #0