diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 63c2dc4da6c..22ae131273d 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -362,17 +362,30 @@ def FeaturePrefer256Bit
     : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
                        "Prefer 256-bit AVX instructions">;
 
-// Enable mitigation of some aspects of speculative execution related
-// vulnerabilities by removing speculatable indirect branches. This disables
-// jump-table formation, rewrites explicit `indirectbr` instructions into
-// `switch` instructions, and uses a special construct called a "retpoline" to
-// prevent speculation of the remaining indirect branches (indirect calls and
-// tail calls).
+// Lower indirect calls using a special construct called a `retpoline` to
+// mitigate potential Spectre v2 attacks against them.
+def FeatureRetpolineIndirectCalls
+    : SubtargetFeature<
+          "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
+          "Remove speculation of indirect calls from the generated code.">;
+
+// Lower indirect branches and switches either using conditional branch trees
+// or using a special construct called a `retpoline` to mitigate potential
+// Spectre v2 attacks against them.
+def FeatureRetpolineIndirectBranches
+    : SubtargetFeature<
+          "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
+          "Remove speculation of indirect branches from the generated code.">;
+
+// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
+// `retpoline-indirect-branches` above.
 def FeatureRetpoline
-    : SubtargetFeature<"retpoline", "UseRetpoline", "true",
+    : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
                        "Remove speculation of indirect branches from the "
                        "generated code, either by avoiding them entirely or "
-                       "lowering them with a speculation blocking construct.">;
+                       "lowering them with a speculation blocking construct.",
+                       [FeatureRetpolineIndirectCalls,
+                        FeatureRetpolineIndirectBranches]>;
 
 // Rely on external thunks for the emitted retpoline calls. This allows users
 // to provide their own custom thunk definitions in highly specialized
@@ -380,8 +393,10 @@ def FeatureRetpoline
 def FeatureRetpolineExternalThunk
     : SubtargetFeature<
           "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
-          "Enable retpoline, but with an externally provided thunk.",
-          [FeatureRetpoline]>;
+          "When lowering an indirect call or branch using a `retpoline`, rely "
+          "on the specified user provided thunk rather than emitting one "
+          "ourselves. Only has effect when combined with some other retpoline "
+          "feature.", [FeatureRetpolineIndirectCalls]>;
 
 // Direct Move instructions.
 def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index d082b42eefa..d085eb69421 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -3222,8 +3222,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
       (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
     return false;
 
-  // Functions using retpoline should use SDISel for calls.
-  if (Subtarget->useRetpoline())
+  // Functions using retpoline for indirect calls need to use SDISel.
+  if (Subtarget->useRetpolineIndirectCalls())
     return false;
 
   // Handle only C, fastcc, and webkit_js calling conventions for now.
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index e207c343fac..46e30d786f0 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -765,7 +765,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
   bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
 
   // FIXME: Add retpoline support and remove this.
-  if (Is64Bit && IsLargeCodeModel && STI.useRetpoline())
+  if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
     report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                        "code model and retpoline not yet implemented.");
 
@@ -2437,7 +2437,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
     // is laid out within 2^31 bytes of each function body, but this seems
     // to be sufficient for JIT.
     // FIXME: Add retpoline support and remove the error here..
-    if (STI.useRetpoline())
+    if (STI.useRetpolineIndirectCalls())
       report_fatal_error("Emitting morestack calls on 64-bit with the large "
                          "code model and retpoline not yet implemented.");
     BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 80d8183346b..7236fdeb8be 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -725,7 +725,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
     if (OptLevel != CodeGenOpt::None &&
         // Only do this when the target can fold the load into the call or
         // jmp.
-        !Subtarget->useRetpoline() &&
+        !Subtarget->useRetpolineIndirectCalls() &&
         ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
          (N->getOpcode() == X86ISD::TC_RETURN &&
           (Subtarget->is64Bit() ||
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0c5e4df2d3c..2ffc9566ed0 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -26649,7 +26649,7 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
 
 bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
   // If the subtarget is using retpolines, we need to not generate jump tables.
-  if (Subtarget.useRetpoline())
+  if (Subtarget.useRetpolineIndirectBranches())
     return false;
 
   // Otherwise, fallback on the generic logic.
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 7c01714b6a0..0ff73101dcc 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1095,14 +1095,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
-          Requires<[Not64BitMode, NotUseRetpoline]>;
+          Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
 
 // FIXME: This is disabled for 32-bit PIC mode because the global base
 // register which is part of the address mode may be assigned a
 // callee-saved register.
 def : Pat<(X86tcret (load addr:$dst), imm:$off),
           (TCRETURNmi addr:$dst, imm:$off)>,
-          Requires<[Not64BitMode, IsNotPIC, NotUseRetpoline]>;
+          Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
 
 def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
           (TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@@ -1114,21 +1114,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
-          Requires<[In64BitMode, NotUseRetpoline]>;
+          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
 
 // Don't fold loads into X86tcret requiring more than 6 regs.
 // There wouldn't be enough scratch registers for base+index.
 def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
           (TCRETURNmi64 addr:$dst, imm:$off)>,
-          Requires<[In64BitMode, NotUseRetpoline]>;
+          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
-          Requires<[In64BitMode, UseRetpoline]>;
+          Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
-          Requires<[Not64BitMode, UseRetpoline]>;
+          Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
 
 def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
           (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 650bce74dcf..a7c7aaab228 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -222,11 +222,13 @@ let isCall = 1 in
                         Sched<[WriteJumpLd]>;
     def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst",
                         [(X86call GR32:$dst)]>, OpSize32,
-                        Requires<[Not64BitMode,NotUseRetpoline]>, Sched<[WriteJump]>;
+                        Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
+                        Sched<[WriteJump]>;
     def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst",
                         [(X86call (loadi32 addr:$dst))]>, OpSize32,
-                        Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
+                        Requires<[Not64BitMode,FavorMemIndirectCall,
+                                  NotUseRetpolineIndirectCalls]>,
                         Sched<[WriteJumpLd]>;
 
     // Non-tracking calls for IBT, use with caution.
@@ -320,11 +322,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
                       Requires<[In64BitMode]>;
     def CALL64r     : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst",
                         [(X86call GR64:$dst)]>,
-                        Requires<[In64BitMode,NotUseRetpoline]>;
+                        Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
     def CALL64m     : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst",
                         [(X86call (loadi64 addr:$dst))]>,
                         Requires<[In64BitMode,FavorMemIndirectCall,
-                                  NotUseRetpoline]>;
+                                  NotUseRetpolineIndirectCalls]>;
 
     // Non-tracking calls for IBT, use with caution.
     let isCodeGenOnly = 1 in {
@@ -379,11 +381,11 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1, SchedRW = [WriteJump] in {
 
   def RETPOLINE_CALL32 :
     PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
-            Requires<[Not64BitMode,UseRetpoline]>;
+            Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
 
   def RETPOLINE_CALL64 :
     PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
-            Requires<[In64BitMode,UseRetpoline]>;
+            Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
 
   // Retpoline variant of indirect tail calls.
   let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index b1ef2c565a5..bbc4e2bdd68 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -955,8 +955,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
 def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
 def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
 def HasMFence : Predicate<"Subtarget->hasMFence()">;
-def UseRetpoline : Predicate<"Subtarget->useRetpoline()">;
-def NotUseRetpoline : Predicate<"!Subtarget->useRetpoline()">;
+def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
+def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 343bef0e919..1e4ced13bb7 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -898,7 +898,7 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
       break;
     case MachineOperand::MO_Register:
       // FIXME: Add retpoline support and remove this.
-      if (Subtarget->useRetpoline())
+      if (Subtarget->useRetpolineIndirectCalls())
        report_fatal_error("Lowering register statepoints with retpoline not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
@@ -1055,7 +1055,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
     EmitAndCountInstruction(
         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
     // FIXME: Add retpoline support and remove this.
-    if (Subtarget->useRetpoline())
+    if (Subtarget->useRetpolineIndirectCalls())
       report_fatal_error(
           "Lowering patchpoint with retpoline not yet implemented.");
     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
index 250deb3523b..f62e89eb1ba 100644
--- a/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
@@ -115,7 +115,9 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
   // FIXME: It's a little silly to look at every function just to enumerate
   // the subtargets, but eventually we'll want to look at them for indirect
   // calls, so maybe this is OK.
-  if (!STI->useRetpoline() || STI->useRetpolineExternalThunk())
+  if ((!STI->useRetpolineIndirectCalls() &&
+       !STI->useRetpolineIndirectBranches()) ||
+      STI->useRetpolineExternalThunk())
     return false;
 
   // Otherwise, we need to insert the thunk.
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 85e8256a6e9..e52708687c6 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -387,7 +387,15 @@ protected:
 
   /// Use a retpoline thunk rather than indirect calls to block speculative
   /// execution.
-  bool UseRetpoline = false;
+  bool UseRetpolineIndirectCalls = false;
+
+  /// Use a retpoline thunk or remove any indirect branch to block speculative
+  /// execution.
+  bool UseRetpolineIndirectBranches = false;
+
+  /// Deprecated flag, query `UseRetpolineIndirectCalls` and
+  /// `UseRetpolineIndirectBranches` instead.
+  bool DeprecatedUseRetpoline = false;
 
   /// When using a retpoline thunk, call an externally provided thunk rather
   /// than emitting one inside the compiler.
@@ -649,7 +657,10 @@ public:
   bool hasPCONFIG() const { return HasPCONFIG; }
   bool hasSGX() const { return HasSGX; }
   bool hasINVPCID() const { return HasINVPCID; }
-  bool useRetpoline() const { return UseRetpoline; }
+  bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
+  bool useRetpolineIndirectBranches() const {
+    return UseRetpolineIndirectBranches;
+  }
   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
 
@@ -804,7 +815,9 @@ public:
 
   /// If we are using retpolines, we need to expand indirectbr to avoid it
   /// lowering to an actual indirect jump.
-  bool enableIndirectBrExpand() const override { return useRetpoline(); }
+  bool enableIndirectBrExpand() const override {
+    return useRetpolineIndirectBranches();
+  }
 
   /// Enable the MachineScheduler pass for all X86 subtargets.
   bool enableMachineScheduler() const override { return true; }
diff --git a/test/CodeGen/X86/retpoline-external.ll b/test/CodeGen/X86/retpoline-external.ll
index 04d6ecf816c..308a1a3181b 100644
--- a/test/CodeGen/X86/retpoline-external.ll
+++ b/test/CodeGen/X86/retpoline-external.ll
@@ -163,4 +163,4 @@ define void @direct_tail() #0 {
 
 ; X86FAST-NOT: __{{.*}}_retpoline_{{.*}}:
 
-attributes #0 = { "target-features"="+retpoline-external-thunk" }
+attributes #0 = { "target-features"="+retpoline-indirect-calls,+retpoline-external-thunk" }
diff --git a/test/CodeGen/X86/retpoline-regparm.ll b/test/CodeGen/X86/retpoline-regparm.ll
index 13b32740b28..472cf0b1f0d 100644
--- a/test/CodeGen/X86/retpoline-regparm.ll
+++ b/test/CodeGen/X86/retpoline-regparm.ll
@@ -38,5 +38,5 @@ entry:
 ; CHECK: popl %edi
 ; CHECK: retl
 
-attributes #0 = { "target-features"="+retpoline" }
-attributes #1 = { "target-features"="+retpoline-external-thunk" }
+attributes #0 = { "target-features"="+retpoline-indirect-calls" }
+attributes #1 = { "target-features"="+retpoline-indirect-calls,+retpoline-external-thunk" }
diff --git a/test/CodeGen/X86/retpoline.ll b/test/CodeGen/X86/retpoline.ll
index da12ae8cba6..2625435ab8c 100644
--- a/test/CodeGen/X86/retpoline.ll
+++ b/test/CodeGen/X86/retpoline.ll
@@ -156,7 +156,7 @@ define void @direct_tail() #0 {
 ; X86FAST: jmp direct_callee # TAILCALL
 
 
-declare void @nonlazybind_callee() #1
+declare void @nonlazybind_callee() #2
 
 define void @nonlazybind_caller() #0 {
   call void @nonlazybind_callee()
@@ -183,6 +183,153 @@ define void @nonlazybind_caller() #0 {
 ; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL
 
 
+; Check that a switch gets lowered using a jump table when retpolines are only
+; enabled for calls.
+define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
+; X64-LABEL: switch_jumptable:
+; X64: jmpq *
+; X86-LABEL: switch_jumptable:
+; X86: jmpl *
+entry:
+  br label %header
+
+header:
+  %i = load volatile i32, i32* %ptr
+  switch i32 %i, label %bb0 [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb5
+    i32 6, label %bb6
+    i32 7, label %bb7
+    i32 8, label %bb8
+    i32 9, label %bb9
+  ]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %header
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %header
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %header
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %header
+
+bb4:
+  store volatile i64 4, i64* %sink
+  br label %header
+
+bb5:
+  store volatile i64 5, i64* %sink
+  br label %header
+
+bb6:
+  store volatile i64 6, i64* %sink
+  br label %header
+
+bb7:
+  store volatile i64 7, i64* %sink
+  br label %header
+
+bb8:
+  store volatile i64 8, i64* %sink
+  br label %header
+
+bb9:
+  store volatile i64 9, i64* %sink
+  br label %header
+}
+
+
+@indirectbr_preserved.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_preserved, %bb0),
+                                                     i8* blockaddress(@indirectbr_preserved, %bb1),
+                                                     i8* blockaddress(@indirectbr_preserved, %bb2),
+                                                     i8* blockaddress(@indirectbr_preserved, %bb3),
+                                                     i8* blockaddress(@indirectbr_preserved, %bb4),
+                                                     i8* blockaddress(@indirectbr_preserved, %bb5),
+                                                     i8* blockaddress(@indirectbr_preserved, %bb6),
+                                                     i8* blockaddress(@indirectbr_preserved, %bb7),
+                                                     i8* blockaddress(@indirectbr_preserved, %bb8),
+                                                     i8* blockaddress(@indirectbr_preserved, %bb9)]
+
+; Check that we preserve indirectbr when only calls are retpolined.
+define void @indirectbr_preserved(i64* readonly %p, i64* %sink) #0 {
+; X64-LABEL: indirectbr_preserved:
+; X64: jmpq *
+; X86-LABEL: indirectbr_preserved:
+; X86: jmpl *
+entry:
+  %i0 = load i64, i64* %p
+  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i0
+  %target0 = load i8*, i8** %target.i0
+  indirectbr i8* %target0, [label %bb1, label %bb3]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %latch
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %latch
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %latch
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %latch
+
+bb4:
+  store volatile i64 4, i64* %sink
+  br label %latch
+
+bb5:
+  store volatile i64 5, i64* %sink
+  br label %latch
+
+bb6:
+  store volatile i64 6, i64* %sink
+  br label %latch
+
+bb7:
+  store volatile i64 7, i64* %sink
+  br label %latch
+
+bb8:
+  store volatile i64 8, i64* %sink
+  br label %latch
+
+bb9:
+  store volatile i64 9, i64* %sink
+  br label %latch
+
+latch:
+  %i.next = load i64, i64* %p
+  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_preserved.targets, i64 0, i64 %i.next
+  %target.next = load i8*, i8** %target.i.next
+  ; Potentially hit a full 10 successors here so that even if we rewrite as
+  ; a switch it will try to be lowered with a jump table.
+  indirectbr i8* %target.next, [label %bb0,
+                                label %bb1,
+                                label %bb2,
+                                label %bb3,
+                                label %bb4,
+                                label %bb5,
+                                label %bb6,
+                                label %bb7,
+                                label %bb8,
+                                label %bb9]
+}
+
 @indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
                                                    i8* blockaddress(@indirectbr_rewrite, %bb1),
                                                    i8* blockaddress(@indirectbr_rewrite, %bb2),
@@ -194,10 +341,10 @@ define void @nonlazybind_caller() #0 {
                                                    i8* blockaddress(@indirectbr_rewrite, %bb8),
                                                    i8* blockaddress(@indirectbr_rewrite, %bb9)]
 
-; Check that when retpolines are enabled a function with indirectbr gets
-; rewritten to use switch, and that in turn doesn't get lowered as a jump
-; table.
-define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
+; Check that when retpolines are enabled for indirect branches the indirectbr
+; instruction gets rewritten to use switch, and that in turn doesn't get lowered
+; as a jump table.
+define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #1 {
 ; X64-LABEL: indirectbr_rewrite:
 ; X64-NOT: jmpq
 ; X86-LABEL: indirectbr_rewrite:
@@ -359,5 +506,6 @@ latch:
 ; X86-NEXT: retl
 
 
-attributes #0 = { "target-features"="+retpoline" }
-attributes #1 = { nonlazybind }
+attributes #0 = { "target-features"="+retpoline-indirect-calls" }
+attributes #1 = { "target-features"="+retpoline-indirect-calls,+retpoline-indirect-branches" }
+attributes #2 = { nonlazybind }