diff --git a/docs/SpeculativeLoadHardening.md b/docs/SpeculativeLoadHardening.md index bf5c7d354fe..0911b7c6ae2 100644 --- a/docs/SpeculativeLoadHardening.md +++ b/docs/SpeculativeLoadHardening.md @@ -407,14 +407,12 @@ value to be particularly effective when used below to harden loads. ##### Indirect Call, Branch, and Return Predicates -(Not yet implemented.) - There is no analogous flag to use when tracing indirect calls, branches, and returns. The predicate state must be accumulated through some other means. Fundamentally, this is the reverse of the problem posed in CFI: we need to check where we came from rather than where we are going. For function-local jump tables, this is easily arranged by testing the input to the jump table -within each destination: +within each destination (not yet implemented, use retpolines): ``` pushq %rax xorl %eax, %eax # Zero out initial predicate state. @@ -462,7 +460,8 @@ return_addr: ``` For an ABI without a "red zone" (and thus unable to read the return address -from the stack), mitigating returns face similar problems to calls below. +from the stack), we can compute the expected return address prior to the call +into a register preserved across the call and use that similarly to the above. Indirect calls (and returns in the absence of a red zone ABI) pose the most significant challenge to propagate. The simplest technique would be to define a diff --git a/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/lib/Target/X86/X86SpeculativeLoadHardening.cpp index 923d82f051f..e9b4032a199 100644 --- a/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -1763,13 +1763,6 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden( // pass specifically so that we have the complete set of instructions for // which we will do post-load hardening and can defer it in certain // circumstances. - // - // FIXME: This could probably be made even more effective by doing it - // across the entire function. Rather than just walking the flat list - // backwards here, we could walk the function in PO and each block bottom - // up, allowing us to in some cases sink hardening across block blocks. As - // long as the in-block predicate state is used at the eventual hardening - // site, this remains safe. for (MachineInstr &MI : MBB) { if (HardenLoads) { // We cannot both require hardening the def of a load and its address. @@ -1851,8 +1844,8 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden( } // Otherwise we have a call. We need to handle transferring the predicate - // state into a call and recovering it after the call returns unless this - // is a tail call. + // state into a call and recovering it after the call returns (unless this + // is a tail call). assert(MI.isCall() && "Should only reach here for calls!"); tracePredStateThroughCall(MI); } @@ -2374,21 +2367,10 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) { DebugLoc Loc = MI.getDebugLoc(); auto InsertPt = MI.getIterator(); - if (FenceCallAndRet) { - // Simply forcibly block speculation of loads out of the function by using - // an LFENCE. This is potentially a heavy-weight mitigation strategy, but - // should be secure, is simple from an ABI perspective, and the cost can be - // minimized through inlining. - // - // FIXME: We should investigate ways to establish a strong data-dependency - // on the return. 
However, poisoning the stack pointer is unlikely to work - // because the return is *predicted* rather than relying on the load of the - // return address to actually resolve. - BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE)); - ++NumInstsInserted; - ++NumLFENCEsInserted; + if (FenceCallAndRet) + // No need to fence here as we'll fence at the return site itself. That + // handles more cases than we can handle here. return; - } // Take our predicate state, shift it to the high 17 bits (so that we keep // pointers canonical) and merge it into RSP. This will allow the caller to @@ -2406,31 +2388,168 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) { /// /// For tail calls, this is all we need to do. /// -/// For calls where we might return to control flow, we further need to extract -/// the predicate state built up within that function from the high bits of the -/// stack pointer, and make that the newly available predicate state. +/// For calls where we might return and resume the control flow, we need to +/// extract the predicate state from the high bits of the stack pointer after +/// control returns from the called function. +/// +/// We also need to verify that we intended to return to this location in the +/// code. An attacker might arrange for the processor to mispredict the return +/// to this valid but incorrect return address in the program rather than the +/// correct one. See the paper on this attack, called "ret2spec" by the +/// researchers, here: +/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf +/// +/// The way we verify that we returned to the correct location is by preserving +/// the expected return address across the call. One technique involves taking +/// advantage of the red-zone to load the return address from `8(%rsp)` where it +/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can +/// directly save the address into a register that will be preserved across the +/// call. We compare this intended return address against the address +/// immediately following the call (the observed return address). If these +/// mismatch, we have detected misspeculation and can poison our predicate +/// state. void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall( MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); auto InsertPt = MI.getIterator(); DebugLoc Loc = MI.getDebugLoc(); + if (FenceCallAndRet) { + if (MI.isReturn()) + // Tail call, we don't return to this function. + // FIXME: We should also handle noreturn calls. + return; + + // We don't need to fence before the call because the function should fence + // in its entry. However, we do need to fence after the call returns. + // Fencing before the return doesn't correctly handle cases where the return + // itself is mispredicted. + BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE)); + ++NumInstsInserted; + ++NumLFENCEsInserted; + return; + } + // First, we transfer the predicate state into the called function by merging // it into the stack pointer. This will kill the current def of the state. unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB); mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg); // If this call is also a return, it is a tail call and we don't need anything - // else to handle it so just continue. - // FIXME: We should also handle noreturn calls. - if (MI.isReturn()) + // else to handle it so just return. 
Also, if there are no further + // instructions and no successors, this call does not return so we can also + // bail. + if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty())) return; - // We need to step past the call and recover the predicate state from SP after - // the return, and make this new state available. + // Create a symbol to track the return address and attach it to the call + // machine instruction. We will lower extra symbols attached to call + // instructions as label immediately following the call. + MCSymbol *RetSymbol = + MF.getContext().createTempSymbol("slh_ret_addr", + /*AlwaysAddSuffix*/ true); + MI.setPostInstrSymbol(MF, RetSymbol); + + const TargetRegisterClass *AddrRC = &X86::GR64RegClass; + unsigned ExpectedRetAddrReg = 0; + + // If we have no red zones or if the function returns twice (possibly without + // using the `ret` instruction) like setjmp, we need to save the expected + // return address prior to the call. + if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone) || + MF.exposesReturnsTwice()) { + // If we don't have red zones, we need to compute the expected return + // address prior to the call and store it in a register that lives across + // the call. + // + // In some ways, this is doubly satisfying as a mitigation because it will + // also successfully detect stack smashing bugs in some cases (typically, + // when a callee-saved register is used and the callee doesn't push it onto + // the stack). But that isn't our primary goal, so we only use it as + // a fallback. + // + // FIXME: It isn't clear that this is reliable in the face of + // rematerialization in the register allocator. We somehow need to force + // that to not occur for this particular instruction, and instead to spill + // or otherwise preserve the value computed *prior* to the call. + // + // FIXME: It is even less clear why MachineCSE can't just fold this when we + // end up having to use identical instructions both before and after the + // call to feed the comparison. + ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC); + if (MF.getTarget().getCodeModel() == CodeModel::Small && + !Subtarget->isPositionIndependent()) { + BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg) + .addSym(RetSymbol); + } else { + BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg) + .addReg(/*Base*/ X86::RIP) + .addImm(/*Scale*/ 1) + .addReg(/*Index*/ 0) + .addSym(RetSymbol) + .addReg(/*Segment*/ 0); + } + } + + // Step past the call to handle when it returns. ++InsertPt; + + // If we didn't pre-compute the expected return address into a register, then + // red zones are enabled and the return address is still available on the + // stack immediately after the call. As the very first instruction, we load it + // into a register. + if (!ExpectedRetAddrReg) { + ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC); + BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg) + .addReg(/*Base*/ X86::RSP) + .addImm(/*Scale*/ 1) + .addReg(/*Index*/ 0) + .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so + // the return address is 8-bytes past it. + .addReg(/*Segment*/ 0); + } + + // Now we extract the callee's predicate state from the stack pointer. unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc); - PS->SSA.AddAvailableValue(&MBB, NewStateReg); + + // Test the expected return address against our actual address. 
If we can + // form this basic block's address as an immediate, this is easy. Otherwise + // we compute it. + if (MF.getTarget().getCodeModel() == CodeModel::Small && + !Subtarget->isPositionIndependent()) { + // FIXME: Could we fold this with the load? It would require careful EFLAGS + // management. + BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32)) + .addReg(ExpectedRetAddrReg, RegState::Kill) + .addSym(RetSymbol); + } else { + unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC); + BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg) + .addReg(/*Base*/ X86::RIP) + .addImm(/*Scale*/ 1) + .addReg(/*Index*/ 0) + .addSym(RetSymbol) + .addReg(/*Segment*/ 0); + BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr)) + .addReg(ExpectedRetAddrReg, RegState::Kill) + .addReg(ActualRetAddrReg, RegState::Kill); + } + + // Now conditionally update the predicate state we just extracted if we ended + // up at a different return address than expected. + int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; + auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes); + + unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); + auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg) + .addReg(NewStateReg, RegState::Kill) + .addReg(PS->PoisonReg); + CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); + ++NumInstsInserted; + LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); + + PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg); } /// An attacker may speculatively store over a value that is then speculatively diff --git a/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll b/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll new file mode 100644 index 00000000000..5f4bbb0a434 --- /dev/null +++ b/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll @@ -0,0 +1,485 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening | FileCheck %s --check-prefix=X64-NOPIC +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -code-model medium | FileCheck %s --check-prefix=X64-NOPIC-MCM +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -relocation-model pic | FileCheck %s --check-prefix=X64-PIC +; +; FIXME: Add support for 32-bit. 
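+;
+; These tests check the hardening sequence emitted after each call site: a
+; `.Lslh_ret_addr<N>` label is placed immediately after the call, the actual
+; return address is recovered (from the red zone below %rsp, or from a register
+; computed before the call when the red zone is unavailable or the callee may
+; return twice), compared against that label, and the predicate state is
+; poisoned with a cmov on mismatch. As a rough sketch of the non-PIC, red-zone
+; sequence the CHECK lines below expect (exact registers and displacements
+; depend on register allocation):
+;
+;   callq f
+; .Lslh_ret_addr0:                # Expected return address.
+;   movq %rsp, %rax
+;   movq -8(%rsp), %rcx           # Actual return address, left in the red zone
+;                                 # by the callee's ret.
+;   sarq $63, %rax                # Extract the predicate state from %rsp.
+;   cmpq $.Lslh_ret_addr0, %rcx   # Did we return where we expected to?
+;   cmovneq %r14, %rax            # If not, poison the state (%r14 holds -1).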
+ +declare void @f() + +define i32 @test_calls_and_rets(i32 *%ptr) nounwind { +; X64-NOPIC-LABEL: test_calls_and_rets: +; X64-NOPIC: # %bb.0: # %entry +; X64-NOPIC-NEXT: pushq %rbp +; X64-NOPIC-NEXT: pushq %r14 +; X64-NOPIC-NEXT: pushq %rbx +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: movq %rdi, %rbx +; X64-NOPIC-NEXT: movq $-1, %r14 +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: callq f +; X64-NOPIC-NEXT: .Lslh_ret_addr0: +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr0, %rcx +; X64-NOPIC-NEXT: cmovneq %r14, %rax +; X64-NOPIC-NEXT: movl (%rbx), %ebp +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: callq f +; X64-NOPIC-NEXT: .Lslh_ret_addr1: +; X64-NOPIC-NEXT: movq %rsp, %rcx +; X64-NOPIC-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; X64-NOPIC-NEXT: sarq $63, %rcx +; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr1, %rax +; X64-NOPIC-NEXT: cmovneq %r14, %rcx +; X64-NOPIC-NEXT: addl (%rbx), %ebp +; X64-NOPIC-NEXT: orl %ecx, %ebp +; X64-NOPIC-NEXT: shlq $47, %rcx +; X64-NOPIC-NEXT: movl %ebp, %eax +; X64-NOPIC-NEXT: orq %rcx, %rsp +; X64-NOPIC-NEXT: popq %rbx +; X64-NOPIC-NEXT: popq %r14 +; X64-NOPIC-NEXT: popq %rbp +; X64-NOPIC-NEXT: retq +; +; X64-NOPIC-MCM-LABEL: test_calls_and_rets: +; X64-NOPIC-MCM: # %bb.0: # %entry +; X64-NOPIC-MCM-NEXT: pushq %rbp +; X64-NOPIC-MCM-NEXT: pushq %r14 +; X64-NOPIC-MCM-NEXT: pushq %rbx +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: movq %rdi, %rbx +; X64-NOPIC-MCM-NEXT: movq $-1, %r14 +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: callq f +; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr0: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr0(%rip), %rdx +; X64-NOPIC-MCM-NEXT: cmpq %rdx, %rcx +; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rax +; X64-NOPIC-MCM-NEXT: movl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: callq f +; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr1: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rcx +; X64-NOPIC-MCM-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; X64-NOPIC-MCM-NEXT: sarq $63, %rcx +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr1(%rip), %rdx +; X64-NOPIC-MCM-NEXT: cmpq %rdx, %rax +; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rcx +; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: orl %ecx, %ebp +; X64-NOPIC-MCM-NEXT: shlq $47, %rcx +; X64-NOPIC-MCM-NEXT: movl %ebp, %eax +; X64-NOPIC-MCM-NEXT: orq %rcx, %rsp +; X64-NOPIC-MCM-NEXT: popq %rbx +; X64-NOPIC-MCM-NEXT: popq %r14 +; X64-NOPIC-MCM-NEXT: popq %rbp +; X64-NOPIC-MCM-NEXT: retq +; +; X64-PIC-LABEL: test_calls_and_rets: +; X64-PIC: # %bb.0: # %entry +; X64-PIC-NEXT: pushq %rbp +; X64-PIC-NEXT: pushq %r14 +; X64-PIC-NEXT: pushq %rbx +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: movq %rdi, %rbx +; X64-PIC-NEXT: movq $-1, %r14 +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: callq f@PLT +; X64-PIC-NEXT: .Lslh_ret_addr0: +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: leaq .Lslh_ret_addr0(%rip), %rdx +; X64-PIC-NEXT: cmpq %rdx, %rcx +; X64-PIC-NEXT: cmovneq %r14, %rax +; X64-PIC-NEXT: 
movl (%rbx), %ebp +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: callq f@PLT +; X64-PIC-NEXT: .Lslh_ret_addr1: +; X64-PIC-NEXT: movq %rsp, %rcx +; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; X64-PIC-NEXT: sarq $63, %rcx +; X64-PIC-NEXT: leaq .Lslh_ret_addr1(%rip), %rdx +; X64-PIC-NEXT: cmpq %rdx, %rax +; X64-PIC-NEXT: cmovneq %r14, %rcx +; X64-PIC-NEXT: addl (%rbx), %ebp +; X64-PIC-NEXT: orl %ecx, %ebp +; X64-PIC-NEXT: shlq $47, %rcx +; X64-PIC-NEXT: movl %ebp, %eax +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: popq %rbx +; X64-PIC-NEXT: popq %r14 +; X64-PIC-NEXT: popq %rbp +; X64-PIC-NEXT: retq +entry: + call void @f() + %x = load i32, i32* %ptr + call void @f() + %y = load i32, i32* %ptr + %z = add i32 %x, %y + ret i32 %z +} + +define i32 @test_calls_and_rets_noredzone(i32 *%ptr) nounwind noredzone { +; X64-NOPIC-LABEL: test_calls_and_rets_noredzone: +; X64-NOPIC: # %bb.0: # %entry +; X64-NOPIC-NEXT: pushq %rbp +; X64-NOPIC-NEXT: pushq %r15 +; X64-NOPIC-NEXT: pushq %r14 +; X64-NOPIC-NEXT: pushq %rbx +; X64-NOPIC-NEXT: pushq %rax +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: movq %rdi, %rbx +; X64-NOPIC-NEXT: movq $-1, %r14 +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: movq $.Lslh_ret_addr2, %rbp +; X64-NOPIC-NEXT: callq f +; X64-NOPIC-NEXT: .Lslh_ret_addr2: +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr2, %rbp +; X64-NOPIC-NEXT: cmovneq %r14, %rax +; X64-NOPIC-NEXT: movl (%rbx), %ebp +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: movq $.Lslh_ret_addr3, %r15 +; X64-NOPIC-NEXT: callq f +; X64-NOPIC-NEXT: .Lslh_ret_addr3: +; X64-NOPIC-NEXT: movq %rsp, %rcx +; X64-NOPIC-NEXT: sarq $63, %rcx +; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr3, %r15 +; X64-NOPIC-NEXT: cmovneq %r14, %rcx +; X64-NOPIC-NEXT: addl (%rbx), %ebp +; X64-NOPIC-NEXT: orl %ecx, %ebp +; X64-NOPIC-NEXT: shlq $47, %rcx +; X64-NOPIC-NEXT: movl %ebp, %eax +; X64-NOPIC-NEXT: orq %rcx, %rsp +; X64-NOPIC-NEXT: addq $8, %rsp +; X64-NOPIC-NEXT: popq %rbx +; X64-NOPIC-NEXT: popq %r14 +; X64-NOPIC-NEXT: popq %r15 +; X64-NOPIC-NEXT: popq %rbp +; X64-NOPIC-NEXT: retq +; +; X64-NOPIC-MCM-LABEL: test_calls_and_rets_noredzone: +; X64-NOPIC-MCM: # %bb.0: # %entry +; X64-NOPIC-MCM-NEXT: pushq %rbp +; X64-NOPIC-MCM-NEXT: pushq %r15 +; X64-NOPIC-MCM-NEXT: pushq %r14 +; X64-NOPIC-MCM-NEXT: pushq %rbx +; X64-NOPIC-MCM-NEXT: pushq %rax +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: movq %rdi, %rbx +; X64-NOPIC-MCM-NEXT: movq $-1, %r14 +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr2(%rip), %rbp +; X64-NOPIC-MCM-NEXT: callq f +; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr2: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr2(%rip), %rcx +; X64-NOPIC-MCM-NEXT: cmpq %rcx, %rbp +; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rax +; X64-NOPIC-MCM-NEXT: movl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr3(%rip), %r15 +; X64-NOPIC-MCM-NEXT: callq f +; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr3: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rcx +; X64-NOPIC-MCM-NEXT: sarq $63, %rcx +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr3(%rip), %rax +; X64-NOPIC-MCM-NEXT: cmpq %rax, %r15 +; X64-NOPIC-MCM-NEXT: cmovneq 
%r14, %rcx +; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: orl %ecx, %ebp +; X64-NOPIC-MCM-NEXT: shlq $47, %rcx +; X64-NOPIC-MCM-NEXT: movl %ebp, %eax +; X64-NOPIC-MCM-NEXT: orq %rcx, %rsp +; X64-NOPIC-MCM-NEXT: addq $8, %rsp +; X64-NOPIC-MCM-NEXT: popq %rbx +; X64-NOPIC-MCM-NEXT: popq %r14 +; X64-NOPIC-MCM-NEXT: popq %r15 +; X64-NOPIC-MCM-NEXT: popq %rbp +; X64-NOPIC-MCM-NEXT: retq +; +; X64-PIC-LABEL: test_calls_and_rets_noredzone: +; X64-PIC: # %bb.0: # %entry +; X64-PIC-NEXT: pushq %rbp +; X64-PIC-NEXT: pushq %r15 +; X64-PIC-NEXT: pushq %r14 +; X64-PIC-NEXT: pushq %rbx +; X64-PIC-NEXT: pushq %rax +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: movq %rdi, %rbx +; X64-PIC-NEXT: movq $-1, %r14 +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: leaq .Lslh_ret_addr2(%rip), %rbp +; X64-PIC-NEXT: callq f@PLT +; X64-PIC-NEXT: .Lslh_ret_addr2: +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: leaq .Lslh_ret_addr2(%rip), %rcx +; X64-PIC-NEXT: cmpq %rcx, %rbp +; X64-PIC-NEXT: cmovneq %r14, %rax +; X64-PIC-NEXT: movl (%rbx), %ebp +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: leaq .Lslh_ret_addr3(%rip), %r15 +; X64-PIC-NEXT: callq f@PLT +; X64-PIC-NEXT: .Lslh_ret_addr3: +; X64-PIC-NEXT: movq %rsp, %rcx +; X64-PIC-NEXT: sarq $63, %rcx +; X64-PIC-NEXT: leaq .Lslh_ret_addr3(%rip), %rax +; X64-PIC-NEXT: cmpq %rax, %r15 +; X64-PIC-NEXT: cmovneq %r14, %rcx +; X64-PIC-NEXT: addl (%rbx), %ebp +; X64-PIC-NEXT: orl %ecx, %ebp +; X64-PIC-NEXT: shlq $47, %rcx +; X64-PIC-NEXT: movl %ebp, %eax +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: addq $8, %rsp +; X64-PIC-NEXT: popq %rbx +; X64-PIC-NEXT: popq %r14 +; X64-PIC-NEXT: popq %r15 +; X64-PIC-NEXT: popq %rbp +; X64-PIC-NEXT: retq +entry: + call void @f() + %x = load i32, i32* %ptr + call void @f() + %y = load i32, i32* %ptr + %z = add i32 %x, %y + ret i32 %z +} + +declare i32 @setjmp(i8* %env) returns_twice +declare i32 @sigsetjmp(i8* %env, i32 %savemask) returns_twice +declare i32 @__sigsetjmp(i8* %foo, i8* %bar, i32 %baz) returns_twice + +define i32 @test_call_setjmp(i32 *%ptr) nounwind { +; X64-NOPIC-LABEL: test_call_setjmp: +; X64-NOPIC: # %bb.0: # %entry +; X64-NOPIC-NEXT: pushq %rbp +; X64-NOPIC-NEXT: pushq %r15 +; X64-NOPIC-NEXT: pushq %r14 +; X64-NOPIC-NEXT: pushq %r12 +; X64-NOPIC-NEXT: pushq %rbx +; X64-NOPIC-NEXT: subq $16, %rsp +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: movq %rdi, %rbx +; X64-NOPIC-NEXT: movq $-1, %r15 +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: movq %rsp, %r14 +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: movq %r14, %rdi +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: movq $.Lslh_ret_addr4, %rbp +; X64-NOPIC-NEXT: callq setjmp +; X64-NOPIC-NEXT: .Lslh_ret_addr4: +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr4, %rbp +; X64-NOPIC-NEXT: cmovneq %r15, %rax +; X64-NOPIC-NEXT: movl (%rbx), %ebp +; X64-NOPIC-NEXT: movl $42, %esi +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: movq %r14, %rdi +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: movq $.Lslh_ret_addr5, %r12 +; X64-NOPIC-NEXT: callq sigsetjmp +; X64-NOPIC-NEXT: .Lslh_ret_addr5: +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr5, %r12 +; X64-NOPIC-NEXT: cmovneq %r15, %rax +; X64-NOPIC-NEXT: addl (%rbx), %ebp +; X64-NOPIC-NEXT: movl $42, %edx +; X64-NOPIC-NEXT: shlq $47, %rax 
+; X64-NOPIC-NEXT: movq %r14, %rdi +; X64-NOPIC-NEXT: movq %r14, %rsi +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: movq $.Lslh_ret_addr6, %r14 +; X64-NOPIC-NEXT: callq __sigsetjmp +; X64-NOPIC-NEXT: .Lslh_ret_addr6: +; X64-NOPIC-NEXT: movq %rsp, %rcx +; X64-NOPIC-NEXT: sarq $63, %rcx +; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr6, %r14 +; X64-NOPIC-NEXT: cmovneq %r15, %rcx +; X64-NOPIC-NEXT: addl (%rbx), %ebp +; X64-NOPIC-NEXT: orl %ecx, %ebp +; X64-NOPIC-NEXT: shlq $47, %rcx +; X64-NOPIC-NEXT: movl %ebp, %eax +; X64-NOPIC-NEXT: orq %rcx, %rsp +; X64-NOPIC-NEXT: addq $16, %rsp +; X64-NOPIC-NEXT: popq %rbx +; X64-NOPIC-NEXT: popq %r12 +; X64-NOPIC-NEXT: popq %r14 +; X64-NOPIC-NEXT: popq %r15 +; X64-NOPIC-NEXT: popq %rbp +; X64-NOPIC-NEXT: retq +; +; X64-NOPIC-MCM-LABEL: test_call_setjmp: +; X64-NOPIC-MCM: # %bb.0: # %entry +; X64-NOPIC-MCM-NEXT: pushq %rbp +; X64-NOPIC-MCM-NEXT: pushq %r15 +; X64-NOPIC-MCM-NEXT: pushq %r14 +; X64-NOPIC-MCM-NEXT: pushq %r12 +; X64-NOPIC-MCM-NEXT: pushq %rbx +; X64-NOPIC-MCM-NEXT: subq $16, %rsp +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: movq %rdi, %rbx +; X64-NOPIC-MCM-NEXT: movq $-1, %r15 +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: movq %rsp, %r14 +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: movq %r14, %rdi +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr4(%rip), %rbp +; X64-NOPIC-MCM-NEXT: callq setjmp +; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr4: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr4(%rip), %rcx +; X64-NOPIC-MCM-NEXT: cmpq %rcx, %rbp +; X64-NOPIC-MCM-NEXT: cmovneq %r15, %rax +; X64-NOPIC-MCM-NEXT: movl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: movl $42, %esi +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: movq %r14, %rdi +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr5(%rip), %r12 +; X64-NOPIC-MCM-NEXT: callq sigsetjmp +; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr5: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr5(%rip), %rcx +; X64-NOPIC-MCM-NEXT: cmpq %rcx, %r12 +; X64-NOPIC-MCM-NEXT: cmovneq %r15, %rax +; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: movl $42, %edx +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: movq %r14, %rdi +; X64-NOPIC-MCM-NEXT: movq %r14, %rsi +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr6(%rip), %r14 +; X64-NOPIC-MCM-NEXT: callq __sigsetjmp +; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr6: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rcx +; X64-NOPIC-MCM-NEXT: sarq $63, %rcx +; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr6(%rip), %rax +; X64-NOPIC-MCM-NEXT: cmpq %rax, %r14 +; X64-NOPIC-MCM-NEXT: cmovneq %r15, %rcx +; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: orl %ecx, %ebp +; X64-NOPIC-MCM-NEXT: shlq $47, %rcx +; X64-NOPIC-MCM-NEXT: movl %ebp, %eax +; X64-NOPIC-MCM-NEXT: orq %rcx, %rsp +; X64-NOPIC-MCM-NEXT: addq $16, %rsp +; X64-NOPIC-MCM-NEXT: popq %rbx +; X64-NOPIC-MCM-NEXT: popq %r12 +; X64-NOPIC-MCM-NEXT: popq %r14 +; X64-NOPIC-MCM-NEXT: popq %r15 +; X64-NOPIC-MCM-NEXT: popq %rbp +; X64-NOPIC-MCM-NEXT: retq +; +; X64-PIC-LABEL: test_call_setjmp: +; X64-PIC: # %bb.0: # %entry +; X64-PIC-NEXT: pushq %rbp +; X64-PIC-NEXT: pushq %r15 +; X64-PIC-NEXT: pushq %r14 +; X64-PIC-NEXT: pushq %r12 +; X64-PIC-NEXT: pushq %rbx +; X64-PIC-NEXT: subq $16, %rsp +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: movq %rdi, 
%rbx +; X64-PIC-NEXT: movq $-1, %r15 +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: movq %rsp, %r14 +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: movq %r14, %rdi +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: leaq .Lslh_ret_addr4(%rip), %rbp +; X64-PIC-NEXT: callq setjmp@PLT +; X64-PIC-NEXT: .Lslh_ret_addr4: +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: leaq .Lslh_ret_addr4(%rip), %rcx +; X64-PIC-NEXT: cmpq %rcx, %rbp +; X64-PIC-NEXT: cmovneq %r15, %rax +; X64-PIC-NEXT: movl (%rbx), %ebp +; X64-PIC-NEXT: movl $42, %esi +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: movq %r14, %rdi +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: leaq .Lslh_ret_addr5(%rip), %r12 +; X64-PIC-NEXT: callq sigsetjmp@PLT +; X64-PIC-NEXT: .Lslh_ret_addr5: +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: leaq .Lslh_ret_addr5(%rip), %rcx +; X64-PIC-NEXT: cmpq %rcx, %r12 +; X64-PIC-NEXT: cmovneq %r15, %rax +; X64-PIC-NEXT: addl (%rbx), %ebp +; X64-PIC-NEXT: movl $42, %edx +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: movq %r14, %rdi +; X64-PIC-NEXT: movq %r14, %rsi +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: leaq .Lslh_ret_addr6(%rip), %r14 +; X64-PIC-NEXT: callq __sigsetjmp@PLT +; X64-PIC-NEXT: .Lslh_ret_addr6: +; X64-PIC-NEXT: movq %rsp, %rcx +; X64-PIC-NEXT: sarq $63, %rcx +; X64-PIC-NEXT: leaq .Lslh_ret_addr6(%rip), %rax +; X64-PIC-NEXT: cmpq %rax, %r14 +; X64-PIC-NEXT: cmovneq %r15, %rcx +; X64-PIC-NEXT: addl (%rbx), %ebp +; X64-PIC-NEXT: orl %ecx, %ebp +; X64-PIC-NEXT: shlq $47, %rcx +; X64-PIC-NEXT: movl %ebp, %eax +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: addq $16, %rsp +; X64-PIC-NEXT: popq %rbx +; X64-PIC-NEXT: popq %r12 +; X64-PIC-NEXT: popq %r14 +; X64-PIC-NEXT: popq %r15 +; X64-PIC-NEXT: popq %rbp +; X64-PIC-NEXT: retq +entry: + %env = alloca i8, i32 16 + ; Call a normal setjmp function. + call i32 @setjmp(i8* %env) + %x = load i32, i32* %ptr + ; Call something like sigsetjmp. + call i32 @sigsetjmp(i8* %env, i32 42) + %y = load i32, i32* %ptr + ; Call something that might be an implementation detail expanded out of a + ; macro that has a weird signature but still gets annotated as returning + ; twice. 
+ call i32 @__sigsetjmp(i8* %env, i8* %env, i32 42) + %z = load i32, i32* %ptr + %s1 = add i32 %x, %y + %s2 = add i32 %s1, %z + ret i32 %s2 +} diff --git a/test/CodeGen/X86/speculative-load-hardening-indirect.ll b/test/CodeGen/X86/speculative-load-hardening-indirect.ll index a94dc9219e3..8761fcff5d9 100644 --- a/test/CodeGen/X86/speculative-load-hardening-indirect.ll +++ b/test/CodeGen/X86/speculative-load-hardening-indirect.ll @@ -17,56 +17,69 @@ define i32 @test_indirect_call(i32 ()** %ptr) nounwind { ; X64-LABEL: test_indirect_call: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rax +; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %rbx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: movq (%rdi), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq *%rcx +; X64-NEXT: .Lslh_ret_addr0: ; X64-NEXT: movq %rsp, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rcx +; X64-NEXT: cmpq $.Lslh_ret_addr0, %rdx +; X64-NEXT: cmovneq %rbx, %rcx ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: orq %rcx, %rsp -; X64-NEXT: popq %rcx +; X64-NEXT: popq %rbx ; X64-NEXT: retq ; ; X64-PIC-LABEL: test_indirect_call: ; X64-PIC: # %bb.0: # %entry -; X64-PIC-NEXT: pushq %rax +; X64-PIC-NEXT: pushq %rbx ; X64-PIC-NEXT: movq %rsp, %rax -; X64-PIC-NEXT: movq $-1, %rcx +; X64-PIC-NEXT: movq $-1, %rbx ; X64-PIC-NEXT: sarq $63, %rax ; X64-PIC-NEXT: movq (%rdi), %rcx ; X64-PIC-NEXT: orq %rax, %rcx ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: orq %rax, %rsp ; X64-PIC-NEXT: callq *%rcx +; X64-PIC-NEXT: .Lslh_ret_addr0: ; X64-PIC-NEXT: movq %rsp, %rcx +; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-PIC-NEXT: sarq $63, %rcx +; X64-PIC-NEXT: leaq .Lslh_ret_addr0(%rip), %rsi +; X64-PIC-NEXT: cmpq %rsi, %rdx +; X64-PIC-NEXT: cmovneq %rbx, %rcx ; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: orq %rcx, %rsp -; X64-PIC-NEXT: popq %rcx +; X64-PIC-NEXT: popq %rbx ; X64-PIC-NEXT: retq ; ; X64-RETPOLINE-LABEL: test_indirect_call: ; X64-RETPOLINE: # %bb.0: # %entry -; X64-RETPOLINE-NEXT: pushq %rax +; X64-RETPOLINE-NEXT: pushq %rbx ; X64-RETPOLINE-NEXT: movq %rsp, %rax -; X64-RETPOLINE-NEXT: movq $-1, %rcx +; X64-RETPOLINE-NEXT: movq $-1, %rbx ; X64-RETPOLINE-NEXT: sarq $63, %rax ; X64-RETPOLINE-NEXT: movq (%rdi), %r11 ; X64-RETPOLINE-NEXT: orq %rax, %r11 ; X64-RETPOLINE-NEXT: shlq $47, %rax ; X64-RETPOLINE-NEXT: orq %rax, %rsp ; X64-RETPOLINE-NEXT: callq __llvm_retpoline_r11 +; X64-RETPOLINE-NEXT: .Lslh_ret_addr0: ; X64-RETPOLINE-NEXT: movq %rsp, %rcx +; X64-RETPOLINE-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-RETPOLINE-NEXT: sarq $63, %rcx +; X64-RETPOLINE-NEXT: cmpq $.Lslh_ret_addr0, %rdx +; X64-RETPOLINE-NEXT: cmovneq %rbx, %rcx ; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: orq %rcx, %rsp -; X64-RETPOLINE-NEXT: popq %rcx +; X64-RETPOLINE-NEXT: popq %rbx ; X64-RETPOLINE-NEXT: retq entry: %fp = load i32 ()*, i32 ()** %ptr @@ -116,27 +129,31 @@ entry: define i32 @test_indirect_call_global() nounwind { ; X64-LABEL: test_indirect_call_global: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rax +; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %rbx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: movq global_fnptr(%rip), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq *%rcx +; X64-NEXT: .Lslh_ret_addr1: ; X64-NEXT: movq %rsp, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rcx +; X64-NEXT: cmpq 
$.Lslh_ret_addr1, %rdx +; X64-NEXT: cmovneq %rbx, %rcx ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: orq %rcx, %rsp -; X64-NEXT: popq %rcx +; X64-NEXT: popq %rbx ; X64-NEXT: retq ; ; X64-PIC-LABEL: test_indirect_call_global: ; X64-PIC: # %bb.0: # %entry -; X64-PIC-NEXT: pushq %rax +; X64-PIC-NEXT: pushq %rbx ; X64-PIC-NEXT: movq %rsp, %rax -; X64-PIC-NEXT: movq $-1, %rcx +; X64-PIC-NEXT: movq $-1, %rbx ; X64-PIC-NEXT: sarq $63, %rax ; X64-PIC-NEXT: movq global_fnptr@GOTPCREL(%rip), %rcx ; X64-PIC-NEXT: movq (%rcx), %rcx @@ -144,28 +161,37 @@ define i32 @test_indirect_call_global() nounwind { ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: orq %rax, %rsp ; X64-PIC-NEXT: callq *%rcx +; X64-PIC-NEXT: .Lslh_ret_addr1: ; X64-PIC-NEXT: movq %rsp, %rcx +; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-PIC-NEXT: sarq $63, %rcx +; X64-PIC-NEXT: leaq .Lslh_ret_addr1(%rip), %rsi +; X64-PIC-NEXT: cmpq %rsi, %rdx +; X64-PIC-NEXT: cmovneq %rbx, %rcx ; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: orq %rcx, %rsp -; X64-PIC-NEXT: popq %rcx +; X64-PIC-NEXT: popq %rbx ; X64-PIC-NEXT: retq ; ; X64-RETPOLINE-LABEL: test_indirect_call_global: ; X64-RETPOLINE: # %bb.0: # %entry -; X64-RETPOLINE-NEXT: pushq %rax +; X64-RETPOLINE-NEXT: pushq %rbx ; X64-RETPOLINE-NEXT: movq %rsp, %rax -; X64-RETPOLINE-NEXT: movq $-1, %rcx +; X64-RETPOLINE-NEXT: movq $-1, %rbx ; X64-RETPOLINE-NEXT: sarq $63, %rax ; X64-RETPOLINE-NEXT: movq global_fnptr(%rip), %r11 ; X64-RETPOLINE-NEXT: shlq $47, %rax ; X64-RETPOLINE-NEXT: orq %rax, %rsp ; X64-RETPOLINE-NEXT: callq __llvm_retpoline_r11 +; X64-RETPOLINE-NEXT: .Lslh_ret_addr1: ; X64-RETPOLINE-NEXT: movq %rsp, %rcx +; X64-RETPOLINE-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-RETPOLINE-NEXT: sarq $63, %rcx +; X64-RETPOLINE-NEXT: cmpq $.Lslh_ret_addr1, %rdx +; X64-RETPOLINE-NEXT: cmovneq %rbx, %rcx ; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: orq %rcx, %rsp -; X64-RETPOLINE-NEXT: popq %rcx +; X64-RETPOLINE-NEXT: popq %rbx ; X64-RETPOLINE-NEXT: retq entry: %fp = load i32 ()*, i32 ()** @global_fnptr diff --git a/test/CodeGen/X86/speculative-load-hardening.ll b/test/CodeGen/X86/speculative-load-hardening.ll index ef85fdf4070..152fc411cff 100644 --- a/test/CodeGen/X86/speculative-load-hardening.ll +++ b/test/CodeGen/X86/speculative-load-hardening.ll @@ -64,7 +64,7 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr ; X64-NEXT: retq ; X64-NEXT: .LBB1_4: # %then2 ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: movq %r8, %r15 +; X64-NEXT: movq %r8, %r14 ; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: testl %edx, %edx ; X64-NEXT: je .LBB1_6 @@ -72,30 +72,34 @@ define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr ; X64-NEXT: cmoveq %rbx, %rax ; X64-NEXT: movslq (%r9), %rcx ; X64-NEXT: orq %rax, %rcx -; X64-NEXT: leaq (%r15,%rcx,4), %r14 -; X64-NEXT: movl %ecx, (%r15,%rcx,4) +; X64-NEXT: leaq (%r14,%rcx,4), %r15 +; X64-NEXT: movl %ecx, (%r14,%rcx,4) ; X64-NEXT: jmp .LBB1_7 ; X64-NEXT: .LBB1_6: # %then3 ; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: movl (%rcx), %ecx -; X64-NEXT: addl (%r15), %ecx +; X64-NEXT: addl (%r14), %ecx ; X64-NEXT: movslq %ecx, %rdi ; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movl (%r15,%rdi,4), %esi +; X64-NEXT: movl (%r14,%rdi,4), %esi ; X64-NEXT: orl %eax, %esi -; X64-NEXT: movq (%r9), %r14 -; X64-NEXT: orq %rax, %r14 -; X64-NEXT: addl (%r14), %esi +; X64-NEXT: movq (%r9), %r15 +; X64-NEXT: orq %rax, %r15 +; X64-NEXT: addl (%r15), %esi ; X64-NEXT: shlq $47, %rax ; X64-NEXT: # kill: def $edi killed $edi killed $rdi ; 
X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq leak +; X64-NEXT: .Lslh_ret_addr0: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr0, %rcx +; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: .LBB1_7: # %merge -; X64-NEXT: movslq (%r14), %rcx +; X64-NEXT: movslq (%r15), %rcx ; X64-NEXT: orq %rax, %rcx -; X64-NEXT: movl $0, (%r15,%rcx,4) +; X64-NEXT: movl $0, (%r14,%rcx,4) ; X64-NEXT: jmp .LBB1_8 ; ; X64-LFENCE-LABEL: test_basic_conditions: @@ -225,8 +229,12 @@ define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind { ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Lslh_ret_addr1: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr1, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: incl %ebx ; X64-NEXT: cmpl %ebp, %ebx ; X64-NEXT: jl .LBB2_6 @@ -304,19 +312,19 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: pushq %rbx ; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %r12 +; X64-NEXT: movq $-1, %rbp ; X64-NEXT: sarq $63, %rax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: je .LBB3_2 ; X64-NEXT: # %bb.1: -; X64-NEXT: cmoveq %r12, %rax +; X64-NEXT: cmoveq %rbp, %rax ; X64-NEXT: jmp .LBB3_10 ; X64-NEXT: .LBB3_2: # %l1.header.preheader ; X64-NEXT: movq %r8, %r14 ; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movl %edx, %ebp +; X64-NEXT: movl %edx, %r12d ; X64-NEXT: movl %esi, %r15d -; X64-NEXT: cmovneq %r12, %rax +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: xorl %r13d, %r13d ; X64-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testl %r15d, %r15d @@ -324,16 +332,16 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: jmp .LBB3_4 ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_12: -; X64-NEXT: cmovgeq %r12, %rax +; X64-NEXT: cmovgeq %rbp, %rax ; X64-NEXT: testl %r15d, %r15d ; X64-NEXT: jle .LBB3_4 ; X64-NEXT: .LBB3_5: # %l2.header.preheader -; X64-NEXT: cmovleq %r12, %rax +; X64-NEXT: cmovleq %rbp, %rax ; X64-NEXT: xorl %r15d, %r15d ; X64-NEXT: jmp .LBB3_6 ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_11: # in Loop: Header=BB3_6 Depth=1 -; X64-NEXT: cmovgeq %r12, %rax +; X64-NEXT: cmovgeq %rbp, %rax ; X64-NEXT: .LBB3_6: # %l2.header ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movslq (%rbx), %rcx @@ -344,18 +352,22 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Lslh_ret_addr2: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr2, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: incl %r15d -; X64-NEXT: cmpl %ebp, %r15d +; X64-NEXT: cmpl %r12d, %r15d ; X64-NEXT: jl .LBB3_11 ; X64-NEXT: # %bb.7: -; X64-NEXT: cmovlq %r12, %rax +; X64-NEXT: cmovlq %rbp, %rax ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload ; X64-NEXT: jmp .LBB3_8 ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_4: -; X64-NEXT: cmovgq %r12, %rax +; X64-NEXT: cmovgq %rbp, %rax ; X64-NEXT: .LBB3_8: # %l1.latch ; X64-NEXT: movslq (%rbx), %rcx ; X64-NEXT: orq %rax, %rcx @@ -365,13 +377,17 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Lslh_ret_addr3: ; 
X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr3, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: incl %r13d ; X64-NEXT: cmpl %r15d, %r13d ; X64-NEXT: jl .LBB3_12 ; X64-NEXT: # %bb.9: -; X64-NEXT: cmovlq %r12, %rax +; X64-NEXT: cmovlq %rbp, %rax ; X64-NEXT: .LBB3_10: # %exit ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp @@ -486,22 +502,27 @@ define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) personality i8* bitca ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %rbp ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %r15 ; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %r14 ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: .cfi_offset %rbx, -32 -; X64-NEXT: .cfi_offset %r14, -24 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: .cfi_offset %rbx, -40 +; X64-NEXT: .cfi_offset %r14, -32 +; X64-NEXT: .cfi_offset %r15, -24 ; X64-NEXT: .cfi_offset %rbp, -16 ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %r15 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpl $41, %edi ; X64-NEXT: jg .LBB4_1 ; X64-NEXT: # %bb.2: # %thrower ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rsi, %rbx -; X64-NEXT: cmovgq %rcx, %rax +; X64-NEXT: cmovgq %r15, %rax ; X64-NEXT: movslq %edi, %rcx ; X64-NEXT: movl (%rsi,%rcx,4), %ebp ; X64-NEXT: orl %eax, %ebp @@ -509,8 +530,12 @@ define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) personality i8* bitca ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq __cxa_allocate_exception +; X64-NEXT: .Lslh_ret_addr4: ; X64-NEXT: movq %rsp, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rcx +; X64-NEXT: cmpq $.Lslh_ret_addr4, %rdx +; X64-NEXT: cmovneq %r15, %rcx ; X64-NEXT: movl %ebp, (%rax) ; X64-NEXT: .Ltmp0: ; X64-NEXT: xorl %esi, %esi @@ -519,24 +544,32 @@ define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) personality i8* bitca ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: orq %rcx, %rsp ; X64-NEXT: callq __cxa_throw +; X64-NEXT: .Lslh_ret_addr5: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr5, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: .Ltmp1: ; X64-NEXT: jmp .LBB4_3 ; X64-NEXT: .LBB4_1: -; X64-NEXT: cmovleq %rcx, %rax +; X64-NEXT: cmovleq %r15, %rax ; X64-NEXT: .LBB4_3: # %exit ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp +; X64-NEXT: addq $8, %rsp +; X64-NEXT: .cfi_def_cfa_offset 40 ; X64-NEXT: popq %rbx -; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: .cfi_def_cfa_offset 32 ; X64-NEXT: popq %r14 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %r15 ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: popq %rbp ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq ; X64-NEXT: .LBB4_4: # %lpad -; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: .cfi_def_cfa_offset 48 ; X64-NEXT: .Ltmp2: ; X64-NEXT: movq %rsp, %rcx ; X64-NEXT: sarq $63, %rcx @@ -549,8 +582,12 @@ define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) personality i8* bitca ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: orq %rcx, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Lslh_ret_addr6: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr6, %rcx +; X64-NEXT: cmovneq %r15, %rax ; ; X64-LFENCE-LABEL: test_basic_eh: ; X64-LFENCE: # %bb.0: # %entry @@ 
-636,79 +673,111 @@ define void @test_fp_loads(float* %fptr, double* %dptr, i32* %i32ptr, i64* %i64p ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %r15 ; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %r13 ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx -; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rax ; X64-NEXT: movq %rcx, %r15 ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rsi, %rbx ; X64-NEXT: movq %rdi, %r12 -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %r13 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: orq %rax, %r12 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_float +; X64-NEXT: .Lslh_ret_addr7: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr7, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: orq %rax, %rbx ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_double +; X64-NEXT: .Lslh_ret_addr8: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr8, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: cvtsd2ss %xmm0, %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_float +; X64-NEXT: .Lslh_ret_addr9: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr9, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: cvtss2sd %xmm0, %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_double +; X64-NEXT: .Lslh_ret_addr10: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr10, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: orq %rax, %r14 ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: cvtsi2ssl (%r14), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_float +; X64-NEXT: .Lslh_ret_addr11: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr11, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: orq %rax, %r15 ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: cvtsi2sdq (%r15), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_double +; X64-NEXT: .Lslh_ret_addr12: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr12, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: cvtsi2ssq (%r15), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_float +; X64-NEXT: .Lslh_ret_addr13: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr13, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: cvtsi2sdl (%r14), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_double +; X64-NEXT: .Lslh_ret_addr14: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr14, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp -; X64-NEXT: addq $8, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 +; X64-NEXT: 
popq %r13 ; X64-NEXT: popq %r14 ; X64-NEXT: popq %r15 ; X64-NEXT: retq @@ -789,68 +858,96 @@ declare void @sink_v2i64(<2 x i64>) define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind { ; X64-LABEL: test_vec_loads: ; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbp ; X64-NEXT: pushq %r15 ; X64-NEXT: pushq %r14 ; X64-NEXT: pushq %r13 ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rax ; X64-NEXT: movq %r9, %r14 ; X64-NEXT: movq %r8, %r15 ; X64-NEXT: movq %rcx, %r12 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rsi, %rbx -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %rbp ; X64-NEXT: sarq $63, %rax ; X64-NEXT: orq %rax, %rdi ; X64-NEXT: movaps (%rdi), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v4f32 +; X64-NEXT: .Lslh_ret_addr15: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr15, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %rbx ; X64-NEXT: movaps (%rbx), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v2f64 +; X64-NEXT: .Lslh_ret_addr16: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr16, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %r13 ; X64-NEXT: movaps (%r13), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v16i8 +; X64-NEXT: .Lslh_ret_addr17: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr17, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %r12 ; X64-NEXT: movaps (%r12), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v8i16 +; X64-NEXT: .Lslh_ret_addr18: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr18, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %r15 ; X64-NEXT: movaps (%r15), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v4i32 +; X64-NEXT: .Lslh_ret_addr19: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr19, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %r14 ; X64-NEXT: movaps (%r14), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v2i64 +; X64-NEXT: .Lslh_ret_addr20: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr20, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp +; X64-NEXT: addq $8, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 ; X64-NEXT: popq %r13 ; X64-NEXT: popq %r14 ; X64-NEXT: popq %r15 +; X64-NEXT: popq %rbp ; X64-NEXT: retq ; ; X64-LFENCE-LABEL: test_vec_loads: @@ -902,13 +999,13 @@ entry: define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind { ; X64-LABEL: test_deferred_hardening: ; X64: # %bb.0: # %entry +; X64-NEXT: pushq %r15 ; X64-NEXT: pushq %r14 ; X64-NEXT: pushq %rbx -; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rax ; X64-NEXT: movq %rsi, %r14 ; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %r15 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: 
movl (%rdi), %edi ; X64-NEXT: incl %edi @@ -917,8 +1014,12 @@ define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind { ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Lslh_ret_addr21: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr21, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: movl (%rbx), %ecx ; X64-NEXT: movl (%r14), %edx ; X64-NEXT: leal 1(%rcx,%rdx), %edi @@ -926,16 +1027,24 @@ define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind { ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Lslh_ret_addr22: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr22, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: movl (%rbx), %edi ; X64-NEXT: shll $7, %edi ; X64-NEXT: orl %eax, %edi ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Lslh_ret_addr23: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr23, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: movzwl (%rbx), %ecx ; X64-NEXT: sarw $7, %cx ; X64-NEXT: movzwl %cx, %edi @@ -944,8 +1053,12 @@ define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind { ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Lslh_ret_addr24: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr24, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: movzwl (%rbx), %ecx ; X64-NEXT: rolw $9, %cx ; X64-NEXT: movswl %cx, %edi @@ -954,13 +1067,17 @@ define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind { ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Lslh_ret_addr25: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr25, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp -; X64-NEXT: addq $8, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r14 +; X64-NEXT: popq %r15 ; X64-NEXT: retq ; ; X64-LFENCE-LABEL: test_deferred_hardening: