; mirror of https://github.com/RPCS3/llvm-mirror.git
; synced 2024-11-23 19:23:23 +01:00
; 4121e9a08c
;
; Summary: Inc and Dec were at one point slow on Intel CPUs due to their
; tendency to cause partial flag stalls on P6 derived CPU cores. This is
; because these instructions are defined to preserve the carry flag. This
; partial flag stall issue persisted until Sandy Bridge when flag merging was
; changed to be handled as a data dependency instead of as a stall until
; retirement. Sandy Bridge and later CPUs rename the C flag separately from
; OSPAZ so there is no flag merge needed on INC/DEC to preserve the C flag.
; Given these improvements I don't know why INC/DEC was ever considered slow
; on Sandy Bridge. If anything they should have been disabled on the earlier
; CPUs instead. Note that after this patch, INC/DEC are still considered slow
; on Silvermont, Goldmont, Knights Landing and our generic "x86-64" CPU.
;
; Reviewers: spatel, RKSimon, chandlerc
; Reviewed By: chandlerc
; Subscribers: llvm-commits
; Differential Revision: https://reviews.llvm.org/D58412
; llvm-svn: 354436
;
; 139 lines, 4.4 KiB, LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=core-avx-i -mattr=+rdrnd | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx-i -mattr=+rdrnd | FileCheck %s --check-prefix=X64

; Both llc invocations above target -mcpu=core-avx-i with the rdrnd feature
; enabled; the i686 output and the x86_64 output are each matched against
; their own set of assertions in the functions below.

; RDRAND intrinsics exercised by this test. Each returns a two-element
; struct; the functions below use element 0 as the random value and
; element 1 as an i32 validity result.
declare {i16, i32} @llvm.x86.rdrand.16()
declare {i32, i32} @llvm.x86.rdrand.32()

; _rdrand16_step: call the 16-bit RDRAND intrinsic, store element 0 of the
; result (the random value) through %random_val, and return element 1 (the
; i32 validity result).
;
; The assertions expect a single rdrandw, a zero-extension of the 16-bit
; result, and a branch-free selection of the return value: eax is preloaded
; with 1 and conditionally overwritten with the zero-extended random value
; by cmovae. The 16-bit value is stored to memory in both cases.
define i32 @_rdrand16_step(i16* %random_val) {
; X86-LABEL: _rdrand16_step:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: rdrandw %ax
; X86-NEXT: movzwl %ax, %edx
; X86-NEXT: movl $1, %eax
; X86-NEXT: cmovael %edx, %eax
; X86-NEXT: movw %dx, (%ecx)
; X86-NEXT: retl
;
; X64-LABEL: _rdrand16_step:
; X64: # %bb.0:
; X64-NEXT: rdrandw %ax
; X64-NEXT: movzwl %ax, %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovael %ecx, %eax
; X64-NEXT: movw %cx, (%rdi)
; X64-NEXT: retq
  %call = call {i16, i32} @llvm.x86.rdrand.16()
  %randval = extractvalue {i16, i32} %call, 0
  store i16 %randval, i16* %random_val
  %isvalid = extractvalue {i16, i32} %call, 1
  ret i32 %isvalid
}

; _rdrand32_step: call the 32-bit RDRAND intrinsic, store element 0 of the
; result (the random value) through %random_val, and return element 1 (the
; i32 validity result).
;
; The assertions expect a single rdrandl and a branch-free selection of the
; return value: eax is preloaded with 1 and conditionally overwritten with
; the random value by cmovae. No extension is needed because the random
; value is already 32 bits wide.
define i32 @_rdrand32_step(i32* %random_val) {
; X86-LABEL: _rdrand32_step:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: rdrandl %edx
; X86-NEXT: movl $1, %eax
; X86-NEXT: cmovael %edx, %eax
; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: retl
;
; X64-LABEL: _rdrand32_step:
; X64: # %bb.0:
; X64-NEXT: rdrandl %ecx
; X64-NEXT: movl $1, %eax
; X64-NEXT: cmovael %ecx, %eax
; X64-NEXT: movl %ecx, (%rdi)
; X64-NEXT: retq
  %call = call {i32, i32} @llvm.x86.rdrand.32()
  %randval = extractvalue {i32, i32} %call, 0
  store i32 %randval, i32* %random_val
  %isvalid = extractvalue {i32, i32} %call, 1
  ret i32 %isvalid
}

; Check that MachineCSE doesn't eliminate duplicate rdrand instructions.
;
; The function calls the 32-bit RDRAND intrinsic twice and returns the sum
; of the two random values. The two calls are textually identical, so the
; assertions require two separate rdrandl instructions (into different
; registers) followed by the add.
define i32 @CSE() nounwind {
; X86-LABEL: CSE:
; X86: # %bb.0:
; X86-NEXT: rdrandl %ecx
; X86-NEXT: rdrandl %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: CSE:
; X64: # %bb.0:
; X64-NEXT: rdrandl %ecx
; X64-NEXT: rdrandl %eax
; X64-NEXT: addl %ecx, %eax
; X64-NEXT: retq
  %rand1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
  %v1 = extractvalue { i32, i32 } %rand1, 0
  %rand2 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
  %v2 = extractvalue { i32, i32 } %rand2, 0
  %add = add i32 %v2, %v1
  ret i32 %add
}

; Check that MachineLICM doesn't hoist rdrand instructions.
;
; The function skips the loop entirely when %n is zero. Otherwise each
; iteration calls the 32-bit RDRAND intrinsic, stores the random value
; through the current pointer, advances the pointer by one i32, and
; decrements the counter; the loop exits when the counter reaches zero.
;
; The assertions require the rdrandl to appear after the loop header label
; (.LBB3_2), i.e. inside the loop body rather than in the preheader.
define void @loop(i32* %p, i32 %n) nounwind {
; X86-LABEL: loop:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB3_3
; X86-NEXT: # %bb.1: # %while.body.preheader
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB3_2: # %while.body
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: rdrandl %esi
; X86-NEXT: movl %esi, (%ecx,%edx,4)
; X86-NEXT: incl %edx
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: jne .LBB3_2
; X86-NEXT: .LBB3_3: # %while.end
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: loop:
; X64: # %bb.0: # %entry
; X64-NEXT: testl %esi, %esi
; X64-NEXT: je .LBB3_3
; X64-NEXT: # %bb.1: # %while.body.preheader
; X64-NEXT: movl %esi, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_2: # %while.body
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: rdrandl %edx
; X64-NEXT: movl %edx, (%rdi,%rcx,4)
; X64-NEXT: incq %rcx
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB3_2
; X64-NEXT: .LBB3_3: # %while.end
; X64-NEXT: retq
entry:
  ; Nothing to do for a zero count.
  %tobool1 = icmp eq i32 %n, 0
  br i1 %tobool1, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
  ; Current store address and remaining count, carried around the loop.
  %p.addr.03 = phi i32* [ %incdec.ptr, %while.body ], [ %p, %entry ]
  %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
  %dec = add nsw i32 %n.addr.02, -1
  %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.03, i64 1
  ; The call has no loop-varying operands, which is what makes it a hoisting
  ; candidate; it must nevertheless be executed on every iteration.
  %rand = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
  %v1 = extractvalue { i32, i32 } %rand, 0
  store i32 %v1, i32* %p.addr.03, align 4
  %tobool = icmp eq i32 %dec, 0
  br i1 %tobool, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
  ret void
}