mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
030525f5bf
For NVPTX, try to use 32-bit division instead of 64-bit division when the dividend and divisor fit in 32 bits. This speeds up some internal benchmarks significantly. The underlying reason is that many index computations are carried out in 64-bits but never actually exceed the capacity of a 32-bit word. llvm-svn: 244684
81 lines
1.7 KiB
LLVM
81 lines
1.7 KiB
LLVM
; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
|
|
|
|
; 64-bit divides and rems should be split into a fast path and a slow path,
; where the fast path uses a 32-bit operation.
|
|
|
|
; Signed 64-bit division: divides %a by %b and stores the quotient to %retptr.
; The generated code is expected to contain the full-width div.s64, then a
; 32-bit div.u32, and then the return, in that order.
define void @sdiv64(i64 %a, i64 %b, i64* %retptr) {
; CHECK-LABEL: sdiv64(
; CHECK: div.s64
; CHECK: div.u32
; CHECK: ret
  %d = sdiv i64 %a, %b
  store i64 %d, i64* %retptr
  ret void
}
|
|
|
|
; Unsigned 64-bit division: divides %a by %b and stores the quotient to
; %retptr. The generated code is expected to contain the full-width div.u64,
; then a 32-bit div.u32, and then the return, in that order.
define void @udiv64(i64 %a, i64 %b, i64* %retptr) {
; CHECK-LABEL: udiv64(
; CHECK: div.u64
; CHECK: div.u32
; CHECK: ret
  %d = udiv i64 %a, %b
  store i64 %d, i64* %retptr
  ret void
}
|
|
|
|
; Signed 64-bit remainder: computes %a rem %b and stores the result to
; %retptr. The generated code is expected to contain the full-width rem.s64,
; then a 32-bit rem.u32, and then the return, in that order.
define void @srem64(i64 %a, i64 %b, i64* %retptr) {
; CHECK-LABEL: srem64(
; CHECK: rem.s64
; CHECK: rem.u32
; CHECK: ret
  %d = srem i64 %a, %b
  store i64 %d, i64* %retptr
  ret void
}
|
|
|
|
; Unsigned 64-bit remainder: computes %a rem %b and stores the result to
; %retptr. The generated code is expected to contain the full-width rem.u64,
; then a 32-bit rem.u32, and then the return, in that order.
define void @urem64(i64 %a, i64 %b, i64* %retptr) {
; CHECK-LABEL: urem64(
; CHECK: rem.u64
; CHECK: rem.u32
; CHECK: ret
  %d = urem i64 %a, %b
  store i64 %d, i64* %retptr
  ret void
}
|
|
|
|
; Signed 32-bit division: divides %a by %b and stores the quotient to %retptr.
; The generated code is expected to contain a single div.s32; no other "div."
; may appear after it.
define void @sdiv32(i32 %a, i32 %b, i32* %retptr) {
; CHECK-LABEL: sdiv32(
; CHECK: div.s32
; CHECK-NOT: div.
  %d = sdiv i32 %a, %b
  store i32 %d, i32* %retptr
  ret void
}
|
|
|
|
; Unsigned 32-bit division: divides %a by %b and stores the quotient to
; %retptr. The generated code is expected to contain a single div.u32; no
; other "div." may appear after it.
define void @udiv32(i32 %a, i32 %b, i32* %retptr) {
; CHECK-LABEL: udiv32(
; CHECK: div.u32
; CHECK-NOT: div.
  %d = udiv i32 %a, %b
  store i32 %d, i32* %retptr
  ret void
}
|
|
|
|
; Signed 32-bit remainder: computes %a rem %b and stores the result to
; %retptr. The generated code is expected to contain a single rem.s32; no
; other "rem." may appear after it.
define void @srem32(i32 %a, i32 %b, i32* %retptr) {
; CHECK-LABEL: srem32(
; CHECK: rem.s32
; CHECK-NOT: rem.
  %d = srem i32 %a, %b
  store i32 %d, i32* %retptr
  ret void
}
|
|
|
|
; Unsigned 32-bit remainder: computes %a rem %b and stores the result to
; %retptr. The generated code is expected to contain a single rem.u32; no
; other "rem." may appear after it.
define void @urem32(i32 %a, i32 %b, i32* %retptr) {
; CHECK-LABEL: urem32(
; CHECK: rem.u32
; CHECK-NOT: rem.
  %d = urem i32 %a, %b
  store i32 %d, i32* %retptr
  ret void
}
|