1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00
llvm-mirror/test/CodeGen/RISCV/urem-lkk.ll
Craig Topper a2254d3fcc [RISCV] Teach RISCVMatInt about cases where it can use LUI+SLLI to replace LUI+ADDI+SLLI for large constants.
If we need to shift left anyway we might be able to take advantage
of LUI implicitly shifting its immediate left by 12 to cover part
of the shift. This allows us to use more bits of the LUI immediate
to avoid an ADDI.

isDesirableToCommuteWithShift now considers compressed instruction
opportunities when deciding if commuting should be allowed.

I believe this is the same or similar to one of the optimizations
from D79492.

Reviewed By: luismarques, arcbbb

Differential Revision: https://reviews.llvm.org/D105417
2021-07-20 09:22:06 -07:00

286 lines
8.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s
; urem i32 by the odd constant 95.
; With the M extension (-mattr=+m) the expected code contains no call: it takes
; the high half of x * 0x58ED2309 (lui 364242 + addi 777), applies the
; sub / shift-by-1 / add / shift-by-6 sequence to obtain the quotient, and then
; forms x - quotient * 95.
; Without M the expected code is a libcall to __umodsi3 on riscv32, or to
; __umoddi3 on riscv64 after zero-extending x with slli/srli by 32.
define i32 @fold_urem_positive_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_urem_positive_odd:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: addi a1, zero, 95
; RV32I-NEXT: call __umodsi3@plt
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IM-LABEL: fold_urem_positive_odd:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lui a1, 364242
; RV32IM-NEXT: addi a1, a1, 777
; RV32IM-NEXT: mulhu a1, a0, a1
; RV32IM-NEXT: sub a2, a0, a1
; RV32IM-NEXT: srli a2, a2, 1
; RV32IM-NEXT: add a1, a2, a1
; RV32IM-NEXT: srli a1, a1, 6
; RV32IM-NEXT: addi a2, zero, 95
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_urem_positive_odd:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: addi a1, zero, 95
; RV64I-NEXT: call __umoddi3@plt
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: fold_urem_positive_odd:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: lui a2, 364242
; RV64IM-NEXT: addiw a2, a2, 777
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: sub a2, a0, a1
; RV64IM-NEXT: srliw a2, a2, 1
; RV64IM-NEXT: add a1, a2, a1
; RV64IM-NEXT: srli a1, a1, 6
; RV64IM-NEXT: addi a2, zero, 95
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
%1 = urem i32 %x, 95
ret i32 %1
}
; urem i32 by the even constant 1060.
; With the M extension the expected code multiplies x by a constant, shifts the
; product right to obtain the quotient (mulhu + srli 10 on riscv32; mul +
; srli 42 on riscv64), and then forms x - quotient * 1060.
; On riscv64 the multiplier is built as lui 253241 / slli 2 / addi -61, where
; riscv32 uses lui 1012964 / addi -61 (253241 * 4 = 1012964).
; Without M the expected code is a libcall to __umodsi3 (riscv32) or
; __umoddi3 (riscv64, after zero-extending x).
define i32 @fold_urem_positive_even(i32 %x) nounwind {
; RV32I-LABEL: fold_urem_positive_even:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: addi a1, zero, 1060
; RV32I-NEXT: call __umodsi3@plt
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IM-LABEL: fold_urem_positive_even:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lui a1, 1012964
; RV32IM-NEXT: addi a1, a1, -61
; RV32IM-NEXT: mulhu a1, a0, a1
; RV32IM-NEXT: srli a1, a1, 10
; RV32IM-NEXT: addi a2, zero, 1060
; RV32IM-NEXT: mul a1, a1, a2
; RV32IM-NEXT: sub a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_urem_positive_even:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: addi a1, zero, 1060
; RV64I-NEXT: call __umoddi3@plt
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: fold_urem_positive_even:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: lui a2, 253241
; RV64IM-NEXT: slli a2, a2, 2
; RV64IM-NEXT: addi a2, a2, -61
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 42
; RV64IM-NEXT: addi a2, zero, 1060
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
%1 = urem i32 %x, 1060
ret i32 %1
}
; Don't fold if we can combine urem with udiv.
; The function returns (x urem 95) + (x udiv 95). With the M extension the
; expected code computes the quotient once (left in a1) and reuses it both for
; the remainder (x - quotient * 95) and for the final add. Without M the
; expected code makes two libcalls, one for the remainder and one for the
; quotient, keeping x and the first result in s0/s1 across the calls.
define i32 @combine_urem_udiv(i32 %x) nounwind {
; RV32I-LABEL: combine_urem_udiv:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: addi a1, zero, 95
; RV32I-NEXT: call __umodsi3@plt
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: addi a1, zero, 95
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __udivsi3@plt
; RV32I-NEXT: add a0, s1, a0
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lui a1, 364242
; RV32IM-NEXT: addi a1, a1, 777
; RV32IM-NEXT: mulhu a1, a0, a1
; RV32IM-NEXT: sub a2, a0, a1
; RV32IM-NEXT: srli a2, a2, 1
; RV32IM-NEXT: add a1, a2, a1
; RV32IM-NEXT: srli a1, a1, 6
; RV32IM-NEXT: addi a2, zero, 95
; RV32IM-NEXT: mul a2, a1, a2
; RV32IM-NEXT: sub a0, a0, a2
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: ret
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli s0, a0, 32
; RV64I-NEXT: addi a1, zero, 95
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __umoddi3@plt
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: addi a1, zero, 95
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __udivdi3@plt
; RV64I-NEXT: add a0, s1, a0
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: lui a2, 364242
; RV64IM-NEXT: addiw a2, a2, 777
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
; RV64IM-NEXT: sub a2, a0, a1
; RV64IM-NEXT: srliw a2, a2, 1
; RV64IM-NEXT: add a1, a2, a1
; RV64IM-NEXT: srli a1, a1, 6
; RV64IM-NEXT: addi a2, zero, 95
; RV64IM-NEXT: mul a2, a1, a2
; RV64IM-NEXT: sub a0, a0, a2
; RV64IM-NEXT: addw a0, a0, a1
; RV64IM-NEXT: ret
%1 = urem i32 %x, 95
%2 = udiv i32 %x, 95
%3 = add i32 %1, %2
ret i32 %3
}
; Don't fold for divisors that are a power of two.
; urem by 64 is expected to become a single mask with 63 in all four
; configurations, which is why the checks use the shared CHECK prefix.
define i32 @dont_fold_urem_power_of_two(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_power_of_two:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 63
; CHECK-NEXT: ret
%1 = urem i32 %x, 64
ret i32 %1
}
; Don't fold if the divisor is one.
; urem by 1 is always 0, so every configuration is expected to just return
; zero in a0 with no multiply, mask or libcall.
define i32 @dont_fold_urem_one(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_one:
; CHECK: # %bb.0:
; CHECK-NEXT: mv a0, zero
; CHECK-NEXT: ret
%1 = urem i32 %x, 1
ret i32 %1
}
; Don't fold if the divisor is 2^32.
; The expected output is a bare ret: a0 is returned untouched in every
; configuration.
; NOTE(review): the literal 4294967296 (2^32) does not fit in i32, whose
; largest unsigned value is 4294967295, while the function name says "umax".
; Presumably the literal wraps to 0 when parsed, which would make this a urem
; by zero rather than by umax - confirm which divisor was intended.
define i32 @dont_fold_urem_i32_umax(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_i32_umax:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
%1 = urem i32 %x, 4294967296
ret i32 %1
}
; Don't fold i64 urem
; urem i64 by 98. On riscv32 (with or without M) and on riscv64 without M the
; expected code is a libcall to __umoddi3; on riscv32 the 64-bit divisor is
; passed as the register pair a2 = 98, a3 = 0.
; NOTE(review): on riscv64 with M the checks below show no call. The expected
; code shifts x right by 1, takes mulhu with a 64-bit constant materialized by
; the lui/addiw/slli/addi chain, shifts right by 4, and forms
; x - quotient * 98, so the "don't fold" wording does not describe that
; configuration.
define i64 @dont_fold_urem_i64(i64 %x) nounwind {
; RV32I-LABEL: dont_fold_urem_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: addi a2, zero, 98
; RV32I-NEXT: mv a3, zero
; RV32I-NEXT: call __umoddi3@plt
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32IM-LABEL: dont_fold_urem_i64:
; RV32IM: # %bb.0:
; RV32IM-NEXT: addi sp, sp, -16
; RV32IM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: addi a2, zero, 98
; RV32IM-NEXT: mv a3, zero
; RV32IM-NEXT: call __umoddi3@plt
; RV32IM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IM-NEXT: addi sp, sp, 16
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_urem_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi a1, zero, 98
; RV64I-NEXT: call __umoddi3@plt
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM: # %bb.0:
; RV64IM-NEXT: srli a1, a0, 1
; RV64IM-NEXT: lui a2, 2675
; RV64IM-NEXT: addiw a2, a2, -251
; RV64IM-NEXT: slli a2, a2, 13
; RV64IM-NEXT: addi a2, a2, 1839
; RV64IM-NEXT: slli a2, a2, 13
; RV64IM-NEXT: addi a2, a2, 167
; RV64IM-NEXT: slli a2, a2, 13
; RV64IM-NEXT: addi a2, a2, 1505
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 4
; RV64IM-NEXT: addi a2, zero, 98
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: sub a0, a0, a1
; RV64IM-NEXT: ret
%1 = urem i64 %x, 98
ret i64 %1
}