mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
acb413fc74
This patch custom lowers ISD::VSCALE into a csrr vlenb followed by a shift right by 3 followed by a multiply by the scale amount. I've added computeKnownBits support to indicate that the csrr vlenb always produces 3 trailing zero bits, so the shift right is "exact". This allows the shift and multiply sequence to be nicely optimized into a single shift or removed completely when the scale amount is a power of 2. The non-power-of-2 case (multiplying by 24) still produces suboptimal code. We could remove the right shift and use a multiply by 3. Hopefully we can improve DAG combine to fix that, since it's not unique to this sequence. This replaces D94144. Reviewed By: HsiangKai Differential Revision: https://reviews.llvm.org/D94249
55 lines
1.3 KiB
LLVM
55 lines
1.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \
|
|
; RUN: | FileCheck %s
|
|
|
|
; vscale scaled by 0: the expected output materializes zero directly and
; contains no vlenb read.
define i32 @vscale_zero() nounwind {
; CHECK-LABEL: vscale_zero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a0, zero
; CHECK-NEXT: ret
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %scaled = mul i32 %vscale, 0
  ret i32 %scaled
}
|
|
|
|
; vscale scaled by 1: the expected output is the bare lowering, a vlenb read
; followed by a right shift by 3, with no multiply.
define i32 @vscale_one() nounwind {
; CHECK-LABEL: vscale_one:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: ret
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %scaled = mul i32 %vscale, 1
  ret i32 %scaled
}
|
|
|
|
; vscale scaled by 64 (a power of two): the right shift by 3 and the multiply
; are expected to combine into a single left shift by 3 of vlenb.
define i32 @vscale_uimmpow2xlen() nounwind {
; CHECK-LABEL: vscale_uimmpow2xlen:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: ret
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %scaled = mul i32 %vscale, 64
  ret i32 %scaled
}
|
|
|
|
; vscale scaled by 24 (not a power of two): the expected output keeps the
; right shift by 3 and performs an explicit multiply by the constant 24.
define i32 @vscale_non_pow2() nounwind {
; CHECK-LABEL: vscale_non_pow2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: addi a1, zero, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %scaled = mul i32 %vscale, 24
  ret i32 %scaled
}
|
|
|
|
; Declaration of the i32 vscale intrinsic called by the vscale_* test functions.
declare i32 @llvm.vscale.i32()
|