
[RISCV] Add tests for __builtin_parity idiom.

We use (and (ctpop X), 1) to represent parity.

The generated code for i32 parity on RV64 has more instructions than
necessary, which I hope to improve in a follow-up patch.

Also add a missing test for i64 ctpop.
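
For reference, a minimal C sketch of the source idiom under test (function
names are hypothetical); Clang lowers __builtin_parity to @llvm.ctpop
followed by a mask of the low bit, i.e. (and (ctpop X), 1):

// Returns 1 if x has an odd number of set bits, else 0.
int parity_u32(unsigned x) { return __builtin_parity(x); }
int parity_u64(unsigned long long x) { return __builtin_parityll(x); }
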
commit 1d784e01f0
parent a772deb8d1
Author: Craig Topper
Date:   2021-06-27 11:23:56 -07:00

@@ -13,6 +13,7 @@ declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)

define i16 @test_bswap_i16(i16 %a) nounwind {
; RV32I-LABEL: test_bswap_i16:
@@ -1169,3 +1170,190 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
%1 = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %1
}

define i64 @test_ctpop_i64(i64 %a) nounwind {
; RV32I-LABEL: test_ctpop_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: srli a0, a1, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi s3, a2, 1365
; RV32I-NEXT: and a0, a0, s3
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi s0, a1, 819
; RV32I-NEXT: and a1, a0, s0
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi s4, a1, -241
; RV32I-NEXT: and a0, a0, s4
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi s1, a1, 257
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: srli s5, a0, 24
; RV32I-NEXT: srli a0, s2, 1
; RV32I-NEXT: and a0, a0, s3
; RV32I-NEXT: sub a0, s2, a0
; RV32I-NEXT: and a1, a0, s0
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: and a0, a0, s4
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __mulsi3@plt
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: add a0, a0, s5
; RV32I-NEXT: mv a1, zero
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctpop_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: lui a2, 21845
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 1365
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 1365
; RV64I-NEXT: slli a2, a2, 12
; RV64I-NEXT: addi a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 13107
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: slli a1, a1, 12
; RV64I-NEXT: addi a1, a1, 819
; RV64I-NEXT: slli a1, a1, 12
; RV64I-NEXT: addi a1, a1, 819
; RV64I-NEXT: slli a1, a1, 12
; RV64I-NEXT: addi a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: lui a1, 3855
; RV64I-NEXT: addiw a1, a1, 241
; RV64I-NEXT: slli a1, a1, 12
; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: slli a1, a1, 12
; RV64I-NEXT: addi a1, a1, 241
; RV64I-NEXT: slli a1, a1, 12
; RV64I-NEXT: addi a1, a1, -241
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 4112
; RV64I-NEXT: addiw a1, a1, 257
; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: addi a1, a1, 257
; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: addi a1, a1, 257
; RV64I-NEXT: call __muldi3@plt
; RV64I-NEXT: srli a0, a0, 56
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
%1 = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %1
}
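
The magic constants above are the classic SWAR popcount masks
(0x55555555, 0x33333333, 0x0F0F0F0F) plus a multiply by 0x01010101 that
sums the per-byte counts into the top byte. A minimal C sketch of what
the RV32I sequence computes, with hypothetical helper names:

#include <stdint.h>

// SWAR popcount: 2-bit sums, then 4-bit, then 8-bit, then one multiply
// (the __mulsi3 libcall above) to accumulate the four byte counts.
static uint32_t popcount32(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;
  return (x * 0x01010101u) >> 24; // srli a0, a0, 24
}

// RV32 popcounts each half and adds; "mv a1, zero" clears the high
// word of the i64 result since the count fits in 32 bits.
static uint64_t popcount64(uint64_t x) {
  return popcount32((uint32_t)(x >> 32)) + (uint64_t)popcount32((uint32_t)x);
}

The RV64I version runs the same fold on the full 64-bit word, building the
64-bit masks with lui/addiw/slli chains and shifting the
0x0101010101010101 product right by 56.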

define i32 @test_parity_i32(i32 %a) {
; RV32I-LABEL: test_parity_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: andi a0, a0, 1
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_parity_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 32
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: srliw a0, a0, 16
; RV64I-NEXT: xor a0, a1, a0
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: andi a0, a0, 1
; RV64I-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
%2 = and i32 %1, 1
ret i32 %2
}

define i64 @test_parity_i64(i64 %a) {
; RV32I-LABEL: test_parity_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: andi a0, a0, 1
; RV32I-NEXT: mv a1, zero
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_parity_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: srli a1, a0, 32
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a1, a0, 16
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a1, a0, 4
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a1, a0, 2
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: andi a0, a0, 1
; RV64I-NEXT: ret
%1 = call i64 @llvm.ctpop.i64(i64 %a)
%2 = and i64 %1, 1
ret i64 %2
}
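
Note that parity is not lowered through a full popcount here: the backend
folds the word onto itself with xor/shift until bit 0 holds the XOR of
every bit. A minimal C sketch of the emitted sequence (name hypothetical):

#include <stdint.h>

// Each step XORs the upper half of the remaining slice into the lower
// half; after five steps bit 0 is the parity of all 32 bits.
static uint32_t parity32(uint32_t x) {
  x ^= x >> 16;
  x ^= x >> 8;
  x ^= x >> 4;
  x ^= x >> 2;
  x ^= x >> 1;
  return x & 1;
}

For i64 on RV32 the two halves are XORed first (the leading
"xor a0, a0, a1"); on RV64 the fold simply starts one step earlier with
x ^= x >> 32. The extra slli/srli pair in the RV64 i32 version zero-extends
a0 before the first xor, which is the redundancy the commit message hopes
to remove in a follow-up patch.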