mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[NVPTX] Add support for cttz/ctlz/ctpop
llvm-svn: 185176
This commit is contained in:
parent
d365a376eb
commit
6feb5e8392
@ -216,6 +216,22 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
|
||||
// Custom handling for i8 intrinsics
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
|
||||
|
||||
setOperationAction(ISD::CTLZ, MVT::i16, Legal);
|
||||
setOperationAction(ISD::CTLZ, MVT::i32, Legal);
|
||||
setOperationAction(ISD::CTLZ, MVT::i64, Legal);
|
||||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
|
||||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
|
||||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
|
||||
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
|
||||
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
|
||||
setOperationAction(ISD::CTTZ, MVT::i64, Expand);
|
||||
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
|
||||
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
|
||||
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
|
||||
setOperationAction(ISD::CTPOP, MVT::i16, Legal);
|
||||
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
|
||||
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
|
||||
|
||||
// Now deduce the information based on the above-mentioned
|
||||
// actions
|
||||
computeRegisterProperties();
|
||||
|
@ -2406,6 +2406,64 @@ def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
|
||||
"mov.b64\t{{$d1, $d2}}, $s;",
|
||||
[]>;
|
||||
|
||||
// Count leading zeros
|
||||
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
|
||||
"clz.b32\t$d, $a;",
|
||||
[]>;
|
||||
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
|
||||
"clz.b64\t$d, $a;",
|
||||
[]>;
|
||||
|
||||
// 32-bit has a direct PTX instruction
|
||||
def : Pat<(ctlz Int32Regs:$a),
|
||||
(CLZr32 Int32Regs:$a)>;
|
||||
def : Pat<(ctlz_zero_undef Int32Regs:$a),
|
||||
(CLZr32 Int32Regs:$a)>;
|
||||
|
||||
// For 64-bit, the result in PTX is actually 32-bit, so we zero-extend
|
||||
// to 64-bit to match the LLVM semantics
|
||||
def : Pat<(ctlz Int64Regs:$a),
|
||||
(CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
|
||||
def : Pat<(ctlz_zero_undef Int64Regs:$a),
|
||||
(CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
|
||||
|
||||
// For 16-bit, we zero-extend to 32-bit, then truncate the result back
|
||||
// to 16 bits (ctlz of a 16-bit value is guaranteed to require less
|
||||
// than 16 bits to store). We also need to subtract 16 because the
|
||||
// high-order 16 zeros were counted.
|
||||
def : Pat<(ctlz Int16Regs:$a),
|
||||
(SUBi16ri (CVT_u16_u32 (CLZr32
|
||||
(CVT_u32_u16 Int16Regs:$a, CvtNONE)),
|
||||
CvtNONE), 16)>;
|
||||
def : Pat<(ctlz_zero_undef Int16Regs:$a),
|
||||
(SUBi16ri (CVT_u16_u32 (CLZr32
|
||||
(CVT_u32_u16 Int16Regs:$a, CvtNONE)),
|
||||
CvtNONE), 16)>;
|
||||
|
||||
// Population count
|
||||
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
|
||||
"popc.b32\t$d, $a;",
|
||||
[]>;
|
||||
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
|
||||
"popc.b64\t$d, $a;",
|
||||
[]>;
|
||||
|
||||
// 32-bit has a direct PTX instruction
|
||||
def : Pat<(ctpop Int32Regs:$a),
|
||||
(POPCr32 Int32Regs:$a)>;
|
||||
|
||||
// For 64-bit, the result in PTX is actually 32-bit, so we zero-extend
|
||||
// to 64-bit to match the LLVM semantics
|
||||
def : Pat<(ctpop Int64Regs:$a),
|
||||
(CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
|
||||
|
||||
// For 16-bit, we zero-extend to 32-bit, then truncate the result back
|
||||
// to 16 bits (ctpop of a 16-bit value is guaranteed to require less
|
||||
// than 16 bits to store).
|
||||
def : Pat<(ctpop Int16Regs:$a),
|
||||
(CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
|
||||
CvtNONE)>;
|
||||
|
||||
// fround f64 -> f32
|
||||
def : Pat<(f32 (fround Float64Regs:$a)),
|
||||
(CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
|
||||
|
44
test/CodeGen/NVPTX/ctlz.ll
Normal file
44
test/CodeGen/NVPTX/ctlz.ll
Normal file
@ -0,0 +1,44 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
declare i16 @llvm.ctlz.i16(i16, i1) readnone
|
||||
declare i32 @llvm.ctlz.i32(i32, i1) readnone
|
||||
declare i64 @llvm.ctlz.i64(i64, i1) readnone
|
||||
|
||||
define i32 @myctpop(i32 %a) {
|
||||
; CHECK: clz.b32
|
||||
%val = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i16 @myctpop16(i16 %a) {
|
||||
; CHECK: clz.b32
|
||||
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
|
||||
ret i16 %val
|
||||
}
|
||||
|
||||
define i64 @myctpop64(i64 %a) {
|
||||
; CHECK: clz.b64
|
||||
%val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
|
||||
ret i64 %val
|
||||
}
|
||||
|
||||
|
||||
define i32 @myctpop_2(i32 %a) {
|
||||
; CHECK: clz.b32
|
||||
%val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i16 @myctpop16_2(i16 %a) {
|
||||
; CHECK: clz.b32
|
||||
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
|
||||
ret i16 %val
|
||||
}
|
||||
|
||||
define i64 @myctpop64_2(i64 %a) {
|
||||
; CHECK: clz.b64
|
||||
%val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
|
||||
ret i64 %val
|
||||
}
|
25
test/CodeGen/NVPTX/ctpop.ll
Normal file
25
test/CodeGen/NVPTX/ctpop.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
define i32 @myctpop(i32 %a) {
|
||||
; CHECK: popc.b32
|
||||
%val = tail call i32 @llvm.ctpop.i32(i32 %a)
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i16 @myctpop16(i16 %a) {
|
||||
; CHECK: popc.b32
|
||||
%val = tail call i16 @llvm.ctpop.i16(i16 %a)
|
||||
ret i16 %val
|
||||
}
|
||||
|
||||
define i64 @myctpop64(i64 %a) {
|
||||
; CHECK: popc.b64
|
||||
%val = tail call i64 @llvm.ctpop.i64(i64 %a)
|
||||
ret i64 %val
|
||||
}
|
||||
|
||||
declare i16 @llvm.ctpop.i16(i16)
|
||||
declare i32 @llvm.ctpop.i32(i32)
|
||||
declare i64 @llvm.ctpop.i64(i64)
|
45
test/CodeGen/NVPTX/cttz.ll
Normal file
45
test/CodeGen/NVPTX/cttz.ll
Normal file
@ -0,0 +1,45 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
declare i16 @llvm.cttz.i16(i16, i1) readnone
|
||||
declare i32 @llvm.cttz.i32(i32, i1) readnone
|
||||
declare i64 @llvm.cttz.i64(i64, i1) readnone
|
||||
|
||||
define i32 @myctpop(i32 %a) {
|
||||
; CHECK: popc.b32
|
||||
%val = call i32 @llvm.cttz.i32(i32 %a, i1 false) readnone
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i16 @myctpop16(i16 %a) {
|
||||
; CHECK: popc.b32
|
||||
%val = call i16 @llvm.cttz.i16(i16 %a, i1 false) readnone
|
||||
ret i16 %val
|
||||
}
|
||||
|
||||
define i64 @myctpop64(i64 %a) {
|
||||
; CHECK: popc.b64
|
||||
%val = call i64 @llvm.cttz.i64(i64 %a, i1 false) readnone
|
||||
ret i64 %val
|
||||
}
|
||||
|
||||
|
||||
define i32 @myctpop_2(i32 %a) {
|
||||
; CHECK: popc.b32
|
||||
%val = call i32 @llvm.cttz.i32(i32 %a, i1 true) readnone
|
||||
ret i32 %val
|
||||
}
|
||||
|
||||
define i16 @myctpop16_2(i16 %a) {
|
||||
; CHECK: popc.b32
|
||||
%val = call i16 @llvm.cttz.i16(i16 %a, i1 true) readnone
|
||||
ret i16 %val
|
||||
}
|
||||
|
||||
define i64 @myctpop64_2(i64 %a) {
|
||||
; CHECK: popc.b64
|
||||
%val = call i64 @llvm.cttz.i64(i64 %a, i1 true) readnone
|
||||
ret i64 %val
|
||||
}
|
Loading…
Reference in New Issue
Block a user