mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Allow targets to prefer TypeSplitVector over TypePromoteInteger when computing the legalization method for vectors
For some targets, it is desirable to scalarize <N x i1> vectors instead of promoting them to a larger legal type, such as <N x i32>. llvm-svn: 168882
This commit is contained in:
parent
fafa2ae4b6
commit
c9fa05b437
@ -159,6 +159,11 @@ public:
|
|||||||
|
|
||||||
virtual bool isSelectSupported(SelectSupportKind kind) const { return true; }
|
virtual bool isSelectSupported(SelectSupportKind kind) const { return true; }
|
||||||
|
|
||||||
|
/// shouldSplitVectorElementType - Return true if a vector with the given element type
|
||||||
|
/// should be split (TypeSplitVector) instead of promoted
|
||||||
|
/// (TypePromoteInteger) during type legalization.
|
||||||
|
virtual bool shouldSplitVectorElementType(EVT VT) const { return false; }
|
||||||
|
|
||||||
/// isIntDivCheap() - Return true if integer divide is usually cheaper than
|
/// isIntDivCheap() - Return true if integer divide is usually cheaper than
|
||||||
/// a sequence of several shifts, adds, and multiplies for this target.
|
/// a sequence of several shifts, adds, and multiplies for this target.
|
||||||
bool isIntDivCheap() const { return IntDivIsCheap; }
|
bool isIntDivCheap() const { return IntDivIsCheap; }
|
||||||
|
@ -825,7 +825,7 @@ void TargetLowering::computeRegisterProperties() {
|
|||||||
// that wider vector type.
|
// that wider vector type.
|
||||||
EVT EltVT = VT.getVectorElementType();
|
EVT EltVT = VT.getVectorElementType();
|
||||||
unsigned NElts = VT.getVectorNumElements();
|
unsigned NElts = VT.getVectorNumElements();
|
||||||
if (NElts != 1) {
|
if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
|
||||||
bool IsLegalWiderType = false;
|
bool IsLegalWiderType = false;
|
||||||
// First try to promote the elements of integer vectors. If no legal
|
// First try to promote the elements of integer vectors. If no legal
|
||||||
// promotion was found, fall back to the widen-vector method.
|
// promotion was found, fall back to the widen-vector method.
|
||||||
|
@ -271,6 +271,9 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
|
||||||
|
return VT == MVT::i1;
|
||||||
|
}
|
||||||
|
|
||||||
SDValue
|
SDValue
|
||||||
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
|
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
|
||||||
|
@ -92,6 +92,8 @@ public:
|
|||||||
virtual unsigned getFunctionAlignment(const Function *F) const;
|
virtual unsigned getFunctionAlignment(const Function *F) const;
|
||||||
|
|
||||||
virtual EVT getSetCCResultType(EVT VT) const {
|
virtual EVT getSetCCResultType(EVT VT) const {
|
||||||
|
if (VT.isVector())
|
||||||
|
return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
|
||||||
return MVT::i1;
|
return MVT::i1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -129,6 +131,8 @@ public:
|
|||||||
return MVT::i32;
|
return MVT::i32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual bool shouldSplitVectorElementType(EVT VT) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
|
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
|
||||||
|
|
||||||
|
19
test/CodeGen/NVPTX/vector-compare.ll
Normal file
19
test/CodeGen/NVPTX/vector-compare.ll
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
; RUN: llc < %s -march=nvptx -mcpu=sm_20
|
||||||
|
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20
|
||||||
|
|
||||||
|
; This test makes sure that the results of vector compares are properly
|
||||||
|
; scalarized. If codegen fails, then the type legalizer incorrectly
|
||||||
|
; tried to promote <2 x i1> to <2 x i8> and instruction selection failed.
|
||||||
|
|
||||||
|
define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
|
||||||
|
%aval = load <2 x i32>* %a
|
||||||
|
%bval = load <2 x i32>* %b
|
||||||
|
%res = icmp slt <2 x i32> %aval, %bval
|
||||||
|
%t1 = extractelement <2 x i1> %res, i32 0
|
||||||
|
%t2 = extractelement <2 x i1> %res, i32 1
|
||||||
|
%t1a = zext i1 %t1 to i32
|
||||||
|
%t2a = zext i1 %t2 to i32
|
||||||
|
store i32 %t1a, i32* %r1
|
||||||
|
store i32 %t2a, i32* %r2
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user