mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
PTX: Always use registers for return values, but use .param space for device
parameters if SM >= 2.0
- Update test cases to be more robust against register allocation changes
- Bump up the number of registers to 128 per type
- Include Python script to re-generate register file with any number of registers
llvm-svn: 133736
This commit is contained in:
parent
acf53a172e
commit
a1dd1dd26e
@ -433,25 +433,16 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
|
||||
|
||||
if (!isKernel) {
|
||||
decl += " (";
|
||||
|
||||
for (PTXMachineFunctionInfo::ret_iterator
|
||||
i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
|
||||
i != e; ++i) {
|
||||
if (i != b) {
|
||||
decl += ", ";
|
||||
}
|
||||
if (ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) {
|
||||
decl += ".param .b";
|
||||
decl += utostr(*i);
|
||||
decl += " ";
|
||||
decl += RETURN_PREFIX;
|
||||
decl += utostr(++cnt);
|
||||
} else {
|
||||
decl += ".reg .";
|
||||
decl += getRegisterTypeName(*i);
|
||||
decl += " ";
|
||||
decl += getRegisterName(*i);
|
||||
}
|
||||
decl += ".reg .";
|
||||
decl += getRegisterTypeName(*i);
|
||||
decl += " ";
|
||||
decl += getRegisterName(*i);
|
||||
}
|
||||
decl += ")";
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
@ -11,26 +12,18 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Currently, we reserve one register of each type for return values and let
|
||||
// the rest be used for parameters. This is a dirty hack, but I am not sure
|
||||
// how to tell LLVM that registers used for parameter passing cannot be used
|
||||
// for return values.
|
||||
|
||||
// PTX Calling Conventions
|
||||
// PTX Formal Parameter Calling Convention
|
||||
def CC_PTX : CallingConv<[
|
||||
CCIfType<[i1], CCAssignToReg<[P1, P2, P3, P4, P5, P6, P7]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[RH1, RH2, RH3, RH4, RH5, RH6, RH7]>>,
|
||||
CCIfType<[i32, f32], CCAssignToReg<[R1, R2, R3, R4, R5, R6, R7]>>,
|
||||
CCIfType<[i64, f64], CCAssignToReg<[RD1, RD2, RD3, RD4, RD5, RD6, RD7]>>
|
||||
CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
|
||||
CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
|
||||
CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Return Value Calling Conventions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// PTX Return Value Calling Convention
|
||||
def RetCC_PTX : CallingConv<[
|
||||
CCIfType<[i1], CCAssignToReg<[P0]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[RH0]>>,
|
||||
CCIfType<[i32, f32], CCAssignToReg<[R0]>>,
|
||||
CCIfType<[i64, f64], CCAssignToReg<[RD0]>>
|
||||
CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
|
||||
CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
|
||||
CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
|
||||
]>;
|
||||
|
@ -307,49 +307,35 @@ SDValue PTXTargetLowering::
|
||||
|
||||
MachineFunction& MF = DAG.getMachineFunction();
|
||||
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
|
||||
const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
|
||||
|
||||
SDValue Flag;
|
||||
|
||||
if (ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) {
|
||||
// For SM 2.0+, we return arguments in the param space
|
||||
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
|
||||
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||
SDValue ParamIndex = DAG.getTargetConstant(i, MVT::i32);
|
||||
SDValue Ops[] = { Chain, ParamIndex, OutVals[i], Flag };
|
||||
Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, VTs, Ops,
|
||||
Flag.getNode() ? 4 : 3);
|
||||
Flag = Chain.getValue(1);
|
||||
// Instead of storing a physical register in our argument list, we just
|
||||
// store the total size of the parameter, in bits. The ASM printer
|
||||
// knows how to process this.
|
||||
MFI->addRetReg(Outs[i].VT.getStoreSizeInBits());
|
||||
}
|
||||
} else {
|
||||
// For SM < 2.0, we return arguments in registers
|
||||
SmallVector<CCValAssign, 16> RVLocs;
|
||||
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
|
||||
getTargetMachine(), RVLocs, *DAG.getContext());
|
||||
// Even though we could use the .param space for return arguments for
|
||||
// device functions if SM >= 2.0 and the number of return arguments is
|
||||
// only 1, we just always use registers since this makes the codegen
|
||||
// easier.
|
||||
SmallVector<CCValAssign, 16> RVLocs;
|
||||
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
|
||||
getTargetMachine(), RVLocs, *DAG.getContext());
|
||||
|
||||
CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
|
||||
CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
|
||||
|
||||
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
|
||||
CCValAssign& VA = RVLocs[i];
|
||||
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
|
||||
CCValAssign& VA = RVLocs[i];
|
||||
|
||||
assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
|
||||
assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
|
||||
|
||||
unsigned Reg = VA.getLocReg();
|
||||
unsigned Reg = VA.getLocReg();
|
||||
|
||||
DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
|
||||
DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
|
||||
|
||||
Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
|
||||
Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
|
||||
|
||||
// Guarantee that all emitted copies are stuck together,
|
||||
// avoiding something bad
|
||||
Flag = Chain.getValue(1);
|
||||
// Guarantee that all emitted copies are stuck together,
|
||||
// avoiding something bad
|
||||
Flag = Chain.getValue(1);
|
||||
|
||||
MFI->addRetReg(Reg);
|
||||
}
|
||||
MFI->addRetReg(Reg);
|
||||
}
|
||||
|
||||
if (Flag.getNode() == 0) {
|
||||
|
@ -26,7 +26,7 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo {
|
||||
private:
|
||||
bool is_kernel;
|
||||
std::vector<unsigned> reg_arg, reg_local_var;
|
||||
DenseSet<unsigned> reg_ret;
|
||||
std::vector<unsigned> reg_ret;
|
||||
bool _isDoneAddArg;
|
||||
|
||||
public:
|
||||
@ -40,7 +40,11 @@ public:
|
||||
|
||||
void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
|
||||
void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
|
||||
void addRetReg(unsigned reg) { reg_ret.insert(reg); }
|
||||
void addRetReg(unsigned reg) {
|
||||
if (!isRetReg(reg)) {
|
||||
reg_ret.push_back(reg);
|
||||
}
|
||||
}
|
||||
|
||||
void doneAddArg(void) {
|
||||
_isDoneAddArg = true;
|
||||
@ -51,7 +55,7 @@ public:
|
||||
|
||||
typedef std::vector<unsigned>::const_iterator reg_iterator;
|
||||
typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
|
||||
typedef DenseSet<unsigned>::const_iterator ret_iterator;
|
||||
typedef std::vector<unsigned>::const_iterator ret_iterator;
|
||||
|
||||
bool argRegEmpty() const { return reg_arg.empty(); }
|
||||
int getNumArg() const { return reg_arg.size(); }
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
@ -21,55 +22,534 @@ class PTXReg<string n> : Register<n> {
|
||||
|
||||
///===- Predicate Registers -----------------------------------------------===//
|
||||
|
||||
def P0 : PTXReg<"p0">;
|
||||
def P1 : PTXReg<"p1">;
|
||||
def P2 : PTXReg<"p2">;
|
||||
def P3 : PTXReg<"p3">;
|
||||
def P4 : PTXReg<"p4">;
|
||||
def P5 : PTXReg<"p5">;
|
||||
def P6 : PTXReg<"p6">;
|
||||
def P7 : PTXReg<"p7">;
|
||||
def P0 : PTXReg<"p0">;
|
||||
def P1 : PTXReg<"p1">;
|
||||
def P2 : PTXReg<"p2">;
|
||||
def P3 : PTXReg<"p3">;
|
||||
def P4 : PTXReg<"p4">;
|
||||
def P5 : PTXReg<"p5">;
|
||||
def P6 : PTXReg<"p6">;
|
||||
def P7 : PTXReg<"p7">;
|
||||
def P8 : PTXReg<"p8">;
|
||||
def P9 : PTXReg<"p9">;
|
||||
def P10 : PTXReg<"p10">;
|
||||
def P11 : PTXReg<"p11">;
|
||||
def P12 : PTXReg<"p12">;
|
||||
def P13 : PTXReg<"p13">;
|
||||
def P14 : PTXReg<"p14">;
|
||||
def P15 : PTXReg<"p15">;
|
||||
def P16 : PTXReg<"p16">;
|
||||
def P17 : PTXReg<"p17">;
|
||||
def P18 : PTXReg<"p18">;
|
||||
def P19 : PTXReg<"p19">;
|
||||
def P20 : PTXReg<"p20">;
|
||||
def P21 : PTXReg<"p21">;
|
||||
def P22 : PTXReg<"p22">;
|
||||
def P23 : PTXReg<"p23">;
|
||||
def P24 : PTXReg<"p24">;
|
||||
def P25 : PTXReg<"p25">;
|
||||
def P26 : PTXReg<"p26">;
|
||||
def P27 : PTXReg<"p27">;
|
||||
def P28 : PTXReg<"p28">;
|
||||
def P29 : PTXReg<"p29">;
|
||||
def P30 : PTXReg<"p30">;
|
||||
def P31 : PTXReg<"p31">;
|
||||
def P32 : PTXReg<"p32">;
|
||||
def P33 : PTXReg<"p33">;
|
||||
def P34 : PTXReg<"p34">;
|
||||
def P35 : PTXReg<"p35">;
|
||||
def P36 : PTXReg<"p36">;
|
||||
def P37 : PTXReg<"p37">;
|
||||
def P38 : PTXReg<"p38">;
|
||||
def P39 : PTXReg<"p39">;
|
||||
def P40 : PTXReg<"p40">;
|
||||
def P41 : PTXReg<"p41">;
|
||||
def P42 : PTXReg<"p42">;
|
||||
def P43 : PTXReg<"p43">;
|
||||
def P44 : PTXReg<"p44">;
|
||||
def P45 : PTXReg<"p45">;
|
||||
def P46 : PTXReg<"p46">;
|
||||
def P47 : PTXReg<"p47">;
|
||||
def P48 : PTXReg<"p48">;
|
||||
def P49 : PTXReg<"p49">;
|
||||
def P50 : PTXReg<"p50">;
|
||||
def P51 : PTXReg<"p51">;
|
||||
def P52 : PTXReg<"p52">;
|
||||
def P53 : PTXReg<"p53">;
|
||||
def P54 : PTXReg<"p54">;
|
||||
def P55 : PTXReg<"p55">;
|
||||
def P56 : PTXReg<"p56">;
|
||||
def P57 : PTXReg<"p57">;
|
||||
def P58 : PTXReg<"p58">;
|
||||
def P59 : PTXReg<"p59">;
|
||||
def P60 : PTXReg<"p60">;
|
||||
def P61 : PTXReg<"p61">;
|
||||
def P62 : PTXReg<"p62">;
|
||||
def P63 : PTXReg<"p63">;
|
||||
def P64 : PTXReg<"p64">;
|
||||
def P65 : PTXReg<"p65">;
|
||||
def P66 : PTXReg<"p66">;
|
||||
def P67 : PTXReg<"p67">;
|
||||
def P68 : PTXReg<"p68">;
|
||||
def P69 : PTXReg<"p69">;
|
||||
def P70 : PTXReg<"p70">;
|
||||
def P71 : PTXReg<"p71">;
|
||||
def P72 : PTXReg<"p72">;
|
||||
def P73 : PTXReg<"p73">;
|
||||
def P74 : PTXReg<"p74">;
|
||||
def P75 : PTXReg<"p75">;
|
||||
def P76 : PTXReg<"p76">;
|
||||
def P77 : PTXReg<"p77">;
|
||||
def P78 : PTXReg<"p78">;
|
||||
def P79 : PTXReg<"p79">;
|
||||
def P80 : PTXReg<"p80">;
|
||||
def P81 : PTXReg<"p81">;
|
||||
def P82 : PTXReg<"p82">;
|
||||
def P83 : PTXReg<"p83">;
|
||||
def P84 : PTXReg<"p84">;
|
||||
def P85 : PTXReg<"p85">;
|
||||
def P86 : PTXReg<"p86">;
|
||||
def P87 : PTXReg<"p87">;
|
||||
def P88 : PTXReg<"p88">;
|
||||
def P89 : PTXReg<"p89">;
|
||||
def P90 : PTXReg<"p90">;
|
||||
def P91 : PTXReg<"p91">;
|
||||
def P92 : PTXReg<"p92">;
|
||||
def P93 : PTXReg<"p93">;
|
||||
def P94 : PTXReg<"p94">;
|
||||
def P95 : PTXReg<"p95">;
|
||||
def P96 : PTXReg<"p96">;
|
||||
def P97 : PTXReg<"p97">;
|
||||
def P98 : PTXReg<"p98">;
|
||||
def P99 : PTXReg<"p99">;
|
||||
def P100 : PTXReg<"p100">;
|
||||
def P101 : PTXReg<"p101">;
|
||||
def P102 : PTXReg<"p102">;
|
||||
def P103 : PTXReg<"p103">;
|
||||
def P104 : PTXReg<"p104">;
|
||||
def P105 : PTXReg<"p105">;
|
||||
def P106 : PTXReg<"p106">;
|
||||
def P107 : PTXReg<"p107">;
|
||||
def P108 : PTXReg<"p108">;
|
||||
def P109 : PTXReg<"p109">;
|
||||
def P110 : PTXReg<"p110">;
|
||||
def P111 : PTXReg<"p111">;
|
||||
def P112 : PTXReg<"p112">;
|
||||
def P113 : PTXReg<"p113">;
|
||||
def P114 : PTXReg<"p114">;
|
||||
def P115 : PTXReg<"p115">;
|
||||
def P116 : PTXReg<"p116">;
|
||||
def P117 : PTXReg<"p117">;
|
||||
def P118 : PTXReg<"p118">;
|
||||
def P119 : PTXReg<"p119">;
|
||||
def P120 : PTXReg<"p120">;
|
||||
def P121 : PTXReg<"p121">;
|
||||
def P122 : PTXReg<"p122">;
|
||||
def P123 : PTXReg<"p123">;
|
||||
def P124 : PTXReg<"p124">;
|
||||
def P125 : PTXReg<"p125">;
|
||||
def P126 : PTXReg<"p126">;
|
||||
def P127 : PTXReg<"p127">;
|
||||
|
||||
///===- 16-bit Integer Registers ------------------------------------------===//
|
||||
///===- 16-Bit Registers --------------------------------------------------===//
|
||||
|
||||
def RH0 : PTXReg<"rh0">;
|
||||
def RH1 : PTXReg<"rh1">;
|
||||
def RH2 : PTXReg<"rh2">;
|
||||
def RH3 : PTXReg<"rh3">;
|
||||
def RH4 : PTXReg<"rh4">;
|
||||
def RH5 : PTXReg<"rh5">;
|
||||
def RH6 : PTXReg<"rh6">;
|
||||
def RH7 : PTXReg<"rh7">;
|
||||
def RH0 : PTXReg<"rh0">;
|
||||
def RH1 : PTXReg<"rh1">;
|
||||
def RH2 : PTXReg<"rh2">;
|
||||
def RH3 : PTXReg<"rh3">;
|
||||
def RH4 : PTXReg<"rh4">;
|
||||
def RH5 : PTXReg<"rh5">;
|
||||
def RH6 : PTXReg<"rh6">;
|
||||
def RH7 : PTXReg<"rh7">;
|
||||
def RH8 : PTXReg<"rh8">;
|
||||
def RH9 : PTXReg<"rh9">;
|
||||
def RH10 : PTXReg<"rh10">;
|
||||
def RH11 : PTXReg<"rh11">;
|
||||
def RH12 : PTXReg<"rh12">;
|
||||
def RH13 : PTXReg<"rh13">;
|
||||
def RH14 : PTXReg<"rh14">;
|
||||
def RH15 : PTXReg<"rh15">;
|
||||
def RH16 : PTXReg<"rh16">;
|
||||
def RH17 : PTXReg<"rh17">;
|
||||
def RH18 : PTXReg<"rh18">;
|
||||
def RH19 : PTXReg<"rh19">;
|
||||
def RH20 : PTXReg<"rh20">;
|
||||
def RH21 : PTXReg<"rh21">;
|
||||
def RH22 : PTXReg<"rh22">;
|
||||
def RH23 : PTXReg<"rh23">;
|
||||
def RH24 : PTXReg<"rh24">;
|
||||
def RH25 : PTXReg<"rh25">;
|
||||
def RH26 : PTXReg<"rh26">;
|
||||
def RH27 : PTXReg<"rh27">;
|
||||
def RH28 : PTXReg<"rh28">;
|
||||
def RH29 : PTXReg<"rh29">;
|
||||
def RH30 : PTXReg<"rh30">;
|
||||
def RH31 : PTXReg<"rh31">;
|
||||
def RH32 : PTXReg<"rh32">;
|
||||
def RH33 : PTXReg<"rh33">;
|
||||
def RH34 : PTXReg<"rh34">;
|
||||
def RH35 : PTXReg<"rh35">;
|
||||
def RH36 : PTXReg<"rh36">;
|
||||
def RH37 : PTXReg<"rh37">;
|
||||
def RH38 : PTXReg<"rh38">;
|
||||
def RH39 : PTXReg<"rh39">;
|
||||
def RH40 : PTXReg<"rh40">;
|
||||
def RH41 : PTXReg<"rh41">;
|
||||
def RH42 : PTXReg<"rh42">;
|
||||
def RH43 : PTXReg<"rh43">;
|
||||
def RH44 : PTXReg<"rh44">;
|
||||
def RH45 : PTXReg<"rh45">;
|
||||
def RH46 : PTXReg<"rh46">;
|
||||
def RH47 : PTXReg<"rh47">;
|
||||
def RH48 : PTXReg<"rh48">;
|
||||
def RH49 : PTXReg<"rh49">;
|
||||
def RH50 : PTXReg<"rh50">;
|
||||
def RH51 : PTXReg<"rh51">;
|
||||
def RH52 : PTXReg<"rh52">;
|
||||
def RH53 : PTXReg<"rh53">;
|
||||
def RH54 : PTXReg<"rh54">;
|
||||
def RH55 : PTXReg<"rh55">;
|
||||
def RH56 : PTXReg<"rh56">;
|
||||
def RH57 : PTXReg<"rh57">;
|
||||
def RH58 : PTXReg<"rh58">;
|
||||
def RH59 : PTXReg<"rh59">;
|
||||
def RH60 : PTXReg<"rh60">;
|
||||
def RH61 : PTXReg<"rh61">;
|
||||
def RH62 : PTXReg<"rh62">;
|
||||
def RH63 : PTXReg<"rh63">;
|
||||
def RH64 : PTXReg<"rh64">;
|
||||
def RH65 : PTXReg<"rh65">;
|
||||
def RH66 : PTXReg<"rh66">;
|
||||
def RH67 : PTXReg<"rh67">;
|
||||
def RH68 : PTXReg<"rh68">;
|
||||
def RH69 : PTXReg<"rh69">;
|
||||
def RH70 : PTXReg<"rh70">;
|
||||
def RH71 : PTXReg<"rh71">;
|
||||
def RH72 : PTXReg<"rh72">;
|
||||
def RH73 : PTXReg<"rh73">;
|
||||
def RH74 : PTXReg<"rh74">;
|
||||
def RH75 : PTXReg<"rh75">;
|
||||
def RH76 : PTXReg<"rh76">;
|
||||
def RH77 : PTXReg<"rh77">;
|
||||
def RH78 : PTXReg<"rh78">;
|
||||
def RH79 : PTXReg<"rh79">;
|
||||
def RH80 : PTXReg<"rh80">;
|
||||
def RH81 : PTXReg<"rh81">;
|
||||
def RH82 : PTXReg<"rh82">;
|
||||
def RH83 : PTXReg<"rh83">;
|
||||
def RH84 : PTXReg<"rh84">;
|
||||
def RH85 : PTXReg<"rh85">;
|
||||
def RH86 : PTXReg<"rh86">;
|
||||
def RH87 : PTXReg<"rh87">;
|
||||
def RH88 : PTXReg<"rh88">;
|
||||
def RH89 : PTXReg<"rh89">;
|
||||
def RH90 : PTXReg<"rh90">;
|
||||
def RH91 : PTXReg<"rh91">;
|
||||
def RH92 : PTXReg<"rh92">;
|
||||
def RH93 : PTXReg<"rh93">;
|
||||
def RH94 : PTXReg<"rh94">;
|
||||
def RH95 : PTXReg<"rh95">;
|
||||
def RH96 : PTXReg<"rh96">;
|
||||
def RH97 : PTXReg<"rh97">;
|
||||
def RH98 : PTXReg<"rh98">;
|
||||
def RH99 : PTXReg<"rh99">;
|
||||
def RH100 : PTXReg<"rh100">;
|
||||
def RH101 : PTXReg<"rh101">;
|
||||
def RH102 : PTXReg<"rh102">;
|
||||
def RH103 : PTXReg<"rh103">;
|
||||
def RH104 : PTXReg<"rh104">;
|
||||
def RH105 : PTXReg<"rh105">;
|
||||
def RH106 : PTXReg<"rh106">;
|
||||
def RH107 : PTXReg<"rh107">;
|
||||
def RH108 : PTXReg<"rh108">;
|
||||
def RH109 : PTXReg<"rh109">;
|
||||
def RH110 : PTXReg<"rh110">;
|
||||
def RH111 : PTXReg<"rh111">;
|
||||
def RH112 : PTXReg<"rh112">;
|
||||
def RH113 : PTXReg<"rh113">;
|
||||
def RH114 : PTXReg<"rh114">;
|
||||
def RH115 : PTXReg<"rh115">;
|
||||
def RH116 : PTXReg<"rh116">;
|
||||
def RH117 : PTXReg<"rh117">;
|
||||
def RH118 : PTXReg<"rh118">;
|
||||
def RH119 : PTXReg<"rh119">;
|
||||
def RH120 : PTXReg<"rh120">;
|
||||
def RH121 : PTXReg<"rh121">;
|
||||
def RH122 : PTXReg<"rh122">;
|
||||
def RH123 : PTXReg<"rh123">;
|
||||
def RH124 : PTXReg<"rh124">;
|
||||
def RH125 : PTXReg<"rh125">;
|
||||
def RH126 : PTXReg<"rh126">;
|
||||
def RH127 : PTXReg<"rh127">;
|
||||
|
||||
///===- 32-bit Integer Registers ------------------------------------------===//
|
||||
///===- 32-Bit Registers --------------------------------------------------===//
|
||||
|
||||
def R0 : PTXReg<"r0">;
|
||||
def R1 : PTXReg<"r1">;
|
||||
def R2 : PTXReg<"r2">;
|
||||
def R3 : PTXReg<"r3">;
|
||||
def R4 : PTXReg<"r4">;
|
||||
def R5 : PTXReg<"r5">;
|
||||
def R6 : PTXReg<"r6">;
|
||||
def R7 : PTXReg<"r7">;
|
||||
def R0 : PTXReg<"r0">;
|
||||
def R1 : PTXReg<"r1">;
|
||||
def R2 : PTXReg<"r2">;
|
||||
def R3 : PTXReg<"r3">;
|
||||
def R4 : PTXReg<"r4">;
|
||||
def R5 : PTXReg<"r5">;
|
||||
def R6 : PTXReg<"r6">;
|
||||
def R7 : PTXReg<"r7">;
|
||||
def R8 : PTXReg<"r8">;
|
||||
def R9 : PTXReg<"r9">;
|
||||
def R10 : PTXReg<"r10">;
|
||||
def R11 : PTXReg<"r11">;
|
||||
def R12 : PTXReg<"r12">;
|
||||
def R13 : PTXReg<"r13">;
|
||||
def R14 : PTXReg<"r14">;
|
||||
def R15 : PTXReg<"r15">;
|
||||
def R16 : PTXReg<"r16">;
|
||||
def R17 : PTXReg<"r17">;
|
||||
def R18 : PTXReg<"r18">;
|
||||
def R19 : PTXReg<"r19">;
|
||||
def R20 : PTXReg<"r20">;
|
||||
def R21 : PTXReg<"r21">;
|
||||
def R22 : PTXReg<"r22">;
|
||||
def R23 : PTXReg<"r23">;
|
||||
def R24 : PTXReg<"r24">;
|
||||
def R25 : PTXReg<"r25">;
|
||||
def R26 : PTXReg<"r26">;
|
||||
def R27 : PTXReg<"r27">;
|
||||
def R28 : PTXReg<"r28">;
|
||||
def R29 : PTXReg<"r29">;
|
||||
def R30 : PTXReg<"r30">;
|
||||
def R31 : PTXReg<"r31">;
|
||||
def R32 : PTXReg<"r32">;
|
||||
def R33 : PTXReg<"r33">;
|
||||
def R34 : PTXReg<"r34">;
|
||||
def R35 : PTXReg<"r35">;
|
||||
def R36 : PTXReg<"r36">;
|
||||
def R37 : PTXReg<"r37">;
|
||||
def R38 : PTXReg<"r38">;
|
||||
def R39 : PTXReg<"r39">;
|
||||
def R40 : PTXReg<"r40">;
|
||||
def R41 : PTXReg<"r41">;
|
||||
def R42 : PTXReg<"r42">;
|
||||
def R43 : PTXReg<"r43">;
|
||||
def R44 : PTXReg<"r44">;
|
||||
def R45 : PTXReg<"r45">;
|
||||
def R46 : PTXReg<"r46">;
|
||||
def R47 : PTXReg<"r47">;
|
||||
def R48 : PTXReg<"r48">;
|
||||
def R49 : PTXReg<"r49">;
|
||||
def R50 : PTXReg<"r50">;
|
||||
def R51 : PTXReg<"r51">;
|
||||
def R52 : PTXReg<"r52">;
|
||||
def R53 : PTXReg<"r53">;
|
||||
def R54 : PTXReg<"r54">;
|
||||
def R55 : PTXReg<"r55">;
|
||||
def R56 : PTXReg<"r56">;
|
||||
def R57 : PTXReg<"r57">;
|
||||
def R58 : PTXReg<"r58">;
|
||||
def R59 : PTXReg<"r59">;
|
||||
def R60 : PTXReg<"r60">;
|
||||
def R61 : PTXReg<"r61">;
|
||||
def R62 : PTXReg<"r62">;
|
||||
def R63 : PTXReg<"r63">;
|
||||
def R64 : PTXReg<"r64">;
|
||||
def R65 : PTXReg<"r65">;
|
||||
def R66 : PTXReg<"r66">;
|
||||
def R67 : PTXReg<"r67">;
|
||||
def R68 : PTXReg<"r68">;
|
||||
def R69 : PTXReg<"r69">;
|
||||
def R70 : PTXReg<"r70">;
|
||||
def R71 : PTXReg<"r71">;
|
||||
def R72 : PTXReg<"r72">;
|
||||
def R73 : PTXReg<"r73">;
|
||||
def R74 : PTXReg<"r74">;
|
||||
def R75 : PTXReg<"r75">;
|
||||
def R76 : PTXReg<"r76">;
|
||||
def R77 : PTXReg<"r77">;
|
||||
def R78 : PTXReg<"r78">;
|
||||
def R79 : PTXReg<"r79">;
|
||||
def R80 : PTXReg<"r80">;
|
||||
def R81 : PTXReg<"r81">;
|
||||
def R82 : PTXReg<"r82">;
|
||||
def R83 : PTXReg<"r83">;
|
||||
def R84 : PTXReg<"r84">;
|
||||
def R85 : PTXReg<"r85">;
|
||||
def R86 : PTXReg<"r86">;
|
||||
def R87 : PTXReg<"r87">;
|
||||
def R88 : PTXReg<"r88">;
|
||||
def R89 : PTXReg<"r89">;
|
||||
def R90 : PTXReg<"r90">;
|
||||
def R91 : PTXReg<"r91">;
|
||||
def R92 : PTXReg<"r92">;
|
||||
def R93 : PTXReg<"r93">;
|
||||
def R94 : PTXReg<"r94">;
|
||||
def R95 : PTXReg<"r95">;
|
||||
def R96 : PTXReg<"r96">;
|
||||
def R97 : PTXReg<"r97">;
|
||||
def R98 : PTXReg<"r98">;
|
||||
def R99 : PTXReg<"r99">;
|
||||
def R100 : PTXReg<"r100">;
|
||||
def R101 : PTXReg<"r101">;
|
||||
def R102 : PTXReg<"r102">;
|
||||
def R103 : PTXReg<"r103">;
|
||||
def R104 : PTXReg<"r104">;
|
||||
def R105 : PTXReg<"r105">;
|
||||
def R106 : PTXReg<"r106">;
|
||||
def R107 : PTXReg<"r107">;
|
||||
def R108 : PTXReg<"r108">;
|
||||
def R109 : PTXReg<"r109">;
|
||||
def R110 : PTXReg<"r110">;
|
||||
def R111 : PTXReg<"r111">;
|
||||
def R112 : PTXReg<"r112">;
|
||||
def R113 : PTXReg<"r113">;
|
||||
def R114 : PTXReg<"r114">;
|
||||
def R115 : PTXReg<"r115">;
|
||||
def R116 : PTXReg<"r116">;
|
||||
def R117 : PTXReg<"r117">;
|
||||
def R118 : PTXReg<"r118">;
|
||||
def R119 : PTXReg<"r119">;
|
||||
def R120 : PTXReg<"r120">;
|
||||
def R121 : PTXReg<"r121">;
|
||||
def R122 : PTXReg<"r122">;
|
||||
def R123 : PTXReg<"r123">;
|
||||
def R124 : PTXReg<"r124">;
|
||||
def R125 : PTXReg<"r125">;
|
||||
def R126 : PTXReg<"r126">;
|
||||
def R127 : PTXReg<"r127">;
|
||||
|
||||
///===- 64-bit Integer Registers ------------------------------------------===//
|
||||
///===- 64-Bit Registers --------------------------------------------------===//
|
||||
|
||||
def RD0 : PTXReg<"rd0">;
|
||||
def RD1 : PTXReg<"rd1">;
|
||||
def RD2 : PTXReg<"rd2">;
|
||||
def RD3 : PTXReg<"rd3">;
|
||||
def RD4 : PTXReg<"rd4">;
|
||||
def RD5 : PTXReg<"rd5">;
|
||||
def RD6 : PTXReg<"rd6">;
|
||||
def RD7 : PTXReg<"rd7">;
|
||||
def RD0 : PTXReg<"rd0">;
|
||||
def RD1 : PTXReg<"rd1">;
|
||||
def RD2 : PTXReg<"rd2">;
|
||||
def RD3 : PTXReg<"rd3">;
|
||||
def RD4 : PTXReg<"rd4">;
|
||||
def RD5 : PTXReg<"rd5">;
|
||||
def RD6 : PTXReg<"rd6">;
|
||||
def RD7 : PTXReg<"rd7">;
|
||||
def RD8 : PTXReg<"rd8">;
|
||||
def RD9 : PTXReg<"rd9">;
|
||||
def RD10 : PTXReg<"rd10">;
|
||||
def RD11 : PTXReg<"rd11">;
|
||||
def RD12 : PTXReg<"rd12">;
|
||||
def RD13 : PTXReg<"rd13">;
|
||||
def RD14 : PTXReg<"rd14">;
|
||||
def RD15 : PTXReg<"rd15">;
|
||||
def RD16 : PTXReg<"rd16">;
|
||||
def RD17 : PTXReg<"rd17">;
|
||||
def RD18 : PTXReg<"rd18">;
|
||||
def RD19 : PTXReg<"rd19">;
|
||||
def RD20 : PTXReg<"rd20">;
|
||||
def RD21 : PTXReg<"rd21">;
|
||||
def RD22 : PTXReg<"rd22">;
|
||||
def RD23 : PTXReg<"rd23">;
|
||||
def RD24 : PTXReg<"rd24">;
|
||||
def RD25 : PTXReg<"rd25">;
|
||||
def RD26 : PTXReg<"rd26">;
|
||||
def RD27 : PTXReg<"rd27">;
|
||||
def RD28 : PTXReg<"rd28">;
|
||||
def RD29 : PTXReg<"rd29">;
|
||||
def RD30 : PTXReg<"rd30">;
|
||||
def RD31 : PTXReg<"rd31">;
|
||||
def RD32 : PTXReg<"rd32">;
|
||||
def RD33 : PTXReg<"rd33">;
|
||||
def RD34 : PTXReg<"rd34">;
|
||||
def RD35 : PTXReg<"rd35">;
|
||||
def RD36 : PTXReg<"rd36">;
|
||||
def RD37 : PTXReg<"rd37">;
|
||||
def RD38 : PTXReg<"rd38">;
|
||||
def RD39 : PTXReg<"rd39">;
|
||||
def RD40 : PTXReg<"rd40">;
|
||||
def RD41 : PTXReg<"rd41">;
|
||||
def RD42 : PTXReg<"rd42">;
|
||||
def RD43 : PTXReg<"rd43">;
|
||||
def RD44 : PTXReg<"rd44">;
|
||||
def RD45 : PTXReg<"rd45">;
|
||||
def RD46 : PTXReg<"rd46">;
|
||||
def RD47 : PTXReg<"rd47">;
|
||||
def RD48 : PTXReg<"rd48">;
|
||||
def RD49 : PTXReg<"rd49">;
|
||||
def RD50 : PTXReg<"rd50">;
|
||||
def RD51 : PTXReg<"rd51">;
|
||||
def RD52 : PTXReg<"rd52">;
|
||||
def RD53 : PTXReg<"rd53">;
|
||||
def RD54 : PTXReg<"rd54">;
|
||||
def RD55 : PTXReg<"rd55">;
|
||||
def RD56 : PTXReg<"rd56">;
|
||||
def RD57 : PTXReg<"rd57">;
|
||||
def RD58 : PTXReg<"rd58">;
|
||||
def RD59 : PTXReg<"rd59">;
|
||||
def RD60 : PTXReg<"rd60">;
|
||||
def RD61 : PTXReg<"rd61">;
|
||||
def RD62 : PTXReg<"rd62">;
|
||||
def RD63 : PTXReg<"rd63">;
|
||||
def RD64 : PTXReg<"rd64">;
|
||||
def RD65 : PTXReg<"rd65">;
|
||||
def RD66 : PTXReg<"rd66">;
|
||||
def RD67 : PTXReg<"rd67">;
|
||||
def RD68 : PTXReg<"rd68">;
|
||||
def RD69 : PTXReg<"rd69">;
|
||||
def RD70 : PTXReg<"rd70">;
|
||||
def RD71 : PTXReg<"rd71">;
|
||||
def RD72 : PTXReg<"rd72">;
|
||||
def RD73 : PTXReg<"rd73">;
|
||||
def RD74 : PTXReg<"rd74">;
|
||||
def RD75 : PTXReg<"rd75">;
|
||||
def RD76 : PTXReg<"rd76">;
|
||||
def RD77 : PTXReg<"rd77">;
|
||||
def RD78 : PTXReg<"rd78">;
|
||||
def RD79 : PTXReg<"rd79">;
|
||||
def RD80 : PTXReg<"rd80">;
|
||||
def RD81 : PTXReg<"rd81">;
|
||||
def RD82 : PTXReg<"rd82">;
|
||||
def RD83 : PTXReg<"rd83">;
|
||||
def RD84 : PTXReg<"rd84">;
|
||||
def RD85 : PTXReg<"rd85">;
|
||||
def RD86 : PTXReg<"rd86">;
|
||||
def RD87 : PTXReg<"rd87">;
|
||||
def RD88 : PTXReg<"rd88">;
|
||||
def RD89 : PTXReg<"rd89">;
|
||||
def RD90 : PTXReg<"rd90">;
|
||||
def RD91 : PTXReg<"rd91">;
|
||||
def RD92 : PTXReg<"rd92">;
|
||||
def RD93 : PTXReg<"rd93">;
|
||||
def RD94 : PTXReg<"rd94">;
|
||||
def RD95 : PTXReg<"rd95">;
|
||||
def RD96 : PTXReg<"rd96">;
|
||||
def RD97 : PTXReg<"rd97">;
|
||||
def RD98 : PTXReg<"rd98">;
|
||||
def RD99 : PTXReg<"rd99">;
|
||||
def RD100 : PTXReg<"rd100">;
|
||||
def RD101 : PTXReg<"rd101">;
|
||||
def RD102 : PTXReg<"rd102">;
|
||||
def RD103 : PTXReg<"rd103">;
|
||||
def RD104 : PTXReg<"rd104">;
|
||||
def RD105 : PTXReg<"rd105">;
|
||||
def RD106 : PTXReg<"rd106">;
|
||||
def RD107 : PTXReg<"rd107">;
|
||||
def RD108 : PTXReg<"rd108">;
|
||||
def RD109 : PTXReg<"rd109">;
|
||||
def RD110 : PTXReg<"rd110">;
|
||||
def RD111 : PTXReg<"rd111">;
|
||||
def RD112 : PTXReg<"rd112">;
|
||||
def RD113 : PTXReg<"rd113">;
|
||||
def RD114 : PTXReg<"rd114">;
|
||||
def RD115 : PTXReg<"rd115">;
|
||||
def RD116 : PTXReg<"rd116">;
|
||||
def RD117 : PTXReg<"rd117">;
|
||||
def RD118 : PTXReg<"rd118">;
|
||||
def RD119 : PTXReg<"rd119">;
|
||||
def RD120 : PTXReg<"rd120">;
|
||||
def RD121 : PTXReg<"rd121">;
|
||||
def RD122 : PTXReg<"rd122">;
|
||||
def RD123 : PTXReg<"rd123">;
|
||||
def RD124 : PTXReg<"rd124">;
|
||||
def RD125 : PTXReg<"rd125">;
|
||||
def RD126 : PTXReg<"rd126">;
|
||||
def RD127 : PTXReg<"rd127">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 7)>;
|
||||
def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 7)>;
|
||||
def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 7)>;
|
||||
def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 7)>;
|
||||
def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 7)>;
|
||||
def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 7)>;
|
||||
def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
|
||||
def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
|
||||
def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
|
||||
def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
|
||||
def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>;
|
||||
def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>;
|
||||
|
163
lib/Target/PTX/generate-register-td.py
Executable file
163
lib/Target/PTX/generate-register-td.py
Executable file
@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python
|
||||
##===- generate-register-td.py --------------------------------*-python-*--===##
|
||||
##
|
||||
## The LLVM Compiler Infrastructure
|
||||
##
|
||||
## This file is distributed under the University of Illinois Open Source
|
||||
## License. See LICENSE.TXT for details.
|
||||
##
|
||||
##===----------------------------------------------------------------------===##
|
||||
##
|
||||
## This file describes the PTX register file generator.
|
||||
##
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
from sys import argv, exit, stdout
|
||||
|
||||
|
||||
# Validate the command line: exactly four register counts are required, in
# the order predicate, 16-bit, 32-bit, 64-bit.
if len(argv) != 5:
    print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
    exit(1)

# Convert each count to an integer.  Only ValueError is caught -- the error
# int() raises for a malformed numeric string -- so that unrelated exceptions
# such as KeyboardInterrupt or SystemExit are not misreported as bad input.
try:
    num_pred = int(argv[1])
    num_16bit = int(argv[2])
    num_32bit = int(argv[3])
    num_64bit = int(argv[4])
except ValueError:
    print('ERROR: Invalid integer parameter')
    exit(1)
|
||||
|
||||
## Print the register definition file
|
||||
td_file = open('PTXRegisterInfo.td', 'w')
|
||||
|
||||
td_file.write('''
|
||||
//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Declarations that describe the PTX register file
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class PTXReg<string n> : Register<n> {
|
||||
let Namespace = "PTX";
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Registers
|
||||
//===----------------------------------------------------------------------===//
|
||||
''')
|
||||
|
||||
|
||||
# Print predicate registers
|
||||
td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n')
|
||||
for r in range(0, num_pred):
|
||||
td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
|
||||
|
||||
# Print 16-bit registers
|
||||
td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
|
||||
for r in range(0, num_16bit):
|
||||
td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r))
|
||||
|
||||
# Print 32-bit registers
|
||||
td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n')
|
||||
for r in range(0, num_32bit):
|
||||
td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r))
|
||||
|
||||
# Print 64-bit registers
|
||||
td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n')
|
||||
for r in range(0, num_64bit):
|
||||
td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r))
|
||||
|
||||
|
||||
td_file.write('''
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
''')
|
||||
|
||||
|
||||
# Print register classes
|
||||
|
||||
td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
|
||||
td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
|
||||
td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
|
||||
td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
|
||||
td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
|
||||
td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
|
||||
|
||||
|
||||
td_file.close()
|
||||
|
||||
## Now write the PTXCallingConv.td file
|
||||
td_file = open('PTXCallingConv.td', 'w')
|
||||
|
||||
# Reserve 10% of the available registers for return values, and the other 90%
|
||||
# for parameters
|
||||
num_ret_pred = int(0.1 * num_pred)
|
||||
num_ret_16bit = int(0.1 * num_16bit)
|
||||
num_ret_32bit = int(0.1 * num_32bit)
|
||||
num_ret_64bit = int(0.1 * num_64bit)
|
||||
num_param_pred = num_pred - num_ret_pred
|
||||
num_param_16bit = num_16bit - num_ret_16bit
|
||||
num_param_32bit = num_32bit - num_ret_32bit
|
||||
num_param_64bit = num_64bit - num_ret_64bit
|
||||
|
||||
param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
|
||||
ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)]
|
||||
param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
|
||||
ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)]
|
||||
param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
|
||||
ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)]
|
||||
param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)]
|
||||
ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)]
|
||||
|
||||
param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred)
|
||||
ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred)
|
||||
param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit)
|
||||
ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit)
|
||||
param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit)
|
||||
ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit)
|
||||
param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit)
|
||||
ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit)
|
||||
|
||||
td_file.write('''
|
||||
//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This describes the calling conventions for the PTX architecture.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// PTX Formal Parameter Calling Convention
|
||||
def CC_PTX : CallingConv<[
|
||||
CCIfType<[i1], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i64,f64], CCAssignToReg<[%s]>>
|
||||
]>;
|
||||
|
||||
// PTX Return Value Calling Convention
|
||||
def RetCC_PTX : CallingConv<[
|
||||
CCIfType<[i1], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
|
||||
CCIfType<[i64,f64], CCAssignToReg<[%s]>>
|
||||
]>;
|
||||
''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
|
||||
ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
|
||||
|
||||
|
||||
td_file.close()
|
@ -1,70 +1,70 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
|
||||
; CHECK: add.u16 rh0, rh1, rh2;
|
||||
; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i16 %x, %y
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
|
||||
; CHECK: add.u32 r0, r1, r2;
|
||||
; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i32 %x, %y
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
|
||||
; CHECK: add.u64 rd0, rd1, rd2;
|
||||
; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i64 %x, %y
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: add.rn.f32 r0, r1, r2
|
||||
; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: add.rn.f64 rd0, rd1, rd2
|
||||
; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd double %x, %y
|
||||
ret double %z
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16 %x) {
|
||||
; CHECK: add.u16 rh0, rh1, 1;
|
||||
; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, 1;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i16 %x, 1
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32 %x) {
|
||||
; CHECK: add.u32 r0, r1, 1;
|
||||
; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, 1;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i32 %x, 1
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i64 @t2_u64(i64 %x) {
|
||||
; CHECK: add.u64 rd0, rd1, 1;
|
||||
; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, 1;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i64 %x, 1
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: add.rn.f32 r0, r1, 0F3F800000;
|
||||
; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F3F800000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd float %x, 1.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: add.rn.f64 rd0, rd1, 0D3FF0000000000000;
|
||||
; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D3FF0000000000000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd double %x, 1.0
|
||||
ret double %z
|
||||
|
23
test/CodeGen/PTX/aggregates.ll
Normal file
23
test/CodeGen/PTX/aggregates.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
|
||||
|
||||
%complex = type { float, float }
|
||||
|
||||
define ptx_device %complex @complex_add(%complex %a, %complex %b) {
|
||||
entry:
|
||||
; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1];
|
||||
; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3];
|
||||
; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2];
|
||||
; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4];
|
||||
; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]];
|
||||
; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]];
|
||||
; CHECK-NEXT: ret;
|
||||
%a.real = extractvalue %complex %a, 0
|
||||
%a.imag = extractvalue %complex %a, 1
|
||||
%b.real = extractvalue %complex %b, 0
|
||||
%b.imag = extractvalue %complex %b, 1
|
||||
%ret.real = fadd float %a.real, %b.real
|
||||
%ret.imag = fadd float %a.imag, %b.imag
|
||||
%ret.0 = insertvalue %complex undef, float %ret.real, 0
|
||||
%ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1
|
||||
ret %complex %ret.1
|
||||
}
|
@ -3,21 +3,21 @@
|
||||
; preds
|
||||
|
||||
define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) {
|
||||
; CHECK: and.pred p0, p1, p2
|
||||
; CHECK: and.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}}
|
||||
%c = and i1 %x, %y
|
||||
%d = zext i1 %c to i32
|
||||
ret i32 %d
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) {
|
||||
; CHECK: or.pred p0, p1, p2
|
||||
; CHECK: or.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}}
|
||||
%a = or i1 %x, %y
|
||||
%b = zext i1 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) {
|
||||
; CHECK: xor.pred p0, p1, p2
|
||||
; CHECK: xor.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}}
|
||||
%a = xor i1 %x, %y
|
||||
%b = zext i1 %a to i32
|
||||
ret i32 %b
|
||||
|
@ -10,15 +10,15 @@ loop:
|
||||
|
||||
define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) {
|
||||
entry:
|
||||
; CHECK: setp.le.u32 p0, r1, r2
|
||||
; CHECK: setp.le.u32 p0, r[[R0:[0-9]+]], r[[R1:[0-9]+]]
|
||||
%p = icmp ugt i32 %x, %y
|
||||
; CHECK-NEXT: @p0 bra
|
||||
; CHECK-NOT: bra
|
||||
br i1 %p, label %clause.if, label %clause.else
|
||||
clause.if:
|
||||
; CHECK: mov.u32 r0, r1
|
||||
; CHECK: mov.u32 r{{[0-9]+}}, r[[R0]]
|
||||
ret i32 %x
|
||||
clause.else:
|
||||
; CHECK: mov.u32 r0, r2
|
||||
; CHECK: mov.u32 r{{[0-9]+}}, r[[R1]]
|
||||
ret i32 %y
|
||||
}
|
||||
|
@ -4,9 +4,9 @@
|
||||
; (note: we convert back to i32 to return)
|
||||
|
||||
define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
|
||||
; CHECK: setp.gt.b16 p0, rh1, 0
|
||||
; CHECK-NEXT: and.pred p0, p0, p1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.b16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0
|
||||
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i16 %x to i1
|
||||
%b = and i1 %a, %y
|
||||
@ -15,9 +15,9 @@ define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
|
||||
; CHECK: setp.gt.b32 p0, r1, 0
|
||||
; CHECK-NEXT: and.pred p0, p0, p1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.b32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0
|
||||
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i32 %x to i1
|
||||
%b = and i1 %a, %y
|
||||
@ -26,9 +26,9 @@ define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
|
||||
; CHECK: setp.gt.b64 p0, rd1, 0
|
||||
; CHECK-NEXT: and.pred p0, p0, p1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.b64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0
|
||||
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i64 %x to i1
|
||||
%b = and i1 %a, %y
|
||||
@ -37,9 +37,9 @@ define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
|
||||
; CHECK: setp.gt.b32 p0, r1, 0
|
||||
; CHECK-NEXT: and.pred p0, p0, p1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.b32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0
|
||||
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui float %x to i1
|
||||
%b = and i1 %a, %y
|
||||
@ -48,9 +48,9 @@ define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
|
||||
; CHECK: setp.gt.b64 p0, rd1, 0
|
||||
; CHECK-NEXT: and.pred p0, p0, p1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.b64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0
|
||||
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui double %x to i1
|
||||
%b = and i1 %a, %y
|
||||
@ -61,35 +61,35 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
|
||||
; i16
|
||||
|
||||
define ptx_device i16 @cvt_i16_preds(i1 %x) {
|
||||
; CHECK: selp.u16 rh0, 1, 0, p1;
|
||||
; CHECK: selp.u16 rh{{[0-9]+}}, 1, 0, p{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i1 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_i32(i32 %x) {
|
||||
; CHECK: cvt.u16.u32 rh0, r1;
|
||||
; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i32 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_i64(i64 %x) {
|
||||
; CHECK: cvt.u16.u64 rh0, rd1;
|
||||
; CHECK: cvt.u16.u64 rh{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i64 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u16.f32 rh0, r1;
|
||||
; CHECK: cvt.rzi.u16.f32 rh{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui float %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u16.f64 rh0, rd1;
|
||||
; CHECK: cvt.rzi.u16.f64 rh{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui double %x to i16
|
||||
ret i16 %a
|
||||
@ -98,35 +98,35 @@ define ptx_device i16 @cvt_i16_f64(double %x) {
|
||||
; i32
|
||||
|
||||
define ptx_device i32 @cvt_i32_preds(i1 %x) {
|
||||
; CHECK: selp.u32 r0, 1, 0, p1;
|
||||
; CHECK: selp.u32 r{{[0-9]+}}, 1, 0, p{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i1 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_i16(i16 %x) {
|
||||
; CHECK: cvt.u32.u16 r0, rh1;
|
||||
; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i16 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_i64(i64 %x) {
|
||||
; CHECK: cvt.u32.u64 r0, rd1;
|
||||
; CHECK: cvt.u32.u64 r{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = trunc i64 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u32.f32 r0, r1;
|
||||
; CHECK: cvt.rzi.u32.f32 r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui float %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u32.f64 r0, rd1;
|
||||
; CHECK: cvt.rzi.u32.f64 r{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui double %x to i32
|
||||
ret i32 %a
|
||||
@ -135,35 +135,35 @@ define ptx_device i32 @cvt_i32_f64(double %x) {
|
||||
; i64
|
||||
|
||||
define ptx_device i64 @cvt_i64_preds(i1 %x) {
|
||||
; CHECK: selp.u64 rd0, 1, 0, p1;
|
||||
; CHECK: selp.u64 rd{{[0-9]+}}, 1, 0, p{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i1 %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_i16(i16 %x) {
|
||||
; CHECK: cvt.u64.u16 rd0, rh1;
|
||||
; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i16 %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_i32(i32 %x) {
|
||||
; CHECK: cvt.u64.u32 rd0, r1;
|
||||
; CHECK: cvt.u64.u32 rd{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = zext i32 %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u64.f32 rd0, r1;
|
||||
; CHECK: cvt.rzi.u64.f32 rd{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui float %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u64.f64 rd0, rd1;
|
||||
; CHECK: cvt.rzi.u64.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i64
|
||||
ret i64 %a
|
||||
@ -172,35 +172,35 @@ define ptx_device i64 @cvt_i64_f64(double %x) {
|
||||
; f32
|
||||
|
||||
define ptx_device float @cvt_f32_preds(i1 %x) {
|
||||
; CHECK: selp.f32 r0, 0F3F800000, 0F00000000, p1;
|
||||
; CHECK: selp.f32 r{{[0-9]+}}, 0F3F800000, 0F00000000, p{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i1 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_i16(i16 %x) {
|
||||
; CHECK: cvt.rn.f32.u16 r0, rh1;
|
||||
; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i16 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_i32(i32 %x) {
|
||||
; CHECK: cvt.rn.f32.u32 r0, r1;
|
||||
; CHECK: cvt.rn.f32.u32 r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i32 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_i64(i64 %x) {
|
||||
; CHECK: cvt.rn.f32.u64 r0, rd1;
|
||||
; CHECK: cvt.rn.f32.u64 r{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i64 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_f64(double %x) {
|
||||
; CHECK: cvt.rn.f32.f64 r0, rd1;
|
||||
; CHECK: cvt.rn.f32.f64 r{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptrunc double %x to float
|
||||
ret float %a
|
||||
@ -209,35 +209,35 @@ define ptx_device float @cvt_f32_f64(double %x) {
|
||||
; f64
|
||||
|
||||
define ptx_device double @cvt_f64_preds(i1 %x) {
|
||||
; CHECK: selp.f64 rd0, 0D3F80000000000000, 0D0000000000000000, p1;
|
||||
; CHECK: selp.f64 rd{{[0-9]+}}, 0D3F80000000000000, 0D0000000000000000, p{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i1 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_i16(i16 %x) {
|
||||
; CHECK: cvt.rn.f64.u16 rd0, rh1;
|
||||
; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i16 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_i32(i32 %x) {
|
||||
; CHECK: cvt.rn.f64.u32 rd0, r1;
|
||||
; CHECK: cvt.rn.f64.u32 rd{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i32 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_i64(i64 %x) {
|
||||
; CHECK: cvt.rn.f64.u64 rd0, rd1;
|
||||
; CHECK: cvt.rn.f64.u64 rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i64 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_f32(float %x) {
|
||||
; CHECK: cvt.f64.f32 rd0, r1;
|
||||
; CHECK: cvt.f64.f32 rd{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fpext float %x to double
|
||||
ret double %a
|
||||
|
@ -1,14 +1,14 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: div.f32 r0, r1, r2;
|
||||
; CHECK: div.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv float %x, %y
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: div.f64 rd0, rd1, rd2;
|
||||
; CHECK: div.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv double %x, %y
|
||||
ret double %a
|
||||
|
@ -1,14 +1,14 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: div.rn.f32 r0, r1, r2;
|
||||
; CHECK: div.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv float %x, %y
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: div.rn.f64 rd0, rd1, rd2;
|
||||
; CHECK: div.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fdiv double %x, %y
|
||||
ret double %a
|
||||
|
@ -1,14 +1,14 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x) {
|
||||
; CHECK: neg.f32 r0, r1;
|
||||
; CHECK: neg.f32 r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%y = fsub float -0.000000e+00, %x
|
||||
ret float %y
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x) {
|
||||
; CHECK: neg.f64 rd0, rd1;
|
||||
; CHECK: neg.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%y = fsub double -0.000000e+00, %x
|
||||
ret double %y
|
||||
|
@ -63,7 +63,7 @@
|
||||
|
||||
define ptx_device i16 @t1_u16(i16* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u16 rh0, [r1];
|
||||
;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load i16* %p
|
||||
ret i16 %x
|
||||
@ -71,7 +71,7 @@ entry:
|
||||
|
||||
define ptx_device i32 @t1_u32(i32* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u32 r0, [r1];
|
||||
;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load i32* %p
|
||||
ret i32 %x
|
||||
@ -79,7 +79,7 @@ entry:
|
||||
|
||||
define ptx_device i64 @t1_u64(i64* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u64 rd0, [r1];
|
||||
;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load i64* %p
|
||||
ret i64 %x
|
||||
@ -87,7 +87,7 @@ entry:
|
||||
|
||||
define ptx_device float @t1_f32(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 r0, [r1];
|
||||
;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load float* %p
|
||||
ret float %x
|
||||
@ -95,7 +95,7 @@ entry:
|
||||
|
||||
define ptx_device double @t1_f64(double* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f64 rd0, [r1];
|
||||
;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load double* %p
|
||||
ret double %x
|
||||
@ -103,7 +103,7 @@ entry:
|
||||
|
||||
define ptx_device i16 @t2_u16(i16* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u16 rh0, [r1+2];
|
||||
;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i16* %p, i32 1
|
||||
%x = load i16* %i
|
||||
@ -112,7 +112,7 @@ entry:
|
||||
|
||||
define ptx_device i32 @t2_u32(i32* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u32 r0, [r1+4];
|
||||
;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i32* %p, i32 1
|
||||
%x = load i32* %i
|
||||
@ -121,7 +121,7 @@ entry:
|
||||
|
||||
define ptx_device i64 @t2_u64(i64* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u64 rd0, [r1+8];
|
||||
;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i64* %p, i32 1
|
||||
%x = load i64* %i
|
||||
@ -130,7 +130,7 @@ entry:
|
||||
|
||||
define ptx_device float @t2_f32(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 r0, [r1+4];
|
||||
;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 1
|
||||
%x = load float* %i
|
||||
@ -139,7 +139,7 @@ entry:
|
||||
|
||||
define ptx_device double @t2_f64(double* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f64 rd0, [r1+8];
|
||||
;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr double* %p, i32 1
|
||||
%x = load double* %i
|
||||
@ -148,9 +148,9 @@ entry:
|
||||
|
||||
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 1;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.u16 rh0, [r0];
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
|
||||
%i = getelementptr i16* %p, i32 %q
|
||||
%x = load i16* %i
|
||||
ret i16 %x
|
||||
@ -158,9 +158,9 @@ entry:
|
||||
|
||||
define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.u32 r0, [r0];
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
|
||||
%i = getelementptr i32* %p, i32 %q
|
||||
%x = load i32* %i
|
||||
ret i32 %x
|
||||
@ -168,9 +168,9 @@ entry:
|
||||
|
||||
define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 3;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.u64 rd0, [r0];
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
%i = getelementptr i64* %p, i32 %q
|
||||
%x = load i64* %i
|
||||
ret i64 %x
|
||||
@ -178,9 +178,9 @@ entry:
|
||||
|
||||
define ptx_device float @t3_f32(float* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.f32 r0, [r0];
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
|
||||
%i = getelementptr float* %p, i32 %q
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
@ -188,9 +188,9 @@ entry:
|
||||
|
||||
define ptx_device double @t3_f64(double* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 3;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.f64 rd0, [r0];
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
%i = getelementptr double* %p, i32 %q
|
||||
%x = load double* %i
|
||||
ret double %x
|
||||
@ -198,8 +198,8 @@ entry:
|
||||
|
||||
define ptx_device i16 @t4_global_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i16;
|
||||
;CHECK-NEXT: ld.global.u16 rh0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
|
||||
;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
|
||||
%x = load i16* %i
|
||||
@ -208,8 +208,8 @@ entry:
|
||||
|
||||
define ptx_device i32 @t4_global_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i32;
|
||||
;CHECK-NEXT: ld.global.u32 r0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
|
||||
;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
|
||||
%x = load i32* %i
|
||||
@ -218,8 +218,8 @@ entry:
|
||||
|
||||
define ptx_device i64 @t4_global_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i64;
|
||||
;CHECK-NEXT: ld.global.u64 rd0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
|
||||
;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
|
||||
%x = load i64* %i
|
||||
@ -228,8 +228,8 @@ entry:
|
||||
|
||||
define ptx_device float @t4_global_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_float;
|
||||
;CHECK-NEXT: ld.global.f32 r0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
|
||||
;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
|
||||
%x = load float* %i
|
||||
@ -238,8 +238,8 @@ entry:
|
||||
|
||||
define ptx_device double @t4_global_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_double;
|
||||
;CHECK-NEXT: ld.global.f64 rd0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
|
||||
;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
|
||||
%x = load double* %i
|
||||
@ -248,8 +248,8 @@ entry:
|
||||
|
||||
define ptx_device i16 @t4_const_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_constant_i16;
|
||||
;CHECK-NEXT: ld.const.u16 rh0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i16;
|
||||
;CHECK-NEXT: ld.const.u16 rh{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
|
||||
%x = load i16 addrspace(1)* %i
|
||||
@ -258,8 +258,8 @@ entry:
|
||||
|
||||
define ptx_device i32 @t4_const_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_constant_i32;
|
||||
;CHECK-NEXT: ld.const.u32 r0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i32;
|
||||
;CHECK-NEXT: ld.const.u32 r{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
|
||||
%x = load i32 addrspace(1)* %i
|
||||
@ -268,8 +268,8 @@ entry:
|
||||
|
||||
define ptx_device i64 @t4_const_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_constant_i64;
|
||||
;CHECK-NEXT: ld.const.u64 rd0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i64;
|
||||
;CHECK-NEXT: ld.const.u64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
|
||||
%x = load i64 addrspace(1)* %i
|
||||
@ -278,8 +278,8 @@ entry:
|
||||
|
||||
define ptx_device float @t4_const_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_constant_float;
|
||||
;CHECK-NEXT: ld.const.f32 r0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_float;
|
||||
;CHECK-NEXT: ld.const.f32 r{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
|
||||
%x = load float addrspace(1)* %i
|
||||
@ -288,8 +288,8 @@ entry:
|
||||
|
||||
define ptx_device double @t4_const_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_constant_double;
|
||||
;CHECK-NEXT: ld.const.f64 rd0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_double;
|
||||
;CHECK-NEXT: ld.const.f64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
|
||||
%x = load double addrspace(1)* %i
|
||||
@ -298,8 +298,8 @@ entry:
|
||||
|
||||
define ptx_device i16 @t4_local_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_i16;
|
||||
;CHECK-NEXT: ld.local.u16 rh0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
|
||||
;CHECK-NEXT: ld.local.u16 rh{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
|
||||
%x = load i16 addrspace(2)* %i
|
||||
@ -308,8 +308,8 @@ entry:
|
||||
|
||||
define ptx_device i32 @t4_local_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_i32;
|
||||
;CHECK-NEXT: ld.local.u32 r0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32;
|
||||
;CHECK-NEXT: ld.local.u32 r{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
|
||||
%x = load i32 addrspace(2)* %i
|
||||
@ -318,8 +318,8 @@ entry:
|
||||
|
||||
define ptx_device i64 @t4_local_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_i64;
|
||||
;CHECK-NEXT: ld.local.u64 rd0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64;
|
||||
;CHECK-NEXT: ld.local.u64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
|
||||
%x = load i64 addrspace(2)* %i
|
||||
@ -328,8 +328,8 @@ entry:
|
||||
|
||||
define ptx_device float @t4_local_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_float;
|
||||
;CHECK-NEXT: ld.local.f32 r0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float;
|
||||
;CHECK-NEXT: ld.local.f32 r{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
|
||||
%x = load float addrspace(2)* %i
|
||||
@ -338,8 +338,8 @@ entry:
|
||||
|
||||
define ptx_device double @t4_local_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_double;
|
||||
;CHECK-NEXT: ld.local.f64 rd0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double;
|
||||
;CHECK-NEXT: ld.local.f64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
|
||||
%x = load double addrspace(2)* %i
|
||||
@ -348,8 +348,8 @@ entry:
|
||||
|
||||
define ptx_device i16 @t4_shared_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_i16;
|
||||
;CHECK-NEXT: ld.shared.u16 rh0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
|
||||
;CHECK-NEXT: ld.shared.u16 rh{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
|
||||
%x = load i16 addrspace(4)* %i
|
||||
@ -358,8 +358,8 @@ entry:
|
||||
|
||||
define ptx_device i32 @t4_shared_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_i32;
|
||||
;CHECK-NEXT: ld.shared.u32 r0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32;
|
||||
;CHECK-NEXT: ld.shared.u32 r{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
|
||||
%x = load i32 addrspace(4)* %i
|
||||
@ -368,8 +368,8 @@ entry:
|
||||
|
||||
define ptx_device i64 @t4_shared_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_i64;
|
||||
;CHECK-NEXT: ld.shared.u64 rd0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64;
|
||||
;CHECK-NEXT: ld.shared.u64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
|
||||
%x = load i64 addrspace(4)* %i
|
||||
@ -378,8 +378,8 @@ entry:
|
||||
|
||||
define ptx_device float @t4_shared_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_float;
|
||||
;CHECK-NEXT: ld.shared.f32 r0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float;
|
||||
;CHECK-NEXT: ld.shared.f32 r{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
|
||||
%x = load float addrspace(4)* %i
|
||||
@ -388,8 +388,8 @@ entry:
|
||||
|
||||
define ptx_device double @t4_shared_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_double;
|
||||
;CHECK-NEXT: ld.shared.f64 rd0, [r0];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double;
|
||||
;CHECK-NEXT: ld.shared.f64 rd{{[0-9]+}}, [r[[R0]]];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
|
||||
%x = load double addrspace(4)* %i
|
||||
@ -398,8 +398,8 @@ entry:
|
||||
|
||||
define ptx_device i16 @t5_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i16;
|
||||
;CHECK-NEXT: ld.global.u16 rh0, [r0+2];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
|
||||
;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]+2];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
|
||||
%x = load i16* %i
|
||||
@ -408,8 +408,8 @@ entry:
|
||||
|
||||
define ptx_device i32 @t5_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i32;
|
||||
;CHECK-NEXT: ld.global.u32 r0, [r0+4];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
|
||||
;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
|
||||
%x = load i32* %i
|
||||
@ -418,8 +418,8 @@ entry:
|
||||
|
||||
define ptx_device i64 @t5_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i64;
|
||||
;CHECK-NEXT: ld.global.u64 rd0, [r0+8];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
|
||||
;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]+8];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
|
||||
%x = load i64* %i
|
||||
@ -428,8 +428,8 @@ entry:
|
||||
|
||||
define ptx_device float @t5_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_float;
|
||||
;CHECK-NEXT: ld.global.f32 r0, [r0+4];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
|
||||
;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
|
||||
%x = load float* %i
|
||||
@ -438,8 +438,8 @@ entry:
|
||||
|
||||
define ptx_device double @t5_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_double;
|
||||
;CHECK-NEXT: ld.global.f64 rd0, [r0+8];
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
|
||||
;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]+8];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
|
||||
%x = load double* %i
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
define ptx_device float @test_sqrt_f32(float %x) {
|
||||
entry:
|
||||
; CHECK: sqrt.rn.f32 r0, r1;
|
||||
; CHECK: sqrt.rn.f32 r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%y = call float @llvm.sqrt.f32(float %x)
|
||||
ret float %y
|
||||
@ -10,7 +10,7 @@ entry:
|
||||
|
||||
define ptx_device double @test_sqrt_f64(double %x) {
|
||||
entry:
|
||||
; CHECK: sqrt.rn.f64 rd0, rd1;
|
||||
; CHECK: sqrt.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%y = call double @llvm.sqrt.f64(double %x)
|
||||
ret double %y
|
||||
@ -18,7 +18,7 @@ entry:
|
||||
|
||||
define ptx_device float @test_sin_f32(float %x) {
|
||||
entry:
|
||||
; CHECK: sin.approx.f32 r0, r1;
|
||||
; CHECK: sin.approx.f32 r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%y = call float @llvm.sin.f32(float %x)
|
||||
ret float %y
|
||||
@ -26,7 +26,7 @@ entry:
|
||||
|
||||
define ptx_device double @test_sin_f64(double %x) {
|
||||
entry:
|
||||
; CHECK: sin.approx.f64 rd0, rd1;
|
||||
; CHECK: sin.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%y = call double @llvm.sin.f64(double %x)
|
||||
ret double %y
|
||||
@ -34,7 +34,7 @@ entry:
|
||||
|
||||
define ptx_device float @test_cos_f32(float %x) {
|
||||
entry:
|
||||
; CHECK: cos.approx.f32 r0, r1;
|
||||
; CHECK: cos.approx.f32 r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%y = call float @llvm.cos.f32(float %x)
|
||||
ret float %y
|
||||
@ -42,7 +42,7 @@ entry:
|
||||
|
||||
define ptx_device double @test_cos_f64(double %x) {
|
||||
entry:
|
||||
; CHECK: cos.approx.f64 rd0, rd1;
|
||||
; CHECK: cos.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%y = call double @llvm.cos.f64(double %x)
|
||||
ret double %y
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y, float %z) {
|
||||
; CHECK: mad.rn.f32 r0, r1, r2, r3;
|
||||
; CHECK: mad.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fmul float %x, %y
|
||||
%b = fadd float %a, %z
|
||||
@ -9,7 +9,7 @@ define ptx_device float @t1_f32(float %x, float %y, float %z) {
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y, double %z) {
|
||||
; CHECK: mad.rn.f64 rd0, rd1, rd2, rd3;
|
||||
; CHECK: mad.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fmul double %x, %y
|
||||
%b = fadd double %a, %z
|
||||
|
@ -1,61 +1,61 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i16 @t1_u16() {
|
||||
; CHECK: mov.u16 rh0, 0;
|
||||
; CHECK: mov.u16 rh{{[0-9]+}}, 0;
|
||||
; CHECK: ret;
|
||||
ret i16 0
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32() {
|
||||
; CHECK: mov.u32 r0, 0;
|
||||
; CHECK: mov.u32 r{{[0-9]+}}, 0;
|
||||
; CHECK: ret;
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define ptx_device i64 @t1_u64() {
|
||||
; CHECK: mov.u64 rd0, 0;
|
||||
; CHECK: mov.u64 rd{{[0-9]+}}, 0;
|
||||
; CHECK: ret;
|
||||
ret i64 0
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32() {
|
||||
; CHECK: mov.f32 r0, 0F00000000;
|
||||
; CHECK: mov.f32 r{{[0-9]+}}, 0F00000000;
|
||||
; CHECK: ret;
|
||||
ret float 0.0
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64() {
|
||||
; CHECK: mov.f64 rd0, 0D0000000000000000;
|
||||
; CHECK: mov.f64 rd{{[0-9]+}}, 0D0000000000000000;
|
||||
; CHECK: ret;
|
||||
ret double 0.0
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16 %x) {
|
||||
; CHECK: mov.u16 rh0, rh1;
|
||||
; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32 %x) {
|
||||
; CHECK: mov.u32 r0, r1;
|
||||
; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t2_u64(i64 %x) {
|
||||
; CHECK: mov.u64 rd0, rd1;
|
||||
; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t3_f32(float %x) {
|
||||
; CHECK: mov.u32 r0, r1;
|
||||
; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t3_f64(double %x) {
|
||||
; CHECK: mov.u64 rd0, rd1;
|
||||
; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
ret double %x
|
||||
}
|
||||
|
@ -11,28 +11,28 @@
|
||||
;}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: mul.rn.f32 r0, r1, r2
|
||||
; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: mul.rn.f64 rd0, rd1, rd2
|
||||
; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul double %x, %y
|
||||
ret double %z
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: mul.rn.f32 r0, r1, 0F40A00000;
|
||||
; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F40A00000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul float %x, 5.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: mul.rn.f64 rd0, rd1, 0D4014000000000000;
|
||||
; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D4014000000000000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul double %x, 5.0
|
||||
ret double %z
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
; CHECK: .func (.reg .b32 r0) test_parameter_order (.reg .b32 r1, .reg .b32 r2, .reg .b32 r3, .reg .b32 r4)
|
||||
; CHECK: .func (.reg .b32 r{{[0-9]+}}) test_parameter_order (.reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}})
|
||||
define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
|
||||
; CHECK: sub.u32 r0, r2, r3
|
||||
; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
|
||||
%result = sub i32 %b, %c
|
||||
ret i32 %result
|
||||
}
|
||||
|
@ -1,25 +1,25 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) {
|
||||
; CHECK: selp.u32 r0, r1, r2, p1;
|
||||
; CHECK: selp.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}};
|
||||
%a = select i1 %x, i32 %y, i32 %z
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) {
|
||||
; CHECK: selp.u64 rd0, rd1, rd2, p1;
|
||||
; CHECK: selp.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}};
|
||||
%a = select i1 %x, i64 %y, i64 %z
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) {
|
||||
; CHECK: selp.f32 r0, r1, r2, p1;
|
||||
; CHECK: selp.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}};
|
||||
%a = select i1 %x, float %y, float %z
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) {
|
||||
; CHECK: selp.f64 rd0, rd1, rd2, p1;
|
||||
; CHECK: selp.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}};
|
||||
%a = select i1 %x, double %y, double %z
|
||||
ret double %a
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.eq.u32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp eq i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -10,8 +10,8 @@ define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.ne.u32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp ne i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -19,8 +19,8 @@ define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.lt.u32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp ult i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -28,8 +28,8 @@ define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.le.u32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.le.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp ule i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -37,8 +37,8 @@ define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.gt.u32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp ugt i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -46,8 +46,8 @@ define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.ge.u32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.ge.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp uge i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -55,8 +55,8 @@ define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.lt.s32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp slt i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -64,8 +64,8 @@ define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.le.s32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.le.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp sle i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -73,8 +73,8 @@ define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.gt.s32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp sgt i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -82,8 +82,8 @@ define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.ge.s32 p0, r1, r2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.ge.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp sge i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
@ -91,8 +91,8 @@ define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
|
||||
; CHECK: setp.eq.u32 p0, r1, 1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp eq i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -100,8 +100,8 @@ define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
|
||||
; CHECK: setp.ne.u32 p0, r1, 1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp ne i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -109,8 +109,8 @@ define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
|
||||
; CHECK: setp.eq.u32 p0, r1, 0;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp ult i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -118,8 +118,8 @@ define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
|
||||
; CHECK: setp.lt.u32 p0, r1, 2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp ule i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -127,8 +127,8 @@ define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
|
||||
; CHECK: setp.gt.u32 p0, r1, 1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp ugt i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -136,8 +136,8 @@ define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
|
||||
; CHECK: setp.ne.u32 p0, r1, 0;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp uge i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -145,8 +145,8 @@ define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
|
||||
; CHECK: setp.lt.s32 p0, r1, 1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp slt i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -154,8 +154,8 @@ define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
|
||||
; CHECK: setp.lt.s32 p0, r1, 2;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp sle i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -163,8 +163,8 @@ define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
|
||||
; CHECK: setp.gt.s32 p0, r1, 1;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp sgt i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -172,8 +172,8 @@ define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
|
||||
; CHECK: setp.gt.s32 p0, r1, 0;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%p = icmp sge i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
@ -181,9 +181,9 @@ define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
|
||||
; CHECK: setp.gt.u32 p0, r3, r4;
|
||||
; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, p[[P0]];
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%c = icmp eq i32 %x, %y
|
||||
%d = icmp ugt i32 %u, %v
|
||||
@ -193,9 +193,9 @@ define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
|
||||
; CHECK: setp.gt.b32 p0, r3, 0;
|
||||
; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0;
|
||||
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
|
||||
; CHECK: setp.gt.b32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
|
||||
; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, !p[[P0]];
|
||||
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
|
||||
; CHECK-NEXT: ret;
|
||||
%c = trunc i32 %w to i1
|
||||
%d = icmp eq i32 %x, %y
|
||||
|
@ -1,21 +1,21 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @t1(i32 %x, i32 %y) {
|
||||
; CHECK: shl.b32 r0, r1, r2
|
||||
; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
|
||||
%z = shl i32 %x, %y
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2(i32 %x) {
|
||||
; CHECK: shl.b32 r0, r1, 3
|
||||
; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, 3
|
||||
%z = shl i32 %x, 3
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t3(i32 %x) {
|
||||
; CHECK: shl.b32 r0, 3, r1
|
||||
; CHECK: shl.b32 r{{[0-9]+}}, 3, r{{[0-9]+}}
|
||||
%z = shl i32 3, %x
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
|
@ -1,42 +1,42 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @t1(i32 %x, i32 %y) {
|
||||
; CHECK: shr.u32 r0, r1, r2
|
||||
; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
|
||||
%z = lshr i32 %x, %y
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2(i32 %x) {
|
||||
; CHECK: shr.u32 r0, r1, 3
|
||||
; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, 3
|
||||
%z = lshr i32 %x, 3
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t3(i32 %x) {
|
||||
; CHECK: shr.u32 r0, 3, r1
|
||||
; CHECK: shr.u32 r{{[0-9]+}}, 3, r{{[0-9]+}}
|
||||
%z = lshr i32 3, %x
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4(i32 %x, i32 %y) {
|
||||
; CHECK: shr.s32 r0, r1, r2
|
||||
; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
|
||||
%z = ashr i32 %x, %y
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t5(i32 %x) {
|
||||
; CHECK: shr.s32 r0, r1, 3
|
||||
; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, 3
|
||||
%z = ashr i32 %x, 3
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t6(i32 %x) {
|
||||
; CHECK: shr.s32 r0, -3, r1
|
||||
; CHECK: shr.s32 r{{[0-9]+}}, -3, r{{[0-9]+}}
|
||||
%z = ashr i32 -3, %x
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
|
@ -63,7 +63,7 @@
|
||||
|
||||
define ptx_device void @t1_u16(i16* %p, i16 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u16 [r1], rh1;
|
||||
;CHECK: st.global.u16 [r{{[0-9]+}}], rh{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
store i16 %x, i16* %p
|
||||
ret void
|
||||
@ -71,7 +71,7 @@ entry:
|
||||
|
||||
define ptx_device void @t1_u32(i32* %p, i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u32 [r1], r2;
|
||||
;CHECK: st.global.u32 [r{{[0-9]+}}], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
store i32 %x, i32* %p
|
||||
ret void
|
||||
@ -79,7 +79,7 @@ entry:
|
||||
|
||||
define ptx_device void @t1_u64(i64* %p, i64 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u64 [r1], rd1;
|
||||
;CHECK: st.global.u64 [r{{[0-9]+}}], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
store i64 %x, i64* %p
|
||||
ret void
|
||||
@ -87,7 +87,7 @@ entry:
|
||||
|
||||
define ptx_device void @t1_f32(float* %p, float %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f32 [r1], r2;
|
||||
;CHECK: st.global.f32 [r{{[0-9]+}}], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
store float %x, float* %p
|
||||
ret void
|
||||
@ -95,7 +95,7 @@ entry:
|
||||
|
||||
define ptx_device void @t1_f64(double* %p, double %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f64 [r1], rd1;
|
||||
;CHECK: st.global.f64 [r{{[0-9]+}}], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
store double %x, double* %p
|
||||
ret void
|
||||
@ -103,7 +103,7 @@ entry:
|
||||
|
||||
define ptx_device void @t2_u16(i16* %p, i16 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u16 [r1+2], rh1;
|
||||
;CHECK: st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i16* %p, i32 1
|
||||
store i16 %x, i16* %i
|
||||
@ -112,7 +112,7 @@ entry:
|
||||
|
||||
define ptx_device void @t2_u32(i32* %p, i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u32 [r1+4], r2;
|
||||
;CHECK: st.global.u32 [r{{[0-9]+}}+4], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i32* %p, i32 1
|
||||
store i32 %x, i32* %i
|
||||
@ -121,7 +121,7 @@ entry:
|
||||
|
||||
define ptx_device void @t2_u64(i64* %p, i64 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u64 [r1+8], rd1;
|
||||
;CHECK: st.global.u64 [r{{[0-9]+}}+8], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i64* %p, i32 1
|
||||
store i64 %x, i64* %i
|
||||
@ -130,7 +130,7 @@ entry:
|
||||
|
||||
define ptx_device void @t2_f32(float* %p, float %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f32 [r1+4], r2;
|
||||
;CHECK: st.global.f32 [r{{[0-9]+}}+4], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 1
|
||||
store float %x, float* %i
|
||||
@ -139,7 +139,7 @@ entry:
|
||||
|
||||
define ptx_device void @t2_f64(double* %p, double %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f64 [r1+8], rd1;
|
||||
;CHECK: st.global.f64 [r{{[0-9]+}}+8], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr double* %p, i32 1
|
||||
store double %x, double* %i
|
||||
@ -148,9 +148,9 @@ entry:
|
||||
|
||||
define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 1;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: st.global.u16 [r0], rh1;
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i16* %p, i32 %q
|
||||
store i16 %x, i16* %i
|
||||
@ -159,9 +159,9 @@ entry:
|
||||
|
||||
define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: st.global.u32 [r0], r3;
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i32* %p, i32 %q
|
||||
store i32 %x, i32* %i
|
||||
@ -170,9 +170,9 @@ entry:
|
||||
|
||||
define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 3;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: st.global.u64 [r0], rd1;
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i64* %p, i32 %q
|
||||
store i64 %x, i64* %i
|
||||
@ -181,9 +181,9 @@ entry:
|
||||
|
||||
define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: st.global.f32 [r0], r3;
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 %q
|
||||
store float %x, float* %i
|
||||
@ -192,9 +192,9 @@ entry:
|
||||
|
||||
define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 3;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: st.global.f64 [r0], rd1;
|
||||
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
|
||||
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
|
||||
;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr double* %p, i32 %q
|
||||
store double %x, double* %i
|
||||
@ -203,8 +203,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_global_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i16;
|
||||
;CHECK-NEXT: st.global.u16 [r0], rh1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
|
||||
;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
|
||||
store i16 %x, i16* %i
|
||||
@ -213,8 +213,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_global_u32(i32 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i32;
|
||||
;CHECK-NEXT: st.global.u32 [r0], r1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
|
||||
;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
|
||||
store i32 %x, i32* %i
|
||||
@ -223,8 +223,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_global_u64(i64 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i64;
|
||||
;CHECK-NEXT: st.global.u64 [r0], rd1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
|
||||
;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
|
||||
store i64 %x, i64* %i
|
||||
@ -233,8 +233,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_global_f32(float %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_float;
|
||||
;CHECK-NEXT: st.global.f32 [r0], r1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
|
||||
;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
|
||||
store float %x, float* %i
|
||||
@ -243,8 +243,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_global_f64(double %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_double;
|
||||
;CHECK-NEXT: st.global.f64 [r0], rd1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
|
||||
;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
|
||||
store double %x, double* %i
|
||||
@ -253,8 +253,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_local_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_i16;
|
||||
;CHECK-NEXT: st.local.u16 [r0], rh1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
|
||||
;CHECK-NEXT: st.local.u16 [r[[R0]]], rh{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
|
||||
store i16 %x, i16 addrspace(2)* %i
|
||||
@ -263,8 +263,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_local_u32(i32 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_i32;
|
||||
;CHECK-NEXT: st.local.u32 [r0], r1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32;
|
||||
;CHECK-NEXT: st.local.u32 [r[[R0]]], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
|
||||
store i32 %x, i32 addrspace(2)* %i
|
||||
@ -273,8 +273,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_local_u64(i64 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_i64;
|
||||
;CHECK-NEXT: st.local.u64 [r0], rd1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64;
|
||||
;CHECK-NEXT: st.local.u64 [r[[R0]]], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
|
||||
store i64 %x, i64 addrspace(2)* %i
|
||||
@ -283,8 +283,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_local_f32(float %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_float;
|
||||
;CHECK-NEXT: st.local.f32 [r0], r1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float;
|
||||
;CHECK-NEXT: st.local.f32 [r[[R0]]], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
|
||||
store float %x, float addrspace(2)* %i
|
||||
@ -293,8 +293,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_local_f64(double %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_local_double;
|
||||
;CHECK-NEXT: st.local.f64 [r0], rd1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double;
|
||||
;CHECK-NEXT: st.local.f64 [r[[R0]]], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
|
||||
store double %x, double addrspace(2)* %i
|
||||
@ -303,8 +303,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_shared_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_i16;
|
||||
;CHECK-NEXT: st.shared.u16 [r0], rh1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
|
||||
;CHECK-NEXT: st.shared.u16 [r[[R0]]], rh{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
|
||||
store i16 %x, i16 addrspace(4)* %i
|
||||
@ -313,8 +313,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_shared_u32(i32 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_i32;
|
||||
;CHECK-NEXT: st.shared.u32 [r0], r1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32;
|
||||
;CHECK-NEXT: st.shared.u32 [r[[R0]]], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
|
||||
store i32 %x, i32 addrspace(4)* %i
|
||||
@ -323,8 +323,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_shared_u64(i64 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_i64;
|
||||
;CHECK-NEXT: st.shared.u64 [r0], rd1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64;
|
||||
;CHECK-NEXT: st.shared.u64 [r[[R0]]], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
|
||||
store i64 %x, i64 addrspace(4)* %i
|
||||
@ -333,8 +333,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_shared_f32(float %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_float;
|
||||
;CHECK-NEXT: st.shared.f32 [r0], r1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float;
|
||||
;CHECK-NEXT: st.shared.f32 [r[[R0]]], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
|
||||
store float %x, float addrspace(4)* %i
|
||||
@ -343,8 +343,8 @@ entry:
|
||||
|
||||
define ptx_device void @t4_shared_f64(double %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_shared_double;
|
||||
;CHECK-NEXT: st.shared.f64 [r0], rd1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double;
|
||||
;CHECK-NEXT: st.shared.f64 [r[[R0]]], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
|
||||
store double %x, double addrspace(4)* %i
|
||||
@ -353,8 +353,8 @@ entry:
|
||||
|
||||
define ptx_device void @t5_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i16;
|
||||
;CHECK-NEXT: st.global.u16 [r0+2], rh1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
|
||||
;CHECK-NEXT: st.global.u16 [r[[R0]]+2], rh{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
|
||||
store i16 %x, i16* %i
|
||||
@ -363,8 +363,8 @@ entry:
|
||||
|
||||
define ptx_device void @t5_u32(i32 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i32;
|
||||
;CHECK-NEXT: st.global.u32 [r0+4], r1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
|
||||
;CHECK-NEXT: st.global.u32 [r[[R0]]+4], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
|
||||
store i32 %x, i32* %i
|
||||
@ -373,8 +373,8 @@ entry:
|
||||
|
||||
define ptx_device void @t5_u64(i64 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_i64;
|
||||
;CHECK-NEXT: st.global.u64 [r0+8], rd1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
|
||||
;CHECK-NEXT: st.global.u64 [r[[R0]]+8], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
|
||||
store i64 %x, i64* %i
|
||||
@ -383,8 +383,8 @@ entry:
|
||||
|
||||
define ptx_device void @t5_f32(float %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_float;
|
||||
;CHECK-NEXT: st.global.f32 [r0+4], r1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
|
||||
;CHECK-NEXT: st.global.f32 [r[[R0]]+4], r{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
|
||||
store float %x, float* %i
|
||||
@ -393,8 +393,8 @@ entry:
|
||||
|
||||
define ptx_device void @t5_f64(double %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 r0, array_double;
|
||||
;CHECK-NEXT: st.global.f64 [r0+8], rd1;
|
||||
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
|
||||
;CHECK-NEXT: st.global.f64 [r[[R0]]+8], rd{{[0-9]+}};
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
|
||||
store double %x, double* %i
|
||||
|
@ -1,70 +1,70 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
|
||||
; CHECK: sub.u16 rh0, rh1, rh2;
|
||||
; CHECK: sub.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%z = sub i16 %x, %y
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
|
||||
; CHECK: sub.u32 r0, r1, r2;
|
||||
; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%z = sub i32 %x, %y
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
|
||||
; CHECK: sub.u64 rd0, rd1, rd2;
|
||||
; CHECK: sub.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
|
||||
; CHECK-NEXT: ret;
|
||||
%z = sub i64 %x, %y
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: sub.rn.f32 r0, r1, r2
|
||||
; CHECK: sub.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: sub.rn.f64 rd0, rd1, rd2
|
||||
; CHECK: sub.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub double %x, %y
|
||||
ret double %z
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16 %x) {
|
||||
; CHECK: add.u16 rh0, rh1, -1;
|
||||
; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, -1;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = sub i16 %x, 1
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32 %x) {
|
||||
; CHECK: add.u32 r0, r1, -1;
|
||||
; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, -1;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = sub i32 %x, 1
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i64 @t2_u64(i64 %x) {
|
||||
; CHECK: add.u64 rd0, rd1, -1;
|
||||
; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, -1;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = sub i64 %x, 1
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: add.rn.f32 r0, r1, 0FBF800000;
|
||||
; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0FBF800000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub float %x, 1.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: add.rn.f64 rd0, rd1, 0DBFF0000000000000;
|
||||
; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0DBFF0000000000000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fsub double %x, 1.0
|
||||
ret double %z
|
||||
|
Loading…
Reference in New Issue
Block a user