1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

PTX: Always use registers for return values, but use .param space for device

parameters if SM >= 2.0

- Update test cases to be more robust against register allocation changes
- Bump up the number of registers to 128 per type
- Include Python script to re-generate register file with any number of
  registers

llvm-svn: 133736
This commit is contained in:
Justin Holewinski 2011-06-23 18:10:13 +00:00
parent acf53a172e
commit a1dd1dd26e
26 changed files with 1043 additions and 403 deletions

View File

@ -433,25 +433,16 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
if (!isKernel) {
decl += " (";
for (PTXMachineFunctionInfo::ret_iterator
i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
i != e; ++i) {
if (i != b) {
decl += ", ";
}
if (ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) {
decl += ".param .b";
decl += utostr(*i);
decl += " ";
decl += RETURN_PREFIX;
decl += utostr(++cnt);
} else {
decl += ".reg .";
decl += getRegisterTypeName(*i);
decl += " ";
decl += getRegisterName(*i);
}
decl += ".reg .";
decl += getRegisterTypeName(*i);
decl += " ";
decl += getRegisterName(*i);
}
decl += ")";
}

View File

@ -1,3 +1,4 @@
//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
@ -11,26 +12,18 @@
//
//===----------------------------------------------------------------------===//
// Currently, we reserve one register of each type for return values and let
// the rest be used for parameters. This is a dirty hack, but I am not sure
// how to tell LLVM that registers used for parameter passing cannot be used
// for return values.
// PTX Calling Conventions
// PTX Formal Parameter Calling Convention
def CC_PTX : CallingConv<[
CCIfType<[i1], CCAssignToReg<[P1, P2, P3, P4, P5, P6, P7]>>,
CCIfType<[i16], CCAssignToReg<[RH1, RH2, RH3, RH4, RH5, RH6, RH7]>>,
CCIfType<[i32, f32], CCAssignToReg<[R1, R2, R3, R4, R5, R6, R7]>>,
CCIfType<[i64, f64], CCAssignToReg<[RD1, RD2, RD3, RD4, RD5, RD6, RD7]>>
CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
]>;
//===----------------------------------------------------------------------===//
// Return Value Calling Conventions
//===----------------------------------------------------------------------===//
// PTX Return Value Calling Convention
def RetCC_PTX : CallingConv<[
CCIfType<[i1], CCAssignToReg<[P0]>>,
CCIfType<[i16], CCAssignToReg<[RH0]>>,
CCIfType<[i32, f32], CCAssignToReg<[R0]>>,
CCIfType<[i64, f64], CCAssignToReg<[RD0]>>
CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
]>;

View File

@ -307,49 +307,35 @@ SDValue PTXTargetLowering::
MachineFunction& MF = DAG.getMachineFunction();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
SDValue Flag;
if (ST.getShaderModel() >= PTXSubtarget::PTX_SM_2_0) {
// For SM 2.0+, we return arguments in the param space
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue ParamIndex = DAG.getTargetConstant(i, MVT::i32);
SDValue Ops[] = { Chain, ParamIndex, OutVals[i], Flag };
Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, VTs, Ops,
Flag.getNode() ? 4 : 3);
Flag = Chain.getValue(1);
// Instead of storing a physical register in our argument list, we just
// store the total size of the parameter, in bits. The ASM printer
// knows how to process this.
MFI->addRetReg(Outs[i].VT.getStoreSizeInBits());
}
} else {
// For SM < 2.0, we return arguments in registers
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
// Even though we could use the .param space for return arguments for
// device functions if SM >= 2.0 and the number of return arguments is
// only 1, we just always use registers since this makes the codegen
// easier.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign& VA = RVLocs[i];
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign& VA = RVLocs[i];
assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
unsigned Reg = VA.getLocReg();
unsigned Reg = VA.getLocReg();
DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
// Guarantee that all emitted copies are stuck together,
// avoiding something bad
Flag = Chain.getValue(1);
// Guarantee that all emitted copies are stuck together,
// avoiding something bad
Flag = Chain.getValue(1);
MFI->addRetReg(Reg);
}
MFI->addRetReg(Reg);
}
if (Flag.getNode() == 0) {

View File

@ -26,7 +26,7 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo {
private:
bool is_kernel;
std::vector<unsigned> reg_arg, reg_local_var;
DenseSet<unsigned> reg_ret;
std::vector<unsigned> reg_ret;
bool _isDoneAddArg;
public:
@ -40,7 +40,11 @@ public:
void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
void addRetReg(unsigned reg) { reg_ret.insert(reg); }
void addRetReg(unsigned reg) {
if (!isRetReg(reg)) {
reg_ret.push_back(reg);
}
}
void doneAddArg(void) {
_isDoneAddArg = true;
@ -51,7 +55,7 @@ public:
typedef std::vector<unsigned>::const_iterator reg_iterator;
typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
typedef DenseSet<unsigned>::const_iterator ret_iterator;
typedef std::vector<unsigned>::const_iterator ret_iterator;
bool argRegEmpty() const { return reg_arg.empty(); }
int getNumArg() const { return reg_arg.size(); }

View File

@ -1,3 +1,4 @@
//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
//
// The LLVM Compiler Infrastructure
@ -21,55 +22,534 @@ class PTXReg<string n> : Register<n> {
///===- Predicate Registers -----------------------------------------------===//
def P0 : PTXReg<"p0">;
def P1 : PTXReg<"p1">;
def P2 : PTXReg<"p2">;
def P3 : PTXReg<"p3">;
def P4 : PTXReg<"p4">;
def P5 : PTXReg<"p5">;
def P6 : PTXReg<"p6">;
def P7 : PTXReg<"p7">;
def P0 : PTXReg<"p0">;
def P1 : PTXReg<"p1">;
def P2 : PTXReg<"p2">;
def P3 : PTXReg<"p3">;
def P4 : PTXReg<"p4">;
def P5 : PTXReg<"p5">;
def P6 : PTXReg<"p6">;
def P7 : PTXReg<"p7">;
def P8 : PTXReg<"p8">;
def P9 : PTXReg<"p9">;
def P10 : PTXReg<"p10">;
def P11 : PTXReg<"p11">;
def P12 : PTXReg<"p12">;
def P13 : PTXReg<"p13">;
def P14 : PTXReg<"p14">;
def P15 : PTXReg<"p15">;
def P16 : PTXReg<"p16">;
def P17 : PTXReg<"p17">;
def P18 : PTXReg<"p18">;
def P19 : PTXReg<"p19">;
def P20 : PTXReg<"p20">;
def P21 : PTXReg<"p21">;
def P22 : PTXReg<"p22">;
def P23 : PTXReg<"p23">;
def P24 : PTXReg<"p24">;
def P25 : PTXReg<"p25">;
def P26 : PTXReg<"p26">;
def P27 : PTXReg<"p27">;
def P28 : PTXReg<"p28">;
def P29 : PTXReg<"p29">;
def P30 : PTXReg<"p30">;
def P31 : PTXReg<"p31">;
def P32 : PTXReg<"p32">;
def P33 : PTXReg<"p33">;
def P34 : PTXReg<"p34">;
def P35 : PTXReg<"p35">;
def P36 : PTXReg<"p36">;
def P37 : PTXReg<"p37">;
def P38 : PTXReg<"p38">;
def P39 : PTXReg<"p39">;
def P40 : PTXReg<"p40">;
def P41 : PTXReg<"p41">;
def P42 : PTXReg<"p42">;
def P43 : PTXReg<"p43">;
def P44 : PTXReg<"p44">;
def P45 : PTXReg<"p45">;
def P46 : PTXReg<"p46">;
def P47 : PTXReg<"p47">;
def P48 : PTXReg<"p48">;
def P49 : PTXReg<"p49">;
def P50 : PTXReg<"p50">;
def P51 : PTXReg<"p51">;
def P52 : PTXReg<"p52">;
def P53 : PTXReg<"p53">;
def P54 : PTXReg<"p54">;
def P55 : PTXReg<"p55">;
def P56 : PTXReg<"p56">;
def P57 : PTXReg<"p57">;
def P58 : PTXReg<"p58">;
def P59 : PTXReg<"p59">;
def P60 : PTXReg<"p60">;
def P61 : PTXReg<"p61">;
def P62 : PTXReg<"p62">;
def P63 : PTXReg<"p63">;
def P64 : PTXReg<"p64">;
def P65 : PTXReg<"p65">;
def P66 : PTXReg<"p66">;
def P67 : PTXReg<"p67">;
def P68 : PTXReg<"p68">;
def P69 : PTXReg<"p69">;
def P70 : PTXReg<"p70">;
def P71 : PTXReg<"p71">;
def P72 : PTXReg<"p72">;
def P73 : PTXReg<"p73">;
def P74 : PTXReg<"p74">;
def P75 : PTXReg<"p75">;
def P76 : PTXReg<"p76">;
def P77 : PTXReg<"p77">;
def P78 : PTXReg<"p78">;
def P79 : PTXReg<"p79">;
def P80 : PTXReg<"p80">;
def P81 : PTXReg<"p81">;
def P82 : PTXReg<"p82">;
def P83 : PTXReg<"p83">;
def P84 : PTXReg<"p84">;
def P85 : PTXReg<"p85">;
def P86 : PTXReg<"p86">;
def P87 : PTXReg<"p87">;
def P88 : PTXReg<"p88">;
def P89 : PTXReg<"p89">;
def P90 : PTXReg<"p90">;
def P91 : PTXReg<"p91">;
def P92 : PTXReg<"p92">;
def P93 : PTXReg<"p93">;
def P94 : PTXReg<"p94">;
def P95 : PTXReg<"p95">;
def P96 : PTXReg<"p96">;
def P97 : PTXReg<"p97">;
def P98 : PTXReg<"p98">;
def P99 : PTXReg<"p99">;
def P100 : PTXReg<"p100">;
def P101 : PTXReg<"p101">;
def P102 : PTXReg<"p102">;
def P103 : PTXReg<"p103">;
def P104 : PTXReg<"p104">;
def P105 : PTXReg<"p105">;
def P106 : PTXReg<"p106">;
def P107 : PTXReg<"p107">;
def P108 : PTXReg<"p108">;
def P109 : PTXReg<"p109">;
def P110 : PTXReg<"p110">;
def P111 : PTXReg<"p111">;
def P112 : PTXReg<"p112">;
def P113 : PTXReg<"p113">;
def P114 : PTXReg<"p114">;
def P115 : PTXReg<"p115">;
def P116 : PTXReg<"p116">;
def P117 : PTXReg<"p117">;
def P118 : PTXReg<"p118">;
def P119 : PTXReg<"p119">;
def P120 : PTXReg<"p120">;
def P121 : PTXReg<"p121">;
def P122 : PTXReg<"p122">;
def P123 : PTXReg<"p123">;
def P124 : PTXReg<"p124">;
def P125 : PTXReg<"p125">;
def P126 : PTXReg<"p126">;
def P127 : PTXReg<"p127">;
///===- 16-bit Integer Registers ------------------------------------------===//
///===- 16-Bit Registers --------------------------------------------------===//
def RH0 : PTXReg<"rh0">;
def RH1 : PTXReg<"rh1">;
def RH2 : PTXReg<"rh2">;
def RH3 : PTXReg<"rh3">;
def RH4 : PTXReg<"rh4">;
def RH5 : PTXReg<"rh5">;
def RH6 : PTXReg<"rh6">;
def RH7 : PTXReg<"rh7">;
def RH0 : PTXReg<"rh0">;
def RH1 : PTXReg<"rh1">;
def RH2 : PTXReg<"rh2">;
def RH3 : PTXReg<"rh3">;
def RH4 : PTXReg<"rh4">;
def RH5 : PTXReg<"rh5">;
def RH6 : PTXReg<"rh6">;
def RH7 : PTXReg<"rh7">;
def RH8 : PTXReg<"rh8">;
def RH9 : PTXReg<"rh9">;
def RH10 : PTXReg<"rh10">;
def RH11 : PTXReg<"rh11">;
def RH12 : PTXReg<"rh12">;
def RH13 : PTXReg<"rh13">;
def RH14 : PTXReg<"rh14">;
def RH15 : PTXReg<"rh15">;
def RH16 : PTXReg<"rh16">;
def RH17 : PTXReg<"rh17">;
def RH18 : PTXReg<"rh18">;
def RH19 : PTXReg<"rh19">;
def RH20 : PTXReg<"rh20">;
def RH21 : PTXReg<"rh21">;
def RH22 : PTXReg<"rh22">;
def RH23 : PTXReg<"rh23">;
def RH24 : PTXReg<"rh24">;
def RH25 : PTXReg<"rh25">;
def RH26 : PTXReg<"rh26">;
def RH27 : PTXReg<"rh27">;
def RH28 : PTXReg<"rh28">;
def RH29 : PTXReg<"rh29">;
def RH30 : PTXReg<"rh30">;
def RH31 : PTXReg<"rh31">;
def RH32 : PTXReg<"rh32">;
def RH33 : PTXReg<"rh33">;
def RH34 : PTXReg<"rh34">;
def RH35 : PTXReg<"rh35">;
def RH36 : PTXReg<"rh36">;
def RH37 : PTXReg<"rh37">;
def RH38 : PTXReg<"rh38">;
def RH39 : PTXReg<"rh39">;
def RH40 : PTXReg<"rh40">;
def RH41 : PTXReg<"rh41">;
def RH42 : PTXReg<"rh42">;
def RH43 : PTXReg<"rh43">;
def RH44 : PTXReg<"rh44">;
def RH45 : PTXReg<"rh45">;
def RH46 : PTXReg<"rh46">;
def RH47 : PTXReg<"rh47">;
def RH48 : PTXReg<"rh48">;
def RH49 : PTXReg<"rh49">;
def RH50 : PTXReg<"rh50">;
def RH51 : PTXReg<"rh51">;
def RH52 : PTXReg<"rh52">;
def RH53 : PTXReg<"rh53">;
def RH54 : PTXReg<"rh54">;
def RH55 : PTXReg<"rh55">;
def RH56 : PTXReg<"rh56">;
def RH57 : PTXReg<"rh57">;
def RH58 : PTXReg<"rh58">;
def RH59 : PTXReg<"rh59">;
def RH60 : PTXReg<"rh60">;
def RH61 : PTXReg<"rh61">;
def RH62 : PTXReg<"rh62">;
def RH63 : PTXReg<"rh63">;
def RH64 : PTXReg<"rh64">;
def RH65 : PTXReg<"rh65">;
def RH66 : PTXReg<"rh66">;
def RH67 : PTXReg<"rh67">;
def RH68 : PTXReg<"rh68">;
def RH69 : PTXReg<"rh69">;
def RH70 : PTXReg<"rh70">;
def RH71 : PTXReg<"rh71">;
def RH72 : PTXReg<"rh72">;
def RH73 : PTXReg<"rh73">;
def RH74 : PTXReg<"rh74">;
def RH75 : PTXReg<"rh75">;
def RH76 : PTXReg<"rh76">;
def RH77 : PTXReg<"rh77">;
def RH78 : PTXReg<"rh78">;
def RH79 : PTXReg<"rh79">;
def RH80 : PTXReg<"rh80">;
def RH81 : PTXReg<"rh81">;
def RH82 : PTXReg<"rh82">;
def RH83 : PTXReg<"rh83">;
def RH84 : PTXReg<"rh84">;
def RH85 : PTXReg<"rh85">;
def RH86 : PTXReg<"rh86">;
def RH87 : PTXReg<"rh87">;
def RH88 : PTXReg<"rh88">;
def RH89 : PTXReg<"rh89">;
def RH90 : PTXReg<"rh90">;
def RH91 : PTXReg<"rh91">;
def RH92 : PTXReg<"rh92">;
def RH93 : PTXReg<"rh93">;
def RH94 : PTXReg<"rh94">;
def RH95 : PTXReg<"rh95">;
def RH96 : PTXReg<"rh96">;
def RH97 : PTXReg<"rh97">;
def RH98 : PTXReg<"rh98">;
def RH99 : PTXReg<"rh99">;
def RH100 : PTXReg<"rh100">;
def RH101 : PTXReg<"rh101">;
def RH102 : PTXReg<"rh102">;
def RH103 : PTXReg<"rh103">;
def RH104 : PTXReg<"rh104">;
def RH105 : PTXReg<"rh105">;
def RH106 : PTXReg<"rh106">;
def RH107 : PTXReg<"rh107">;
def RH108 : PTXReg<"rh108">;
def RH109 : PTXReg<"rh109">;
def RH110 : PTXReg<"rh110">;
def RH111 : PTXReg<"rh111">;
def RH112 : PTXReg<"rh112">;
def RH113 : PTXReg<"rh113">;
def RH114 : PTXReg<"rh114">;
def RH115 : PTXReg<"rh115">;
def RH116 : PTXReg<"rh116">;
def RH117 : PTXReg<"rh117">;
def RH118 : PTXReg<"rh118">;
def RH119 : PTXReg<"rh119">;
def RH120 : PTXReg<"rh120">;
def RH121 : PTXReg<"rh121">;
def RH122 : PTXReg<"rh122">;
def RH123 : PTXReg<"rh123">;
def RH124 : PTXReg<"rh124">;
def RH125 : PTXReg<"rh125">;
def RH126 : PTXReg<"rh126">;
def RH127 : PTXReg<"rh127">;
///===- 32-bit Integer Registers ------------------------------------------===//
///===- 32-Bit Registers --------------------------------------------------===//
def R0 : PTXReg<"r0">;
def R1 : PTXReg<"r1">;
def R2 : PTXReg<"r2">;
def R3 : PTXReg<"r3">;
def R4 : PTXReg<"r4">;
def R5 : PTXReg<"r5">;
def R6 : PTXReg<"r6">;
def R7 : PTXReg<"r7">;
def R0 : PTXReg<"r0">;
def R1 : PTXReg<"r1">;
def R2 : PTXReg<"r2">;
def R3 : PTXReg<"r3">;
def R4 : PTXReg<"r4">;
def R5 : PTXReg<"r5">;
def R6 : PTXReg<"r6">;
def R7 : PTXReg<"r7">;
def R8 : PTXReg<"r8">;
def R9 : PTXReg<"r9">;
def R10 : PTXReg<"r10">;
def R11 : PTXReg<"r11">;
def R12 : PTXReg<"r12">;
def R13 : PTXReg<"r13">;
def R14 : PTXReg<"r14">;
def R15 : PTXReg<"r15">;
def R16 : PTXReg<"r16">;
def R17 : PTXReg<"r17">;
def R18 : PTXReg<"r18">;
def R19 : PTXReg<"r19">;
def R20 : PTXReg<"r20">;
def R21 : PTXReg<"r21">;
def R22 : PTXReg<"r22">;
def R23 : PTXReg<"r23">;
def R24 : PTXReg<"r24">;
def R25 : PTXReg<"r25">;
def R26 : PTXReg<"r26">;
def R27 : PTXReg<"r27">;
def R28 : PTXReg<"r28">;
def R29 : PTXReg<"r29">;
def R30 : PTXReg<"r30">;
def R31 : PTXReg<"r31">;
def R32 : PTXReg<"r32">;
def R33 : PTXReg<"r33">;
def R34 : PTXReg<"r34">;
def R35 : PTXReg<"r35">;
def R36 : PTXReg<"r36">;
def R37 : PTXReg<"r37">;
def R38 : PTXReg<"r38">;
def R39 : PTXReg<"r39">;
def R40 : PTXReg<"r40">;
def R41 : PTXReg<"r41">;
def R42 : PTXReg<"r42">;
def R43 : PTXReg<"r43">;
def R44 : PTXReg<"r44">;
def R45 : PTXReg<"r45">;
def R46 : PTXReg<"r46">;
def R47 : PTXReg<"r47">;
def R48 : PTXReg<"r48">;
def R49 : PTXReg<"r49">;
def R50 : PTXReg<"r50">;
def R51 : PTXReg<"r51">;
def R52 : PTXReg<"r52">;
def R53 : PTXReg<"r53">;
def R54 : PTXReg<"r54">;
def R55 : PTXReg<"r55">;
def R56 : PTXReg<"r56">;
def R57 : PTXReg<"r57">;
def R58 : PTXReg<"r58">;
def R59 : PTXReg<"r59">;
def R60 : PTXReg<"r60">;
def R61 : PTXReg<"r61">;
def R62 : PTXReg<"r62">;
def R63 : PTXReg<"r63">;
def R64 : PTXReg<"r64">;
def R65 : PTXReg<"r65">;
def R66 : PTXReg<"r66">;
def R67 : PTXReg<"r67">;
def R68 : PTXReg<"r68">;
def R69 : PTXReg<"r69">;
def R70 : PTXReg<"r70">;
def R71 : PTXReg<"r71">;
def R72 : PTXReg<"r72">;
def R73 : PTXReg<"r73">;
def R74 : PTXReg<"r74">;
def R75 : PTXReg<"r75">;
def R76 : PTXReg<"r76">;
def R77 : PTXReg<"r77">;
def R78 : PTXReg<"r78">;
def R79 : PTXReg<"r79">;
def R80 : PTXReg<"r80">;
def R81 : PTXReg<"r81">;
def R82 : PTXReg<"r82">;
def R83 : PTXReg<"r83">;
def R84 : PTXReg<"r84">;
def R85 : PTXReg<"r85">;
def R86 : PTXReg<"r86">;
def R87 : PTXReg<"r87">;
def R88 : PTXReg<"r88">;
def R89 : PTXReg<"r89">;
def R90 : PTXReg<"r90">;
def R91 : PTXReg<"r91">;
def R92 : PTXReg<"r92">;
def R93 : PTXReg<"r93">;
def R94 : PTXReg<"r94">;
def R95 : PTXReg<"r95">;
def R96 : PTXReg<"r96">;
def R97 : PTXReg<"r97">;
def R98 : PTXReg<"r98">;
def R99 : PTXReg<"r99">;
def R100 : PTXReg<"r100">;
def R101 : PTXReg<"r101">;
def R102 : PTXReg<"r102">;
def R103 : PTXReg<"r103">;
def R104 : PTXReg<"r104">;
def R105 : PTXReg<"r105">;
def R106 : PTXReg<"r106">;
def R107 : PTXReg<"r107">;
def R108 : PTXReg<"r108">;
def R109 : PTXReg<"r109">;
def R110 : PTXReg<"r110">;
def R111 : PTXReg<"r111">;
def R112 : PTXReg<"r112">;
def R113 : PTXReg<"r113">;
def R114 : PTXReg<"r114">;
def R115 : PTXReg<"r115">;
def R116 : PTXReg<"r116">;
def R117 : PTXReg<"r117">;
def R118 : PTXReg<"r118">;
def R119 : PTXReg<"r119">;
def R120 : PTXReg<"r120">;
def R121 : PTXReg<"r121">;
def R122 : PTXReg<"r122">;
def R123 : PTXReg<"r123">;
def R124 : PTXReg<"r124">;
def R125 : PTXReg<"r125">;
def R126 : PTXReg<"r126">;
def R127 : PTXReg<"r127">;
///===- 64-bit Integer Registers ------------------------------------------===//
///===- 64-Bit Registers --------------------------------------------------===//
def RD0 : PTXReg<"rd0">;
def RD1 : PTXReg<"rd1">;
def RD2 : PTXReg<"rd2">;
def RD3 : PTXReg<"rd3">;
def RD4 : PTXReg<"rd4">;
def RD5 : PTXReg<"rd5">;
def RD6 : PTXReg<"rd6">;
def RD7 : PTXReg<"rd7">;
def RD0 : PTXReg<"rd0">;
def RD1 : PTXReg<"rd1">;
def RD2 : PTXReg<"rd2">;
def RD3 : PTXReg<"rd3">;
def RD4 : PTXReg<"rd4">;
def RD5 : PTXReg<"rd5">;
def RD6 : PTXReg<"rd6">;
def RD7 : PTXReg<"rd7">;
def RD8 : PTXReg<"rd8">;
def RD9 : PTXReg<"rd9">;
def RD10 : PTXReg<"rd10">;
def RD11 : PTXReg<"rd11">;
def RD12 : PTXReg<"rd12">;
def RD13 : PTXReg<"rd13">;
def RD14 : PTXReg<"rd14">;
def RD15 : PTXReg<"rd15">;
def RD16 : PTXReg<"rd16">;
def RD17 : PTXReg<"rd17">;
def RD18 : PTXReg<"rd18">;
def RD19 : PTXReg<"rd19">;
def RD20 : PTXReg<"rd20">;
def RD21 : PTXReg<"rd21">;
def RD22 : PTXReg<"rd22">;
def RD23 : PTXReg<"rd23">;
def RD24 : PTXReg<"rd24">;
def RD25 : PTXReg<"rd25">;
def RD26 : PTXReg<"rd26">;
def RD27 : PTXReg<"rd27">;
def RD28 : PTXReg<"rd28">;
def RD29 : PTXReg<"rd29">;
def RD30 : PTXReg<"rd30">;
def RD31 : PTXReg<"rd31">;
def RD32 : PTXReg<"rd32">;
def RD33 : PTXReg<"rd33">;
def RD34 : PTXReg<"rd34">;
def RD35 : PTXReg<"rd35">;
def RD36 : PTXReg<"rd36">;
def RD37 : PTXReg<"rd37">;
def RD38 : PTXReg<"rd38">;
def RD39 : PTXReg<"rd39">;
def RD40 : PTXReg<"rd40">;
def RD41 : PTXReg<"rd41">;
def RD42 : PTXReg<"rd42">;
def RD43 : PTXReg<"rd43">;
def RD44 : PTXReg<"rd44">;
def RD45 : PTXReg<"rd45">;
def RD46 : PTXReg<"rd46">;
def RD47 : PTXReg<"rd47">;
def RD48 : PTXReg<"rd48">;
def RD49 : PTXReg<"rd49">;
def RD50 : PTXReg<"rd50">;
def RD51 : PTXReg<"rd51">;
def RD52 : PTXReg<"rd52">;
def RD53 : PTXReg<"rd53">;
def RD54 : PTXReg<"rd54">;
def RD55 : PTXReg<"rd55">;
def RD56 : PTXReg<"rd56">;
def RD57 : PTXReg<"rd57">;
def RD58 : PTXReg<"rd58">;
def RD59 : PTXReg<"rd59">;
def RD60 : PTXReg<"rd60">;
def RD61 : PTXReg<"rd61">;
def RD62 : PTXReg<"rd62">;
def RD63 : PTXReg<"rd63">;
def RD64 : PTXReg<"rd64">;
def RD65 : PTXReg<"rd65">;
def RD66 : PTXReg<"rd66">;
def RD67 : PTXReg<"rd67">;
def RD68 : PTXReg<"rd68">;
def RD69 : PTXReg<"rd69">;
def RD70 : PTXReg<"rd70">;
def RD71 : PTXReg<"rd71">;
def RD72 : PTXReg<"rd72">;
def RD73 : PTXReg<"rd73">;
def RD74 : PTXReg<"rd74">;
def RD75 : PTXReg<"rd75">;
def RD76 : PTXReg<"rd76">;
def RD77 : PTXReg<"rd77">;
def RD78 : PTXReg<"rd78">;
def RD79 : PTXReg<"rd79">;
def RD80 : PTXReg<"rd80">;
def RD81 : PTXReg<"rd81">;
def RD82 : PTXReg<"rd82">;
def RD83 : PTXReg<"rd83">;
def RD84 : PTXReg<"rd84">;
def RD85 : PTXReg<"rd85">;
def RD86 : PTXReg<"rd86">;
def RD87 : PTXReg<"rd87">;
def RD88 : PTXReg<"rd88">;
def RD89 : PTXReg<"rd89">;
def RD90 : PTXReg<"rd90">;
def RD91 : PTXReg<"rd91">;
def RD92 : PTXReg<"rd92">;
def RD93 : PTXReg<"rd93">;
def RD94 : PTXReg<"rd94">;
def RD95 : PTXReg<"rd95">;
def RD96 : PTXReg<"rd96">;
def RD97 : PTXReg<"rd97">;
def RD98 : PTXReg<"rd98">;
def RD99 : PTXReg<"rd99">;
def RD100 : PTXReg<"rd100">;
def RD101 : PTXReg<"rd101">;
def RD102 : PTXReg<"rd102">;
def RD103 : PTXReg<"rd103">;
def RD104 : PTXReg<"rd104">;
def RD105 : PTXReg<"rd105">;
def RD106 : PTXReg<"rd106">;
def RD107 : PTXReg<"rd107">;
def RD108 : PTXReg<"rd108">;
def RD109 : PTXReg<"rd109">;
def RD110 : PTXReg<"rd110">;
def RD111 : PTXReg<"rd111">;
def RD112 : PTXReg<"rd112">;
def RD113 : PTXReg<"rd113">;
def RD114 : PTXReg<"rd114">;
def RD115 : PTXReg<"rd115">;
def RD116 : PTXReg<"rd116">;
def RD117 : PTXReg<"rd117">;
def RD118 : PTXReg<"rd118">;
def RD119 : PTXReg<"rd119">;
def RD120 : PTXReg<"rd120">;
def RD121 : PTXReg<"rd121">;
def RD122 : PTXReg<"rd122">;
def RD123 : PTXReg<"rd123">;
def RD124 : PTXReg<"rd124">;
def RD125 : PTXReg<"rd125">;
def RD126 : PTXReg<"rd126">;
def RD127 : PTXReg<"rd127">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 7)>;
def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 7)>;
def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 7)>;
def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 7)>;
def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 7)>;
def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 7)>;
def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>;
def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>;

View File

@ -0,0 +1,163 @@
#!/usr/bin/env python
##===- generate-register-td.py --------------------------------*-python-*--===##
##
## The LLVM Compiler Infrastructure
##
## This file is distributed under the University of Illinois Open Source
## License. See LICENSE.TXT for details.
##
##===----------------------------------------------------------------------===##
##
## This file describes the PTX register file generator.
##
##===----------------------------------------------------------------------===##
from sys import argv, exit, stderr, stdout


# Text emitted at the top of PTXRegisterInfo.td.  The trailing backslash after
# the opening quotes keeps the "-*- tblgen -*-" mode line on the first line of
# the generated file instead of pushing it below a blank line.
REGISTER_INFO_HEADER = '''\
//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Declarations that describe the PTX register file
//===----------------------------------------------------------------------===//
class PTXReg<string n> : Register<n> {
let Namespace = "PTX";
}
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
'''

# Banner that separates the register definitions from the register classes.
REGISTER_CLASSES_HEADER = '''
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
'''

# Template for PTXCallingConv.td.  It takes eight comma-separated register
# lists: the four parameter lists followed by the four return-value lists,
# each in the order predicate, 16-bit, 32-bit, 64-bit.
CALLING_CONV_TEMPLATE = '''\
//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the PTX architecture.
//
//===----------------------------------------------------------------------===//
// PTX Formal Parameter Calling Convention
def CC_PTX : CallingConv<[
CCIfType<[i1], CCAssignToReg<[%s]>>,
CCIfType<[i16], CCAssignToReg<[%s]>>,
CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
CCIfType<[i64,f64], CCAssignToReg<[%s]>>
]>;
// PTX Return Value Calling Convention
def RetCC_PTX : CallingConv<[
CCIfType<[i1], CCAssignToReg<[%s]>>,
CCIfType<[i16], CCAssignToReg<[%s]>>,
CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
CCIfType<[i64,f64], CCAssignToReg<[%s]>>
]>;
'''


def split_register_counts(total):
    """Split `total` registers into (num_ret, num_param).

    Roughly 10% of the registers are reserved for return values and the rest
    are used for parameters.  At least one register is always reserved for
    return values so that the return-value register list is never empty;
    callers must pass total >= 2 so the parameter list is non-empty as well.
    For counts of 10 or more this is the same split as int(0.1 * total).
    """
    num_ret = max(1, total // 10)
    return num_ret, total - num_ret


def register_names(prefix, first, count):
    """Return `count` register names prefix<first>, prefix<first+1>, ..."""
    return ['%s%d' % (prefix, first + i) for i in range(count)]


def calling_conv_lists(prefix, total):
    """Return (param_list, ret_list) as comma-separated register names.

    The lowest-numbered registers are the return-value registers; the
    parameter registers follow them.
    """
    num_ret, num_param = split_register_counts(total)
    ret_list = ', '.join(register_names(prefix, 0, num_ret))
    param_list = ', '.join(register_names(prefix, num_ret, num_param))
    return param_list, ret_list


def render_register_info(num_pred, num_16bit, num_32bit, num_64bit):
    """Return the full text of PTXRegisterInfo.td for the given counts."""
    sections = (
        ('\n///===- Predicate Registers -----------------------------------------------===//\n\n',
         'def P%d : PTXReg<"p%d">;\n', num_pred),
        ('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n',
         'def RH%d : PTXReg<"rh%d">;\n', num_16bit),
        ('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n',
         'def R%d : PTXReg<"r%d">;\n', num_32bit),
        ('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n',
         'def RD%d : PTXReg<"rd%d">;\n', num_64bit),
    )

    parts = [REGISTER_INFO_HEADER]
    for banner, def_format, count in sections:
        parts.append(banner)
        for r in range(count):
            parts.append(def_format % (r, r))

    parts.append(REGISTER_CLASSES_HEADER)
    # The integer and floating-point classes of the same width share the same
    # physical registers, so the 32/64-bit counts are each used twice.
    parts.append('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred - 1))
    parts.append('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit - 1))
    parts.append('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit - 1))
    parts.append('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit - 1))
    parts.append('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit - 1))
    parts.append('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit - 1))
    return ''.join(parts)


def render_calling_conv(num_pred, num_16bit, num_32bit, num_64bit):
    """Return the full text of PTXCallingConv.td for the given counts."""
    param_pred, ret_pred = calling_conv_lists('P', num_pred)
    param_16bit, ret_16bit = calling_conv_lists('RH', num_16bit)
    param_32bit, ret_32bit = calling_conv_lists('R', num_32bit)
    param_64bit, ret_64bit = calling_conv_lists('RD', num_64bit)
    return CALLING_CONV_TEMPLATE % (
        param_pred, param_16bit, param_32bit, param_64bit,
        ret_pred, ret_16bit, ret_32bit, ret_64bit)


def main(args):
    """Generate PTXRegisterInfo.td and PTXCallingConv.td in the current directory.

    `args` is an argv-style list: program name followed by the number of
    predicate, 16-bit, 32-bit and 64-bit registers.  Returns the process exit
    status (0 on success, 1 on a usage or argument error).
    """
    if len(args) != 5:
        stderr.write('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>\n')
        return 1

    try:
        counts = [int(arg) for arg in args[1:5]]
    except ValueError:
        stderr.write('ERROR: Invalid integer parameter\n')
        return 1

    # Each register type needs at least one return-value register and one
    # parameter register, otherwise the generated TableGen would be invalid.
    if min(counts) < 2:
        stderr.write('ERROR: Each register count must be at least 2\n')
        return 1

    num_pred, num_16bit, num_32bit, num_64bit = counts

    # Render both files before touching the disk so that a failure cannot
    # leave one file regenerated and the other stale.
    register_info = render_register_info(num_pred, num_16bit, num_32bit, num_64bit)
    calling_conv = render_calling_conv(num_pred, num_16bit, num_32bit, num_64bit)

    with open('PTXRegisterInfo.td', 'w') as td_file:
        td_file.write(register_info)
    with open('PTXCallingConv.td', 'w') as td_file:
        td_file.write(calling_conv)
    return 0


if __name__ == '__main__':
    exit(main(argv))

View File

@ -1,70 +1,70 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
; CHECK: add.u16 rh0, rh1, rh2;
; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
%z = add i16 %x, %y
ret i16 %z
}
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
; CHECK: add.u32 r0, r1, r2;
; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%z = add i32 %x, %y
ret i32 %z
}
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
; CHECK: add.u64 rd0, rd1, rd2;
; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%z = add i64 %x, %y
ret i64 %z
}
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: add.rn.f32 r0, r1, r2
; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fadd float %x, %y
ret float %z
}
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: add.rn.f64 rd0, rd1, rd2
; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fadd double %x, %y
ret double %z
}
define ptx_device i16 @t2_u16(i16 %x) {
; CHECK: add.u16 rh0, rh1, 1;
; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, 1;
; CHECK-NEXT: ret;
%z = add i16 %x, 1
ret i16 %z
}
define ptx_device i32 @t2_u32(i32 %x) {
; CHECK: add.u32 r0, r1, 1;
; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, 1;
; CHECK-NEXT: ret;
%z = add i32 %x, 1
ret i32 %z
}
define ptx_device i64 @t2_u64(i64 %x) {
; CHECK: add.u64 rd0, rd1, 1;
; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, 1;
; CHECK-NEXT: ret;
%z = add i64 %x, 1
ret i64 %z
}
define ptx_device float @t2_f32(float %x) {
; CHECK: add.rn.f32 r0, r1, 0F3F800000;
; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F3F800000;
; CHECK-NEXT: ret;
%z = fadd float %x, 1.0
ret float %z
}
define ptx_device double @t2_f64(double %x) {
; CHECK: add.rn.f64 rd0, rd1, 0D3FF0000000000000;
; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D3FF0000000000000;
; CHECK-NEXT: ret;
%z = fadd double %x, 1.0
ret double %z

View File

@ -0,0 +1,23 @@
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
%complex = type { float, float }
; Adds two %complex values component-wise: the real and imaginary floats are
; pulled out of %a and %b with extractvalue, summed with fadd, and repacked
; into a %complex with insertvalue.
;
; The FileCheck directives expect the four float fields to be fetched with
; ld.param.f32 from __param_1, __param_3, __param_2, __param_4 (in that exact
; order, since every directive after the first is a -NEXT form), followed by
; two add.rn.f32 instructions that accumulate into the registers loaded from
; __param_1 and __param_2, and then ret.
define ptx_device %complex @complex_add(%complex %a, %complex %b) {
entry:
; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1];
; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3];
; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2];
; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4];
; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]];
; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]];
; CHECK-NEXT: ret;
%a.real = extractvalue %complex %a, 0
%a.imag = extractvalue %complex %a, 1
%b.real = extractvalue %complex %b, 0
%b.imag = extractvalue %complex %b, 1
%ret.real = fadd float %a.real, %b.real
%ret.imag = fadd float %a.imag, %b.imag
%ret.0 = insertvalue %complex undef, float %ret.real, 0
%ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1
ret %complex %ret.1
}

View File

@ -3,21 +3,21 @@
; preds
define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) {
; CHECK: and.pred p0, p1, p2
; CHECK: and.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}}
%c = and i1 %x, %y
%d = zext i1 %c to i32
ret i32 %d
}
define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) {
; CHECK: or.pred p0, p1, p2
; CHECK: or.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}}
%a = or i1 %x, %y
%b = zext i1 %a to i32
ret i32 %b
}
define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) {
; CHECK: xor.pred p0, p1, p2
; CHECK: xor.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}}
%a = xor i1 %x, %y
%b = zext i1 %a to i32
ret i32 %b

View File

@ -10,15 +10,15 @@ loop:
; Returns %x when %x is unsigned-greater-than %y, otherwise %y, using a
; conditional branch to one of two returning blocks.
;
; The directives expect the comparison to be emitted as setp.le.u32 (the
; inverse of the IR's ugt predicate) writing p0, immediately followed by a
; bra predicated on p0, with no other bra before the first mov.u32.  The two
; setp source registers are captured as R0 and R1 so that each return path
; can be checked to move the matching operand into the result register.
define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) {
entry:
; CHECK: setp.le.u32 p0, r1, r2
; CHECK: setp.le.u32 p0, r[[R0:[0-9]+]], r[[R1:[0-9]+]]
%p = icmp ugt i32 %x, %y
; CHECK-NEXT: @p0 bra
; CHECK-NOT: bra
br i1 %p, label %clause.if, label %clause.else
clause.if:
; CHECK: mov.u32 r0, r1
; CHECK: mov.u32 r{{[0-9]+}}, r[[R0]]
ret i32 %x
clause.else:
; CHECK: mov.u32 r0, r2
; CHECK: mov.u32 r{{[0-9]+}}, r[[R1]]
ret i32 %y
}

View File

@ -4,9 +4,9 @@
; (note: we convert back to i32 to return)
define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
; CHECK: setp.gt.b16 p0, rh1, 0
; CHECK-NEXT: and.pred p0, p0, p1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.b16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
; CHECK-NEXT: ret;
%a = trunc i16 %x to i1
%b = and i1 %a, %y
@ -15,9 +15,9 @@ define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
}
define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
; CHECK: setp.gt.b32 p0, r1, 0
; CHECK-NEXT: and.pred p0, p0, p1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.b32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
; CHECK-NEXT: ret;
%a = trunc i32 %x to i1
%b = and i1 %a, %y
@ -26,9 +26,9 @@ define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
}
define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
; CHECK: setp.gt.b64 p0, rd1, 0
; CHECK-NEXT: and.pred p0, p0, p1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.b64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
; CHECK-NEXT: ret;
%a = trunc i64 %x to i1
%b = and i1 %a, %y
@ -37,9 +37,9 @@ define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
}
define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
; CHECK: setp.gt.b32 p0, r1, 0
; CHECK-NEXT: and.pred p0, p0, p1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.b32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
; CHECK-NEXT: ret;
%a = fptoui float %x to i1
%b = and i1 %a, %y
@ -48,9 +48,9 @@ define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
}
define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
; CHECK: setp.gt.b64 p0, rd1, 0
; CHECK-NEXT: and.pred p0, p0, p1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.b64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0
; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
; CHECK-NEXT: ret;
%a = fptoui double %x to i1
%b = and i1 %a, %y
@ -61,35 +61,35 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
; i16
define ptx_device i16 @cvt_i16_preds(i1 %x) {
; CHECK: selp.u16 rh0, 1, 0, p1;
; CHECK: selp.u16 rh{{[0-9]+}}, 1, 0, p{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i1 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_i32(i32 %x) {
; CHECK: cvt.u16.u32 rh0, r1;
; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = trunc i32 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_i64(i64 %x) {
; CHECK: cvt.u16.u64 rh0, rd1;
; CHECK: cvt.u16.u64 rh{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = trunc i64 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_f32(float %x) {
; CHECK: cvt.rzi.u16.f32 rh0, r1;
; CHECK: cvt.rzi.u16.f32 rh{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fptoui float %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_f64(double %x) {
; CHECK: cvt.rzi.u16.f64 rh0, rd1;
; CHECK: cvt.rzi.u16.f64 rh{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fptoui double %x to i16
ret i16 %a
@ -98,35 +98,35 @@ define ptx_device i16 @cvt_i16_f64(double %x) {
; i32
define ptx_device i32 @cvt_i32_preds(i1 %x) {
; CHECK: selp.u32 r0, 1, 0, p1;
; CHECK: selp.u32 r{{[0-9]+}}, 1, 0, p{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i1 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_i16(i16 %x) {
; CHECK: cvt.u32.u16 r0, rh1;
; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i16 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_i64(i64 %x) {
; CHECK: cvt.u32.u64 r0, rd1;
; CHECK: cvt.u32.u64 r{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = trunc i64 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_f32(float %x) {
; CHECK: cvt.rzi.u32.f32 r0, r1;
; CHECK: cvt.rzi.u32.f32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fptoui float %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_f64(double %x) {
; CHECK: cvt.rzi.u32.f64 r0, rd1;
; CHECK: cvt.rzi.u32.f64 r{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fptoui double %x to i32
ret i32 %a
@ -135,35 +135,35 @@ define ptx_device i32 @cvt_i32_f64(double %x) {
; i64
define ptx_device i64 @cvt_i64_preds(i1 %x) {
; CHECK: selp.u64 rd0, 1, 0, p1;
; CHECK: selp.u64 rd{{[0-9]+}}, 1, 0, p{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i1 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_i16(i16 %x) {
; CHECK: cvt.u64.u16 rd0, rh1;
; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i16 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_i32(i32 %x) {
; CHECK: cvt.u64.u32 rd0, r1;
; CHECK: cvt.u64.u32 rd{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = zext i32 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_f32(float %x) {
; CHECK: cvt.rzi.u64.f32 rd0, r1;
; CHECK: cvt.rzi.u64.f32 rd{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fptoui float %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_f64(double %x) {
; CHECK: cvt.rzi.u64.f64 rd0, rd1;
; CHECK: cvt.rzi.u64.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK: ret;
%a = fptoui double %x to i64
ret i64 %a
@ -172,35 +172,35 @@ define ptx_device i64 @cvt_i64_f64(double %x) {
; f32
define ptx_device float @cvt_f32_preds(i1 %x) {
; CHECK: selp.f32 r0, 0F3F800000, 0F00000000, p1;
; CHECK: selp.f32 r{{[0-9]+}}, 0F3F800000, 0F00000000, p{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i1 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i16(i16 %x) {
; CHECK: cvt.rn.f32.u16 r0, rh1;
; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i16 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i32(i32 %x) {
; CHECK: cvt.rn.f32.u32 r0, r1;
; CHECK: cvt.rn.f32.u32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i32 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i64(i64 %x) {
; CHECK: cvt.rn.f32.u64 r0, rd1;
; CHECK: cvt.rn.f32.u64 r{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i64 %x to float
ret float %a
}
define ptx_device float @cvt_f32_f64(double %x) {
; CHECK: cvt.rn.f32.f64 r0, rd1;
; CHECK: cvt.rn.f32.f64 r{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fptrunc double %x to float
ret float %a
@ -209,35 +209,35 @@ define ptx_device float @cvt_f32_f64(double %x) {
; f64
; Converts an i1 to double with uitofp; the expected lowering is a single
; selp.f64 that picks between two immediates based on the predicate register.
;
; NOTE(review): the "true" immediate 0D3F80000000000000 is not the IEEE-754
; double encoding of 1.0 (that is 0D3FF0000000000000); its leading digits
; match the single-precision pattern 0F3F800000.  Presumably the directive
; records what the backend currently prints -- confirm whether that output
; is intended.
define ptx_device double @cvt_f64_preds(i1 %x) {
; CHECK: selp.f64 rd0, 0D3F80000000000000, 0D0000000000000000, p1;
; CHECK: selp.f64 rd{{[0-9]+}}, 0D3F80000000000000, 0D0000000000000000, p{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i1 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i16(i16 %x) {
; CHECK: cvt.rn.f64.u16 rd0, rh1;
; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i16 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i32(i32 %x) {
; CHECK: cvt.rn.f64.u32 rd0, r1;
; CHECK: cvt.rn.f64.u32 rd{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i32 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i64(i64 %x) {
; CHECK: cvt.rn.f64.u64 rd0, rd1;
; CHECK: cvt.rn.f64.u64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = uitofp i64 %x to double
ret double %a
}
define ptx_device double @cvt_f64_f32(float %x) {
; CHECK: cvt.f64.f32 rd0, r1;
; CHECK: cvt.f64.f32 rd{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fpext float %x to double
ret double %a

View File

@ -1,14 +1,14 @@
; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: div.f32 r0, r1, r2;
; CHECK: div.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fdiv float %x, %y
ret float %a
}
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: div.f64 rd0, rd1, rd2;
; CHECK: div.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fdiv double %x, %y
ret double %a

View File

@ -1,14 +1,14 @@
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: div.rn.f32 r0, r1, r2;
; CHECK: div.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fdiv float %x, %y
ret float %a
}
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: div.rn.f64 rd0, rd1, rd2;
; CHECK: div.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fdiv double %x, %y
ret double %a

View File

@ -1,14 +1,14 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device float @t1_f32(float %x) {
; CHECK: neg.f32 r0, r1;
; CHECK: neg.f32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%y = fsub float -0.000000e+00, %x
ret float %y
}
define ptx_device double @t1_f64(double %x) {
; CHECK: neg.f64 rd0, rd1;
; CHECK: neg.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%y = fsub double -0.000000e+00, %x
ret double %y

View File

@ -63,7 +63,7 @@
define ptx_device i16 @t1_u16(i16* %p) {
entry:
;CHECK: ld.global.u16 rh0, [r1];
;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load i16* %p
ret i16 %x
@ -71,7 +71,7 @@ entry:
define ptx_device i32 @t1_u32(i32* %p) {
entry:
;CHECK: ld.global.u32 r0, [r1];
;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load i32* %p
ret i32 %x
@ -79,7 +79,7 @@ entry:
define ptx_device i64 @t1_u64(i64* %p) {
entry:
;CHECK: ld.global.u64 rd0, [r1];
;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load i64* %p
ret i64 %x
@ -87,7 +87,7 @@ entry:
define ptx_device float @t1_f32(float* %p) {
entry:
;CHECK: ld.global.f32 r0, [r1];
;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load float* %p
ret float %x
@ -95,7 +95,7 @@ entry:
define ptx_device double @t1_f64(double* %p) {
entry:
;CHECK: ld.global.f64 rd0, [r1];
;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load double* %p
ret double %x
@ -103,7 +103,7 @@ entry:
define ptx_device i16 @t2_u16(i16* %p) {
entry:
;CHECK: ld.global.u16 rh0, [r1+2];
;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2];
;CHECK-NEXT: ret;
%i = getelementptr i16* %p, i32 1
%x = load i16* %i
@ -112,7 +112,7 @@ entry:
define ptx_device i32 @t2_u32(i32* %p) {
entry:
;CHECK: ld.global.u32 r0, [r1+4];
;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
;CHECK-NEXT: ret;
%i = getelementptr i32* %p, i32 1
%x = load i32* %i
@ -121,7 +121,7 @@ entry:
define ptx_device i64 @t2_u64(i64* %p) {
entry:
;CHECK: ld.global.u64 rd0, [r1+8];
;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
;CHECK-NEXT: ret;
%i = getelementptr i64* %p, i32 1
%x = load i64* %i
@ -130,7 +130,7 @@ entry:
define ptx_device float @t2_f32(float* %p) {
entry:
;CHECK: ld.global.f32 r0, [r1+4];
;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 1
%x = load float* %i
@ -139,7 +139,7 @@ entry:
define ptx_device double @t2_f64(double* %p) {
entry:
;CHECK: ld.global.f64 rd0, [r1+8];
;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
;CHECK-NEXT: ret;
%i = getelementptr double* %p, i32 1
%x = load double* %i
@ -148,9 +148,9 @@ entry:
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
entry:
;CHECK: shl.b32 r0, r2, 1;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: ld.global.u16 rh0, [r0];
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
%i = getelementptr i16* %p, i32 %q
%x = load i16* %i
ret i16 %x
@ -158,9 +158,9 @@ entry:
define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
entry:
;CHECK: shl.b32 r0, r2, 2;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: ld.global.u32 r0, [r0];
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
%i = getelementptr i32* %p, i32 %q
%x = load i32* %i
ret i32 %x
@ -168,9 +168,9 @@ entry:
define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
entry:
;CHECK: shl.b32 r0, r2, 3;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: ld.global.u64 rd0, [r0];
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
%i = getelementptr i64* %p, i32 %q
%x = load i64* %i
ret i64 %x
@ -178,9 +178,9 @@ entry:
define ptx_device float @t3_f32(float* %p, i32 %q) {
entry:
;CHECK: shl.b32 r0, r2, 2;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: ld.global.f32 r0, [r0];
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
%i = getelementptr float* %p, i32 %q
%x = load float* %i
ret float %x
@ -188,9 +188,9 @@ entry:
define ptx_device double @t3_f64(double* %p, i32 %q) {
entry:
;CHECK: shl.b32 r0, r2, 3;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: ld.global.f64 rd0, [r0];
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
%i = getelementptr double* %p, i32 %q
%x = load double* %i
ret double %x
@ -198,8 +198,8 @@ entry:
define ptx_device i16 @t4_global_u16() {
entry:
;CHECK: mov.u32 r0, array_i16;
;CHECK-NEXT: ld.global.u16 rh0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
%x = load i16* %i
@ -208,8 +208,8 @@ entry:
define ptx_device i32 @t4_global_u32() {
entry:
;CHECK: mov.u32 r0, array_i32;
;CHECK-NEXT: ld.global.u32 r0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
%x = load i32* %i
@ -218,8 +218,8 @@ entry:
define ptx_device i64 @t4_global_u64() {
entry:
;CHECK: mov.u32 r0, array_i64;
;CHECK-NEXT: ld.global.u64 rd0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
%x = load i64* %i
@ -228,8 +228,8 @@ entry:
define ptx_device float @t4_global_f32() {
entry:
;CHECK: mov.u32 r0, array_float;
;CHECK-NEXT: ld.global.f32 r0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
%x = load float* %i
@ -238,8 +238,8 @@ entry:
define ptx_device double @t4_global_f64() {
entry:
;CHECK: mov.u32 r0, array_double;
;CHECK-NEXT: ld.global.f64 rd0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
%x = load double* %i
@ -248,8 +248,8 @@ entry:
define ptx_device i16 @t4_const_u16() {
entry:
;CHECK: mov.u32 r0, array_constant_i16;
;CHECK-NEXT: ld.const.u16 rh0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i16;
;CHECK-NEXT: ld.const.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
%x = load i16 addrspace(1)* %i
@ -258,8 +258,8 @@ entry:
define ptx_device i32 @t4_const_u32() {
entry:
;CHECK: mov.u32 r0, array_constant_i32;
;CHECK-NEXT: ld.const.u32 r0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i32;
;CHECK-NEXT: ld.const.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
%x = load i32 addrspace(1)* %i
@ -268,8 +268,8 @@ entry:
define ptx_device i64 @t4_const_u64() {
entry:
;CHECK: mov.u32 r0, array_constant_i64;
;CHECK-NEXT: ld.const.u64 rd0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i64;
;CHECK-NEXT: ld.const.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
%x = load i64 addrspace(1)* %i
@ -278,8 +278,8 @@ entry:
define ptx_device float @t4_const_f32() {
entry:
;CHECK: mov.u32 r0, array_constant_float;
;CHECK-NEXT: ld.const.f32 r0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_float;
;CHECK-NEXT: ld.const.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
%x = load float addrspace(1)* %i
@ -288,8 +288,8 @@ entry:
define ptx_device double @t4_const_f64() {
entry:
;CHECK: mov.u32 r0, array_constant_double;
;CHECK-NEXT: ld.const.f64 rd0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_double;
;CHECK-NEXT: ld.const.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
%x = load double addrspace(1)* %i
@ -298,8 +298,8 @@ entry:
define ptx_device i16 @t4_local_u16() {
entry:
;CHECK: mov.u32 r0, array_local_i16;
;CHECK-NEXT: ld.local.u16 rh0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
;CHECK-NEXT: ld.local.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
%x = load i16 addrspace(2)* %i
@ -308,8 +308,8 @@ entry:
define ptx_device i32 @t4_local_u32() {
entry:
;CHECK: mov.u32 r0, array_local_i32;
;CHECK-NEXT: ld.local.u32 r0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32;
;CHECK-NEXT: ld.local.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
%x = load i32 addrspace(2)* %i
@ -318,8 +318,8 @@ entry:
define ptx_device i64 @t4_local_u64() {
entry:
;CHECK: mov.u32 r0, array_local_i64;
;CHECK-NEXT: ld.local.u64 rd0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64;
;CHECK-NEXT: ld.local.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
%x = load i64 addrspace(2)* %i
@ -328,8 +328,8 @@ entry:
define ptx_device float @t4_local_f32() {
entry:
;CHECK: mov.u32 r0, array_local_float;
;CHECK-NEXT: ld.local.f32 r0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float;
;CHECK-NEXT: ld.local.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
%x = load float addrspace(2)* %i
@ -338,8 +338,8 @@ entry:
define ptx_device double @t4_local_f64() {
entry:
;CHECK: mov.u32 r0, array_local_double;
;CHECK-NEXT: ld.local.f64 rd0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double;
;CHECK-NEXT: ld.local.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
%x = load double addrspace(2)* %i
@ -348,8 +348,8 @@ entry:
define ptx_device i16 @t4_shared_u16() {
entry:
;CHECK: mov.u32 r0, array_shared_i16;
;CHECK-NEXT: ld.shared.u16 rh0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
;CHECK-NEXT: ld.shared.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
%x = load i16 addrspace(4)* %i
@ -358,8 +358,8 @@ entry:
define ptx_device i32 @t4_shared_u32() {
entry:
;CHECK: mov.u32 r0, array_shared_i32;
;CHECK-NEXT: ld.shared.u32 r0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32;
;CHECK-NEXT: ld.shared.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
%x = load i32 addrspace(4)* %i
@ -368,8 +368,8 @@ entry:
define ptx_device i64 @t4_shared_u64() {
entry:
;CHECK: mov.u32 r0, array_shared_i64;
;CHECK-NEXT: ld.shared.u64 rd0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64;
;CHECK-NEXT: ld.shared.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
%x = load i64 addrspace(4)* %i
@ -378,8 +378,8 @@ entry:
define ptx_device float @t4_shared_f32() {
entry:
;CHECK: mov.u32 r0, array_shared_float;
;CHECK-NEXT: ld.shared.f32 r0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float;
;CHECK-NEXT: ld.shared.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
%x = load float addrspace(4)* %i
@ -388,8 +388,8 @@ entry:
define ptx_device double @t4_shared_f64() {
entry:
;CHECK: mov.u32 r0, array_shared_double;
;CHECK-NEXT: ld.shared.f64 rd0, [r0];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double;
;CHECK-NEXT: ld.shared.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
%x = load double addrspace(4)* %i
@ -398,8 +398,8 @@ entry:
define ptx_device i16 @t5_u16() {
entry:
;CHECK: mov.u32 r0, array_i16;
;CHECK-NEXT: ld.global.u16 rh0, [r0+2];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]+2];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
%x = load i16* %i
@ -408,8 +408,8 @@ entry:
define ptx_device i32 @t5_u32() {
entry:
;CHECK: mov.u32 r0, array_i32;
;CHECK-NEXT: ld.global.u32 r0, [r0+4];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]+4];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
%x = load i32* %i
@ -418,8 +418,8 @@ entry:
define ptx_device i64 @t5_u64() {
entry:
;CHECK: mov.u32 r0, array_i64;
;CHECK-NEXT: ld.global.u64 rd0, [r0+8];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]+8];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
%x = load i64* %i
@ -428,8 +428,8 @@ entry:
define ptx_device float @t5_f32() {
entry:
;CHECK: mov.u32 r0, array_float;
;CHECK-NEXT: ld.global.f32 r0, [r0+4];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]+4];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
%x = load float* %i
@ -438,8 +438,8 @@ entry:
define ptx_device double @t5_f64() {
entry:
;CHECK: mov.u32 r0, array_double;
;CHECK-NEXT: ld.global.f64 rd0, [r0+8];
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]+8];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
%x = load double* %i

View File

@ -2,7 +2,7 @@
define ptx_device float @test_sqrt_f32(float %x) {
entry:
; CHECK: sqrt.rn.f32 r0, r1;
; CHECK: sqrt.rn.f32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call float @llvm.sqrt.f32(float %x)
ret float %y
@ -10,7 +10,7 @@ entry:
define ptx_device double @test_sqrt_f64(double %x) {
entry:
; CHECK: sqrt.rn.f64 rd0, rd1;
; CHECK: sqrt.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call double @llvm.sqrt.f64(double %x)
ret double %y
@ -18,7 +18,7 @@ entry:
define ptx_device float @test_sin_f32(float %x) {
entry:
; CHECK: sin.approx.f32 r0, r1;
; CHECK: sin.approx.f32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call float @llvm.sin.f32(float %x)
ret float %y
@ -26,7 +26,7 @@ entry:
define ptx_device double @test_sin_f64(double %x) {
entry:
; CHECK: sin.approx.f64 rd0, rd1;
; CHECK: sin.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call double @llvm.sin.f64(double %x)
ret double %y
@ -34,7 +34,7 @@ entry:
define ptx_device float @test_cos_f32(float %x) {
entry:
; CHECK: cos.approx.f32 r0, r1;
; CHECK: cos.approx.f32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call float @llvm.cos.f32(float %x)
ret float %y
@ -42,7 +42,7 @@ entry:
define ptx_device double @test_cos_f64(double %x) {
entry:
; CHECK: cos.approx.f64 rd0, rd1;
; CHECK: cos.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call double @llvm.cos.f64(double %x)
ret double %y

View File

@ -1,7 +1,7 @@
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
define ptx_device float @t1_f32(float %x, float %y, float %z) {
; CHECK: mad.rn.f32 r0, r1, r2, r3;
; CHECK: mad.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fmul float %x, %y
%b = fadd float %a, %z
@ -9,7 +9,7 @@ define ptx_device float @t1_f32(float %x, float %y, float %z) {
}
define ptx_device double @t1_f64(double %x, double %y, double %z) {
; CHECK: mad.rn.f64 rd0, rd1, rd2, rd3;
; CHECK: mad.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fmul double %x, %y
%b = fadd double %a, %z

View File

@ -1,61 +1,61 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i16 @t1_u16() {
; CHECK: mov.u16 rh0, 0;
; CHECK: mov.u16 rh{{[0-9]+}}, 0;
; CHECK: ret;
ret i16 0
}
define ptx_device i32 @t1_u32() {
; CHECK: mov.u32 r0, 0;
; CHECK: mov.u32 r{{[0-9]+}}, 0;
; CHECK: ret;
ret i32 0
}
define ptx_device i64 @t1_u64() {
; CHECK: mov.u64 rd0, 0;
; CHECK: mov.u64 rd{{[0-9]+}}, 0;
; CHECK: ret;
ret i64 0
}
define ptx_device float @t1_f32() {
; CHECK: mov.f32 r0, 0F00000000;
; CHECK: mov.f32 r{{[0-9]+}}, 0F00000000;
; CHECK: ret;
ret float 0.0
}
define ptx_device double @t1_f64() {
; CHECK: mov.f64 rd0, 0D0000000000000000;
; CHECK: mov.f64 rd{{[0-9]+}}, 0D0000000000000000;
; CHECK: ret;
ret double 0.0
}
define ptx_device i16 @t2_u16(i16 %x) {
; CHECK: mov.u16 rh0, rh1;
; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}};
; CHECK: ret;
ret i16 %x
}
define ptx_device i32 @t2_u32(i32 %x) {
; CHECK: mov.u32 r0, r1;
; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK: ret;
ret i32 %x
}
define ptx_device i64 @t2_u64(i64 %x) {
; CHECK: mov.u64 rd0, rd1;
; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK: ret;
ret i64 %x
}
; Returns its float argument unchanged.  The expected register-to-register
; copy is mov.u32 between r-registers (not mov.f32), immediately followed by
; ret.
define ptx_device float @t3_f32(float %x) {
; CHECK: mov.u32 r0, r1;
; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
ret float %x
}
; Returns its double argument unchanged.  The expected register-to-register
; copy is mov.u64 between rd-registers (not mov.f64), immediately followed by
; ret.
define ptx_device double @t3_f64(double %x) {
; CHECK: mov.u64 rd0, rd1;
; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
ret double %x
}

View File

@ -11,28 +11,28 @@
;}
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: mul.rn.f32 r0, r1, r2
; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fmul float %x, %y
ret float %z
}
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: mul.rn.f64 rd0, rd1, rd2
; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fmul double %x, %y
ret double %z
}
define ptx_device float @t2_f32(float %x) {
; CHECK: mul.rn.f32 r0, r1, 0F40A00000;
; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F40A00000;
; CHECK-NEXT: ret;
%z = fmul float %x, 5.0
ret float %z
}
define ptx_device double @t2_f64(double %x) {
; CHECK: mul.rn.f64 rd0, rd1, 0D4014000000000000;
; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D4014000000000000;
; CHECK-NEXT: ret;
%z = fmul double %x, 5.0
ret double %z

View File

@ -1,8 +1,8 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; CHECK: .func (.reg .b32 r0) test_parameter_order (.reg .b32 r1, .reg .b32 r2, .reg .b32 r3, .reg .b32 r4)
; CHECK: .func (.reg .b32 r{{[0-9]+}}) test_parameter_order (.reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}})
; Takes interleaved float and i32 parameters and returns %b - %c.  The .func
; directive preceding this definition expects all four parameters, float and
; integer alike, to be declared as .reg .b32 r-registers; the directive in
; the body only requires a sub.u32 on three r-registers.
define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
; CHECK: sub.u32 r0, r2, r3
; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
%result = sub i32 %b, %c
ret i32 %result
}

View File

@ -1,25 +1,25 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) {
; CHECK: selp.u32 r0, r1, r2, p1;
; CHECK: selp.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}};
%a = select i1 %x, i32 %y, i32 %z
ret i32 %a
}
define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) {
; CHECK: selp.u64 rd0, rd1, rd2, p1;
; CHECK: selp.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}};
%a = select i1 %x, i64 %y, i64 %z
ret i64 %a
}
define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) {
; CHECK: selp.f32 r0, r1, r2, p1;
; CHECK: selp.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}};
%a = select i1 %x, float %y, float %z
ret float %a
}
define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) {
; CHECK: selp.f64 rd0, rd1, rd2, p1;
; CHECK: selp.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}};
%a = select i1 %x, double %y, double %z
ret double %a
}

View File

@ -1,8 +1,8 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.eq.u32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp eq i32 %x, %y
%z = zext i1 %p to i32
@ -10,8 +10,8 @@ define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.ne.u32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ne i32 %x, %y
%z = zext i1 %p to i32
@ -19,8 +19,8 @@ define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.lt.u32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ult i32 %x, %y
%z = zext i1 %p to i32
@ -28,8 +28,8 @@ define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.le.u32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.le.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ule i32 %x, %y
%z = zext i1 %p to i32
@ -37,8 +37,8 @@ define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.gt.u32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ugt i32 %x, %y
%z = zext i1 %p to i32
@ -46,8 +46,8 @@ define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.ge.u32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.ge.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp uge i32 %x, %y
%z = zext i1 %p to i32
@ -55,8 +55,8 @@ define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
; CHECK: setp.lt.s32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp slt i32 %x, %y
%z = zext i1 %p to i32
@ -64,8 +64,8 @@ define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
; CHECK: setp.le.s32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.le.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp sle i32 %x, %y
%z = zext i1 %p to i32
@ -73,8 +73,8 @@ define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
; CHECK: setp.gt.s32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp sgt i32 %x, %y
%z = zext i1 %p to i32
@ -82,8 +82,8 @@ define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
; CHECK: setp.ge.s32 p0, r1, r2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.ge.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp sge i32 %x, %y
%z = zext i1 %p to i32
@ -91,8 +91,8 @@ define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
; CHECK: setp.eq.u32 p0, r1, 1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp eq i32 %x, 1
%z = zext i1 %p to i32
@ -100,8 +100,8 @@ define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
; CHECK: setp.ne.u32 p0, r1, 1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ne i32 %x, 1
%z = zext i1 %p to i32
@ -109,8 +109,8 @@ define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
; CHECK: setp.eq.u32 p0, r1, 0;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ult i32 %x, 1
%z = zext i1 %p to i32
@ -118,8 +118,8 @@ define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
; CHECK: setp.lt.u32 p0, r1, 2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ule i32 %x, 1
%z = zext i1 %p to i32
@ -127,8 +127,8 @@ define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
; CHECK: setp.gt.u32 p0, r1, 1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ugt i32 %x, 1
%z = zext i1 %p to i32
@ -136,8 +136,8 @@ define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
; CHECK: setp.ne.u32 p0, r1, 0;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp uge i32 %x, 1
%z = zext i1 %p to i32
@ -145,8 +145,8 @@ define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
; CHECK: setp.lt.s32 p0, r1, 1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp slt i32 %x, 1
%z = zext i1 %p to i32
@ -154,8 +154,8 @@ define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
; CHECK: setp.lt.s32 p0, r1, 2;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp sle i32 %x, 1
%z = zext i1 %p to i32
@ -163,8 +163,8 @@ define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
; CHECK: setp.gt.s32 p0, r1, 1;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp sgt i32 %x, 1
%z = zext i1 %p to i32
@ -172,8 +172,8 @@ define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
; CHECK: setp.gt.s32 p0, r1, 0;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp sge i32 %x, 1
%z = zext i1 %p to i32
@ -181,9 +181,9 @@ define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
; CHECK: setp.gt.u32 p0, r3, r4;
; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, p[[P0]];
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%c = icmp eq i32 %x, %y
%d = icmp ugt i32 %u, %v
@ -193,9 +193,9 @@ define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
}
define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
; CHECK: setp.gt.b32 p0, r3, 0;
; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0;
; CHECK-NEXT: selp.u32 r0, 1, 0, p0;
; CHECK: setp.gt.b32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, !p[[P0]];
; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%c = trunc i32 %w to i1
%d = icmp eq i32 %x, %y

View File

@ -1,21 +1,21 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Left shift with both operands in registers. Expects a shl.b32 with three
; r-class operands, then a ret; register numbers are matched by regex.
define ptx_device i32 @t1(i32 %x, i32 %y) {
; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
%z = shl i32 %x, %y
; CHECK: ret;
ret i32 %z
}
; Left shift of a register by the constant 3. Expects a shl.b32 whose shift
; amount is the immediate 3, then a ret; register numbers are matched by regex.
define ptx_device i32 @t2(i32 %x) {
; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, 3
%z = shl i32 %x, 3
; CHECK: ret;
ret i32 %z
}
define ptx_device i32 @t3(i32 %x) {
; CHECK: shl.b32 r0, 3, r1
; CHECK: shl.b32 r{{[0-9]+}}, 3, r{{[0-9]+}}
%z = shl i32 3, %x
; CHECK: ret;
ret i32 %z

View File

@ -1,42 +1,42 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Logical right shift with both operands in registers. Expects a shr.u32 with
; three r-class operands, then a ret; register numbers are matched by regex.
define ptx_device i32 @t1(i32 %x, i32 %y) {
; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
%z = lshr i32 %x, %y
; CHECK: ret;
ret i32 %z
}
; Logical right shift of a register by the constant 3. Expects a shr.u32 whose
; shift amount is the immediate 3, then a ret; register numbers are matched by
; regex.
define ptx_device i32 @t2(i32 %x) {
; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, 3
%z = lshr i32 %x, 3
; CHECK: ret;
ret i32 %z
}
; Logical right shift of the constant 3 by a register amount. Expects a shr.u32
; whose shifted value is the immediate 3 and whose shift amount is a register,
; then a ret; register numbers are matched by regex.
define ptx_device i32 @t3(i32 %x) {
; CHECK: shr.u32 r{{[0-9]+}}, 3, r{{[0-9]+}}
%z = lshr i32 3, %x
; CHECK: ret;
ret i32 %z
}
; Arithmetic right shift with both operands in registers. Expects a shr.s32
; with three r-class operands, then a ret; register numbers are matched by
; regex.
define ptx_device i32 @t4(i32 %x, i32 %y) {
; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
%z = ashr i32 %x, %y
; CHECK: ret;
ret i32 %z
}
; Arithmetic right shift of a register by the constant 3. Expects a shr.s32
; whose shift amount is the immediate 3, then a ret; register numbers are
; matched by regex.
define ptx_device i32 @t5(i32 %x) {
; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, 3
%z = ashr i32 %x, 3
; CHECK: ret;
ret i32 %z
}
define ptx_device i32 @t6(i32 %x) {
; CHECK: shr.s32 r0, -3, r1
; CHECK: shr.s32 r{{[0-9]+}}, -3, r{{[0-9]+}}
%z = ashr i32 -3, %x
; CHECK: ret;
ret i32 %z

View File

@ -63,7 +63,7 @@
define ptx_device void @t1_u16(i16* %p, i16 %x) {
entry:
;CHECK: st.global.u16 [r1], rh1;
;CHECK: st.global.u16 [r{{[0-9]+}}], rh{{[0-9]+}};
;CHECK-NEXT: ret;
store i16 %x, i16* %p
ret void
@ -71,7 +71,7 @@ entry:
define ptx_device void @t1_u32(i32* %p, i32 %x) {
entry:
;CHECK: st.global.u32 [r1], r2;
;CHECK: st.global.u32 [r{{[0-9]+}}], r{{[0-9]+}};
;CHECK-NEXT: ret;
store i32 %x, i32* %p
ret void
@ -79,7 +79,7 @@ entry:
define ptx_device void @t1_u64(i64* %p, i64 %x) {
entry:
;CHECK: st.global.u64 [r1], rd1;
;CHECK: st.global.u64 [r{{[0-9]+}}], rd{{[0-9]+}};
;CHECK-NEXT: ret;
store i64 %x, i64* %p
ret void
@ -87,7 +87,7 @@ entry:
define ptx_device void @t1_f32(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [r1], r2;
;CHECK: st.global.f32 [r{{[0-9]+}}], r{{[0-9]+}};
;CHECK-NEXT: ret;
store float %x, float* %p
ret void
@ -95,7 +95,7 @@ entry:
define ptx_device void @t1_f64(double* %p, double %x) {
entry:
;CHECK: st.global.f64 [r1], rd1;
;CHECK: st.global.f64 [r{{[0-9]+}}], rd{{[0-9]+}};
;CHECK-NEXT: ret;
store double %x, double* %p
ret void
@ -103,7 +103,7 @@ entry:
define ptx_device void @t2_u16(i16* %p, i16 %x) {
entry:
;CHECK: st.global.u16 [r1+2], rh1;
;CHECK: st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i16* %p, i32 1
store i16 %x, i16* %i
@ -112,7 +112,7 @@ entry:
define ptx_device void @t2_u32(i32* %p, i32 %x) {
entry:
;CHECK: st.global.u32 [r1+4], r2;
;CHECK: st.global.u32 [r{{[0-9]+}}+4], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i32* %p, i32 1
store i32 %x, i32* %i
@ -121,7 +121,7 @@ entry:
define ptx_device void @t2_u64(i64* %p, i64 %x) {
entry:
;CHECK: st.global.u64 [r1+8], rd1;
;CHECK: st.global.u64 [r{{[0-9]+}}+8], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i64* %p, i32 1
store i64 %x, i64* %i
@ -130,7 +130,7 @@ entry:
define ptx_device void @t2_f32(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [r1+4], r2;
;CHECK: st.global.f32 [r{{[0-9]+}}+4], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 1
store float %x, float* %i
@ -139,7 +139,7 @@ entry:
define ptx_device void @t2_f64(double* %p, double %x) {
entry:
;CHECK: st.global.f64 [r1+8], rd1;
;CHECK: st.global.f64 [r{{[0-9]+}}+8], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr double* %p, i32 1
store double %x, double* %i
@ -148,9 +148,9 @@ entry:
define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
entry:
;CHECK: shl.b32 r0, r2, 1;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.u16 [r0], rh1;
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i16* %p, i32 %q
store i16 %x, i16* %i
@ -159,9 +159,9 @@ entry:
define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
entry:
;CHECK: shl.b32 r0, r2, 2;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.u32 [r0], r3;
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i32* %p, i32 %q
store i32 %x, i32* %i
@ -170,9 +170,9 @@ entry:
define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
entry:
;CHECK: shl.b32 r0, r2, 3;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.u64 [r0], rd1;
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i64* %p, i32 %q
store i64 %x, i64* %i
@ -181,9 +181,9 @@ entry:
define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
entry:
;CHECK: shl.b32 r0, r2, 2;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.f32 [r0], r3;
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 %q
store float %x, float* %i
@ -192,9 +192,9 @@ entry:
define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
entry:
;CHECK: shl.b32 r0, r2, 3;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.f64 [r0], rd1;
;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr double* %p, i32 %q
store double %x, double* %i
@ -203,8 +203,8 @@ entry:
define ptx_device void @t4_global_u16(i16 %x) {
entry:
;CHECK: mov.u32 r0, array_i16;
;CHECK-NEXT: st.global.u16 [r0], rh1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
store i16 %x, i16* %i
@ -213,8 +213,8 @@ entry:
define ptx_device void @t4_global_u32(i32 %x) {
entry:
;CHECK: mov.u32 r0, array_i32;
;CHECK-NEXT: st.global.u32 [r0], r1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
store i32 %x, i32* %i
@ -223,8 +223,8 @@ entry:
define ptx_device void @t4_global_u64(i64 %x) {
entry:
;CHECK: mov.u32 r0, array_i64;
;CHECK-NEXT: st.global.u64 [r0], rd1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
store i64 %x, i64* %i
@ -233,8 +233,8 @@ entry:
define ptx_device void @t4_global_f32(float %x) {
entry:
;CHECK: mov.u32 r0, array_float;
;CHECK-NEXT: st.global.f32 [r0], r1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
store float %x, float* %i
@ -243,8 +243,8 @@ entry:
define ptx_device void @t4_global_f64(double %x) {
entry:
;CHECK: mov.u32 r0, array_double;
;CHECK-NEXT: st.global.f64 [r0], rd1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
store double %x, double* %i
@ -253,8 +253,8 @@ entry:
define ptx_device void @t4_local_u16(i16 %x) {
entry:
;CHECK: mov.u32 r0, array_local_i16;
;CHECK-NEXT: st.local.u16 [r0], rh1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
;CHECK-NEXT: st.local.u16 [r[[R0]]], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
store i16 %x, i16 addrspace(2)* %i
@ -263,8 +263,8 @@ entry:
define ptx_device void @t4_local_u32(i32 %x) {
entry:
;CHECK: mov.u32 r0, array_local_i32;
;CHECK-NEXT: st.local.u32 [r0], r1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32;
;CHECK-NEXT: st.local.u32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
store i32 %x, i32 addrspace(2)* %i
@ -273,8 +273,8 @@ entry:
define ptx_device void @t4_local_u64(i64 %x) {
entry:
;CHECK: mov.u32 r0, array_local_i64;
;CHECK-NEXT: st.local.u64 [r0], rd1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64;
;CHECK-NEXT: st.local.u64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
store i64 %x, i64 addrspace(2)* %i
@ -283,8 +283,8 @@ entry:
define ptx_device void @t4_local_f32(float %x) {
entry:
;CHECK: mov.u32 r0, array_local_float;
;CHECK-NEXT: st.local.f32 [r0], r1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float;
;CHECK-NEXT: st.local.f32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
store float %x, float addrspace(2)* %i
@ -293,8 +293,8 @@ entry:
define ptx_device void @t4_local_f64(double %x) {
entry:
;CHECK: mov.u32 r0, array_local_double;
;CHECK-NEXT: st.local.f64 [r0], rd1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double;
;CHECK-NEXT: st.local.f64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
store double %x, double addrspace(2)* %i
@ -303,8 +303,8 @@ entry:
define ptx_device void @t4_shared_u16(i16 %x) {
entry:
;CHECK: mov.u32 r0, array_shared_i16;
;CHECK-NEXT: st.shared.u16 [r0], rh1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
;CHECK-NEXT: st.shared.u16 [r[[R0]]], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
store i16 %x, i16 addrspace(4)* %i
@ -313,8 +313,8 @@ entry:
define ptx_device void @t4_shared_u32(i32 %x) {
entry:
;CHECK: mov.u32 r0, array_shared_i32;
;CHECK-NEXT: st.shared.u32 [r0], r1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32;
;CHECK-NEXT: st.shared.u32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
store i32 %x, i32 addrspace(4)* %i
@ -323,8 +323,8 @@ entry:
define ptx_device void @t4_shared_u64(i64 %x) {
entry:
;CHECK: mov.u32 r0, array_shared_i64;
;CHECK-NEXT: st.shared.u64 [r0], rd1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64;
;CHECK-NEXT: st.shared.u64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
store i64 %x, i64 addrspace(4)* %i
@ -333,8 +333,8 @@ entry:
define ptx_device void @t4_shared_f32(float %x) {
entry:
;CHECK: mov.u32 r0, array_shared_float;
;CHECK-NEXT: st.shared.f32 [r0], r1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float;
;CHECK-NEXT: st.shared.f32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
store float %x, float addrspace(4)* %i
@ -343,8 +343,8 @@ entry:
define ptx_device void @t4_shared_f64(double %x) {
entry:
;CHECK: mov.u32 r0, array_shared_double;
;CHECK-NEXT: st.shared.f64 [r0], rd1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double;
;CHECK-NEXT: st.shared.f64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
store double %x, double addrspace(4)* %i
@ -353,8 +353,8 @@ entry:
define ptx_device void @t5_u16(i16 %x) {
entry:
;CHECK: mov.u32 r0, array_i16;
;CHECK-NEXT: st.global.u16 [r0+2], rh1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
;CHECK-NEXT: st.global.u16 [r[[R0]]+2], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
store i16 %x, i16* %i
@ -363,8 +363,8 @@ entry:
define ptx_device void @t5_u32(i32 %x) {
entry:
;CHECK: mov.u32 r0, array_i32;
;CHECK-NEXT: st.global.u32 [r0+4], r1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
;CHECK-NEXT: st.global.u32 [r[[R0]]+4], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
store i32 %x, i32* %i
@ -373,8 +373,8 @@ entry:
define ptx_device void @t5_u64(i64 %x) {
entry:
;CHECK: mov.u32 r0, array_i64;
;CHECK-NEXT: st.global.u64 [r0+8], rd1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
;CHECK-NEXT: st.global.u64 [r[[R0]]+8], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
store i64 %x, i64* %i
@ -383,8 +383,8 @@ entry:
define ptx_device void @t5_f32(float %x) {
entry:
;CHECK: mov.u32 r0, array_float;
;CHECK-NEXT: st.global.f32 [r0+4], r1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
;CHECK-NEXT: st.global.f32 [r[[R0]]+4], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
store float %x, float* %i
@ -393,8 +393,8 @@ entry:
define ptx_device void @t5_f64(double %x) {
entry:
;CHECK: mov.u32 r0, array_double;
;CHECK-NEXT: st.global.f64 [r0+8], rd1;
;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
;CHECK-NEXT: st.global.f64 [r[[R0]]+8], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
store double %x, double* %i

View File

@ -1,70 +1,70 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Register-register i16 subtraction. Expects a sub.u16 on three rh-class
; registers followed directly by ret; register numbers are matched by regex.
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
; CHECK: sub.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
%z = sub i16 %x, %y
ret i16 %z
}
; Register-register i32 subtraction. Expects a sub.u32 on three r-class
; registers followed directly by ret; register numbers are matched by regex.
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%z = sub i32 %x, %y
ret i32 %z
}
; Register-register i64 subtraction. Expects a sub.u64 on three rd-class
; registers followed directly by ret; register numbers are matched by regex.
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
; CHECK: sub.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%z = sub i64 %x, %y
ret i64 %z
}
; Register-register float subtraction. Expects a sub.rn.f32 on three r-class
; registers followed directly by ret; register numbers are matched by regex.
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: sub.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fsub float %x, %y
ret float %z
}
; Register-register double subtraction. Expects a sub.rn.f64 on three rd-class
; registers followed directly by ret; register numbers are matched by regex.
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: sub.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fsub double %x, %y
ret double %z
}
; Subtracts the constant 1 from an i16. The expected output is an add.u16 with
; the immediate -1 (not a sub), followed directly by ret; register numbers are
; matched by regex.
define ptx_device i16 @t2_u16(i16 %x) {
; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, -1;
; CHECK-NEXT: ret;
%z = sub i16 %x, 1
ret i16 %z
}
; Subtracts the constant 1 from an i32. The expected output is an add.u32 with
; the immediate -1 (not a sub), followed directly by ret; register numbers are
; matched by regex.
define ptx_device i32 @t2_u32(i32 %x) {
; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, -1;
; CHECK-NEXT: ret;
%z = sub i32 %x, 1
ret i32 %z
}
; Subtracts the constant 1 from an i64. The expected output is an add.u64 with
; the immediate -1 (not a sub), followed directly by ret; register numbers are
; matched by regex.
define ptx_device i64 @t2_u64(i64 %x) {
; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, -1;
; CHECK-NEXT: ret;
%z = sub i64 %x, 1
ret i64 %z
}
; Subtracts the constant 1.0 from a float. The expected output is an add.rn.f32
; with the immediate 0FBF800000 (not a sub), followed directly by ret; register
; numbers are matched by regex.
define ptx_device float @t2_f32(float %x) {
; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0FBF800000;
; CHECK-NEXT: ret;
%z = fsub float %x, 1.0
ret float %z
}
define ptx_device double @t2_f64(double %x) {
; CHECK: add.rn.f64 rd0, rd1, 0DBFF0000000000000;
; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0DBFF0000000000000;
; CHECK-NEXT: ret;
%z = fsub double %x, 1.0
ret double %z