R600: Const/Neg/Abs can be folded to dot4
llvm-svn: 183278
commit 7c89765008
parent 1c010771f4
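What the change does, in short: when a source operand of a DOT_4 node is an FNEG, an FABS, or a constant read through CONST_ADDRESS, the wrapper node can be dropped and the instruction's per-source Neg/Abs/Sel fields set instead, as long as the constant-read limit (fitsConstReadLimitations) is still respected. The standalone C++ sketch below only models that folding on a toy operand type; the names (SrcSlot, WrappedValue, foldOperand, ConstLimit) are illustrative assumptions and are not the LLVM API used in the diff that follows.

// A minimal, self-contained sketch (not LLVM code): fold Neg/Abs/Const
// wrappers into the modifier fields of a DOT_4-style source slot.
#include <cstdio>
#include <vector>

enum Wrapper { None, FNeg, FAbs, ConstAddress };

struct WrappedValue {   // stand-in for an SDValue feeding a source operand
  Wrapper Wrap;
  int ConstSel;         // which constant is addressed, if any
};

struct SrcSlot {        // per-source fields encoded on the instruction
  WrappedValue Value;
  int Sel;              // constant selector
  bool Neg, Abs;
};

// Returns true if the wrapper could be absorbed into the slot's fields.
// ConstLimit loosely models fitsConstReadLimitations().
bool foldOperand(SrcSlot &S, std::vector<int> &UsedConsts, unsigned ConstLimit) {
  switch (S.Value.Wrap) {
  case ConstAddress:
    if (UsedConsts.size() >= ConstLimit)
      return false;                       // too many constants already read
    UsedConsts.push_back(S.Value.ConstSel);
    S.Sel = S.Value.ConstSel;             // read the constant via Sel ...
    S.Value.Wrap = None;                  // ... instead of through a copy
    return true;
  case FNeg:
    S.Value.Wrap = None;
    S.Neg = true;                         // absorb the negation
    return true;
  case FAbs:
    S.Value.Wrap = None;
    S.Abs = true;                         // absorb the absolute value
    return true;
  default:
    return false;
  }
}

int main() {
  SrcSlot S = { { FNeg, 0 }, 0, false, false };
  std::vector<int> UsedConsts;
  std::printf("folded: %d, neg bit: %d\n",
              (int)foldOperand(S, UsedConsts, 4), (int)S.Neg);
  return 0;
}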
@@ -49,7 +49,10 @@ public:
 private:
   inline SDValue getSmallIPtrImm(unsigned Imm);
+  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
+      const R600InstrInfo *TII, std::vector<unsigned> Cst);
   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
 
   // Complex pattern selectors
   bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);

@@ -318,6 +321,20 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
     const R600InstrInfo *TII =
         static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+    if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
+      bool IsModified = false;
+      do {
+        std::vector<SDValue> Ops;
+        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+            I != E; ++I)
+          Ops.push_back(*I);
+        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
+        if (IsModified) {
+          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
+        }
+      } while (IsModified);
+
+    }
     if (Result && Result->isMachineOpcode() &&
         !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
         && TII->isALUInstr(Result->getMachineOpcode())) {

@@ -360,6 +377,43 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   return Result;
 }
 
+bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
+                                     SDValue &Abs, const R600InstrInfo *TII,
+                                     std::vector<unsigned> Consts) {
+  switch (Src.getOpcode()) {
+  case AMDGPUISD::CONST_ADDRESS: {
+    SDValue CstOffset;
+    if (Src.getValueType().isVector() ||
+        !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset))
+      return false;
+
+    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+    Consts.push_back(Cst->getZExtValue());
+    if (!TII->fitsConstReadLimitations(Consts))
+      return false;
+
+    Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+    Sel = CstOffset;
+    return true;
+  }
+  case ISD::FNEG:
+    Src = Src.getOperand(0);
+    Neg = CurDAG->getTargetConstant(1, MVT::i32);
+    return true;
+  case ISD::FABS:
+    if (!Abs.getNode())
+      return false;
+    Src = Src.getOperand(0);
+    Abs = CurDAG->getTargetConstant(1, MVT::i32);
+    return true;
+  case ISD::BITCAST:
+    Src = Src.getOperand(0);
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
     const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
   int OperandIdx[] = {

@@ -383,59 +437,101 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
     -1
   };
+
+  // Gather constants values
+  std::vector<unsigned> Consts;
+  for (unsigned j = 0; j < 3; j++) {
+    int SrcIdx = OperandIdx[j];
+    if (SrcIdx < 0)
+      break;
+    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+      if (Reg->getReg() == AMDGPU::ALU_CONST) {
+        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+        Consts.push_back(Cst->getZExtValue());
+      }
+    }
+  }
+
   for (unsigned i = 0; i < 3; i++) {
     if (OperandIdx[i] < 0)
       return false;
-    SDValue Operand = Ops[OperandIdx[i] - 1];
-    switch (Operand.getOpcode()) {
-    case AMDGPUISD::CONST_ADDRESS: {
-      SDValue CstOffset;
-      if (Operand.getValueType().isVector() ||
-          !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
-        break;
-
-      // Gather others constants values
-      std::vector<unsigned> Consts;
-      for (unsigned j = 0; j < 3; j++) {
-        int SrcIdx = OperandIdx[j];
-        if (SrcIdx < 0)
-          break;
-        if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
-          if (Reg->getReg() == AMDGPU::ALU_CONST) {
-            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
-            Consts.push_back(Cst->getZExtValue());
-          }
-        }
-      }
-
-      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
-      Consts.push_back(Cst->getZExtValue());
-      if (!TII->fitsConstReadLimitations(Consts))
-        break;
-
-      Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
-      Ops[SelIdx[i] - 1] = CstOffset;
-      return true;
-    }
-    case ISD::FNEG:
-      if (NegIdx[i] < 0)
-        break;
-      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
-      Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
-      return true;
-    case ISD::FABS:
-      if (AbsIdx[i] < 0)
-        break;
-      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
-      Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
-      return true;
-    case ISD::BITCAST:
-      Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
-      return true;
-    default:
-      break;
-    }
-  }
+    SDValue &Src = Ops[OperandIdx[i] - 1];
+    SDValue &Sel = Ops[SelIdx[i] - 1];
+    SDValue &Neg = Ops[NegIdx[i] - 1];
+    SDValue FakeAbs;
+    SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
+    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+      return true;
+  }
+  return false;
+}
+
+bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
+    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+  int OperandIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_W)
+  };
+  int SelIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_W)
+  };
+  int NegIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_W)
+  };
+  int AbsIdx[] = {
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_W),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_X),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Y),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Z),
+    TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_W)
+  };
+
+  // Gather constants values
+  std::vector<unsigned> Consts;
+  for (unsigned j = 0; j < 8; j++) {
+    int SrcIdx = OperandIdx[j];
+    if (SrcIdx < 0)
+      break;
+    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+      if (Reg->getReg() == AMDGPU::ALU_CONST) {
+        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+        Consts.push_back(Cst->getZExtValue());
+      }
+    }
+  }
+
+  for (unsigned i = 0; i < 8; i++) {
+    if (OperandIdx[i] < 0)
+      return false;
+    SDValue &Src = Ops[OperandIdx[i] - 1];
+    SDValue &Sel = Ops[SelIdx[i] - 1];
+    SDValue &Neg = Ops[NegIdx[i] - 1];
+    SDValue &Abs = Ops[AbsIdx[i] - 1];
+    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+      return true;
+  }
   return false;
 }

@@ -108,7 +108,8 @@ private:
     std::vector<std::pair<unsigned, unsigned> > UsedKCache;
     const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Consts =
         TII->getSrcs(MI);
-    assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
+    assert((TII->isALUInstr(MI->getOpcode()) ||
+        MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const");
     for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
       if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
         continue;

@@ -183,6 +184,9 @@ private:
       if (TII->isALUInstr(I->getOpcode()) &&
           !SubstituteKCacheBank(I, KCacheBanks))
         break;
+      if (I->getOpcode() == AMDGPU::DOT_4 &&
+          !SubstituteKCacheBank(I, KCacheBanks))
+        break;
       AluInstCount += OccupiedDwords(I);
     }
     unsigned Opcode = PushBeforeModifier ?

@@ -214,7 +214,9 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
             .getReg();
         (void) Src0;
         (void) Src1;
-        assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
+        if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
+            (TRI.getEncodingValue(Src1) & 0xff) < 127)
+          assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
       }
       MI.eraseFromParent();
       continue;

@@ -169,6 +169,31 @@ SmallVector<std::pair<MachineOperand *, int64_t>, 3>
 R600InstrInfo::getSrcs(MachineInstr *MI) const {
   SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
 
+  if (MI->getOpcode() == AMDGPU::DOT_4) {
+    static const R600Operands::VecOps OpTable[8][2] = {
+      {R600Operands::SRC0_X, R600Operands::SRC0_SEL_X},
+      {R600Operands::SRC0_Y, R600Operands::SRC0_SEL_Y},
+      {R600Operands::SRC0_Z, R600Operands::SRC0_SEL_Z},
+      {R600Operands::SRC0_W, R600Operands::SRC0_SEL_W},
+      {R600Operands::SRC1_X, R600Operands::SRC1_SEL_X},
+      {R600Operands::SRC1_Y, R600Operands::SRC1_SEL_Y},
+      {R600Operands::SRC1_Z, R600Operands::SRC1_SEL_Z},
+      {R600Operands::SRC1_W, R600Operands::SRC1_SEL_W},
+    };
+
+    for (unsigned j = 0; j < 8; j++) {
+      MachineOperand &MO = MI->getOperand(OpTable[j][0] + 1);
+      unsigned Reg = MO.getReg();
+      if (Reg == AMDGPU::ALU_CONST) {
+        unsigned Sel = MI->getOperand(OpTable[j][1] + 1).getImm();
+        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+        continue;
+      }
+
+    }
+    return Result;
+  }
+
   static const R600Operands::Ops OpTable[3][2] = {
     {R600Operands::SRC0, R600Operands::SRC0_SEL},
     {R600Operands::SRC1, R600Operands::SRC1_SEL},

@@ -967,6 +992,11 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
   return getOperandIdx(MI.getOpcode(), Op);
 }
 
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
+                                 R600Operands::VecOps Op) const {
+  return getOperandIdx(MI.getOpcode(), Op);
+}
+
 int R600InstrInfo::getOperandIdx(unsigned Opcode,
                                  R600Operands::Ops Op) const {
   unsigned TargetFlags = get(Opcode).TSFlags;

@@ -997,6 +1027,11 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
   return R600Operands::ALUOpTable[OpTableIdx][Op];
 }
 
+int R600InstrInfo::getOperandIdx(unsigned Opcode,
+                                 R600Operands::VecOps Op) const {
+  return Op + 1;
+}
+
 void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
                                   int64_t Imm) const {
   int Idx = getOperandIdx(*MI, Op);

@@ -212,11 +212,13 @@ namespace llvm {
   ///
   /// \returns -1 if the Instruction does not contain the specified \p Op.
   int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
+  int getOperandIdx(const MachineInstr &MI, R600Operands::VecOps Op) const;
 
   /// \brief Get the index of \p Op for the given Opcode.
   ///
   /// \returns -1 if the Instruction does not contain the specified \p Op.
   int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
+  int getOperandIdx(unsigned Opcode, R600Operands::VecOps Op) const;
 
   /// \brief Helper function for setting instruction flag values.
   void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;

test/CodeGen/R600/dot4-folding.ll (new file, 27 lines)
@@ -0,0 +1,27 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Exactly one constant vector can be folded into dot4, which means exactly
+; 4 MOV instructions
+; CHECK: @main
+; CHECK: MOV
+; CHECK: MOV
+; CHECK: MOV
+; CHECK: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+
+define void @main(float addrspace(1)* %out) {
+main_body:
+  %0 = load <4 x float> addrspace(8)* null
+  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)
+  %3 = insertelement <4 x float> undef, float %2, i32 0
+  call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
+  ret void
+}
+
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+attributes #1 = { readnone }