1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[AMDGPU][MC] New syntax for ds_swizzle_b32 offset

See Bug 28601: https://bugs.llvm.org//show_bug.cgi?id=28601

Reviewers: artem.tamazov, vpykhtin

Differential Revision: https://reviews.llvm.org/D33542

llvm-svn: 304309
This commit is contained in:
Dmitry Preobrazhensky 2017-05-31 16:26:47 +00:00
parent 87c40bfe25
commit 4d8a23e13b
12 changed files with 746 additions and 20 deletions

View File

@ -161,7 +161,8 @@ public:
ImmTyOpSel,
ImmTyOpSelHi,
ImmTyNegLo,
ImmTyNegHi
ImmTyNegHi,
ImmTySwizzle
};
struct TokOp {
@ -474,6 +475,7 @@ public:
bool isSWaitCnt() const;
bool isHwreg() const;
bool isSendMsg() const;
bool isSwizzle() const;
bool isSMRDOffset8() const;
bool isSMRDOffset20() const;
bool isSMRDLiteralOffset() const;
@ -659,6 +661,7 @@ public:
case ImmTyOpSelHi: OS << "OpSelHi"; break;
case ImmTyNegLo: OS << "NegLo"; break;
case ImmTyNegHi: OS << "NegHi"; break;
case ImmTySwizzle: OS << "Swizzle"; break;
}
}
@ -994,6 +997,12 @@ private:
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
bool trySkipId(const StringRef Id);
bool trySkipToken(const AsmToken::TokenKind Kind);
bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
bool parseExpr(int64_t &Imm);
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
@ -1003,6 +1012,19 @@ public:
OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
const unsigned MinVal,
const unsigned MaxVal,
const StringRef ErrMsg);
OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
bool parseSwizzleOffset(int64_t &Imm);
bool parseSwizzleMacro(int64_t &Imm);
bool parseSwizzleQuadPerm(int64_t &Imm);
bool parseSwizzleBitmaskPerm(int64_t &Imm);
bool parseSwizzleBroadcast(int64_t &Imm);
bool parseSwizzleSwap(int64_t &Imm);
bool parseSwizzleReverse(int64_t &Imm);
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
@ -2785,7 +2807,13 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
OptionalIdx[Op.getImmTy()] = i;
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
AMDGPUOperand::ImmTy OffsetType =
(Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
AMDGPUOperand::ImmTyOffset;
addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
if (!IsGdsHardcoded) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
}
@ -3383,6 +3411,298 @@ bool AMDGPUOperand::isSendMsg() const {
return isImmTy(ImmTySendMsg);
}
//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
if (getLexer().getKind() == AsmToken::Identifier &&
Parser.getTok().getString() == Id) {
Parser.Lex();
return true;
}
return false;
}
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
if (getLexer().getKind() == Kind) {
Parser.Lex();
return true;
}
return false;
}
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
const StringRef ErrMsg) {
if (!trySkipToken(Kind)) {
Error(Parser.getTok().getLoc(), ErrMsg);
return false;
}
return true;
}
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
return !getParser().parseAbsoluteExpression(Imm);
}
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
SMLoc S = Parser.getTok().getLoc();
if (getLexer().getKind() == AsmToken::String) {
Val = Parser.getTok().getStringContents();
Parser.Lex();
return true;
} else {
Error(S, ErrMsg);
return false;
}
}
//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
const unsigned OrMask,
const unsigned XorMask) {
using namespace llvm::AMDGPU::Swizzle;
return BITMASK_PERM_ENC |
(AndMask << BITMASK_AND_SHIFT) |
(OrMask << BITMASK_OR_SHIFT) |
(XorMask << BITMASK_XOR_SHIFT);
}
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
const unsigned MinVal,
const unsigned MaxVal,
const StringRef ErrMsg) {
for (unsigned i = 0; i < OpNum; ++i) {
if (!skipToken(AsmToken::Comma, "expected a comma")){
return false;
}
SMLoc ExprLoc = Parser.getTok().getLoc();
if (!parseExpr(Op[i])) {
return false;
}
if (Op[i] < MinVal || Op[i] > MaxVal) {
Error(ExprLoc, ErrMsg);
return false;
}
}
return true;
}
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
using namespace llvm::AMDGPU::Swizzle;
int64_t Lane[LANE_NUM];
if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
"expected a 2-bit lane id")) {
Imm = QUAD_PERM_ENC;
for (auto i = 0; i < LANE_NUM; ++i) {
Imm |= Lane[i] << (LANE_SHIFT * i);
}
return true;
}
return false;
}
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
using namespace llvm::AMDGPU::Swizzle;
SMLoc S = Parser.getTok().getLoc();
int64_t GroupSize;
int64_t LaneIdx;
if (!parseSwizzleOperands(1, &GroupSize,
2, 32,
"group size must be in the interval [2,32]")) {
return false;
}
if (!isPowerOf2_64(GroupSize)) {
Error(S, "group size must be a power of two");
return false;
}
if (parseSwizzleOperands(1, &LaneIdx,
0, GroupSize - 1,
"lane id must be in the interval [0,group size - 1]")) {
Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
return true;
}
return false;
}
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
using namespace llvm::AMDGPU::Swizzle;
SMLoc S = Parser.getTok().getLoc();
int64_t GroupSize;
if (!parseSwizzleOperands(1, &GroupSize,
2, 32, "group size must be in the interval [2,32]")) {
return false;
}
if (!isPowerOf2_64(GroupSize)) {
Error(S, "group size must be a power of two");
return false;
}
Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
return true;
}
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
using namespace llvm::AMDGPU::Swizzle;
SMLoc S = Parser.getTok().getLoc();
int64_t GroupSize;
if (!parseSwizzleOperands(1, &GroupSize,
1, 16, "group size must be in the interval [1,16]")) {
return false;
}
if (!isPowerOf2_64(GroupSize)) {
Error(S, "group size must be a power of two");
return false;
}
Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
return true;
}
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
using namespace llvm::AMDGPU::Swizzle;
if (!skipToken(AsmToken::Comma, "expected a comma")) {
return false;
}
StringRef Ctl;
SMLoc StrLoc = Parser.getTok().getLoc();
if (!parseString(Ctl)) {
return false;
}
if (Ctl.size() != BITMASK_WIDTH) {
Error(StrLoc, "expected a 5-character mask");
return false;
}
unsigned AndMask = 0;
unsigned OrMask = 0;
unsigned XorMask = 0;
for (size_t i = 0; i < Ctl.size(); ++i) {
unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
switch(Ctl[i]) {
default:
Error(StrLoc, "invalid mask");
return false;
case '0':
break;
case '1':
OrMask |= Mask;
break;
case 'p':
AndMask |= Mask;
break;
case 'i':
AndMask |= Mask;
XorMask |= Mask;
break;
}
}
Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
return true;
}
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
SMLoc OffsetLoc = Parser.getTok().getLoc();
if (!parseExpr(Imm)) {
return false;
}
if (!isUInt<16>(Imm)) {
Error(OffsetLoc, "expected a 16-bit offset");
return false;
}
return true;
}
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
using namespace llvm::AMDGPU::Swizzle;
if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
SMLoc ModeLoc = Parser.getTok().getLoc();
bool Ok = false;
if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
Ok = parseSwizzleQuadPerm(Imm);
} else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
Ok = parseSwizzleBitmaskPerm(Imm);
} else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
Ok = parseSwizzleBroadcast(Imm);
} else if (trySkipId(IdSymbolic[ID_SWAP])) {
Ok = parseSwizzleSwap(Imm);
} else if (trySkipId(IdSymbolic[ID_REVERSE])) {
Ok = parseSwizzleReverse(Imm);
} else {
Error(ModeLoc, "expected a swizzle mode");
}
return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
}
return false;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
int64_t Imm = 0;
if (trySkipId("offset")) {
bool Ok = false;
if (skipToken(AsmToken::Colon, "expected a colon")) {
if (trySkipId("swizzle")) {
Ok = parseSwizzleMacro(Imm);
} else {
Ok = parseSwizzleOffset(Imm);
}
}
Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
return Ok? MatchOperand_Success : MatchOperand_ParseFail;
} else {
return MatchOperand_NoMatch;
}
}
bool
AMDGPUOperand::isSwizzle() const {
return isImmTy(ImmTySwizzle);
}
//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

View File

@ -145,10 +145,10 @@ class DS_1A2D_Off8_RET<string opName,
let hasPostISelHook = 1;
}
class DS_1A_RET<string opName, RegisterClass rc = VGPR_32>
class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, Operand ofs = offset>
: DS_Pseudo<opName,
(outs rc:$vdst),
(ins VGPR_32:$addr, offset:$offset, gds:$gds),
(ins VGPR_32:$addr, ofs:$offset, gds:$gds),
"$vdst, $addr$offset$gds"> {
let has_data0 = 0;
@ -440,7 +440,7 @@ def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32">;
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, SwizzleImm>;
}
let mayStore = 0 in {

View File

@ -1160,6 +1160,112 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
O << SImm16; // Unknown simm16 code.
}
static void printSwizzleBitmask(const uint16_t AndMask,
const uint16_t OrMask,
const uint16_t XorMask,
raw_ostream &O) {
using namespace llvm::AMDGPU::Swizzle;
uint16_t Probe0 = ((0 & AndMask) | OrMask) ^ XorMask;
uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;
O << "\"";
for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) {
uint16_t p0 = Probe0 & Mask;
uint16_t p1 = Probe1 & Mask;
if (p0 == p1) {
if (p0 == 0) {
O << "0";
} else {
O << "1";
}
} else {
if (p0 == 0) {
O << "p";
} else {
O << "i";
}
}
}
O << "\"";
}
void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace llvm::AMDGPU::Swizzle;
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm == 0) {
return;
}
O << " offset:";
if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
for (auto i = 0; i < LANE_NUM; ++i) {
O << ",";
O << formatDec(Imm & LANE_MASK);
Imm >>= LANE_SHIFT;
}
O << ")";
} else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) {
uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
uint16_t OrMask = (Imm >> BITMASK_OR_SHIFT) & BITMASK_MASK;
uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;
if (AndMask == BITMASK_MAX &&
OrMask == 0 &&
countPopulation(XorMask) == 1) {
O << "swizzle(" << IdSymbolic[ID_SWAP];
O << ",";
O << formatDec(XorMask);
O << ")";
} else if (AndMask == BITMASK_MAX &&
OrMask == 0 && XorMask > 0 &&
isPowerOf2_64(XorMask + 1)) {
O << "swizzle(" << IdSymbolic[ID_REVERSE];
O << ",";
O << formatDec(XorMask + 1);
O << ")";
} else {
uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
if (GroupSize > 1 &&
isPowerOf2_64(GroupSize) &&
OrMask < GroupSize &&
XorMask == 0) {
O << "swizzle(" << IdSymbolic[ID_BROADCAST];
O << ",";
O << formatDec(GroupSize);
O << ",";
O << formatDec(OrMask);
O << ")";
} else {
O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM];
O << ",";
printSwizzleBitmask(AndMask, OrMask, XorMask, O);
O << ")";
}
}
} else {
printU16ImmDecOperand(MI, OpNo, O);
}
}
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {

View File

@ -193,6 +193,8 @@ private:
raw_ostream &O);
void printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printSwizzle(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,

View File

@ -281,6 +281,46 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
} // namespace Hwreg
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
enum Id { // id of symbolic names
ID_QUAD_PERM = 0,
ID_BITMASK_PERM,
ID_SWAP,
ID_REVERSE,
ID_BROADCAST
};
enum EncBits {
// swizzle mode encodings
QUAD_PERM_ENC = 0x8000,
QUAD_PERM_ENC_MASK = 0xFF00,
BITMASK_PERM_ENC = 0x0000,
BITMASK_PERM_ENC_MASK = 0x8000,
// QUAD_PERM encodings
LANE_MASK = 0x3,
LANE_MAX = LANE_MASK,
LANE_SHIFT = 2,
LANE_NUM = 4,
// BITMASK_PERM encodings
BITMASK_MASK = 0x1F,
BITMASK_MAX = BITMASK_MASK,
BITMASK_WIDTH = 5,
BITMASK_AND_SHIFT = 0,
BITMASK_OR_SHIFT = 5,
BITMASK_XOR_SHIFT = 10
};
} // namespace Swizzle
namespace SDWA {
enum SdwaSel {

View File

@ -383,6 +383,14 @@ def SendMsgMatchClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
def SwizzleMatchClass : AsmOperandClass {
let Name = "Swizzle";
let PredicateMethod = "isSwizzle";
let ParserMethod = "parseSwizzleOp";
let RenderMethod = "addImmOperands";
let IsOptional = 1;
}
def ExpTgtMatchClass : AsmOperandClass {
let Name = "ExpTgt";
let PredicateMethod = "isExpTgt";
@ -395,6 +403,11 @@ def SendMsgImm : Operand<i32> {
let ParserMatchClass = SendMsgMatchClass;
}
def SwizzleImm : Operand<i16> {
let PrintMethod = "printSwizzle";
let ParserMatchClass = SwizzleMatchClass;
}
def SWaitMatchClass : AsmOperandClass {
let Name = "SWaitCnt";
let RenderMethod = "addImmOperands";

View File

@ -65,5 +65,18 @@ const char* const IdSymbolic[] = {
};
} // namespace Hwreg
namespace Swizzle {
// This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h.
const char* const IdSymbolic[] = {
"QUAD_PERM",
"BITMASK_PERM",
"SWAP",
"REVERSE",
"BROADCAST",
};
} // namespace Swizzle
} // namespace AMDGPU
} // namespace llvm

View File

@ -25,6 +25,12 @@ namespace Hwreg { // Symbolic names for the hwreg(...) syntax.
extern const char* const IdSymbolic[];
} // namespace Hwreg
namespace Swizzle { // Symbolic names for the swizzle(...) syntax.
extern const char* const IdSymbolic[];
} // namespace Swizzle
} // namespace AMDGPU
} // namespace llvm

View File

@ -4,7 +4,7 @@
declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0
; FUNC-LABEL: {{^}}ds_swizzle:
; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:100
; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11")
; CHECK: s_waitcnt lgkmcnt
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind {
%swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0

View File

@ -21,3 +21,93 @@ ds_write2_b32 v2, v4, v6 offset0:1000000000
// CHECK: invalid operand for instruction
ds_write2_b32 v2, v4, v6 offset1:1000000000
//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//
// CHECK: error: expected a colon
ds_swizzle_b32 v8, v2 offset
// CHECK: error: failed parsing operand
ds_swizzle_b32 v8, v2 offset:
// CHECK: error: expected a colon
ds_swizzle_b32 v8, v2 offset-
// CHECK: error: expected absolute expression
ds_swizzle_b32 v8, v2 offset:SWIZZLE(QUAD_PERM, 0, 1, 2, 3)
// CHECK: error: expected a swizzle mode
ds_swizzle_b32 v8, v2 offset:swizzle(quad_perm, 0, 1, 2, 3)
// CHECK: error: expected a swizzle mode
ds_swizzle_b32 v8, v2 offset:swizzle(XXX,1)
// CHECK: error: expected a comma
ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM
// CHECK: error: expected a comma
ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2)
// CHECK: error: expected a closing parentheses
ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3
// CHECK: error: expected a closing parentheses
ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3, 4)
// CHECK: error: expected a 2-bit lane id
ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, -1, 1, 2, 3)
// CHECK: error: expected a 2-bit lane id
ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 4, 1, 2, 3)
// CHECK: error: group size must be in the interval [1,16]
ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,0)
// CHECK: error: group size must be a power of two
ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,3)
// CHECK: error: group size must be in the interval [1,16]
ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,17)
// CHECK: error: group size must be in the interval [1,16]
ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,32)
// CHECK: error: group size must be in the interval [2,32]
ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,1)
// CHECK: error: group size must be a power of two
ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,3)
// CHECK: error: group size must be in the interval [2,32]
ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,33)
// CHECK: error: group size must be in the interval [2,32]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,1,0)
// CHECK: error: group size must be a power of two
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,3,1)
// CHECK: error: group size must be in the interval [2,32]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,33,1)
// CHECK: error: lane id must be in the interval [0,group size - 1]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,-1)
// CHECK: error: lane id must be in the interval [0,group size - 1]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,2)
// CHECK: error: expected a string
ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, pppii)
// CHECK: error: expected a 5-character mask
ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "")
// CHECK: error: expected a 5-character mask
ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "ppii")
// CHECK: error: expected a 5-character mask
ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppiii")
// CHECK: invalid mask
ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppi2")

View File

@ -267,10 +267,6 @@ ds_max_rtn_f32 v8, v2, v4
// SICI: ds_max_rtn_f32 v8, v2, v4 ; encoding: [0x00,0x00,0xcc,0xd8,0x02,0x04,0x00,0x08]
// VI: ds_max_rtn_f32 v8, v2, v4 ; encoding: [0x00,0x00,0x66,0xd8,0x02,0x04,0x00,0x08]
ds_swizzle_b32 v8, v2
// SICI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_read_b32 v8, v2
// SICI: ds_read_b32 v8, v2 ; encoding: [0x00,0x00,0xd8,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_read_b32 v8, v2 ; encoding: [0x00,0x00,0x6c,0xd8,0x02,0x00,0x00,0x08]
@ -506,3 +502,143 @@ ds_nop
// NOSI: error: instruction not supported on this GPU
// CI: ds_nop ; encoding: [0x00,0x00,0x50,0xd8,0x00,0x00,0x00,0x00]
// VI: ds_nop ; encoding: [0x00,0x00,0x28,0xd8,0x00,0x00,0x00,0x00]
//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//
ds_swizzle_b32 v8, v2
// SICI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:0xFFFF
// SICI: ds_swizzle_b32 v8, v2 offset:65535 ; encoding: [0xff,0xff,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:65535 ; encoding: [0xff,0xff,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM,0,1,2,3) ; encoding: [0xe4,0x80,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM,0,1,2,3) ; encoding: [0xe4,0x80,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 2, 1, 3, 3)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM,2,1,3,3) ; encoding: [0xf6,0x80,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM,2,1,3,3) ; encoding: [0xf6,0x80,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1) ; encoding: [0x1f,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1) ; encoding: [0x1f,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,2)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,2) ; encoding: [0x1f,0x08,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,2) ; encoding: [0x1f,0x08,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,4)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,4) ; encoding: [0x1f,0x10,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,4) ; encoding: [0x1f,0x10,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,8)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,8) ; encoding: [0x1f,0x20,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,8) ; encoding: [0x1f,0x20,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,16)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,16) ; encoding: [0x1f,0x40,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,16) ; encoding: [0x1f,0x40,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,2)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1) ; encoding: [0x1f,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(SWAP,1) ; encoding: [0x1f,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4) ; encoding: [0x1f,0x0c,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4) ; encoding: [0x1f,0x0c,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,8)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,8) ; encoding: [0x1f,0x1c,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,8) ; encoding: [0x1f,0x1c,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,16)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,16) ; encoding: [0x1f,0x3c,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,16) ; encoding: [0x1f,0x3c,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,32)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,32) ; encoding: [0x1f,0x7c,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,32) ; encoding: [0x1f,0x7c,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,1)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,1) ; encoding: [0x3e,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,1) ; encoding: [0x3e,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,1)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,1) ; encoding: [0x3c,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,1) ; encoding: [0x3c,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,1)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,1) ; encoding: [0x38,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,1) ; encoding: [0x38,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,1)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,1) ; encoding: [0x30,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,1) ; encoding: [0x30,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1) ; encoding: [0x20,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1) ; encoding: [0x20,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,0)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,0) ; encoding: [0x1e,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,2,0) ; encoding: [0x1e,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,3)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,3) ; encoding: [0x7c,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,4,3) ; encoding: [0x7c,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,7)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,7) ; encoding: [0xf8,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,8,7) ; encoding: [0xf8,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,15)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,15) ; encoding: [0xf0,0x01,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,16,15) ; encoding: [0xf0,0x01,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,31)
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,31) ; encoding: [0xe0,0x03,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,31) ; encoding: [0xe0,0x03,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "pppii")
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4) ; encoding: [0x1f,0x0c,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE,4) ; encoding: [0x1f,0x0c,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pip")
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"01pip") ; encoding: [0x07,0x09,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"01pip") ; encoding: [0x07,0x09,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:0x000
// SICI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 ; encoding: [0x00,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:0x001
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"0000p") ; encoding: [0x01,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"0000p") ; encoding: [0x01,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:0x020
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1) ; encoding: [0x20,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST,32,1) ; encoding: [0x20,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:0x021
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00001") ; encoding: [0x21,0x00,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00001") ; encoding: [0x21,0x00,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:0x400
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00001") ; encoding: [0x00,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00001") ; encoding: [0x00,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:0x401
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"0000i") ; encoding: [0x01,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"0000i") ; encoding: [0x01,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:0x420
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00000") ; encoding: [0x20,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00000") ; encoding: [0x20,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08]
ds_swizzle_b32 v8, v2 offset:0x421
// SICI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00000") ; encoding: [0x21,0x04,0xd4,0xd8,0x02,0x00,0x00,0x08]
// VI: ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM,"00000") ; encoding: [0x21,0x04,0x7a,0xd8,0x02,0x00,0x00,0x08]

View File

@ -990,23 +990,23 @@
# CHECK: ds_read_u16 v5, v1 offset:65535 gds ; encoding: [0xff,0xff,0x79,0xd8,0x01,0x00,0x00,0x05]
0xff,0xff,0x79,0xd8,0x01,0x00,0x00,0x05
# CHECK: ds_swizzle_b32 v5, v1 offset:65535 ; encoding: [0xff,0xff,0x7a,0xd8,0x01,0x00,0x00,0x05]
0xff,0xff,0x7a,0xd8,0x01,0x00,0x00,0x05
# CHECK: ds_swizzle_b32 v5, v1 ; encoding: [0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05]
0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05
# CHECK: ds_swizzle_b32 v255, v1 offset:65535 ; encoding: [0xff,0xff,0x7a,0xd8,0x01,0x00,0x00,0xff]
0xff,0xff,0x7a,0xd8,0x01,0x00,0x00,0xff
# CHECK: ds_swizzle_b32 v255, v1 ; encoding: [0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0xff]
0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0xff
# CHECK: ds_swizzle_b32 v5, v255 offset:65535 ; encoding: [0xff,0xff,0x7a,0xd8,0xff,0x00,0x00,0x05]
0xff,0xff,0x7a,0xd8,0xff,0x00,0x00,0x05
# CHECK: ds_swizzle_b32 v5, v255 ; encoding: [0x00,0x00,0x7a,0xd8,0xff,0x00,0x00,0x05]
0x00,0x00,0x7a,0xd8,0xff,0x00,0x00,0x05
# CHECK: ds_swizzle_b32 v5, v1 ; encoding: [0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05]
0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05
# CHECK: ds_swizzle_b32 v5, v1 offset:4 ; encoding: [0x04,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05]
0x04,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05
# CHECK: ds_swizzle_b32 v5, v1 ; encoding: [0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05]
0x00,0x00,0x7a,0xd8,0x01,0x00,0x00,0x05
# CHECK: ds_swizzle_b32 v5, v1 offset:65535 gds ; encoding: [0xff,0xff,0x7b,0xd8,0x01,0x00,0x00,0x05]
0xff,0xff,0x7b,0xd8,0x01,0x00,0x00,0x05
# CHECK: ds_swizzle_b32 v5, v1 gds ; encoding: [0x00,0x00,0x7b,0xd8,0x01,0x00,0x00,0x05]
0x00,0x00,0x7b,0xd8,0x01,0x00,0x00,0x05
# CHECK: ds_permute_b32 v5, v1, v2 offset:65535 ; encoding: [0xff,0xff,0x7c,0xd8,0x01,0x02,0x00,0x05]
0xff,0xff,0x7c,0xd8,0x01,0x02,0x00,0x05