From 0c663b698a2721aee8a94b78379f45d4c623fbd8 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Fri, 17 May 2013 16:50:20 +0000 Subject: [PATCH] R600: Improve texture handling llvm-svn: 182125 --- lib/Target/R600/AMDGPU.h | 1 + lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/AMDGPUTargetMachine.cpp | 2 + lib/Target/R600/CMakeLists.txt | 1 + .../R600/InstPrinter/AMDGPUInstPrinter.cpp | 45 +++ .../R600/InstPrinter/AMDGPUInstPrinter.h | 2 + .../R600/MCTargetDesc/R600MCCodeEmitter.cpp | 89 ++---- lib/Target/R600/R600ISelLowering.cpp | 258 ++++++++++++++-- lib/Target/R600/R600Instructions.td | 198 ++++++------ lib/Target/R600/R600Intrinsics.td | 37 +++ .../R600/R600TextureIntrinsicsReplacer.cpp | 286 ++++++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.tex.ll | 32 +- 12 files changed, 738 insertions(+), 214 deletions(-) create mode 100644 lib/Target/R600/R600TextureIntrinsicsReplacer.cpp diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 9792bd86f29..f9d70c9c5a3 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -21,6 +21,7 @@ class FunctionPass; class AMDGPUTargetMachine; // R600 Passes +FunctionPass* createR600TextureIntrinsicsReplacer(); FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index c2a79ea9995..f108fbc16ee 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -126,6 +126,7 @@ enum { SMIN, UMIN, URECIP, + TEXTURE_FETCH, EXPORT, CONST_ADDRESS, REGISTER_LOAD, diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 7175ec941a2..71be02a86fe 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -111,6 +111,8 @@ AMDGPUPassConfig::addPreISel() { if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { addPass(createAMDGPUStructurizeCFGPass()); addPass(createSIAnnotateControlFlowPass()); + } else { + addPass(createR600TextureIntrinsicsReplacer()); } return false; } diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 97f0a40c298..c5ce9dc789e 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -43,6 +43,7 @@ add_llvm_target(R600CodeGen R600MachineScheduler.cpp R600Packetizer.cpp R600RegisterInfo.cpp + R600TextureIntrinsicsReplacer.cpp SIAnnotateControlFlow.cpp SIInsertWaits.cpp SIInstrInfo.cpp diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index bbc6cc599dd..8de644b071f 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -198,6 +198,51 @@ void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo, return; } +void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Sel = MI->getOperand(OpNo).getImm(); + switch (Sel) { + case 0: + O << "X"; + break; + case 1: + O << "Y"; + break; + case 2: + O << "Z"; + break; + case 3: + O << "W"; + break; + case 4: + O << "0"; + break; + case 5: + O << "1"; + break; + case 7: + O << "_"; + break; + default: + break; + } +} + +void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned CT = MI->getOperand(OpNo).getImm(); + switch (CT) { + case 0: + O << "U"; + break; + case 1: + O << "N"; + break; + default: + break; + } +} + void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O) { int KCacheMode = MI->getOperand(OpNo).getImm(); diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index c6fd053f6dc..4c1dfa68ae2 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -49,6 +49,8 @@ private: void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O); }; diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index cb4cf0ce388..59f717ac5f0 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -81,21 +81,6 @@ enum FCInstr { FC_CONTINUE }; -enum TextureTypes { - TEXTURE_1D = 1, - TEXTURE_2D, - TEXTURE_3D, - TEXTURE_CUBE, - TEXTURE_RECT, - TEXTURE_SHADOW1D, - TEXTURE_SHADOW2D, - TEXTURE_SHADOWRECT, - TEXTURE_1D_ARRAY, - TEXTURE_2D_ARRAY, - TEXTURE_SHADOW1D_ARRAY, - TEXTURE_SHADOW2D_ARRAY -}; - MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { @@ -120,63 +105,29 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(InstWord2, OS); Emit((u_int32_t) 0, OS); } else if (IS_TEX(Desc)) { - unsigned Opcode = MI.getOpcode(); - bool HasOffsets = (Opcode == AMDGPU::TEX_LD); - unsigned OpOffset = HasOffsets ? 3 : 0; - int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); - int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); + int64_t Sampler = MI.getOperand(14).getImm(); - uint32_t SrcSelect[4] = {0, 1, 2, 3}; - uint32_t Offsets[3] = {0, 0, 0}; - uint64_t CoordType[4] = {1, 1, 1, 1}; + uint32_t SrcSelect[4] = { + MI.getOperand(2).getImm(), + MI.getOperand(3).getImm(), + MI.getOperand(4).getImm(), + MI.getOperand(5).getImm() + }; + uint32_t Offsets[3] = { + MI.getOperand(6).getImm() & 0x1F, + MI.getOperand(7).getImm() & 0x1F, + MI.getOperand(8).getImm() & 0x1F + }; - if (HasOffsets) - for (unsigned i = 0; i < 3; i++) { - int SignedOffset = MI.getOperand(i + 2).getImm(); - Offsets[i] = (SignedOffset & 0x1F); - } + uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups); + uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | + SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | + SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | + Offsets[2] << 10; - if (TextureType == TEXTURE_RECT || - TextureType == TEXTURE_SHADOWRECT) { - CoordType[ELEMENT_X] = 0; - CoordType[ELEMENT_Y] = 0; - } - - if (TextureType == TEXTURE_1D_ARRAY || - TextureType == TEXTURE_SHADOW1D_ARRAY) { - if (Opcode == AMDGPU::TEX_SAMPLE_C_L || - Opcode == AMDGPU::TEX_SAMPLE_C_LB) { - CoordType[ELEMENT_Y] = 0; - } else { - CoordType[ELEMENT_Z] = 0; - SrcSelect[ELEMENT_Z] = ELEMENT_Y; - } - } else if (TextureType == TEXTURE_2D_ARRAY || - TextureType == TEXTURE_SHADOW2D_ARRAY) { - CoordType[ELEMENT_Z] = 0; - } - - - if ((TextureType == TEXTURE_SHADOW1D || - TextureType == TEXTURE_SHADOW2D || - TextureType == TEXTURE_SHADOWRECT || - TextureType == TEXTURE_SHADOW1D_ARRAY) && - Opcode != AMDGPU::TEX_SAMPLE_C_L && - Opcode != AMDGPU::TEX_SAMPLE_C_LB) { - SrcSelect[ELEMENT_W] = ELEMENT_Z; - } - - uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) | - CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 | - CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63; - uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | - SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | - SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | - Offsets[2] << 10; - - Emit(Word01, OS); - Emit(Word2, OS); - Emit((u_int32_t) 0, OS); + Emit(Word01, OS); + Emit(Word2, OS); + Emit((u_int32_t) 0, OS); } else { uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 7252235d5ba..91165a1fd9d 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -188,23 +188,99 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::TXD: { unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + MachineOperand &RID = MI->getOperand(4); + MachineOperand &SID = MI->getOperand(5); + unsigned TextureId = MI->getOperand(6).getImm(); + unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3; + unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1; + switch (TextureId) { + case 5: // Rect + CTX = CTY = 0; + break; + case 6: // Shadow1D + SrcW = SrcZ; + break; + case 7: // Shadow2D + SrcW = SrcZ; + break; + case 8: // ShadowRect + CTX = CTY = 0; + SrcW = SrcZ; + break; + case 9: // 1DArray + SrcZ = SrcY; + CTZ = 0; + break; + case 10: // 2DArray + CTZ = 0; + break; + case 11: // Shadow1DArray + SrcZ = SrcY; + CTZ = 0; + break; + case 12: // Shadow2DArray + CTZ = 0; + break; + } BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) .addOperand(MI->getOperand(3)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)) - .addOperand(MI->getOperand(6)); + .addImm(SrcX) + .addImm(SrcY) + .addImm(SrcZ) + .addImm(SrcW) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(1) + .addImm(2) + .addImm(3) + .addOperand(RID) + .addOperand(SID) + .addImm(CTX) + .addImm(CTY) + .addImm(CTZ) + .addImm(CTW); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) .addOperand(MI->getOperand(2)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)) - .addOperand(MI->getOperand(6)); + .addImm(SrcX) + .addImm(SrcY) + .addImm(SrcZ) + .addImm(SrcW) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(1) + .addImm(2) + .addImm(3) + .addOperand(RID) + .addOperand(SID) + .addImm(CTX) + .addImm(CTY) + .addImm(CTZ) + .addImm(CTW); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)) - .addOperand(MI->getOperand(6)) + .addImm(SrcX) + .addImm(SrcY) + .addImm(SrcZ) + .addImm(SrcW) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(1) + .addImm(2) + .addImm(3) + .addOperand(RID) + .addOperand(SID) + .addImm(CTX) + .addImm(CTY) + .addImm(CTZ) + .addImm(CTW) .addReg(T0, RegState::Implicit) .addReg(T1, RegState::Implicit); break; @@ -213,23 +289,100 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::TXD_SHADOW: { unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + MachineOperand &RID = MI->getOperand(4); + MachineOperand &SID = MI->getOperand(5); + unsigned TextureId = MI->getOperand(6).getImm(); + unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3; + unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1; + + switch (TextureId) { + case 5: // Rect + CTX = CTY = 0; + break; + case 6: // Shadow1D + SrcW = SrcZ; + break; + case 7: // Shadow2D + SrcW = SrcZ; + break; + case 8: // ShadowRect + CTX = CTY = 0; + SrcW = SrcZ; + break; + case 9: // 1DArray + SrcZ = SrcY; + CTZ = 0; + break; + case 10: // 2DArray + CTZ = 0; + break; + case 11: // Shadow1DArray + SrcZ = SrcY; + CTZ = 0; + break; + case 12: // Shadow2DArray + CTZ = 0; + break; + } BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) .addOperand(MI->getOperand(3)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)) - .addOperand(MI->getOperand(6)); + .addImm(SrcX) + .addImm(SrcY) + .addImm(SrcZ) + .addImm(SrcW) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(1) + .addImm(2) + .addImm(3) + .addOperand(RID) + .addOperand(SID) + .addImm(CTX) + .addImm(CTY) + .addImm(CTZ) + .addImm(CTW); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) .addOperand(MI->getOperand(2)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)) - .addOperand(MI->getOperand(6)); + .addImm(SrcX) + .addImm(SrcY) + .addImm(SrcZ) + .addImm(SrcW) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(1) + .addImm(2) + .addImm(3) + .addOperand(RID) + .addOperand(SID) + .addImm(CTX) + .addImm(CTY) + .addImm(CTZ) + .addImm(CTW); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - .addOperand(MI->getOperand(4)) - .addOperand(MI->getOperand(5)) - .addOperand(MI->getOperand(6)) + .addImm(SrcX) + .addImm(SrcY) + .addImm(SrcZ) + .addImm(SrcW) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(1) + .addImm(2) + .addImm(3) + .addOperand(RID) + .addOperand(SID) + .addImm(CTX) + .addImm(CTY) + .addImm(CTZ) + .addImm(CTW) .addReg(T0, RegState::Implicit) .addReg(T1, RegState::Implicit); break; @@ -409,6 +562,75 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const return SDValue(interp, slot % 2); } + case AMDGPUIntrinsic::R600_tex: + case AMDGPUIntrinsic::R600_texc: + case AMDGPUIntrinsic::R600_txl: + case AMDGPUIntrinsic::R600_txlc: + case AMDGPUIntrinsic::R600_txb: + case AMDGPUIntrinsic::R600_txbc: + case AMDGPUIntrinsic::R600_txf: + case AMDGPUIntrinsic::R600_txq: + case AMDGPUIntrinsic::R600_ddx: + case AMDGPUIntrinsic::R600_ddy: { + unsigned TextureOp; + switch (IntrinsicID) { + case AMDGPUIntrinsic::R600_tex: + TextureOp = 0; + break; + case AMDGPUIntrinsic::R600_texc: + TextureOp = 1; + break; + case AMDGPUIntrinsic::R600_txl: + TextureOp = 2; + break; + case AMDGPUIntrinsic::R600_txlc: + TextureOp = 3; + break; + case AMDGPUIntrinsic::R600_txb: + TextureOp = 4; + break; + case AMDGPUIntrinsic::R600_txbc: + TextureOp = 5; + break; + case AMDGPUIntrinsic::R600_txf: + TextureOp = 6; + break; + case AMDGPUIntrinsic::R600_txq: + TextureOp = 7; + break; + case AMDGPUIntrinsic::R600_ddx: + TextureOp = 8; + break; + case AMDGPUIntrinsic::R600_ddy: + TextureOp = 9; + break; + default: + llvm_unreachable("Unknow Texture Operation"); + } + + SDValue TexArgs[19] = { + DAG.getConstant(TextureOp, MVT::i32), + Op.getOperand(1), + DAG.getConstant(0, MVT::i32), + DAG.getConstant(1, MVT::i32), + DAG.getConstant(2, MVT::i32), + DAG.getConstant(3, MVT::i32), + Op.getOperand(2), + Op.getOperand(3), + Op.getOperand(4), + DAG.getConstant(0, MVT::i32), + DAG.getConstant(1, MVT::i32), + DAG.getConstant(2, MVT::i32), + DAG.getConstant(3, MVT::i32), + Op.getOperand(5), + Op.getOperand(6), + Op.getOperand(7), + Op.getOperand(8), + Op.getOperand(9), + Op.getOperand(10) + }; + return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19); + } case r600_read_ngroups_x: return LowerImplicitParameter(DAG, VT, DL, 0); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 42510ae4213..22d320e4c7d 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -96,6 +96,12 @@ def UP : InstFlag <"printUpdatePred">; // Once we start using the packetizer in this backend we should have this // default to 0. def LAST : InstFlag<"printLast", 1>; +def RSel : Operand { + let PrintMethod = "printRSel"; +} +def CT: Operand { + let PrintMethod = "printCT"; +} def FRAMEri : Operand { let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index); @@ -463,38 +469,7 @@ class R600_REDUCTION inst, dag ins, string asm, list pattern, pattern, itin>; -class R600_TEX inst, string opName, list pattern, - InstrItinClass itin = AnyALU> : - InstR600 <(outs R600_Reg128:$DST_GPR), - (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget), - !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"), - pattern, - itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 { - let Inst{31-0} = Word0; - let Inst{63-32} = Word1; - let TEX_INST = inst{4-0}; - let SRC_REL = 0; - let DST_REL = 0; - let DST_SEL_X = 0; - let DST_SEL_Y = 1; - let DST_SEL_Z = 2; - let DST_SEL_W = 3; - let LOD_BIAS = 0; - - let INST_MOD = 0; - let FETCH_WHOLE_QUAD = 0; - let ALT_CONST = 0; - let SAMPLER_INDEX_MODE = 0; - let RESOURCE_INDEX_MODE = 0; - - let COORD_TYPE_X = 0; - let COORD_TYPE_Y = 0; - let COORD_TYPE_Z = 0; - let COORD_TYPE_W = 0; - - let TEXInst = 1; - } } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 @@ -618,6 +593,29 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", [SDNPVariadic] >; +def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>; + +def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>; + +multiclass TexPattern TextureOp, Instruction inst, ValueType vt = v4f32> { +def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR, + (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw), + (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz), + (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z), + (i32 imm:$DST_SEL_W), + (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID), + (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z), + (i32 imm:$COORD_TYPE_W)), + (inst R600_Reg128:$SRC_GPR, + imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw, + imm:$offsetx, imm:$offsety, imm:$offsetz, + imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z, + imm:$DST_SEL_W, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, + imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z, + imm:$COORD_TYPE_W)>; +} + //===----------------------------------------------------------------------===// // Interpolation Instructions //===----------------------------------------------------------------------===// @@ -1132,92 +1130,70 @@ def CNDGT_INT : R600_3OP < // Texture instructions //===----------------------------------------------------------------------===// -def TEX_LD : R600_TEX < - 0x03, "TEX_LD", - [(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR, - imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID, - imm:$SAMPLER_ID, imm:$textureTarget))] -> { -let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z," - "$RESOURCE_ID, $SAMPLER_ID, $textureTarget"; -let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X, - i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, - i32imm:$textureTarget); +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + +class R600_TEX inst, string opName> : + InstR600 <(outs R600_Reg128:$DST_GPR), + (ins R600_Reg128:$SRC_GPR, + RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw, + i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz, + RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W, + i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, + CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z, + CT:$COORD_TYPE_W), + !strconcat(opName, + " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, " + "$SRC_GPR.$srcx$srcy$srcz$srcw " + "RID:$RESOURCE_ID SID:$SAMPLER_ID " + "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"), + [], + NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 { + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + + let TEX_INST = inst{4-0}; + let SRC_REL = 0; + let DST_REL = 0; + let LOD_BIAS = 0; + + let INST_MOD = 0; + let FETCH_WHOLE_QUAD = 0; + let ALT_CONST = 0; + let SAMPLER_INDEX_MODE = 0; + let RESOURCE_INDEX_MODE = 0; + + let TEXInst = 1; } -def TEX_GET_TEXTURE_RESINFO : R600_TEX < - 0x04, "TEX_GET_TEXTURE_RESINFO", - [(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; +} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 -def TEX_GET_GRADIENTS_H : R600_TEX < - 0x07, "TEX_GET_GRADIENTS_H", - [(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; -def TEX_GET_GRADIENTS_V : R600_TEX < - 0x08, "TEX_GET_GRADIENTS_V", - [(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; -def TEX_SET_GRADIENTS_H : R600_TEX < - 0x0B, "TEX_SET_GRADIENTS_H", - [] ->; +def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">; +def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">; +def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">; +def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">; +def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">; +def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">; +def TEX_LD : R600_TEX <0x03, "TEX_LD">; +def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">; +def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">; +def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">; +def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">; +def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">; +def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">; +def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">; -def TEX_SET_GRADIENTS_V : R600_TEX < - 0x0C, "TEX_SET_GRADIENTS_V", - [] ->; - -def TEX_SAMPLE : R600_TEX < - 0x10, "TEX_SAMPLE", - [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; - -def TEX_SAMPLE_C : R600_TEX < - 0x18, "TEX_SAMPLE_C", - [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] ->; - -def TEX_SAMPLE_L : R600_TEX < - 0x11, "TEX_SAMPLE_L", - [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; - -def TEX_SAMPLE_C_L : R600_TEX < - 0x19, "TEX_SAMPLE_C_L", - [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] ->; - -def TEX_SAMPLE_LB : R600_TEX < - 0x12, "TEX_SAMPLE_LB", - [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] ->; - -def TEX_SAMPLE_C_LB : R600_TEX < - 0x1A, "TEX_SAMPLE_C_LB", - [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, - imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] ->; - -def TEX_SAMPLE_G : R600_TEX < - 0x14, "TEX_SAMPLE_G", - [] ->; - -def TEX_SAMPLE_C_G : R600_TEX < - 0x1C, "TEX_SAMPLE_C_G", - [] ->; +defm : TexPattern<0, TEX_SAMPLE>; +defm : TexPattern<1, TEX_SAMPLE_C>; +defm : TexPattern<2, TEX_SAMPLE_L>; +defm : TexPattern<3, TEX_SAMPLE_C_L>; +defm : TexPattern<4, TEX_SAMPLE_LB>; +defm : TexPattern<5, TEX_SAMPLE_C_LB>; +defm : TexPattern<6, TEX_LD, v4i32>; +defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>; +defm : TexPattern<8, TEX_GET_GRADIENTS_H>; +defm : TexPattern<9, TEX_GET_GRADIENTS_V>; //===----------------------------------------------------------------------===// // Helper classes for common instructions diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td index dc8980aef14..58d86b623db 100644 --- a/lib/Target/R600/R600Intrinsics.td +++ b/lib/Target/R600/R600Intrinsics.td @@ -12,12 +12,49 @@ //===----------------------------------------------------------------------===// let TargetPrefix = "R600", isTarget = 1 in { + class TextureIntrinsicFloatInput : + Intrinsic<[llvm_v4f32_ty], [ + llvm_v4f32_ty, // Coord + llvm_i32_ty, // offset_x + llvm_i32_ty, // offset_y, + llvm_i32_ty, // offset_z, + llvm_i32_ty, // resource_id + llvm_i32_ty, // samplerid + llvm_i32_ty, // coord_type_x + llvm_i32_ty, // coord_type_y + llvm_i32_ty, // coord_type_z + llvm_i32_ty // coord_type_w + ], [IntrNoMem]>; + class TextureIntrinsicInt32Input : + Intrinsic<[llvm_v4i32_ty], [ + llvm_v4i32_ty, // Coord + llvm_i32_ty, // offset_x + llvm_i32_ty, // offset_y, + llvm_i32_ty, // offset_z, + llvm_i32_ty, // resource_id + llvm_i32_ty, // samplerid + llvm_i32_ty, // coord_type_x + llvm_i32_ty, // coord_type_y + llvm_i32_ty, // coord_type_z + llvm_i32_ty // coord_type_w + ], [IntrNoMem]>; + def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; def int_R600_interp_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_R600_load_texbuf : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_R600_tex : TextureIntrinsicFloatInput; + def int_R600_texc : TextureIntrinsicFloatInput; + def int_R600_txl : TextureIntrinsicFloatInput; + def int_R600_txlc : TextureIntrinsicFloatInput; + def int_R600_txb : TextureIntrinsicFloatInput; + def int_R600_txbc : TextureIntrinsicFloatInput; + def int_R600_txf : TextureIntrinsicInt32Input; + def int_R600_txq : TextureIntrinsicInt32Input; + def int_R600_ddx : TextureIntrinsicFloatInput; + def int_R600_ddy : TextureIntrinsicFloatInput; def int_R600_store_swizzle : Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_R600_store_stream_output : diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp new file mode 100644 index 00000000000..938bd51f742 --- /dev/null +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp @@ -0,0 +1,286 @@ +//===-- R600TextureIntrinsicsReplacer.cpp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass translates tgsi-like texture intrinsics into R600 texture +/// closer to hardware intrinsics. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Function.h" +#include "llvm/InstVisitor.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/GlobalValue.h" + +using namespace llvm; + +namespace { +class R600TextureIntrinsicsReplacer : + public FunctionPass, public InstVisitor { + static char ID; + + Module *Mod; + Type *FloatType; + Type *Int32Type; + Type *V4f32Type; + Type *V4i32Type; + FunctionType *TexSign; + FunctionType *TexQSign; + + void getAdjustementFromTextureTarget(unsigned TextureType, bool hasLOD, + unsigned SrcSelect[4], unsigned CT[4], + bool &useShadowVariant) { + enum TextureTypes { + TEXTURE_1D = 1, + TEXTURE_2D, + TEXTURE_3D, + TEXTURE_CUBE, + TEXTURE_RECT, + TEXTURE_SHADOW1D, + TEXTURE_SHADOW2D, + TEXTURE_SHADOWRECT, + TEXTURE_1D_ARRAY, + TEXTURE_2D_ARRAY, + TEXTURE_SHADOW1D_ARRAY, + TEXTURE_SHADOW2D_ARRAY, + TEXTURE_SHADOWCUBE, + TEXTURE_2D_MSAA, + TEXTURE_2D_ARRAY_MSAA, + TEXTURE_CUBE_ARRAY, + TEXTURE_SHADOWCUBE_ARRAY + }; + + switch (TextureType) { + case 0: + return; + case TEXTURE_RECT: + case TEXTURE_1D: + case TEXTURE_2D: + case TEXTURE_3D: + case TEXTURE_CUBE: + case TEXTURE_1D_ARRAY: + case TEXTURE_2D_ARRAY: + case TEXTURE_CUBE_ARRAY: + case TEXTURE_2D_MSAA: + case TEXTURE_2D_ARRAY_MSAA: + useShadowVariant = false; + break; + case TEXTURE_SHADOW1D: + case TEXTURE_SHADOW2D: + case TEXTURE_SHADOWRECT: + case TEXTURE_SHADOW1D_ARRAY: + case TEXTURE_SHADOW2D_ARRAY: + case TEXTURE_SHADOWCUBE: + case TEXTURE_SHADOWCUBE_ARRAY: + useShadowVariant = true; + break; + default: + llvm_unreachable("Unknow Texture Type"); + } + + if (TextureType == TEXTURE_RECT || + TextureType == TEXTURE_SHADOWRECT) { + CT[0] = 0; + CT[1] = 0; + } + + if (TextureType == TEXTURE_CUBE_ARRAY || + TextureType == TEXTURE_SHADOWCUBE_ARRAY) { + CT[2] = 0; + } + + if (TextureType == TEXTURE_1D_ARRAY || + TextureType == TEXTURE_SHADOW1D_ARRAY) { + if (hasLOD && useShadowVariant) { + CT[1] = 0; + } else { + CT[2] = 0; + SrcSelect[2] = 1; + } + } else if (TextureType == TEXTURE_2D_ARRAY || + TextureType == TEXTURE_SHADOW2D_ARRAY) { + CT[2] = 0; + } + + if ((TextureType == TEXTURE_SHADOW1D || + TextureType == TEXTURE_SHADOW2D || + TextureType == TEXTURE_SHADOWRECT || + TextureType == TEXTURE_SHADOW1D_ARRAY) && + !(hasLOD && useShadowVariant)) { + SrcSelect[3] = 2; + } + } + + void ReplaceCallInst(CallInst &I, FunctionType *FT, const char *Name, + unsigned SrcSelect[4], Value *Offset[3], Value *Resource, + Value *Sampler, unsigned CT[4], Value *Coord) { + IRBuilder<> Builder(&I); + Constant *Mask[] = { + ConstantInt::get(Int32Type, SrcSelect[0]), + ConstantInt::get(Int32Type, SrcSelect[1]), + ConstantInt::get(Int32Type, SrcSelect[2]), + ConstantInt::get(Int32Type, SrcSelect[3]) + }; + Value *SwizzleMask = ConstantVector::get(Mask); + Value *SwizzledCoord = + Builder.CreateShuffleVector(Coord, Coord, SwizzleMask); + + Value *Args[] = { + SwizzledCoord, + Offset[0], + Offset[1], + Offset[2], + Resource, + Sampler, + ConstantInt::get(Int32Type, CT[0]), + ConstantInt::get(Int32Type, CT[1]), + ConstantInt::get(Int32Type, CT[2]), + ConstantInt::get(Int32Type, CT[3]) + }; + + Function *F = Mod->getFunction(Name); + if (!F) { + F = Function::Create(FT, GlobalValue::ExternalLinkage, Name, Mod); + F->addFnAttr(Attribute::ReadNone); + } + I.replaceAllUsesWith(Builder.CreateCall(F, Args)); + I.eraseFromParent(); + } + + void ReplaceTexIntrinsic(CallInst &I, bool hasLOD, FunctionType *FT, + const char *VanillaInt, + const char *ShadowInt) { + Value *Coord = I.getArgOperand(0); + Value *ResourceId = I.getArgOperand(1); + Value *SamplerId = I.getArgOperand(2); + + unsigned TextureType = + dyn_cast(I.getArgOperand(3))->getZExtValue(); + + unsigned SrcSelect[4] = { 0, 1, 2, 3 }; + unsigned CT[4] = {1, 1, 1, 1}; + Value *Offset[3] = { + ConstantInt::get(Int32Type, 0), + ConstantInt::get(Int32Type, 0), + ConstantInt::get(Int32Type, 0) + }; + bool useShadowVariant; + + getAdjustementFromTextureTarget(TextureType, hasLOD, SrcSelect, CT, + useShadowVariant); + + ReplaceCallInst(I, FT, useShadowVariant?ShadowInt:VanillaInt, SrcSelect, + Offset, ResourceId, SamplerId, CT, Coord); + } + + void ReplaceTXF(CallInst &I) { + Value *Coord = I.getArgOperand(0); + Value *ResourceId = I.getArgOperand(4); + Value *SamplerId = I.getArgOperand(5); + + unsigned TextureType = + dyn_cast(I.getArgOperand(6))->getZExtValue(); + + unsigned SrcSelect[4] = { 0, 1, 2, 3 }; + unsigned CT[4] = {1, 1, 1, 1}; + Value *Offset[3] = { + I.getArgOperand(1), + I.getArgOperand(2), + I.getArgOperand(3), + }; + bool useShadowVariant; + + getAdjustementFromTextureTarget(TextureType, false, SrcSelect, CT, + useShadowVariant); + + ReplaceCallInst(I, TexQSign, "llvm.R600.txf", SrcSelect, + Offset, ResourceId, SamplerId, CT, Coord); + } + +public: + R600TextureIntrinsicsReplacer(): + FunctionPass(ID) { + } + + virtual bool doInitialization(Module &M) { + LLVMContext &Ctx = M.getContext(); + Mod = &M; + FloatType = Type::getFloatTy(Ctx); + Int32Type = Type::getInt32Ty(Ctx); + V4f32Type = VectorType::get(FloatType, 4); + V4i32Type = VectorType::get(Int32Type, 4); + Type *ArgsType[] = { + V4f32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + }; + TexSign = FunctionType::get(V4f32Type, ArgsType); + Type *ArgsQType[] = { + V4i32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + Int32Type, + }; + TexQSign = FunctionType::get(V4f32Type, ArgsQType); + return false; + } + + virtual bool runOnFunction(Function &F) { + visit(F); + return false; + } + + virtual const char *getPassName() const { + return "R600 Texture Intrinsics Replacer"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + } + + void visitCallInst(CallInst &I) { + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.tex") + ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", "llvm.R600.texc"); + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txl") + ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txl", "llvm.R600.txlc"); + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txb") + ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txb", "llvm.R600.txbc"); + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txf") + ReplaceTXF(I); + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txq") + ReplaceTexIntrinsic(I, false, TexQSign, "llvm.R600.txq", "llvm.R600.txq"); + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.ddx") + ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddx", "llvm.R600.ddx"); + if (I.getCalledFunction()->getName() == "llvm.AMDGPU.ddy") + ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddy", "llvm.R600.ddy"); + } + +}; + +char R600TextureIntrinsicsReplacer::ID = 0; + +} + +FunctionPass *llvm::createR600TextureIntrinsicsReplacer() { + return new R600TextureIntrinsicsReplacer(); +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll index 81fd43d4692..4ea82bb4c6a 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.tex.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll @@ -1,21 +1,21 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 1 -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 2 -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 3 -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 4 -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 5 -;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 6 -;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 7 -;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 8 -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 9 -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 10 -;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 11 -;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 12 -;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 13 -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 14 -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 15 -;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 16 +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN +;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN +;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %addr = load <4 x float> addrspace(1)* %in