mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
AMDGPU: Remove SITypeRewriter
This was an old workaround for using v16i8 in some old intrinsics for resource descriptors. llvm-svn: 306603
This commit is contained in:
parent
b366d853fe
commit
9eac763e7b
@ -36,7 +36,6 @@ FunctionPass *createR600ControlFlowFinalizer();
|
||||
FunctionPass *createAMDGPUCFGStructurizerPass();
|
||||
|
||||
// SI Passes
|
||||
FunctionPass *createSITypeRewriter();
|
||||
FunctionPass *createSIAnnotateControlFlowPass();
|
||||
FunctionPass *createSIFoldOperandsPass();
|
||||
FunctionPass *createSIPeepholeSDWAPass();
|
||||
|
@ -720,7 +720,6 @@ bool GCNPassConfig::addPreISel() {
|
||||
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
|
||||
}
|
||||
addPass(createSinkingPass());
|
||||
addPass(createSITypeRewriter());
|
||||
addPass(createAMDGPUAnnotateUniformValues());
|
||||
if (!LateCFGStructurize) {
|
||||
addPass(createSIAnnotateControlFlowPass());
|
||||
|
@ -96,7 +96,6 @@ add_llvm_target(AMDGPUCodeGen
|
||||
SIPeepholeSDWA.cpp
|
||||
SIRegisterInfo.cpp
|
||||
SIShrinkInstructions.cpp
|
||||
SITypeRewriter.cpp
|
||||
SIWholeQuadMode.cpp
|
||||
GCNIterativeScheduler.cpp
|
||||
GCNMinRegStrategy.cpp
|
||||
|
@ -1,156 +0,0 @@
|
||||
//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// This pass performs the following type substitutions on all
|
||||
/// non-compute shaders:
|
||||
///
|
||||
/// v16i8 => i128
|
||||
/// - v16i8 is used for constant memory resource descriptors. This type is
|
||||
/// legal for some compute APIs, and we don't want to declare it as legal
|
||||
/// in the backend, because we want the legalizer to expand all v16i8
|
||||
/// operations.
|
||||
/// v1* => *
|
||||
/// - Having v1* types complicates the legalizer and we can easily replace
|
||||
/// them with the element type.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstVisitor.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class SITypeRewriter : public FunctionPass,
|
||||
public InstVisitor<SITypeRewriter> {
|
||||
|
||||
static char ID;
|
||||
Module *Mod;
|
||||
Type *v16i8;
|
||||
Type *v4i32;
|
||||
|
||||
public:
|
||||
SITypeRewriter() : FunctionPass(ID) { }
|
||||
bool doInitialization(Module &M) override;
|
||||
bool runOnFunction(Function &F) override;
|
||||
StringRef getPassName() const override { return "SI Type Rewriter"; }
|
||||
void visitLoadInst(LoadInst &I);
|
||||
void visitCallInst(CallInst &I);
|
||||
void visitBitCast(BitCastInst &I);
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
// Storage for the pass ID that the constructor hands to FunctionPass.
char SITypeRewriter::ID = 0;
|
||||
|
||||
/// Records the module being compiled and builds the two vector types
/// (<16 x i8> and <4 x i32>) that the visitors compare against.
///
/// \returns false always; nothing in the module is changed here.
bool SITypeRewriter::doInitialization(Module &M) {
  auto &Ctx = M.getContext();
  Type *const ByteTy = Type::getInt8Ty(Ctx);
  Type *const DwordTy = Type::getInt32Ty(Ctx);

  Mod = &M;
  v16i8 = VectorType::get(ByteTy, 16);
  v4i32 = VectorType::get(DwordTy, 4);
  return false;
}
|
||||
|
||||
/// Runs the instruction visitors over \p F if it uses a shader calling
/// convention.
///
/// \returns false when \p F is not a shader (it is left untouched), true
/// otherwise.
bool SITypeRewriter::runOnFunction(Function &F) {
  if (!AMDGPU::isShader(F.getCallingConv()))
    return false;

  // NOTE(review): the function is traversed twice. Presumably the second
  // traversal lets visitBitCast() fold the bitcasts that the load/call
  // rewrites inserted during the first one -- confirm before changing.
  visit(F);
  visit(F);

  // The visitors replace and erase instructions but do not report whether
  // they did so. Conservatively tell the pass manager the IR changed rather
  // than claiming it is untouched.
  return true;
}
|
||||
|
||||
/// Rewrites a load of <16 x i8> into a load of <4 x i32> through a bitcast
/// pointer, followed by a bitcast of the loaded value back to the original
/// type. Loads of any other type are left alone.
///
/// The replacement load keeps the original's alignment, volatile flag and
/// all metadata other than the debug location. \p I is erased on rewrite.
void SITypeRewriter::visitLoadInst(LoadInst &I) {
  Value *Ptr = I.getPointerOperand();
  Type *PtrTy = Ptr->getType();
  if (PtrTy->getPointerElementType() != v16i8)
    return;

  IRBuilder<> Builder(&I);
  Type *NewPtrTy = PointerType::get(v4i32, PtrTy->getPointerAddressSpace());
  Value *NewPtr = Builder.CreateBitCast(Ptr, NewPtrTy);
  LoadInst *Load = Builder.CreateLoad(NewPtr);

  // Without this the new load would silently fall back to the default
  // alignment and lose volatility, which can differ from what the original
  // load promised.
  Load->setAlignment(I.getAlignment());
  Load->setVolatile(I.isVolatile());

  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  I.getAllMetadataOtherThanDebugLoc(MD);
  for (const auto &KindAndNode : MD)
    Load->setMetadata(KindAndNode.first, KindAndNode.second);

  // Existing users still expect the original type, so hand them a bitcast.
  Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
  I.replaceAllUsesWith(BitCastLoad);
  I.eraseFromParent();
}
|
||||
|
||||
/// Redirects a direct call whose arguments include <16 x i8> or <1 x i32>
/// values to a function variant that takes <4 x i32> / i32 instead.
///
/// For every <16 x i8> argument the value is bitcast to <4 x i32> and
/// ".v4i32" is appended to the callee name. For every <1 x i32> argument
/// whose callee name contains "v1i32", the scalar element is passed and that
/// part of the name becomes "i32". The renamed function is looked up in the
/// module and declared (external linkage, callee's attributes) if missing.
/// \p I is then replaced by a call to it and erased. Calls needing no
/// rewrite, and calls without a direct callee, are left alone.
void SITypeRewriter::visitCallInst(CallInst &I) {
  IRBuilder<> Builder(&I);

  Function *F = I.getCalledFunction();
  if (!F)
    return; // No direct callee, hence no name to rewrite.

  SmallVector<Value *, 8> Args;
  SmallVector<Type *, 8> Types;
  bool NeedToReplace = false;
  std::string Name = F->getName().str();

  Type *const Int32Ty = Type::getInt32Ty(I.getContext());
  const std::string TypeName = "i32";
  const std::string VecTypeName = "v1" + TypeName;

  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
    Value *Arg = I.getArgOperand(i);
    Type *ArgTy = Arg->getType();

    if (ArgTy == v16i8) {
      Args.push_back(Builder.CreateBitCast(Arg, v4i32));
      Types.push_back(v4i32);
      Name += ".v4i32";
      NeedToReplace = true;
      continue;
    }

    const bool IsV1I32 = ArgTy->isVectorTy() &&
                         ArgTy->getVectorNumElements() == 1 &&
                         ArgTy->getVectorElementType() == Int32Ty;
    // Only rewrite when the callee name really carries the "v1i32" suffix.
    // Calling replace() with npos would throw, and without a new name there
    // is no scalar variant to call.
    const std::string::size_type Pos =
        IsV1I32 ? Name.find(VecTypeName) : std::string::npos;
    if (Pos != std::string::npos) {
      Value *Scalar;
      if (auto *Def = dyn_cast<InsertElementInst>(Arg)) {
        // Common case: reuse the scalar that was inserted into the vector.
        Scalar = Def->getOperand(1);
      } else {
        // The vector may also be a constant, an argument, etc.; extract its
        // only element instead of assuming an insertelement definition.
        Scalar = Builder.CreateExtractElement(Arg, Builder.getInt32(0));
      }
      Args.push_back(Scalar);
      Types.push_back(Int32Ty);
      Name.replace(Pos, VecTypeName.length(), TypeName);
      NeedToReplace = true;
      continue;
    }

    Args.push_back(Arg);
    Types.push_back(ArgTy);
  }

  if (!NeedToReplace)
    return;

  Function *NewF = Mod->getFunction(Name);
  if (!NewF) {
    NewF = Function::Create(
        FunctionType::get(F->getReturnType(), Types, false),
        GlobalValue::ExternalLinkage, Name, Mod);
    NewF->setAttributes(F->getAttributes());
  }
  I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
  I.eraseFromParent();
}
|
||||
|
||||
/// Folds a round trip through bitcasts: when \p I casts to <4 x i32> and its
/// operand is itself a bitcast from <4 x i32>, all uses of \p I are replaced
/// with that inner bitcast's source value and \p I is erased. The inner
/// bitcast itself is left in place.
void SITypeRewriter::visitBitCast(BitCastInst &I) {
  if (I.getDestTy() != v4i32)
    return;

  auto *Inner = dyn_cast<BitCastInst>(I.getOperand(0));
  if (!Inner || Inner->getSrcTy() != v4i32)
    return;

  I.replaceAllUsesWith(Inner->getOperand(0));
  I.eraseFromParent();
}
|
||||
|
||||
/// Factory for the pass: allocates a new SITypeRewriter and returns it
/// through its FunctionPass base pointer.
FunctionPass *llvm::createSITypeRewriter() {
  FunctionPass *Pass = new SITypeRewriter();
  return Pass;
}
|
@ -8,8 +8,8 @@
|
||||
; of which were in SGPRs.
|
||||
define amdgpu_vs float @main(i32 %v) {
|
||||
main_body:
|
||||
%d1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 960)
|
||||
%d2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 976)
|
||||
%d1 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 960)
|
||||
%d2 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 976)
|
||||
br i1 undef, label %ENDIF56, label %IF57
|
||||
|
||||
IF57: ; preds = %ENDIF
|
||||
@ -41,7 +41,7 @@ ENDIF62: ; preds = %ENDIF59
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #0
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { readnone }
|
||||
|
@ -14,24 +14,24 @@
|
||||
; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
|
||||
; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
|
||||
|
||||
define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
|
||||
define amdgpu_vs void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <4 x i32>] addrspace(2)* byval %arg3, [17 x <4 x i32>] addrspace(2)* inreg %arg4, [17 x <4 x i32>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
|
||||
main_body:
|
||||
%tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
|
||||
%tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp = getelementptr [2 x <4 x i32>], [2 x <4 x i32>] addrspace(2)* %arg3, i64 0, i32 1
|
||||
%tmp10 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp11 = shl i32 %arg6, 2
|
||||
%tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
|
||||
%tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
|
||||
%tmp13 = bitcast i32 %tmp12 to float
|
||||
%tmp14 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
%tmp14 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 %tmp11, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
%tmp15 = bitcast i32 %tmp14 to float
|
||||
%tmp16 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
|
||||
%tmp16 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 %tmp11, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
|
||||
%tmp17 = bitcast i32 %tmp16 to float
|
||||
%tmp18 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
|
||||
%tmp18 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
|
||||
%tmp19 = bitcast i32 %tmp18 to float
|
||||
|
||||
%tmp20 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 123, i32 1, i32 1, i32 1, i32 1, i32 0)
|
||||
%tmp20 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 123, i32 1, i32 1, i32 1, i32 1, i32 0)
|
||||
%tmp21 = bitcast i32 %tmp20 to float
|
||||
|
||||
%tmp22 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 1234, i32 65535, i32 1, i32 1, i32 1, i32 1, i32 0)
|
||||
%tmp22 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32> %tmp10, <2 x i32> zeroinitializer, i32 1234, i32 65535, i32 1, i32 1, i32 1, i32 1, i32 0)
|
||||
%tmp23 = bitcast i32 %tmp22 to float
|
||||
|
||||
call void @llvm.amdgcn.exp.f32(i32 15, i32 12, float %tmp13, float %tmp15, float %tmp17, float %tmp19, i1 false, i1 false)
|
||||
@ -40,10 +40,10 @@ main_body:
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32 glc slc
|
||||
define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
|
||||
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
@ -15,7 +15,7 @@ define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) {
|
||||
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offset:32 glc slc
|
||||
define amdgpu_vs void @test1_idx(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
|
||||
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
@ -25,7 +25,7 @@ define amdgpu_vs void @test1_idx(i32 %a1, i32 %vaddr) {
|
||||
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, {{s[0-9]+}} idxen offset:32 glc slc
|
||||
define amdgpu_vs void @test1_scalar_offset(i32 %a1, i32 %vaddr, i32 inreg %soffset) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
|
||||
i32 4, i32 %vaddr, i32 %soffset, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
@ -35,7 +35,7 @@ define amdgpu_vs void @test1_scalar_offset(i32 %a1, i32 %vaddr, i32 inreg %soffs
|
||||
;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32
|
||||
define amdgpu_vs void @test1_no_glc_slc(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
|
||||
i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 0,
|
||||
i32 0, i32 0)
|
||||
ret void
|
||||
@ -45,7 +45,7 @@ define amdgpu_vs void @test1_no_glc_slc(i32 %a1, i32 %vaddr) {
|
||||
;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 offen offset:24 glc slc
|
||||
define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
|
||||
call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata,
|
||||
i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
@ -55,7 +55,7 @@ define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) {
|
||||
;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:11, nfmt:4, 0 offen offset:16 glc slc
|
||||
define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) {
|
||||
%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
|
||||
call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
|
||||
call void @llvm.SI.tbuffer.store.v2i32(<4 x i32> undef, <2 x i32> %vdata,
|
||||
i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
@ -64,12 +64,12 @@ define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) {
|
||||
;CHECK-LABEL: {{^}}test4:
|
||||
;CHECK: tbuffer_store_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:4, nfmt:4, 0 offen offset:8 glc slc
|
||||
define amdgpu_vs void @test4(i32 %vdata, i32 %vaddr) {
|
||||
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
|
||||
call void @llvm.SI.tbuffer.store.i32(<4 x i32> undef, i32 %vdata,
|
||||
i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
|
||||
i32 1, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.SI.tbuffer.store.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.SI.tbuffer.store.v2i32(<4 x i32>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
declare void @llvm.SI.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||
|
@ -55,14 +55,14 @@ entry:
|
||||
|
||||
; CHECK-LABEL: {{^}}soffset_max_imm:
|
||||
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
|
||||
define amdgpu_gs void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
|
||||
define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
|
||||
main_body:
|
||||
%tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
|
||||
%tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
|
||||
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
|
||||
%tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
|
||||
%tmp2 = shl i32 %6, 2
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
%tmp4 = add i32 %6, 16
|
||||
%tmp1.4xi32 = bitcast <16 x i8> %tmp1 to <4 x i32>
|
||||
%tmp1.4xi32 = bitcast <4 x i32> %tmp1 to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
|
||||
ret void
|
||||
}
|
||||
@ -74,14 +74,14 @@ main_body:
|
||||
; CHECK-LABEL: {{^}}soffset_no_fold:
|
||||
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
|
||||
; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
|
||||
define amdgpu_gs void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
|
||||
define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
|
||||
main_body:
|
||||
%tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
|
||||
%tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
|
||||
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(2)* %0, i32 0, i32 0
|
||||
%tmp1 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp0
|
||||
%tmp2 = shl i32 %6, 2
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
%tmp4 = add i32 %6, 16
|
||||
%tmp1.4xi32 = bitcast <16 x i8> %tmp1 to <4 x i32>
|
||||
%tmp1.4xi32 = bitcast <4 x i32> %tmp1 to <4 x i32>
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
|
||||
ret void
|
||||
}
|
||||
@ -176,7 +176,7 @@ define amdgpu_kernel void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
|
||||
|
||||
attributes #0 = { nounwind readonly }
|
||||
|
@ -23,7 +23,7 @@
|
||||
; GCN-NEXT: [[RET_BB]]:
|
||||
; GCN-NEXT: ; return
|
||||
; GCN-NEXT: .Lfunc_end0
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
|
||||
entry:
|
||||
%i.i = extractelement <2 x i32> %arg7, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg7, i32 1
|
||||
@ -75,7 +75,7 @@ ret.bb: ; preds = %else, %main_body
|
||||
; GCN-NEXT: s_waitcnt
|
||||
; GCN-NEXT: ; return
|
||||
; GCN-NEXT: .Lfunc_end
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
|
||||
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
|
||||
main_body:
|
||||
%i.i = extractelement <2 x i32> %arg7, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg7, i32 1
|
||||
@ -118,9 +118,6 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
|
||||
|
@ -15,16 +15,16 @@ target triple = "amdgcn--"
|
||||
|
||||
define amdgpu_gs void @main(i32 inreg %arg) #0 {
|
||||
main_body:
|
||||
%tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 20)
|
||||
%tmp1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 24)
|
||||
%tmp2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 48)
|
||||
%tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 20)
|
||||
%tmp1 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 24)
|
||||
%tmp2 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 48)
|
||||
%array_vector3 = insertelement <4 x float> zeroinitializer, float %tmp2, i32 3
|
||||
%array_vector5 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %tmp, i32 1
|
||||
%array_vector6 = insertelement <4 x float> %array_vector5, float undef, i32 2
|
||||
%array_vector9 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %tmp1, i32 1
|
||||
%array_vector10 = insertelement <4 x float> %array_vector9, float 0.000000e+00, i32 2
|
||||
%array_vector11 = insertelement <4 x float> %array_vector10, float undef, i32 3
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> undef, i32 undef, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> undef, i32 undef, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 36, i32 4, i32 4, i1 1, i1 1)
|
||||
%bc = bitcast <4 x float> %array_vector3 to <4 x i32>
|
||||
%tmp4 = extractelement <4 x i32> %bc, i32 undef
|
||||
@ -45,8 +45,8 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #2
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
|
||||
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #3
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="tonga" }
|
||||
|
@ -4,13 +4,13 @@
|
||||
; CHECK-LABEL: {{^}}phi1:
|
||||
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
|
||||
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
|
||||
define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @phi1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0)
|
||||
%tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
|
||||
%tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32)
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
|
||||
%tmp24 = fptosi float %tmp22 to i32
|
||||
%tmp25 = icmp ne i32 %tmp24, 0
|
||||
br i1 %tmp25, label %ENDIF, label %ELSE
|
||||
@ -28,29 +28,29 @@ ENDIF: ; preds = %ELSE, %main_body
|
||||
|
||||
; Make sure this program doesn't crash
|
||||
; CHECK-LABEL: {{^}}phi2:
|
||||
define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
|
||||
define amdgpu_ps void @phi2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
|
||||
%tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32)
|
||||
%tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 36)
|
||||
%tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 40)
|
||||
%tmp25 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 48)
|
||||
%tmp26 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 52)
|
||||
%tmp27 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 56)
|
||||
%tmp28 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 64)
|
||||
%tmp29 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 68)
|
||||
%tmp30 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 72)
|
||||
%tmp31 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 76)
|
||||
%tmp32 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 80)
|
||||
%tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 84)
|
||||
%tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 88)
|
||||
%tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 92)
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 36)
|
||||
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 40)
|
||||
%tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 48)
|
||||
%tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 52)
|
||||
%tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 56)
|
||||
%tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 64)
|
||||
%tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 68)
|
||||
%tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 72)
|
||||
%tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 76)
|
||||
%tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 80)
|
||||
%tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 84)
|
||||
%tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 88)
|
||||
%tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 92)
|
||||
%tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
|
||||
%tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
|
||||
%tmp38 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
|
||||
%tmp39 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp38, !tbaa !0
|
||||
%tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
|
||||
%tmp39 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp38, !tbaa !0
|
||||
%i.i = extractelement <2 x i32> %arg5, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg5, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
@ -85,7 +85,7 @@ main_body:
|
||||
%tmp46 = bitcast float %p2.i24 to i32
|
||||
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
|
||||
%tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
|
||||
%tmp39.bc = bitcast <16 x i8> %tmp39 to <4 x i32>
|
||||
%tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32>
|
||||
%a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float>
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp50 = extractelement <4 x float> %tmp1, i32 2
|
||||
@ -173,14 +173,14 @@ ENDIF24: ; preds = %IF25, %ENDIF
|
||||
|
||||
; We just want to make sure the program doesn't crash
|
||||
; CHECK-LABEL: {{^}}loop:
|
||||
define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @loop(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0)
|
||||
%tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 4)
|
||||
%tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 8)
|
||||
%tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 12)
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 4)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 8)
|
||||
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 12)
|
||||
%tmp25 = fptosi float %tmp24 to i32
|
||||
%tmp26 = bitcast i32 %tmp25 to float
|
||||
%tmp27 = bitcast float %tmp26 to i32
|
||||
@ -226,17 +226,17 @@ ENDIF: ; preds = %LOOP
|
||||
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
|
||||
; CHECK: exp
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
entry:
|
||||
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 16)
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 16)
|
||||
%tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
|
||||
%tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
|
||||
%tmp25 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
|
||||
%tmp26 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp25, !tbaa !0
|
||||
%tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
|
||||
%tmp26 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp25, !tbaa !0
|
||||
%tmp27 = fcmp oeq float %tmp22, 0.000000e+00
|
||||
%tmp26.bc = bitcast <16 x i8> %tmp26 to <4 x i32>
|
||||
%tmp26.bc = bitcast <4 x i32> %tmp26 to <4 x i32>
|
||||
br i1 %tmp27, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
@ -290,7 +290,7 @@ endif: ; preds = %if1, %if0, %entry
|
||||
; This test is just checking that we don't crash / assertion fail.
|
||||
; CHECK-LABEL: {{^}}copy2:
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
entry:
|
||||
br label %LOOP68
|
||||
|
||||
@ -326,11 +326,11 @@ ENDIF69: ; preds = %LOOP68
|
||||
; [[END]]:
|
||||
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
|
||||
; CHECK: s_endpgm
|
||||
define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
|
||||
define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
|
||||
bb:
|
||||
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
|
||||
%tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !3
|
||||
%tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i32 0, i32 0
|
||||
%tmp22 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !3
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp22, i32 16)
|
||||
%tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
|
||||
%tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !3
|
||||
%tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
|
||||
@ -420,7 +420,7 @@ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -6,15 +6,15 @@
|
||||
|
||||
; GCN-LABEL: {{^}}main:
|
||||
; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
|
||||
define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @main(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
|
||||
%tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
|
||||
%tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
|
||||
%tmp24 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
|
||||
%tmp25 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp24, !tbaa !0
|
||||
%tmp24 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
|
||||
%tmp25 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp24, !tbaa !0
|
||||
%i.i = extractelement <2 x i32> %arg5, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg5, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
@ -34,9 +34,8 @@ main_body:
|
||||
%tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1
|
||||
%tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2
|
||||
%tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3
|
||||
%tmp25.bc = bitcast <16 x i8> %tmp25 to <4 x i32>
|
||||
%tmp34.bc = bitcast <4 x i32> %tmp34 to <4 x float>
|
||||
%tmp35 = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %tmp34.bc, <8 x i32> %tmp23, <4 x i32> %tmp25.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp35 = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %tmp34.bc, <8 x i32> %tmp23, <4 x i32> %tmp25, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp36 = extractelement <4 x float> %tmp35, i32 0
|
||||
%tmp37 = extractelement <4 x float> %tmp35, i32 1
|
||||
%tmp38 = extractelement <4 x float> %tmp35, i32 2
|
||||
@ -49,7 +48,7 @@ declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -24,81 +24,81 @@
|
||||
; GCN: s_endpgm
|
||||
|
||||
; TOVGPR: ScratchSize: 0{{$}}
|
||||
define amdgpu_ps void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
|
||||
define amdgpu_ps void @main([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
|
||||
main_body:
|
||||
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 96)
|
||||
%tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 100)
|
||||
%tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 104)
|
||||
%tmp25 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 112)
|
||||
%tmp26 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 116)
|
||||
%tmp27 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 120)
|
||||
%tmp28 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 128)
|
||||
%tmp29 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 132)
|
||||
%tmp30 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 140)
|
||||
%tmp31 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 144)
|
||||
%tmp32 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 160)
|
||||
%tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 176)
|
||||
%tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 180)
|
||||
%tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 184)
|
||||
%tmp36 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 192)
|
||||
%tmp37 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 196)
|
||||
%tmp38 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 200)
|
||||
%tmp39 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 208)
|
||||
%tmp40 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 212)
|
||||
%tmp41 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 216)
|
||||
%tmp42 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 224)
|
||||
%tmp43 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 240)
|
||||
%tmp44 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 244)
|
||||
%tmp45 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 248)
|
||||
%tmp46 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 256)
|
||||
%tmp47 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 272)
|
||||
%tmp48 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 276)
|
||||
%tmp49 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 280)
|
||||
%tmp50 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 288)
|
||||
%tmp51 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 292)
|
||||
%tmp52 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 296)
|
||||
%tmp53 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 304)
|
||||
%tmp54 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 308)
|
||||
%tmp55 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 312)
|
||||
%tmp56 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 368)
|
||||
%tmp57 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 372)
|
||||
%tmp58 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 376)
|
||||
%tmp59 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 384)
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 96)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 100)
|
||||
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 104)
|
||||
%tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 112)
|
||||
%tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 116)
|
||||
%tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 120)
|
||||
%tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 128)
|
||||
%tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 132)
|
||||
%tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 140)
|
||||
%tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 144)
|
||||
%tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 160)
|
||||
%tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 176)
|
||||
%tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 180)
|
||||
%tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 184)
|
||||
%tmp36 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 192)
|
||||
%tmp37 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 196)
|
||||
%tmp38 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 200)
|
||||
%tmp39 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 208)
|
||||
%tmp40 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 212)
|
||||
%tmp41 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 216)
|
||||
%tmp42 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 224)
|
||||
%tmp43 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 240)
|
||||
%tmp44 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 244)
|
||||
%tmp45 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 248)
|
||||
%tmp46 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 256)
|
||||
%tmp47 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 272)
|
||||
%tmp48 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 276)
|
||||
%tmp49 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 280)
|
||||
%tmp50 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 288)
|
||||
%tmp51 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 292)
|
||||
%tmp52 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 296)
|
||||
%tmp53 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 304)
|
||||
%tmp54 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 308)
|
||||
%tmp55 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 312)
|
||||
%tmp56 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 368)
|
||||
%tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 372)
|
||||
%tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 376)
|
||||
%tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 384)
|
||||
%tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
|
||||
%tmp61 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp60, !tbaa !0
|
||||
%tmp62 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
|
||||
%tmp63 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp62, !tbaa !0
|
||||
%tmp63.bc = bitcast <16 x i8> %tmp63 to <4 x i32>
|
||||
%tmp62 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
|
||||
%tmp63 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp62, !tbaa !0
|
||||
%tmp63.bc = bitcast <4 x i32> %tmp63 to <4 x i32>
|
||||
%tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
|
||||
%tmp65 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp64, !tbaa !0
|
||||
%tmp66 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
|
||||
%tmp67 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp66, !tbaa !0
|
||||
%tmp66 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
|
||||
%tmp67 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp66, !tbaa !0
|
||||
%tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
|
||||
%tmp69 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp68, !tbaa !0
|
||||
%tmp70 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
|
||||
%tmp71 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp70, !tbaa !0
|
||||
%tmp70 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
|
||||
%tmp71 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp70, !tbaa !0
|
||||
%tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
|
||||
%tmp73 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp72, !tbaa !0
|
||||
%tmp74 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
|
||||
%tmp75 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp74, !tbaa !0
|
||||
%tmp74 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
|
||||
%tmp75 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp74, !tbaa !0
|
||||
%tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
|
||||
%tmp77 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp76, !tbaa !0
|
||||
%tmp78 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
|
||||
%tmp79 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp78, !tbaa !0
|
||||
%tmp78 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
|
||||
%tmp79 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp78, !tbaa !0
|
||||
%tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
|
||||
%tmp81 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp80, !tbaa !0
|
||||
%tmp82 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
|
||||
%tmp83 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp82, !tbaa !0
|
||||
%tmp82 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
|
||||
%tmp83 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp82, !tbaa !0
|
||||
%tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
|
||||
%tmp85 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp84, !tbaa !0
|
||||
%tmp86 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
|
||||
%tmp87 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp86, !tbaa !0
|
||||
%tmp86 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
|
||||
%tmp87 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp86, !tbaa !0
|
||||
%tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
|
||||
%tmp89 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp88, !tbaa !0
|
||||
%tmp90 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
|
||||
%tmp91 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp90, !tbaa !0
|
||||
%tmp90 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
|
||||
%tmp91 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp90, !tbaa !0
|
||||
%i.i = extractelement <2 x i32> %arg6, i32 0
|
||||
%j.i = extractelement <2 x i32> %arg6, i32 1
|
||||
%i.f.i = bitcast i32 %i.i to float
|
||||
@ -410,7 +410,7 @@ IF67: ; preds = %LOOP65
|
||||
%tmp274 = insertelement <8 x i32> %tmp273, i32 %tmp268, i32 5
|
||||
%tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6
|
||||
%tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7
|
||||
%tmp67.bc = bitcast <16 x i8> %tmp67 to <4 x i32>
|
||||
%tmp67.bc = bitcast <4 x i32> %tmp67 to <4 x i32>
|
||||
%tmp276.bc = bitcast <8 x i32> %tmp276 to <8 x float>
|
||||
%tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp276.bc, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp278 = extractelement <4 x float> %tmp277, i32 0
|
||||
@ -432,7 +432,7 @@ IF67: ; preds = %LOOP65
|
||||
%tmp294 = insertelement <8 x i32> %tmp293, i32 %tmp288, i32 5
|
||||
%tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6
|
||||
%tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7
|
||||
%tmp83.bc = bitcast <16 x i8> %tmp83 to <4 x i32>
|
||||
%tmp83.bc = bitcast <4 x i32> %tmp83 to <4 x i32>
|
||||
%tmp296.bc = bitcast <8 x i32> %tmp296 to <8 x float>
|
||||
%tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp296.bc, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp298 = extractelement <4 x float> %tmp297, i32 0
|
||||
@ -452,7 +452,7 @@ IF67: ; preds = %LOOP65
|
||||
%tmp312 = insertelement <8 x i32> %tmp311, i32 %tmp306, i32 5
|
||||
%tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6
|
||||
%tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7
|
||||
%tmp79.bc = bitcast <16 x i8> %tmp79 to <4 x i32>
|
||||
%tmp79.bc = bitcast <4 x i32> %tmp79 to <4 x i32>
|
||||
%tmp314.bc = bitcast <8 x i32> %tmp314 to <8 x float>
|
||||
%tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp314.bc, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp316 = extractelement <4 x float> %tmp315, i32 0
|
||||
@ -515,7 +515,7 @@ IF67: ; preds = %LOOP65
|
||||
%tmp372 = insertelement <8 x i32> %tmp371, i32 %tmp366, i32 5
|
||||
%tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6
|
||||
%tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7
|
||||
%tmp71.bc = bitcast <16 x i8> %tmp71 to <4 x i32>
|
||||
%tmp71.bc = bitcast <4 x i32> %tmp71 to <4 x i32>
|
||||
%tmp374.bc = bitcast <8 x i32> %tmp374 to <8 x float>
|
||||
%tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp374.bc, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp376 = extractelement <4 x float> %tmp375, i32 0
|
||||
@ -571,7 +571,7 @@ IF67: ; preds = %LOOP65
|
||||
%tmp426 = insertelement <8 x i32> %tmp425, i32 %tmp420, i32 5
|
||||
%tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6
|
||||
%tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7
|
||||
%tmp87.bc = bitcast <16 x i8> %tmp87 to <4 x i32>
|
||||
%tmp87.bc = bitcast <4 x i32> %tmp87 to <4 x i32>
|
||||
%tmp428.bc = bitcast <8 x i32> %tmp428 to <8 x float>
|
||||
%tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp428.bc, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp430 = extractelement <4 x float> %tmp429, i32 0
|
||||
@ -624,7 +624,7 @@ IF67: ; preds = %LOOP65
|
||||
%tmp467 = insertelement <4 x i32> %tmp466, i32 %tmp464, i32 1
|
||||
%tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2
|
||||
%tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3
|
||||
%tmp91.bc = bitcast <16 x i8> %tmp91 to <4 x i32>
|
||||
%tmp91.bc = bitcast <4 x i32> %tmp91 to <4 x i32>
|
||||
%tmp469.bc = bitcast <4 x i32> %tmp469 to <4 x float>
|
||||
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp469.bc, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
|
||||
%tmp471 = extractelement <4 x float> %tmp470, i32 0
|
||||
@ -727,7 +727,7 @@ IF67: ; preds = %LOOP65
|
||||
%tmp568 = insertelement <8 x i32> %tmp567, i32 %tmp562, i32 5
|
||||
%tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6
|
||||
%tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7
|
||||
%tmp75.bc = bitcast <16 x i8> %tmp75 to <4 x i32>
|
||||
%tmp75.bc = bitcast <4 x i32> %tmp75 to <4 x i32>
|
||||
%tmp570.bc = bitcast <8 x i32> %tmp570 to <8 x float>
|
||||
%tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp570.bc, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp572 = extractelement <4 x float> %tmp571, i32 0
|
||||
@ -778,149 +778,149 @@ ENDIF66: ; preds = %LOOP65
|
||||
; GCN-LABEL: {{^}}main1:
|
||||
; GCN: s_endpgm
|
||||
; TOVGPR: ScratchSize: 0{{$}}
|
||||
define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 0)
|
||||
%tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 4)
|
||||
%tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 8)
|
||||
%tmp25 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 12)
|
||||
%tmp26 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 28)
|
||||
%tmp27 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 48)
|
||||
%tmp28 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 52)
|
||||
%tmp29 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 56)
|
||||
%tmp30 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 64)
|
||||
%tmp31 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 68)
|
||||
%tmp32 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 72)
|
||||
%tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 76)
|
||||
%tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 128)
|
||||
%tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 132)
|
||||
%tmp36 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 144)
|
||||
%tmp37 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 148)
|
||||
%tmp38 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 152)
|
||||
%tmp39 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 160)
|
||||
%tmp40 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 164)
|
||||
%tmp41 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 168)
|
||||
%tmp42 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 172)
|
||||
%tmp43 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 176)
|
||||
%tmp44 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 180)
|
||||
%tmp45 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 184)
|
||||
%tmp46 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 192)
|
||||
%tmp47 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 196)
|
||||
%tmp48 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 200)
|
||||
%tmp49 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 208)
|
||||
%tmp50 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 212)
|
||||
%tmp51 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 216)
|
||||
%tmp52 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 220)
|
||||
%tmp53 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 236)
|
||||
%tmp54 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 240)
|
||||
%tmp55 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 244)
|
||||
%tmp56 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 248)
|
||||
%tmp57 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 252)
|
||||
%tmp58 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 256)
|
||||
%tmp59 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 260)
|
||||
%tmp60 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 264)
|
||||
%tmp61 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 268)
|
||||
%tmp62 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 272)
|
||||
%tmp63 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 276)
|
||||
%tmp64 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 280)
|
||||
%tmp65 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 284)
|
||||
%tmp66 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 288)
|
||||
%tmp67 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 292)
|
||||
%tmp68 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 464)
|
||||
%tmp69 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 468)
|
||||
%tmp70 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 472)
|
||||
%tmp71 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 496)
|
||||
%tmp72 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 500)
|
||||
%tmp73 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 504)
|
||||
%tmp74 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 512)
|
||||
%tmp75 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 516)
|
||||
%tmp76 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 524)
|
||||
%tmp77 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 532)
|
||||
%tmp78 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 536)
|
||||
%tmp79 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 540)
|
||||
%tmp80 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 544)
|
||||
%tmp81 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 548)
|
||||
%tmp82 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 552)
|
||||
%tmp83 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 556)
|
||||
%tmp84 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 560)
|
||||
%tmp85 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 564)
|
||||
%tmp86 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 568)
|
||||
%tmp87 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 572)
|
||||
%tmp88 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 576)
|
||||
%tmp89 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 580)
|
||||
%tmp90 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 584)
|
||||
%tmp91 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 588)
|
||||
%tmp92 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 592)
|
||||
%tmp93 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 596)
|
||||
%tmp94 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 600)
|
||||
%tmp95 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 604)
|
||||
%tmp96 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 608)
|
||||
%tmp97 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 612)
|
||||
%tmp98 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 616)
|
||||
%tmp99 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 624)
|
||||
%tmp100 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 628)
|
||||
%tmp101 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 632)
|
||||
%tmp102 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 636)
|
||||
%tmp103 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 640)
|
||||
%tmp104 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 644)
|
||||
%tmp105 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 648)
|
||||
%tmp106 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 652)
|
||||
%tmp107 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 656)
|
||||
%tmp108 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 660)
|
||||
%tmp109 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 664)
|
||||
%tmp110 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 668)
|
||||
%tmp111 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 672)
|
||||
%tmp112 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 676)
|
||||
%tmp113 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 680)
|
||||
%tmp114 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 684)
|
||||
%tmp115 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 688)
|
||||
%tmp116 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 692)
|
||||
%tmp117 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 696)
|
||||
%tmp118 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 700)
|
||||
%tmp119 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 704)
|
||||
%tmp120 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 708)
|
||||
%tmp121 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 712)
|
||||
%tmp122 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 716)
|
||||
%tmp123 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 864)
|
||||
%tmp124 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 868)
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
|
||||
%tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 0)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 4)
|
||||
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 8)
|
||||
%tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 12)
|
||||
%tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 28)
|
||||
%tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 48)
|
||||
%tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 52)
|
||||
%tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 56)
|
||||
%tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 64)
|
||||
%tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 68)
|
||||
%tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 72)
|
||||
%tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 76)
|
||||
%tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 128)
|
||||
%tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 132)
|
||||
%tmp36 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 144)
|
||||
%tmp37 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 148)
|
||||
%tmp38 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 152)
|
||||
%tmp39 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 160)
|
||||
%tmp40 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 164)
|
||||
%tmp41 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 168)
|
||||
%tmp42 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 172)
|
||||
%tmp43 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 176)
|
||||
%tmp44 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 180)
|
||||
%tmp45 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 184)
|
||||
%tmp46 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 192)
|
||||
%tmp47 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 196)
|
||||
%tmp48 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 200)
|
||||
%tmp49 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 208)
|
||||
%tmp50 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 212)
|
||||
%tmp51 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 216)
|
||||
%tmp52 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 220)
|
||||
%tmp53 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 236)
|
||||
%tmp54 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 240)
|
||||
%tmp55 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 244)
|
||||
%tmp56 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 248)
|
||||
%tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 252)
|
||||
%tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 256)
|
||||
%tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 260)
|
||||
%tmp60 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 264)
|
||||
%tmp61 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 268)
|
||||
%tmp62 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 272)
|
||||
%tmp63 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 276)
|
||||
%tmp64 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 280)
|
||||
%tmp65 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 284)
|
||||
%tmp66 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 288)
|
||||
%tmp67 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 292)
|
||||
%tmp68 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 464)
|
||||
%tmp69 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 468)
|
||||
%tmp70 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 472)
|
||||
%tmp71 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 496)
|
||||
%tmp72 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 500)
|
||||
%tmp73 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 504)
|
||||
%tmp74 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 512)
|
||||
%tmp75 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 516)
|
||||
%tmp76 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 524)
|
||||
%tmp77 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 532)
|
||||
%tmp78 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 536)
|
||||
%tmp79 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 540)
|
||||
%tmp80 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 544)
|
||||
%tmp81 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 548)
|
||||
%tmp82 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 552)
|
||||
%tmp83 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 556)
|
||||
%tmp84 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 560)
|
||||
%tmp85 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 564)
|
||||
%tmp86 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 568)
|
||||
%tmp87 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 572)
|
||||
%tmp88 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 576)
|
||||
%tmp89 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 580)
|
||||
%tmp90 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 584)
|
||||
%tmp91 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 588)
|
||||
%tmp92 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 592)
|
||||
%tmp93 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 596)
|
||||
%tmp94 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 600)
|
||||
%tmp95 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 604)
|
||||
%tmp96 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 608)
|
||||
%tmp97 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 612)
|
||||
%tmp98 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 616)
|
||||
%tmp99 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 624)
|
||||
%tmp100 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 628)
|
||||
%tmp101 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 632)
|
||||
%tmp102 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 636)
|
||||
%tmp103 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 640)
|
||||
%tmp104 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 644)
|
||||
%tmp105 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 648)
|
||||
%tmp106 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 652)
|
||||
%tmp107 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 656)
|
||||
%tmp108 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 660)
|
||||
%tmp109 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 664)
|
||||
%tmp110 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 668)
|
||||
%tmp111 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 672)
|
||||
%tmp112 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 676)
|
||||
%tmp113 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 680)
|
||||
%tmp114 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 684)
|
||||
%tmp115 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 688)
|
||||
%tmp116 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 692)
|
||||
%tmp117 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 696)
|
||||
%tmp118 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 700)
|
||||
%tmp119 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 704)
|
||||
%tmp120 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 708)
|
||||
%tmp121 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 712)
|
||||
%tmp122 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 716)
|
||||
%tmp123 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 864)
|
||||
%tmp124 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 868)
|
||||
%tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
|
||||
%tmp126 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp125, !tbaa !0
|
||||
%tmp127 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
|
||||
%tmp128 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp127, !tbaa !0
|
||||
%tmp127 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
|
||||
%tmp128 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp127, !tbaa !0
|
||||
%tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
|
||||
%tmp130 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp129, !tbaa !0
|
||||
%tmp131 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
|
||||
%tmp132 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp131, !tbaa !0
|
||||
%tmp131 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 1
|
||||
%tmp132 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp131, !tbaa !0
|
||||
%tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
|
||||
%tmp134 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp133, !tbaa !0
|
||||
%tmp135 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
|
||||
%tmp136 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp135, !tbaa !0
|
||||
%tmp135 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 2
|
||||
%tmp136 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp135, !tbaa !0
|
||||
%tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
|
||||
%tmp138 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp137, !tbaa !0
|
||||
%tmp139 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
|
||||
%tmp140 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp139, !tbaa !0
|
||||
%tmp139 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 3
|
||||
%tmp140 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp139, !tbaa !0
|
||||
%tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
|
||||
%tmp142 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp141, !tbaa !0
|
||||
%tmp143 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
|
||||
%tmp144 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp143, !tbaa !0
|
||||
%tmp143 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 4
|
||||
%tmp144 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp143, !tbaa !0
|
||||
%tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
|
||||
%tmp146 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp145, !tbaa !0
|
||||
%tmp147 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
|
||||
%tmp148 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp147, !tbaa !0
|
||||
%tmp147 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 5
|
||||
%tmp148 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp147, !tbaa !0
|
||||
%tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
|
||||
%tmp150 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp149, !tbaa !0
|
||||
%tmp151 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
|
||||
%tmp152 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp151, !tbaa !0
|
||||
%tmp151 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 6
|
||||
%tmp152 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp151, !tbaa !0
|
||||
%tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
|
||||
%tmp154 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp153, !tbaa !0
|
||||
%tmp155 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
|
||||
%tmp156 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp155, !tbaa !0
|
||||
%tmp155 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 7
|
||||
%tmp156 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp155, !tbaa !0
|
||||
%tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 8
|
||||
%tmp158 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp157, !tbaa !0
|
||||
%tmp159 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 8
|
||||
%tmp160 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp159, !tbaa !0
|
||||
%tmp159 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 8
|
||||
%tmp160 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp159, !tbaa !0
|
||||
%tmp161 = fcmp ugt float %arg17, 0.000000e+00
|
||||
%tmp162 = select i1 %tmp161, float 1.000000e+00, float 0.000000e+00
|
||||
%i.i = extractelement <2 x i32> %arg6, i32 0
|
||||
@ -1144,7 +1144,7 @@ main_body:
|
||||
%tmp222 = bitcast float %p2.i126 to i32
|
||||
%tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0
|
||||
%tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1
|
||||
%tmp132.bc = bitcast <16 x i8> %tmp132 to <4 x i32>
|
||||
%tmp132.bc = bitcast <4 x i32> %tmp132 to <4 x i32>
|
||||
%tmp224.bc = bitcast <2 x i32> %tmp224 to <2 x float>
|
||||
%tmp225 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp224.bc, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp226 = extractelement <4 x float> %tmp225, i32 0
|
||||
@ -1218,7 +1218,7 @@ LOOP: ; preds = %LOOP, %main_body
|
||||
%tmp279 = insertelement <4 x i32> %tmp278, i32 %tmp277, i32 1
|
||||
%tmp280 = insertelement <4 x i32> %tmp279, i32 0, i32 2
|
||||
%tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3
|
||||
%tmp148.bc = bitcast <16 x i8> %tmp148 to <4 x i32>
|
||||
%tmp148.bc = bitcast <4 x i32> %tmp148 to <4 x i32>
|
||||
%tmp281.bc = bitcast <4 x i32> %tmp281 to <4 x float>
|
||||
%tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp281.bc, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp283 = extractelement <4 x float> %tmp282, i32 3
|
||||
@ -1283,7 +1283,7 @@ IF189: ; preds = %LOOP
|
||||
%tmp339 = bitcast float %tmp335 to i32
|
||||
%tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0
|
||||
%tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1
|
||||
%tmp136.bc = bitcast <16 x i8> %tmp136 to <4 x i32>
|
||||
%tmp136.bc = bitcast <4 x i32> %tmp136 to <4 x i32>
|
||||
%a.bc.i = bitcast <2 x i32> %tmp341 to <2 x float>
|
||||
%tmp0 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp343 = extractelement <4 x float> %tmp0, i32 0
|
||||
@ -1317,7 +1317,7 @@ IF189: ; preds = %LOOP
|
||||
%tmp359 = bitcast float %tmp337 to i32
|
||||
%tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0
|
||||
%tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1
|
||||
%tmp152.bc = bitcast <16 x i8> %tmp152 to <4 x i32>
|
||||
%tmp152.bc = bitcast <4 x i32> %tmp152 to <4 x i32>
|
||||
%a.bc.i3 = bitcast <2 x i32> %tmp361 to <2 x float>
|
||||
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i3, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp363 = extractelement <4 x float> %tmp1, i32 2
|
||||
@ -1329,7 +1329,7 @@ IF189: ; preds = %LOOP
|
||||
%tmp369 = bitcast float %tmp311 to i32
|
||||
%tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0
|
||||
%tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1
|
||||
%tmp140.bc = bitcast <16 x i8> %tmp140 to <4 x i32>
|
||||
%tmp140.bc = bitcast <4 x i32> %tmp140 to <4 x i32>
|
||||
%a.bc.i2 = bitcast <2 x i32> %tmp371 to <2 x float>
|
||||
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i2, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp373 = extractelement <4 x float> %tmp2, i32 0
|
||||
@ -1347,7 +1347,7 @@ IF189: ; preds = %LOOP
|
||||
%tmp383 = bitcast float %tmp321 to i32
|
||||
%tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0
|
||||
%tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1
|
||||
%tmp144.bc = bitcast <16 x i8> %tmp144 to <4 x i32>
|
||||
%tmp144.bc = bitcast <4 x i32> %tmp144 to <4 x i32>
|
||||
%a.bc.i1 = bitcast <2 x i32> %tmp385 to <2 x float>
|
||||
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp387 = extractelement <4 x float> %tmp3, i32 0
|
||||
@ -1446,7 +1446,7 @@ ENDIF197: ; preds = %IF198, %IF189
|
||||
%tmp467 = bitcast float %tmp220 to i32
|
||||
%tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0
|
||||
%tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1
|
||||
%tmp160.bc = bitcast <16 x i8> %tmp160 to <4 x i32>
|
||||
%tmp160.bc = bitcast <4 x i32> %tmp160 to <4 x i32>
|
||||
%tmp469.bc = bitcast <2 x i32> %tmp469 to <2 x float>
|
||||
%tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp469.bc, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp471 = extractelement <4 x float> %tmp470, i32 0
|
||||
@ -1465,7 +1465,7 @@ ENDIF197: ; preds = %IF198, %IF189
|
||||
%tmp484 = bitcast float %p2.i138 to i32
|
||||
%tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0
|
||||
%tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1
|
||||
%tmp156.bc = bitcast <16 x i8> %tmp156 to <4 x i32>
|
||||
%tmp156.bc = bitcast <4 x i32> %tmp156 to <4 x i32>
|
||||
%tmp486.bc = bitcast <2 x i32> %tmp486 to <2 x float>
|
||||
%tmp487 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp486.bc, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp488 = extractelement <4 x float> %tmp487, i32 0
|
||||
@ -1674,7 +1674,7 @@ ENDIF209: ; preds = %ELSE214, %ELSE211,
|
||||
%tmp657 = insertelement <4 x i32> %tmp656, i32 %tmp654, i32 1
|
||||
%tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2
|
||||
%tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3
|
||||
%tmp128.bc = bitcast <16 x i8> %tmp128 to <4 x i32>
|
||||
%tmp128.bc = bitcast <4 x i32> %tmp128 to <4 x i32>
|
||||
%tmp659.bc = bitcast <4 x i32> %tmp659 to <4 x float>
|
||||
%tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp659.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
|
||||
%tmp661 = extractelement <4 x float> %tmp660, i32 0
|
||||
@ -1869,7 +1869,7 @@ declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -9,73 +9,73 @@
|
||||
|
||||
define amdgpu_ps void @main() #0 {
|
||||
main_body:
|
||||
%tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
|
||||
%tmp1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
|
||||
%tmp2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
|
||||
%tmp3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
|
||||
%tmp4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
|
||||
%tmp5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
|
||||
%tmp6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
|
||||
%tmp7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
|
||||
%tmp8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
|
||||
%tmp9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
|
||||
%tmp10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
|
||||
%tmp11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
|
||||
%tmp12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
|
||||
%tmp13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
|
||||
%tmp14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
|
||||
%tmp15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
|
||||
%tmp16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
|
||||
%tmp17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
|
||||
%tmp18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
|
||||
%tmp19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
|
||||
%tmp20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
|
||||
%tmp22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
|
||||
%tmp23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
|
||||
%tmp24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
|
||||
%tmp25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
|
||||
%tmp26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
|
||||
%tmp27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
|
||||
%tmp28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
|
||||
%tmp29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
|
||||
%tmp30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
|
||||
%tmp31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
|
||||
%tmp32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
|
||||
%tmp33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
|
||||
%tmp34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
|
||||
%tmp35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
|
||||
%tmp36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
|
||||
%tmp37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
|
||||
%tmp38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
|
||||
%tmp39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
|
||||
%tmp40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
|
||||
%tmp41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
|
||||
%tmp42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
|
||||
%tmp43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
|
||||
%tmp44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
|
||||
%tmp45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
|
||||
%tmp46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
|
||||
%tmp47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
|
||||
%tmp48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
|
||||
%tmp49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
|
||||
%tmp50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
|
||||
%tmp51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
|
||||
%tmp52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
|
||||
%tmp53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
|
||||
%tmp54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
|
||||
%tmp55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
|
||||
%tmp56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
|
||||
%tmp57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
|
||||
%tmp58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
|
||||
%tmp59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
|
||||
%tmp60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
|
||||
%tmp61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
|
||||
%tmp62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
|
||||
%tmp63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
|
||||
%tmp64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
|
||||
%tmp65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
|
||||
%tmp66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
|
||||
%tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 16)
|
||||
%tmp1 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 32)
|
||||
%tmp2 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 80)
|
||||
%tmp3 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 84)
|
||||
%tmp4 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 88)
|
||||
%tmp5 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 96)
|
||||
%tmp6 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 100)
|
||||
%tmp7 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 104)
|
||||
%tmp8 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 112)
|
||||
%tmp9 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 116)
|
||||
%tmp10 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 120)
|
||||
%tmp11 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 128)
|
||||
%tmp12 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 132)
|
||||
%tmp13 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 136)
|
||||
%tmp14 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 144)
|
||||
%tmp15 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 148)
|
||||
%tmp16 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 152)
|
||||
%tmp17 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 160)
|
||||
%tmp18 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 164)
|
||||
%tmp19 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 168)
|
||||
%tmp20 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 176)
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 180)
|
||||
%tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 184)
|
||||
%tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 192)
|
||||
%tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 196)
|
||||
%tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 200)
|
||||
%tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 208)
|
||||
%tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 212)
|
||||
%tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 216)
|
||||
%tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 224)
|
||||
%tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 228)
|
||||
%tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 232)
|
||||
%tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 240)
|
||||
%tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 244)
|
||||
%tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 248)
|
||||
%tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 256)
|
||||
%tmp36 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 260)
|
||||
%tmp37 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 264)
|
||||
%tmp38 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 272)
|
||||
%tmp39 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 276)
|
||||
%tmp40 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 280)
|
||||
%tmp41 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 288)
|
||||
%tmp42 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 292)
|
||||
%tmp43 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 296)
|
||||
%tmp44 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 304)
|
||||
%tmp45 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 308)
|
||||
%tmp46 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 312)
|
||||
%tmp47 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 320)
|
||||
%tmp48 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 324)
|
||||
%tmp49 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 328)
|
||||
%tmp50 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 336)
|
||||
%tmp51 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 340)
|
||||
%tmp52 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 344)
|
||||
%tmp53 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 352)
|
||||
%tmp54 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 356)
|
||||
%tmp55 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 360)
|
||||
%tmp56 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 368)
|
||||
%tmp57 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 372)
|
||||
%tmp58 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 376)
|
||||
%tmp59 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 384)
|
||||
%tmp60 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 388)
|
||||
%tmp61 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 392)
|
||||
%tmp62 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 400)
|
||||
%tmp63 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 404)
|
||||
%tmp64 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 408)
|
||||
%tmp65 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 416)
|
||||
%tmp66 = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 420)
|
||||
br label %LOOP
|
||||
|
||||
LOOP: ; preds = %ENDIF2795, %main_body
|
||||
@ -497,7 +497,7 @@ declare float @llvm.minnum.f32(float, float) #1
|
||||
declare float @llvm.maxnum.f32(float, float) #1
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -84,34 +84,34 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; SMRD load using the load.const intrinsic with an immediate offset
|
||||
; SMRD load using the load.const.v4i32 intrinsic with an immediate offset
|
||||
; GCN-LABEL: {{^}}smrd_load_const0:
|
||||
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
|
||||
define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const0(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; SMRD load using the load.const intrinsic with the largest possible immediate
|
||||
; SMRD load using the load.const.v4i32 intrinsic with the largest possible immediate
|
||||
; offset.
|
||||
; GCN-LABEL: {{^}}smrd_load_const1:
|
||||
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
|
||||
define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1020)
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1020)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; SMRD load using the load.const intrinsic with an offset greater than the
|
||||
; SMRD load using the load.const.v4i32 intrinsic with an offset greater than the
|
||||
; largets possible immediate.
|
||||
; immediate offset.
|
||||
; GCN-LABEL: {{^}}smrd_load_const2:
|
||||
@ -119,11 +119,11 @@ main_body:
|
||||
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
|
||||
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
|
||||
define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1024)
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1024)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
@ -134,11 +134,11 @@ main_body:
|
||||
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
|
||||
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
|
||||
define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const3(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048572)
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048572)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
@ -149,17 +149,17 @@ main_body:
|
||||
; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
define amdgpu_ps void @smrd_load_const4(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
|
||||
main_body:
|
||||
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048576)
|
||||
%tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
|
||||
%tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp
|
||||
%tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 1048576)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -8,7 +8,7 @@
|
||||
; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
|
||||
define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
|
||||
bb:
|
||||
%tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
|
||||
%tmp = call float @llvm.SI.load.const.v4i32(<4 x i32> undef, i32 96)
|
||||
%tmp1 = bitcast float %tmp to i32
|
||||
br i1 undef, label %bb2, label %bb3
|
||||
|
||||
@ -31,7 +31,7 @@ bb3: ; preds = %bb
|
||||
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
@ -27,17 +27,17 @@
|
||||
; GCN: NumVgprs: 256
|
||||
; GCN: ScratchSize: 1536
|
||||
|
||||
define amdgpu_vs void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
|
||||
define amdgpu_vs void @main([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <4 x i32>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
|
||||
bb:
|
||||
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i64 0, i64 0
|
||||
%tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
|
||||
%tmp12 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 0)
|
||||
%tmp13 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 16)
|
||||
%tmp14 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 32)
|
||||
%tmp15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
|
||||
%tmp16 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp15, align 16, !tbaa !0
|
||||
%tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i64 0, i64 0
|
||||
%tmp11 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, align 16, !tbaa !0
|
||||
%tmp12 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 0)
|
||||
%tmp13 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 16)
|
||||
%tmp14 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp11, i32 32)
|
||||
%tmp15 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg4, i64 0, i64 0
|
||||
%tmp16 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp15, align 16, !tbaa !0
|
||||
%tmp17 = add i32 %arg5, %arg7
|
||||
%tmp16.cast = bitcast <16 x i8> %tmp16 to <4 x i32>
|
||||
%tmp16.cast = bitcast <4 x i32> %tmp16 to <4 x i32>
|
||||
%tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp16.cast, i32 %tmp17, i32 0, i1 false, i1 false)
|
||||
%tmp19 = extractelement <4 x float> %tmp18, i32 0
|
||||
%tmp20 = extractelement <4 x float> %tmp18, i32 1
|
||||
@ -488,7 +488,7 @@ bb157: ; preds = %bb24
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
|
||||
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
Loading…
Reference in New Issue
Block a user