From c6df3d773b4c4e85551c2bf46844acf621513afe Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Wed, 15 Jun 2016 00:19:09 +0000 Subject: [PATCH] Remove the ScalarReplAggregates pass Nearly all the changes to this pass have been done while maintaining and updating other parts of LLVM. LLVM has had another pass, SROA, which has superseded ScalarReplAggregates for quite some time. Differential Revision: http://reviews.llvm.org/D21316 llvm-svn: 272737 --- .../scalar_opts/llvm_scalar_opts.mli | 6 +- docs/Atomics.rst | 2 +- docs/Passes.rst | 8 +- docs/tutorial/LangImpl7.rst | 2 +- docs/tutorial/OCamlLangImpl7.rst | 2 +- include/llvm-c/Transforms/Scalar.h | 6 +- include/llvm/InitializePasses.h | 2 - include/llvm/LinkAllPasses.h | 2 +- include/llvm/Transforms/Scalar.h | 11 - lib/LTO/LTOCodeGenerator.cpp | 2 - lib/Target/README.txt | 2 +- lib/Target/X86/README-X86-64.txt | 2 +- lib/Transforms/IPO/ArgumentPromotion.cpp | 2 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 19 +- lib/Transforms/Scalar/CMakeLists.txt | 1 - lib/Transforms/Scalar/Scalar.cpp | 8 +- .../Scalar/ScalarReplAggregates.cpp | 2618 ----------------- test/CodeGen/X86/vec_ins_extract.ll | 2 +- test/Transforms/ArgumentPromotion/inalloca.ll | 4 +- test/Transforms/Inline/basictest.ll | 2 +- test/Transforms/Inline/crash2.ll | 2 +- test/Transforms/Inline/devirtualize-3.ll | 2 +- .../2009-02-20-InstCombine-SROA.ll | 2 +- ...004-04-13-LoopSimplifyUpdateDomFrontier.ll | 2 +- .../LoopUnswitch/2011-09-26-EHCrash.ll | 2 +- .../ScalarRepl/2003-05-29-ArrayFail.ll | 13 - .../ScalarRepl/2003-09-12-IncorrectPromote.ll | 12 - .../ScalarRepl/2003-10-29-ArrayProblem.ll | 16 - .../2006-11-07-InvalidArrayPromote.ll | 20 - .../ScalarRepl/2007-05-29-MemcpyPreserve.ll | 24 - .../ScalarRepl/2007-11-03-bigendian_apint.ll | 36 - .../ScalarRepl/2008-01-29-PromoteBug.ll | 21 - .../2008-02-28-SubElementExtractCrash.ll | 16 - .../ScalarRepl/2008-06-05-loadstore-agg.ll | 33 - .../ScalarRepl/2008-06-22-LargeArray.ll | 17 - .../2008-08-22-out-of-range-array-promote.ll | 23 - .../ScalarRepl/2008-09-22-vector-gep.ll | 25 - .../2009-02-02-ScalarPromoteOutOfRange.ll | 16 - .../ScalarRepl/2009-02-05-LoadFCA.ll | 20 - .../ScalarRepl/2009-03-04-MemCpyAlign.ll | 19 - .../ScalarRepl/2009-12-11-NeonTypes.ll | 90 - .../ScalarRepl/2010-01-18-SelfCopy.ll | 18 - .../ScalarRepl/2011-05-06-CapturedAlloca.ll | 26 - .../2011-06-08-VectorExtractValue.ll | 75 - .../2011-06-17-VectorPartialMemset.ll | 37 - .../2011-09-22-PHISpeculateInvoke.ll | 40 - .../ScalarRepl/2011-10-11-VectorMemset.ll | 22 - .../ScalarRepl/2011-10-22-VectorCrash.ll | 19 - .../ScalarRepl/2011-11-11-EmptyStruct.ll | 26 - .../Transforms/ScalarRepl/AggregatePromote.ll | 51 - test/Transforms/ScalarRepl/DifferingTypes.ll | 16 - test/Transforms/ScalarRepl/address-space.ll | 35 - test/Transforms/ScalarRepl/arraytest.ll | 11 - test/Transforms/ScalarRepl/badarray.ll | 57 - test/Transforms/ScalarRepl/basictest.ll | 30 - test/Transforms/ScalarRepl/bitfield-sroa.ll | 17 - test/Transforms/ScalarRepl/copy-aggregate.ll | 107 - test/Transforms/ScalarRepl/crash.ll | 286 -- .../ScalarRepl/debuginfo-preserved.ll | 64 - test/Transforms/ScalarRepl/inline-vector.ll | 53 - test/Transforms/ScalarRepl/lifetime.ll | 139 - .../ScalarRepl/load-store-aggregate.ll | 31 - test/Transforms/ScalarRepl/memcpy-align.ll | 32 - .../memset-aggregate-byte-leader.ll | 23 - .../Transforms/ScalarRepl/memset-aggregate.ll | 67 - test/Transforms/ScalarRepl/negative-memset.ll | 20 - .../ScalarRepl/nonzero-first-index.ll | 53 - test/Transforms/ScalarRepl/not-a-vector.ll | 20 - .../Transforms/ScalarRepl/only-memcpy-uses.ll | 27 - test/Transforms/ScalarRepl/phi-cycle.ll | 80 - test/Transforms/ScalarRepl/phi-select.ll | 153 - test/Transforms/ScalarRepl/phinodepromote.ll | 34 - test/Transforms/ScalarRepl/select_promote.ll | 18 - test/Transforms/ScalarRepl/sroa-fca.ll | 21 - test/Transforms/ScalarRepl/sroa_two.ll | 13 - test/Transforms/ScalarRepl/union-fp-int.ll | 14 - test/Transforms/ScalarRepl/union-packed.ll | 14 - test/Transforms/ScalarRepl/union-pointer.ll | 74 - test/Transforms/ScalarRepl/vector_memcpy.ll | 28 - test/Transforms/ScalarRepl/vector_promote.ll | 137 - .../vectors-with-mismatched-elements.ll | 27 - test/Transforms/ScalarRepl/volatile.ll | 13 - utils/findoptdiff | 2 +- 83 files changed, 30 insertions(+), 5114 deletions(-) delete mode 100644 lib/Transforms/Scalar/ScalarReplAggregates.cpp delete mode 100644 test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll delete mode 100644 test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll delete mode 100644 test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll delete mode 100644 test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll delete mode 100644 test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll delete mode 100644 test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll delete mode 100644 test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll delete mode 100644 test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll delete mode 100644 test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll delete mode 100644 test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll delete mode 100644 test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll delete mode 100644 test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll delete mode 100644 test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll delete mode 100644 test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll delete mode 100644 test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll delete mode 100644 test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll delete mode 100644 test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll delete mode 100644 test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll delete mode 100644 test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll delete mode 100644 test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll delete mode 100644 test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll delete mode 100644 test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll delete mode 100644 test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll delete mode 100644 test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll delete mode 100644 test/Transforms/ScalarRepl/AggregatePromote.ll delete mode 100644 test/Transforms/ScalarRepl/DifferingTypes.ll delete mode 100644 test/Transforms/ScalarRepl/address-space.ll delete mode 100644 test/Transforms/ScalarRepl/arraytest.ll delete mode 100644 test/Transforms/ScalarRepl/badarray.ll delete mode 100644 test/Transforms/ScalarRepl/basictest.ll delete mode 100644 test/Transforms/ScalarRepl/bitfield-sroa.ll delete mode 100644 test/Transforms/ScalarRepl/copy-aggregate.ll delete mode 100644 test/Transforms/ScalarRepl/crash.ll delete mode 100644 test/Transforms/ScalarRepl/debuginfo-preserved.ll delete mode 100644 test/Transforms/ScalarRepl/inline-vector.ll delete mode 100644 test/Transforms/ScalarRepl/lifetime.ll delete mode 100644 test/Transforms/ScalarRepl/load-store-aggregate.ll delete mode 100644 test/Transforms/ScalarRepl/memcpy-align.ll delete mode 100644 test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll delete mode 100644 test/Transforms/ScalarRepl/memset-aggregate.ll delete mode 100644 test/Transforms/ScalarRepl/negative-memset.ll delete mode 100644 test/Transforms/ScalarRepl/nonzero-first-index.ll delete mode 100644 test/Transforms/ScalarRepl/not-a-vector.ll delete mode 100644 test/Transforms/ScalarRepl/only-memcpy-uses.ll delete mode 100644 test/Transforms/ScalarRepl/phi-cycle.ll delete mode 100644 test/Transforms/ScalarRepl/phi-select.ll delete mode 100644 test/Transforms/ScalarRepl/phinodepromote.ll delete mode 100644 test/Transforms/ScalarRepl/select_promote.ll delete mode 100644 test/Transforms/ScalarRepl/sroa-fca.ll delete mode 100644 test/Transforms/ScalarRepl/sroa_two.ll delete mode 100644 test/Transforms/ScalarRepl/union-fp-int.ll delete mode 100644 test/Transforms/ScalarRepl/union-packed.ll delete mode 100644 test/Transforms/ScalarRepl/union-pointer.ll delete mode 100644 test/Transforms/ScalarRepl/vector_memcpy.ll delete mode 100644 test/Transforms/ScalarRepl/vector_promote.ll delete mode 100644 test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll delete mode 100644 test/Transforms/ScalarRepl/volatile.ll diff --git a/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli b/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli index b4cefed76d5..48109dfdb6b 100644 --- a/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli +++ b/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli @@ -127,17 +127,17 @@ external add_sccp : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit = "llvm_add_sccp" -(** See the [llvm::createScalarReplAggregatesPass] function. *) +(** See the [llvm::createSROAPass] function. *) external add_scalar_repl_aggregation : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit = "llvm_add_scalar_repl_aggregates" -(** See the [llvm::createScalarReplAggregatesPassSSA] function. *) +(** See the [llvm::createSROAPass] function. *) external add_scalar_repl_aggregation_ssa : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit = "llvm_add_scalar_repl_aggregates_ssa" -(** See the [llvm::createScalarReplAggregatesWithThreshold] function. *) +(** See the [llvm::createSROAPass] function. *) external add_scalar_repl_aggregation_with_threshold : int -> [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit = "llvm_add_scalar_repl_aggregates_with_threshold" diff --git a/docs/Atomics.rst b/docs/Atomics.rst index 89f5f44dae6..def927d2a20 100644 --- a/docs/Atomics.rst +++ b/docs/Atomics.rst @@ -398,7 +398,7 @@ operations: MemoryDependencyAnalysis (which is also used by other passes like GVN). * Folding a load: Any atomic load from a constant global can be constant-folded, - because it cannot be observed. Similar reasoning allows scalarrepl with + because it cannot be observed. Similar reasoning allows sroa with atomic loads and stores. Atomics and Codegen diff --git a/docs/Passes.rst b/docs/Passes.rst index 2b5b85ea400..77461f3c52d 100644 --- a/docs/Passes.rst +++ b/docs/Passes.rst @@ -947,7 +947,7 @@ that this should make CFG hacking much easier. To make later hacking easier, the entry block is split into two, such that all introduced ``alloca`` instructions (and nothing else) are in the entry block. -``-scalarrepl``: Scalar Replacement of Aggregates (DT) +``-sroa``: Scalar Replacement of Aggregates ------------------------------------------------------ The well-known scalar replacement of aggregates transformation. This transform @@ -956,12 +956,6 @@ individual ``alloca`` instructions for each member if possible. Then, if possible, it transforms the individual ``alloca`` instructions into nice clean scalar SSA form. -This combines a simple scalar replacement of aggregates algorithm with the -:ref:`mem2reg ` algorithm because they often interact, -especially for C++ programs. As such, iterating between ``scalarrepl``, then -:ref:`mem2reg ` until we run out of things to promote works -well. - .. _passes-sccp: ``-sccp``: Sparse Conditional Constant Propagation diff --git a/docs/tutorial/LangImpl7.rst b/docs/tutorial/LangImpl7.rst index efc38f62b57..68a86eda8a7 100644 --- a/docs/tutorial/LangImpl7.rst +++ b/docs/tutorial/LangImpl7.rst @@ -224,7 +224,7 @@ variables in certain circumstances: class <../LangRef.html#first-class-types>`_ values (such as pointers, scalars and vectors), and only if the array size of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of promoting - structs or arrays to registers. Note that the "scalarrepl" pass is + structs or arrays to registers. Note that the "sroa" pass is more powerful and can promote structs, "unions", and arrays in many cases. diff --git a/docs/tutorial/OCamlLangImpl7.rst b/docs/tutorial/OCamlLangImpl7.rst index c8c701b9101..f36845c5234 100644 --- a/docs/tutorial/OCamlLangImpl7.rst +++ b/docs/tutorial/OCamlLangImpl7.rst @@ -224,7 +224,7 @@ variables in certain circumstances: class <../LangRef.html#first-class-types>`_ values (such as pointers, scalars and vectors), and only if the array size of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of promoting - structs or arrays to registers. Note that the "scalarrepl" pass is + structs or arrays to registers. Note that the "sroa" pass is more powerful and can promote structs, "unions", and arrays in many cases. diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h index c989ee86b9f..b8a09984aa4 100644 --- a/include/llvm-c/Transforms/Scalar.h +++ b/include/llvm-c/Transforms/Scalar.h @@ -104,13 +104,13 @@ void LLVMAddReassociatePass(LLVMPassManagerRef PM); /** See llvm::createSCCPPass function. */ void LLVMAddSCCPPass(LLVMPassManagerRef PM); -/** See llvm::createScalarReplAggregatesPass function. */ +/** See llvm::createSROAPass function. */ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM); -/** See llvm::createScalarReplAggregatesPass function. */ +/** See llvm::createSROAPass function. */ void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM); -/** See llvm::createScalarReplAggregatesPass function. */ +/** See llvm::createSROAPass function. */ void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM, int Threshold); diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 954d7d75b54..3ce3a01a17e 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -286,8 +286,6 @@ void initializeSCCPLegacyPassPass(PassRegistry &); void initializeSCEVAAWrapperPassPass(PassRegistry&); void initializeSLPVectorizerPass(PassRegistry&); void initializeSROALegacyPassPass(PassRegistry&); -void initializeSROA_DTPass(PassRegistry&); -void initializeSROA_SSAUpPass(PassRegistry&); void initializeSafeStackPass(PassRegistry&); void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); void initializeSanitizerCoverageModulePass(PassRegistry&); diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index dd2553a5c78..612bb3bd53e 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -145,7 +145,7 @@ namespace { (void) llvm::createRegionViewerPass(); (void) llvm::createSCCPPass(); (void) llvm::createSafeStackPass(); - (void) llvm::createScalarReplAggregatesPass(); + (void) llvm::createSROAPass(); (void) llvm::createSingleLoopExtractorPass(); (void) llvm::createStripSymbolsPass(); (void) llvm::createStripNonDebugSymbolsPass(); diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 60e1311ec87..040e95c0ebd 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -104,17 +104,6 @@ FunctionPass *createBitTrackingDCEPass(); // FunctionPass *createSROAPass(); -//===----------------------------------------------------------------------===// -// -// ScalarReplAggregates - Break up alloca's of aggregates into multiple allocas -// if possible. -// -FunctionPass *createScalarReplAggregatesPass(signed Threshold = -1, - bool UseDomTree = true, - signed StructMemberThreshold = -1, - signed ArrayElementThreshold = -1, - signed ScalarLoadThreshold = -1); - //===----------------------------------------------------------------------===// // // InductiveRangeCheckElimination - Transform loops to elide range checks on diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 01d4d5579ab..d4becc37a1a 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -119,8 +119,6 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeArgPromotionPass(R); initializeJumpThreadingPass(R); initializeSROALegacyPassPass(R); - initializeSROA_DTPass(R); - initializeSROA_SSAUpPass(R); initializePostOrderFunctionAttrsLegacyPassPass(R); initializeReversePostOrderFunctionAttrsLegacyPassPass(R); initializeGlobalsAAWrapperPassPass(R); diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 7e9888cc13e..ab9a025930f 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2081,7 +2081,7 @@ struct x testfunc() { } We currently compile this to: -$ clang t.c -S -o - -O0 -emit-llvm | opt -scalarrepl -S +$ clang t.c -S -o - -O0 -emit-llvm | opt -sroa -S %struct.x = type { i8, [4 x i32] } diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index bcfdf0bc56b..09626e13849 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -170,7 +170,7 @@ generated for it. The primary issue with the result is that it doesn't do any of the optimizations which are possible if we know the address of a va_list in the current function is never taken: 1. We shouldn't spill the XMM registers because we only call va_arg with "int". -2. It would be nice if we could scalarrepl the va_list. +2. It would be nice if we could sroa the va_list. 3. Probably overkill, but it'd be cool if we could peel off the first five iterations of the loop. diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index a808df2af76..1599ddd2ad5 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -307,7 +307,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // Safe to transform, don't even bother trying to "promote" it. - // Passing the elements as a scalar will allow scalarrepl to hack on + // Passing the elements as a scalar will allow sroa to hack on // the new alloca we introduce. if (AllSimple) { ByValArgsToTransform.insert(PtrArg); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 6397915da1c..094dda8cc1f 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -61,10 +61,6 @@ static cl::opt ExtraVectorizerPasses( "extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization.")); -static cl::opt UseNewSROA("use-new-sroa", - cl::init(true), cl::Hidden, - cl::desc("Enable the new, experimental SROA pass")); - static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); @@ -201,10 +197,7 @@ void PassManagerBuilder::populateFunctionPassManager( addInitialAliasAnalysisPasses(FPM); FPM.add(createCFGSimplificationPass()); - if (UseNewSROA) - FPM.add(createSROAPass()); - else - FPM.add(createScalarReplAggregatesPass()); + FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); FPM.add(createLowerExpectIntrinsicPass()); } @@ -225,10 +218,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. - if (UseNewSROA) - MPM.add(createSROAPass()); - else - MPM.add(createScalarReplAggregatesPass(-1, false)); + MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); @@ -654,10 +644,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createJumpThreadingPass()); // Break up allocas - if (UseNewSROA) - PM.add(createSROAPass()); - else - PM.add(createScalarReplAggregatesPass()); + PM.add(createSROAPass()); // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index bb623576c77..ac162039037 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -45,7 +45,6 @@ add_llvm_library(LLVMScalarOpts SCCP.cpp SROA.cpp Scalar.cpp - ScalarReplAggregates.cpp Scalarizer.cpp SeparateConstOffsetFromGEP.cpp SimplifyCFGPass.cpp diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 98603ac0305..1f655840360 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -74,8 +74,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSCCPLegacyPassPass(Registry); initializeIPSCCPLegacyPassPass(Registry); initializeSROALegacyPassPass(Registry); - initializeSROA_DTPass(Registry); - initializeSROA_SSAUpPass(Registry); initializeCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); initializeSinkingLegacyPassPass(Registry); @@ -198,16 +196,16 @@ void LLVMAddSCCPPass(LLVMPassManagerRef PM) { } void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createScalarReplAggregatesPass()); + unwrap(PM)->add(createSROAPass()); } void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) { - unwrap(PM)->add(createScalarReplAggregatesPass(-1, false)); + unwrap(PM)->add(createSROAPass()); } void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM, int Threshold) { - unwrap(PM)->add(createScalarReplAggregatesPass(Threshold)); + unwrap(PM)->add(createSROAPass()); } void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp deleted file mode 100644 index 9ff149ae91d..00000000000 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ /dev/null @@ -1,2618 +0,0 @@ -//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This transformation implements the well known scalar replacement of -// aggregates transformation. This xform breaks up alloca instructions of -// aggregate type (structure or array) into individual alloca instructions for -// each member (if possible). Then, if possible, it transforms the individual -// alloca instructions into nice clean scalar SSA form. -// -// This combines a simple SRoA algorithm with the Mem2Reg algorithm because they -// often interact, especially for C++ programs. As such, iterating between -// SRoA, then Mem2Reg until we run out of things to promote works well. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -using namespace llvm; - -#define DEBUG_TYPE "scalarrepl" - -STATISTIC(NumReplaced, "Number of allocas broken up"); -STATISTIC(NumPromoted, "Number of allocas promoted"); -STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); -STATISTIC(NumConverted, "Number of aggregates converted to scalar"); - -namespace { -#define SROA SROA_ - struct SROA : public FunctionPass { - SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT) - : FunctionPass(ID), HasDomTree(hasDT) { - if (T == -1) - SRThreshold = 128; - else - SRThreshold = T; - if (ST == -1) - StructMemberThreshold = 32; - else - StructMemberThreshold = ST; - if (AT == -1) - ArrayElementThreshold = 8; - else - ArrayElementThreshold = AT; - if (SLT == -1) - // Do not limit the scalar integer load size if no threshold is given. - ScalarLoadThreshold = -1; - else - ScalarLoadThreshold = SLT; - } - - bool runOnFunction(Function &F) override; - - bool performScalarRepl(Function &F); - bool performPromotion(Function &F); - - private: - bool HasDomTree; - - /// DeadInsts - Keep track of instructions we have made dead, so that - /// we can remove them after we are done working. - SmallVector DeadInsts; - - /// AllocaInfo - When analyzing uses of an alloca instruction, this captures - /// information about the uses. All these fields are initialized to false - /// and set to true when something is learned. - struct AllocaInfo { - /// The alloca to promote. - AllocaInst *AI; - - /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite - /// looping and avoid redundant work. - SmallPtrSet CheckedPHIs; - - /// isUnsafe - This is set to true if the alloca cannot be SROA'd. - bool isUnsafe : 1; - - /// isMemCpySrc - This is true if this aggregate is memcpy'd from. - bool isMemCpySrc : 1; - - /// isMemCpyDst - This is true if this aggregate is memcpy'd into. - bool isMemCpyDst : 1; - - /// hasSubelementAccess - This is true if a subelement of the alloca is - /// ever accessed, or false if the alloca is only accessed with mem - /// intrinsics or load/store that only access the entire alloca at once. - bool hasSubelementAccess : 1; - - /// hasALoadOrStore - This is true if there are any loads or stores to it. - /// The alloca may just be accessed with memcpy, for example, which would - /// not set this. - bool hasALoadOrStore : 1; - - explicit AllocaInfo(AllocaInst *ai) - : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false), - hasSubelementAccess(false), hasALoadOrStore(false) {} - }; - - /// SRThreshold - The maximum alloca size to considered for SROA. - unsigned SRThreshold; - - /// StructMemberThreshold - The maximum number of members a struct can - /// contain to be considered for SROA. - unsigned StructMemberThreshold; - - /// ArrayElementThreshold - The maximum number of elements an array can - /// have to be considered for SROA. - unsigned ArrayElementThreshold; - - /// ScalarLoadThreshold - The maximum size in bits of scalars to load when - /// converting to scalar - unsigned ScalarLoadThreshold; - - void MarkUnsafe(AllocaInfo &I, Instruction *User) { - I.isUnsafe = true; - DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n'); - } - - bool isSafeAllocaToScalarRepl(AllocaInst *AI); - - void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info); - void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset, - AllocaInfo &Info); - void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info); - void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, - Type *MemOpType, bool isStore, AllocaInfo &Info, - Instruction *TheAccess, bool AllowWholeAccess); - bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size, - const DataLayout &DL); - uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy, - const DataLayout &DL); - - void DoScalarReplacement(AllocaInst *AI, - std::vector &WorkList); - void DeleteDeadInstructions(); - - void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - SmallVectorImpl &NewElts); - void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, - SmallVectorImpl &NewElts); - void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, - SmallVectorImpl &NewElts); - void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, - uint64_t Offset, - SmallVectorImpl &NewElts); - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, - AllocaInst *AI, - SmallVectorImpl &NewElts); - void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, - SmallVectorImpl &NewElts); - void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, - SmallVectorImpl &NewElts); - bool ShouldAttemptScalarRepl(AllocaInst *AI); - }; - - // SROA_DT - SROA that uses DominatorTree. - struct SROA_DT : public SROA { - static char ID; - public: - SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) : - SROA(T, true, ID, ST, AT, SLT) { - initializeSROA_DTPass(*PassRegistry::getPassRegistry()); - } - - // getAnalysisUsage - This pass does not require any passes, but we know it - // will not alter the CFG, so say so. - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.setPreservesCFG(); - } - }; - - // SROA_SSAUp - SROA that uses SSAUpdater. - struct SROA_SSAUp : public SROA { - static char ID; - public: - SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) : - SROA(T, false, ID, ST, AT, SLT) { - initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry()); - } - - // getAnalysisUsage - This pass does not require any passes, but we know it - // will not alter the CFG, so say so. - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.setPreservesCFG(); - } - }; - -} - -char SROA_DT::ID = 0; -char SROA_SSAUp::ID = 0; - -INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl", - "Scalar Replacement of Aggregates (DT)", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(SROA_DT, "scalarrepl", - "Scalar Replacement of Aggregates (DT)", false, false) - -INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa", - "Scalar Replacement of Aggregates (SSAUp)", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa", - "Scalar Replacement of Aggregates (SSAUp)", false, false) - -// Public interface to the ScalarReplAggregates pass -FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold, - bool UseDomTree, - int StructMemberThreshold, - int ArrayElementThreshold, - int ScalarLoadThreshold) { - if (UseDomTree) - return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold, - ScalarLoadThreshold); - return new SROA_SSAUp(Threshold, StructMemberThreshold, - ArrayElementThreshold, ScalarLoadThreshold); -} - - -//===----------------------------------------------------------------------===// -// Convert To Scalar Optimization. -//===----------------------------------------------------------------------===// - -namespace { -/// ConvertToScalarInfo - This class implements the "Convert To Scalar" -/// optimization, which scans the uses of an alloca and determines if it can -/// rewrite it in terms of a single new alloca that can be mem2reg'd. -class ConvertToScalarInfo { - /// AllocaSize - The size of the alloca being considered in bytes. - unsigned AllocaSize; - const DataLayout &DL; - unsigned ScalarLoadThreshold; - - /// IsNotTrivial - This is set to true if there is some access to the object - /// which means that mem2reg can't promote it. - bool IsNotTrivial; - - /// ScalarKind - Tracks the kind of alloca being considered for promotion, - /// computed based on the uses of the alloca rather than the LLVM type system. - enum { - Unknown, - - // Accesses via GEPs that are consistent with element access of a vector - // type. This will not be converted into a vector unless there is a later - // access using an actual vector type. - ImplicitVector, - - // Accesses via vector operations and GEPs that are consistent with the - // layout of a vector type. - Vector, - - // An integer bag-of-bits with bitwise operations for insertion and - // extraction. Any combination of types can be converted into this kind - // of scalar. - Integer - } ScalarKind; - - /// VectorTy - This tracks the type that we should promote the vector to if - /// it is possible to turn it into a vector. This starts out null, and if it - /// isn't possible to turn into a vector type, it gets set to VoidTy. - VectorType *VectorTy; - - /// HadNonMemTransferAccess - True if there is at least one access to the - /// alloca that is not a MemTransferInst. We don't want to turn structs into - /// large integers unless there is some potential for optimization. - bool HadNonMemTransferAccess; - - /// HadDynamicAccess - True if some element of this alloca was dynamic. - /// We don't yet have support for turning a dynamic access into a large - /// integer. - bool HadDynamicAccess; - -public: - explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL, - unsigned SLT) - : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false), - ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false), - HadDynamicAccess(false) { } - - AllocaInst *TryConvert(AllocaInst *AI); - -private: - bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx); - void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset); - bool MergeInVectorType(VectorType *VInTy, uint64_t Offset); - void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset, - Value *NonConstantIdx); - - Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType, - uint64_t Offset, Value* NonConstantIdx, - IRBuilder<> &Builder); - Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, - uint64_t Offset, Value* NonConstantIdx, - IRBuilder<> &Builder); -}; -} // end anonymous namespace. - - -/// TryConvert - Analyze the specified alloca, and if it is safe to do so, -/// rewrite it to be a new alloca which is mem2reg'able. This returns the new -/// alloca if possible or null if not. -AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { - // If we can't convert this scalar, or if mem2reg can trivially do it, bail - // out. - if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial) - return nullptr; - - // If an alloca has only memset / memcpy uses, it may still have an Unknown - // ScalarKind. Treat it as an Integer below. - if (ScalarKind == Unknown) - ScalarKind = Integer; - - if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8) - ScalarKind = Integer; - - // If we were able to find a vector type that can handle this with - // insert/extract elements, and if there was at least one use that had - // a vector type, promote this to a vector. We don't want to promote - // random stuff that doesn't use vectors (e.g. <9 x double>) because then - // we just get a lot of insert/extracts. If at least one vector is - // involved, then we probably really do have a union of vector/array. - Type *NewTy; - if (ScalarKind == Vector) { - assert(VectorTy && "Missing type for vector scalar."); - DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " - << *VectorTy << '\n'); - NewTy = VectorTy; // Use the vector type. - } else { - unsigned BitWidth = AllocaSize * 8; - - // Do not convert to scalar integer if the alloca size exceeds the - // scalar load threshold. - if (BitWidth > ScalarLoadThreshold) - return nullptr; - - if ((ScalarKind == ImplicitVector || ScalarKind == Integer) && - !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth)) - return nullptr; - // Dynamic accesses on integers aren't yet supported. They need us to shift - // by a dynamic amount which could be difficult to work out as we might not - // know whether to use a left or right shift. - if (ScalarKind == Integer && HadDynamicAccess) - return nullptr; - - DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); - // Create and insert the integer alloca. - NewTy = IntegerType::get(AI->getContext(), BitWidth); - } - AllocaInst *NewAI = - new AllocaInst(NewTy, nullptr, "", &AI->getParent()->front()); - ConvertUsesToScalar(AI, NewAI, 0, nullptr); - return NewAI; -} - -/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type -/// (VectorTy) so far at the offset specified by Offset (which is specified in -/// bytes). -/// -/// There are two cases we handle here: -/// 1) A union of vector types of the same size and potentially its elements. -/// Here we turn element accesses into insert/extract element operations. -/// This promotes a <4 x float> with a store of float to the third element -/// into a <4 x float> that uses insert element. -/// 2) A fully general blob of memory, which we turn into some (potentially -/// large) integer type with extract and insert operations where the loads -/// and stores would mutate the memory. We mark this by setting VectorTy -/// to VoidTy. -void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In, - uint64_t Offset) { - // If we already decided to turn this into a blob of integer memory, there is - // nothing to be done. - if (ScalarKind == Integer) - return; - - // If this could be contributing to a vector, analyze it. - - // If the In type is a vector that is the same size as the alloca, see if it - // matches the existing VecTy. - if (VectorType *VInTy = dyn_cast(In)) { - if (MergeInVectorType(VInTy, Offset)) - return; - } else if (In->isFloatTy() || In->isDoubleTy() || - (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && - isPowerOf2_32(In->getPrimitiveSizeInBits()))) { - // Full width accesses can be ignored, because they can always be turned - // into bitcasts. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; - if (EltSize == AllocaSize) - return; - - // If we're accessing something that could be an element of a vector, see - // if the implied vector agrees with what we already have and if Offset is - // compatible with it. - if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (!VectorTy || EltSize == VectorTy->getElementType() - ->getPrimitiveSizeInBits()/8)) { - if (!VectorTy) { - ScalarKind = ImplicitVector; - VectorTy = VectorType::get(In, AllocaSize/EltSize); - } - return; - } - } - - // Otherwise, we have a case that we can't handle with an optimized vector - // form. We can still turn this into a large integer. - ScalarKind = Integer; -} - -/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore, -/// returning true if the type was successfully merged and false otherwise. -bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy, - uint64_t Offset) { - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. If this is a subsequent access, ignore it - // even if it is a differing type but the same size. Worst case we can - // bitcast the resultant vectors. - if (!VectorTy) - VectorTy = VInTy; - ScalarKind = Vector; - return true; - } - - return false; -} - -/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all -/// its accesses to a single vector type, return true and set VecTy to -/// the new type. If we could convert the alloca into a single promotable -/// integer, return true but set VecTy to VoidTy. Further, if the use is not a -/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset -/// is the current offset from the base of the alloca being analyzed. -/// -/// If we see at least one access to the value that is as a vector type, set the -/// SawVec flag. -bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset, - Value* NonConstantIdx) { - for (User *U : V->users()) { - Instruction *UI = cast(U); - - if (LoadInst *LI = dyn_cast(UI)) { - // Don't break volatile loads. - if (!LI->isSimple()) - return false; - // Don't touch MMX operations. - if (LI->getType()->isX86_MMXTy()) - return false; - HadNonMemTransferAccess = true; - MergeInTypeForLoadOrStore(LI->getType(), Offset); - continue; - } - - if (StoreInst *SI = dyn_cast(UI)) { - // Storing the pointer, not into the value? - if (SI->getOperand(0) == V || !SI->isSimple()) return false; - // Don't touch MMX operations. - if (SI->getOperand(0)->getType()->isX86_MMXTy()) - return false; - HadNonMemTransferAccess = true; - MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset); - continue; - } - - if (BitCastInst *BCI = dyn_cast(UI)) { - if (!onlyUsedByLifetimeMarkers(BCI)) - IsNotTrivial = true; // Can't be mem2reg'd. - if (!CanConvertToScalar(BCI, Offset, NonConstantIdx)) - return false; - continue; - } - - if (GetElementPtrInst *GEP = dyn_cast(UI)) { - // If this is a GEP with a variable indices, we can't handle it. - // Compute the offset that this GEP adds to the pointer. - SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - Value *GEPNonConstantIdx = nullptr; - if (!GEP->hasAllConstantIndices()) { - if (!isa(GEP->getSourceElementType())) - return false; - if (NonConstantIdx) - return false; - GEPNonConstantIdx = Indices.pop_back_val(); - if (!GEPNonConstantIdx->getType()->isIntegerTy(32)) - return false; - HadDynamicAccess = true; - } else - GEPNonConstantIdx = NonConstantIdx; - uint64_t GEPOffset = DL.getIndexedOffsetInType(GEP->getSourceElementType(), - Indices); - // See if all uses can be converted. - if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx)) - return false; - IsNotTrivial = true; // Can't be mem2reg'd. - HadNonMemTransferAccess = true; - continue; - } - - // If this is a constant sized memset of a constant value (e.g. 0) we can - // handle it. - if (MemSetInst *MSI = dyn_cast(UI)) { - // Store to dynamic index. - if (NonConstantIdx) - return false; - // Store of constant value. - if (!isa(MSI->getValue())) - return false; - - // Store of constant size. - ConstantInt *Len = dyn_cast(MSI->getLength()); - if (!Len) - return false; - - // If the size differs from the alloca, we can only convert the alloca to - // an integer bag-of-bits. - // FIXME: This should handle all of the cases that are currently accepted - // as vector element insertions. - if (Len->getZExtValue() != AllocaSize || Offset != 0) - ScalarKind = Integer; - - IsNotTrivial = true; // Can't be mem2reg'd. - HadNonMemTransferAccess = true; - continue; - } - - // If this is a memcpy or memmove into or out of the whole allocation, we - // can handle it like a load or store of the scalar type. - if (MemTransferInst *MTI = dyn_cast(UI)) { - // Store to dynamic index. - if (NonConstantIdx) - return false; - ConstantInt *Len = dyn_cast(MTI->getLength()); - if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0) - return false; - - IsNotTrivial = true; // Can't be mem2reg'd. - continue; - } - - // If this is a lifetime intrinsic, we can handle it. - if (IntrinsicInst *II = dyn_cast(UI)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - continue; - } - } - - // Otherwise, we cannot handle this! - return false; - } - - return true; -} - -/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca -/// directly. This happens when we are converting an "integer union" to a -/// single integer scalar, or when we are converting a "vector union" to a -/// vector with insert/extractelement instructions. -/// -/// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. By the end of this, there should be no uses of Ptr. -void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, - uint64_t Offset, - Value* NonConstantIdx) { - while (!Ptr->use_empty()) { - Instruction *User = cast(Ptr->user_back()); - - if (BitCastInst *CI = dyn_cast(User)) { - ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx); - CI->eraseFromParent(); - continue; - } - - if (GetElementPtrInst *GEP = dyn_cast(User)) { - // Compute the offset that this GEP adds to the pointer. - SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - Value* GEPNonConstantIdx = nullptr; - if (!GEP->hasAllConstantIndices()) { - assert(!NonConstantIdx && - "Dynamic GEP reading from dynamic GEP unsupported"); - GEPNonConstantIdx = Indices.pop_back_val(); - } else - GEPNonConstantIdx = NonConstantIdx; - uint64_t GEPOffset = DL.getIndexedOffsetInType(GEP->getSourceElementType(), - Indices); - ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx); - GEP->eraseFromParent(); - continue; - } - - IRBuilder<> Builder(User); - - if (LoadInst *LI = dyn_cast(User)) { - // The load is a bit extract from NewAI shifted right by Offset bits. - Value *LoadedVal = Builder.CreateLoad(NewAI); - Value *NewLoadVal - = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, - NonConstantIdx, Builder); - LI->replaceAllUsesWith(NewLoadVal); - LI->eraseFromParent(); - continue; - } - - if (StoreInst *SI = dyn_cast(User)) { - assert(SI->getOperand(0) != Ptr && "Consistency error!"); - Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); - Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, - NonConstantIdx, Builder); - Builder.CreateStore(New, NewAI); - SI->eraseFromParent(); - - // If the load we just inserted is now dead, then the inserted store - // overwrote the entire thing. - if (Old->use_empty()) - Old->eraseFromParent(); - continue; - } - - // If this is a constant sized memset of a constant value (e.g. 0) we can - // transform it into a store of the expanded constant value. - if (MemSetInst *MSI = dyn_cast(User)) { - assert(MSI->getRawDest() == Ptr && "Consistency error!"); - assert(!NonConstantIdx && "Cannot replace dynamic memset with insert"); - int64_t SNumBytes = cast(MSI->getLength())->getSExtValue(); - if (SNumBytes > 0 && (SNumBytes >> 32) == 0) { - unsigned NumBytes = static_cast(SNumBytes); - unsigned Val = cast(MSI->getValue())->getZExtValue(); - - // Compute the value replicated the right number of times. - APInt APVal(NumBytes*8, Val); - - // Splat the value if non-zero. - if (Val) - for (unsigned i = 1; i != NumBytes; ++i) - APVal |= APVal << 8; - - Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); - Value *New = ConvertScalar_InsertValue( - ConstantInt::get(User->getContext(), APVal), - Old, Offset, nullptr, Builder); - Builder.CreateStore(New, NewAI); - - // If the load we just inserted is now dead, then the memset overwrote - // the entire thing. - if (Old->use_empty()) - Old->eraseFromParent(); - } - MSI->eraseFromParent(); - continue; - } - - // If this is a memcpy or memmove into or out of the whole allocation, we - // can handle it like a load or store of the scalar type. - if (MemTransferInst *MTI = dyn_cast(User)) { - assert(Offset == 0 && "must be store to start of alloca"); - assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert"); - - // If the source and destination are both to the same alloca, then this is - // a noop copy-to-self, just delete it. Otherwise, emit a load and store - // as appropriate. - AllocaInst *OrigAI = cast(GetUnderlyingObject(Ptr, DL, 0)); - - if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) { - // Dest must be OrigAI, change this to be a load from the original - // pointer (bitcasted), then a store to our new alloca. - assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); - Value *SrcPtr = MTI->getSource(); - PointerType* SPTy = cast(SrcPtr->getType()); - PointerType* AIPTy = cast(NewAI->getType()); - if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) { - AIPTy = PointerType::get(NewAI->getAllocatedType(), - SPTy->getAddressSpace()); - } - SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy); - - LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); - SrcVal->setAlignment(MTI->getAlignment()); - Builder.CreateStore(SrcVal, NewAI); - } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) { - // Src must be OrigAI, change this to be a load from NewAI then a store - // through the original dest pointer (bitcasted). - assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); - LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); - - PointerType* DPTy = cast(MTI->getDest()->getType()); - PointerType* AIPTy = cast(NewAI->getType()); - if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) { - AIPTy = PointerType::get(NewAI->getAllocatedType(), - DPTy->getAddressSpace()); - } - Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy); - - StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); - NewStore->setAlignment(MTI->getAlignment()); - } else { - // Noop transfer. Src == Dst - } - - MTI->eraseFromParent(); - continue; - } - - if (IntrinsicInst *II = dyn_cast(User)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - // There's no need to preserve these, as the resulting alloca will be - // converted to a register anyways. - II->eraseFromParent(); - continue; - } - } - - llvm_unreachable("Unsupported operation!"); - } -} - -/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer -/// or vector value FromVal, extracting the bits from the offset specified by -/// Offset. This returns the value, which is of type ToType. -/// -/// This happens when we are converting an "integer union" to a single -/// integer scalar, or when we are converting a "vector union" to a vector with -/// insert/extractelement instructions. -/// -/// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. -Value *ConvertToScalarInfo:: -ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, - uint64_t Offset, Value* NonConstantIdx, - IRBuilder<> &Builder) { - // If the load is of the whole new alloca, no conversion is needed. - Type *FromType = FromVal->getType(); - if (FromType == ToType && Offset == 0) - return FromVal; - - // If the result alloca is a vector type, this is either an element - // access or a bitcast to another vector type of the same size. - if (VectorType *VTy = dyn_cast(FromType)) { - unsigned FromTypeSize = DL.getTypeAllocSize(FromType); - unsigned ToTypeSize = DL.getTypeAllocSize(ToType); - if (FromTypeSize == ToTypeSize) - return Builder.CreateBitCast(FromVal, ToType); - - // Otherwise it must be an element access. - unsigned Elt = 0; - if (Offset) { - unsigned EltSize = DL.getTypeAllocSizeInBits(VTy->getElementType()); - Elt = Offset/EltSize; - assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); - } - // Return the element extracted out of it. - Value *Idx; - if (NonConstantIdx) { - if (Elt) - Idx = Builder.CreateAdd(NonConstantIdx, - Builder.getInt32(Elt), - "dyn.offset"); - else - Idx = NonConstantIdx; - } else - Idx = Builder.getInt32(Elt); - Value *V = Builder.CreateExtractElement(FromVal, Idx); - if (V->getType() != ToType) - V = Builder.CreateBitCast(V, ToType); - return V; - } - - // If ToType is a first class aggregate, extract out each of the pieces and - // use insertvalue's to form the FCA. - if (StructType *ST = dyn_cast(ToType)) { - assert(!NonConstantIdx && - "Dynamic indexing into struct types not supported"); - const StructLayout &Layout = *DL.getStructLayout(ST); - Value *Res = UndefValue::get(ST); - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), - Offset+Layout.getElementOffsetInBits(i), - nullptr, Builder); - Res = Builder.CreateInsertValue(Res, Elt, i); - } - return Res; - } - - if (ArrayType *AT = dyn_cast(ToType)) { - assert(!NonConstantIdx && - "Dynamic indexing into array types not supported"); - uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType()); - Value *Res = UndefValue::get(AT); - for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), - Offset+i*EltSize, nullptr, - Builder); - Res = Builder.CreateInsertValue(Res, Elt, i); - } - return Res; - } - - // Otherwise, this must be a union that was converted to an integer value. - IntegerType *NTy = cast(FromVal->getType()); - - // If this is a big-endian system and the load is narrower than the - // full alloca type, we need to do a shift to get the right bits. - int ShAmt = 0; - if (DL.isBigEndian()) { - // On big-endian machines, the lowest bit is stored at the bit offset - // from the pointer given by getTypeStoreSizeInBits. This matters for - // integers with a bitwidth that is not a multiple of 8. - ShAmt = DL.getTypeStoreSizeInBits(NTy) - - DL.getTypeStoreSizeInBits(ToType) - Offset; - } else { - ShAmt = Offset; - } - - // Note: we support negative bitwidths (with shl) which are not defined. - // We do this to support (f.e.) loads off the end of a structure where - // only some bits are used. - if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateLShr(FromVal, - ConstantInt::get(FromVal->getType(), ShAmt)); - else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateShl(FromVal, - ConstantInt::get(FromVal->getType(), -ShAmt)); - - // Finally, unconditionally truncate the integer to the right width. - unsigned LIBitWidth = DL.getTypeSizeInBits(ToType); - if (LIBitWidth < NTy->getBitWidth()) - FromVal = - Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth)); - else if (LIBitWidth > NTy->getBitWidth()) - FromVal = - Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth)); - - // If the result is an integer, this is a trunc or bitcast. - if (ToType->isIntegerTy()) { - // Should be done. - } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { - // Just do a bitcast, we know the sizes match up. - FromVal = Builder.CreateBitCast(FromVal, ToType); - } else { - // Otherwise must be a pointer. - FromVal = Builder.CreateIntToPtr(FromVal, ToType); - } - assert(FromVal->getType() == ToType && "Didn't convert right?"); - return FromVal; -} - -/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer -/// or vector value "Old" at the offset specified by Offset. -/// -/// This happens when we are converting an "integer union" to a -/// single integer scalar, or when we are converting a "vector union" to a -/// vector with insert/extractelement instructions. -/// -/// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. -/// -/// NonConstantIdx is an index value if there was a GEP with a non-constant -/// index value. If this is 0 then all GEPs used to find this insert address -/// are constant. -Value *ConvertToScalarInfo:: -ConvertScalar_InsertValue(Value *SV, Value *Old, - uint64_t Offset, Value* NonConstantIdx, - IRBuilder<> &Builder) { - // Convert the stored type to the actual type, shift it left to insert - // then 'or' into place. - Type *AllocaType = Old->getType(); - LLVMContext &Context = Old->getContext(); - - if (VectorType *VTy = dyn_cast(AllocaType)) { - uint64_t VecSize = DL.getTypeAllocSizeInBits(VTy); - uint64_t ValSize = DL.getTypeAllocSizeInBits(SV->getType()); - - // Changing the whole vector with memset or with an access of a different - // vector type? - if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType); - - // Must be an element insertion. - Type *EltTy = VTy->getElementType(); - if (SV->getType() != EltTy) - SV = Builder.CreateBitCast(SV, EltTy); - uint64_t EltSize = DL.getTypeAllocSizeInBits(EltTy); - unsigned Elt = Offset/EltSize; - Value *Idx; - if (NonConstantIdx) { - if (Elt) - Idx = Builder.CreateAdd(NonConstantIdx, - Builder.getInt32(Elt), - "dyn.offset"); - else - Idx = NonConstantIdx; - } else - Idx = Builder.getInt32(Elt); - return Builder.CreateInsertElement(Old, SV, Idx); - } - - // If SV is a first-class aggregate value, insert each value recursively. - if (StructType *ST = dyn_cast(SV->getType())) { - assert(!NonConstantIdx && - "Dynamic indexing into struct types not supported"); - const StructLayout &Layout = *DL.getStructLayout(ST); - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i); - Old = ConvertScalar_InsertValue(Elt, Old, - Offset+Layout.getElementOffsetInBits(i), - nullptr, Builder); - } - return Old; - } - - if (ArrayType *AT = dyn_cast(SV->getType())) { - assert(!NonConstantIdx && - "Dynamic indexing into array types not supported"); - uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType()); - for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i); - Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr, - Builder); - } - return Old; - } - - // If SV is a float, convert it to the appropriate integer type. - // If it is a pointer, do the same. - unsigned SrcWidth = DL.getTypeSizeInBits(SV->getType()); - unsigned DestWidth = DL.getTypeSizeInBits(AllocaType); - unsigned SrcStoreWidth = DL.getTypeStoreSizeInBits(SV->getType()); - unsigned DestStoreWidth = DL.getTypeStoreSizeInBits(AllocaType); - if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) - SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth)); - else if (SV->getType()->isPointerTy()) - SV = Builder.CreatePtrToInt(SV, DL.getIntPtrType(SV->getType())); - - // Zero extend or truncate the value if needed. - if (SV->getType() != AllocaType) { - if (SV->getType()->getPrimitiveSizeInBits() < - AllocaType->getPrimitiveSizeInBits()) - SV = Builder.CreateZExt(SV, AllocaType); - else { - // Truncation may be needed if storing more than the alloca can hold - // (undefined behavior). - SV = Builder.CreateTrunc(SV, AllocaType); - SrcWidth = DestWidth; - SrcStoreWidth = DestStoreWidth; - } - } - - // If this is a big-endian system and the store is narrower than the - // full alloca type, we need to do a shift to get the right bits. - int ShAmt = 0; - if (DL.isBigEndian()) { - // On big-endian machines, the lowest bit is stored at the bit offset - // from the pointer given by getTypeStoreSizeInBits. This matters for - // integers with a bitwidth that is not a multiple of 8. - ShAmt = DestStoreWidth - SrcStoreWidth - Offset; - } else { - ShAmt = Offset; - } - - // Note: we support negative bitwidths (with shr) which are not defined. - // We do this to support (f.e.) stores off the end of a structure where - // only some bits in the structure are set. - APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); - if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt)); - Mask <<= ShAmt; - } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt)); - Mask = Mask.lshr(-ShAmt); - } - - // Mask out the bits we are about to insert from the old value, and or - // in the new bits. - if (SrcWidth != DestWidth) { - assert(DestWidth > SrcWidth); - Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); - SV = Builder.CreateOr(Old, SV, "ins"); - } - return SV; -} - - -//===----------------------------------------------------------------------===// -// SRoA Driver -//===----------------------------------------------------------------------===// - - -bool SROA::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; - - bool Changed = performPromotion(F); - - while (1) { - bool LocalChange = performScalarRepl(F); - if (!LocalChange) break; // No need to repromote if no scalarrepl - Changed = true; - LocalChange = performPromotion(F); - if (!LocalChange) break; // No need to re-scalarrepl if no promotion - } - - return Changed; -} - -namespace { -class AllocaPromoter : public LoadAndStorePromoter { - AllocaInst *AI; - DIBuilder *DIB; - SmallVector DDIs; - SmallVector DVIs; -public: - AllocaPromoter(ArrayRef Insts, SSAUpdater &S, - DIBuilder *DB) - : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {} - - void run(AllocaInst *AI, const SmallVectorImpl &Insts) { - // Remember which alloca we're promoting (for isInstInList). - this->AI = AI; - if (auto *L = LocalAsMetadata::getIfExists(AI)) { - if (auto *DINode = MetadataAsValue::getIfExists(AI->getContext(), L)) { - for (User *U : DINode->users()) - if (DbgDeclareInst *DDI = dyn_cast(U)) - DDIs.push_back(DDI); - else if (DbgValueInst *DVI = dyn_cast(U)) - DVIs.push_back(DVI); - } - } - - LoadAndStorePromoter::run(Insts); - AI->eraseFromParent(); - for (SmallVectorImpl::iterator I = DDIs.begin(), - E = DDIs.end(); I != E; ++I) { - DbgDeclareInst *DDI = *I; - DDI->eraseFromParent(); - } - for (SmallVectorImpl::iterator I = DVIs.begin(), - E = DVIs.end(); I != E; ++I) { - DbgValueInst *DVI = *I; - DVI->eraseFromParent(); - } - } - - bool isInstInList(Instruction *I, - const SmallVectorImpl &Insts) const override { - if (LoadInst *LI = dyn_cast(I)) - return LI->getOperand(0) == AI; - return cast(I)->getPointerOperand() == AI; - } - - void updateDebugInfo(Instruction *Inst) const override { - for (SmallVectorImpl::const_iterator I = DDIs.begin(), - E = DDIs.end(); I != E; ++I) { - DbgDeclareInst *DDI = *I; - if (StoreInst *SI = dyn_cast(Inst)) - ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); - else if (LoadInst *LI = dyn_cast(Inst)) - ConvertDebugDeclareToDebugValue(DDI, LI, *DIB); - } - for (SmallVectorImpl::const_iterator I = DVIs.begin(), - E = DVIs.end(); I != E; ++I) { - DbgValueInst *DVI = *I; - Value *Arg = nullptr; - if (StoreInst *SI = dyn_cast(Inst)) { - // If an argument is zero extended then use argument directly. The ZExt - // may be zapped by an optimization pass in future. - if (ZExtInst *ZExt = dyn_cast(SI->getOperand(0))) - Arg = dyn_cast(ZExt->getOperand(0)); - if (SExtInst *SExt = dyn_cast(SI->getOperand(0))) - Arg = dyn_cast(SExt->getOperand(0)); - if (!Arg) - Arg = SI->getOperand(0); - } else if (LoadInst *LI = dyn_cast(Inst)) { - Arg = LI->getOperand(0); - } else { - continue; - } - DIB->insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(), - DVI->getExpression(), DVI->getDebugLoc(), - Inst); - } - } -}; -} // end anon namespace - -/// isSafeSelectToSpeculate - Select instructions that use an alloca and are -/// subsequently loaded can be rewritten to load both input pointers and then -/// select between the result, allowing the load of the alloca to be promoted. -/// From this: -/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other -/// %V = load i32* %P2 -/// to: -/// %V1 = load i32* %Alloca -> will be mem2reg'd -/// %V2 = load i32* %Other -/// %V = select i1 %cond, i32 %V1, i32 %V2 -/// -/// We can do this to a select if its only uses are loads and if the operand to -/// the select can be loaded unconditionally. -static bool isSafeSelectToSpeculate(SelectInst *SI) { - const DataLayout &DL = SI->getModule()->getDataLayout(); - - for (User *U : SI->users()) { - LoadInst *LI = dyn_cast(U); - if (!LI || !LI->isSimple()) return false; - - // Both operands to the select need to be dereferencable, either absolutely - // (e.g. allocas) or at this point because we can see other accesses to it. - if (!isSafeToLoadUnconditionally(SI->getTrueValue(), LI->getAlignment(), - DL, LI)) - return false; - if (!isSafeToLoadUnconditionally(SI->getFalseValue(), LI->getAlignment(), - DL, LI)) - return false; - } - - return true; -} - -/// isSafePHIToSpeculate - PHI instructions that use an alloca and are -/// subsequently loaded can be rewritten to load both input pointers in the pred -/// blocks and then PHI the results, allowing the load of the alloca to be -/// promoted. -/// From this: -/// %P2 = phi [i32* %Alloca, i32* %Other] -/// %V = load i32* %P2 -/// to: -/// %V1 = load i32* %Alloca -> will be mem2reg'd -/// ... -/// %V2 = load i32* %Other -/// ... -/// %V = phi [i32 %V1, i32 %V2] -/// -/// We can do this to a select if its only uses are loads and if the operand to -/// the select can be loaded unconditionally. -static bool isSafePHIToSpeculate(PHINode *PN) { - // For now, we can only do this promotion if the load is in the same block as - // the PHI, and if there are no stores between the phi and load. - // TODO: Allow recursive phi users. - // TODO: Allow stores. - BasicBlock *BB = PN->getParent(); - unsigned MaxAlign = 0; - for (User *U : PN->users()) { - LoadInst *LI = dyn_cast(U); - if (!LI || !LI->isSimple()) return false; - - // For now we only allow loads in the same block as the PHI. This is a - // common case that happens when instcombine merges two loads through a PHI. - if (LI->getParent() != BB) return false; - - // Ensure that there are no instructions between the PHI and the load that - // could store. - for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI) - if (BBI->mayWriteToMemory()) - return false; - - MaxAlign = std::max(MaxAlign, LI->getAlignment()); - } - - const DataLayout &DL = PN->getModule()->getDataLayout(); - - // Okay, we know that we have one or more loads in the same block as the PHI. - // We can transform this if it is safe to push the loads into the predecessor - // blocks. The only thing to watch out for is that we can't put a possibly - // trapping load in the predecessor if it is a critical edge. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *Pred = PN->getIncomingBlock(i); - Value *InVal = PN->getIncomingValue(i); - - // If the terminator of the predecessor has side-effects (an invoke), - // there is no safe place to put a load in the predecessor. - if (Pred->getTerminator()->mayHaveSideEffects()) - return false; - - // If the value is produced by the terminator of the predecessor - // (an invoke), there is no valid place to put a load in the predecessor. - if (Pred->getTerminator() == InVal) - return false; - - // If the predecessor has a single successor, then the edge isn't critical. - if (Pred->getTerminator()->getNumSuccessors() == 1) - continue; - - // If this pointer is always safe to load, or if we can prove that there is - // already a load in the block, then we can move the load to the pred block. - if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, Pred->getTerminator())) - continue; - - return false; - } - - return true; -} - - -/// tryToMakeAllocaBePromotable - This returns true if the alloca only has -/// direct (non-volatile) loads and stores to it. If the alloca is close but -/// not quite there, this will transform the code to allow promotion. As such, -/// it is a non-pure predicate. -static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) { - SetVector, - SmallPtrSet > InstsToRewrite; - for (User *U : AI->users()) { - if (LoadInst *LI = dyn_cast(U)) { - if (!LI->isSimple()) - return false; - continue; - } - - if (StoreInst *SI = dyn_cast(U)) { - if (SI->getOperand(0) == AI || !SI->isSimple()) - return false; // Don't allow a store OF the AI, only INTO the AI. - continue; - } - - if (SelectInst *SI = dyn_cast(U)) { - // If the condition being selected on is a constant, fold the select, yes - // this does (rarely) happen early on. - if (ConstantInt *CI = dyn_cast(SI->getCondition())) { - Value *Result = SI->getOperand(1+CI->isZero()); - SI->replaceAllUsesWith(Result); - SI->eraseFromParent(); - - // This is very rare and we just scrambled the use list of AI, start - // over completely. - return tryToMakeAllocaBePromotable(AI, DL); - } - - // If it is safe to turn "load (select c, AI, ptr)" into a select of two - // loads, then we can transform this by rewriting the select. - if (!isSafeSelectToSpeculate(SI)) - return false; - - InstsToRewrite.insert(SI); - continue; - } - - if (PHINode *PN = dyn_cast(U)) { - if (PN->use_empty()) { // Dead PHIs can be stripped. - InstsToRewrite.insert(PN); - continue; - } - - // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads - // in the pred blocks, then we can transform this by rewriting the PHI. - if (!isSafePHIToSpeculate(PN)) - return false; - - InstsToRewrite.insert(PN); - continue; - } - - if (BitCastInst *BCI = dyn_cast(U)) { - if (onlyUsedByLifetimeMarkers(BCI)) { - InstsToRewrite.insert(BCI); - continue; - } - } - - return false; - } - - // If there are no instructions to rewrite, then all uses are load/stores and - // we're done! - if (InstsToRewrite.empty()) - return true; - - // If we have instructions that need to be rewritten for this to be promotable - // take care of it now. - for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { - if (BitCastInst *BCI = dyn_cast(InstsToRewrite[i])) { - // This could only be a bitcast used by nothing but lifetime intrinsics. - for (BitCastInst::user_iterator I = BCI->user_begin(), E = BCI->user_end(); - I != E;) - cast(*I++)->eraseFromParent(); - BCI->eraseFromParent(); - continue; - } - - if (SelectInst *SI = dyn_cast(InstsToRewrite[i])) { - // Selects in InstsToRewrite only have load uses. Rewrite each as two - // loads with a new select. - while (!SI->use_empty()) { - LoadInst *LI = cast(SI->user_back()); - - IRBuilder<> Builder(LI); - LoadInst *TrueLoad = - Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); - LoadInst *FalseLoad = - Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f"); - - // Transfer alignment and AA info if present. - TrueLoad->setAlignment(LI->getAlignment()); - FalseLoad->setAlignment(LI->getAlignment()); - - AAMDNodes Tags; - LI->getAAMetadata(Tags); - if (Tags) { - TrueLoad->setAAMetadata(Tags); - FalseLoad->setAAMetadata(Tags); - } - - Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad); - V->takeName(LI); - LI->replaceAllUsesWith(V); - LI->eraseFromParent(); - } - - // Now that all the loads are gone, the select is gone too. - SI->eraseFromParent(); - continue; - } - - // Otherwise, we have a PHI node which allows us to push the loads into the - // predecessors. - PHINode *PN = cast(InstsToRewrite[i]); - if (PN->use_empty()) { - PN->eraseFromParent(); - continue; - } - - Type *LoadTy = AI->getAllocatedType(); - PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), - PN->getName()+".ld", PN); - - // Get the AA tags and alignment to use from one of the loads. It doesn't - // matter which one we get and if any differ, it doesn't matter. - LoadInst *SomeLoad = cast(PN->user_back()); - - AAMDNodes AATags; - SomeLoad->getAAMetadata(AATags); - unsigned Align = SomeLoad->getAlignment(); - - // Rewrite all loads of the PN to use the new PHI. - while (!PN->use_empty()) { - LoadInst *LI = cast(PN->user_back()); - LI->replaceAllUsesWith(NewPN); - LI->eraseFromParent(); - } - - // Inject loads into all of the pred blocks. Keep track of which blocks we - // insert them into in case we have multiple edges from the same block. - DenseMap InsertedLoads; - - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - BasicBlock *Pred = PN->getIncomingBlock(i); - LoadInst *&Load = InsertedLoads[Pred]; - if (!Load) { - Load = new LoadInst(PN->getIncomingValue(i), - PN->getName() + "." + Pred->getName(), - Pred->getTerminator()); - Load->setAlignment(Align); - if (AATags) Load->setAAMetadata(AATags); - } - - NewPN->addIncoming(Load, Pred); - } - - PN->eraseFromParent(); - } - - ++NumAdjusted; - return true; -} - -bool SROA::performPromotion(Function &F) { - std::vector Allocas; - const DataLayout &DL = F.getParent()->getDataLayout(); - DominatorTree *DT = nullptr; - if (HasDomTree) - DT = &getAnalysis().getDomTree(); - AssumptionCache &AC = - getAnalysis().getAssumptionCache(F); - - BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function - DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); - bool Changed = false; - SmallVector Insts; - while (1) { - Allocas.clear(); - - // Find allocas that are safe to promote, by looking at all instructions in - // the entry node - for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast(I)) // Is it an alloca? - if (tryToMakeAllocaBePromotable(AI, DL)) - Allocas.push_back(AI); - - if (Allocas.empty()) break; - - if (HasDomTree) - PromoteMemToReg(Allocas, *DT, nullptr, &AC); - else { - SSAUpdater SSA; - for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { - AllocaInst *AI = Allocas[i]; - - // Build list of instructions to promote. - for (User *U : AI->users()) - Insts.push_back(cast(U)); - AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts); - Insts.clear(); - } - } - NumPromoted += Allocas.size(); - Changed = true; - } - - return Changed; -} - - -/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for -/// SROA. It must be a struct or array type with a small number of elements. -bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { - Type *T = AI->getAllocatedType(); - // Do not promote any struct that has too many members. - if (StructType *ST = dyn_cast(T)) - return ST->getNumElements() <= StructMemberThreshold; - // Do not promote any array that has too many elements. - if (ArrayType *AT = dyn_cast(T)) - return AT->getNumElements() <= ArrayElementThreshold; - return false; -} - -// performScalarRepl - This algorithm is a simple worklist driven algorithm, -// which runs on all of the alloca instructions in the entry block, removing -// them if they are only used by getelementptr instructions. -// -bool SROA::performScalarRepl(Function &F) { - std::vector WorkList; - const DataLayout &DL = F.getParent()->getDataLayout(); - - // Scan the entry basic block, adding allocas to the worklist. - BasicBlock &BB = F.getEntryBlock(); - for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) - if (AllocaInst *A = dyn_cast(I)) - WorkList.push_back(A); - - // Process the worklist - bool Changed = false; - while (!WorkList.empty()) { - AllocaInst *AI = WorkList.back(); - WorkList.pop_back(); - - // Handle dead allocas trivially. These can be formed by SROA'ing arrays - // with unused elements. - if (AI->use_empty()) { - AI->eraseFromParent(); - Changed = true; - continue; - } - - // If this alloca is impossible for us to promote, reject it early. - if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) - continue; - - // Check to see if we can perform the core SROA transformation. We cannot - // transform the allocation instruction if it is an array allocation - // (allocations OF arrays are ok though), and an allocation of a scalar - // value cannot be decomposed at all. - uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType()); - - // Do not promote [0 x %struct]. - if (AllocaSize == 0) continue; - - // Do not promote any struct whose size is too big. - if (AllocaSize > SRThreshold) continue; - - // If the alloca looks like a good candidate for scalar replacement, and if - // all its users can be transformed, then split up the aggregate into its - // separate elements. - if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { - DoScalarReplacement(AI, WorkList); - Changed = true; - continue; - } - - // If we can turn this aggregate value (potentially with casts) into a - // simple scalar value that can be mem2reg'd into a register value. - // IsNotTrivial tracks whether this is something that mem2reg could have - // promoted itself. If so, we don't want to transform it needlessly. Note - // that we can't just check based on the type: the alloca may be of an i32 - // but that has pointer arithmetic to set byte 3 of it or something. - if (AllocaInst *NewAI = - ConvertToScalarInfo((unsigned)AllocaSize, DL, ScalarLoadThreshold) - .TryConvert(AI)) { - NewAI->takeName(AI); - AI->eraseFromParent(); - ++NumConverted; - Changed = true; - continue; - } - - // Otherwise, couldn't process this alloca. - } - - return Changed; -} - -/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl -/// predicate, do SROA now. -void SROA::DoScalarReplacement(AllocaInst *AI, - std::vector &WorkList) { - DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); - SmallVector ElementAllocas; - if (StructType *ST = dyn_cast(AI->getAllocatedType())) { - ElementAllocas.reserve(ST->getNumContainedTypes()); - for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr, - AI->getAlignment(), - AI->getName() + "." + Twine(i), AI); - ElementAllocas.push_back(NA); - WorkList.push_back(NA); // Add to worklist for recursive processing - } - } else { - ArrayType *AT = cast(AI->getAllocatedType()); - ElementAllocas.reserve(AT->getNumElements()); - Type *ElTy = AT->getElementType(); - for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(), - AI->getName() + "." + Twine(i), AI); - ElementAllocas.push_back(NA); - WorkList.push_back(NA); // Add to worklist for recursive processing - } - } - - // Now that we have created the new alloca instructions, rewrite all the - // uses of the old alloca. - RewriteForScalarRepl(AI, AI, 0, ElementAllocas); - - // Now erase any instructions that were made dead while rewriting the alloca. - DeleteDeadInstructions(); - AI->eraseFromParent(); - - ++NumReplaced; -} - -/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, -/// recursively including all their operands that become trivially dead. -void SROA::DeleteDeadInstructions() { - while (!DeadInsts.empty()) { - Instruction *I = cast(DeadInsts.pop_back_val()); - - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *U = dyn_cast(*OI)) { - // Zero out the operand and see if it becomes trivially dead. - // (But, don't add allocas to the dead instruction list -- they are - // already on the worklist and will be deleted separately.) - *OI = nullptr; - if (isInstructionTriviallyDead(U) && !isa(U)) - DeadInsts.push_back(U); - } - - I->eraseFromParent(); - } -} - -/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to -/// performing scalar replacement of alloca AI. The results are flagged in -/// the Info parameter. Offset indicates the position within AI that is -/// referenced by this instruction. -void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, - AllocaInfo &Info) { - const DataLayout &DL = I->getModule()->getDataLayout(); - for (Use &U : I->uses()) { - Instruction *User = cast(U.getUser()); - - if (BitCastInst *BC = dyn_cast(User)) { - isSafeForScalarRepl(BC, Offset, Info); - } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { - uint64_t GEPOffset = Offset; - isSafeGEP(GEPI, GEPOffset, Info); - if (!Info.isUnsafe) - isSafeForScalarRepl(GEPI, GEPOffset, Info); - } else if (MemIntrinsic *MI = dyn_cast(User)) { - ConstantInt *Length = dyn_cast(MI->getLength()); - if (!Length || Length->isNegative()) - return MarkUnsafe(Info, User); - - isSafeMemAccess(Offset, Length->getZExtValue(), nullptr, - U.getOperandNo() == 0, Info, MI, - true /*AllowWholeAccess*/); - } else if (LoadInst *LI = dyn_cast(User)) { - if (!LI->isSimple()) - return MarkUnsafe(Info, User); - Type *LIType = LI->getType(); - isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info, - LI, true /*AllowWholeAccess*/); - Info.hasALoadOrStore = true; - - } else if (StoreInst *SI = dyn_cast(User)) { - // Store is ok if storing INTO the pointer, not storing the pointer - if (!SI->isSimple() || SI->getOperand(0) == I) - return MarkUnsafe(Info, User); - - Type *SIType = SI->getOperand(0)->getType(); - isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info, - SI, true /*AllowWholeAccess*/); - Info.hasALoadOrStore = true; - } else if (IntrinsicInst *II = dyn_cast(User)) { - if (II->getIntrinsicID() != Intrinsic::lifetime_start && - II->getIntrinsicID() != Intrinsic::lifetime_end) - return MarkUnsafe(Info, User); - } else if (isa(User) || isa(User)) { - isSafePHISelectUseForScalarRepl(User, Offset, Info); - } else { - return MarkUnsafe(Info, User); - } - if (Info.isUnsafe) return; - } -} - - -/// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer -/// derived from the alloca, we can often still split the alloca into elements. -/// This is useful if we have a large alloca where one element is phi'd -/// together somewhere: we can SRoA and promote all the other elements even if -/// we end up not being able to promote this one. -/// -/// All we require is that the uses of the PHI do not index into other parts of -/// the alloca. The most important use case for this is single load and stores -/// that are PHI'd together, which can happen due to code sinking. -void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, - AllocaInfo &Info) { - // If we've already checked this PHI, don't do it again. - if (PHINode *PN = dyn_cast(I)) - if (!Info.CheckedPHIs.insert(PN).second) - return; - - const DataLayout &DL = I->getModule()->getDataLayout(); - for (User *U : I->users()) { - Instruction *UI = cast(U); - - if (BitCastInst *BC = dyn_cast(UI)) { - isSafePHISelectUseForScalarRepl(BC, Offset, Info); - } else if (GetElementPtrInst *GEPI = dyn_cast(UI)) { - // Only allow "bitcast" GEPs for simplicity. We could generalize this, - // but would have to prove that we're staying inside of an element being - // promoted. - if (!GEPI->hasAllZeroIndices()) - return MarkUnsafe(Info, UI); - isSafePHISelectUseForScalarRepl(GEPI, Offset, Info); - } else if (LoadInst *LI = dyn_cast(UI)) { - if (!LI->isSimple()) - return MarkUnsafe(Info, UI); - Type *LIType = LI->getType(); - isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info, - LI, false /*AllowWholeAccess*/); - Info.hasALoadOrStore = true; - - } else if (StoreInst *SI = dyn_cast(UI)) { - // Store is ok if storing INTO the pointer, not storing the pointer - if (!SI->isSimple() || SI->getOperand(0) == I) - return MarkUnsafe(Info, UI); - - Type *SIType = SI->getOperand(0)->getType(); - isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info, - SI, false /*AllowWholeAccess*/); - Info.hasALoadOrStore = true; - } else if (isa(UI) || isa(UI)) { - isSafePHISelectUseForScalarRepl(UI, Offset, Info); - } else { - return MarkUnsafe(Info, UI); - } - if (Info.isUnsafe) return; - } -} - -/// isSafeGEP - Check if a GEP instruction can be handled for scalar -/// replacement. It is safe when all the indices are constant, in-bounds -/// references, and when the resulting offset corresponds to an element within -/// the alloca type. The results are flagged in the Info parameter. Upon -/// return, Offset is adjusted as specified by the GEP indices. -void SROA::isSafeGEP(GetElementPtrInst *GEPI, - uint64_t &Offset, AllocaInfo &Info) { - gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); - if (GEPIt == E) - return; - bool NonConstant = false; - unsigned NonConstantIdxSize = 0; - - // Walk through the GEP type indices, checking the types that this indexes - // into. - for (; GEPIt != E; ++GEPIt) { - // Ignore struct elements, no extra checking needed for these. - if ((*GEPIt)->isStructTy()) - continue; - - ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); - if (!IdxVal) - return MarkUnsafe(Info, GEPI); - } - - // Compute the offset due to this GEP and check if the alloca has a - // component element at that offset. - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); - // If this GEP is non-constant then the last operand must have been a - // dynamic index into a vector. Pop this now as it has no impact on the - // constant part of the offset. - if (NonConstant) - Indices.pop_back(); - - const DataLayout &DL = GEPI->getModule()->getDataLayout(); - Offset += DL.getIndexedOffsetInType(GEPI->getSourceElementType(), Indices); - if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize, - DL)) - MarkUnsafe(Info, GEPI); -} - -/// isHomogeneousAggregate - Check if type T is a struct or array containing -/// elements of the same type (which is always true for arrays). If so, -/// return true with NumElts and EltTy set to the number of elements and the -/// element type, respectively. -static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, - Type *&EltTy) { - if (ArrayType *AT = dyn_cast(T)) { - NumElts = AT->getNumElements(); - EltTy = (NumElts == 0 ? nullptr : AT->getElementType()); - return true; - } - if (StructType *ST = dyn_cast(T)) { - NumElts = ST->getNumContainedTypes(); - EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0)); - for (unsigned n = 1; n < NumElts; ++n) { - if (ST->getContainedType(n) != EltTy) - return false; - } - return true; - } - return false; -} - -/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are -/// "homogeneous" aggregates with the same element type and number of elements. -static bool isCompatibleAggregate(Type *T1, Type *T2) { - if (T1 == T2) - return true; - - unsigned NumElts1, NumElts2; - Type *EltTy1, *EltTy2; - if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && - isHomogeneousAggregate(T2, NumElts2, EltTy2) && - NumElts1 == NumElts2 && - EltTy1 == EltTy2) - return true; - - return false; -} - -/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI -/// alloca or has an offset and size that corresponds to a component element -/// within it. The offset checked here may have been formed from a GEP with a -/// pointer bitcasted to a different type. -/// -/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a -/// unit. If false, it only allows accesses known to be in a single element. -void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, - Type *MemOpType, bool isStore, - AllocaInfo &Info, Instruction *TheAccess, - bool AllowWholeAccess) { - const DataLayout &DL = TheAccess->getModule()->getDataLayout(); - // Check if this is a load/store of the entire alloca. - if (Offset == 0 && AllowWholeAccess && - MemSize == DL.getTypeAllocSize(Info.AI->getAllocatedType())) { - // This can be safe for MemIntrinsics (where MemOpType is 0) and integer - // loads/stores (which are essentially the same as the MemIntrinsics with - // regard to copying padding between elements). But, if an alloca is - // flagged as both a source and destination of such operations, we'll need - // to check later for padding between elements. - if (!MemOpType || MemOpType->isIntegerTy()) { - if (isStore) - Info.isMemCpyDst = true; - else - Info.isMemCpySrc = true; - return; - } - // This is also safe for references using a type that is compatible with - // the type of the alloca, so that loads/stores can be rewritten using - // insertvalue/extractvalue. - if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) { - Info.hasSubelementAccess = true; - return; - } - } - // Check if the offset/size correspond to a component within the alloca type. - Type *T = Info.AI->getAllocatedType(); - if (TypeHasComponent(T, Offset, MemSize, DL)) { - Info.hasSubelementAccess = true; - return; - } - - return MarkUnsafe(Info, TheAccess); -} - -/// TypeHasComponent - Return true if T has a component type with the -/// specified offset and size. If Size is zero, do not check the size. -bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size, - const DataLayout &DL) { - Type *EltTy; - uint64_t EltSize; - if (StructType *ST = dyn_cast(T)) { - const StructLayout *Layout = DL.getStructLayout(ST); - unsigned EltIdx = Layout->getElementContainingOffset(Offset); - EltTy = ST->getContainedType(EltIdx); - EltSize = DL.getTypeAllocSize(EltTy); - Offset -= Layout->getElementOffset(EltIdx); - } else if (ArrayType *AT = dyn_cast(T)) { - EltTy = AT->getElementType(); - EltSize = DL.getTypeAllocSize(EltTy); - if (Offset >= AT->getNumElements() * EltSize) - return false; - Offset %= EltSize; - } else if (VectorType *VT = dyn_cast(T)) { - EltTy = VT->getElementType(); - EltSize = DL.getTypeAllocSize(EltTy); - if (Offset >= VT->getNumElements() * EltSize) - return false; - Offset %= EltSize; - } else { - return false; - } - if (Offset == 0 && (Size == 0 || EltSize == Size)) - return true; - // Check if the component spans multiple elements. - if (Offset + Size > EltSize) - return false; - return TypeHasComponent(EltTy, Offset, Size, DL); -} - -/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite -/// the instruction I, which references it, to use the separate elements. -/// Offset indicates the position within AI that is referenced by this -/// instruction. -void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - SmallVectorImpl &NewElts) { - const DataLayout &DL = I->getModule()->getDataLayout(); - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) { - Use &TheUse = *UI++; - Instruction *User = cast(TheUse.getUser()); - - if (BitCastInst *BC = dyn_cast(User)) { - RewriteBitCast(BC, AI, Offset, NewElts); - continue; - } - - if (GetElementPtrInst *GEPI = dyn_cast(User)) { - RewriteGEP(GEPI, AI, Offset, NewElts); - continue; - } - - if (MemIntrinsic *MI = dyn_cast(User)) { - ConstantInt *Length = dyn_cast(MI->getLength()); - uint64_t MemSize = Length->getZExtValue(); - if (Offset == 0 && MemSize == DL.getTypeAllocSize(AI->getAllocatedType())) - RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); - // Otherwise the intrinsic can only touch a single element and the - // address operand will be updated, so nothing else needs to be done. - continue; - } - - if (IntrinsicInst *II = dyn_cast(User)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - RewriteLifetimeIntrinsic(II, AI, Offset, NewElts); - } - continue; - } - - if (LoadInst *LI = dyn_cast(User)) { - Type *LIType = LI->getType(); - - if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { - // Replace: - // %res = load { i32, i32 }* %alloc - // with: - // %load.0 = load i32* %alloc.0 - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 - // %load.1 = load i32* %alloc.1 - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 - // (Also works for arrays instead of structs) - Value *Insert = UndefValue::get(LIType); - IRBuilder<> Builder(LI); - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Load = Builder.CreateLoad(NewElts[i], "load"); - Insert = Builder.CreateInsertValue(Insert, Load, i, "insert"); - } - LI->replaceAllUsesWith(Insert); - DeadInsts.push_back(LI); - } else if (LIType->isIntegerTy() && - DL.getTypeAllocSize(LIType) == - DL.getTypeAllocSize(AI->getAllocatedType())) { - // If this is a load of the entire alloca to an integer, rewrite it. - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); - } - continue; - } - - if (StoreInst *SI = dyn_cast(User)) { - Value *Val = SI->getOperand(0); - Type *SIType = Val->getType(); - if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { - // Replace: - // store { i32, i32 } %val, { i32, i32 }* %alloc - // with: - // %val.0 = extractvalue { i32, i32 } %val, 0 - // store i32 %val.0, i32* %alloc.0 - // %val.1 = extractvalue { i32, i32 } %val, 1 - // store i32 %val.1, i32* %alloc.1 - // (Also works for arrays instead of structs) - IRBuilder<> Builder(SI); - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName()); - Builder.CreateStore(Extract, NewElts[i]); - } - DeadInsts.push_back(SI); - } else if (SIType->isIntegerTy() && - DL.getTypeAllocSize(SIType) == - DL.getTypeAllocSize(AI->getAllocatedType())) { - // If this is a store of the entire alloca from an integer, rewrite it. - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); - } - continue; - } - - if (isa(User) || isa(User)) { - // If we have a PHI user of the alloca itself (as opposed to a GEP or - // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to - // the new pointer. - if (!isa(I)) continue; - - assert(Offset == 0 && NewElts[0] && - "Direct alloca use should have a zero offset"); - - // If we have a use of the alloca, we know the derived uses will be - // utilizing just the first element of the scalarized result. Insert a - // bitcast of the first alloca before the user as required. - AllocaInst *NewAI = NewElts[0]; - BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI); - NewAI->moveBefore(BCI); - TheUse = BCI; - continue; - } - } -} - -/// RewriteBitCast - Update a bitcast reference to the alloca being replaced -/// and recursively continue updating all of its uses. -void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, - SmallVectorImpl &NewElts) { - RewriteForScalarRepl(BC, AI, Offset, NewElts); - if (BC->getOperand(0) != AI) - return; - - // The bitcast references the original alloca. Replace its uses with - // references to the alloca containing offset zero (which is normally at - // index zero, but might not be in cases involving structs with elements - // of size zero). - Type *T = AI->getAllocatedType(); - uint64_t EltOffset = 0; - Type *IdxTy; - uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, - BC->getModule()->getDataLayout()); - Instruction *Val = NewElts[Idx]; - if (Val->getType() != BC->getDestTy()) { - Val = new BitCastInst(Val, BC->getDestTy(), "", BC); - Val->takeName(BC); - } - BC->replaceAllUsesWith(Val); - DeadInsts.push_back(BC); -} - -/// FindElementAndOffset - Return the index of the element containing Offset -/// within the specified type, which must be either a struct or an array. -/// Sets T to the type of the element and Offset to the offset within that -/// element. IdxTy is set to the type of the index result to be used in a -/// GEP instruction. -uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy, - const DataLayout &DL) { - uint64_t Idx = 0; - - if (StructType *ST = dyn_cast(T)) { - const StructLayout *Layout = DL.getStructLayout(ST); - Idx = Layout->getElementContainingOffset(Offset); - T = ST->getContainedType(Idx); - Offset -= Layout->getElementOffset(Idx); - IdxTy = Type::getInt32Ty(T->getContext()); - return Idx; - } else if (ArrayType *AT = dyn_cast(T)) { - T = AT->getElementType(); - uint64_t EltSize = DL.getTypeAllocSize(T); - Idx = Offset / EltSize; - Offset -= Idx * EltSize; - IdxTy = Type::getInt64Ty(T->getContext()); - return Idx; - } - VectorType *VT = cast(T); - T = VT->getElementType(); - uint64_t EltSize = DL.getTypeAllocSize(T); - Idx = Offset / EltSize; - Offset -= Idx * EltSize; - IdxTy = Type::getInt64Ty(T->getContext()); - return Idx; -} - -/// RewriteGEP - Check if this GEP instruction moves the pointer across -/// elements of the alloca that are being split apart, and if so, rewrite -/// the GEP to be relative to the new element. -void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, - SmallVectorImpl &NewElts) { - uint64_t OldOffset = Offset; - const DataLayout &DL = GEPI->getModule()->getDataLayout(); - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); - // If the GEP was dynamic then it must have been a dynamic vector lookup. - // In this case, it must be the last GEP operand which is dynamic so keep that - // aside until we've found the constant GEP offset then add it back in at the - // end. - Value* NonConstantIdx = nullptr; - if (!GEPI->hasAllConstantIndices()) - NonConstantIdx = Indices.pop_back_val(); - Offset += DL.getIndexedOffsetInType(GEPI->getSourceElementType(), Indices); - - RewriteForScalarRepl(GEPI, AI, Offset, NewElts); - - Type *T = AI->getAllocatedType(); - Type *IdxTy; - uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy, DL); - if (GEPI->getOperand(0) == AI) - OldIdx = ~0ULL; // Force the GEP to be rewritten. - - T = AI->getAllocatedType(); - uint64_t EltOffset = Offset; - uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, DL); - - // If this GEP does not move the pointer across elements of the alloca - // being split, then it does not needs to be rewritten. - if (Idx == OldIdx) - return; - - Type *i32Ty = Type::getInt32Ty(AI->getContext()); - SmallVector NewArgs; - NewArgs.push_back(Constant::getNullValue(i32Ty)); - while (EltOffset != 0) { - uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy, DL); - NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx)); - } - if (NonConstantIdx) { - Type* GepTy = T; - // This GEP has a dynamic index. We need to add "i32 0" to index through - // any structs or arrays in the original type until we get to the vector - // to index. - while (!isa(GepTy)) { - NewArgs.push_back(Constant::getNullValue(i32Ty)); - GepTy = cast(GepTy)->getTypeAtIndex(0U); - } - NewArgs.push_back(NonConstantIdx); - } - Instruction *Val = NewElts[Idx]; - if (NewArgs.size() > 1) { - Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI); - Val->takeName(GEPI); - } - if (Val->getType() != GEPI->getType()) - Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI); - GEPI->replaceAllUsesWith(Val); - DeadInsts.push_back(GEPI); -} - -/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it -/// to mark the lifetime of the scalarized memory. -void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, - uint64_t Offset, - SmallVectorImpl &NewElts) { - ConstantInt *OldSize = cast(II->getArgOperand(0)); - // Put matching lifetime markers on everything from Offset up to - // Offset+OldSize. - Type *AIType = AI->getAllocatedType(); - const DataLayout &DL = II->getModule()->getDataLayout(); - uint64_t NewOffset = Offset; - Type *IdxTy; - uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy, DL); - - IRBuilder<> Builder(II); - uint64_t Size = OldSize->getLimitedValue(); - - if (NewOffset) { - // Splice the first element and index 'NewOffset' bytes in. SROA will - // split the alloca again later. - unsigned AS = AI->getType()->getAddressSpace(); - Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS)); - V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset)); - - IdxTy = NewElts[Idx]->getAllocatedType(); - uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset; - if (EltSize > Size) { - EltSize = Size; - Size = 0; - } else { - Size -= EltSize; - } - if (II->getIntrinsicID() == Intrinsic::lifetime_start) - Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize)); - else - Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize)); - ++Idx; - } - - for (; Idx != NewElts.size() && Size; ++Idx) { - IdxTy = NewElts[Idx]->getAllocatedType(); - uint64_t EltSize = DL.getTypeAllocSize(IdxTy); - if (EltSize > Size) { - EltSize = Size; - Size = 0; - } else { - Size -= EltSize; - } - if (II->getIntrinsicID() == Intrinsic::lifetime_start) - Builder.CreateLifetimeStart(NewElts[Idx], - Builder.getInt64(EltSize)); - else - Builder.CreateLifetimeEnd(NewElts[Idx], - Builder.getInt64(EltSize)); - } - DeadInsts.push_back(II); -} - -/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. -/// Rewrite it to copy or set the elements of the scalarized memory. -void -SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, - AllocaInst *AI, - SmallVectorImpl &NewElts) { - // If this is a memcpy/memmove, construct the other pointer as the - // appropriate type. The "Other" pointer is the pointer that goes to memory - // that doesn't have anything to do with the alloca that we are promoting. For - // memset, this Value* stays null. - Value *OtherPtr = nullptr; - unsigned MemAlignment = MI->getAlignment(); - if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy - if (Inst == MTI->getRawDest()) - OtherPtr = MTI->getRawSource(); - else { - assert(Inst == MTI->getRawSource()); - OtherPtr = MTI->getRawDest(); - } - } - - // If there is an other pointer, we want to convert it to the same pointer - // type as AI has, so we can GEP through it safely. - if (OtherPtr) { - unsigned AddrSpace = - cast(OtherPtr->getType())->getAddressSpace(); - - // Remove bitcasts and all-zero GEPs from OtherPtr. This is an - // optimization, but it's also required to detect the corner case where - // both pointer operands are referencing the same memory, and where - // OtherPtr may be a bitcast or GEP that currently being rewritten. (This - // function is only called for mem intrinsics that access the whole - // aggregate, so non-zero GEPs are not an issue here.) - OtherPtr = OtherPtr->stripPointerCasts(); - - // Copying the alloca to itself is a no-op: just delete it. - if (OtherPtr == AI || OtherPtr == NewElts[0]) { - // This code will run twice for a no-op memcpy -- once for each operand. - // Put only one reference to MI on the DeadInsts list. - for (SmallVectorImpl::const_iterator I = DeadInsts.begin(), - E = DeadInsts.end(); I != E; ++I) - if (*I == MI) return; - DeadInsts.push_back(MI); - return; - } - - // If the pointer is not the right type, insert a bitcast to the right - // type. - Type *NewTy = PointerType::get(AI->getAllocatedType(), AddrSpace); - - if (OtherPtr->getType() != NewTy) - OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI); - } - - // Process each element of the aggregate. - bool SROADest = MI->getRawDest() == Inst; - - Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); - const DataLayout &DL = MI->getModule()->getDataLayout(); - - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - // If this is a memcpy/memmove, emit a GEP of the other element address. - Value *OtherElt = nullptr; - unsigned OtherEltAlign = MemAlignment; - - if (OtherPtr) { - Value *Idx[2] = { Zero, - ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, - OtherPtr->getName()+"."+Twine(i), - MI); - uint64_t EltOffset; - Type *OtherTy = AI->getAllocatedType(); - if (StructType *ST = dyn_cast(OtherTy)) { - EltOffset = DL.getStructLayout(ST)->getElementOffset(i); - } else { - Type *EltTy = cast(OtherTy)->getElementType(); - EltOffset = DL.getTypeAllocSize(EltTy) * i; - } - - // The alignment of the other pointer is the guaranteed alignment of the - // element, which is affected by both the known alignment of the whole - // mem intrinsic and the alignment of the element. If the alignment of - // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the - // known alignment is just 4 bytes. - OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset); - } - - AllocaInst *EltPtr = NewElts[i]; - Type *EltTy = EltPtr->getAllocatedType(); - - // If we got down to a scalar, insert a load or store as appropriate. - if (EltTy->isSingleValueType()) { - if (isa(MI)) { - if (SROADest) { - // From Other to Alloca. - Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI); - new StoreInst(Elt, EltPtr, MI); - } else { - // From Alloca to Other. - Value *Elt = new LoadInst(EltPtr, "tmp", MI); - new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI); - } - continue; - } - assert(isa(MI)); - - // If the stored element is zero (common case), just store a null - // constant. - Constant *StoreVal; - if (ConstantInt *CI = dyn_cast(MI->getArgOperand(1))) { - if (CI->isZero()) { - StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> - } else { - // If EltTy is a vector type, get the element type. - Type *ValTy = EltTy->getScalarType(); - - // Construct an integer with the right value. - unsigned EltSize = DL.getTypeSizeInBits(ValTy); - APInt OneVal(EltSize, CI->getZExtValue()); - APInt TotalVal(OneVal); - // Set each byte. - for (unsigned i = 0; 8*i < EltSize; ++i) { - TotalVal = TotalVal.shl(8); - TotalVal |= OneVal; - } - - // Convert the integer value to the appropriate type. - StoreVal = ConstantInt::get(CI->getContext(), TotalVal); - if (ValTy->isPointerTy()) - StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); - else if (ValTy->isFloatingPointTy()) - StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); - assert(StoreVal->getType() == ValTy && "Type mismatch!"); - - // If the requested value was a vector constant, create it. - if (EltTy->isVectorTy()) { - unsigned NumElts = cast(EltTy)->getNumElements(); - StoreVal = ConstantVector::getSplat(NumElts, StoreVal); - } - } - new StoreInst(StoreVal, EltPtr, MI); - continue; - } - // Otherwise, if we're storing a byte variable, use a memset call for - // this element. - } - - unsigned EltSize = DL.getTypeAllocSize(EltTy); - if (!EltSize) - continue; - - IRBuilder<> Builder(MI); - - // Finally, insert the meminst for this element. - if (isa(MI)) { - Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize, - MI->isVolatile()); - } else { - assert(isa(MI)); - Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr - Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr - - if (isa(MI)) - Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile()); - else - Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile()); - } - } - DeadInsts.push_back(MI); -} - -/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that -/// overwrites the entire allocation. Extract out the pieces of the stored -/// integer and store them individually. -void -SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, - SmallVectorImpl &NewElts) { - // Extract each element out of the integer according to its structure offset - // and store the element value to the individual alloca. - Value *SrcVal = SI->getOperand(0); - Type *AllocaEltTy = AI->getAllocatedType(); - const DataLayout &DL = SI->getModule()->getDataLayout(); - uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy); - - IRBuilder<> Builder(SI); - - // Handle tail padding by extending the operand - if (DL.getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) - SrcVal = Builder.CreateZExt(SrcVal, - IntegerType::get(SI->getContext(), AllocaSizeBits)); - - DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI - << '\n'); - - // There are two forms here: AI could be an array or struct. Both cases - // have different ways to compute the element offset. - if (StructType *EltSTy = dyn_cast(AllocaEltTy)) { - const StructLayout *Layout = DL.getStructLayout(EltSTy); - - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - // Get the number of bits to shift SrcVal to get the value. - Type *FieldTy = EltSTy->getElementType(i); - uint64_t Shift = Layout->getElementOffsetInBits(i); - - if (DL.isBigEndian()) - Shift = AllocaSizeBits - Shift - DL.getTypeAllocSizeInBits(FieldTy); - - Value *EltVal = SrcVal; - if (Shift) { - Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); - EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); - } - - // Truncate down to an integer of the right size. - uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy); - - // Ignore zero sized fields like {}, they obviously contain no data. - if (FieldSizeBits == 0) continue; - - if (FieldSizeBits != AllocaSizeBits) - EltVal = Builder.CreateTrunc(EltVal, - IntegerType::get(SI->getContext(), FieldSizeBits)); - Value *DestField = NewElts[i]; - if (EltVal->getType() == FieldTy) { - // Storing to an integer field of this size, just do it. - } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { - // Bitcast to the right element type (for fp/vector values). - EltVal = Builder.CreateBitCast(EltVal, FieldTy); - } else { - // Otherwise, bitcast the dest pointer (for aggregates). - DestField = Builder.CreateBitCast(DestField, - PointerType::getUnqual(EltVal->getType())); - } - new StoreInst(EltVal, DestField, SI); - } - - } else { - ArrayType *ATy = cast(AllocaEltTy); - Type *ArrayEltTy = ATy->getElementType(); - uint64_t ElementOffset = DL.getTypeAllocSizeInBits(ArrayEltTy); - uint64_t ElementSizeBits = DL.getTypeSizeInBits(ArrayEltTy); - - uint64_t Shift; - - if (DL.isBigEndian()) - Shift = AllocaSizeBits-ElementOffset; - else - Shift = 0; - - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - // Ignore zero sized fields like {}, they obviously contain no data. - if (ElementSizeBits == 0) continue; - - Value *EltVal = SrcVal; - if (Shift) { - Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); - EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); - } - - // Truncate down to an integer of the right size. - if (ElementSizeBits != AllocaSizeBits) - EltVal = Builder.CreateTrunc(EltVal, - IntegerType::get(SI->getContext(), - ElementSizeBits)); - Value *DestField = NewElts[i]; - if (EltVal->getType() == ArrayEltTy) { - // Storing to an integer field of this size, just do it. - } else if (ArrayEltTy->isFloatingPointTy() || - ArrayEltTy->isVectorTy()) { - // Bitcast to the right element type (for fp/vector values). - EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy); - } else { - // Otherwise, bitcast the dest pointer (for aggregates). - DestField = Builder.CreateBitCast(DestField, - PointerType::getUnqual(EltVal->getType())); - } - new StoreInst(EltVal, DestField, SI); - - if (DL.isBigEndian()) - Shift -= ElementOffset; - else - Shift += ElementOffset; - } - } - - DeadInsts.push_back(SI); -} - -/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to -/// an integer. Load the individual pieces to form the aggregate value. -void -SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, - SmallVectorImpl &NewElts) { - // Extract each element out of the NewElts according to its structure offset - // and form the result value. - Type *AllocaEltTy = AI->getAllocatedType(); - const DataLayout &DL = LI->getModule()->getDataLayout(); - uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy); - - DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI - << '\n'); - - // There are two forms here: AI could be an array or struct. Both cases - // have different ways to compute the element offset. - const StructLayout *Layout = nullptr; - uint64_t ArrayEltBitOffset = 0; - if (StructType *EltSTy = dyn_cast(AllocaEltTy)) { - Layout = DL.getStructLayout(EltSTy); - } else { - Type *ArrayEltTy = cast(AllocaEltTy)->getElementType(); - ArrayEltBitOffset = DL.getTypeAllocSizeInBits(ArrayEltTy); - } - - Value *ResultVal = - Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); - - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - // Load the value from the alloca. If the NewElt is an aggregate, cast - // the pointer to an integer of the same size before doing the load. - Value *SrcField = NewElts[i]; - Type *FieldTy = NewElts[i]->getAllocatedType(); - uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy); - - // Ignore zero sized fields like {}, they obviously contain no data. - if (FieldSizeBits == 0) continue; - - IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), - FieldSizeBits); - if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && - !FieldTy->isVectorTy()) - SrcField = new BitCastInst(SrcField, - PointerType::getUnqual(FieldIntTy), - "", LI); - SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); - - // If SrcField is a fp or vector of the right size but that isn't an - // integer type, bitcast to an integer so we can shift it. - if (SrcField->getType() != FieldIntTy) - SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI); - - // Zero extend the field to be the same size as the final alloca so that - // we can shift and insert it. - if (SrcField->getType() != ResultVal->getType()) - SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI); - - // Determine the number of bits to shift SrcField. - uint64_t Shift; - if (Layout) // Struct case. - Shift = Layout->getElementOffsetInBits(i); - else // Array case. - Shift = i*ArrayEltBitOffset; - - if (DL.isBigEndian()) - Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); - - if (Shift) { - Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); - SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); - } - - // Don't create an 'or x, 0' on the first iteration. - if (!isa(ResultVal) || - !cast(ResultVal)->isNullValue()) - ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); - else - ResultVal = SrcField; - } - - // Handle tail padding by truncating the result - if (DL.getTypeSizeInBits(LI->getType()) != AllocaSizeBits) - ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); - - LI->replaceAllUsesWith(ResultVal); - DeadInsts.push_back(LI); -} - -/// HasPadding - Return true if the specified type has any structure or -/// alignment padding in between the elements that would be split apart -/// by SROA; return false otherwise. -static bool HasPadding(Type *Ty, const DataLayout &DL) { - if (ArrayType *ATy = dyn_cast(Ty)) { - Ty = ATy->getElementType(); - return DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty); - } - - // SROA currently handles only Arrays and Structs. - StructType *STy = cast(Ty); - const StructLayout *SL = DL.getStructLayout(STy); - unsigned PrevFieldBitOffset = 0; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - unsigned FieldBitOffset = SL->getElementOffsetInBits(i); - - // Check to see if there is any padding between this element and the - // previous one. - if (i) { - unsigned PrevFieldEnd = - PrevFieldBitOffset+DL.getTypeSizeInBits(STy->getElementType(i-1)); - if (PrevFieldEnd < FieldBitOffset) - return true; - } - PrevFieldBitOffset = FieldBitOffset; - } - // Check for tail padding. - if (unsigned EltCount = STy->getNumElements()) { - unsigned PrevFieldEnd = PrevFieldBitOffset + - DL.getTypeSizeInBits(STy->getElementType(EltCount-1)); - if (PrevFieldEnd < SL->getSizeInBits()) - return true; - } - return false; -} - -/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of -/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, -/// or 1 if safe after canonicalization has been performed. -bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { - // Loop over the use list of the alloca. We can only transform it if all of - // the users are safe to transform. - AllocaInfo Info(AI); - - isSafeForScalarRepl(AI, 0, Info); - if (Info.isUnsafe) { - DEBUG(dbgs() << "Cannot transform: " << *AI << '\n'); - return false; - } - - const DataLayout &DL = AI->getModule()->getDataLayout(); - - // Okay, we know all the users are promotable. If the aggregate is a memcpy - // source and destination, we have to be careful. In particular, the memcpy - // could be moving around elements that live in structure padding of the LLVM - // types, but may actually be used. In these cases, we refuse to promote the - // struct. - if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getAllocatedType(), DL)) - return false; - - // If the alloca never has an access to just *part* of it, but is accessed - // via loads and stores, then we should use ConvertToScalarInfo to promote - // the alloca instead of promoting each piece at a time and inserting fission - // and fusion code. - if (!Info.hasSubelementAccess && Info.hasALoadOrStore) { - // If the struct/array just has one element, use basic SRoA. - if (StructType *ST = dyn_cast(AI->getAllocatedType())) { - if (ST->getNumElements() > 1) return false; - } else { - if (cast(AI->getAllocatedType())->getNumElements() > 1) - return false; - } - } - - return true; -} diff --git a/test/CodeGen/X86/vec_ins_extract.ll b/test/CodeGen/X86/vec_ins_extract.ll index 90dcbdfa43b..5ff49eff6df 100644 --- a/test/CodeGen/X86/vec_ins_extract.ll +++ b/test/CodeGen/X86/vec_ins_extract.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: opt < %s -scalarrepl -instcombine | \ +; RUN: opt < %s -sroa -instcombine | \ ; RUN: llc -march=x86 -mcpu=yonah | not grep sub.*esp ; This checks that various insert/extract idiom work without going to the diff --git a/test/Transforms/ArgumentPromotion/inalloca.ll b/test/Transforms/ArgumentPromotion/inalloca.ll index 80bd6fdbc40..5bf57c8ff46 100644 --- a/test/Transforms/ArgumentPromotion/inalloca.ll +++ b/test/Transforms/ArgumentPromotion/inalloca.ll @@ -1,10 +1,10 @@ -; RUN: opt %s -argpromotion -scalarrepl -S | FileCheck %s +; RUN: opt %s -argpromotion -sroa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.ss = type { i32, i32 } -; Argpromote + scalarrepl should change this to passing the two integers by value. +; Argpromote + sroa should change this to passing the two integers by value. define internal i32 @f(%struct.ss* inalloca %s) { entry: %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0 diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll index 8f0b3eafaec..5847e623831 100644 --- a/test/Transforms/Inline/basictest.ll +++ b/test/Transforms/Inline/basictest.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -inline -scalarrepl -S | FileCheck %s +; RUN: opt < %s -inline -sroa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define i32 @test1f(i32 %i) { diff --git a/test/Transforms/Inline/crash2.ll b/test/Transforms/Inline/crash2.ll index 4c0dfaea036..e3a136010ee 100644 --- a/test/Transforms/Inline/crash2.ll +++ b/test/Transforms/Inline/crash2.ll @@ -1,4 +1,4 @@ -; RUN: opt -inline -scalarrepl -max-cg-scc-iterations=1 -disable-output < %s +; RUN: opt -inline -sroa -max-cg-scc-iterations=1 -disable-output < %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.3" diff --git a/test/Transforms/Inline/devirtualize-3.ll b/test/Transforms/Inline/devirtualize-3.ll index 76c8150de01..2a0a6d7f65a 100644 --- a/test/Transforms/Inline/devirtualize-3.ll +++ b/test/Transforms/Inline/devirtualize-3.ll @@ -1,4 +1,4 @@ -; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine < %s | FileCheck %s +; RUN: opt -basicaa -inline -S -sroa -gvn -instcombine < %s | FileCheck %s ; PR5009 ; CHECK: define i32 @main() diff --git a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll index 0f8b38c8e9c..35cd3affec6 100644 --- a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll +++ b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -scalarrepl -S | not grep " = alloca" +; RUN: opt < %s -instcombine -sroa -S | not grep " = alloca" ; rdar://6417724 ; Instcombine shouldn't do anything to this function that prevents promoting the allocas inside it. diff --git a/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll b/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll index 5818808ae0c..df7034baf66 100644 --- a/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll +++ b/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalarrepl -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info +; RUN: opt < %s -sroa -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info define void @inflate() { entry: diff --git a/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll b/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll index 1a929d68573..5d763a9b3e7 100644 --- a/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll +++ b/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalarrepl-ssa -loop-unswitch -disable-output +; RUN: opt < %s -sroa -loop-unswitch -disable-output ; PR11016 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.2" diff --git a/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll b/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll deleted file mode 100644 index 336c0a9dfa6..00000000000 --- a/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: opt < %s -scalarrepl -instcombine -S | not grep alloca -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -; Test that an array is not incorrectly deconstructed. - -define i32 @test() nounwind { - %X = alloca [4 x i32] ; <[4 x i32]*> [#uses=1] - %Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 0 ; [#uses=1] - ; Must preserve arrayness! - %Z = getelementptr i32, i32* %Y, i64 1 ; [#uses=1] - %A = load i32, i32* %Z ; [#uses=1] - ret i32 %A -} diff --git a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll deleted file mode 100644 index c5ca428be4b..00000000000 --- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll +++ /dev/null @@ -1,12 +0,0 @@ -; Scalar replacement was incorrectly promoting this alloca!! -; -; RUN: opt < %s -scalarrepl -S | FileCheck %s - -define i8* @test() { - %A = alloca [30 x i8] ; <[30 x i8]*> [#uses=1] - %B = getelementptr [30 x i8], [30 x i8]* %A, i64 0, i64 0 ; [#uses=2] - %C = getelementptr i8, i8* %B, i64 1 ; [#uses=1] - store i8 0, i8* %B - ret i8* %C -} -; CHECK: alloca [ diff --git a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll deleted file mode 100644 index ce652737bc1..00000000000 --- a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | grep "alloca %%T" - -%T = type { [80 x i8], i32, i32 } -declare i32 @.callback_1(i8*) - -declare void @.iter_2(i32 (i8*)*, i8*) - -define i32 @main() { - %d = alloca %T ; <{ [80 x i8], i32, i32 }*> [#uses=2] - %tmp.0 = getelementptr %T, %T* %d, i64 0, i32 2 ; [#uses=1] - store i32 0, i32* %tmp.0 - %tmp.1 = getelementptr %T, %T* %d, i64 0, i32 0, i64 0 ; [#uses=1] - call void @.iter_2( i32 (i8*)* @.callback_1, i8* %tmp.1 ) - ret i32 0 -} - diff --git a/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll b/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll deleted file mode 100644 index 2701fdaea51..00000000000 --- a/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | not grep alloca -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind { - %vsiidx = alloca [2 x <4 x i32>], align 16 ; <[2 x <4 x i32>]*> [#uses=3] - %tmp = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v0 ) ; <<4 x i32>> [#uses=2] - %tmp.upgrd.1 = bitcast <4 x i32> %tmp to <2 x i64> ; <<2 x i64>> [#uses=0] - %tmp.upgrd.2 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 0 ; <<4 x i32>*> [#uses=1] - store <4 x i32> %tmp, <4 x i32>* %tmp.upgrd.2 - %tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v1 ) ; <<4 x i32>> [#uses=2] - %tmp10.upgrd.3 = bitcast <4 x i32> %tmp10 to <2 x i64> ; <<2 x i64>> [#uses=0] - %tmp14 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 1 ; <<4 x i32>*> [#uses=1] - store <4 x i32> %tmp10, <4 x i32>* %tmp14 - %tmp15 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 0, i32 4 ; [#uses=1] - %tmp.upgrd.4 = load i32, i32* %tmp15 ; [#uses=1] - ret i32 %tmp.upgrd.4 -} - -declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) - diff --git a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll deleted file mode 100644 index 966b17939fe..00000000000 --- a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | grep memcpy -; PR1421 - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "i686-apple-darwin8" - -%struct.LongestMember = type { i8, i32 } -%struct.MyString = type { i32 } -%struct.UnionType = type { %struct.LongestMember } - -define void @_Z4testP9UnionTypePS0_(%struct.UnionType* %p, %struct.UnionType** %pointerToUnion) { -entry: - %tmp = alloca %struct.UnionType, align 8 - %tmp2 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0 - %tmp13 = getelementptr %struct.UnionType, %struct.UnionType* %p, i32 0, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false) - %tmp5 = load %struct.UnionType*, %struct.UnionType** %pointerToUnion - %tmp56 = getelementptr %struct.UnionType, %struct.UnionType* %tmp5, i32 0, i32 0, i32 0 - %tmp7 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false) - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll b/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll deleted file mode 100644 index 28f503a210c..00000000000 --- a/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | not grep shr - -; FIXME: I think this test is no longer valid. -; It was working because SROA was aborting when -; no datalayout was supplied -; XFAIL: * - - -%struct.S = type { i16 } - -define zeroext i1 @f(i16 signext %b) { -entry: - %b_addr = alloca i16 ; [#uses=2] - %retval = alloca i32 ; [#uses=2] - %s = alloca %struct.S ; <%struct.S*> [#uses=2] - %tmp = alloca i32 ; [#uses=2] - %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] - store i16 %b, i16* %b_addr - %tmp1 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0 ; [#uses=1] - %tmp2 = load i16, i16* %b_addr, align 2 ; [#uses=1] - store i16 %tmp2, i16* %tmp1, align 2 - %tmp3 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0 ; [#uses=1] - %tmp34 = bitcast i16* %tmp3 to [2 x i1]* ; <[2 x i1]*> [#uses=1] - %tmp5 = getelementptr [2 x i1], [2 x i1]* %tmp34, i32 0, i32 1 ; [#uses=1] - %tmp6 = load i1, i1* %tmp5, align 1 ; [#uses=1] - %tmp67 = zext i1 %tmp6 to i32 ; [#uses=1] - store i32 %tmp67, i32* %tmp, align 4 - %tmp8 = load i32, i32* %tmp, align 4 ; [#uses=1] - store i32 %tmp8, i32* %retval, align 4 - br label %return - -return: ; preds = %entry - %retval9 = load i32, i32* %retval ; [#uses=1] - %retval910 = trunc i32 %retval9 to i1 ; [#uses=1] - ret i1 %retval910 -} diff --git a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll deleted file mode 100644 index 99366b36442..00000000000 --- a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i8 17" -; rdar://5707076 -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin9.1.0" - %struct.T = type <{ i8, [3 x i8] }> - -define i8 @f() { -entry: - %s = alloca [1 x %struct.T], align 4 ; <[1 x %struct.T]*> [#uses=2] - %T3 = bitcast [1 x %struct.T]* %s to i32* - store i32 -61184, i32* %T3 - - %tmp16 = getelementptr [1 x %struct.T], [1 x %struct.T]* %s, i32 0, i32 0 ; <%struct.T*> [#uses=1] - %tmp17 = getelementptr %struct.T, %struct.T* %tmp16, i32 0, i32 1 ; <[3 x i8]*> [#uses=1] - %tmp1718 = bitcast [3 x i8]* %tmp17 to i32* ; [#uses=1] - %tmp19 = load i32, i32* %tmp1718, align 4 ; [#uses=1] - %mask = and i32 %tmp19, 16777215 ; [#uses=2] - %mask2324 = trunc i32 %mask to i8 ; [#uses=1] - ret i8 %mask2324 -} - diff --git a/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll b/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll deleted file mode 100644 index f37b6529a54..00000000000 --- a/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | not grep alloca -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i686-apple-darwin8" - %struct..0anon = type { <1 x i64> } - -define i32 @main(i32 %argc, i8** %argv) { -entry: - %c = alloca %struct..0anon ; <%struct..0anon*> [#uses=2] - %tmp2 = getelementptr %struct..0anon, %struct..0anon* %c, i32 0, i32 0 ; <<1 x i64>*> [#uses=1] - store <1 x i64> zeroinitializer, <1 x i64>* %tmp2, align 8 - %tmp7 = getelementptr %struct..0anon, %struct..0anon* %c, i32 0, i32 0 ; <<1 x i64>*> [#uses=1] - %tmp78 = bitcast <1 x i64>* %tmp7 to [2 x i32]* ; <[2 x i32]*> [#uses=1] - %tmp9 = getelementptr [2 x i32], [2 x i32]* %tmp78, i32 0, i32 0 ; [#uses=1] - %tmp10 = load i32, i32* %tmp9, align 4 ; [#uses=0] - unreachable -} diff --git a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll b/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll deleted file mode 100644 index d1f33121174..00000000000 --- a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll +++ /dev/null @@ -1,33 +0,0 @@ -; This test shows an alloca of a struct and an array that can be reduced to -; multiple variables easily. However, the alloca is used by a store -; instruction, which was not possible before aggregrates were first class -; values. This checks of scalarrepl splits up the struct and array properly. - -; RUN: opt < %s -scalarrepl -S | not grep alloca -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -define i32 @foo() { - %target = alloca { i32, i32 } ; <{ i32, i32 }*> [#uses=1] - ; Build a first class struct to store - %res1 = insertvalue { i32, i32 } undef, i32 1, 0 ; <{ i32, i32 }> [#uses=1] - %res2 = insertvalue { i32, i32 } %res1, i32 2, 1 ; <{ i32, i32 }> [#uses=1] - ; And store it - store { i32, i32 } %res2, { i32, i32 }* %target - ; Actually use %target, so it doesn't get removed altogether - %ptr = getelementptr { i32, i32 }, { i32, i32 }* %target, i32 0, i32 0 - %val = load i32, i32* %ptr - ret i32 %val -} - -define i32 @bar() { - %target = alloca [ 2 x i32 ] ; <{ i32, i32 }*> [#uses=1] - ; Build a first class array to store - %res1 = insertvalue [ 2 x i32 ] undef, i32 1, 0 ; <{ i32, i32 }> [#uses=1] - %res2 = insertvalue [ 2 x i32 ] %res1, i32 2, 1 ; <{ i32, i32 }> [#uses=1] - ; And store it - store [ 2 x i32 ] %res2, [ 2 x i32 ]* %target - ; Actually use %target, so it doesn't get removed altogether - %ptr = getelementptr [ 2 x i32 ], [ 2 x i32 ]* %target, i32 0, i32 0 - %val = load i32, i32* %ptr - ret i32 %val -} diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll deleted file mode 100644 index f597613ef2b..00000000000 --- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | grep "call.*mem" -; PR2369 - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin8" - -define void @memtest1(i8* %dst, i8* %src) nounwind { -entry: - %temp = alloca [200 x i8] - %temp1 = bitcast [200 x i8]* %temp to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i32 1, i1 false) - %temp3 = bitcast [200 x i8]* %temp to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i32 1, i1 false) - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll deleted file mode 100644 index c0ff25f3541..00000000000 --- a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | grep "s = alloca .struct.x" -; PR2423 -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin8" - -%struct.x = type { [1 x i32], i32, i32 } - -define i32 @b() nounwind { -entry: - %s = alloca %struct.x - %r = alloca %struct.x - %0 = call i32 @a(%struct.x* %s) nounwind - %r1 = bitcast %struct.x* %r to i8* - %s2 = bitcast %struct.x* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false) - %1 = getelementptr %struct.x, %struct.x* %r, i32 0, i32 0, i32 1 - %2 = load i32, i32* %1, align 4 - ret i32 %2 -} - -declare i32 @a(%struct.x*) - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll deleted file mode 100644 index 16d9108bb24..00000000000 --- a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll +++ /dev/null @@ -1,25 +0,0 @@ -; This test checks to see if scalarrepl also works when a gep with all zeroes is -; used instead of a bitcast to prepare a memmove pointer argument. Previously, -; this would not work when there was a vector involved in the struct, preventing -; scalarrepl from removing the alloca below. - -; RUN: opt < %s -scalarrepl -S > %t -; RUN: cat %t | not grep alloca -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -%struct.two = type <{ < 2 x i8 >, i16 }> - -define void @main(%struct.two* %D, i16 %V) { -entry: - %S = alloca %struct.two - %S.2 = getelementptr %struct.two, %struct.two* %S, i32 0, i32 1 - store i16 %V, i16* %S.2 - ; This gep is effectively a bitcast to i8*, but is sometimes generated - ; because the type of the first element in %struct.two is i8. - %tmpS = getelementptr %struct.two, %struct.two* %S, i32 0, i32 0, i32 0 - %tmpD = bitcast %struct.two* %D to i8* - call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1, i1 false) - ret void -} - -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll b/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll deleted file mode 100644 index f0af1caa461..00000000000 --- a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i32 %x" -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" -target triple = "i386-pc-linux-gnu" - -%pair = type { [1 x i32], i32 } - -define i32 @f(i32 %x, i32 %y) { - %instance = alloca %pair - %first = getelementptr %pair, %pair* %instance, i32 0, i32 0 - %cast = bitcast [1 x i32]* %first to i32* - store i32 %x, i32* %cast - %second = getelementptr %pair, %pair* %instance, i32 0, i32 1 - store i32 %y, i32* %second - %v = load i32, i32* %cast - ret i32 %v -} diff --git a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll b/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll deleted file mode 100644 index 56375ffe793..00000000000 --- a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: opt < %s -scalarrepl -instcombine -inline -instcombine -S | grep "ret i32 42" -; PR3489 -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "x86_64-apple-darwin10.0" - %struct.anon = type <{ i32, i32, i32 }> - -define i32 @f({ i64, i64 }) nounwind { -entry: - %tmp = alloca { i64, i64 }, align 8 ; <{ i64, i64 }*> [#uses=2] - store { i64, i64 } %0, { i64, i64 }* %tmp - %1 = bitcast { i64, i64 }* %tmp to %struct.anon* ; <%struct.anon*> [#uses=1] - %2 = load %struct.anon, %struct.anon* %1, align 8 ; <%struct.anon> [#uses=1] - %tmp3 = extractvalue %struct.anon %2, 0 - ret i32 %tmp3 -} - -define i32 @g() { - %a = call i32 @f({i64,i64} { i64 42, i64 1123123123123123 }) - ret i32 %a -} diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll deleted file mode 100644 index 025578c7f44..00000000000 --- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll +++ /dev/null @@ -1,19 +0,0 @@ -; The store into %p should end up with a known alignment of 1, since the memcpy -; is only known to access it with 1-byte alignment. -; RUN: opt < %s -scalarrepl -S | grep "store i16 1, .*, align 1" -; PR3720 -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - - %struct.st = type { i16 } - -define void @f(i8* %p) nounwind { -entry: - %s = alloca %struct.st, align 4 ; <%struct.st*> [#uses=2] - %0 = getelementptr %struct.st, %struct.st* %s, i32 0, i32 0 ; [#uses=1] - store i16 1, i16* %0, align 4 - %s1 = bitcast %struct.st* %s to i8* ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %s1, i32 2, i32 1, i1 false) - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll deleted file mode 100644 index d1cc4244ccf..00000000000 --- a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll +++ /dev/null @@ -1,90 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -; Radar 7441282 - -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" -target triple = "thumbv7-apple-darwin10" - -%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } -%struct.int16x8_t = type { <8 x i16> } -%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } -%union..0anon = type { %struct.int16x8x2_t } - -define void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { -; CHECK-LABEL: @test( -; CHECK-NOT: alloca -; CHECK: "alloca point" -; CHECK: store <8 x i16> -; CHECK: store <8 x i16> - -entry: - %tmp_addr = alloca %struct.int16x8_t - %dst_addr = alloca %struct.int16x8x2_t* - %__rv = alloca %union..0anon - %__bx = alloca %struct.int16x8_t - %__ax = alloca %struct.int16x8_t - %tmp2 = alloca %struct.int16x8x2_t - %0 = alloca %struct.int16x8x2_t - %"alloca point" = bitcast i32 0 to i32 - %1 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0 - store <8 x i16> %tmp.0, <8 x i16>* %1 - store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr - %2 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0 - %3 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0 - %4 = load <8 x i16>, <8 x i16>* %3, align 16 - store <8 x i16> %4, <8 x i16>* %2, align 16 - %5 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0 - %6 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0 - %7 = load <8 x i16>, <8 x i16>* %6, align 16 - store <8 x i16> %7, <8 x i16>* %5, align 16 - %8 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0 - %9 = load <8 x i16>, <8 x i16>* %8, align 16 - %10 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0 - %11 = load <8 x i16>, <8 x i16>* %10, align 16 - %12 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0 - %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* - %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> - %15 = getelementptr inbounds %struct.__neon_int16x8x2_t, %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 - store <8 x i16> %14, <8 x i16>* %15 - %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> - %17 = getelementptr inbounds %struct.__neon_int16x8x2_t, %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 - store <8 x i16> %16, <8 x i16>* %17 - %18 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0 - %19 = bitcast %struct.int16x8x2_t* %0 to i8* - %20 = bitcast %struct.int16x8x2_t* %18 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i32 16, i1 false) - %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* - %21 = bitcast %struct.int16x8x2_t* %0 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false) - %22 = load %struct.int16x8x2_t*, %struct.int16x8x2_t** %dst_addr, align 4 - %23 = bitcast %struct.int16x8x2_t* %22 to i8* - %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false) - br label %return - -return: ; preds = %entry - ret void -} - -; Radar 7466574 -%struct._NSRange = type { i64 } - -define void @test_memcpy_self() nounwind { -entry: - %range = alloca %struct._NSRange - br i1 undef, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %tmp3 = bitcast %struct._NSRange* %range to i8* - %tmp4 = bitcast %struct._NSRange* %range to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8, i1 false) - ret void - -cond.false: ; preds = %entry - ret void - -; CHECK-LABEL: @test_memcpy_self( -; CHECK-NOT: alloca -; CHECK: br i1 -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll deleted file mode 100644 index b926b021caf..00000000000 --- a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -; Radar 7552893 - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" - -%struct.test = type { [3 x double] } - -define void @test_memcpy_self() nounwind { -; CHECK-LABEL: @test_memcpy_self( -; CHECK-NOT: alloca -; CHECK: ret void - %1 = alloca %struct.test - %2 = bitcast %struct.test* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i32 4, i1 false) - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll b/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll deleted file mode 100644 index 997d03b059e..00000000000 --- a/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s -; PR9820 - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -@func_1.l_10 = internal unnamed_addr constant [4 x i32] [i32 1, i32 0, i32 0, i32 0], align 16 - -define i32* @noop(i32* %p_29) nounwind readnone { -entry: - ret i32* %p_29 -} - -define i32 @main() nounwind { -entry: - %l_10 = alloca [4 x i32], align 16 - %tmp = bitcast [4 x i32]* %l_10 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i32 16, i1 false) -; CHECK: call void @llvm.memcpy - %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %l_10, i64 0, i64 0 - %call = call i32* @noop(i32* %arrayidx) - store i32 0, i32* %call - ret i32 0 -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll deleted file mode 100644 index dee27f8e306..00000000000 --- a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll +++ /dev/null @@ -1,75 +0,0 @@ -; RUN: opt < %s -S -scalarrepl | FileCheck %s -; RUN: opt < %s -S -scalarrepl-ssa | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-macosx10.7.0" - -%0 = type { <2 x float>, float } -%struct.PointC3 = type { %struct.array } -%struct.Point_3 = type { %struct.PointC3 } -%struct.array = type { [3 x float], [4 x i8] } - -; CHECK: main -; CHECK-NOT: alloca -; CHECK: extractelement <2 x float> zeroinitializer, i32 0 - -define void @main() uwtable ssp { -entry: - %ref.tmp2 = alloca %0, align 16 - %tmpcast = bitcast %0* %ref.tmp2 to %struct.Point_3* - %0 = getelementptr %0, %0* %ref.tmp2, i64 0, i32 0 - store <2 x float> zeroinitializer, <2 x float>* %0, align 16 - %1 = getelementptr inbounds %struct.Point_3, %struct.Point_3* %tmpcast, i64 0, i32 0 - %base.i.i.i = getelementptr inbounds %struct.PointC3, %struct.PointC3* %1, i64 0, i32 0 - %arrayidx.i.i.i.i = getelementptr inbounds %struct.array, %struct.array* %base.i.i.i, i64 0, i32 0, i64 0 - %tmp5.i.i = load float, float* %arrayidx.i.i.i.i, align 4 - ret void -} - -; CHECK: test1 -; CHECK-NOT: alloca -; CHECK: extractelement <2 x float> zeroinitializer, i32 0 - -define void @test1() uwtable ssp { -entry: - %ref.tmp2 = alloca {<2 x float>, float}, align 16 - %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float* - %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0 - store <2 x float> zeroinitializer, <2 x float>* %0, align 16 - %tmp5.i.i = load float, float* %tmpcast, align 4 - ret void -} - -; CHECK: test2 -; CHECK-NOT: alloca -; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> zeroinitializer, i32 0 -; CHECK: fadd float %[[A]], 1.000000e+00 -; CHECK-NOT: insertelement -; CHECK-NOT: extractelement - -define float @test2() uwtable ssp { -entry: - %ref.tmp2 = alloca {<2 x float>, float}, align 16 - %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float* - %tmpcast2 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 1 - %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0 - store <2 x float> zeroinitializer, <2 x float>* %0, align 16 - store float 1.0, float* %tmpcast2, align 4 - %r1 = load float, float* %tmpcast, align 4 - %r2 = load float, float* %tmpcast2, align 4 - %r = fadd float %r1, %r2 - ret float %r -} - -; CHECK: test3 -; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> , i32 1 -; CHECK: ret float %[[A]] - -define float @test3() { -entry: - %ai = alloca { <2 x float>, <2 x float> }, align 8 - store { <2 x float>, <2 x float> } {<2 x float> , <2 x float> }, { <2 x float>, <2 x float> }* %ai, align 8 - %tmpcast = bitcast { <2 x float>, <2 x float> }* %ai to [4 x float]* - %arrayidx = getelementptr inbounds [4 x float], [4 x float]* %tmpcast, i64 0, i64 3 - %f = load float, float* %arrayidx, align 4 - ret float %f -} diff --git a/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll b/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll deleted file mode 100644 index af6d1f36fae..00000000000 --- a/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll +++ /dev/null @@ -1,37 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -target triple = "thumbv7-apple-darwin10" - -; CHECK: f -; CHECK-NOT: alloca -; CHECK: %[[A:[a-z0-9]*]] = and i128 undef, -16777216 -; CHECK: %[[B:[a-z0-9]*]] = bitcast i128 %[[A]] to <4 x float> -; CHECK: %[[C:[a-z0-9]*]] = extractelement <4 x float> %[[B]], i32 0 -; CHECK: ret float %[[C]] - -define float @f() nounwind ssp { -entry: - %a = alloca <4 x float>, align 16 - %p = bitcast <4 x float>* %a to i8* - call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i32 16, i1 false) - %vec = load <4 x float>, <4 x float>* %a, align 8 - %val = extractelement <4 x float> %vec, i32 0 - ret float %val -} - -; CHECK: g -; CHECK-NOT: alloca -; CHECK: and i128 - -define void @g() nounwind ssp { -entry: - %a = alloca { <4 x float> }, align 16 - %p = bitcast { <4 x float> }* %a to i8* - call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false) - %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]* - %arrayidx = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* %q, i32 0, i32 0 - store <2 x float> undef, <2 x float>* %arrayidx, align 8 - ret void -} - -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll b/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll deleted file mode 100644 index bff6566d178..00000000000 --- a/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll +++ /dev/null @@ -1,40 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -; PR10987 - -; Make sure scalarrepl doesn't move a load across an invoke which could -; modify the loaded value. -; (The PHI could theoretically be transformed by splitting the critical -; edge, but scalarrepl doesn't modify the CFG, at least at the moment.) - -declare void @extern_fn(i32*) -declare i32 @extern_fn2(i32) -declare i32 @__gcc_personality_v0(i32, i64, i8*, i8*) - -define void @odd_fn(i1) noinline personality i32 (i32, i64, i8*, i8*)* @__gcc_personality_v0 { - %retptr1 = alloca i32 - %retptr2 = alloca i32 - br i1 %0, label %then, label %else - -then: ; preds = %2 - invoke void @extern_fn(i32* %retptr1) - to label %join unwind label %unwind - -else: ; preds = %2 - store i32 3, i32* %retptr2 - br label %join - -join: ; preds = %then, %else - %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ] - %storemerge = load i32, i32* %storemerge.in - %x3 = call i32 @extern_fn2(i32 %storemerge) - ret void - -unwind: ; preds = %then - %info = landingpad { i8*, i32 } - cleanup - call void @extern_fn(i32* null) - unreachable -} - -; CHECK-LABEL: define void @odd_fn( -; CHECK: %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ] diff --git a/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll b/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll deleted file mode 100644 index 9e312314929..00000000000 --- a/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: opt < %s -S -scalarrepl | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin11.0.1" - -; CHECK: test -; CHECK-NOT: alloca - -define void @test() nounwind { -entry: - %a156286 = alloca [4 x <4 x float>], align 16 - br i1 undef, label %cif_done, label %for_test158.preheader - -for_test158.preheader: ; preds = %entry - %a156286305 = bitcast [4 x <4 x float>]* %a156286 to i8* - call void @llvm.memset.p0i8.i64(i8* %a156286305, i8 -1, i64 64, i32 16, i1 false) - unreachable - -cif_done: ; preds = %entry - ret void -} - -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll b/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll deleted file mode 100644 index c9c1a148a48..00000000000 --- a/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: opt < %s -S -scalarrepl | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" -target triple = "thumbv7-apple-ios5.0.0" - -%union.anon = type { <4 x float> } - -; CHECK-LABEL: @test( -; CHECK-NOT: alloca - -define void @test() nounwind { -entry: - %u = alloca %union.anon, align 16 - %u164 = bitcast %union.anon* %u to [4 x i32]* - %arrayidx165 = getelementptr inbounds [4 x i32], [4 x i32]* %u164, i32 0, i32 0 - store i32 undef, i32* %arrayidx165, align 4 - %v186 = bitcast %union.anon* %u to <4 x float>* - store <4 x float> undef, <4 x float>* %v186, align 16 - ret void -} diff --git a/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll b/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll deleted file mode 100644 index 51d1d146a90..00000000000 --- a/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: opt < %s -S -scalarrepl | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -%struct.S = type { [2 x %struct.anon], double } -%struct.anon = type {} - -; CHECK: @test() -; CHECK-NOT: alloca -; CHECK: ret double 1.0 - -define double @test() nounwind uwtable ssp { -entry: - %retval = alloca %struct.S, align 8 - %ret = alloca %struct.S, align 8 - %b = getelementptr inbounds %struct.S, %struct.S* %ret, i32 0, i32 1 - store double 1.000000e+00, double* %b, align 8 - %0 = bitcast %struct.S* %retval to i8* - %1 = bitcast %struct.S* %ret to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i32 8, i1 false) - %2 = bitcast %struct.S* %retval to double* - %3 = load double, double* %2, align 1 - ret double %3 -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/AggregatePromote.ll b/test/Transforms/ScalarRepl/AggregatePromote.ll deleted file mode 100644 index f6dfdf55346..00000000000 --- a/test/Transforms/ScalarRepl/AggregatePromote.ll +++ /dev/null @@ -1,51 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | \ -; RUN: not grep alloca - -target datalayout = "E-p:32:32" -target triple = "powerpc-apple-darwin8.0.0" - -define i64 @test1(i64 %X) { - %A = alloca i64 ; [#uses=3] - store i64 %X, i64* %A - %B = bitcast i64* %A to i32* ; [#uses=1] - %C = bitcast i32* %B to i8* ; [#uses=1] - store i8 0, i8* %C - %Y = load i64, i64* %A ; [#uses=1] - ret i64 %Y -} - -define i8 @test2(i64 %X) { - %X_addr = alloca i64 ; [#uses=2] - store i64 %X, i64* %X_addr - %tmp.0 = bitcast i64* %X_addr to i32* ; [#uses=1] - %tmp.1 = getelementptr i32, i32* %tmp.0, i32 1 ; [#uses=1] - %tmp.2 = bitcast i32* %tmp.1 to i8* ; [#uses=1] - %tmp.3 = getelementptr i8, i8* %tmp.2, i32 3 ; [#uses=1] - %tmp.2.upgrd.1 = load i8, i8* %tmp.3 ; [#uses=1] - ret i8 %tmp.2.upgrd.1 -} - -define i16 @crafty(i64 %X) { - %a = alloca { i64 } ; <{ i64 }*> [#uses=2] - %tmp.0 = getelementptr { i64 }, { i64 }* %a, i32 0, i32 0 ; [#uses=1] - store i64 %X, i64* %tmp.0 - %tmp.3 = bitcast { i64 }* %a to [4 x i16]* ; <[4 x i16]*> [#uses=2] - %tmp.4 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 3 ; [#uses=1] - %tmp.5 = load i16, i16* %tmp.4 ; [#uses=1] - %tmp.8 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 2 ; [#uses=1] - %tmp.9 = load i16, i16* %tmp.8 ; [#uses=1] - %tmp.10 = or i16 %tmp.9, %tmp.5 ; [#uses=1] - ret i16 %tmp.10 -} - -define i16 @crafty2(i64 %X) { - %a = alloca i64 ; [#uses=2] - store i64 %X, i64* %a - %tmp.3 = bitcast i64* %a to [4 x i16]* ; <[4 x i16]*> [#uses=2] - %tmp.4 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 3 ; [#uses=1] - %tmp.5 = load i16, i16* %tmp.4 ; [#uses=1] - %tmp.8 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 2 ; [#uses=1] - %tmp.9 = load i16, i16* %tmp.8 ; [#uses=1] - %tmp.10 = or i16 %tmp.9, %tmp.5 ; [#uses=1] - ret i16 %tmp.10 -} diff --git a/test/Transforms/ScalarRepl/DifferingTypes.ll b/test/Transforms/ScalarRepl/DifferingTypes.ll deleted file mode 100644 index 3860f6cd757..00000000000 --- a/test/Transforms/ScalarRepl/DifferingTypes.ll +++ /dev/null @@ -1,16 +0,0 @@ -; This is a feature test. Hopefully one day this will be implemented. The -; generated code should perform the appropriate masking operations required -; depending on the endianness of the target... -; RUN: opt < %s -scalarrepl -S | \ -; RUN: not grep alloca -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -define i32 @testfunc(i32 %i, i8 %j) { - %I = alloca i32 ; [#uses=3] - store i32 %i, i32* %I - %P = bitcast i32* %I to i8* ; [#uses=1] - store i8 %j, i8* %P - %t = load i32, i32* %I ; [#uses=1] - ret i32 %t -} - diff --git a/test/Transforms/ScalarRepl/address-space.ll b/test/Transforms/ScalarRepl/address-space.ll deleted file mode 100644 index b8b90efefc3..00000000000 --- a/test/Transforms/ScalarRepl/address-space.ll +++ /dev/null @@ -1,35 +0,0 @@ -; RUN: opt -S -scalarrepl < %s | FileCheck %s -; PR7437 - Make sure SROA preserves address space of memcpy when -; hacking on it. -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10" - -%struct.anon = type { [1 x float] } - -; CHECK-LABEL: define void @Test( -; CHECK: load float, float addrspace(2)* -; CHECK-NEXT: fsub float -; CHECK: store float {{.*}}, float addrspace(2)* -define void @Test(%struct.anon addrspace(2)* %pPtr) nounwind { -entry: - %s = alloca %struct.anon, align 4 ; <%struct.anon*> [#uses=3] - %arrayidx = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1] - %tmp1 = bitcast %struct.anon* %s to i8* ; [#uses=1] - %tmp2 = bitcast %struct.anon addrspace(2)* %arrayidx to i8 addrspace(2)* ; [#uses=1] - call void @llvm.memcpy.p0i8.p2i8.i64(i8* %tmp1, i8 addrspace(2)* %tmp2, i64 4, i32 4, i1 false) - %tmp3 = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 ; <[1 x float]*> [#uses=1] - %arrayidx4 = getelementptr inbounds [1 x float], [1 x float]* %tmp3, i32 0, i64 0 ; [#uses=2] - %tmp5 = load float, float* %arrayidx4 ; [#uses=1] - %sub = fsub float %tmp5, 5.000000e+00 ; [#uses=1] - store float %sub, float* %arrayidx4 - %arrayidx7 = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1] - %tmp8 = bitcast %struct.anon addrspace(2)* %arrayidx7 to i8 addrspace(2)* ; [#uses=1] - %tmp9 = bitcast %struct.anon* %s to i8* ; [#uses=1] - call void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* %tmp8, i8* %tmp9, i64 4, i32 4, i1 false) - ret void -} - -declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind - -declare void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* nocapture, i8* nocapture, i64, i32, i1) nounwind - diff --git a/test/Transforms/ScalarRepl/arraytest.ll b/test/Transforms/ScalarRepl/arraytest.ll deleted file mode 100644 index 486e725fa6a..00000000000 --- a/test/Transforms/ScalarRepl/arraytest.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -define i32 @test() { - %X = alloca [4 x i32] ; <[4 x i32]*> [#uses=1] - %Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 0 ; [#uses=2] - store i32 0, i32* %Y - %Z = load i32, i32* %Y ; [#uses=1] - ret i32 %Z -} - diff --git a/test/Transforms/ScalarRepl/badarray.ll b/test/Transforms/ScalarRepl/badarray.ll deleted file mode 100644 index 6f5bc95b43b..00000000000 --- a/test/Transforms/ScalarRepl/badarray.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" -target triple = "i386-pc-linux-gnu" - - -; PR3466 -; Off end of array, don't transform. -define i32 @test1() { -; CHECK-LABEL: @test1( -; CHECK-NOT: = alloca - %X = alloca [4 x i32] - %Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 6 ; [#uses=2] - store i32 0, i32* %Y - %Z = load i32, i32* %Y ; [#uses=1] - ret i32 %Z -} - - -; Off end of array, don't transform. -define i32 @test2() nounwind { -entry: -; CHECK-LABEL: @test2( -; CHECK-NOT: = alloca - %yx2.i = alloca float, align 4 ; [#uses=1] - %yx26.i = bitcast float* %yx2.i to i64* ; [#uses=1] - %0 = load i64, i64* %yx26.i, align 8 ; [#uses=0] - unreachable -} - -%base = type { i32, [0 x i8] } -%padded = type { %base, [1 x i32] } - -; PR5436 -define void @test3() { -entry: -; CHECK-LABEL: @test3( -; CHECK-NOT: = alloca -; CHECK: store i64 - %var_1 = alloca %padded, align 8 ; <%padded*> [#uses=3] - %0 = getelementptr inbounds %padded, %padded* %var_1, i32 0, i32 0 ; <%base*> [#uses=2] - - %p2 = getelementptr inbounds %base, %base* %0, i32 0, i32 1, i32 0 ; [#uses=1] - store i8 72, i8* %p2, align 1 - - ; 72 -> a[0]. - - %callret = call %padded *@test3f() ; [#uses=2] - %callretcast = bitcast %padded* %callret to i8* ; [#uses=1] - %var_11 = bitcast %padded* %var_1 to i8* ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4, i1 false) - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind - -declare %padded* @test3f() diff --git a/test/Transforms/ScalarRepl/basictest.ll b/test/Transforms/ScalarRepl/basictest.ll deleted file mode 100644 index 35d4d3ba86f..00000000000 --- a/test/Transforms/ScalarRepl/basictest.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -define i32 @test1() { - %X = alloca { i32, float } ; <{ i32, float }*> [#uses=1] - %Y = getelementptr { i32, float }, { i32, float }* %X, i64 0, i32 0 ; [#uses=2] - store i32 0, i32* %Y - %Z = load i32, i32* %Y ; [#uses=1] - ret i32 %Z -; CHECK-LABEL: @test1( -; CHECK-NOT: alloca -; CHECK: ret i32 0 -} - -; PR8980 -define i64 @test2(i64 %X) { - %A = alloca [8 x i8] - %B = bitcast [8 x i8]* %A to i64* - - store i64 %X, i64* %B - br label %L2 - -L2: - %Z = load i64, i64* %B ; [#uses=1] - ret i64 %Z -; CHECK-LABEL: @test2( -; CHECK-NOT: alloca -; CHECK: ret i64 %X -} - diff --git a/test/Transforms/ScalarRepl/bitfield-sroa.ll b/test/Transforms/ScalarRepl/bitfield-sroa.ll deleted file mode 100644 index 52986b0a49d..00000000000 --- a/test/Transforms/ScalarRepl/bitfield-sroa.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | not grep alloca -; rdar://6532315 -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -%t = type { { i32, i16, i8, i8 } } - -define i8 @foo(i64 %A) { - %ALL = alloca %t, align 8 - %tmp59172 = bitcast %t* %ALL to i64* - store i64 %A, i64* %tmp59172, align 8 - %C = getelementptr %t, %t* %ALL, i32 0, i32 0, i32 1 - %D = bitcast i16* %C to i32* - %E = load i32, i32* %D, align 4 - %F = bitcast %t* %ALL to i8* - %G = load i8, i8* %F, align 8 - ret i8 %G -} - diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll deleted file mode 100644 index 97977dbf11f..00000000000 --- a/test/Transforms/ScalarRepl/copy-aggregate.ll +++ /dev/null @@ -1,107 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -; PR3290 -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -;; Store of integer to whole alloca struct. -define i32 @test1(i64 %V) nounwind { -; CHECK: test1 -; CHECK-NOT: alloca - %X = alloca {{i32, i32}} - %Y = bitcast {{i32,i32}}* %X to i64* - store i64 %V, i64* %Y - - %A = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 0 - %B = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 1 - %a = load i32, i32* %A - %b = load i32, i32* %B - %c = add i32 %a, %b - ret i32 %c -} - -;; Store of integer to whole struct/array alloca. -define float @test2(i128 %V) nounwind { -; CHECK: test2 -; CHECK-NOT: alloca - %X = alloca {[4 x float]} - %Y = bitcast {[4 x float]}* %X to i128* - store i128 %V, i128* %Y - - %A = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 0 - %B = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 3 - %a = load float, float* %A - %b = load float, float* %B - %c = fadd float %a, %b - ret float %c -} - -;; Load of whole alloca struct as integer -define i64 @test3(i32 %a, i32 %b) nounwind { -; CHECK: test3 -; CHECK-NOT: alloca - %X = alloca {{i32, i32}} - - %A = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 0 - %B = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 1 - store i32 %a, i32* %A - store i32 %b, i32* %B - - %Y = bitcast {{i32,i32}}* %X to i64* - %Z = load i64, i64* %Y - ret i64 %Z -} - -;; load of integer from whole struct/array alloca. -define i128 @test4(float %a, float %b) nounwind { -; CHECK: test4 -; CHECK-NOT: alloca - %X = alloca {[4 x float]} - %A = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 0 - %B = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 3 - store float %a, float* %A - store float %b, float* %B - - %Y = bitcast {[4 x float]}* %X to i128* - %V = load i128, i128* %Y - ret i128 %V -} - -;; If the elements of a struct or array alloca contain padding, SROA can still -;; split up the alloca as long as there is no padding between the elements. -%padded = type { i16, i8 } -define void @test5([4 x %padded]* %p, [4 x %padded]* %q) { -entry: -; CHECK: test5 -; CHECK-NOT: i128 - %var = alloca [4 x %padded], align 4 - %vari8 = bitcast [4 x %padded]* %var to i8* - %pi8 = bitcast [4 x %padded]* %p to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i32 4, i1 false) - %qi8 = bitcast [4 x %padded]* %q to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i32 4, i1 false) - ret void -} - -;; Check that an array alloca can be split up when it is also accessed with -;; a load or store as a homogeneous structure with the same element type and -;; number of elements as the array. -%homogeneous = type { <8 x i16>, <8 x i16>, <8 x i16> } -%wrapped_array = type { [3 x <8 x i16>] } -define void @test6(i8* %p, %wrapped_array* %arr) { -entry: -; CHECK: test6 -; CHECK: store <8 x i16> -; CHECK: store <8 x i16> -; CHECK: store <8 x i16> - %var = alloca %wrapped_array, align 16 - %res = call %homogeneous @test6callee(i8* %p) - %varcast = bitcast %wrapped_array* %var to %homogeneous* - store %homogeneous %res, %homogeneous* %varcast - %tmp1 = bitcast %wrapped_array* %arr to i8* - %tmp2 = bitcast %wrapped_array* %var to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %tmp2, i32 48, i32 16, i1 false) - ret void -} - -declare %homogeneous @test6callee(i8* nocapture) nounwind - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll deleted file mode 100644 index 72e9f090fb6..00000000000 --- a/test/Transforms/ScalarRepl/crash.ll +++ /dev/null @@ -1,286 +0,0 @@ -; RUN: opt -scalarrepl -disable-output < %s -; RUN: opt -scalarrepl-ssa -disable-output < %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10.0.0" - -; PR9017 -define void @test1() nounwind readnone ssp { -entry: - %l_72 = alloca i32*, align 8 - unreachable - -for.cond: ; preds = %for.cond - %tmp1.i = load i32*, i32** %l_72, align 8 - store i32* %tmp1.i, i32** %l_72, align 8 - br label %for.cond - -if.end: ; No predecessors! - ret void -} - - -define void @test2() { - %E = alloca { { i32, float, double, i64 }, { i32, float, double, i64 } } ; <{ { i32, float, double, i64 }, { i32, float, double, i64 } }*> [#uses=1] - %tmp.151 = getelementptr { { i32, float, double, i64 }, { i32, float, double, i64 } }, { { i32, float, double, i64 }, { i32, float, double, i64 } }* %E, i64 0, i32 1, i32 3 ; [#uses=0] - ret void -} - -define i32 @test3() { - %X = alloca { [4 x i32] } ; <{ [4 x i32] }*> [#uses=1] - %Y = getelementptr { [4 x i32] }, { [4 x i32] }* %X, i64 0, i32 0, i64 2 ; [#uses=2] - store i32 4, i32* %Y - %Z = load i32, i32* %Y ; [#uses=1] - ret i32 %Z -} - - -%struct.rtx_def = type { [2 x i8], i32, [1 x %union.rtunion_def] } -%union.rtunion_def = type { i32 } - -define void @test4() { -entry: - %c_addr.i = alloca i8 ; [#uses=1] - switch i32 0, label %return [ - i32 36, label %label.7 - i32 34, label %label.7 - i32 41, label %label.5 - ] -label.5: ; preds = %entry - ret void -label.7: ; preds = %entry, %entry - br i1 false, label %then.4, label %switchexit.0 -then.4: ; preds = %label.7 - %tmp.0.i = bitcast i8* %c_addr.i to i32* ; [#uses=1] - store i32 44, i32* %tmp.0.i - ret void -switchexit.0: ; preds = %label.7 - ret void -return: ; preds = %entry - ret void -} - - -define void @test5() { -entry: - %source_ptr = alloca i8*, align 4 ; [#uses=2] - br i1 false, label %bb1357, label %cond_next583 -cond_next583: ; preds = %entry - ret void -bb1357: ; preds = %entry - br i1 false, label %bb1365, label %bb27055 -bb1365: ; preds = %bb1357 - switch i32 0, label %cond_next10377 [ - i32 0, label %bb4679 - i32 1, label %bb4679 - i32 2, label %bb4679 - i32 3, label %bb4679 - i32 4, label %bb5115 - i32 5, label %bb6651 - i32 6, label %bb7147 - i32 7, label %bb8683 - i32 8, label %bb9131 - i32 9, label %bb9875 - i32 10, label %bb4679 - i32 11, label %bb4859 - i32 12, label %bb4679 - i32 16, label %bb10249 - ] -bb4679: ; preds = %bb1365, %bb1365, %bb1365, %bb1365, %bb1365, %bb1365 - ret void -bb4859: ; preds = %bb1365 - ret void -bb5115: ; preds = %bb1365 - ret void -bb6651: ; preds = %bb1365 - ret void -bb7147: ; preds = %bb1365 - ret void -bb8683: ; preds = %bb1365 - ret void -bb9131: ; preds = %bb1365 - ret void -bb9875: ; preds = %bb1365 - %source_ptr9884 = bitcast i8** %source_ptr to i8** ; [#uses=1] - %tmp9885 = load i8*, i8** %source_ptr9884 ; [#uses=0] - ret void -bb10249: ; preds = %bb1365 - %source_ptr10257 = bitcast i8** %source_ptr to i16** ; [#uses=1] - %tmp10258 = load i16*, i16** %source_ptr10257 ; [#uses=0] - ret void -cond_next10377: ; preds = %bb1365 - ret void -bb27055: ; preds = %bb1357 - ret void -} - - - %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>" = type { %"struct.__gnu_cxx::bitmap_allocator::_Alloc_block"* } - %"struct.__gnu_cxx::bitmap_allocator" = type { i8 } - %"struct.__gnu_cxx::bitmap_allocator::_Alloc_block" = type { [8 x i8] } - -; PR1045 -define void @test6() { -entry: - %this_addr.i = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"* ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"**> [#uses=3] - %tmp = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>", align 4 ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"*> [#uses=1] - store %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"* %tmp, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"** %this_addr.i - %tmp.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"*, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"** %this_addr.i ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"*> [#uses=1] - %tmp.i.upgrd.1 = bitcast %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"* %tmp.i to %"struct.__gnu_cxx::bitmap_allocator"* ; <%"struct.__gnu_cxx::bitmap_allocator"*> [#uses=0] - %tmp1.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"*, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"** %this_addr.i ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"*> [#uses=1] - %tmp.i.upgrd.2 = getelementptr %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>", %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator::_Alloc_block*>"* %tmp1.i, i32 0, i32 0 ; <%"struct.__gnu_cxx::bitmap_allocator::_Alloc_block"**> [#uses=0] - unreachable -} - - %struct.CGPoint = type { float, float } - %struct.aal_big_range_t = type { i32, i32 } %struct.aal_callback_t = type { i8* (i8*, i32)*, void (i8*, i8*)* } %struct.aal_edge_pool_t = type { %struct.aal_edge_pool_t*, i32, i32, [0 x %struct.aal_edge_t] } %struct.aal_edge_t = type { %struct.CGPoint, %struct.CGPoint, i32 } - %struct.aal_range_t = type { i16, i16 } - %struct.aal_span_pool_t = type { %struct.aal_span_pool_t*, [341 x %struct.aal_span_t] } - %struct.aal_span_t = type { %struct.aal_span_t*, %struct.aal_big_range_t } - %struct.aal_spanarray_t = type { [2 x %struct.aal_range_t] } - %struct.aal_spanbucket_t = type { i16, [2 x i8], %struct.anon } - %struct.aal_state_t = type { %struct.CGPoint, %struct.CGPoint, %struct.CGPoint, i32, float, float, float, float, %struct.CGPoint, %struct.CGPoint, float, float, float, float, i32, i32, i32, i32, float, float, i8*, i32, i32, %struct.aal_edge_pool_t*, %struct.aal_edge_pool_t*, i8*, %struct.aal_callback_t*, i32, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_pool_t*, i8, float, i8, i32 } - %struct.anon = type { %struct.aal_spanarray_t } - - - -define fastcc void @test7() { -entry: - %SB = alloca %struct.aal_spanbucket_t, align 4 ; <%struct.aal_spanbucket_t*> [#uses=2] - br i1 false, label %cond_true, label %cond_next79 - -cond_true: ; preds = %entry - br i1 false, label %cond_next, label %cond_next114.i - -cond_next114.i: ; preds = %cond_true - ret void - -cond_next: ; preds = %cond_true - %SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8* ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i32 0, i1 false) - br i1 false, label %cond_next34, label %cond_next79 - -cond_next34: ; preds = %cond_next - %i.2.reload22 = load i32, i32* null ; [#uses=1] - %tmp51 = getelementptr %struct.aal_spanbucket_t, %struct.aal_spanbucket_t* %SB, i32 0, i32 2, i32 0, i32 0, i32 %i.2.reload22, i32 1 - ; [#uses=0] - ret void - -cond_next79: ; preds = %cond_next, %entry - ret void -} - - - %struct.c37304a__vrec = type { i8, %struct.c37304a__vrec___disc___XVN } - %struct.c37304a__vrec___disc___XVN = type { -%struct.c37304a__vrec___disc___XVN___O } - %struct.c37304a__vrec___disc___XVN___O = type { } - -; PR3304 -define void @test8() { -entry: - %v = alloca %struct.c37304a__vrec - %0 = getelementptr %struct.c37304a__vrec, %struct.c37304a__vrec* %v, i32 0, i32 0 - store i8 8, i8* %0, align 1 - unreachable -} - - - -; rdar://6808691 - ZeroLengthMemSet - %0 = type <{ i32, i16, i8, i8, i64, i64, i16, [0 x i16] }> - -define i32 @test9() { -entry: - %.compoundliteral = alloca %0 - %tmp228 = getelementptr %0, %0* %.compoundliteral, i32 0, i32 7 - %tmp229 = bitcast [0 x i16]* %tmp228 to i8* - call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i32 2, i1 false) - unreachable -} - -declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind - - -; PR4146 - i1 handling -%wrapper = type { i1 } -define void @test10() { -entry: - %w = alloca %wrapper, align 8 ; <%wrapper*> [#uses=1] - %0 = getelementptr %wrapper, %wrapper* %w, i64 0, i32 0 ; - store i1 true, i1* %0 - ret void -} - - - %struct.singlebool = type <{ i8 }> -; PR4286 -define zeroext i8 @test11() nounwind { -entry: - %a = alloca %struct.singlebool, align 1 ; <%struct.singlebool*> [#uses=2] - %storetmp.i = bitcast %struct.singlebool* %a to i1* ; [#uses=1] - store i1 true, i1* %storetmp.i - %tmp = getelementptr %struct.singlebool, %struct.singlebool* %a, i64 0, i32 0 ; [#uses=1] - %tmp1 = load i8, i8* %tmp ; [#uses=1] - ret i8 %tmp1 -} - - - %struct.Item = type { [4 x i16], %struct.rule* } - %struct.rule = type { [4 x i16], i32, i32, i32, %struct.nonterminal*, %struct.pattern*, i8 } - %struct.nonterminal = type { i8*, i32, i32, i32, %struct.plankMap*, %struct.rule* } - %struct.plankMap = type { %struct.list*, i32, %struct.stateMap* } - %struct.list = type { i8*, %struct.list* } - %struct.stateMap = type { i8*, %struct.plank*, i32, i16* } - %struct.plank = type { i8*, %struct.list*, i32 } - %struct.pattern = type { %struct.nonterminal*, %struct.operator*, [2 x %struct.nonterminal*] } - %struct.operator = type { i8*, i8, i32, i32, i32, i32, %struct.table* } - %struct.table = type { %struct.operator*, %struct.list*, i16*, [2 x %struct.dimension*], %struct.item_set** } - %struct.dimension = type { i16*, %struct.Index_Map, %struct.mapping*, i32, %struct.plankMap* } - %struct.Index_Map = type { i32, %struct.item_set** } - %struct.item_set = type { i32, i32, %struct.operator*, [2 x %struct.item_set*], %struct.item_set*, i16*, %struct.Item*, %struct.Item* } - %struct.mapping = type { %struct.list**, i32, i32, i32, %struct.item_set** } - -; VLAs. -define void @test12() { -bb4.i: - %malloccall = tail call i8* @malloc(i32 0) - %0 = bitcast i8* %malloccall to [0 x %struct.Item]* - %.sub.i.c.i = getelementptr [0 x %struct.Item], [0 x %struct.Item]* %0, i32 0, i32 0 ; <%struct.Item*> [#uses=0] - unreachable -} -declare noalias i8* @malloc(i32) - -; PR8680 -define void @test13() nounwind { -entry: - %memtmp = alloca i32, align 4 - %0 = bitcast i32* %memtmp to void ()* - call void %0() nounwind - ret void -} - -; rdar://11861001 - The dynamic GEP here was incorrectly making all accesses -; to the alloca think they were also dynamic. Inserts and extracts created to -; access the vector were all being based from the dynamic access, even in BBs -; not dominated by the GEP. -define fastcc void @test() optsize inlinehint ssp align 2 { -entry: - %alloc.0.0 = alloca <4 x float>, align 16 - %bitcast = bitcast <4 x float>* %alloc.0.0 to [4 x float]* - %idx3 = getelementptr inbounds [4 x float], [4 x float]* %bitcast, i32 0, i32 3 - store float 0.000000e+00, float* %idx3, align 4 - br label %for.body10 - -for.body10: ; preds = %for.body10, %entry - %loopidx = phi i32 [ 0, %entry ], [ undef, %for.body10 ] - %unusedidx = getelementptr inbounds <4 x float>, <4 x float>* %alloc.0.0, i32 0, i32 %loopidx - br i1 undef, label %for.end, label %for.body10 - -for.end: ; preds = %for.body10 - store <4 x float> , <4 x float>* %alloc.0.0, align 16 - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll deleted file mode 100644 index 8dab20a735c..00000000000 --- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll +++ /dev/null @@ -1,64 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-macosx10.6.0" - -; CHECK: f -; CHECK-NOT: llvm.dbg.declare -; CHECK: llvm.dbg.value -; CHECK: llvm.dbg.value -; CHECK: llvm.dbg.value -; CHECK: llvm.dbg.value -; CHECK: llvm.dbg.value - -define i32 @f(i32 %a, i32 %b) nounwind ssp !dbg !1 { -entry: - %a.addr = alloca i32, align 4 - %b.addr = alloca i32, align 4 - %c = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !6, metadata !DIExpression()), !dbg !7 - store i32 %b, i32* %b.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !8, metadata !DIExpression()), !dbg !9 - call void @llvm.dbg.declare(metadata i32* %c, metadata !10, metadata !DIExpression()), !dbg !12 - %tmp = load i32, i32* %a.addr, align 4, !dbg !13 - store i32 %tmp, i32* %c, align 4, !dbg !13 - %tmp1 = load i32, i32* %a.addr, align 4, !dbg !14 - %tmp2 = load i32, i32* %b.addr, align 4, !dbg !14 - %add = add nsw i32 %tmp1, %tmp2, !dbg !14 - store i32 %add, i32* %a.addr, align 4, !dbg !14 - %tmp3 = load i32, i32* %c, align 4, !dbg !15 - %tmp4 = load i32, i32* %b.addr, align 4, !dbg !15 - %sub = sub nsw i32 %tmp3, %tmp4, !dbg !15 - store i32 %sub, i32* %b.addr, align 4, !dbg !15 - %tmp5 = load i32, i32* %a.addr, align 4, !dbg !16 - %tmp6 = load i32, i32* %b.addr, align 4, !dbg !16 - %add7 = add nsw i32 %tmp5, %tmp6, !dbg !16 - ret i32 %add7, !dbg !16 -} - -declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!20} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131941)", isOptimized: false, emissionKind: FullDebug, file: !18, enums: !19, retainedTypes: !19) -!1 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !18, scope: !2, type: !3) -!2 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b") -!3 = !DISubroutineType(types: !4) -!4 = !{!5} -!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!6 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !1, file: !2, type: !5) -!7 = !DILocation(line: 1, column: 11, scope: !1) -!8 = !DILocalVariable(name: "b", line: 1, arg: 2, scope: !1, file: !2, type: !5) -!9 = !DILocation(line: 1, column: 18, scope: !1) -!10 = !DILocalVariable(name: "c", line: 2, scope: !11, file: !2, type: !5) -!11 = distinct !DILexicalBlock(line: 1, column: 21, file: !18, scope: !1) -!12 = !DILocation(line: 2, column: 9, scope: !11) -!13 = !DILocation(line: 2, column: 14, scope: !11) -!14 = !DILocation(line: 3, column: 5, scope: !11) -!15 = !DILocation(line: 4, column: 5, scope: !11) -!16 = !DILocation(line: 5, column: 5, scope: !11) -!18 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b") -!19 = !{} -!20 = !{i32 1, !"Debug Info Version", i32 3} diff --git a/test/Transforms/ScalarRepl/inline-vector.ll b/test/Transforms/ScalarRepl/inline-vector.ll deleted file mode 100644 index 85f37414e65..00000000000 --- a/test/Transforms/ScalarRepl/inline-vector.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" -target triple = "thumbv7-apple-darwin10.0.0" - -%struct.Vector4 = type { float, float, float, float } -@f.vector = internal constant %struct.Vector4 { float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 }, align 16 - -; CHECK-LABEL: define void @f( -; CHECK-NOT: alloca -; CHECK: phi <4 x float> - -define void @f() nounwind ssp { -entry: - %i = alloca i32, align 4 - %vector = alloca %struct.Vector4, align 16 - %agg.tmp = alloca %struct.Vector4, align 16 - %tmp = bitcast %struct.Vector4* %vector to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false) - br label %for.cond - -for.cond: ; preds = %for.body, %entry - %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ] - store i32 %storemerge, i32* %i, align 4 - %cmp = icmp slt i32 %storemerge, 1000000 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8* - %tmp3 = bitcast %struct.Vector4* %vector to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false) - %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]* - %1 = load [2 x i64], [2 x i64]* %0, align 16 - %tmp2.i = extractvalue [2 x i64] %1, 0 - %tmp3.i = zext i64 %tmp2.i to i128 - %tmp10.i = bitcast i128 %tmp3.i to <4 x float> - %sub.i.i = fsub <4 x float> , %tmp10.i - %2 = bitcast %struct.Vector4* %vector to <4 x float>* - store <4 x float> %sub.i.i, <4 x float>* %2, align 16 - %tmp4 = load i32, i32* %i, align 4 - %inc = add nsw i32 %tmp4, 1 - br label %for.cond - -for.end: ; preds = %for.cond - %x = getelementptr inbounds %struct.Vector4, %struct.Vector4* %vector, i32 0, i32 0 - %tmp5 = load float, float* %x, align 16 - %conv = fpext float %tmp5 to double - %call = call i32 (...) @printf(double %conv) nounwind - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare i32 @printf(...) diff --git a/test/Transforms/ScalarRepl/lifetime.ll b/test/Transforms/ScalarRepl/lifetime.ll deleted file mode 100644 index c0ddfb58bbd..00000000000 --- a/test/Transforms/ScalarRepl/lifetime.ll +++ /dev/null @@ -1,139 +0,0 @@ -; RUN: opt -scalarrepl -S < %s | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -declare void @llvm.lifetime.start(i64, i8*) -declare void @llvm.lifetime.end(i64, i8*) - -%t1 = type {i32, i32, i32} - -define void @test1() { -; CHECK-LABEL: @test1( - %A = alloca %t1 - %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0 - %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1 - %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2 - %B = bitcast i32* %A1 to i8* - store i32 0, i32* %A1 - call void @llvm.lifetime.start(i64 -1, i8* %B) - ret void -; CHECK-NEXT: ret void -} - -define void @test2() { -; CHECK-LABEL: @test2( - %A = alloca %t1 - %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0 - %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1 - %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2 - %B = bitcast i32* %A2 to i8* - store i32 0, i32* %A2 - call void @llvm.lifetime.start(i64 -1, i8* %B) - %C = load i32, i32* %A2 - ret void -; CHECK: ret void -} - -define void @test3() { -; CHECK-LABEL: @test3( - %A = alloca %t1 - %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0 - %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1 - %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2 - %B = bitcast i32* %A2 to i8* - store i32 0, i32* %A2 - call void @llvm.lifetime.start(i64 6, i8* %B) - %C = load i32, i32* %A2 - ret void -; CHECK-NEXT: ret void -} - -define void @test4() { -; CHECK-LABEL: @test4( - %A = alloca %t1 - %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0 - %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1 - %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2 - %B = bitcast i32* %A2 to i8* - store i32 0, i32* %A2 - call void @llvm.lifetime.start(i64 1, i8* %B) - %C = load i32, i32* %A2 - ret void -; CHECK-NEXT: ret void -} - -%t2 = type {i32, [4 x i8], i32} - -define void @test5() { -; CHECK-LABEL: @test5( - %A = alloca %t2 -; CHECK: alloca{{.*}}i8 -; CHECK: alloca{{.*}}i8 -; CHECK: alloca{{.*}}i8 - - %A21 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 0 - %A22 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 1 - %A23 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 2 - %A24 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 3 -; CHECK-NOT: store i8 1 - store i8 1, i8* %A21 - store i8 2, i8* %A22 - store i8 3, i8* %A23 - store i8 4, i8* %A24 - - %A1 = getelementptr %t2, %t2* %A, i32 0, i32 0 - %A2 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 1 - %A3 = getelementptr %t2, %t2* %A, i32 0, i32 2 - store i8 0, i8* %A2 - call void @llvm.lifetime.start(i64 5, i8* %A2) -; CHECK: llvm.lifetime{{.*}}i64 1 -; CHECK: llvm.lifetime{{.*}}i64 1 -; CHECK: llvm.lifetime{{.*}}i64 1 - %C = load i8, i8* %A2 - ret void -} - -%t3 = type {[4 x i16], [4 x i8]} - -define void @test6() { -; CHECK-LABEL: @test6( - %A = alloca %t3 -; CHECK: alloca i8 -; CHECK: alloca i8 -; CHECK: alloca i8 - - %A11 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 0 - %A12 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 1 - %A13 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 2 - %A14 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 3 - store i16 11, i16* %A11 - store i16 12, i16* %A12 - store i16 13, i16* %A13 - store i16 14, i16* %A14 -; CHECK-NOT: store i16 11 -; CHECK-NOT: store i16 12 -; CHECK-NOT: store i16 13 -; CHECK-NOT: store i16 14 - - %A21 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 0 - %A22 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 1 - %A23 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 2 - %A24 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 3 - store i8 21, i8* %A21 - store i8 22, i8* %A22 - store i8 23, i8* %A23 - store i8 24, i8* %A24 -; CHECK: store i8 21 -; CHECK: store i8 22 -; CHECK: store i8 23 -; CHECK-NOT: store i8 24 - - %B = bitcast i16* %A13 to i8* - call void @llvm.lifetime.start(i64 7, i8* %B) -; CHECK: lifetime.start{{.*}}i64 1 -; CHECK: lifetime.start{{.*}}i64 1 -; CHECK: lifetime.start{{.*}}i64 1 - - ret void -} diff --git a/test/Transforms/ScalarRepl/load-store-aggregate.ll b/test/Transforms/ScalarRepl/load-store-aggregate.ll deleted file mode 100644 index 88299f3679c..00000000000 --- a/test/Transforms/ScalarRepl/load-store-aggregate.ll +++ /dev/null @@ -1,31 +0,0 @@ -; This testcase shows that scalarrepl is able to replace struct alloca's which -; are directly loaded from or stored to (using the first class aggregates -; feature). -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -; RUN: opt < %s -scalarrepl -S > %t -; RUN: cat %t | not grep alloca - -%struct.foo = type { i32, i32 } - -define i32 @test(%struct.foo* %P) { -entry: - %L = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=2] - %V = load %struct.foo, %struct.foo* %P - store %struct.foo %V, %struct.foo* %L - - %tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0 ; [#uses=1] - %tmp5 = load i32, i32* %tmp4 ; [#uses=1] - ret i32 %tmp5 -} - -define %struct.foo @test2(i32 %A, i32 %B) { -entry: - %L = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=2] - %L.0 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0 - store i32 %A, i32* %L.0 - %L.1 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 1 - store i32 %B, i32* %L.1 - %V = load %struct.foo, %struct.foo* %L - ret %struct.foo %V -} diff --git a/test/Transforms/ScalarRepl/memcpy-align.ll b/test/Transforms/ScalarRepl/memcpy-align.ll deleted file mode 100644 index 29a1bb8a4b5..00000000000 --- a/test/Transforms/ScalarRepl/memcpy-align.ll +++ /dev/null @@ -1,32 +0,0 @@ -; RUN: opt -scalarrepl -S < %s | FileCheck %s -; PR6832 -target datalayout = -"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" -target triple = "arm-u-u" - -%0 = type { %struct.anon, %struct.anon } -%struct.anon = type { [4 x i8] } - -@c = external global %0 ; <%0*> [#uses=1] - -define void @good() nounwind { -entry: - %x0 = alloca %struct.anon, align 4 ; <%struct.anon*> [#uses=2] - %tmp = bitcast %struct.anon* %x0 to i8* ; [#uses=1] - call void @llvm.memset.p0i8.i32(i8* %tmp, i8 0, i32 4, i32 4, i1 false) - %tmp1 = bitcast %struct.anon* %x0 to i8* ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%0, %0* @c, i32 -0, i32 0, i32 0, i32 0), i8* %tmp1, i32 4, i32 4, i1 false) - ret void - -; CHECK: store i8 0, i8*{{.*}}, align 4 -; CHECK: store i8 0, i8*{{.*}}, align 1 -; CHECK: store i8 0, i8*{{.*}}, align 2 -; CHECK: store i8 0, i8*{{.*}}, align 1 -} - -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, -i1) nounwind - diff --git a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll deleted file mode 100644 index e8088c121fb..00000000000 --- a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll +++ /dev/null @@ -1,23 +0,0 @@ -; PR1226 -; RUN: opt < %s -scalarrepl -S | \ -; RUN: not grep "call void @llvm.memcpy.p0i8.p0i8.i32" -; RUN: opt < %s -scalarrepl -S | grep getelementptr -; END. - -target datalayout = "E-p:32:32" -target triple = "powerpc-apple-darwin8.8.0" - %struct.foo = type { i8, i8 } - - -define i32 @test1(%struct.foo* %P) { -entry: - %L = alloca %struct.foo, align 2 ; <%struct.foo*> [#uses=1] - %L2 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0 ; [#uses=2] - %tmp13 = getelementptr %struct.foo, %struct.foo* %P, i32 0, i32 0 ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i32( i8* %L2, i8* %tmp13, i32 2, i32 1, i1 false) - %tmp5 = load i8, i8* %L2 ; [#uses=1] - %tmp56 = sext i8 %tmp5 to i32 ; [#uses=1] - ret i32 %tmp56 -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) diff --git a/test/Transforms/ScalarRepl/memset-aggregate.ll b/test/Transforms/ScalarRepl/memset-aggregate.ll deleted file mode 100644 index 98e2dddefe7..00000000000 --- a/test/Transforms/ScalarRepl/memset-aggregate.ll +++ /dev/null @@ -1,67 +0,0 @@ -; PR1226 -; RUN: opt < %s -scalarrepl -S | grep "ret i32 16843009" -; RUN: opt < %s -scalarrepl -S | not grep alloca -; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i16 514" - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "i686-apple-darwin8" - %struct.bar = type { %struct.foo, i64, double } - %struct.foo = type { i32, i32 } - - -define i32 @test1(%struct.foo* %P) { -entry: - %L = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=2] - %L2 = bitcast %struct.foo* %L to i8* ; [#uses=1] - %tmp13 = bitcast %struct.foo* %P to i8* ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i32 4, i1 false) - %tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0 ; [#uses=1] - %tmp5 = load i32, i32* %tmp4 ; [#uses=1] - ret i32 %tmp5 -} - - -define i32 @test2() { -entry: - %L = alloca [4 x %struct.foo], align 16 ; <[4 x %struct.foo]*> [#uses=2] - %L12 = bitcast [4 x %struct.foo]* %L to i8* ; [#uses=1] - call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i32 16, i1 false) - %tmp4 = getelementptr [4 x %struct.foo], [4 x %struct.foo]* %L, i32 0, i32 0, i32 0 ; [#uses=1] - %tmp5 = load i32, i32* %tmp4 ; [#uses=1] - ret i32 %tmp5 -} - - -define i32 @test3() { -entry: - %B = alloca %struct.bar, align 16 ; <%struct.bar*> [#uses=4] - %B1 = bitcast %struct.bar* %B to i8* ; [#uses=1] - call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i32 16, i1 false) - %tmp3 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 0, i32 0 ; [#uses=1] - store i32 1, i32* %tmp3 - %tmp4 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 2 ; [#uses=1] - store double 1.000000e+01, double* %tmp4 - %tmp6 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 0, i32 1 ; [#uses=1] - %tmp7 = load i32, i32* %tmp6 ; [#uses=1] - ret i32 %tmp7 -} - - - %struct.f = type { i32, i32, i32, i32, i32, i32 } - -define i16 @test4() nounwind { -entry: - %A = alloca %struct.f, align 8 ; <%struct.f*> [#uses=3] - %0 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 0 ; [#uses=1] - store i32 1, i32* %0, align 8 - %1 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 1 ; [#uses=1] - %2 = bitcast i32* %1 to i8* ; [#uses=1] - call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i32 4, i1 false) - %3 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 2 ; [#uses=1] - %4 = load i32, i32* %3, align 8 ; [#uses=1] - %retval12 = trunc i32 %4 to i16 ; [#uses=1] - ret i16 %retval12 -} -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind - -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/negative-memset.ll b/test/Transforms/ScalarRepl/negative-memset.ll deleted file mode 100644 index 458d9610cd7..00000000000 --- a/test/Transforms/ScalarRepl/negative-memset.ll +++ /dev/null @@ -1,20 +0,0 @@ -; PR12202 -; RUN: opt < %s -scalarrepl -S -; Ensure that we do not hang or crash when feeding a negative value to memset - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32" -target triple = "i686-pc-win32" - -define i32 @test() nounwind { -entry: - %retval = alloca i32, align 4 - %buff = alloca [1 x i8], align 1 - store i32 0, i32* %retval - %0 = bitcast [1 x i8]* %buff to i8* - call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false) - %arraydecay = getelementptr inbounds [1 x i8], [1 x i8]* %buff, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i32 1, i1 false) ; Negative 8! - ret i32 0 -} - -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/nonzero-first-index.ll b/test/Transforms/ScalarRepl/nonzero-first-index.ll deleted file mode 100644 index da757b08d45..00000000000 --- a/test/Transforms/ScalarRepl/nonzero-first-index.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" -target triple = "i386-pc-linux-gnu" - -%nested = type { i32, [4 x i32] } - -; Check that a GEP with a non-zero first index does not prevent SROA as long -; as the resulting offset corresponds to an element in the alloca. -define i32 @test1() { -; CHECK-LABEL: @test1( -; CHECK-NOT: = i160 -; CHECK: ret i32 undef - %A = alloca %nested - %B = getelementptr %nested, %nested* %A, i32 0, i32 1, i32 0 - %C = getelementptr i32, i32* %B, i32 2 - %D = load i32, i32* %C - ret i32 %D -} - -; But, if the offset is out of range, then it should not be transformed. -define i32 @test2() { -; CHECK-LABEL: @test2( -; CHECK: i160 - %A = alloca %nested - %B = getelementptr %nested, %nested* %A, i32 0, i32 1, i32 0 - %C = getelementptr i32, i32* %B, i32 4 - %D = load i32, i32* %C - ret i32 %D -} - -; Try it with a bitcast and single GEP.... -define i32 @test3() { -; CHECK-LABEL: @test3( -; CHECK-NOT: = i160 -; CHECK: ret i32 undef - %A = alloca %nested - %B = bitcast %nested* %A to i32* - %C = getelementptr i32, i32* %B, i32 2 - %D = load i32, i32* %C - ret i32 %D -} - -; ...and again make sure that out-of-range accesses are not transformed. -define i32 @test4() { -; CHECK-LABEL: @test4( -; CHECK: i160 - %A = alloca %nested - %B = bitcast %nested* %A to i32* - %C = getelementptr i32, i32* %B, i32 -1 - %D = load i32, i32* %C - ret i32 %D -} diff --git a/test/Transforms/ScalarRepl/not-a-vector.ll b/test/Transforms/ScalarRepl/not-a-vector.ll deleted file mode 100644 index 04c1f93617b..00000000000 --- a/test/Transforms/ScalarRepl/not-a-vector.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | not grep alloca -; RUN: opt < %s -scalarrepl -S | not grep "7 x double" -; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret double %B" -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -define double @test(double %A, double %B) { - %ARR = alloca [7 x i64] - %C = bitcast [7 x i64]* %ARR to double* - store double %A, double* %C - - %D = getelementptr [7 x i64], [7 x i64]* %ARR, i32 0, i32 4 - %E = bitcast i64* %D to double* - store double %B, double* %E - - %F = getelementptr double, double* %C, i32 4 - %G = load double, double* %F - ret double %G -} - - diff --git a/test/Transforms/ScalarRepl/only-memcpy-uses.ll b/test/Transforms/ScalarRepl/only-memcpy-uses.ll deleted file mode 100644 index d0ed20b26bc..00000000000 --- a/test/Transforms/ScalarRepl/only-memcpy-uses.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10.0.0" - -%struct.S = type { [12 x i32] } - -; CHECK-LABEL: @bar4( -define void @bar4(%struct.S* byval %s) nounwind ssp { -entry: -; CHECK: alloca -; CHECK-NOT: load -; CHECK: memcpy - %t = alloca %struct.S, align 4 - %agg.tmp = alloca %struct.S, align 4 - %tmp = bitcast %struct.S* %t to i8* - %tmp1 = bitcast %struct.S* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false) - %tmp2 = bitcast %struct.S* %agg.tmp to i8* - %tmp3 = bitcast %struct.S* %t to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i32 4, i1 false) - %call = call i32 (...) @bazz(%struct.S* byval %agg.tmp) - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind - -declare i32 @bazz(...) diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll deleted file mode 100644 index a44f0818064..00000000000 --- a/test/Transforms/ScalarRepl/phi-cycle.ll +++ /dev/null @@ -1,80 +0,0 @@ -; RUN: opt -S -scalarrepl-ssa < %s | FileCheck %s -; rdar://10589171 - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.foo = type { i32, i32 } - -@.str = private unnamed_addr constant [6 x i8] c"x=%d\0A\00", align 1 - -define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable { -entry: - %f = alloca %struct.foo, align 4 - %x.i = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0 - store i32 1, i32* %x.i, align 4 - %y.i = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 1 - br label %while.cond.i - -; CHECK: while.cond.i: -; CHECK-NEXT: %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ] -; CHECK-NEXT: %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ] -; CHECK-NEXT: %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ] -; CHECK-NOT: phi -while.cond.i: ; preds = %while.cond.backedge.i, %entry - %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ] - %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ] - %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ] - %cmp.i = icmp sgt i32 %left.0.i, 0 - br i1 %cmp.i, label %while.body.i, label %while.cond.i.func.exit_crit_edge - -while.cond.i.func.exit_crit_edge: ; preds = %while.cond.i - br label %func.exit - -while.body.i: ; preds = %while.cond.i - %dec.i = add nsw i32 %left.0.i, -1 - switch i32 1, label %while.body.i.func.exit_crit_edge [ - i32 0, label %while.cond.backedge.i - i32 1, label %sw.bb.i - ] - -while.body.i.func.exit_crit_edge: ; preds = %while.body.i - br label %func.exit - -sw.bb.i: ; preds = %while.body.i - %cmp2.i = icmp eq i32 %tmp, 1 - br i1 %cmp2.i, label %if.then.i, label %if.end.i - -if.then.i: ; preds = %sw.bb.i - store i32 %pos.0.i, i32* %x.i, align 4 - br label %if.end.i - -; CHECK: if.end.i: -; CHECK-NEXT: %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ] -; CHECK-NOT: phi -if.end.i: ; preds = %if.then.i, %sw.bb.i - %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ] - store i32 %tmp1, i32* %y.i, align 4 - br label %while.cond.backedge.i - -; CHECK: while.cond.backedge.i: -; CHECK-NEXT: %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ] -; CHECK-NOT: phi -while.cond.backedge.i: ; preds = %if.end.i, %while.body.i - %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ] - %xtmp.i = add i32 %pos.0.i, 1 - br label %while.cond.i - -; CHECK: func.exit: -; CHECK-NOT: load -; CHECK: %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]] -func.exit: ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge - %tmp3 = load i32, i32* %x.i, align 4 - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind - ret i32 0 -} - -declare i32 @printf(i8* nocapture, ...) nounwind - -; CHECK: attributes #0 = { nounwind uwtable } -; CHECK: attributes [[NUW]] = { nounwind } diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll deleted file mode 100644 index a6c7135a492..00000000000 --- a/test/Transforms/ScalarRepl/phi-select.ll +++ /dev/null @@ -1,153 +0,0 @@ -; RUN: opt -scalarrepl -S < %s | FileCheck %s -; Test promotion of allocas that have phis and select users. -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10.2" - -%struct.X = type { i32 } -%PairTy = type {i32, i32} - -; CHECK-LABEL: @test1( -; CHECK: %a.0 = alloca i32 -; CHECK: %b.0 = alloca i32 -define i32 @test1(i32 %x) nounwind readnone ssp { -entry: - %a = alloca %struct.X, align 8 ; <%struct.X*> [#uses=2] - %b = alloca %struct.X, align 8 ; <%struct.X*> [#uses=2] - %0 = getelementptr inbounds %struct.X, %struct.X* %a, i64 0, i32 0 ; [#uses=1] - store i32 1, i32* %0, align 8 - %1 = getelementptr inbounds %struct.X, %struct.X* %b, i64 0, i32 0 ; [#uses=1] - store i32 2, i32* %1, align 8 - %2 = icmp eq i32 %x, 0 ; [#uses=1] - %p.0 = select i1 %2, %struct.X* %b, %struct.X* %a ; <%struct.X*> [#uses=1] - %3 = getelementptr inbounds %struct.X, %struct.X* %p.0, i64 0, i32 0 ; [#uses=1] - %4 = load i32, i32* %3, align 8 ; [#uses=1] - ret i32 %4 -} - -; CHECK-LABEL: @test2( -; CHECK: %X.ld = phi i32 [ 1, %entry ], [ 2, %T ] -; CHECK-NEXT: ret i32 %X.ld -define i32 @test2(i1 %c) { -entry: - %A = alloca {i32, i32} - %B = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 0 - store i32 1, i32* %B - br i1 %c, label %T, label %F -T: - %C = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 1 - store i32 2, i32* %C - br label %F -F: - %X = phi i32* [%B, %entry], [%C, %T] - %Q = load i32, i32* %X - ret i32 %Q -} - -; CHECK-LABEL: @test3( -; CHECK-NEXT: %Q = select i1 %c, i32 1, i32 2 -; CHECK-NEXT: ret i32 %Q -; rdar://8904039 -define i32 @test3(i1 %c) { - %A = alloca {i32, i32} - %B = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 0 - store i32 1, i32* %B - %C = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 1 - store i32 2, i32* %C - - %X = select i1 %c, i32* %B, i32* %C - %Q = load i32, i32* %X - ret i32 %Q -} - -;; We can't scalarize this, a use of the select is not an element access. -define i64 @test4(i1 %c) { -entry: - %A = alloca %PairTy - ; CHECK-LABEL: @test4( - ; CHECK: %A = alloca %PairTy - %B = getelementptr %PairTy, %PairTy* %A, i32 0, i32 0 - store i32 1, i32* %B - %C = getelementptr %PairTy, %PairTy* %A, i32 0, i32 1 - store i32 2, i32* %B - - %X = select i1 %c, i32* %B, i32* %C - %Y = bitcast i32* %X to i64* - %Q = load i64, i64* %Y - ret i64 %Q -} - - -;; -;; Tests for promoting allocas used by selects. -;; rdar://7339113 -;; - -define i32 @test5(i32 *%P) nounwind readnone ssp { -entry: - %b = alloca i32, align 8 - store i32 2, i32* %b, align 8 - - ;; Select on constant condition should be folded. - %p.0 = select i1 false, i32* %b, i32* %P - store i32 123, i32* %p.0 - - %r = load i32, i32* %b, align 8 - ret i32 %r - -; CHECK-LABEL: @test5( -; CHECK: store i32 123, i32* %P -; CHECK: ret i32 2 -} - -define i32 @test6(i32 %x, i1 %c) nounwind readnone ssp { - %a = alloca i32, align 8 - %b = alloca i32, align 8 - store i32 1, i32* %a, align 8 - store i32 2, i32* %b, align 8 - %p.0 = select i1 %c, i32* %b, i32* %a - %r = load i32, i32* %p.0, align 8 - ret i32 %r -; CHECK-LABEL: @test6( -; CHECK-NEXT: %r = select i1 %c, i32 2, i32 1 -; CHECK-NEXT: ret i32 %r -} - -; Verify that the loads happen where the loads are, not where the select is. -define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp { - %a = alloca i32, align 8 - %b = alloca i32, align 8 - store i32 1, i32* %a - store i32 2, i32* %b - %p.0 = select i1 %c, i32* %b, i32* %a - - store i32 0, i32* %a - - %r = load i32, i32* %p.0, align 8 - ret i32 %r -; CHECK-LABEL: @test7( -; CHECK-NOT: alloca i32 -; CHECK: %r = select i1 %c, i32 2, i32 0 -; CHECK: ret i32 %r -} - -;; Promote allocs that are PHI'd together by moving the loads. -define i32 @test8(i32 %x) nounwind readnone ssp { -; CHECK-LABEL: @test8( -; CHECK-NOT: load i32 -; CHECK-NOT: store i32 -; CHECK: %p.0.ld = phi i32 [ 2, %entry ], [ 1, %T ] -; CHECK-NEXT: ret i32 %p.0.ld -entry: - %a = alloca i32, align 8 - %b = alloca i32, align 8 - store i32 1, i32* %a, align 8 - store i32 2, i32* %b, align 8 - %c = icmp eq i32 %x, 0 - br i1 %c, label %T, label %Cont -T: - br label %Cont -Cont: - %p.0 = phi i32* [%b, %entry],[%a, %T] - %r = load i32, i32* %p.0, align 8 - ret i32 %r -} diff --git a/test/Transforms/ScalarRepl/phinodepromote.ll b/test/Transforms/ScalarRepl/phinodepromote.ll deleted file mode 100644 index c3af62485db..00000000000 --- a/test/Transforms/ScalarRepl/phinodepromote.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: opt < %s -simplifycfg -instcombine -mem2reg -S | not grep alloca -; -; This tests to see if mem2reg can promote alloca instructions whose addresses -; are used by PHI nodes that are immediately loaded. The LLVM C++ front-end -; often generates code that looks like this (when it codegen's ?: exprs as -; lvalues), so handling this simple extension is quite useful. -; -; This testcase is what the following program looks like when it reaches -; instcombine: -; -; template -; const T& max(const T& a1, const T& a2) { return a1 < a2 ? a1 : a2; } -; int main() { return max(0, 1); } -; -; This test checks to make sure the combination of instcombine and mem2reg -; perform the transformation. - -define i32 @main() { -entry: - %mem_tmp.0 = alloca i32 ; [#uses=3] - %mem_tmp.1 = alloca i32 ; [#uses=3] - store i32 0, i32* %mem_tmp.0 - store i32 1, i32* %mem_tmp.1 - %tmp.1.i = load i32, i32* %mem_tmp.1 ; [#uses=1] - %tmp.3.i = load i32, i32* %mem_tmp.0 ; [#uses=1] - %tmp.4.i = icmp sle i32 %tmp.1.i, %tmp.3.i ; [#uses=1] - br i1 %tmp.4.i, label %cond_true.i, label %cond_continue.i -cond_true.i: ; preds = %entry - br label %cond_continue.i -cond_continue.i: ; preds = %cond_true.i, %entry - %mem_tmp.i.0 = phi i32* [ %mem_tmp.1, %cond_true.i ], [ %mem_tmp.0, %entry ] ; [#uses=1] - %tmp.3 = load i32, i32* %mem_tmp.i.0 ; [#uses=1] - ret i32 %tmp.3 -} diff --git a/test/Transforms/ScalarRepl/select_promote.ll b/test/Transforms/ScalarRepl/select_promote.ll deleted file mode 100644 index b4ef8c46fa0..00000000000 --- a/test/Transforms/ScalarRepl/select_promote.ll +++ /dev/null @@ -1,18 +0,0 @@ -; Test promotion of loads that use the result of a select instruction. This -; should be simplified by the instcombine pass. - -; RUN: opt < %s -instcombine -mem2reg -S | not grep alloca - -define i32 @main() { - %mem_tmp.0 = alloca i32 ; [#uses=3] - %mem_tmp.1 = alloca i32 ; [#uses=3] - store i32 0, i32* %mem_tmp.0 - store i32 1, i32* %mem_tmp.1 - %tmp.1.i = load i32, i32* %mem_tmp.1 ; [#uses=1] - %tmp.3.i = load i32, i32* %mem_tmp.0 ; [#uses=1] - %tmp.4.i = icmp sle i32 %tmp.1.i, %tmp.3.i ; [#uses=1] - %mem_tmp.i.0 = select i1 %tmp.4.i, i32* %mem_tmp.1, i32* %mem_tmp.0 ; [#uses=1] - %tmp.3 = load i32, i32* %mem_tmp.i.0 ; [#uses=1] - ret i32 %tmp.3 -} - diff --git a/test/Transforms/ScalarRepl/sroa-fca.ll b/test/Transforms/ScalarRepl/sroa-fca.ll deleted file mode 100644 index c6e7c23ab06..00000000000 --- a/test/Transforms/ScalarRepl/sroa-fca.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt < %s -scalarrepl | llvm-dis -; Make sure that SROA "scalar conversion" can handle first class aggregates. - -define i64 @test({i32, i32} %A) { - %X = alloca i64 - %Y = bitcast i64* %X to {i32,i32}* - store {i32,i32} %A, {i32,i32}* %Y - - %Q = load i64, i64* %X - ret i64 %Q -} - -define {i32,i32} @test2(i64 %A) { - %X = alloca i64 - %Y = bitcast i64* %X to {i32,i32}* - store i64 %A, i64* %X - - %Q = load {i32,i32}, {i32,i32}* %Y - ret {i32,i32} %Q -} - diff --git a/test/Transforms/ScalarRepl/sroa_two.ll b/test/Transforms/ScalarRepl/sroa_two.ll deleted file mode 100644 index f2285ef21a4..00000000000 --- a/test/Transforms/ScalarRepl/sroa_two.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: opt < %s -scalarrepl | llvm-dis - -define i32 @test(i32 %X) { - %Arr = alloca [2 x i32] ; <[2 x i32]*> [#uses=3] - %tmp.0 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 0 ; [#uses=1] - store i32 1, i32* %tmp.0 - %tmp.1 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 1 ; [#uses=1] - store i32 2, i32* %tmp.1 - %tmp.3 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 %X ; [#uses=1] - %tmp.4 = load i32, i32* %tmp.3 ; [#uses=1] - ret i32 %tmp.4 -} - diff --git a/test/Transforms/ScalarRepl/union-fp-int.ll b/test/Transforms/ScalarRepl/union-fp-int.ll deleted file mode 100644 index fa64b60685f..00000000000 --- a/test/Transforms/ScalarRepl/union-fp-int.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | \ -; RUN: not grep alloca -; RUN: opt < %s -scalarrepl -S | \ -; RUN: grep "bitcast.*float.*i32" -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -define i32 @test(float %X) { - %X_addr = alloca float ; [#uses=2] - store float %X, float* %X_addr - %X_addr.upgrd.1 = bitcast float* %X_addr to i32* ; [#uses=1] - %tmp = load i32, i32* %X_addr.upgrd.1 ; [#uses=1] - ret i32 %tmp -} - diff --git a/test/Transforms/ScalarRepl/union-packed.ll b/test/Transforms/ScalarRepl/union-packed.ll deleted file mode 100644 index 741de76d581..00000000000 --- a/test/Transforms/ScalarRepl/union-packed.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | \ -; RUN: not grep alloca -; RUN: opt < %s -scalarrepl -S | \ -; RUN: grep bitcast -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -define <4 x i32> @test(<4 x float> %X) { - %X_addr = alloca <4 x float> ; <<4 x float>*> [#uses=2] - store <4 x float> %X, <4 x float>* %X_addr - %X_addr.upgrd.1 = bitcast <4 x float>* %X_addr to <4 x i32>* ; <<4 x i32>*> [#uses=1] - %tmp = load <4 x i32>, <4 x i32>* %X_addr.upgrd.1 ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp -} - diff --git a/test/Transforms/ScalarRepl/union-pointer.ll b/test/Transforms/ScalarRepl/union-pointer.ll deleted file mode 100644 index 6a5db1c4d6b..00000000000 --- a/test/Transforms/ScalarRepl/union-pointer.ll +++ /dev/null @@ -1,74 +0,0 @@ -; PR892 -; RUN: opt < %s -scalarrepl -S | FileCheck %s - - -target datalayout = "e-p:32:32-p1:16:16-n8:16:32" -target triple = "i686-apple-darwin8.7.2" - -%struct.Val = type { i32*, i32 } - -define i8* @test(i16* %X) { -; CHECK-LABEL: @test( -; CHECK-NOT: alloca -; CHECK: ret i8* - %X_addr = alloca i16* ; [#uses=2] - store i16* %X, i16** %X_addr - %X_addr.upgrd.1 = bitcast i16** %X_addr to i8** ; [#uses=1] - %tmp = load i8*, i8** %X_addr.upgrd.1 ; [#uses=1] - ret i8* %tmp -} - -define i8 addrspace(1)* @test_as1(i16 addrspace(1)* %x) { -; CHECK-LABEL: @test_as1( -; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16 -; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)* -; CHECK-NEXT: ret i8 addrspace(1)* %2 - %x_addr = alloca i16 addrspace(1)* - store i16 addrspace(1)* %x, i16 addrspace(1)** %x_addr - %x_addr.upgrd.1 = bitcast i16 addrspace(1)** %x_addr to i8 addrspace(1)** - %tmp = load i8 addrspace(1)*, i8 addrspace(1)** %x_addr.upgrd.1 - ret i8 addrspace(1)* %tmp -} - -define i8 addrspace(1)* @test_as1_array(i16 addrspace(1)* %x) { -; CHECK-LABEL: @test_as1_array( -; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16 -; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)* -; CHECK-NEXT: ret i8 addrspace(1)* %2 - %as_ptr_array = alloca [4 x i16 addrspace(1)*] - %elem1 = getelementptr [4 x i16 addrspace(1)*], [4 x i16 addrspace(1)*]* %as_ptr_array, i32 0, i32 1 - store i16 addrspace(1)* %x, i16 addrspace(1)** %elem1 - %elem1.cast = bitcast i16 addrspace(1)** %elem1 to i8 addrspace(1)** - %tmp = load i8 addrspace(1)*, i8 addrspace(1)** %elem1.cast - ret i8 addrspace(1)* %tmp -} - - -define void @test2(i64 %Op.0) { -; CHECK-LABEL: @test2( -; CHECK-NOT: alloca -; CHECK: ret void - - %tmp = alloca %struct.Val, align 8 ; <%struct.Val*> [#uses=3] - %tmp1 = alloca %struct.Val, align 8 ; <%struct.Val*> [#uses=3] - %tmp.upgrd.2 = call i64 @_Z3foov( ) ; [#uses=1] - %tmp1.upgrd.3 = bitcast %struct.Val* %tmp1 to i64* ; [#uses=1] - store i64 %tmp.upgrd.2, i64* %tmp1.upgrd.3 - %tmp.upgrd.4 = getelementptr %struct.Val, %struct.Val* %tmp, i32 0, i32 0 ; [#uses=1] - %tmp2 = getelementptr %struct.Val, %struct.Val* %tmp1, i32 0, i32 0 ; [#uses=1] - %tmp.upgrd.5 = load i32*, i32** %tmp2 ; [#uses=1] - store i32* %tmp.upgrd.5, i32** %tmp.upgrd.4 - %tmp3 = getelementptr %struct.Val, %struct.Val* %tmp, i32 0, i32 1 ; [#uses=1] - %tmp4 = getelementptr %struct.Val, %struct.Val* %tmp1, i32 0, i32 1 ; [#uses=1] - %tmp.upgrd.6 = load i32, i32* %tmp4 ; [#uses=1] - store i32 %tmp.upgrd.6, i32* %tmp3 - %tmp7 = bitcast %struct.Val* %tmp to { i64 }* ; <{ i64 }*> [#uses=1] - %tmp8 = getelementptr { i64 }, { i64 }* %tmp7, i32 0, i32 0 ; [#uses=1] - %tmp9 = load i64, i64* %tmp8 ; [#uses=1] - call void @_Z3bar3ValS_( i64 %Op.0, i64 %tmp9 ) - ret void -} - -declare i64 @_Z3foov() - -declare void @_Z3bar3ValS_(i64, i64) diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll deleted file mode 100644 index 031ad5e38ca..00000000000 --- a/test/Transforms/ScalarRepl/vector_memcpy.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: opt < %s -scalarrepl -S > %t -; RUN: grep "ret <16 x float> %A" %t -; RUN: grep "ret <16 x float> zeroinitializer" %t -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - -define <16 x float> @foo(<16 x float> %A) nounwind { - %tmp = alloca <16 x float>, align 16 - %tmp2 = alloca <16 x float>, align 16 - store <16 x float> %A, <16 x float>* %tmp - %s = bitcast <16 x float>* %tmp to i8* - %s2 = bitcast <16 x float>* %tmp2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s2, i8* %s, i64 64, i32 16, i1 false) - %R = load <16 x float>, <16 x float>* %tmp2 - ret <16 x float> %R -} - -define <16 x float> @foo2(<16 x float> %A) nounwind { - %tmp2 = alloca <16 x float>, align 16 - - %s2 = bitcast <16 x float>* %tmp2 to i8* - call void @llvm.memset.p0i8.i64(i8* %s2, i8 0, i64 64, i32 16, i1 false) - - %R = load <16 x float>, <16 x float>* %tmp2 - ret <16 x float> %R -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll deleted file mode 100644 index 3c2377fc0f2..00000000000 --- a/test/Transforms/ScalarRepl/vector_promote.ll +++ /dev/null @@ -1,137 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" -target triple = "x86_64-apple-darwin10.0.0" - -define void @test1(<4 x float>* %F, float %f) { -entry: - %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3] - %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2] - %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1] - store <4 x float> %tmp3, <4 x float>* %G - %G.upgrd.1 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 0 ; [#uses=1] - store float %f, float* %G.upgrd.1 - %tmp4 = load <4 x float>, <4 x float>* %G ; <<4 x float>> [#uses=2] - %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1] - store <4 x float> %tmp6, <4 x float>* %F - ret void -; CHECK-LABEL: @test1( -; CHECK-NOT: alloca -; CHECK: %tmp = load <4 x float>, <4 x float>* %F -; CHECK: fadd <4 x float> %tmp, %tmp -; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 0 -} - -define void @test2(<4 x float>* %F, float %f) { -entry: - %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3] - %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2] - %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1] - store <4 x float> %tmp3, <4 x float>* %G - %tmp.upgrd.2 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2 ; [#uses=1] - store float %f, float* %tmp.upgrd.2 - %tmp4 = load <4 x float>, <4 x float>* %G ; <<4 x float>> [#uses=2] - %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1] - store <4 x float> %tmp6, <4 x float>* %F - ret void -; CHECK-LABEL: @test2( -; CHECK-NOT: alloca -; CHECK: %tmp = load <4 x float>, <4 x float>* %F -; CHECK: fadd <4 x float> %tmp, %tmp -; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 2 -} - -define void @test3(<4 x float>* %F, float* %f) { -entry: - %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2] - %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2] - %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1] - store <4 x float> %tmp3, <4 x float>* %G - %tmp.upgrd.3 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2 ; [#uses=1] - %tmp.upgrd.4 = load float, float* %tmp.upgrd.3 ; [#uses=1] - store float %tmp.upgrd.4, float* %f - ret void -; CHECK-LABEL: @test3( -; CHECK-NOT: alloca -; CHECK: %tmp = load <4 x float>, <4 x float>* %F -; CHECK: fadd <4 x float> %tmp, %tmp -; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 2 -} - -define void @test4(<4 x float>* %F, float* %f) { -entry: - %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2] - %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2] - %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1] - store <4 x float> %tmp3, <4 x float>* %G - %G.upgrd.5 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 0 ; [#uses=1] - %tmp.upgrd.6 = load float, float* %G.upgrd.5 ; [#uses=1] - store float %tmp.upgrd.6, float* %f - ret void -; CHECK-LABEL: @test4( -; CHECK-NOT: alloca -; CHECK: %tmp = load <4 x float>, <4 x float>* %F -; CHECK: fadd <4 x float> %tmp, %tmp -; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 0 -} - -define i32 @test5(float %X) { ;; should turn into bitcast. - %X_addr = alloca [4 x float] - %X1 = getelementptr [4 x float], [4 x float]* %X_addr, i32 0, i32 2 - store float %X, float* %X1 - %a = bitcast float* %X1 to i32* - %tmp = load i32, i32* %a - ret i32 %tmp -; CHECK-LABEL: @test5( -; CHECK-NEXT: bitcast float %X to i32 -; CHECK-NEXT: ret i32 -} - -define i64 @test6(<2 x float> %X) { - %X_addr = alloca <2 x float> - store <2 x float> %X, <2 x float>* %X_addr - %P = bitcast <2 x float>* %X_addr to i64* - %tmp = load i64, i64* %P - ret i64 %tmp -; CHECK-LABEL: @test6( -; CHECK: bitcast <2 x float> %X to i64 -; CHECK: ret i64 -} - -%struct.test7 = type { [6 x i32] } - -define void @test7() { -entry: - %memtmp = alloca %struct.test7, align 16 - %0 = bitcast %struct.test7* %memtmp to <4 x i32>* - store <4 x i32> zeroinitializer, <4 x i32>* %0, align 16 - %1 = getelementptr inbounds %struct.test7, %struct.test7* %memtmp, i64 0, i32 0, i64 5 - store i32 0, i32* %1, align 4 - ret void -; CHECK-LABEL: @test7( -; CHECK-NOT: alloca -; CHECK: and i192 -} - -; When promoting an alloca to a 1-element vector type, instructions that -; produce that same vector type should not be changed to insert one element -; into a new vector. -define <1 x i64> @test8(<1 x i64> %a) { -entry: - %a.addr = alloca <1 x i64>, align 8 - %__a = alloca <1 x i64>, align 8 - %tmp = alloca <1 x i64>, align 8 - store <1 x i64> %a, <1 x i64>* %a.addr, align 8 - %0 = load <1 x i64>, <1 x i64>* %a.addr, align 8 - store <1 x i64> %0, <1 x i64>* %__a, align 8 - %1 = load <1 x i64>, <1 x i64>* %__a, align 8 - %2 = bitcast <1 x i64> %1 to <8 x i8> - %3 = bitcast <8 x i8> %2 to <1 x i64> - %vshl_n = shl <1 x i64> %3, - store <1 x i64> %vshl_n, <1 x i64>* %tmp - %4 = load <1 x i64>, <1 x i64>* %tmp - ret <1 x i64> %4 -; CHECK-LABEL: @test8( -; CHECK-NOT: alloca -; CHECK-NOT: insertelement -; CHECK: ret <1 x i64> -} diff --git a/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll b/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll deleted file mode 100644 index 154883122df..00000000000 --- a/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: opt -scalarrepl -S < %s | FileCheck %s -; rdar://9786827 - -; SROA should be able to handle the mixed types and eliminate the allocas here. - -; TODO: Currently it does this by falling back to integer "bags of bits". -; With enough cleverness, it should be possible to convert between <3 x i32> -; and <2 x i64> by using a combination of a bitcast and a shuffle. - -; CHECK: { -; CHECK-NOT: alloca -; CHECK: } - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin11.0.0" - -define <2 x i64> @foo() nounwind { -entry: - %retval = alloca <3 x i32>, align 16 - %z = alloca <4 x i32>, align 16 - %tmp = load <4 x i32>, <4 x i32>* %z - %tmp1 = shufflevector <4 x i32> %tmp, <4 x i32> undef, <3 x i32> - store <3 x i32> %tmp1, <3 x i32>* %retval - %0 = bitcast <3 x i32>* %retval to <2 x i64>* - %1 = load <2 x i64>, <2 x i64>* %0, align 1 - ret <2 x i64> %1 -} diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll deleted file mode 100644 index 2a600b3b279..00000000000 --- a/test/Transforms/ScalarRepl/volatile.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s - -define i32 @voltest(i32 %T) { - %A = alloca {i32, i32} - %B = getelementptr {i32,i32}, {i32,i32}* %A, i32 0, i32 0 - store volatile i32 %T, i32* %B -; CHECK: store volatile - - %C = getelementptr {i32,i32}, {i32,i32}* %A, i32 0, i32 1 - %X = load volatile i32, i32* %C -; CHECK: load volatile - ret i32 %X -} diff --git a/utils/findoptdiff b/utils/findoptdiff index 7a2eab05d71..9a880318438 100755 --- a/utils/findoptdiff +++ b/utils/findoptdiff @@ -70,7 +70,7 @@ dis2="$llvm2/Debug/bin/llvm-dis" opt1="$llvm1/Debug/bin/opt" opt2="$llvm2/Debug/bin/opt" -all_switches="-verify -lowersetjmp -simplifycfg -mem2reg -globalopt -globaldce -ipconstprop -deadargelim -instcombine -simplifycfg -prune-eh -inline -simplify-libcalls -argpromotion -tailduplicate -simplifycfg -scalarrepl -instcombine -predsimplify -condprop -tailcallelim -simplifycfg -reassociate -licm -loop-unswitch -instcombine -indvars -loop-unroll -instcombine -load-vn -gcse -sccp -instcombine -condprop -dse -dce -simplifycfg -deadtypeelim -constmerge -internalize -ipsccp -globalopt -constmerge -deadargelim -inline -prune-eh -globalopt -globaldce -argpromotion -instcombine -predsimplify -scalarrepl -globalsmodref-aa -licm -load-vn -gcse -dse -instcombine -simplifycfg -verify" +all_switches="-verify -lowersetjmp -simplifycfg -mem2reg -globalopt -globaldce -ipconstprop -deadargelim -instcombine -simplifycfg -prune-eh -inline -simplify-libcalls -argpromotion -tailduplicate -simplifycfg -sroa -instcombine -predsimplify -condprop -tailcallelim -simplifycfg -reassociate -licm -loop-unswitch -instcombine -indvars -loop-unroll -instcombine -load-vn -gcse -sccp -instcombine -condprop -dse -dce -simplifycfg -deadtypeelim -constmerge -internalize -ipsccp -globalopt -constmerge -deadargelim -inline -prune-eh -globalopt -globaldce -argpromotion -instcombine -predsimplify -sroa -globalsmodref-aa -licm -load-vn -gcse -dse -instcombine -simplifycfg -verify" #counter=0 function tryit {