Remove the ScalarReplAggregates pass

Nearly all the changes to this pass have been done while maintaining and updating other parts of LLVM. LLVM has had another pass, SROA, which has superseded ScalarReplAggregates for quite some time.

Differential Revision: http://reviews.llvm.org/D21316

llvm-svn: 272737
2025-01-31 12:41:49 +01:00 · 2016-06-15 00:19:09 +00:00 · 2016-06-15 00:19:09 +00:00 · c6df3d773b
commit c6df3d773b
parent 901a8186f9
83 changed files with 30 additions and 5114 deletions
--- a/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli
+++ b/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli
@ -127,17 +127,17 @@ external add_sccp
  : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
  = "llvm_add_sccp"

-(** See the [llvm::createScalarReplAggregatesPass] function. *)
+(** See the [llvm::createSROAPass] function. *)
 external add_scalar_repl_aggregation
  : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
  = "llvm_add_scalar_repl_aggregates"

-(** See the [llvm::createScalarReplAggregatesPassSSA] function. *)
+(** See the [llvm::createSROAPass] function. *)
 external add_scalar_repl_aggregation_ssa
  : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
  = "llvm_add_scalar_repl_aggregates_ssa"

-(** See the [llvm::createScalarReplAggregatesWithThreshold] function. *)
+(** See the [llvm::createSROAPass] function. SROA takes no threshold, so the [int] argument has no effect. *)
 external add_scalar_repl_aggregation_with_threshold
  : int -> [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit
  = "llvm_add_scalar_repl_aggregates_with_threshold"
--- a/docs/Atomics.rst
+++ b/docs/Atomics.rst
@ -398,7 +398,7 @@ operations:
  MemoryDependencyAnalysis (which is also used by other passes like GVN).

 * Folding a load: Any atomic load from a constant global can be constant-folded,
-  because it cannot be observed.  Similar reasoning allows scalarrepl with
+  because it cannot be observed.  Similar reasoning allows sroa with
  atomic loads and stores.

 Atomics and Codegen
--- a/docs/Passes.rst
+++ b/docs/Passes.rst
@ -947,7 +947,7 @@ that this should make CFG hacking much easier.  To make later hacking easier,
 the entry block is split into two, such that all introduced ``alloca``
 instructions (and nothing else) are in the entry block.

-``-scalarrepl``: Scalar Replacement of Aggregates (DT)
+``-sroa``: Scalar Replacement of Aggregates
 ------------------------------------------------------

 The well-known scalar replacement of aggregates transformation.  This transform
@ -956,12 +956,6 @@ individual ``alloca`` instructions for each member if possible.  Then, if
 possible, it transforms the individual ``alloca`` instructions into nice clean
 scalar SSA form.

-This combines a simple scalar replacement of aggregates algorithm with the
-:ref:`mem2reg <passes-mem2reg>` algorithm because they often interact,
-especially for C++ programs.  As such, iterating between ``scalarrepl``, then
-:ref:`mem2reg <passes-mem2reg>` until we run out of things to promote works
-well.
-
 .. _passes-sccp:

 ``-sccp``: Sparse Conditional Constant Propagation
--- a/docs/tutorial/LangImpl7.rst
+++ b/docs/tutorial/LangImpl7.rst
@ -224,7 +224,7 @@ variables in certain circumstances:
   class <../LangRef.html#first-class-types>`_ values (such as pointers,
   scalars and vectors), and only if the array size of the allocation is
   1 (or missing in the .ll file). mem2reg is not capable of promoting
-   structs or arrays to registers. Note that the "scalarrepl" pass is
+   structs or arrays to registers. Note that the "sroa" pass is
   more powerful and can promote structs, "unions", and arrays in many
   cases.

--- a/docs/tutorial/OCamlLangImpl7.rst
+++ b/docs/tutorial/OCamlLangImpl7.rst
@ -224,7 +224,7 @@ variables in certain circumstances:
   class <../LangRef.html#first-class-types>`_ values (such as pointers,
   scalars and vectors), and only if the array size of the allocation is
   1 (or missing in the .ll file). mem2reg is not capable of promoting
-   structs or arrays to registers. Note that the "scalarrepl" pass is
+   structs or arrays to registers. Note that the "sroa" pass is
   more powerful and can promote structs, "unions", and arrays in many
   cases.

--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@ -104,13 +104,13 @@ void LLVMAddReassociatePass(LLVMPassManagerRef PM);
 /** See llvm::createSCCPPass function. */
 void LLVMAddSCCPPass(LLVMPassManagerRef PM);

-/** See llvm::createScalarReplAggregatesPass function. */
+/** See llvm::createSROAPass function. */
 void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM);

-/** See llvm::createScalarReplAggregatesPass function. */
+/** See llvm::createSROAPass function. */
 void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM);

-/** See llvm::createScalarReplAggregatesPass function. */
+/** See llvm::createSROAPass function. The Threshold argument is ignored. */
 void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
                                                  int Threshold);

--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@ -286,8 +286,6 @@ void initializeSCCPLegacyPassPass(PassRegistry &);
 void initializeSCEVAAWrapperPassPass(PassRegistry&);
 void initializeSLPVectorizerPass(PassRegistry&);
 void initializeSROALegacyPassPass(PassRegistry&);
-void initializeSROA_DTPass(PassRegistry&);
-void initializeSROA_SSAUpPass(PassRegistry&);
 void initializeSafeStackPass(PassRegistry&);
 void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&);
 void initializeSanitizerCoverageModulePass(PassRegistry&);
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@ -145,7 +145,7 @@ namespace {
      (void) llvm::createRegionViewerPass();
      (void) llvm::createSCCPPass();
      (void) llvm::createSafeStackPass();
-      (void) llvm::createScalarReplAggregatesPass();
+      (void) llvm::createSROAPass();
      (void) llvm::createSingleLoopExtractorPass();
      (void) llvm::createStripSymbolsPass();
      (void) llvm::createStripNonDebugSymbolsPass();
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@ -104,17 +104,6 @@ FunctionPass *createBitTrackingDCEPass();
 //
 FunctionPass *createSROAPass();

-//===----------------------------------------------------------------------===//
-//
-// ScalarReplAggregates - Break up alloca's of aggregates into multiple allocas
-// if possible.
-//
-FunctionPass *createScalarReplAggregatesPass(signed Threshold = -1,
-                                             bool UseDomTree = true,
-                                             signed StructMemberThreshold = -1,
-                                             signed ArrayElementThreshold = -1,
-                                             signed ScalarLoadThreshold = -1);
-
 //===----------------------------------------------------------------------===//
 //
 // InductiveRangeCheckElimination - Transform loops to elide range checks on
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@ -119,8 +119,6 @@ void LTOCodeGenerator::initializeLTOPasses() {
  initializeArgPromotionPass(R);
  initializeJumpThreadingPass(R);
  initializeSROALegacyPassPass(R);
-  initializeSROA_DTPass(R);
-  initializeSROA_SSAUpPass(R);
  initializePostOrderFunctionAttrsLegacyPassPass(R);
  initializeReversePostOrderFunctionAttrsLegacyPassPass(R);
  initializeGlobalsAAWrapperPassPass(R);
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@ -2081,7 +2081,7 @@ struct x testfunc() {
 }

 We currently compile this to:
-$ clang t.c -S -o - -O0 -emit-llvm | opt -scalarrepl -S
+$ clang t.c -S -o - -O0 -emit-llvm | opt -sroa -S


 %struct.x = type { i8, [4 x i32] }
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@ -170,7 +170,7 @@ generated for it.  The primary issue with the result is that it doesn't do any
 of the optimizations which are possible if we know the address of a va_list
 in the current function is never taken:
 1. We shouldn't spill the XMM registers because we only call va_arg with "int".
-2. It would be nice if we could scalarrepl the va_list.
+2. It would be nice if we could sroa the va_list.
 3. Probably overkill, but it'd be cool if we could peel off the first five
 iterations of the loop.

--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@ -307,7 +307,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
        }

        // Safe to transform, don't even bother trying to "promote" it.
-        // Passing the elements as a scalar will allow scalarrepl to hack on
+        // Passing the elements as a scalar will allow sroa to hack on
        // the new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@ -61,10 +61,6 @@ static cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", cl::init(false), cl::Hidden,
    cl::desc("Run cleanup optimization passes after vectorization."));

-static cl::opt<bool> UseNewSROA("use-new-sroa",
-  cl::init(true), cl::Hidden,
-  cl::desc("Enable the new, experimental SROA pass"));
-
 static cl::opt<bool>
 RunLoopRerolling("reroll-loops", cl::Hidden,
                 cl::desc("Run the loop rerolling pass"));
@ -201,10 +197,7 @@ void PassManagerBuilder::populateFunctionPassManager(
  addInitialAliasAnalysisPasses(FPM);

  FPM.add(createCFGSimplificationPass());
-  if (UseNewSROA)
-    FPM.add(createSROAPass());
-  else
-    FPM.add(createScalarReplAggregatesPass());
+  FPM.add(createSROAPass());
  FPM.add(createEarlyCSEPass());
  FPM.add(createLowerExpectIntrinsicPass());
 }
@ -225,10 +218,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
    legacy::PassManagerBase &MPM) {
  // Start of function pass.
  // Break up aggregate allocas, using SSAUpdater.
-  if (UseNewSROA)
-    MPM.add(createSROAPass());
-  else
-    MPM.add(createScalarReplAggregatesPass(-1, false));
+  MPM.add(createSROAPass());
  MPM.add(createEarlyCSEPass());              // Catch trivial redundancies
  // Speculative execution if the target has divergent branches; otherwise nop.
  MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
@ -654,10 +644,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
  PM.add(createJumpThreadingPass());

  // Break up allocas
-  if (UseNewSROA)
-    PM.add(createSROAPass());
-  else
-    PM.add(createScalarReplAggregatesPass());
+  PM.add(createSROAPass());

  // Run a few AA driven optimizations here and now, to cleanup the code.
  PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@ -45,7 +45,6 @@ add_llvm_library(LLVMScalarOpts
  SCCP.cpp
  SROA.cpp
  Scalar.cpp
-  ScalarReplAggregates.cpp
  Scalarizer.cpp
  SeparateConstOffsetFromGEP.cpp
  SimplifyCFGPass.cpp
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@ -74,8 +74,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
  initializeSCCPLegacyPassPass(Registry);
  initializeIPSCCPLegacyPassPass(Registry);
  initializeSROALegacyPassPass(Registry);
-  initializeSROA_DTPass(Registry);
-  initializeSROA_SSAUpPass(Registry);
  initializeCFGSimplifyPassPass(Registry);
  initializeStructurizeCFGPass(Registry);
  initializeSinkingLegacyPassPass(Registry);
@ -198,16 +196,16 @@ void LLVMAddSCCPPass(LLVMPassManagerRef PM) {
 }

 void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createScalarReplAggregatesPass());
+  unwrap(PM)->add(createSROAPass());
 }

 void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createScalarReplAggregatesPass(-1, false));
+  unwrap(PM)->add(createSROAPass());
 }

 void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
                                                  int Threshold) {
-  unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
+  unwrap(PM)->add(createSROAPass());
 }

 void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
--- a/test/CodeGen/X86/vec_ins_extract.ll
+++ b/test/CodeGen/X86/vec_ins_extract.ll
@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: opt < %s -scalarrepl -instcombine | \
+; RUN: opt < %s -sroa -instcombine | \
 ; RUN:   llc -march=x86 -mcpu=yonah | not grep sub.*esp

 ; This checks that various insert/extract idiom work without going to the
--- a/test/Transforms/ArgumentPromotion/inalloca.ll
+++ b/test/Transforms/ArgumentPromotion/inalloca.ll
@ -1,10 +1,10 @@
-; RUN: opt %s -argpromotion -scalarrepl -S | FileCheck %s
+; RUN: opt %s -argpromotion -sroa -S | FileCheck %s

 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

 %struct.ss = type { i32, i32 }

-; Argpromote + scalarrepl should change this to passing the two integers by value.
+; Argpromote + sroa should change this to passing the two integers by value.
 define internal i32 @f(%struct.ss* inalloca  %s) {
 entry:
  %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@ -1,4 +1,4 @@
-; RUN: opt < %s -inline -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -inline -sroa -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

 define i32 @test1f(i32 %i) {
--- a/test/Transforms/Inline/crash2.ll
+++ b/test/Transforms/Inline/crash2.ll
@ -1,4 +1,4 @@
-; RUN: opt  -inline -scalarrepl -max-cg-scc-iterations=1 -disable-output < %s
+; RUN: opt  -inline -sroa -max-cg-scc-iterations=1 -disable-output < %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.3"

--- a/test/Transforms/Inline/devirtualize-3.ll
+++ b/test/Transforms/Inline/devirtualize-3.ll
@ -1,4 +1,4 @@
-; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine < %s | FileCheck %s
+; RUN: opt -basicaa -inline -S -sroa -gvn -instcombine < %s | FileCheck %s
 ; PR5009

 ; CHECK: define i32 @main() 
--- a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -scalarrepl -S | not grep " = alloca"
+; RUN: opt < %s -instcombine -sroa -S | not grep " = alloca"
 ; rdar://6417724
 ; Instcombine shouldn't do anything to this function that prevents promoting the allocas inside it.

--- a/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
+++ b/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info
+; RUN: opt < %s -sroa -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info

 define void @inflate() {
 entry:
--- a/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
+++ b/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl-ssa -loop-unswitch -disable-output
+; RUN: opt < %s -sroa -loop-unswitch -disable-output
 ; PR11016
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.2"
--- a/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
+++ b/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
@ -1,13 +0,0 @@
-; RUN: opt < %s -scalarrepl -instcombine -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-; Test that an array is not incorrectly deconstructed.
-
-define i32 @test() nounwind {
-	%X = alloca [4 x i32]		; <[4 x i32]*> [#uses=1]
-	%Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 0		; <i32*> [#uses=1]
-        ; Must preserve arrayness!
-	%Z = getelementptr i32, i32* %Y, i64 1		; <i32*> [#uses=1]
-	%A = load i32, i32* %Z		; <i32> [#uses=1]
-	ret i32 %A
-}
--- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
+++ b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
@ -1,12 +0,0 @@
-; Scalar replacement was incorrectly promoting this alloca!!
-;
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-define i8* @test() {
-	%A = alloca [30 x i8]		; <[30 x i8]*> [#uses=1]
-	%B = getelementptr [30 x i8], [30 x i8]* %A, i64 0, i64 0		; <i8*> [#uses=2]
-	%C = getelementptr i8, i8* %B, i64 1		; <i8*> [#uses=1]
-	store i8 0, i8* %B
-	ret i8* %C
-}
-; CHECK: alloca [
--- a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
+++ b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
@ -1,16 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep "alloca %%T"
-
-%T = type { [80 x i8], i32, i32 }
-declare i32 @.callback_1(i8*)
-
-declare void @.iter_2(i32 (i8*)*, i8*)
-
-define i32 @main() {
-	%d = alloca %T		; <{ [80 x i8], i32, i32 }*> [#uses=2]
-	%tmp.0 = getelementptr %T, %T* %d, i64 0, i32 2		; <i32*> [#uses=1]
-	store i32 0, i32* %tmp.0
-	%tmp.1 = getelementptr %T, %T* %d, i64 0, i32 0, i64 0		; <i8*> [#uses=1]
-	call void @.iter_2( i32 (i8*)* @.callback_1, i8* %tmp.1 )
-	ret i32 0
-}
-
--- a/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
+++ b/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
@ -1,20 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
-	%vsiidx = alloca [2 x <4 x i32>], align 16		; <[2 x <4 x i32>]*> [#uses=3]
-	%tmp = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v0 )		; <<4 x i32>> [#uses=2]
-	%tmp.upgrd.1 = bitcast <4 x i32> %tmp to <2 x i64>		; <<2 x i64>> [#uses=0]
-	%tmp.upgrd.2 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 0		; <<4 x i32>*> [#uses=1]
-	store <4 x i32> %tmp, <4 x i32>* %tmp.upgrd.2
-	%tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v1 )		; <<4 x i32>> [#uses=2]
-	%tmp10.upgrd.3 = bitcast <4 x i32> %tmp10 to <2 x i64>		; <<2 x i64>> [#uses=0]
-	%tmp14 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 1		; <<4 x i32>*> [#uses=1]
-	store <4 x i32> %tmp10, <4 x i32>* %tmp14
-	%tmp15 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 0, i32 4		; <i32*> [#uses=1]
-	%tmp.upgrd.4 = load i32, i32* %tmp15		; <i32> [#uses=1]
-	ret i32 %tmp.upgrd.4
-}
-
-declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
-
--- a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
+++ b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
@ -1,24 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep memcpy
-; PR1421
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-apple-darwin8"
-
-%struct.LongestMember = type { i8, i32 }
-%struct.MyString = type { i32 }
-%struct.UnionType = type { %struct.LongestMember }
-
-define void @_Z4testP9UnionTypePS0_(%struct.UnionType* %p, %struct.UnionType** %pointerToUnion) {
-entry:
-  %tmp = alloca %struct.UnionType, align 8
-  %tmp2 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
-  %tmp13 = getelementptr %struct.UnionType, %struct.UnionType* %p, i32 0, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false)
-  %tmp5 = load %struct.UnionType*, %struct.UnionType** %pointerToUnion
-  %tmp56 = getelementptr %struct.UnionType, %struct.UnionType* %tmp5, i32 0, i32 0, i32 0
-  %tmp7 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false)
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
+++ b/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
@ -1,36 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep shr
-
-; FIXME: I think this test is no longer valid.
-; It was working because SROA was aborting when
-; no datalayout was supplied
-; XFAIL: *
-
-
-%struct.S = type { i16 }
-
-define zeroext i1 @f(i16 signext  %b)   {
-entry:
-	%b_addr = alloca i16		; <i16*> [#uses=2]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%s = alloca %struct.S		; <%struct.S*> [#uses=2]
-	%tmp = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i16 %b, i16* %b_addr
-	%tmp1 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0		; <i16*> [#uses=1]
-	%tmp2 = load i16, i16* %b_addr, align 2		; <i16> [#uses=1]
-	store i16 %tmp2, i16* %tmp1, align 2
-	%tmp3 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0		; <i16*> [#uses=1]
-	%tmp34 = bitcast i16* %tmp3 to [2 x i1]*		; <[2 x i1]*> [#uses=1]
-	%tmp5 = getelementptr [2 x i1], [2 x i1]* %tmp34, i32 0, i32 1		; <i1*> [#uses=1]
-	%tmp6 = load i1, i1* %tmp5, align 1		; <i1> [#uses=1]
-	%tmp67 = zext i1 %tmp6 to i32		; <i32> [#uses=1]
-	store i32 %tmp67, i32* %tmp, align 4
-	%tmp8 = load i32, i32* %tmp, align 4		; <i32> [#uses=1]
-	store i32 %tmp8, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %entry
-	%retval9 = load i32, i32* %retval		; <i32> [#uses=1]
-	%retval910 = trunc i32 %retval9 to i1		; <i1> [#uses=1]
-	ret i1 %retval910
-}
--- a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
+++ b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
@ -1,21 +0,0 @@
-; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i8 17"
-; rdar://5707076
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-darwin9.1.0"
-	%struct.T = type <{ i8, [3 x i8] }>
-
-define i8 @f() {
-entry:
-	%s = alloca [1 x %struct.T], align 4		; <[1 x %struct.T]*> [#uses=2]
-	%T3 = bitcast [1 x %struct.T]* %s to i32*
-	store i32 -61184, i32* %T3
-
-	%tmp16 = getelementptr [1 x %struct.T], [1 x %struct.T]* %s, i32 0, i32 0		; <%struct.T*> [#uses=1]
-	%tmp17 = getelementptr %struct.T, %struct.T* %tmp16, i32 0, i32 1		; <[3 x i8]*> [#uses=1]
-	%tmp1718 = bitcast [3 x i8]* %tmp17 to i32*		; <i32*> [#uses=1]
-	%tmp19 = load i32, i32* %tmp1718, align 4		; <i32> [#uses=1]
-	%mask = and i32 %tmp19, 16777215		; <i32> [#uses=2]
-	%mask2324 = trunc i32 %mask to i8		; <i8> [#uses=1]
-	ret i8 %mask2324
-}
-
--- a/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
+++ b/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
@ -1,16 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
-	%struct..0anon = type { <1 x i64> }
-
-define i32 @main(i32 %argc, i8** %argv) {
-entry:
-	%c = alloca %struct..0anon		; <%struct..0anon*> [#uses=2]
-	%tmp2 = getelementptr %struct..0anon, %struct..0anon* %c, i32 0, i32 0		; <<1 x i64>*> [#uses=1]
-	store <1 x i64> zeroinitializer, <1 x i64>* %tmp2, align 8
-	%tmp7 = getelementptr %struct..0anon, %struct..0anon* %c, i32 0, i32 0		; <<1 x i64>*> [#uses=1]
-	%tmp78 = bitcast <1 x i64>* %tmp7 to [2 x i32]*		; <[2 x i32]*> [#uses=1]
-	%tmp9 = getelementptr [2 x i32], [2 x i32]* %tmp78, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp10 = load i32, i32* %tmp9, align 4		; <i32> [#uses=0]
-	unreachable
-}
--- a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
+++ b/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
@ -1,33 +0,0 @@
-; This test shows an alloca of a struct and an array that can be reduced to
-; multiple variables easily. However, the alloca is used by a store
-; instruction, which was not possible before aggregrates were first class
-; values. This checks of scalarrepl splits up the struct and array properly.
-
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @foo() {
-	%target = alloca { i32, i32 }		; <{ i32, i32 }*> [#uses=1]
-        ; Build a first class struct to store
-	%res1 = insertvalue { i32, i32 } undef, i32 1, 0		; <{ i32, i32 }> [#uses=1]
-	%res2 = insertvalue { i32, i32 } %res1, i32 2, 1		; <{ i32, i32 }> [#uses=1]
-        ; And store it
-	store { i32, i32 } %res2, { i32, i32 }* %target
-        ; Actually use %target, so it doesn't get removed altogether
-        %ptr = getelementptr { i32, i32 }, { i32, i32 }* %target, i32 0, i32 0
-        %val = load i32, i32* %ptr
-	ret i32 %val
-}
-
-define i32 @bar() {
-	%target = alloca [ 2 x i32 ]		; <{ i32, i32 }*> [#uses=1]
-        ; Build a first class array to store
-	%res1 = insertvalue [ 2 x i32 ] undef, i32 1, 0		; <{ i32, i32 }> [#uses=1]
-	%res2 = insertvalue [ 2 x i32 ] %res1, i32 2, 1		; <{ i32, i32 }> [#uses=1]
-        ; And store it
-	store [ 2 x i32 ] %res2, [ 2 x i32 ]* %target
-        ; Actually use %target, so it doesn't get removed altogether
-        %ptr = getelementptr [ 2 x i32 ], [ 2 x i32 ]* %target, i32 0, i32 0
-        %val = load i32, i32* %ptr
-	ret i32 %val
-}
--- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@ -1,17 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep "call.*mem" 
-; PR2369
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-
-define void @memtest1(i8* %dst, i8* %src) nounwind {
-entry:
-  %temp = alloca [200 x i8]
-  %temp1 = bitcast [200 x i8]* %temp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i32 1, i1 false)
-  %temp3 = bitcast [200 x i8]* %temp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i32 1, i1 false)
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
@ -1,23 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep "s = alloca .struct.x"
-; PR2423
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-
-%struct.x = type { [1 x i32], i32, i32 }
-
-define i32 @b() nounwind {
-entry:
-  %s = alloca %struct.x
-  %r = alloca %struct.x
-  %0 = call i32 @a(%struct.x* %s) nounwind
-  %r1 = bitcast %struct.x* %r to i8*
-  %s2 = bitcast %struct.x* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false)
-  %1 = getelementptr %struct.x, %struct.x* %r, i32 0, i32 0, i32 1
-  %2 = load i32, i32* %1, align 4
-  ret i32 %2
-}
-
-declare i32 @a(%struct.x*)
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
@ -1,25 +0,0 @@
-; This test checks to see if scalarrepl also works when a gep with all zeroes is
-; used instead of a bitcast to prepare a memmove pointer argument. Previously,
-; this would not work when there was a vector involved in the struct, preventing
-; scalarrepl from removing the alloca below.
-
-; RUN: opt < %s -scalarrepl -S > %t
-; RUN: cat %t | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-%struct.two = type <{ < 2 x i8 >, i16 }>
-
-define void @main(%struct.two* %D, i16 %V) {
-entry:
-	%S = alloca %struct.two
-        %S.2 = getelementptr %struct.two, %struct.two* %S, i32 0, i32 1
-        store i16 %V, i16* %S.2
-        ; This gep is effectively a bitcast to i8*, but is sometimes generated
-        ; because the type of the first element in %struct.two is i8.
-	%tmpS = getelementptr %struct.two, %struct.two* %S, i32 0, i32 0, i32 0 
-	%tmpD = bitcast %struct.two* %D to i8*
-        call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1, i1 false)
-        ret void
-}
-
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
+++ b/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
@ -1,16 +0,0 @@
-; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i32 %x"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-
-%pair = type { [1 x i32], i32 }
-
-define i32 @f(i32 %x, i32 %y) {
-       %instance = alloca %pair
-       %first = getelementptr %pair, %pair* %instance, i32 0, i32 0
-       %cast = bitcast [1 x i32]* %first to i32*
-       store i32 %x, i32* %cast
-       %second = getelementptr %pair, %pair* %instance, i32 0, i32 1
-       store i32 %y, i32* %second
-       %v = load i32, i32* %cast
-       ret i32 %v
-}
--- a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
+++ b/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
@ -1,20 +0,0 @@
-; RUN: opt < %s -scalarrepl -instcombine -inline -instcombine -S | grep "ret i32 42"
-; PR3489
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "x86_64-apple-darwin10.0"
-	%struct.anon = type <{ i32, i32, i32 }>
-
-define i32 @f({ i64, i64 }) nounwind {
-entry:
-	%tmp = alloca { i64, i64 }, align 8		; <{ i64, i64 }*> [#uses=2]
-	store { i64, i64 } %0, { i64, i64 }* %tmp
-	%1 = bitcast { i64, i64 }* %tmp to %struct.anon*		; <%struct.anon*> [#uses=1]
-	%2 = load %struct.anon, %struct.anon* %1, align 8		; <%struct.anon> [#uses=1]
-        %tmp3 = extractvalue %struct.anon %2, 0
-	ret i32 %tmp3
-}
-
-define i32 @g() {
-  %a = call i32 @f({i64,i64} { i64 42, i64 1123123123123123 })
-  ret i32 %a
-}
--- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@ -1,19 +0,0 @@
-; The store into %p should end up with a known alignment of 1, since the memcpy
-; is only known to access it with 1-byte alignment.
-; RUN: opt < %s -scalarrepl -S | grep "store i16 1, .*, align 1"
-; PR3720
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-        %struct.st = type { i16 }
-
-define void @f(i8* %p) nounwind {
-entry:
-        %s = alloca %struct.st, align 4  ; <%struct.st*> [#uses=2]
-        %0 = getelementptr %struct.st, %struct.st* %s, i32 0, i32 0  ; <i16*> [#uses=1]
-        store i16 1, i16* %0, align 4
-        %s1 = bitcast %struct.st* %s to i8*  ; <i8*> [#uses=1]
-        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %s1, i32 2, i32 1, i1 false)
-        ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
@ -1,90 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; Radar 7441282
-
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
-
-%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
-%struct.int16x8_t = type { <8 x i16> }
-%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] }
-%union..0anon = type { %struct.int16x8x2_t }
-
-define void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind {
-; CHECK-LABEL: @test(
-; CHECK-NOT: alloca
-; CHECK: "alloca point"
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-
-entry:
-  %tmp_addr = alloca %struct.int16x8_t
-  %dst_addr = alloca %struct.int16x8x2_t*
-  %__rv = alloca %union..0anon
-  %__bx = alloca %struct.int16x8_t
-  %__ax = alloca %struct.int16x8_t
-  %tmp2 = alloca %struct.int16x8x2_t
-  %0 = alloca %struct.int16x8x2_t
-  %"alloca point" = bitcast i32 0 to i32
-  %1 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
-  store <8 x i16> %tmp.0, <8 x i16>* %1
-  store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr
-  %2 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0
-  %3 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
-  %4 = load <8 x i16>, <8 x i16>* %3, align 16
-  store <8 x i16> %4, <8 x i16>* %2, align 16
-  %5 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0
-  %6 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
-  %7 = load <8 x i16>, <8 x i16>* %6, align 16
-  store <8 x i16> %7, <8 x i16>* %5, align 16
-  %8 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0
-  %9 = load <8 x i16>, <8 x i16>* %8, align 16
-  %10 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0
-  %11 = load <8 x i16>, <8 x i16>* %10, align 16
-  %12 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0
-  %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t*
-  %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-  %15 = getelementptr inbounds %struct.__neon_int16x8x2_t, %struct.__neon_int16x8x2_t* %13, i32 0, i32 0
-  store <8 x i16> %14, <8 x i16>* %15
-  %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-  %17 = getelementptr inbounds %struct.__neon_int16x8x2_t, %struct.__neon_int16x8x2_t* %13, i32 0, i32 1
-  store <8 x i16> %16, <8 x i16>* %17
-  %18 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0
-  %19 = bitcast %struct.int16x8x2_t* %0 to i8*
-  %20 = bitcast %struct.int16x8x2_t* %18 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i32 16, i1 false)
-  %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
-  %21 = bitcast %struct.int16x8x2_t* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false)
-  %22 = load %struct.int16x8x2_t*, %struct.int16x8x2_t** %dst_addr, align 4
-  %23 = bitcast %struct.int16x8x2_t* %22 to i8*
-  %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false)
-  br label %return
-
-return:                                           ; preds = %entry
-  ret void
-}
-
-; Radar 7466574
-%struct._NSRange = type { i64 }
-
-define void @test_memcpy_self() nounwind {
-entry:
-  %range = alloca %struct._NSRange
-  br i1 undef, label %cond.true, label %cond.false
-
-cond.true:                                        ; preds = %entry
-  %tmp3 = bitcast %struct._NSRange* %range to i8*
-  %tmp4 = bitcast %struct._NSRange* %range to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8, i1 false)
-  ret void
-
-cond.false:                                       ; preds = %entry
-  ret void
-
-; CHECK-LABEL: @test_memcpy_self(
-; CHECK-NOT: alloca
-; CHECK: br i1
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
+++ b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
@ -1,18 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; Radar 7552893
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-
-%struct.test = type { [3 x double] }
-
-define void @test_memcpy_self() nounwind {
-; CHECK-LABEL: @test_memcpy_self(
-; CHECK-NOT: alloca
-; CHECK: ret void
-  %1 = alloca %struct.test
-  %2 = bitcast %struct.test* %1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i32 4, i1 false)
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
+++ b/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
@ -1,26 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; PR9820
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-@func_1.l_10 = internal unnamed_addr constant [4 x i32] [i32 1, i32 0, i32 0, i32 0], align 16
-
-define i32* @noop(i32* %p_29) nounwind readnone {
-entry:
-  ret i32* %p_29
-}
-
-define i32 @main() nounwind {
-entry:
-  %l_10 = alloca [4 x i32], align 16
-  %tmp = bitcast [4 x i32]* %l_10 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i32 16, i1 false)
-; CHECK: call void @llvm.memcpy
-  %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %l_10, i64 0, i64 0
-  %call = call i32* @noop(i32* %arrayidx)
-  store i32 0, i32* %call
-  ret i32 0
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
+++ b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
@ -1,75 +0,0 @@
-; RUN: opt < %s -S -scalarrepl | FileCheck %s
-; RUN: opt < %s -S -scalarrepl-ssa | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.7.0"
-
-%0 = type { <2 x float>, float }
-%struct.PointC3 = type { %struct.array }
-%struct.Point_3 = type { %struct.PointC3 }
-%struct.array = type { [3 x float], [4 x i8] }
-
-; CHECK: main
-; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer, i32 0
-
-define void @main() uwtable ssp {
-entry:
-  %ref.tmp2 = alloca %0, align 16
-  %tmpcast = bitcast %0* %ref.tmp2 to %struct.Point_3*
-  %0 = getelementptr %0, %0* %ref.tmp2, i64 0, i32 0
-  store <2 x float> zeroinitializer, <2 x float>* %0, align 16
-  %1 = getelementptr inbounds %struct.Point_3, %struct.Point_3* %tmpcast, i64 0, i32 0
-  %base.i.i.i = getelementptr inbounds %struct.PointC3, %struct.PointC3* %1, i64 0, i32 0
-  %arrayidx.i.i.i.i = getelementptr inbounds %struct.array, %struct.array* %base.i.i.i, i64 0, i32 0, i64 0
-  %tmp5.i.i = load float, float* %arrayidx.i.i.i.i, align 4
-  ret void
-}
-
-; CHECK: test1
-; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer, i32 0
-
-define void @test1() uwtable ssp {
-entry:
-  %ref.tmp2 = alloca {<2 x float>, float}, align 16
-  %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
-  %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
-  store <2 x float> zeroinitializer, <2 x float>* %0, align 16
-  %tmp5.i.i = load float, float* %tmpcast, align 4
-  ret void
-}
-
-; CHECK: test2
-; CHECK-NOT: alloca
-; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> zeroinitializer, i32 0
-; CHECK: fadd float %[[A]], 1.000000e+00
-; CHECK-NOT: insertelement
-; CHECK-NOT: extractelement
-
-define float @test2() uwtable ssp {
-entry:
-  %ref.tmp2 = alloca {<2 x float>, float}, align 16
-  %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
-  %tmpcast2 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 1
-  %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
-  store <2 x float> zeroinitializer, <2 x float>* %0, align 16
-  store float 1.0, float* %tmpcast2, align 4
-  %r1 = load float, float* %tmpcast, align 4
-  %r2 = load float, float* %tmpcast2, align 4
-  %r = fadd float %r1, %r2
-  ret float %r
-}
-
-; CHECK: test3
-; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> <float 2.000000e+00, float 3.000000e+00>, i32 1
-; CHECK: ret float %[[A]]
-
-define float @test3() {
-entry:
-  %ai = alloca { <2 x float>, <2 x float> }, align 8
-  store { <2 x float>, <2 x float> } {<2 x float> <float 0.0, float 1.0>, <2 x float> <float 2.0, float 3.0>}, { <2 x float>, <2 x float> }* %ai, align 8
-  %tmpcast = bitcast { <2 x float>, <2 x float> }* %ai to [4 x float]*
-  %arrayidx = getelementptr inbounds [4 x float], [4 x float]* %tmpcast, i64 0, i64 3
-  %f = load float, float* %arrayidx, align 4
-  ret float %f
-}
--- a/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
+++ b/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
@ -1,37 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
-
-; CHECK: f
-; CHECK-NOT: alloca
-; CHECK: %[[A:[a-z0-9]*]] = and i128 undef, -16777216
-; CHECK: %[[B:[a-z0-9]*]] = bitcast i128 %[[A]] to <4 x float>
-; CHECK: %[[C:[a-z0-9]*]] = extractelement <4 x float> %[[B]], i32 0
-; CHECK: ret float %[[C]]
-
-define float @f() nounwind ssp {
-entry:
-  %a = alloca <4 x float>, align 16
-  %p = bitcast <4 x float>* %a to i8*
-  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i32 16, i1 false)
-  %vec = load <4 x float>, <4 x float>* %a, align 8
-  %val = extractelement <4 x float> %vec, i32 0
-  ret float %val
-}
-
-; CHECK: g
-; CHECK-NOT: alloca
-; CHECK: and i128
-
-define void @g() nounwind ssp {
-entry:
-  %a = alloca { <4 x float> }, align 16
-  %p = bitcast { <4 x float> }* %a to i8*
-  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false)
-  %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]*
-  %arrayidx = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* %q, i32 0, i32 0
-  store <2 x float> undef, <2 x float>* %arrayidx, align 8
-  ret void
-}
-
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
+++ b/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
@ -1,40 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; PR10987
-
-; Make sure scalarrepl doesn't move a load across an invoke which could
-; modify the loaded value.
-; (The PHI could theoretically be transformed by splitting the critical
-; edge, but scalarrepl doesn't modify the CFG, at least at the moment.)
-
-declare void @extern_fn(i32*)
-declare i32 @extern_fn2(i32)
-declare i32 @__gcc_personality_v0(i32, i64, i8*, i8*)
-
-define void @odd_fn(i1) noinline personality i32 (i32, i64, i8*, i8*)* @__gcc_personality_v0 {
-  %retptr1 = alloca i32
-  %retptr2 = alloca i32
-  br i1 %0, label %then, label %else
-
-then:                                             ; preds = %2
-  invoke void @extern_fn(i32* %retptr1)
-          to label %join unwind label %unwind
-
-else:                                             ; preds = %2
-  store i32 3, i32* %retptr2
-  br label %join
-
-join:                                             ; preds = %then, %else
-  %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ]
-  %storemerge = load i32, i32* %storemerge.in
-  %x3 = call i32 @extern_fn2(i32 %storemerge)
-  ret void
-
-unwind:                                           ; preds = %then
-  %info = landingpad { i8*, i32 }
-          cleanup
-  call void @extern_fn(i32* null)
-  unreachable
-}
-
-; CHECK-LABEL: define void @odd_fn(
-; CHECK: %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ]
--- a/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
+++ b/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
@ -1,22 +0,0 @@
-; RUN: opt < %s -S -scalarrepl | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin11.0.1"
-
-; CHECK: test
-; CHECK-NOT: alloca
-
-define void @test() nounwind {
-entry:
-  %a156286 = alloca [4 x <4 x float>], align 16
-  br i1 undef, label %cif_done, label %for_test158.preheader
-
-for_test158.preheader:                            ; preds = %entry
-  %a156286305 = bitcast [4 x <4 x float>]* %a156286 to i8*
-  call void @llvm.memset.p0i8.i64(i8* %a156286305, i8 -1, i64 64, i32 16, i1 false)
-  unreachable
-
-cif_done:                                         ; preds = %entry
-  ret void
-}
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
+++ b/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
@ -1,19 +0,0 @@
-; RUN: opt < %s -S -scalarrepl | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
-target triple = "thumbv7-apple-ios5.0.0"
-
-%union.anon = type { <4 x float> }
-
-; CHECK-LABEL: @test(
-; CHECK-NOT: alloca
-
-define void @test() nounwind {
-entry:
-  %u = alloca %union.anon, align 16
-  %u164 = bitcast %union.anon* %u to [4 x i32]*
-  %arrayidx165 = getelementptr inbounds [4 x i32], [4 x i32]* %u164, i32 0, i32 0
-  store i32 undef, i32* %arrayidx165, align 4
-  %v186 = bitcast %union.anon* %u to <4 x float>*
-  store <4 x float> undef, <4 x float>* %v186, align 16
-  ret void
-}
--- a/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
+++ b/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
@ -1,26 +0,0 @@
-; RUN: opt < %s -S -scalarrepl | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-%struct.S = type { [2 x %struct.anon], double }
-%struct.anon = type {}
-
-; CHECK: @test()
-; CHECK-NOT: alloca
-; CHECK: ret double 1.0
-
-define double @test() nounwind uwtable ssp {
-entry:
-  %retval = alloca %struct.S, align 8
-  %ret = alloca %struct.S, align 8
-  %b = getelementptr inbounds %struct.S, %struct.S* %ret, i32 0, i32 1
-  store double 1.000000e+00, double* %b, align 8
-  %0 = bitcast %struct.S* %retval to i8*
-  %1 = bitcast %struct.S* %ret to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i32 8, i1 false)
-  %2 = bitcast %struct.S* %retval to double*
-  %3 = load double, double* %2, align 1
-  ret double %3
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/AggregatePromote.ll
+++ b/test/Transforms/ScalarRepl/AggregatePromote.ll
@ -1,51 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   not grep alloca
-
-target datalayout = "E-p:32:32"
-target triple = "powerpc-apple-darwin8.0.0"
-
-define i64 @test1(i64 %X) {
-	%A = alloca i64		; <i64*> [#uses=3]
-	store i64 %X, i64* %A
-	%B = bitcast i64* %A to i32*		; <i32*> [#uses=1]
-	%C = bitcast i32* %B to i8*		; <i8*> [#uses=1]
-	store i8 0, i8* %C
-	%Y = load i64, i64* %A		; <i64> [#uses=1]
-	ret i64 %Y
-}
-
-define i8 @test2(i64 %X) {
-	%X_addr = alloca i64		; <i64*> [#uses=2]
-	store i64 %X, i64* %X_addr
-	%tmp.0 = bitcast i64* %X_addr to i32*		; <i32*> [#uses=1]
-	%tmp.1 = getelementptr i32, i32* %tmp.0, i32 1		; <i32*> [#uses=1]
-	%tmp.2 = bitcast i32* %tmp.1 to i8*		; <i8*> [#uses=1]
-	%tmp.3 = getelementptr i8, i8* %tmp.2, i32 3		; <i8*> [#uses=1]
-	%tmp.2.upgrd.1 = load i8, i8* %tmp.3		; <i8> [#uses=1]
-	ret i8 %tmp.2.upgrd.1
-}
-
-define i16 @crafty(i64 %X) {
-	%a = alloca { i64 }		; <{ i64 }*> [#uses=2]
-	%tmp.0 = getelementptr { i64 }, { i64 }* %a, i32 0, i32 0		; <i64*> [#uses=1]
-	store i64 %X, i64* %tmp.0
-	%tmp.3 = bitcast { i64 }* %a to [4 x i16]*		; <[4 x i16]*> [#uses=2]
-	%tmp.4 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 3		; <i16*> [#uses=1]
-	%tmp.5 = load i16, i16* %tmp.4		; <i16> [#uses=1]
-	%tmp.8 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 2		; <i16*> [#uses=1]
-	%tmp.9 = load i16, i16* %tmp.8		; <i16> [#uses=1]
-	%tmp.10 = or i16 %tmp.9, %tmp.5		; <i16> [#uses=1]
-	ret i16 %tmp.10
-}
-
-define i16 @crafty2(i64 %X) {
-	%a = alloca i64		; <i64*> [#uses=2]
-	store i64 %X, i64* %a
-	%tmp.3 = bitcast i64* %a to [4 x i16]*		; <[4 x i16]*> [#uses=2]
-	%tmp.4 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 3		; <i16*> [#uses=1]
-	%tmp.5 = load i16, i16* %tmp.4		; <i16> [#uses=1]
-	%tmp.8 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 2		; <i16*> [#uses=1]
-	%tmp.9 = load i16, i16* %tmp.8		; <i16> [#uses=1]
-	%tmp.10 = or i16 %tmp.9, %tmp.5		; <i16> [#uses=1]
-	ret i16 %tmp.10
-}
--- a/test/Transforms/ScalarRepl/DifferingTypes.ll
+++ b/test/Transforms/ScalarRepl/DifferingTypes.ll
@ -1,16 +0,0 @@
-; This is a feature test.  Hopefully one day this will be implemented.  The 
-; generated code should perform the appropriate masking operations required 
-; depending on the endianness of the target...
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @testfunc(i32 %i, i8 %j) {
-	%I = alloca i32		; <i32*> [#uses=3]
-	store i32 %i, i32* %I
-	%P = bitcast i32* %I to i8*		; <i8*> [#uses=1]
-	store i8 %j, i8* %P
-	%t = load i32, i32* %I		; <i32> [#uses=1]
-	ret i32 %t
-}
-
--- a/test/Transforms/ScalarRepl/address-space.ll
+++ b/test/Transforms/ScalarRepl/address-space.ll
@ -1,35 +0,0 @@
-; RUN: opt -S -scalarrepl < %s | FileCheck %s
-; PR7437 - Make sure SROA preserves address space of memcpy when
-; hacking on it.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10"
-
-%struct.anon = type { [1 x float] }
-
-; CHECK-LABEL: define void @Test(
-; CHECK: load float, float addrspace(2)*
-; CHECK-NEXT: fsub float
-; CHECK: store float {{.*}}, float addrspace(2)* 
-define void @Test(%struct.anon addrspace(2)* %pPtr) nounwind {
-entry:
-  %s = alloca %struct.anon, align 4               ; <%struct.anon*> [#uses=3]
-  %arrayidx = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
-  %tmp1 = bitcast %struct.anon* %s to i8*         ; <i8*> [#uses=1]
-  %tmp2 = bitcast %struct.anon addrspace(2)* %arrayidx to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %tmp1, i8 addrspace(2)* %tmp2, i64 4, i32 4, i1 false)
-  %tmp3 = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 ; <[1 x float]*> [#uses=1]
-  %arrayidx4 = getelementptr inbounds [1 x float], [1 x float]* %tmp3, i32 0, i64 0 ; <float*> [#uses=2]
-  %tmp5 = load float, float* %arrayidx4                  ; <float> [#uses=1]
-  %sub = fsub float %tmp5, 5.000000e+00           ; <float> [#uses=1]
-  store float %sub, float* %arrayidx4
-  %arrayidx7 = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
-  %tmp8 = bitcast %struct.anon addrspace(2)* %arrayidx7 to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
-  %tmp9 = bitcast %struct.anon* %s to i8*         ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* %tmp8, i8* %tmp9, i64 4, i32 4, i1 false)
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind
-
-declare void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* nocapture, i8* nocapture, i64, i32, i1) nounwind
-
--- a/test/Transforms/ScalarRepl/arraytest.ll
+++ b/test/Transforms/ScalarRepl/arraytest.ll
@ -1,11 +0,0 @@
-; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @test() {
-	%X = alloca [4 x i32]		; <[4 x i32]*> [#uses=1]
-	%Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 0		; <i32*> [#uses=2]
-	store i32 0, i32* %Y
-	%Z = load i32, i32* %Y		; <i32> [#uses=1]
-	ret i32 %Z
-}
-
--- a/test/Transforms/ScalarRepl/badarray.ll
+++ b/test/Transforms/ScalarRepl/badarray.ll
@ -1,57 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-linux-gnu"
-
-
-; PR3466
-; Off end of array, don't transform.
-define i32 @test1() {
-; CHECK-LABEL: @test1(
-; CHECK-NOT: = alloca
-	%X = alloca [4 x i32]
-	%Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 6		; <i32*> [#uses=2]
-	store i32 0, i32* %Y
-	%Z = load i32, i32* %Y		; <i32> [#uses=1]
-	ret i32 %Z
-}
-
-
-; Off end of array, don't transform.
-define i32 @test2() nounwind {
-entry:
-; CHECK-LABEL: @test2(
-; CHECK-NOT: = alloca
-        %yx2.i = alloca float, align 4          ; <float*> [#uses=1]            
-        %yx26.i = bitcast float* %yx2.i to i64*         ; <i64*> [#uses=1]      
-        %0 = load i64, i64* %yx26.i, align 8         ; <i64> [#uses=0]               
-        unreachable
-}
-
-%base = type { i32, [0 x i8] }
-%padded = type { %base, [1 x i32] }
-
-; PR5436
-define void @test3() {
-entry:
-; CHECK-LABEL: @test3(
-; CHECK-NOT: = alloca
-; CHECK: store i64
-  %var_1 = alloca %padded, align 8                ; <%padded*> [#uses=3]
-  %0 = getelementptr inbounds %padded, %padded* %var_1, i32 0, i32 0 ; <%base*> [#uses=2]
-  
-  %p2 = getelementptr inbounds %base, %base* %0, i32 0, i32 1, i32 0 ; <i8*> [#uses=1]
-  store i8 72, i8* %p2, align 1
-  
-  ; 72 -> a[0].
-
-  %callret = call %padded *@test3f() ; <i32> [#uses=2]
-  %callretcast = bitcast %padded* %callret to i8*                     ; <i8*> [#uses=1]
-  %var_11 = bitcast %padded* %var_1 to i8*        ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4, i1 false)
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-
-declare %padded* @test3f()
--- a/test/Transforms/ScalarRepl/basictest.ll
+++ b/test/Transforms/ScalarRepl/basictest.ll
@ -1,30 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @test1() {
-	%X = alloca { i32, float }		; <{ i32, float }*> [#uses=1]
-	%Y = getelementptr { i32, float }, { i32, float }* %X, i64 0, i32 0		; <i32*> [#uses=2]
-	store i32 0, i32* %Y
-	%Z = load i32, i32* %Y		; <i32> [#uses=1]
-	ret i32 %Z
-; CHECK-LABEL: @test1(
-; CHECK-NOT: alloca
-; CHECK: ret i32 0
-}
-
-; PR8980
-define i64 @test2(i64 %X) {
-	%A = alloca [8 x i8]
-        %B = bitcast [8 x i8]* %A to i64*
-        
-	store i64 %X, i64* %B
-        br label %L2
-        
-L2:
-	%Z = load i64, i64* %B		; <i32> [#uses=1]
-	ret i64 %Z
-; CHECK-LABEL: @test2(
-; CHECK-NOT: alloca
-; CHECK: ret i64 %X
-}
-
--- a/test/Transforms/ScalarRepl/bitfield-sroa.ll
+++ b/test/Transforms/ScalarRepl/bitfield-sroa.ll
@ -1,17 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca        
-; rdar://6532315
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-%t = type { { i32, i16, i8, i8 } }
-
-define i8 @foo(i64 %A) {
-        %ALL = alloca %t, align 8 
-        %tmp59172 = bitcast %t* %ALL to i64*
-        store i64 %A, i64* %tmp59172, align 8
-        %C = getelementptr %t, %t* %ALL, i32 0, i32 0, i32 1             
-        %D = bitcast i16* %C to i32*    
-        %E = load i32, i32* %D, align 4     
-        %F = bitcast %t* %ALL to i8* 
-        %G = load i8, i8* %F, align 8 
-	ret i8 %G
-}
-
--- a/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/test/Transforms/ScalarRepl/copy-aggregate.ll
@ -1,107 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; PR3290
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-;; Store of integer to whole alloca struct.
-define i32 @test1(i64 %V) nounwind {
-; CHECK: test1
-; CHECK-NOT: alloca
-	%X = alloca {{i32, i32}}
-	%Y = bitcast {{i32,i32}}* %X to i64*
-	store i64 %V, i64* %Y
-
-	%A = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 0
-	%B = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 1
-	%a = load i32, i32* %A
-	%b = load i32, i32* %B
-	%c = add i32 %a, %b
-	ret i32 %c
-}
-
-;; Store of integer to whole struct/array alloca.
-define float @test2(i128 %V) nounwind {
-; CHECK: test2
-; CHECK-NOT: alloca
-	%X = alloca {[4 x float]}
-	%Y = bitcast {[4 x float]}* %X to i128*
-	store i128 %V, i128* %Y
-
-	%A = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 0
-	%B = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 3
-	%a = load float, float* %A
-	%b = load float, float* %B
-	%c = fadd float %a, %b
-	ret float %c
-}
-
-;; Load of whole alloca struct as integer
-define i64 @test3(i32 %a, i32 %b) nounwind {
-; CHECK: test3
-; CHECK-NOT: alloca
-	%X = alloca {{i32, i32}}
-
-	%A = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 0
-	%B = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 1
-        store i32 %a, i32* %A
-        store i32 %b, i32* %B
-
-	%Y = bitcast {{i32,i32}}* %X to i64*
-        %Z = load i64, i64* %Y
-	ret i64 %Z
-}
-
-;; load of integer from whole struct/array alloca.
-define i128 @test4(float %a, float %b) nounwind {
-; CHECK: test4
-; CHECK-NOT: alloca
-	%X = alloca {[4 x float]}
-	%A = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 0
-	%B = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 3
-	store float %a, float* %A
-	store float %b, float* %B
-        
-      	%Y = bitcast {[4 x float]}* %X to i128*
-	%V = load i128, i128* %Y
-	ret i128 %V
-}
-
-;; If the elements of a struct or array alloca contain padding, SROA can still
-;; split up the alloca as long as there is no padding between the elements.
-%padded = type { i16, i8 }
-define void @test5([4 x %padded]* %p, [4 x %padded]* %q) {
-entry:
-; CHECK: test5
-; CHECK-NOT: i128
-  %var = alloca [4 x %padded], align 4
-  %vari8 = bitcast [4 x %padded]* %var to i8*
-  %pi8 = bitcast [4 x %padded]* %p to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i32 4, i1 false)
-  %qi8 = bitcast [4 x %padded]* %q to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i32 4, i1 false)
-  ret void
-}
-
-;; Check that an array alloca can be split up when it is also accessed with
-;; a load or store as a homogeneous structure with the same element type and
-;; number of elements as the array.
-%homogeneous = type { <8 x i16>, <8 x i16>, <8 x i16> }
-%wrapped_array = type { [3 x <8 x i16>] }
-define void @test6(i8* %p, %wrapped_array* %arr) {
-entry:
-; CHECK: test6
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-  %var = alloca %wrapped_array, align 16
-  %res = call %homogeneous @test6callee(i8* %p)
-  %varcast = bitcast %wrapped_array* %var to %homogeneous*
-  store %homogeneous %res, %homogeneous* %varcast
-  %tmp1 = bitcast %wrapped_array* %arr to i8*
-  %tmp2 = bitcast %wrapped_array* %var to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %tmp2, i32 48, i32 16, i1 false)
-  ret void
-}
-
-declare %homogeneous @test6callee(i8* nocapture) nounwind
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/crash.ll
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -1,286 +0,0 @@
-; RUN: opt -scalarrepl -disable-output < %s
-; RUN: opt -scalarrepl-ssa -disable-output < %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-; PR9017
-define void @test1() nounwind readnone ssp {
-entry:
-  %l_72 = alloca i32*, align 8
-  unreachable
-
-for.cond:                                         ; preds = %for.cond
-  %tmp1.i = load i32*, i32** %l_72, align 8
-  store i32* %tmp1.i, i32** %l_72, align 8
-  br label %for.cond
-
-if.end:                                           ; No predecessors!
-  ret void
-}
-
-
-define void @test2() {
-  %E = alloca { { i32, float, double, i64 }, { i32, float, double, i64 } }        ; <{ { i32, float, double, i64 }, { i32, float, double, i64 } }*> [#uses=1]
-  %tmp.151 = getelementptr { { i32, float, double, i64 }, { i32, float, double, i64 } }, { { i32, float, double, i64 }, { i32, float, double, i64 } }* %E, i64 0, i32 1, i32 3          ; <i64*> [#uses=0]
-  ret void
-}
-
-define i32 @test3() {
-        %X = alloca { [4 x i32] }               ; <{ [4 x i32] }*> [#uses=1]
-        %Y = getelementptr { [4 x i32] }, { [4 x i32] }* %X, i64 0, i32 0, i64 2               ; <i32*> [#uses=2]
-        store i32 4, i32* %Y
-        %Z = load i32, i32* %Y               ; <i32> [#uses=1]
-        ret i32 %Z
-}
-
-
-%struct.rtx_def = type { [2 x i8], i32, [1 x %union.rtunion_def] }
-%union.rtunion_def = type { i32 }
-
-define void @test4() {
-entry:
-        %c_addr.i = alloca i8           ; <i8*> [#uses=1]
-        switch i32 0, label %return [
-                 i32 36, label %label.7
-                 i32 34, label %label.7
-                 i32 41, label %label.5
-        ]
-label.5:                ; preds = %entry
-        ret void
-label.7:                ; preds = %entry, %entry
-        br i1 false, label %then.4, label %switchexit.0
-then.4:         ; preds = %label.7
-        %tmp.0.i = bitcast i8* %c_addr.i to i32*                ; <i32*> [#uses=1]
-        store i32 44, i32* %tmp.0.i
-        ret void
-switchexit.0:           ; preds = %label.7
-        ret void
-return:         ; preds = %entry
-        ret void
-}
-
-
-define void @test5() {
-entry:
-        %source_ptr = alloca i8*, align 4               ; <i8**> [#uses=2]
-        br i1 false, label %bb1357, label %cond_next583
-cond_next583:           ; preds = %entry
-        ret void
-bb1357:         ; preds = %entry
-        br i1 false, label %bb1365, label %bb27055
-bb1365:         ; preds = %bb1357
-        switch i32 0, label %cond_next10377 [
-                 i32 0, label %bb4679
-                 i32 1, label %bb4679
-                 i32 2, label %bb4679
-                 i32 3, label %bb4679
-                 i32 4, label %bb5115
-                 i32 5, label %bb6651
-                 i32 6, label %bb7147
-                 i32 7, label %bb8683
-                 i32 8, label %bb9131
-                 i32 9, label %bb9875
-                 i32 10, label %bb4679
-                 i32 11, label %bb4859
-                 i32 12, label %bb4679
-                 i32 16, label %bb10249
-        ]
-bb4679:         ; preds = %bb1365, %bb1365, %bb1365, %bb1365, %bb1365, %bb1365
-        ret void
-bb4859:         ; preds = %bb1365
-        ret void
-bb5115:         ; preds = %bb1365
-        ret void
-bb6651:         ; preds = %bb1365
-        ret void
-bb7147:         ; preds = %bb1365
-        ret void
-bb8683:         ; preds = %bb1365
-        ret void
-bb9131:         ; preds = %bb1365
-        ret void
-bb9875:         ; preds = %bb1365
-        %source_ptr9884 = bitcast i8** %source_ptr to i8**              ; <i8**> [#uses=1]
-        %tmp9885 = load i8*, i8** %source_ptr9884            ; <i8*> [#uses=0]
-        ret void
-bb10249:                ; preds = %bb1365
-        %source_ptr10257 = bitcast i8** %source_ptr to i16**            ; <i16**> [#uses=1]
-        %tmp10258 = load i16*, i16** %source_ptr10257         ; <i16*> [#uses=0]
-        ret void
-cond_next10377:         ; preds = %bb1365
-        ret void
-bb27055:                ; preds = %bb1357
-        ret void
-}
-
-
-        %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>" = type { %"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block"* }
-        %"struct.__gnu_cxx::bitmap_allocator<char>" = type { i8 }
-        %"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block" = type { [8 x i8] }
-
-; PR1045
-define void @test6() {
-entry:
-        %this_addr.i = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*                ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"**> [#uses=3]
-        %tmp = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>", align 4                ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
-        store %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i
-        %tmp.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i          ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
-        %tmp.i.upgrd.1 = bitcast %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp.i to %"struct.__gnu_cxx::bitmap_allocator<char>"*              ; <%"struct.__gnu_cxx::bitmap_allocator<char>"*> [#uses=0]
-        %tmp1.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i         ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
-        %tmp.i.upgrd.2 = getelementptr %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>", %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp1.i, i32 0, i32 0         ; <%"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block"**> [#uses=0]
-        unreachable
-}
-
-        %struct.CGPoint = type { float, float }
-        %struct.aal_big_range_t = type { i32, i32 }        %struct.aal_callback_t = type { i8* (i8*, i32)*, void (i8*, i8*)* }        %struct.aal_edge_pool_t = type { %struct.aal_edge_pool_t*, i32, i32, [0 x %struct.aal_edge_t] }        %struct.aal_edge_t = type { %struct.CGPoint, %struct.CGPoint, i32 }
-        %struct.aal_range_t = type { i16, i16 }
-        %struct.aal_span_pool_t = type { %struct.aal_span_pool_t*, [341 x %struct.aal_span_t] }
-        %struct.aal_span_t = type { %struct.aal_span_t*, %struct.aal_big_range_t }
-        %struct.aal_spanarray_t = type { [2 x %struct.aal_range_t] }
-        %struct.aal_spanbucket_t = type { i16, [2 x i8], %struct.anon }
-        %struct.aal_state_t = type { %struct.CGPoint, %struct.CGPoint, %struct.CGPoint, i32, float, float, float, float, %struct.CGPoint, %struct.CGPoint, float, float, float, float, i32, i32, i32, i32, float, float, i8*, i32, i32, %struct.aal_edge_pool_t*, %struct.aal_edge_pool_t*, i8*, %struct.aal_callback_t*, i32, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_pool_t*, i8, float, i8, i32 }
-        %struct.anon = type { %struct.aal_spanarray_t }
-
-
-
-define fastcc void @test7() {
-entry:
-        %SB = alloca %struct.aal_spanbucket_t, align 4          ; <%struct.aal_spanbucket_t*> [#uses=2]
-        br i1 false, label %cond_true, label %cond_next79
-
-cond_true:              ; preds = %entry
-        br i1 false, label %cond_next, label %cond_next114.i
-
-cond_next114.i:         ; preds = %cond_true
-        ret void
-
-cond_next:              ; preds = %cond_true
-        %SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8*            ; <i8*> [#uses=1]
-        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i32 0, i1 false)
-        br i1 false, label %cond_next34, label %cond_next79
-
-cond_next34:            ; preds = %cond_next
-        %i.2.reload22 = load i32, i32* null          ; <i32> [#uses=1]
-        %tmp51 = getelementptr %struct.aal_spanbucket_t, %struct.aal_spanbucket_t* %SB, i32 0, i32 2, i32 0, i32 0, i32 %i.2.reload22, i32 1      
-        ; <i16*> [#uses=0]
-        ret void
-
-cond_next79:            ; preds = %cond_next, %entry
-        ret void
-}
-
-
-       %struct.c37304a__vrec = type { i8, %struct.c37304a__vrec___disc___XVN }
-        %struct.c37304a__vrec___disc___XVN = type {
-%struct.c37304a__vrec___disc___XVN___O }
-        %struct.c37304a__vrec___disc___XVN___O = type {  }
-
-; PR3304
-define void @test8() {
-entry:
-        %v = alloca %struct.c37304a__vrec
-        %0 = getelementptr %struct.c37304a__vrec, %struct.c37304a__vrec* %v, i32 0, i32 0             
-        store i8 8, i8* %0, align 1
-        unreachable
-}
-
-
-
-; rdar://6808691 - ZeroLengthMemSet
-        %0 = type <{ i32, i16, i8, i8, i64, i64, i16, [0 x i16] }>           
-
-define i32 @test9() {
-entry:
-        %.compoundliteral = alloca %0           
-        %tmp228 = getelementptr %0, %0* %.compoundliteral, i32 0, i32 7
-        %tmp229 = bitcast [0 x i16]* %tmp228 to i8*             
-        call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i32 2, i1 false)
-        unreachable
-}
-
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-
-
-; PR4146 - i1 handling
-%wrapper = type { i1 }
-define void @test10() {
-entry:
-        %w = alloca %wrapper, align 8           ; <%wrapper*> [#uses=1]
-        %0 = getelementptr %wrapper, %wrapper* %w, i64 0, i32 0           ; <i1*>
-        store i1 true, i1* %0
-        ret void
-}
-
-
-        %struct.singlebool = type <{ i8 }>
-; PR4286
-define zeroext i8 @test11() nounwind {
-entry:
-        %a = alloca %struct.singlebool, align 1         ; <%struct.singlebool*> [#uses=2]
-        %storetmp.i = bitcast %struct.singlebool* %a to i1*             ; <i1*> [#uses=1]
-        store i1 true, i1* %storetmp.i
-        %tmp = getelementptr %struct.singlebool, %struct.singlebool* %a, i64 0, i32 0               ; <i8*> [#uses=1]
-        %tmp1 = load i8, i8* %tmp           ; <i8> [#uses=1]
-        ret i8 %tmp1
-}
-
-
-       %struct.Item = type { [4 x i16], %struct.rule* }
-        %struct.rule = type { [4 x i16], i32, i32, i32, %struct.nonterminal*, %struct.pattern*, i8 }
-        %struct.nonterminal = type { i8*, i32, i32, i32, %struct.plankMap*, %struct.rule* }
-        %struct.plankMap = type { %struct.list*, i32, %struct.stateMap* }
-        %struct.list = type { i8*, %struct.list* }
-        %struct.stateMap = type { i8*, %struct.plank*, i32, i16* }
-        %struct.plank = type { i8*, %struct.list*, i32 }
-        %struct.pattern = type { %struct.nonterminal*, %struct.operator*, [2 x %struct.nonterminal*] }
-        %struct.operator = type { i8*, i8, i32, i32, i32, i32, %struct.table* }
-        %struct.table = type { %struct.operator*, %struct.list*, i16*, [2 x %struct.dimension*], %struct.item_set** }
-        %struct.dimension = type { i16*, %struct.Index_Map, %struct.mapping*, i32, %struct.plankMap* }
-        %struct.Index_Map = type { i32, %struct.item_set** }
-        %struct.item_set = type { i32, i32, %struct.operator*, [2 x %struct.item_set*], %struct.item_set*, i16*, %struct.Item*, %struct.Item* }
-        %struct.mapping = type { %struct.list**, i32, i32, i32, %struct.item_set** }
-
-; VLAs.
-define void @test12() {
-bb4.i:
-        %malloccall = tail call i8* @malloc(i32 0)
-        %0 = bitcast i8* %malloccall to [0 x %struct.Item]*
-        %.sub.i.c.i = getelementptr [0 x %struct.Item], [0 x %struct.Item]* %0, i32 0, i32 0                ; <%struct.Item*> [#uses=0]
-        unreachable
-}
-declare noalias i8* @malloc(i32)
-
-; PR8680
-define void @test13() nounwind {
-entry:
-  %memtmp = alloca i32, align 4
-  %0 = bitcast i32* %memtmp to void ()*
-  call void %0() nounwind
-  ret void
-}
-
-; rdar://11861001 - The dynamic GEP here was incorrectly making all accesses
-; to the alloca think they were also dynamic.  Inserts and extracts created to
-; access the vector were all being based from the dynamic access, even in BBs
-; not dominated by the GEP.
-define fastcc void @test() optsize inlinehint ssp align 2 {
-entry:
-  %alloc.0.0 = alloca <4 x float>, align 16
-  %bitcast = bitcast <4 x float>* %alloc.0.0 to [4 x float]*
-  %idx3 = getelementptr inbounds [4 x float], [4 x float]* %bitcast, i32 0, i32 3
-  store float 0.000000e+00, float* %idx3, align 4
-  br label %for.body10
-
-for.body10:                                       ; preds = %for.body10, %entry
-  %loopidx = phi i32 [ 0, %entry ], [ undef, %for.body10 ]
-  %unusedidx = getelementptr inbounds <4 x float>, <4 x float>* %alloc.0.0, i32 0, i32 %loopidx
-  br i1 undef, label %for.end, label %for.body10
-
-for.end:                                          ; preds = %for.body10
-  store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00>, <4 x float>* %alloc.0.0, align 16
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -1,64 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.6.0"
-
-; CHECK: f
-; CHECK-NOT: llvm.dbg.declare
-; CHECK: llvm.dbg.value
-; CHECK: llvm.dbg.value
-; CHECK: llvm.dbg.value
-; CHECK: llvm.dbg.value
-; CHECK: llvm.dbg.value
-
-define i32 @f(i32 %a, i32 %b) nounwind ssp !dbg !1 {
-entry:
-  %a.addr = alloca i32, align 4
-  %b.addr = alloca i32, align 4
-  %c = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !6, metadata !DIExpression()), !dbg !7
-  store i32 %b, i32* %b.addr, align 4
-  call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !8, metadata !DIExpression()), !dbg !9
-  call void @llvm.dbg.declare(metadata i32* %c, metadata !10, metadata !DIExpression()), !dbg !12
-  %tmp = load i32, i32* %a.addr, align 4, !dbg !13
-  store i32 %tmp, i32* %c, align 4, !dbg !13
-  %tmp1 = load i32, i32* %a.addr, align 4, !dbg !14
-  %tmp2 = load i32, i32* %b.addr, align 4, !dbg !14
-  %add = add nsw i32 %tmp1, %tmp2, !dbg !14
-  store i32 %add, i32* %a.addr, align 4, !dbg !14
-  %tmp3 = load i32, i32* %c, align 4, !dbg !15
-  %tmp4 = load i32, i32* %b.addr, align 4, !dbg !15
-  %sub = sub nsw i32 %tmp3, %tmp4, !dbg !15
-  store i32 %sub, i32* %b.addr, align 4, !dbg !15
-  %tmp5 = load i32, i32* %a.addr, align 4, !dbg !16
-  %tmp6 = load i32, i32* %b.addr, align 4, !dbg !16
-  %add7 = add nsw i32 %tmp5, %tmp6, !dbg !16
-  ret i32 %add7, !dbg !16
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!20}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131941)", isOptimized: false, emissionKind: FullDebug, file: !18, enums: !19, retainedTypes: !19)
-!1 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !18, scope: !2, type: !3)
-!2 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
-!3 = !DISubroutineType(types: !4)
-!4 = !{!5}
-!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !1, file: !2, type: !5)
-!7 = !DILocation(line: 1, column: 11, scope: !1)
-!8 = !DILocalVariable(name: "b", line: 1, arg: 2, scope: !1, file: !2, type: !5)
-!9 = !DILocation(line: 1, column: 18, scope: !1)
-!10 = !DILocalVariable(name: "c", line: 2, scope: !11, file: !2, type: !5)
-!11 = distinct !DILexicalBlock(line: 1, column: 21, file: !18, scope: !1)
-!12 = !DILocation(line: 2, column: 9, scope: !11)
-!13 = !DILocation(line: 2, column: 14, scope: !11)
-!14 = !DILocation(line: 3, column: 5, scope: !11)
-!15 = !DILocation(line: 4, column: 5, scope: !11)
-!16 = !DILocation(line: 5, column: 5, scope: !11)
-!18 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
-!19 = !{}
-!20 = !{i32 1, !"Debug Info Version", i32 3}
--- a/test/Transforms/ScalarRepl/inline-vector.ll
+++ b/test/Transforms/ScalarRepl/inline-vector.ll
@@ -1,53 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10.0.0"
-
-%struct.Vector4 = type { float, float, float, float }
-@f.vector = internal constant %struct.Vector4 { float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 }, align 16
-
-; CHECK-LABEL: define void @f(
-; CHECK-NOT: alloca
-; CHECK: phi <4 x float>
-
-define void @f() nounwind ssp {
-entry:
-  %i = alloca i32, align 4
-  %vector = alloca %struct.Vector4, align 16
-  %agg.tmp = alloca %struct.Vector4, align 16
-  %tmp = bitcast %struct.Vector4* %vector to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false)
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.body, %entry
-  %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  store i32 %storemerge, i32* %i, align 4
-  %cmp = icmp slt i32 %storemerge, 1000000
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8*
-  %tmp3 = bitcast %struct.Vector4* %vector to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
-  %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
-  %1 = load [2 x i64], [2 x i64]* %0, align 16
-  %tmp2.i = extractvalue [2 x i64] %1, 0
-  %tmp3.i = zext i64 %tmp2.i to i128
-  %tmp10.i = bitcast i128 %tmp3.i to <4 x float>
-  %sub.i.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp10.i
-  %2 = bitcast %struct.Vector4* %vector to <4 x float>*
-  store <4 x float> %sub.i.i, <4 x float>* %2, align 16
-  %tmp4 = load i32, i32* %i, align 4
-  %inc = add nsw i32 %tmp4, 1
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  %x = getelementptr inbounds %struct.Vector4, %struct.Vector4* %vector, i32 0, i32 0
-  %tmp5 = load float, float* %x, align 16
-  %conv = fpext float %tmp5 to double
-  %call = call i32 (...) @printf(double %conv) nounwind
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare i32 @printf(...)
--- a/test/Transforms/ScalarRepl/lifetime.ll
+++ b/test/Transforms/ScalarRepl/lifetime.ll
@@ -1,139 +0,0 @@
-; RUN: opt -scalarrepl -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @llvm.lifetime.start(i64, i8*)
-declare void @llvm.lifetime.end(i64, i8*)
-
-%t1 = type {i32, i32, i32}
-
-define void @test1() {
-; CHECK-LABEL: @test1(
-  %A = alloca %t1
-  %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0
-  %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1
-  %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2
-  %B = bitcast i32* %A1 to i8*
-  store i32 0, i32* %A1
-  call void @llvm.lifetime.start(i64 -1, i8* %B)
-  ret void
-; CHECK-NEXT: ret void
-}
-
-define void @test2() {
-; CHECK-LABEL: @test2(
-  %A = alloca %t1
-  %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0
-  %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1
-  %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2
-  %B = bitcast i32* %A2 to i8*
-  store i32 0, i32* %A2
-  call void @llvm.lifetime.start(i64 -1, i8* %B)
-  %C = load i32, i32* %A2
-  ret void
-; CHECK: ret void
-}
-
-define void @test3() {
-; CHECK-LABEL: @test3(
-  %A = alloca %t1
-  %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0
-  %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1
-  %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2
-  %B = bitcast i32* %A2 to i8*
-  store i32 0, i32* %A2
-  call void @llvm.lifetime.start(i64 6, i8* %B)
-  %C = load i32, i32* %A2
-  ret void
-; CHECK-NEXT: ret void
-}
-
-define void @test4() {
-; CHECK-LABEL: @test4(
-  %A = alloca %t1
-  %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0
-  %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1
-  %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2
-  %B = bitcast i32* %A2 to i8*
-  store i32 0, i32* %A2
-  call void @llvm.lifetime.start(i64 1, i8* %B)
-  %C = load i32, i32* %A2
-  ret void
-; CHECK-NEXT: ret void
-}
-
-%t2 = type {i32, [4 x i8], i32}
-
-define void @test5() {
-; CHECK-LABEL: @test5(
-  %A = alloca %t2
-; CHECK: alloca{{.*}}i8
-; CHECK: alloca{{.*}}i8
-; CHECK: alloca{{.*}}i8
-
-  %A21 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 0
-  %A22 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 1
-  %A23 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 2
-  %A24 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 3
-; CHECK-NOT: store i8 1
-  store i8 1, i8* %A21
-  store i8 2, i8* %A22
-  store i8 3, i8* %A23
-  store i8 4, i8* %A24
-
-  %A1 = getelementptr %t2, %t2* %A, i32 0, i32 0
-  %A2 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 1
-  %A3 = getelementptr %t2, %t2* %A, i32 0, i32 2
-  store i8 0, i8* %A2
-  call void @llvm.lifetime.start(i64 5, i8* %A2)
-; CHECK: llvm.lifetime{{.*}}i64 1
-; CHECK: llvm.lifetime{{.*}}i64 1
-; CHECK: llvm.lifetime{{.*}}i64 1
-  %C = load i8, i8* %A2
-  ret void
-}
-
-%t3 = type {[4 x i16], [4 x i8]}
-
-define void @test6() {
-; CHECK-LABEL: @test6(
-  %A = alloca %t3
-; CHECK: alloca i8
-; CHECK: alloca i8
-; CHECK: alloca i8
-
-  %A11 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 0
-  %A12 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 1
-  %A13 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 2
-  %A14 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 3
-  store i16 11, i16* %A11
-  store i16 12, i16* %A12
-  store i16 13, i16* %A13
-  store i16 14, i16* %A14
-; CHECK-NOT: store i16 11
-; CHECK-NOT: store i16 12
-; CHECK-NOT: store i16 13
-; CHECK-NOT: store i16 14
-
-  %A21 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 0
-  %A22 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 1
-  %A23 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 2
-  %A24 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 3
-  store i8 21, i8* %A21
-  store i8 22, i8* %A22
-  store i8 23, i8* %A23
-  store i8 24, i8* %A24
-; CHECK: store i8 21
-; CHECK: store i8 22
-; CHECK: store i8 23
-; CHECK-NOT: store i8 24
-
-  %B = bitcast i16* %A13 to i8*
-  call void @llvm.lifetime.start(i64 7, i8* %B)
-; CHECK: lifetime.start{{.*}}i64 1
-; CHECK: lifetime.start{{.*}}i64 1
-; CHECK: lifetime.start{{.*}}i64 1
-
-  ret void
-}
--- a/test/Transforms/ScalarRepl/load-store-aggregate.ll
+++ b/test/Transforms/ScalarRepl/load-store-aggregate.ll
@@ -1,31 +0,0 @@
-; This testcase shows that scalarrepl is able to replace struct alloca's which
-; are directly loaded from or stored to (using the first class aggregates
-; feature).
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-; RUN: opt < %s -scalarrepl -S > %t
-; RUN: cat %t | not grep alloca
-
-%struct.foo = type { i32, i32 }
-
-define i32 @test(%struct.foo* %P) {
-entry:
-	%L = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=2]
-        %V = load %struct.foo, %struct.foo* %P
-        store %struct.foo %V, %struct.foo* %L
-
-	%tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
-	ret i32 %tmp5
-}
-
-define %struct.foo @test2(i32 %A, i32 %B) {
-entry:
-	%L = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=2]
-        %L.0 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0
-        store i32 %A, i32* %L.0
-        %L.1 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 1
-        store i32 %B, i32* %L.1
-        %V = load %struct.foo, %struct.foo* %L
-        ret %struct.foo %V
-}
--- a/test/Transforms/ScalarRepl/memcpy-align.ll
+++ b/test/Transforms/ScalarRepl/memcpy-align.ll
@@ -1,32 +0,0 @@
-; RUN: opt -scalarrepl -S < %s | FileCheck %s
-; PR6832
-target datalayout =
-"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
-target triple = "arm-u-u"
-
-%0 = type { %struct.anon, %struct.anon }
-%struct.anon = type { [4 x i8] }
-
-@c = external global %0                           ; <%0*> [#uses=1]
-
-define void @good() nounwind {
-entry:
-  %x0 = alloca %struct.anon, align 4              ; <%struct.anon*> [#uses=2]
-  %tmp = bitcast %struct.anon* %x0 to i8*         ; <i8*> [#uses=1]
-  call void @llvm.memset.p0i8.i32(i8* %tmp, i8 0, i32 4, i32 4, i1 false)
-  %tmp1 = bitcast %struct.anon* %x0 to i8*        ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%0, %0* @c, i32
-0, i32 0, i32 0, i32 0), i8* %tmp1, i32 4, i32 4, i1 false)
-  ret void
-  
-; CHECK: store i8 0, i8*{{.*}}, align 4
-; CHECK: store i8 0, i8*{{.*}}, align 1
-; CHECK: store i8 0, i8*{{.*}}, align 2
-; CHECK: store i8 0, i8*{{.*}}, align 1
-}
-
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
-i1) nounwind
-
--- a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
@@ -1,23 +0,0 @@
-; PR1226
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   not grep "call void @llvm.memcpy.p0i8.p0i8.i32"
-; RUN: opt < %s -scalarrepl -S | grep getelementptr
-; END.
-
-target datalayout = "E-p:32:32"
-target triple = "powerpc-apple-darwin8.8.0"
-	%struct.foo = type { i8, i8 }
-
-
-define i32 @test1(%struct.foo* %P) {
-entry:
-	%L = alloca %struct.foo, align 2		; <%struct.foo*> [#uses=1]
-	%L2 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i8*> [#uses=2]
-	%tmp13 = getelementptr %struct.foo, %struct.foo* %P, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.p0i8.p0i8.i32( i8* %L2, i8* %tmp13, i32 2, i32 1, i1 false)
-	%tmp5 = load i8, i8* %L2		; <i8> [#uses=1]
-	%tmp56 = sext i8 %tmp5 to i32		; <i32> [#uses=1]
-	ret i32 %tmp56
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1)
--- a/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -1,67 +0,0 @@
-; PR1226
-; RUN: opt < %s -scalarrepl -S | grep "ret i32 16843009"
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i16 514"
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-apple-darwin8"
-	%struct.bar = type { %struct.foo, i64, double }
-	%struct.foo = type { i32, i32 }
-
-
-define i32 @test1(%struct.foo* %P) {
-entry:
-	%L = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=2]
-	%L2 = bitcast %struct.foo* %L to i8*		; <i8*> [#uses=1]
-	%tmp13 = bitcast %struct.foo* %P to i8*		; <i8*> [#uses=1]
-        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i32 4, i1 false)
-	%tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
-	ret i32 %tmp5
-}
-
-
-define i32 @test2() {
-entry:
-	%L = alloca [4 x %struct.foo], align 16		; <[4 x %struct.foo]*> [#uses=2]
-	%L12 = bitcast [4 x %struct.foo]* %L to i8*		; <i8*> [#uses=1]
-        call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i32 16, i1 false)
-	%tmp4 = getelementptr [4 x %struct.foo], [4 x %struct.foo]* %L, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
-	ret i32 %tmp5
-}
-
-
-define i32 @test3() {
-entry:
-	%B = alloca %struct.bar, align 16		; <%struct.bar*> [#uses=4]
-	%B1 = bitcast %struct.bar* %B to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i32 16, i1 false)
-	%tmp3 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 1, i32* %tmp3
-	%tmp4 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 2		; <double*> [#uses=1]
-	store double 1.000000e+01, double* %tmp4
-	%tmp6 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp7 = load i32, i32* %tmp6		; <i32> [#uses=1]
-	ret i32 %tmp7
-}
-
-
-	%struct.f = type { i32, i32, i32, i32, i32, i32 }
-
-define i16 @test4() nounwind {
-entry:
-	%A = alloca %struct.f, align 8		; <%struct.f*> [#uses=3]
-	%0 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 1, i32* %0, align 8
-	%1 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 1		; <i32*> [#uses=1]
-	%2 = bitcast i32* %1 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i32 4, i1 false)
-	%3 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 2		; <i32*> [#uses=1]
-	%4 = load i32, i32* %3, align 8		; <i32> [#uses=1]
-	%retval12 = trunc i32 %4 to i16		; <i16> [#uses=1]
-	ret i16 %retval12
-}
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/negative-memset.ll
+++ b/test/Transforms/ScalarRepl/negative-memset.ll
@@ -1,20 +0,0 @@
-; PR12202
-; RUN: opt < %s -scalarrepl -S
-; Ensure that we do not hang or crash when feeding a negative value to memset
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
-target triple = "i686-pc-win32"
-
-define i32 @test() nounwind {
-entry:
-  %retval = alloca i32, align 4
-  %buff = alloca [1 x i8], align 1
-  store i32 0, i32* %retval
-  %0 = bitcast [1 x i8]* %buff to i8*
-  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
-  %arraydecay = getelementptr inbounds [1 x i8], [1 x i8]* %buff, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i32 1, i1 false)	; Negative 8!
-  ret i32 0
-}
-
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/nonzero-first-index.ll
+++ b/test/Transforms/ScalarRepl/nonzero-first-index.ll
@ -1,53 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-linux-gnu"
-
-%nested = type { i32, [4 x i32] }
-
-; Check that a GEP with a non-zero first index does not prevent SROA as long
-; as the resulting offset corresponds to an element in the alloca.
-define i32 @test1() {
-; CHECK-LABEL: @test1(
-; CHECK-NOT: = i160
-; CHECK: ret i32 undef
-	%A = alloca %nested
-	%B = getelementptr %nested, %nested* %A, i32 0, i32 1, i32 0
-	%C = getelementptr i32, i32* %B, i32 2
-	%D = load i32, i32* %C
-	ret i32 %D
-}
-
-; But, if the offset is out of range, then it should not be transformed.
-define i32 @test2() {
-; CHECK-LABEL: @test2(
-; CHECK: i160
-	%A = alloca %nested
-	%B = getelementptr %nested, %nested* %A, i32 0, i32 1, i32 0
-	%C = getelementptr i32, i32* %B, i32 4
-	%D = load i32, i32* %C
-	ret i32 %D
-}
-
-; Try it with a bitcast and single GEP....
-define i32 @test3() {
-; CHECK-LABEL: @test3(
-; CHECK-NOT: = i160
-; CHECK: ret i32 undef
-	%A = alloca %nested
-	%B = bitcast %nested* %A to i32*
-	%C = getelementptr i32, i32* %B, i32 2
-	%D = load i32, i32* %C
-	ret i32 %D
-}
-
-; ...and again make sure that out-of-range accesses are not transformed.
-define i32 @test4() {
-; CHECK-LABEL: @test4(
-; CHECK: i160
-	%A = alloca %nested
-	%B = bitcast %nested* %A to i32*
-	%C = getelementptr i32, i32* %B, i32 -1
-	%D = load i32, i32* %C
-	ret i32 %D
-}
--- a/test/Transforms/ScalarRepl/not-a-vector.ll
+++ b/test/Transforms/ScalarRepl/not-a-vector.ll
@ -1,20 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl -S | not grep "7 x double"
-; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret double %B"
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define double @test(double %A, double %B) {
-	%ARR = alloca [7 x i64]
-	%C = bitcast [7 x i64]* %ARR to double*
-	store double %A, double* %C
-
-	%D = getelementptr [7 x i64], [7 x i64]* %ARR, i32 0, i32 4
-	%E = bitcast i64* %D to double*
-	store double %B, double* %E
-
-	%F = getelementptr double, double* %C, i32 4
-	%G = load double, double* %F
-	ret double %G
-}
-
-
--- a/test/Transforms/ScalarRepl/only-memcpy-uses.ll
+++ b/test/Transforms/ScalarRepl/only-memcpy-uses.ll
@ -1,27 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-%struct.S = type { [12 x i32] }
-
-; CHECK-LABEL: @bar4(
-define void @bar4(%struct.S* byval %s) nounwind ssp {
-entry:
-; CHECK: alloca
-; CHECK-NOT: load
-; CHECK: memcpy
-  %t = alloca %struct.S, align 4
-  %agg.tmp = alloca %struct.S, align 4
-  %tmp = bitcast %struct.S* %t to i8*
-  %tmp1 = bitcast %struct.S* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
-  %tmp2 = bitcast %struct.S* %agg.tmp to i8*
-  %tmp3 = bitcast %struct.S* %t to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i32 4, i1 false)
-  %call = call i32 (...) @bazz(%struct.S* byval %agg.tmp)
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-
-declare i32 @bazz(...)
--- a/test/Transforms/ScalarRepl/phi-cycle.ll
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@ -1,80 +0,0 @@
-; RUN: opt -S -scalarrepl-ssa < %s | FileCheck %s
-; rdar://10589171
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.foo = type { i32, i32 }
-
-@.str = private unnamed_addr constant [6 x i8] c"x=%d\0A\00", align 1
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
-entry:
-  %f = alloca %struct.foo, align 4
-  %x.i = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0
-  store i32 1, i32* %x.i, align 4
-  %y.i = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 1
-  br label %while.cond.i
-
-; CHECK: while.cond.i:
-; CHECK-NEXT: %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
-; CHECK-NEXT: %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
-; CHECK-NEXT: %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
-; CHECK-NOT: phi
-while.cond.i:                                     ; preds = %while.cond.backedge.i, %entry
-  %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
-  %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
-  %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
-  %cmp.i = icmp sgt i32 %left.0.i, 0
-  br i1 %cmp.i, label %while.body.i, label %while.cond.i.func.exit_crit_edge
-
-while.cond.i.func.exit_crit_edge:                 ; preds = %while.cond.i
-  br label %func.exit
-
-while.body.i:                                     ; preds = %while.cond.i
-  %dec.i = add nsw i32 %left.0.i, -1
-  switch i32 1, label %while.body.i.func.exit_crit_edge [
-    i32 0, label %while.cond.backedge.i
-    i32 1, label %sw.bb.i
-  ]
-
-while.body.i.func.exit_crit_edge:                 ; preds = %while.body.i
-  br label %func.exit
-
-sw.bb.i:                                          ; preds = %while.body.i
-  %cmp2.i = icmp eq i32 %tmp, 1
-  br i1 %cmp2.i, label %if.then.i, label %if.end.i
-
-if.then.i:                                        ; preds = %sw.bb.i
-  store i32 %pos.0.i, i32* %x.i, align 4
-  br label %if.end.i
-
-; CHECK: if.end.i:
-; CHECK-NEXT: %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
-; CHECK-NOT: phi
-if.end.i:                                         ; preds = %if.then.i, %sw.bb.i
-  %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
-  store i32 %tmp1, i32* %y.i, align 4
-  br label %while.cond.backedge.i
-
-; CHECK: while.cond.backedge.i:
-; CHECK-NEXT: %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
-; CHECK-NOT: phi
-while.cond.backedge.i:                            ; preds = %if.end.i, %while.body.i
-  %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
-  %xtmp.i = add i32 %pos.0.i, 1
-  br label %while.cond.i
-
-; CHECK: func.exit:
-; CHECK-NOT: load
-; CHECK: %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]]
-func.exit:                                        ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
-  %tmp3 = load i32, i32* %x.i, align 4
-  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
-  ret i32 0
-}
-
-declare i32 @printf(i8* nocapture, ...) nounwind
-
-; CHECK: attributes #0 = { nounwind uwtable }
-; CHECK: attributes [[NUW]] = { nounwind }
--- a/test/Transforms/ScalarRepl/phi-select.ll
+++ b/test/Transforms/ScalarRepl/phi-select.ll
@ -1,153 +0,0 @@
-; RUN: opt -scalarrepl -S < %s | FileCheck %s
-; Test promotion of allocas that have phis and select users.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.2"
-
-%struct.X = type { i32 }
-%PairTy = type {i32, i32}
-
-; CHECK-LABEL: @test1(
-; CHECK: %a.0 = alloca i32
-; CHECK: %b.0 = alloca i32
-define i32 @test1(i32 %x) nounwind readnone ssp {
-entry:
-  %a = alloca %struct.X, align 8                  ; <%struct.X*> [#uses=2]
-  %b = alloca %struct.X, align 8                  ; <%struct.X*> [#uses=2]
-  %0 = getelementptr inbounds %struct.X, %struct.X* %a, i64 0, i32 0 ; <i32*> [#uses=1]
-  store i32 1, i32* %0, align 8
-  %1 = getelementptr inbounds %struct.X, %struct.X* %b, i64 0, i32 0 ; <i32*> [#uses=1]
-  store i32 2, i32* %1, align 8
-  %2 = icmp eq i32 %x, 0                          ; <i1> [#uses=1]
-  %p.0 = select i1 %2, %struct.X* %b, %struct.X* %a ; <%struct.X*> [#uses=1]
-  %3 = getelementptr inbounds %struct.X, %struct.X* %p.0, i64 0, i32 0 ; <i32*> [#uses=1]
-  %4 = load i32, i32* %3, align 8                      ; <i32> [#uses=1]
-  ret i32 %4
-}
-
-; CHECK-LABEL: @test2(
-; CHECK: %X.ld = phi i32 [ 1, %entry ], [ 2, %T ]
-; CHECK-NEXT: ret i32 %X.ld
-define i32 @test2(i1 %c) {
-entry:
-  %A = alloca {i32, i32}
-  %B = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 0
-  store i32 1, i32* %B
-  br i1 %c, label %T, label %F
-T:
-  %C = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 1
-  store i32 2, i32* %C
-  br label %F
-F:
-  %X = phi i32* [%B, %entry], [%C, %T]
-  %Q = load i32, i32* %X
-  ret i32 %Q
-}
-
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: %Q = select i1 %c, i32 1, i32 2
-; CHECK-NEXT: ret i32 %Q
-; rdar://8904039
-define i32 @test3(i1 %c) {
-  %A = alloca {i32, i32}
-  %B = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 0
-  store i32 1, i32* %B
-  %C = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 1
-  store i32 2, i32* %C
-  
-  %X = select i1 %c, i32* %B, i32* %C
-  %Q = load i32, i32* %X
-  ret i32 %Q
-}
-
-;; We can't scalarize this, a use of the select is not an element access.
-define i64 @test4(i1 %c) {
-entry:
-  %A = alloca %PairTy
-  ; CHECK-LABEL: @test4(
-  ; CHECK: %A = alloca %PairTy
-  %B = getelementptr %PairTy, %PairTy* %A, i32 0, i32 0
-  store i32 1, i32* %B
-  %C = getelementptr %PairTy, %PairTy* %A, i32 0, i32 1
-  store i32 2, i32* %B
-  
-  %X = select i1 %c, i32* %B, i32* %C
-  %Y = bitcast i32* %X to i64*
-  %Q = load i64, i64* %Y
-  ret i64 %Q
-}
-
-
-;;
-;; Tests for promoting allocas used by selects.
-;; rdar://7339113
-;;
-
-define i32 @test5(i32 *%P) nounwind readnone ssp {
-entry:
-  %b = alloca i32, align 8 
-  store i32 2, i32* %b, align 8
-  
-  ;; Select on constant condition should be folded.
-  %p.0 = select i1 false, i32* %b, i32* %P
-  store i32 123, i32* %p.0
-  
-  %r = load i32, i32* %b, align 8
-  ret i32 %r
-  
-; CHECK-LABEL: @test5(
-; CHECK: store i32 123, i32* %P
-; CHECK: ret i32 2
-}
-
-define i32 @test6(i32 %x, i1 %c) nounwind readnone ssp {
-  %a = alloca i32, align 8
-  %b = alloca i32, align 8
-  store i32 1, i32* %a, align 8
-  store i32 2, i32* %b, align 8
-  %p.0 = select i1 %c, i32* %b, i32* %a
-  %r = load i32, i32* %p.0, align 8
-  ret i32 %r
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: %r = select i1 %c, i32 2, i32 1
-; CHECK-NEXT: ret i32 %r
-}
-
-; Verify that the loads happen where the loads are, not where the select is.
-define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp {
-  %a = alloca i32, align 8
-  %b = alloca i32, align 8
-  store i32 1, i32* %a
-  store i32 2, i32* %b
-  %p.0 = select i1 %c, i32* %b, i32* %a
-  
-  store i32 0, i32* %a
-  
-  %r = load i32, i32* %p.0, align 8
-  ret i32 %r
-; CHECK-LABEL: @test7(
-; CHECK-NOT: alloca i32
-; CHECK: %r = select i1 %c, i32 2, i32 0
-; CHECK: ret i32 %r
-}
-
-;; Promote allocs that are PHI'd together by moving the loads.
-define i32 @test8(i32 %x) nounwind readnone ssp {
-; CHECK-LABEL: @test8(
-; CHECK-NOT: load i32
-; CHECK-NOT: store i32
-; CHECK: %p.0.ld = phi i32 [ 2, %entry ], [ 1, %T ]
-; CHECK-NEXT: ret i32 %p.0.ld
-entry:
-  %a = alloca i32, align 8
-  %b = alloca i32, align 8
-  store i32 1, i32* %a, align 8
-  store i32 2, i32* %b, align 8
-  %c = icmp eq i32 %x, 0 
-  br i1 %c, label %T, label %Cont
-T:
-  br label %Cont
-Cont:
-  %p.0 = phi i32* [%b, %entry],[%a, %T]
-  %r = load i32, i32* %p.0, align 8
-  ret i32 %r
-}
--- a/test/Transforms/ScalarRepl/phinodepromote.ll
+++ b/test/Transforms/ScalarRepl/phinodepromote.ll
@ -1,34 +0,0 @@
-; RUN: opt < %s -simplifycfg -instcombine -mem2reg -S | not grep alloca
-;
-; This tests to see if mem2reg can promote alloca instructions whose addresses
-; are used by PHI nodes that are immediately loaded.  The LLVM C++ front-end
-; often generates code that looks like this (when it codegen's ?: exprs as
-; lvalues), so handling this simple extension is quite useful.
-;
-; This testcase is what the following program looks like when it reaches
-; instcombine:
-;
-; template<typename T>
-; const T& max(const T& a1, const T& a2) { return a1 < a2 ? a1 : a2; }
-; int main() { return max(0, 1); }
-;
-; This test checks to make sure the combination of instcombine and mem2reg
-; perform the transformation.
-
-define i32 @main() {
-entry:
-	%mem_tmp.0 = alloca i32		; <i32*> [#uses=3]
-	%mem_tmp.1 = alloca i32		; <i32*> [#uses=3]
-	store i32 0, i32* %mem_tmp.0
-	store i32 1, i32* %mem_tmp.1
-	%tmp.1.i = load i32, i32* %mem_tmp.1		; <i32> [#uses=1]
-	%tmp.3.i = load i32, i32* %mem_tmp.0		; <i32> [#uses=1]
-	%tmp.4.i = icmp sle i32 %tmp.1.i, %tmp.3.i		; <i1> [#uses=1]
-	br i1 %tmp.4.i, label %cond_true.i, label %cond_continue.i
-cond_true.i:		; preds = %entry
-	br label %cond_continue.i
-cond_continue.i:		; preds = %cond_true.i, %entry
-	%mem_tmp.i.0 = phi i32* [ %mem_tmp.1, %cond_true.i ], [ %mem_tmp.0, %entry ]		; <i32*> [#uses=1]
-	%tmp.3 = load i32, i32* %mem_tmp.i.0		; <i32> [#uses=1]
-	ret i32 %tmp.3
-}
--- a/test/Transforms/ScalarRepl/select_promote.ll
+++ b/test/Transforms/ScalarRepl/select_promote.ll
@ -1,18 +0,0 @@
-; Test promotion of loads that use the result of a select instruction.  This
-; should be simplified by the instcombine pass.
-
-; RUN: opt < %s -instcombine -mem2reg -S | not grep alloca
-
-define i32 @main() {
-	%mem_tmp.0 = alloca i32		; <i32*> [#uses=3]
-	%mem_tmp.1 = alloca i32		; <i32*> [#uses=3]
-	store i32 0, i32* %mem_tmp.0
-	store i32 1, i32* %mem_tmp.1
-	%tmp.1.i = load i32, i32* %mem_tmp.1		; <i32> [#uses=1]
-	%tmp.3.i = load i32, i32* %mem_tmp.0		; <i32> [#uses=1]
-	%tmp.4.i = icmp sle i32 %tmp.1.i, %tmp.3.i		; <i1> [#uses=1]
-	%mem_tmp.i.0 = select i1 %tmp.4.i, i32* %mem_tmp.1, i32* %mem_tmp.0		; <i32*> [#uses=1]
-	%tmp.3 = load i32, i32* %mem_tmp.i.0		; <i32> [#uses=1]
-	ret i32 %tmp.3
-}
-
--- a/test/Transforms/ScalarRepl/sroa-fca.ll
+++ b/test/Transforms/ScalarRepl/sroa-fca.ll
@ -1,21 +0,0 @@
-; RUN: opt < %s -scalarrepl | llvm-dis
-; Make sure that SROA "scalar conversion" can handle first class aggregates.
-
-define i64 @test({i32, i32} %A) {
-	%X = alloca i64
-	%Y = bitcast i64* %X to {i32,i32}*
-	store {i32,i32} %A, {i32,i32}* %Y
-	
-	%Q = load i64, i64* %X
-	ret i64 %Q
-}
-
-define {i32,i32} @test2(i64 %A) {
-	%X = alloca i64
-	%Y = bitcast i64* %X to {i32,i32}*
-	store i64 %A, i64* %X
-	
-	%Q = load {i32,i32}, {i32,i32}* %Y
-	ret {i32,i32} %Q
-}
-
--- a/test/Transforms/ScalarRepl/sroa_two.ll
+++ b/test/Transforms/ScalarRepl/sroa_two.ll
@ -1,13 +0,0 @@
-; RUN: opt < %s -scalarrepl | llvm-dis
-
-define i32 @test(i32 %X) {
-	%Arr = alloca [2 x i32]		; <[2 x i32]*> [#uses=3]
-	%tmp.0 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 1, i32* %tmp.0
-	%tmp.1 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 1		; <i32*> [#uses=1]
-	store i32 2, i32* %tmp.1
-	%tmp.3 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 %X		; <i32*> [#uses=1]
-	%tmp.4 = load i32, i32* %tmp.3		; <i32> [#uses=1]
-	ret i32 %tmp.4
-}
-
--- a/test/Transforms/ScalarRepl/union-fp-int.ll
+++ b/test/Transforms/ScalarRepl/union-fp-int.ll
@ -1,14 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   not grep alloca
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   grep "bitcast.*float.*i32"
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @test(float %X) {
-	%X_addr = alloca float		; <float*> [#uses=2]
-	store float %X, float* %X_addr
-	%X_addr.upgrd.1 = bitcast float* %X_addr to i32*		; <i32*> [#uses=1]
-	%tmp = load i32, i32* %X_addr.upgrd.1		; <i32> [#uses=1]
-	ret i32 %tmp
-}
-
--- a/test/Transforms/ScalarRepl/union-packed.ll
+++ b/test/Transforms/ScalarRepl/union-packed.ll
@ -1,14 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   not grep alloca
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   grep bitcast
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define <4 x i32> @test(<4 x float> %X) {
-	%X_addr = alloca <4 x float>		; <<4 x float>*> [#uses=2]
-	store <4 x float> %X, <4 x float>* %X_addr
-	%X_addr.upgrd.1 = bitcast <4 x float>* %X_addr to <4 x i32>*		; <<4 x i32>*> [#uses=1]
-	%tmp = load <4 x i32>, <4 x i32>* %X_addr.upgrd.1		; <<4 x i32>> [#uses=1]
-	ret <4 x i32> %tmp
-}
-
--- a/test/Transforms/ScalarRepl/union-pointer.ll
+++ b/test/Transforms/ScalarRepl/union-pointer.ll
@ -1,74 +0,0 @@
-; PR892
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-
-target datalayout = "e-p:32:32-p1:16:16-n8:16:32"
-target triple = "i686-apple-darwin8.7.2"
-
-%struct.Val = type { i32*, i32 }
-
-define i8* @test(i16* %X) {
-; CHECK-LABEL: @test(
-; CHECK-NOT: alloca
-; CHECK: ret i8*
-	%X_addr = alloca i16*		; <i16**> [#uses=2]
-	store i16* %X, i16** %X_addr
-	%X_addr.upgrd.1 = bitcast i16** %X_addr to i8**		; <i8**> [#uses=1]
-	%tmp = load i8*, i8** %X_addr.upgrd.1		; <i8*> [#uses=1]
-	ret i8* %tmp
-}
-
-define i8 addrspace(1)* @test_as1(i16 addrspace(1)* %x) {
-; CHECK-LABEL: @test_as1(
-; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16
-; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)*
-; CHECK-NEXT: ret i8 addrspace(1)* %2
-    %x_addr = alloca i16 addrspace(1)*
-	store i16 addrspace(1)* %x, i16 addrspace(1)** %x_addr
-	%x_addr.upgrd.1 = bitcast i16 addrspace(1)** %x_addr to i8 addrspace(1)**
-	%tmp = load i8 addrspace(1)*, i8 addrspace(1)** %x_addr.upgrd.1
-	ret i8 addrspace(1)* %tmp
-}
-
-define i8 addrspace(1)* @test_as1_array(i16 addrspace(1)* %x) {
-; CHECK-LABEL: @test_as1_array(
-; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16
-; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)*
-; CHECK-NEXT: ret i8 addrspace(1)* %2
-  %as_ptr_array = alloca [4 x i16 addrspace(1)*]
-  %elem1 = getelementptr [4 x i16 addrspace(1)*], [4 x i16 addrspace(1)*]* %as_ptr_array, i32 0, i32 1
-  store i16 addrspace(1)* %x, i16 addrspace(1)** %elem1
-  %elem1.cast = bitcast i16 addrspace(1)** %elem1 to i8 addrspace(1)**
-  %tmp = load i8 addrspace(1)*, i8 addrspace(1)** %elem1.cast
-  ret i8 addrspace(1)* %tmp
-}
-
-
-define void @test2(i64 %Op.0) {
-; CHECK-LABEL: @test2(
-; CHECK-NOT: alloca
-; CHECK: ret void
-
-	%tmp = alloca %struct.Val, align 8		; <%struct.Val*> [#uses=3]
-	%tmp1 = alloca %struct.Val, align 8		; <%struct.Val*> [#uses=3]
-	%tmp.upgrd.2 = call i64 @_Z3foov( )		; <i64> [#uses=1]
-	%tmp1.upgrd.3 = bitcast %struct.Val* %tmp1 to i64*		; <i64*> [#uses=1]
-	store i64 %tmp.upgrd.2, i64* %tmp1.upgrd.3
-	%tmp.upgrd.4 = getelementptr %struct.Val, %struct.Val* %tmp, i32 0, i32 0		; <i32**> [#uses=1]
-	%tmp2 = getelementptr %struct.Val, %struct.Val* %tmp1, i32 0, i32 0		; <i32**> [#uses=1]
-	%tmp.upgrd.5 = load i32*, i32** %tmp2		; <i32*> [#uses=1]
-	store i32* %tmp.upgrd.5, i32** %tmp.upgrd.4
-	%tmp3 = getelementptr %struct.Val, %struct.Val* %tmp, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp4 = getelementptr %struct.Val, %struct.Val* %tmp1, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp.upgrd.6 = load i32, i32* %tmp4		; <i32> [#uses=1]
-	store i32 %tmp.upgrd.6, i32* %tmp3
-	%tmp7 = bitcast %struct.Val* %tmp to { i64 }*		; <{ i64 }*> [#uses=1]
-	%tmp8 = getelementptr { i64 }, { i64 }* %tmp7, i32 0, i32 0		; <i64*> [#uses=1]
-	%tmp9 = load i64, i64* %tmp8		; <i64> [#uses=1]
-	call void @_Z3bar3ValS_( i64 %Op.0, i64 %tmp9 )
-	ret void
-}
-
-declare i64 @_Z3foov()
-
-declare void @_Z3bar3ValS_(i64, i64)
--- a/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/test/Transforms/ScalarRepl/vector_memcpy.ll
@ -1,28 +0,0 @@
-; RUN: opt < %s -scalarrepl -S > %t
-; RUN: grep "ret <16 x float> %A" %t
-; RUN: grep "ret <16 x float> zeroinitializer" %t
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define <16 x float> @foo(<16 x float> %A) nounwind {
-	%tmp = alloca <16 x float>, align 16
-	%tmp2 = alloca <16 x float>, align 16
-	store <16 x float> %A, <16 x float>* %tmp
-	%s = bitcast <16 x float>* %tmp to i8*
-	%s2 = bitcast <16 x float>* %tmp2 to i8*
-	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s2, i8* %s, i64 64, i32 16, i1 false)
-	%R = load <16 x float>, <16 x float>* %tmp2
-	ret <16 x float> %R
-}
-
-define <16 x float> @foo2(<16 x float> %A) nounwind {
-	%tmp2 = alloca <16 x float>, align 16
-
-	%s2 = bitcast <16 x float>* %tmp2 to i8*
-	call void @llvm.memset.p0i8.i64(i8* %s2, i8 0, i64 64, i32 16, i1 false)
-	
-	%R = load <16 x float>, <16 x float>* %tmp2
-	ret <16 x float> %R
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@ -1,137 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-define void @test1(<4 x float>* %F, float %f) {
-entry:
-	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=3]
-	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp3, <4 x float>* %G
-	%G.upgrd.1 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 0		; <float*> [#uses=1]
-	store float %f, float* %G.upgrd.1
-	%tmp4 = load <4 x float>, <4 x float>* %G		; <<4 x float>> [#uses=2]
-	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp6, <4 x float>* %F
-	ret void
-; CHECK-LABEL: @test1(
-; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>, <4 x float>* %F
-; CHECK: fadd <4 x float> %tmp, %tmp
-; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 0
-}
-
-define void @test2(<4 x float>* %F, float %f) {
-entry:
-	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=3]
-	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp3, <4 x float>* %G
-	%tmp.upgrd.2 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
-	store float %f, float* %tmp.upgrd.2
-	%tmp4 = load <4 x float>, <4 x float>* %G		; <<4 x float>> [#uses=2]
-	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp6, <4 x float>* %F
-	ret void
-; CHECK-LABEL: @test2(
-; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>, <4 x float>* %F
-; CHECK: fadd <4 x float> %tmp, %tmp
-; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 2
-}
-
-define void @test3(<4 x float>* %F, float* %f) {
-entry:
-	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=2]
-	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp3, <4 x float>* %G
-	%tmp.upgrd.3 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
-	%tmp.upgrd.4 = load float, float* %tmp.upgrd.3		; <float> [#uses=1]
-	store float %tmp.upgrd.4, float* %f
-	ret void
-; CHECK-LABEL: @test3(
-; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>, <4 x float>* %F
-; CHECK: fadd <4 x float> %tmp, %tmp
-; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 2
-}
-
-define void @test4(<4 x float>* %F, float* %f) {
-entry:
-	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=2]
-	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
-	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp3, <4 x float>* %G
-	%G.upgrd.5 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 0		; <float*> [#uses=1]
-	%tmp.upgrd.6 = load float, float* %G.upgrd.5		; <float> [#uses=1]
-	store float %tmp.upgrd.6, float* %f
-	ret void
-; CHECK-LABEL: @test4(
-; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>, <4 x float>* %F
-; CHECK: fadd <4 x float> %tmp, %tmp
-; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 0
-}
-
-define i32 @test5(float %X) {  ;; should turn into bitcast.
-	%X_addr = alloca [4 x float]
-        %X1 = getelementptr [4 x float], [4 x float]* %X_addr, i32 0, i32 2
-	store float %X, float* %X1
-	%a = bitcast float* %X1 to i32*
-	%tmp = load i32, i32* %a
-	ret i32 %tmp
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: bitcast float %X to i32
-; CHECK-NEXT: ret i32
-}
-
-define i64 @test6(<2 x float> %X) {
-	%X_addr = alloca <2 x float>
-        store <2 x float> %X, <2 x float>* %X_addr
-	%P = bitcast <2 x float>* %X_addr to i64*
-	%tmp = load i64, i64* %P
-	ret i64 %tmp
-; CHECK-LABEL: @test6(
-; CHECK: bitcast <2 x float> %X to i64
-; CHECK: ret i64
-}
-
-%struct.test7 = type { [6 x i32] }
-
-define void @test7() {
-entry:
-  %memtmp = alloca %struct.test7, align 16
-  %0 = bitcast %struct.test7* %memtmp to <4 x i32>*
-  store <4 x i32> zeroinitializer, <4 x i32>* %0, align 16
-  %1 = getelementptr inbounds %struct.test7, %struct.test7* %memtmp, i64 0, i32 0, i64 5
-  store i32 0, i32* %1, align 4
-  ret void
-; CHECK-LABEL: @test7(
-; CHECK-NOT: alloca
-; CHECK: and i192
-}
-
-; When promoting an alloca to a 1-element vector type, instructions that
-; produce that same vector type should not be changed to insert one element
-; into a new vector. <rdar://problem/14249078>
-define <1 x i64> @test8(<1 x i64> %a) {
-entry:
-  %a.addr = alloca <1 x i64>, align 8
-  %__a = alloca <1 x i64>, align 8
-  %tmp = alloca <1 x i64>, align 8
-  store <1 x i64> %a, <1 x i64>* %a.addr, align 8
-  %0 = load <1 x i64>, <1 x i64>* %a.addr, align 8
-  store <1 x i64> %0, <1 x i64>* %__a, align 8
-  %1 = load <1 x i64>, <1 x i64>* %__a, align 8
-  %2 = bitcast <1 x i64> %1 to <8 x i8>
-  %3 = bitcast <8 x i8> %2 to <1 x i64>
-  %vshl_n = shl <1 x i64> %3, <i64 4>
-  store <1 x i64> %vshl_n, <1 x i64>* %tmp
-  %4 = load <1 x i64>, <1 x i64>* %tmp
-  ret <1 x i64> %4
-; CHECK-LABEL: @test8(
-; CHECK-NOT: alloca
-; CHECK-NOT: insertelement
-; CHECK: ret <1 x i64>
-}
--- a/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll
+++ b/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll
@ -1,27 +0,0 @@
-; RUN: opt -scalarrepl -S < %s | FileCheck %s
-; rdar://9786827
-
-; SROA should be able to handle the mixed types and eliminate the allocas here.
-
-; TODO: Currently it does this by falling back to integer "bags of bits".
-; With enough cleverness, it should be possible to convert between <3 x i32>
-; and <2 x i64> by using a combination of a bitcast and a shuffle.
-
-; CHECK: {
-; CHECK-NOT: alloca
-; CHECK: }
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-darwin11.0.0"
-
-define <2 x i64> @foo() nounwind {
-entry:
-  %retval = alloca <3 x i32>, align 16
-  %z = alloca <4 x i32>, align 16
-  %tmp = load <4 x i32>, <4 x i32>* %z
-  %tmp1 = shufflevector <4 x i32> %tmp, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-  store <3 x i32> %tmp1, <3 x i32>* %retval
-  %0 = bitcast <3 x i32>* %retval to <2 x i64>*
-  %1 = load <2 x i64>, <2 x i64>* %0, align 1
-  ret <2 x i64> %1
-}
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@ -1,13 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-define i32 @voltest(i32 %T) {
-	%A = alloca {i32, i32}
-	%B = getelementptr {i32,i32}, {i32,i32}* %A, i32 0, i32 0
-	store volatile i32 %T, i32* %B
-; CHECK: store volatile
-
-	%C = getelementptr {i32,i32}, {i32,i32}* %A, i32 0, i32 1
-	%X = load volatile i32, i32* %C
-; CHECK: load volatile
-	ret i32 %X
-}
--- a/utils/findoptdiff
+++ b/utils/findoptdiff
@ -70,7 +70,7 @@ dis2="$llvm2/Debug/bin/llvm-dis"
 opt1="$llvm1/Debug/bin/opt"
 opt2="$llvm2/Debug/bin/opt"

-all_switches="-verify -lowersetjmp -simplifycfg -mem2reg -globalopt -globaldce -ipconstprop -deadargelim -instcombine -simplifycfg -prune-eh -inline -simplify-libcalls -argpromotion -tailduplicate -simplifycfg -scalarrepl -instcombine -predsimplify -condprop -tailcallelim -simplifycfg -reassociate -licm -loop-unswitch -instcombine -indvars -loop-unroll -instcombine -load-vn -gcse -sccp -instcombine -condprop -dse -dce -simplifycfg -deadtypeelim -constmerge -internalize -ipsccp -globalopt -constmerge -deadargelim -inline -prune-eh -globalopt -globaldce -argpromotion -instcombine -predsimplify -scalarrepl -globalsmodref-aa -licm -load-vn -gcse -dse -instcombine -simplifycfg -verify"
+all_switches="-verify -lowersetjmp -simplifycfg -mem2reg -globalopt -globaldce -ipconstprop -deadargelim -instcombine -simplifycfg -prune-eh -inline -simplify-libcalls -argpromotion -tailduplicate -simplifycfg -sroa -instcombine -predsimplify -condprop -tailcallelim -simplifycfg -reassociate -licm -loop-unswitch -instcombine -indvars -loop-unroll -instcombine -load-vn -gcse -sccp -instcombine -condprop -dse -dce -simplifycfg -deadtypeelim -constmerge -internalize -ipsccp -globalopt -constmerge -deadargelim -inline -prune-eh -globalopt -globaldce -argpromotion -instcombine -predsimplify -sroa -globalsmodref-aa -licm -load-vn -gcse -dse -instcombine -simplifycfg -verify"

 #counter=0
 function tryit {