commit e6b9b382e2028496706d6a2a1a4394274503ab0c Author: Chris Lattner Date: Wed Jun 6 20:29:01 2001 +0000 Initial revision llvm-svn: 2 diff --git a/Makefile b/Makefile new file mode 100644 index 00000000000..6698a545eb7 --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +LEVEL = . +DIRS = lib tools + +include $(LEVEL)/Makefile.common + +test :: all + cd test; $(MAKE) diff --git a/Makefile.common b/Makefile.common new file mode 100644 index 00000000000..b28b4997f61 --- /dev/null +++ b/Makefile.common @@ -0,0 +1,170 @@ +# Makefile.common +# +# This file is included by all of the LLVM makefiles. This file defines common +# rules to do things like compile a .cpp file or generate dependancy info. +# These are platform dependant, so this is the file used to specify these +# system dependant operations. +# +# The following functionality may be set by setting incoming variables: +# +# 1. LEVEL - The level of the current subdirectory from the top of the +# MagicStats view. This level should be expressed as a path, for +# example, ../.. for two levels deep. +# +# 2. DIRS - A list of subdirectories to be built. Fake targets are set up +# so that each of the targets "all", "install", and "clean" each build. +# the subdirectories before the local target. +# +# 3. Source - If specified, this sets the source code filenames. If this +# is not set, it defaults to be all of the .cpp, .c, .y, and .l files +# in the current directory. +# + +# Default Rule: +all :: + +# Default for install is to at least build everything... +install :: + +#-------------------------------------------------------------------- +# Installation configuration options... +#-------------------------------------------------------------------- + +#BinInstDir=/usr/local/bin +#LibInstDir=/usrl/local/lib/xxx +#DocInstDir=/usr/doc/xxx + +#--------------------------------------------------------- +# Compilation options... 
+#--------------------------------------------------------- + +# Add -L options to the link command lines... +LibPathsO = -L $(LEVEL)/lib/VMCore/Release \ + -L $(LEVEL)/lib/Assembly/Parser/Release \ + -L $(LEVEL)/lib/Assembly/Writer/Release \ + -L $(LEVEL)/lib/Analysis/Release \ + -L $(LEVEL)/lib/Bytecode/Writer/Release \ + -L $(LEVEL)/lib/Bytecode/Reader/Release \ + -L $(LEVEL)/lib/Optimizations/Release + +LibPathsG = $(LibPathsO:Release=Debug) + +# Enable this for profiling support with 'gprof' +#Prof = -pg + +# TODO: Get rid of exceptions! : -fno-exceptions -fno-rtti +CompileCommonOpts = $(Prof) -Wall -Winline -W -Wwrite-strings -Wno-unused -I$(LEVEL)/include + +# Compile a file, don't link... +Compile = $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $(CompileCommonOpts) +CompileG = $(Compile) -g -D_DEBUG +# Add This for DebugMalloc: -fno-defer-pop +CompileO = $(Compile) -O3 -DNDEBUG -finline-functions -felide-constructors -fnonnull-objects -freg-struct-return -fshort-enums + +# Link final executable +Link = $(CXX) $(Prof) +LinkG = $(Link) -g $(LibPathsG) +LinkO = $(Link) -O3 $(LibPathsO) + +# Create a .so file from a .cpp file... +#MakeSO = $(CXX) -shared $(Prof) +MakeSO = $(CXX) -G $(Prof) +MakeSOG = $(MakeSO) -g +MakeSOO = $(MakeSO) -O3 + +# Create dependancy file from CPP file, send to stdout. +Depend = $(CXX) -MM -I$(LEVEL)/include $(CPPFLAGS) + +# Archive a bunch of .o files into a .a file... +AR = ar cq + +#---------------------------------------------------------- + +# Source includes all of the cpp files, and objects are derived from the +# source files... +ifndef Source +Source = $(wildcard *.cpp *.c *.y *.l) +endif +Objs = $(sort $(addsuffix .o,$(basename $(Source)))) +ObjectsO = $(addprefix Release/,$(Objs)) +ObjectsG = $(addprefix Debug/,$(Objs)) + +#--------------------------------------------------------- +# Handle the DIRS option +#--------------------------------------------------------- + +ifdef DIRS # Only do this if we're using DIRS! 
+ +all :: $(addsuffix /.makeall , $(DIRS)) +install :: $(addsuffix /.makeinstall, $(DIRS)) +clean :: $(addsuffix /.makeclean , $(DIRS)) + +%/.makeall %/.makeclean %/.makeinstall: + cd $(@D); $(MAKE) $(subst $(@D)/.make,,$@) +endif + +#--------------------------------------------------------- +# Handle the LIBRARYNAME option - used when building libs... +#--------------------------------------------------------- + +ifdef LIBRARYNAME +LIBNAME_O := Release/lib$(LIBRARYNAME).so +LIBNAME_G := Debug/lib$(LIBRARYNAME).so + +all:: $(LIBNAME_G) +#$(LIBNAME_O) +# TODO: Enable optimized builds + +$(LIBNAME_O): $(ObjectsO) $(LibSubDirs) Release/.dir + @echo ======= Linking $(LIBRARYNAME) release library ======= + $(MakeSOO) -o $@ $(ObjectsO) $(LibSubDirs) $(LibLinkOpts) + +$(LIBNAME_G): $(ObjectsG) $(LibSubDirs) Debug/.dir + @echo ======= Linking $(LIBRARYNAME) debug library ======= + $(MakeSOG) -o $@ $(ObjectsG) $(LibSubDirs) $(LibLinkOpts) + +endif + + +#--------------------------------------------------------- + +# Create dependacies for the cpp files... +Depend/%.d: %.cpp Depend/.dir + $(Depend) $< | sed 's|$*\.o *|Release/& Debug/& Depend/$(@F)|g' > $@ + +# Create .o files in the ObjectFiles directory from the .cpp files... +Release/%.o: %.cpp Release/.dir Depend/.dir + $(CompileO) $< -o $@ + +Debug/%.o: %.cpp Debug/.dir Depend/.dir + $(CompileG) $< -o $@ + +# Create a .cpp source file from a flex input file... this uses sed to cut down +# on the warnings emited by GCC... +%.cpp: %.l + flex -t $< | sed '/^find_rule/d' | sed 's/void yyunput/inline void yyunput/' | sed 's/void \*yy_flex_realloc/inline void *yy_flex_realloc/' > $@ + +# Rule for building the bison parsers... + +%.cpp %.h : %.y + bison -d -p $(<:%Parser.y=%) $(basename $@).y + mv -f $(basename $@).tab.c $(basename $@).cpp + mv -f $(basename $@).tab.h $(basename $@).h + +# To create the directories... +%/.dir: + mkdir -p $(@D) + @date > $@ + +# Clean does not remove the output files... 
just the temporaries +clean:: + rm -rf Debug Release Depend + rm -f core *.o *.d *.so *~ *.flc + +# If dependancies were generated for the file that included this file, +# include the dependancies now... +# +SourceDepend = $(addsuffix .d,$(addprefix Depend/,$(basename $(Source)))) +ifneq ($(SourceDepend),) +include $(SourceDepend) +endif diff --git a/Makefile.rules b/Makefile.rules new file mode 100644 index 00000000000..b28b4997f61 --- /dev/null +++ b/Makefile.rules @@ -0,0 +1,170 @@ +# Makefile.common +# +# This file is included by all of the LLVM makefiles. This file defines common +# rules to do things like compile a .cpp file or generate dependancy info. +# These are platform dependant, so this is the file used to specify these +# system dependant operations. +# +# The following functionality may be set by setting incoming variables: +# +# 1. LEVEL - The level of the current subdirectory from the top of the +# MagicStats view. This level should be expressed as a path, for +# example, ../.. for two levels deep. +# +# 2. DIRS - A list of subdirectories to be built. Fake targets are set up +# so that each of the targets "all", "install", and "clean" each build. +# the subdirectories before the local target. +# +# 3. Source - If specified, this sets the source code filenames. If this +# is not set, it defaults to be all of the .cpp, .c, .y, and .l files +# in the current directory. +# + +# Default Rule: +all :: + +# Default for install is to at least build everything... +install :: + +#-------------------------------------------------------------------- +# Installation configuration options... +#-------------------------------------------------------------------- + +#BinInstDir=/usr/local/bin +#LibInstDir=/usrl/local/lib/xxx +#DocInstDir=/usr/doc/xxx + +#--------------------------------------------------------- +# Compilation options... +#--------------------------------------------------------- + +# Add -L options to the link command lines... 
+LibPathsO = -L $(LEVEL)/lib/VMCore/Release \ + -L $(LEVEL)/lib/Assembly/Parser/Release \ + -L $(LEVEL)/lib/Assembly/Writer/Release \ + -L $(LEVEL)/lib/Analysis/Release \ + -L $(LEVEL)/lib/Bytecode/Writer/Release \ + -L $(LEVEL)/lib/Bytecode/Reader/Release \ + -L $(LEVEL)/lib/Optimizations/Release + +LibPathsG = $(LibPathsO:Release=Debug) + +# Enable this for profiling support with 'gprof' +#Prof = -pg + +# TODO: Get rid of exceptions! : -fno-exceptions -fno-rtti +CompileCommonOpts = $(Prof) -Wall -Winline -W -Wwrite-strings -Wno-unused -I$(LEVEL)/include + +# Compile a file, don't link... +Compile = $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $(CompileCommonOpts) +CompileG = $(Compile) -g -D_DEBUG +# Add This for DebugMalloc: -fno-defer-pop +CompileO = $(Compile) -O3 -DNDEBUG -finline-functions -felide-constructors -fnonnull-objects -freg-struct-return -fshort-enums + +# Link final executable +Link = $(CXX) $(Prof) +LinkG = $(Link) -g $(LibPathsG) +LinkO = $(Link) -O3 $(LibPathsO) + +# Create a .so file from a .cpp file... +#MakeSO = $(CXX) -shared $(Prof) +MakeSO = $(CXX) -G $(Prof) +MakeSOG = $(MakeSO) -g +MakeSOO = $(MakeSO) -O3 + +# Create dependancy file from CPP file, send to stdout. +Depend = $(CXX) -MM -I$(LEVEL)/include $(CPPFLAGS) + +# Archive a bunch of .o files into a .a file... +AR = ar cq + +#---------------------------------------------------------- + +# Source includes all of the cpp files, and objects are derived from the +# source files... +ifndef Source +Source = $(wildcard *.cpp *.c *.y *.l) +endif +Objs = $(sort $(addsuffix .o,$(basename $(Source)))) +ObjectsO = $(addprefix Release/,$(Objs)) +ObjectsG = $(addprefix Debug/,$(Objs)) + +#--------------------------------------------------------- +# Handle the DIRS option +#--------------------------------------------------------- + +ifdef DIRS # Only do this if we're using DIRS! 
+ +all :: $(addsuffix /.makeall , $(DIRS)) +install :: $(addsuffix /.makeinstall, $(DIRS)) +clean :: $(addsuffix /.makeclean , $(DIRS)) + +%/.makeall %/.makeclean %/.makeinstall: + cd $(@D); $(MAKE) $(subst $(@D)/.make,,$@) +endif + +#--------------------------------------------------------- +# Handle the LIBRARYNAME option - used when building libs... +#--------------------------------------------------------- + +ifdef LIBRARYNAME +LIBNAME_O := Release/lib$(LIBRARYNAME).so +LIBNAME_G := Debug/lib$(LIBRARYNAME).so + +all:: $(LIBNAME_G) +#$(LIBNAME_O) +# TODO: Enable optimized builds + +$(LIBNAME_O): $(ObjectsO) $(LibSubDirs) Release/.dir + @echo ======= Linking $(LIBRARYNAME) release library ======= + $(MakeSOO) -o $@ $(ObjectsO) $(LibSubDirs) $(LibLinkOpts) + +$(LIBNAME_G): $(ObjectsG) $(LibSubDirs) Debug/.dir + @echo ======= Linking $(LIBRARYNAME) debug library ======= + $(MakeSOG) -o $@ $(ObjectsG) $(LibSubDirs) $(LibLinkOpts) + +endif + + +#--------------------------------------------------------- + +# Create dependacies for the cpp files... +Depend/%.d: %.cpp Depend/.dir + $(Depend) $< | sed 's|$*\.o *|Release/& Debug/& Depend/$(@F)|g' > $@ + +# Create .o files in the ObjectFiles directory from the .cpp files... +Release/%.o: %.cpp Release/.dir Depend/.dir + $(CompileO) $< -o $@ + +Debug/%.o: %.cpp Debug/.dir Depend/.dir + $(CompileG) $< -o $@ + +# Create a .cpp source file from a flex input file... this uses sed to cut down +# on the warnings emited by GCC... +%.cpp: %.l + flex -t $< | sed '/^find_rule/d' | sed 's/void yyunput/inline void yyunput/' | sed 's/void \*yy_flex_realloc/inline void *yy_flex_realloc/' > $@ + +# Rule for building the bison parsers... + +%.cpp %.h : %.y + bison -d -p $(<:%Parser.y=%) $(basename $@).y + mv -f $(basename $@).tab.c $(basename $@).cpp + mv -f $(basename $@).tab.h $(basename $@).h + +# To create the directories... +%/.dir: + mkdir -p $(@D) + @date > $@ + +# Clean does not remove the output files... 
just the temporaries +clean:: + rm -rf Debug Release Depend + rm -f core *.o *.d *.so *~ *.flc + +# If dependancies were generated for the file that included this file, +# include the dependancies now... +# +SourceDepend = $(addsuffix .d,$(addprefix Depend/,$(basename $(Source)))) +ifneq ($(SourceDepend),) +include $(SourceDepend) +endif diff --git a/docs/ChrisNotes.txt b/docs/ChrisNotes.txt new file mode 100644 index 00000000000..f0ea5c6f06c --- /dev/null +++ b/docs/ChrisNotes.txt @@ -0,0 +1,50 @@ +* Provide a pass that eliminates critical edges from the CFG +* Provide a print hook to print out xvcg format files for vis +* I need to provide an option to the bytecode loader to ignore memory + dependance edges. Instead, the VM would just treat memory operations + (load, store, getfield, putfield, call) as pinned instructions. +* I need to have a way to prevent taking the address of a constant pool + reference. You should only be able to take the address of a variable. + Maybe taking the address of a constant copies it? What about virtual + function tables? Maybe a const pointer would be better... +* Structures should be accessed something like this: ubyte is ok. Limits + structure size to 256 members. This can be fixed later by either: + 1. adding varient that takes ushort + 2. Splitting structures into nested structures each of half size + %f = loadfield *{int, {float}} Str, ubyte 1, ubyte 0 + storefield float %f, *{int, {float}} Str, ubyte 1, ubyte 0 +* I'm noticing me writing a lot of code that looks like this (dtor material here): + ConstPool.dropAllReferences(); + ConstPool.delete_all(); + ConstPool.setParent(0); + ~ConstPool + +* Need a way to attach bytecode block info at various levels of asm code. +* Rename "ConstantPool" to "ConstPool" +* Maybe ConstantPool objects should keep themselves sorted as things are + inserted. +* Need to be able to inflate recursive types. 
%x = { *%x }, %x = %x () +* Recognize and save comments in assembly and bytecode format +* Encode line number table in bytecode (like #line), optional table + +* Encode negative relative offsets in the bytecode file + +* Implement switch to switch on a constant pool array of type: + [{ label, int }] or [label] (lookup vs index switch) +* Apparently bison has a %pure_parser option. Maybe useful for Assembly/Parser + +* Implement a header file that can read either assembly or bytecode, implement + a writer that can output either based on what is read with this reader.. +* Implement the following derived types: + * structure/record { int %foo, int %bar} or { %foo = int, int } + * pointer int * + * "packed format", like this: [4 x sbyte]: Packed SIMD datatype +* Maybe 'tailcall' also? +* It might be nice to support enumerations of some sort... especially for use + as a compiler IR +* Include a method level bytecode block that defines a mapping between values + and registers that defines a minimally register allocated code. This can + make me finally address how to encode extensions in assembly. +* Bytecode reader should use extensions that may or may not be linked into the + application to read blocks. Thus an easy way to ignore symbol table info + would be to not link in that reader into the app. diff --git a/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt new file mode 100644 index 00000000000..f0861811920 --- /dev/null +++ b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt @@ -0,0 +1,74 @@ +Date: Sat, 18 Nov 2000 09:19:35 -0600 (CST) +From: Vikram Adve +To: Chris Lattner +Subject: a few thoughts + +I've been mulling over the virtual machine problem and I had some +thoughts about some things for us to think about discuss: + +1. We need to be clear on our goals for the VM. Do we want to emphasize + portability and safety like the Java VM? 
Or shall we focus on the + architecture interface first (i.e., consider the code generation and + processor issues), since the architecture interface question is also + important for portable Java-type VMs? + + This is important because the audiences for these two goals are very + different. Architects and many compiler people care much more about + the second question. The Java compiler and OS community care much more + about the first one. + + Also, while the architecture interface question is important for + Java-type VMs, the design constraints are very different. + + +2. Design issues to consider (an initial list that we should continue + to modify). Note that I'm not trying to suggest actual solutions here, + but just various directions we can pursue: + + a. A single-assignment VM, which we've both already been thinking about. + + b. A strongly-typed VM. One question is do we need the types to be + explicitly declared or should they be inferred by the dynamic compiler? + + c. How do we get more high-level information into the VM while keeping + to a low-level VM design? + + o Explicit array references as operands? An alternative is + to have just an array type, and let the index computations be + separate 3-operand instructions. + + o Explicit instructions to handle aliasing, e.g.s: + -- an instruction to say "I speculate that these two values are not + aliased, but check at runtime", like speculative execution in + EPIC? + -- or an instruction to check whether two values are aliased and + execute different code depending on the answer, somewhat like + predicated code in EPIC + + o (This one is a difficult but powerful idea.) + A "thread-id" field on every instruction that allows the static + compiler to generate a set of parallel threads, and then have + the runtime compiler and hardware do what they please with it. + This has very powerful uses, but thread-id on every instruction + is expensive in terms of instruction size and code size. 
+ We would need to compactly encode it somehow. + + Also, this will require some reading on at least two other + projects: + -- Multiscalar architecture from Wisconsin + -- Simultaneous multithreading architecture from Washington + + o Or forget all this and stick to a traditional instruction set? + + +BTW, on an unrelated note, after the meeting yesterday, I did remember +that you had suggested doing instruction scheduling on SSA form instead +of a dependence DAG earlier in the semester. When we talked about +it yesterday, I didn't remember where the idea had come from but I +remembered later. Just giving credit where its due... + +Perhaps you can save the above as a file under RCS so you and I can +continue to expand on this. + +--Vikram + diff --git a/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt new file mode 100644 index 00000000000..1c725f5aa71 --- /dev/null +++ b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt @@ -0,0 +1,199 @@ +Date: Sun, 19 Nov 2000 16:23:57 -0600 (CST) +From: Chris Lattner +To: Vikram Adve +Subject: Re: a few thoughts + +Okay... here are a few of my thoughts on this (it's good to know that we +think so alike!): + +> 1. We need to be clear on our goals for the VM. Do we want to emphasize +> portability and safety like the Java VM? Or shall we focus on the +> architecture interface first (i.e., consider the code generation and +> processor issues), since the architecture interface question is also +> important for portable Java-type VMs? + +I foresee the architecture looking kinda like this: (which is completely +subject to change) + +1. The VM code is NOT guaranteed safe in a java sense. Doing so makes it + basically impossible to support C like languages. Besides that, + certifying a register based language as safe at run time would be a + pretty expensive operation to have to do.
Additionally, we would like + to be able to statically eliminate many bounds checks in Java + programs... for example. + + 2. Instead, we can do the following (eventually): + * Java bytecode is used as our "safe" representation (to avoid + reinventing something that we don't add much value to). When the + user chooses to execute Java bytecodes directly (ie, not + precompiled) the runtime compiler can do some very simple + transformations (JIT style) to convert it into valid input for our + VM. Performance is not wonderful, but it works right. + * The file is scheduled to be compiled (rigorously) at a later + time. This could be done by some background process or by a second + processor in the system during idle time or something... + * To keep things "safe" ie to enforce a sandbox on Java/foreign code, + we could sign the generated VM code with a host specific private + key. Then before the code is executed/loaded, we can check to see if + the trusted compiler generated the code. This would be much quicker + than having to validate consistency (especially if bounds checks have + been removed, for example) + +> This is important because the audiences for these two goals are very +> different. Architects and many compiler people care much more about +> the second question. The Java compiler and OS community care much more +> about the first one. + +3. By focusing on a more low level virtual machine, we have much more room + for value add. The nice safe "sandbox" VM can be provided as a layer + on top of it. It also lets us focus on the more interesting compilers + related projects. + +> 2. Design issues to consider (an initial list that we should continue +> to modify). Note that I'm not trying to suggest actual solutions here, +> but just various directions we can pursue: + +Understood. :) + +> a. A single-assignment VM, which we've both already been thinking +> about. + +Yup, I think that this makes a lot of sense. 
I am still intrigued, +however, by the prospect of a minimally allocated VM representation... I +think that it could have definite advantages for certain applications +(think very small machines, like PDAs). I don't, however, think that our +initial implementations should focus on this. :) + +Here are some other auxiliary goals that I think we should consider: + +1. Primary goal: Support a high performance dynamic compilation + system. This means that we have an "ideal" division of labor between + the runtime and static compilers. Of course, the other goals of the + system somewhat reduce the importance of this point (f.e. portability + reduces performance, but hopefully not much) +2. Portability to different processors. Since we are most familiar with + x86 and solaris, I think that these two are excellent candidates when + we get that far... +3. Support for all languages & styles of programming (general purpose + VM). This is the point that disallows java style bytecodes, where all + array refs are checked for bounds, etc... +4. Support linking between different language families. For example, call + C functions directly from Java without using the nasty/slow/gross JNI + layer. This involves several subpoints: + A. Support for languages that require garbage collectors and integration + with languages that don't. As a base point, we could insist on + always using a conservative GC, but implement free as a noop, f.e. + +> b. A strongly-typed VM. One question is do we need the types to be +> explicitly declared or should they be inferred by the dynamic +> compiler? + + B. This is kind of similar to another idea that I have: make OOP + constructs (virtual function tables, class hierarchies, etc) explicit + in the VM representation. I believe that the number of additional + constructs would be fairly low, but would give us lots of important + information...
something else that would/could be important is to + have exceptions as first class types so that they would be handled in + a uniform way for the entire VM... so that C functions can call Java + functions for example... + +> c. How do we get more high-level information into the VM while keeping +> to a low-level VM design? +> o Explicit array references as operands? An alternative is +> to have just an array type, and let the index computations be +> separate 3-operand instructions. + + C. In the model I was thinking of (subject to change of course), we + would just have an array type (distinct from the pointer + types). This would allow us to have arbitrarily complex index + expressions, while still distinguishing "load" from "Array load", + for example. Perhaps also, switch jump tables would be first class + types as well? This would allow better reasoning about the program. + +5. Support dynamic loading of code from various sources. Already + mentioned above was the example of loading java bytecodes, but we want + to support dynamic loading of VM code as well. This makes the job of + the runtime compiler much more interesting: it can do interprocedural + optimizations that the static compiler can't do, because it doesn't + have all of the required information (for example, inlining from + shared libraries, etc...) + +6. Define a set of generally useful annotations to add to the VM + representation. For example, a function can be analysed to see if it + has any sideeffects when run... also, the MOD/REF sets could be + calculated, etc... we would have to determine what is reasonable. This + would generally be used to make IP optimizations cheaper for the + runtime compiler... + +> o Explicit instructions to handle aliasing, e.g.s: +> -- an instruction to say "I speculate that these two values are not +> aliased, but check at runtime", like speculative execution in +> EPIC? 
+> -- or an instruction to check whether two values are aliased and +> execute different code depending on the answer, somewhat like +> predicated code in EPIC + +These are also very good points... if this can be determined at compile +time. I think that an epic style of representation (not the instruction +packing, just the information presented) could be a very interesting model +to use... more later... + +> o (This one is a difficult but powerful idea.) +> A "thread-id" field on every instruction that allows the static +> compiler to generate a set of parallel threads, and then have +> the runtime compiler and hardware do what they please with it. +> This has very powerful uses, but thread-id on every instruction +> is expensive in terms of instruction size and code size. +> We would need to compactly encode it somehow. + +Yes yes yes! :) I think it would be *VERY* useful to include this kind +of information (which EPIC architectures *implicitly* encode). The trend +that we are seeing supports this greatly: + +1. Commodity processors are getting massive SIMD support: + * Intel/Amd MMX/MMX2 + * AMD's 3Dnow! + * Intel's SSE/SSE2 + * Sun's VIS +2. SMP is becoming much more common, especially in the server space. +3. Multiple processors on a die are right around the corner. + +If nothing else, not designing this in would severely limit our future +expansion of the project... + +> Also, this will require some reading on at least two other +> projects: +> -- Multiscalar architecture from Wisconsin +> -- Simultaneous multithreading architecture from Washington +> +> o Or forget all this and stick to a traditional instruction set? + +Heh... :) Well, from a pure research point of view, it is almost more +attractive to go with the most extreme/different ISA possible. On one axis +you get safety and conservatism, and on the other you get degree of +influence that the results have.
Of course the problem with pure research +is that often times there is no concrete product of the research... :) + +> BTW, on an unrelated note, after the meeting yesterday, I did remember +> that you had suggested doing instruction scheduling on SSA form instead +> of a dependence DAG earlier in the semester. When we talked about +> it yesterday, I didn't remember where the idea had come from but I +> remembered later. Just giving credit where its due... + +:) Thanks. + +> Perhaps you can save the above as a file under RCS so you and I can +> continue to expand on this. + +I think it makes sense to do so when we get our ideas more formalized and +bounce it back and forth a couple of times... then I'll do a more formal +writeup of our goals and ideas. Obviously our first implementation will +not want to do all of the stuff that I pointed out above... but we will +want to design the project so that we do not artificially limit ourselves +at some time in the future... + +Anyways, let me know what you think about these ideas... and if they sound +reasonable... + +-Chris + diff --git a/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt b/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt new file mode 100644 index 00000000000..8c452924dd1 --- /dev/null +++ b/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt @@ -0,0 +1,30 @@ +From: Chris Lattner [mailto:sabre@nondot.org] +Sent: Wednesday, December 06, 2000 6:41 PM +To: Vikram S. Adve +Subject: Additional idea with respect to encoding + +Here's another idea with respect to keeping the common case instruction +size down (less than 32 bits ideally): + +Instead of encoding an instruction to operate on two register numbers, +have it operate on two negative offsets based on the current register +number.
Therefore, instead of using: + +r57 = add r55, r56 (r57 is the implicit dest register, of course) + +We could use: + +r57 = add -2, -1 + +My guess is that most SSA references are to recent values (especially if +they correspond to expressions like (x+y*z+p*q/ ...)), so the negative +numbers would tend to stay small, even at the end of the procedure (where +the implicit register destination number could be quite large). Of course +the negative sign is redundant, so you would be storing small integers +almost all of the time, and 5-6 bits worth of register number would be +plenty for most cases... + +What do you think? + +-Chris + diff --git a/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt b/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt new file mode 100644 index 00000000000..b66e18556f5 --- /dev/null +++ b/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt @@ -0,0 +1,83 @@ +SUMMARY +------- + +We met to discuss the LLVM instruction format and bytecode representation: + +ISSUES RESOLVED +--------------- + +1. We decided that we shall use a flat namespace to represent our + variables in SSA form, as opposed to having a two dimensional namespace + of the original variable and the SSA instance subscript. + +ARGUMENT AGAINST: + * A two dimensional namespace would be valuable when doing alias + analysis because the extra information can help limit the scope of + analysis. + +ARGUMENT FOR: + * Including this information would require that all users of the LLVM + bytecode would have to parse and handle it. This would slow down the + common case and inflate the instruction representation with another + infinite variable space. + +REASONING: + * It was decided that because original variable sources could be + reconstructed from SSA form in linear time, that it would be an + unjustified expense for the common case to include the extra + information for one optimization.
Alias analysis itself is typically + greater than linear in asymptotic complexity, so this extra analysis + would not affect the runtime of the optimization in a significant + way. Additionally, this would be an unlikely optimization to do at + runtime. + + +IDEAS TO CONSIDER +----------------- + +1. Including dominator information in the LLVM bytecode + representation. This is one example of an analysis result that may be + packaged with the bytecodes themselves. As a conceptual implementation + idea, we could include an immediate dominator number for each basic block + in the LLVM bytecode program. Basic blocks could be numbered according + to the order of occurrence in the bytecode representation. + +2. Including loop header and body information. This would facilitate + detection of intervals and natural loops. + +UNRESOLVED ISSUES +----------------- + +1. Will oSUIF provide enough of an infrastructure to support the research + that we will be doing? We know that it has less than stellar + performance, but hope that this will be of little importance for our + static compiler. This could affect us if we decided to do some IP + research. Also we do not yet understand the level of exception support + currently implemented. + +2. Should we consider the requirements of a direct hardware implementation + of the LLVM when we design it? If so, several design issues should + have their priorities shifted. The other option is to focus on a + software layer interpreting the LLVM in all cases. + +3. Should we use some form of packetized format to improve forward + compatibility? For example, we could design the system to encode a + packet type and length field before analysis information, to allow a + runtime to skip information that it didn't understand in a bytecode + stream. The obvious benefit would be for compatibility, the drawback + is that it would tend to splinter that 'standard' LLVM definition. + +4.
Should we use fixed length instructions or variable length + instructions? Fetching variable length instructions is expensive (for + either hardware or software based LLVM runtimes), but we have several + 'infinite' spaces that instructions operate in (SSA register numbers, + type spaces, or packet length [if packets were implemented]). Several + options were mentioned including: + A. Using 16 or 32 bit numbers, which would be 'big enough' + B. A scheme similar to how UTF-8 works, to encode infinite numbers + while keeping small numbers small. + C. Use something similar to Huffman encoding, so that the most common + numbers are the smallest. + +-Chris + diff --git a/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt b/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt new file mode 100644 index 00000000000..111706a3447 --- /dev/null +++ b/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt @@ -0,0 +1,39 @@ +Date: Wed, 31 Jan 2001 12:04:33 -0600 +From: Vikram S. Adve +To: Chris Lattner +Subject: another thought + +I have a budding idea about making LLVM a little more ambitious: a +customizable runtime system that can be used to implement language-specific +virtual machines for many different languages. E.g., a C vm, a C++ vm, a +Java vm, a Lisp vm, .. + +The idea would be that LLVM would provide a standard set of runtime features +(some low-level like standard assembly instructions with code generation and +static and runtime optimization; some higher-level like type-safety and +perhaps a garbage collection library). Each language vm would select the +runtime features needed for that language, extending or customizing them as +needed. Most of the machine-dependent code-generation and optimization +features as well as low-level machine-independent optimizations (like PRE) +could be provided by LLVM and should be sufficient for any language, +simplifying the language compiler. (This would also help interoperability +between languages.)
Also, some or most of the higher-level +machine-independent features like type-safety and access safety should be +reusable by different languages, with minor extensions. The language +compiler could then focus on language-specific analyses and optimizations. + +The risk is that this sounds like a universal IR -- something that the +compiler community has tried and failed to develop for decades, and is +universally skeptical about. No matter what we say, we won't be able to +convince anyone that we have a universal IR that will work. We need to +think about whether LLVM is different or if it has something novel that might +convince people. E.g., the idea of providing a package of separable +features that different languages select from. Also, using SSA with or +without type-safety as the intermediate representation. + +One interesting starting point would be to discuss how a JVM would be +implemented on top of LLVM a bit more. That might give us clues on how to +structure LLVM to support one or more language VMs. + +--Vikram + diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt new file mode 100644 index 00000000000..c09cf1f03cc --- /dev/null +++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt @@ -0,0 +1,67 @@ +Date: Tue, 6 Feb 2001 20:27:37 -0600 (CST) +From: Chris Lattner +To: Vikram S. Adve +Subject: Type notation debate...
+ +This is the way that I am currently planning on implementing types: + +Primitive Types: +type ::= void|bool|sbyte|ubyte|short|ushort|int|uint|long|ulong + +Method: +typelist ::= typelisth | /*empty*/ +typelisth ::= type | typelisth ',' type +type ::= type (typelist) + +Arrays (without and with size): +type ::= '[' type ']' | '[' INT ',' type ']' + +Pointer: +type ::= type '*' + +Structure: +type ::= '{' typelist '}' + +Packed: +type ::= '<' INT ',' type '>' + +Simple examples: + +[[ %4, int ]] - array of (array of 4 (int)) +[ { int, int } ] - Array of structure +[ < %4, int > ] - Array of 128 bit SIMD packets +int (int, [[int, %4]]) - Method taking a 2d array and int, returning int + + +Okay before you comment, please look at: + +http://www.research.att.com/~bs/devXinterview.html + +Search for "In another interview, you defined the C declarator syntax as +an experiment that failed. However, this syntactic construct has been +around for 27 years and perhaps more; why do you consider it problematic +(except for its cumbersome syntax)?" and read that response for me. :) + +Now with this syntax, his example would be represented as: + +[ %10, bool (int, int) * ] * + +vs + +bool (*(*)[10])(int, int) + +in C. + +Basically, my argument for this type construction system is that it is +VERY simple to use and understand (although it IS different than C, it is +very simple and straightforward, which C is NOT). In fact, I would assert +that most programmers TODAY do not understand pointers to member +functions, and have to look up an example when they have to write them. + +In my opinion, it is critically important to have clear and concise type +specifications, because types are going to be all over the programs. + +Let me know your thoughts on this. 
:) + +-Chris + diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt new file mode 100644 index 00000000000..8bfefbf69f6 --- /dev/null +++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt @@ -0,0 +1,75 @@ +Date: Thu, 8 Feb 2001 08:42:04 -0600 +From: Vikram S. Adve +To: Chris Lattner +Subject: RE: Type notation debate... + +Chris, + +> Okay before you comment, please look at: +> +> http://www.research.att.com/~bs/devXinterview.html + +I read this argument. Even before that, I was already in agreement with you +and him that the C declarator syntax is difficult and confusing. + +But in fact, if you read the entire answer carefully, he came to the same +conclusion I do: that you have to go with familiar syntax over logical +syntax because familiarity is such a strong force: + + "However, familiarity is a strong force. To compare, in English, we +live +more or less happily with the absurd rules for "to be" (am, are, is, been, +was, were, ...) and all attempts to simplify are treated with contempt or +(preferably) humor. It be a curious world and it always beed." + +> Basically, my argument for this type construction system is that it is +> VERY simple to use and understand (although it IS different than C, it is +> very simple and straightforward, which C is NOT). In fact, I would assert +> that most programmers TODAY do not understand pointers to member +> functions, and have to look up an example when they have to write them. + +Again, I don't disagree with this at all. But to some extent this +particular problem is inherently difficult. Your syntax for the above +example may be easier for you to read because this is the way you have been +thinking about it. Honestly, I don't find it much easier than the C syntax. +In either case, I would have to look up an example to write pointers to +member functions. 
+ +But pointers to member functions are nowhere near as common as arrays. And +the old array syntax: + type [ int, int, ...] +is just much more familiar and clear to people than anything new you +introduce, no matter how logical it is. Introducing a new syntax that may +make function pointers easier but makes arrays much more difficult seems +very risky to me. + +> In my opinion, it is critically important to have clear and concise type +> specifications, because types are going to be all over the programs. + +I absolutely agree. But the question is, what is more clear and concise? +The syntax programmers are used to out of years of experience or a new +syntax that they have never seen that has a more logical structure. I think +the answer is the former. Sometimes, you have to give up a better idea +because you can't overcome sociological barriers to it. Qwerty keyboards +and Windows are two classic examples of bad technology that are difficult to +root out. + +P.S. Also, while I agree that most of your syntax is more logical, there is +one part that isn't: + +Arrays (without and with size): +type ::= '[' type ']' | '[' INT ',' type ']'. + +The arrays with size lists the dimensions and the type in a single list. +That is just too confusing: + [10, 40, int] +This seems to be a 3-D array where the third dimension is something strange. +It is too confusing to have a list of 3 things, some of which are dimensions +and one is a type. Either of the following would be better: + + array [10, 40] of int +or + int [10, 40] + +--Vikram + diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt new file mode 100644 index 00000000000..6e9784158a3 --- /dev/null +++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt @@ -0,0 +1,53 @@ +Date: Thu, 8 Feb 2001 14:31:05 -0600 (CST) +From: Chris Lattner +To: Vikram S. Adve +Subject: RE: Type notation debate...
+ +> Arrays (without and with size): +> type ::= '[' type ']' | '[' INT ',' type ']'. +> +> The arrays with size lists the dimensions and the type in a single list. +> That is just too confusing: + +> [10, 40, int] +> This seems to be a 3-D array where the third dimension is something strange. +> It is too confusing to have a list of 3 things, some of which are dimensions +> and one is a type. + +The above grammar indicates that there is only one integer parameter, ie +the upper bound. The lower bound is always implied to be zero, for +several reasons: + +* As a low level VM, we want to expose addressing computations + explicitly. Since the lower bound must always be known in a high level + language statically, the language front end can do the translation + automatically. +* This fits more closely with what Java needs, ie what we need in the + short term. Java arrays are always zero based. + +If a two element list is too confusing, I would recommend an alternate +syntax of: + +type ::= '[' type ']' | '[' INT 'x' type ']'. + +For example: + [12 x int] + [12x int] + [ 12 x [ 4x int ]] + +Which is syntactically nicer, and more explicit. + +> Either of the following would be better: +> array [10, 40] of int + +I considered this approach for arrays in general (ie array of int/ array +of 12 int), but found that it made declarations WAY too long. Remember +that because of the nature of llvm, you get a lot of types strewn all over +the program, and using the 'typedef' like facility is not a wonderful +option, because then types aren't explicit anymore. + +I find this email interesting, because you contradict the previous email +you sent, where you recommend that we stick to C syntax.... 
+ +-Chris + diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt new file mode 100644 index 00000000000..7b9032742a2 --- /dev/null +++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt @@ -0,0 +1,89 @@ +> But in fact, if you read the entire answer carefully, he came to the same +> conclusion I do: that you have to go with familiar syntax over logical +> syntax because familiarity is such a strong force: +> "However, familiarity is a strong force. To compare, in English, we +live +> more or less happily with the absurd rules for "to be" (am, are, is, been, +> was, were, ...) and all attempts to simplify are treated with contempt or +> (preferably) humor. It be a curious world and it always beed." + +Although you have to remember that his situation was considerably +different than ours. He was in a position where he was designing a high +level language that had to be COMPATIBLE with C. Our language is such +that a new person would have to learn the new, different, syntax +anyways. Making them learn about the type system does not seem like much +of a stretch from learning the opcodes and how SSA form works, and how +everything ties together... + +> > Basically, my argument for this type construction system is that it is +> > VERY simple to use and understand (although it IS different than C, it is +> > very simple and straightforward, which C is NOT). In fact, I would assert +> > that most programmers TODAY do not understand pointers to member +> > functions, and have to look up an example when they have to write them. + +> Again, I don't disagree with this at all. But to some extent this +> particular problem is inherently difficult. Your syntax for the above +> example may be easier for you to read because this is the way you have been +> thinking about it. Honestly, I don't find it much easier than the C syntax. 
+> In either case, I would have to look up an example to write pointers to +> member functions. + +I would argue that because the lexical structure of the language is self +consistent, any person who spent a significant amount of time programming +in LLVM directly would understand how to do it without looking it up in a +manual. The reason this does not work for C is because you rarely have to +declare these pointers, and the syntax is inconsistent with the method +declaration and calling syntax. + +> But pointers to member functions are nowhere near as common as arrays. + +Very true. If you're implementing an object oriented language, however, +remember that you have to do all the pointer to member function stuff +yourself.... so every time you invoke a virtual method one is involved +(instead of having C++ hide it for you behind "syntactic sugar"). + +> And the old array syntax: +> type [ int, int, ...] +> is just much more familiar and clear to people than anything new you +> introduce, no matter how logical it is. + +Erm... excuse me but how is this the "old array syntax"? If you are +arguing for consistency with C, you should be asking for 'type int []', +which is significantly different than the above (besides, the above +introduces a new operator and duplicates information +needlessly). Basically what I am suggesting is exactly the above without +the fluff. So instead of: + + type [ int, int, ...] + +you use: + + type [ int ] + +> Introducing a new syntax that may +> make function pointers easier but makes arrays much more difficult seems +> very risky to me. + +This is not about function pointers. This is about consistency in the +type system, and consistency with the rest of the language. The point +above does not make arrays any more difficult to use, and makes the +structure of types much more obvious than the "c way".
+ +> > In my opinion, it is critically important to have clear and concise type +> > specifications, because types are going to be all over the programs. +> +> I absolutely agree. But the question is, what is more clear and concise? +> The syntax programmers are used to out of years of experience or a new +> syntax that they have never seen that has a more logical structure. I think +> the answer is the former. Sometimes, you have to give up a better idea +> because you can't overcome sociological barriers to it. Qwerty keyboards +> and Windows are two classic examples of bad technology that are difficult to +> root out. + +Very true, but you seem to be advocating a completely different Type +system than C has, in addition to it not offering the advantages of clear +structure that the system I recommended does... so you seem to not have a +problem with changing this, just with what I change it to. :) + +-Chris + diff --git a/docs/HistoricalNotes/2001-02-09-AdveComments.txt b/docs/HistoricalNotes/2001-02-09-AdveComments.txt new file mode 100644 index 00000000000..5503233c1ed --- /dev/null +++ b/docs/HistoricalNotes/2001-02-09-AdveComments.txt @@ -0,0 +1,120 @@ +Ok, here are my comments and suggestions about the LLVM instruction set. +We should discuss some now, but can discuss many of them later, when we +revisit synchronization, type inference, and other issues. +(We have discussed some of the comments already.) + + +o We should consider eliminating the type annotation in cases where it is + essentially obvious from the instruction type, e.g., in br, it is obvious + that the first arg. should be a bool and the other args should be labels: + + br bool , label , label + + I think your point was that making all types explicit improves clarity + and readability. I agree to some extent, but it also comes at the cost + of verbosity. And when the types are obvious from people's experience + (e.g., in the br instruction), it doesn't seem to help as much. 
+ + +o On reflection, I really like your idea of having the two different switch + types (even though they encode implementation techniques rather than + semantics). It should simplify building the CFG and my guess is it could + enable some significant optimizations, though we should think about which. + + +o In the lookup-indirect form of the switch, is there a reason not to make + the val-type uint? Most HLL switch statements (including Java and C++) + require that anyway. And it would also make the val-type uniform + in the two forms of the switch. + + I did see the switch-on-bool examples and, while cute, we can just use + the branch instructions in that particular case. + + +o I agree with your comment that we don't need 'neg'. + + +o There's a trade-off with the cast instruction: + + it avoids having to define all the upcasts and downcasts that are + valid for the operands of each instruction (you probably have thought + of other benefits also) + - it could make the bytecode significantly larger because there could + be a lot of cast operations + + +o Making the second arg. to 'shl' a ubyte seems good enough to me. + 255 positions seems adequate for several generations of machines + and is more compact than uint. + + +o I still have some major concerns about including malloc and free in the + language (either as builtin functions or instructions). LLVM must be + able to represent code from many different languages. Languages such as + C, C++ Java and Fortran 90 would not be able to use our malloc anyway + because each of them will want to provide a library implementation of it. + + This gets even worse when code from different languages is linked + into a single executable (which is fairly common in large apps). + Having a single malloc would just not suffice, and instead would simply + complicate the picture further because it adds an extra variant in + addition to the one each language provides. 
+ + Instead, providing a default library version of malloc and free + (and perhaps a malloc_gc with garbage collection instead of free) + would make a good implementation available to anyone who wants it. + + I don't recall all your arguments in favor so let's discuss this again, + and soon. + + +o 'alloca' on the other hand sounds like a good idea, and the + implementation seems fairly language-independent so it doesn't have the + problems with malloc listed above. + + +o About indirect call: + Your option #2 sounded good to me. I'm not sure I understand your + concern about an explicit 'icall' instruction? + + +o A pair of important synchronization instr'ns to think about: + load-linked + store-conditional + + +o Other classes of instructions that are valuable for pipeline performance: + conditional-move + predicated instructions + + +o I believe tail calls are relatively easy to identify; do you know why + .NET has a tailcall instruction? + + +o I agree that we need a static data space. Otherwise, emulating global + data gets unnecessarily complex. + + +o About explicit parallelism: + + We once talked about adding a symbolic thread-id field to each + instruction. (It could be optional so single-threaded codes are + not penalized.) This could map well to multi-threaded architectures + while providing easy ILP for single-threaded ones. But it is probably + too radical an idea to include in a base version of LLVM. Instead, it + could be a great topic for a separate study. + + What is the semantics of the IA64 stop bit? + + + + +o And finally, another thought about the syntax for arrays :-) + + Although this syntax: + array <dimension-list> of <type> + is verbose, it will be used only in the human-readable assembly code so + size should not matter. I think we should consider it because I find it + to be the clearest syntax. It could even make arrays of function + pointers somewhat readable.
+ diff --git a/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt b/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt new file mode 100644 index 00000000000..4d2879554a4 --- /dev/null +++ b/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt @@ -0,0 +1,245 @@ +From: Chris Lattner +To: "Vikram S. Adve" +Subject: Re: LLVM Feedback + +I've included your feedback in the /home/vadve/lattner/llvm/docs directory +so that it will live in CVS eventually with the rest of LLVM. I've +significantly updated the documentation to reflect the changes you +suggested, as specified below: + +> We should consider eliminating the type annotation in cases where it is +> essentially obvious from the instruction type: +> br bool , label , label +> I think your point was that making all types explicit improves clarity +> and readability. I agree to some extent, but it also comes at the +> cost of verbosity. And when the types are obvious from people's +> experience (e.g., in the br instruction), it doesn't seem to help as +> much. + +Very true. We should discuss this more, but my reasoning is more of a +consistency argument. There are VERY few instructions that can have all +of the types eliminated, and doing so when available unnecessarily makes +the language more difficult to handle. Especially when you see 'int +%this' and 'bool %that' all over the place, I think it would be +disorienting to see: + + br %predicate, %iftrue, %iffalse + +for branches. Even just typing that once gives me the creeps. ;) Like I +said, we should probably discuss this further in person... + +> On reflection, I really like your idea of having the two different +> switch types (even though they encode implementation techniques rather +> than semantics). It should simplify building the CFG and my guess is it +> could enable some significant optimizations, though we should think +> about which. + +Great.
I added a note to the switch section commenting on how the VM +should just use the instruction type as a hint, and that the +implementation may choose alternate representations (such as predicated +branches). + +> In the lookup-indirect form of the switch, is there a reason not to +> make the val-type uint? + +No. This was something I was debating for a while, and didn't really feel +strongly about either way. It is common to switch on other types in HLL's +(for example signed int's are particularly common), but in this case, all +that will be added is an additional 'cast' instruction. I removed that +from the spec. + +> I agree with your comment that we don't need 'neg' + +Removed. + +> There's a trade-off with the cast instruction: +> + it avoids having to define all the upcasts and downcasts that are +> valid for the operands of each instruction (you probably have +> thought of other benefits also) +> - it could make the bytecode significantly larger because there could +> be a lot of cast operations + + + You NEED casts to represent things like: + void foo(float); + ... + int x; + ... + foo(x); + in a language like C. Even in a Java like language, you need upcasts + and some way to implement dynamic downcasts. + + Not all forms of instructions take every type (for example you can't + shift by a floating point number of bits), thus SOME programs will need + implicit casts. + +To be efficient and to avoid your '-' point above, we just have to be +careful to specify that the instructions shall operate on all common +types, therefore casting should be relatively uncommon. For example all +of the arithmetic operations work on almost all data types. + +> Making the second arg. to 'shl' a ubyte seems good enough to me. +> 255 positions seems adequate for several generations of machines + +Okay, that comment is removed. + +> and is more compact than uint. + +No, it isn't.
Remember that the bytecode encoding saves value slots into +the bytecode instructions themselves, not constant values. This is +another case where we may introduce more cast instructions (but we will +also reduce the number of opcode variants that must be supported by a +virtual machine). Because most shifts are by constant values, I don't +think that we'll have to cast many shifts. :) + +> I still have some major concerns about including malloc and free in the +> language (either as builtin functions or instructions). + +Agreed. How about this proposal: + +malloc/free are either built in functions or actual opcodes. They provide +all of the type safety that the document would indicate, blah blah +blah. :) + +Now, because of all of the excellent points that you raised, an +implementation may want to override the default malloc/free behavior of +the program. To do this, they simply implement a "malloc" and +"free" function. The virtual machine will then be defined to use the user +defined malloc/free function (which return/take void*'s, not type'd +pointers like the builtin function would) if one is available, otherwise +fall back on a system malloc/free. + +Does this sound like a good compromise? It would give us all of the +typesafety/elegance in the language while still allowing the user to do +all the cool stuff they want to... + +> 'alloca' on the other hand sounds like a good idea, and the +> implementation seems fairly language-independent so it doesn't have the +> problems with malloc listed above. + +Okay, once we get the above stuff figured out, I'll put it all in the +spec. + +> About indirect call: +> Your option #2 sounded good to me. I'm not sure I understand your +> concern about an explicit 'icall' instruction? + +I worry too much. :) The other alternative has been removed. 'icall' is +now up in the instruction list next to 'call'. + +> I believe tail calls are relatively easy to identify; do you know why +> .NET has a tailcall instruction? 
+ +Although I am just guessing, I believe it probably has to do with the fact +that they want languages like Haskell and lisp to be efficiently runnable +on their VM. Of course this means that the VM MUST implement tail calls +'correctly', or else life will suck. :) I would put this into a future +feature bin, because it could be pretty handy... + +> A pair of important synchronization instr'ns to think about: +> load-linked +> store-conditional + +What is 'load-linked'? I think that (at least for now) I should add these +to the 'possible extensions' section, because they are not immediately +needed... + +> Other classes of instructions that are valuable for pipeline +> performance: +> conditional-move +> predicated instructions + +Conditional move is effectively a special case of a predicated +instruction... and I think that all predicated instructions can possibly +be implemented later in LLVM. It would significantly change things, and +it doesn't seem to be very necessary right now. It would seem to +complicate flow control analysis a LOT in the virtual machine. I would +tend to prefer that a predicated architecture like IA64 convert from a +"basic block" representation to a predicated rep as part of its dynamic +compilation phase. Also, if a basic block contains ONLY a move, then +that can be trivially translated into a conditional move... + +> I agree that we need a static data space. Otherwise, emulating global +> data gets unnecessarily complex. + +Definitely. Also a later item though. :) + +> We once talked about adding a symbolic thread-id field to each +> .. +> Instead, it could a great topic for a separate study. + +Agreed. :) + +> What is the semantics of the IA64 stop bit? + +Basically, the IA64 writes instructions like this: +mov ... +add ... +sub ...
+op xxx +op xxx +;; + +Where the ;; delimits a group of instructions with no dependencies between +them, which can all be executed concurrently (to the limits of the +available functional units). The ;; gets translated into a bit set in one +of the opcodes. + +The advantage of this representation is that you don't have to do some +kind of 'thread id scheduling' pass by having to specify ahead of time how +many threads to use, and the representation doesn't have a per instruction +overhead... + +> And finally, another thought about the syntax for arrays :-) +> Although this syntax: +> array of +> is verbose, it will be used only in the human-readable assembly code so +> size should not matter. I think we should consider it because I find it +> to be the clearest syntax. It could even make arrays of function +> pointers somewhat readable. + +My only comment will be to give you an example of why this is a bad +idea. :) + +Here is an example of using the switch statement (with my recommended +syntax): + +switch uint %val, label %otherwise, + [%3 x {uint, label}] [ { uint %57, label %l1 }, + { uint %20, label %l2 }, + { uint %14, label %l3 } ] + +Here it is with the syntax you are proposing: + +switch uint %val, label %otherwise, + array %3 of {uint, label} + array of {uint, label} + { uint %57, label %l1 }, + { uint %20, label %l2 }, + { uint %14, label %l3 } + +Which is ambiguous and very verbose. It would be possible to specify +constants with [] brackets as in my syntax, which would look like this: + +switch uint %val, label %otherwise, + array %3 of {uint, label} [ { uint %57, label %l1 }, + { uint %20, label %l2 }, + { uint %14, label %l3 } ] + +But then the syntax is inconsistent between type definition and constant +definition (why do []'s enclose the constants but not the types??). + +Anyways, I'm sure that there is much debate still to be had over +this...
:) + +-Chris + +http://www.nondot.org/~sabre/os/ +http://www.nondot.org/MagicStats/ +http://korbit.sourceforge.net/ + + diff --git a/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt b/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt new file mode 100644 index 00000000000..2c7534d9da1 --- /dev/null +++ b/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt @@ -0,0 +1,39 @@ +Date: Tue, 13 Feb 2001 13:29:52 -0600 (CST) +From: Chris Lattner +To: Vikram S. Adve +Subject: LLVM Concerns... + + +I've updated the documentation to include load store and allocation +instructions (please take a look and let me know if I'm on the right +track): + +file:/home/vadve/lattner/llvm/docs/LangRef.html#memoryops + +I have a couple of concerns I would like to bring up: + +1. Reference types + Right now, I've spec'd out the language to have a pointer type, which + works fine for lots of stuff... except that Java really has + references: constrained pointers that cannot be manipulated: added and + subtracted, moved, etc... Do we want to have a type like this? It + could be very nice for analysis (pointer always points to the start of + an object, etc...) and more closely matches Java semantics. The + pointer type would be kept for C++ like semantics. Through analysis, + C++ pointers could be promoted to references in the LLVM + representation. + +2. Our "implicit" memory references in assembly language: + After thinking about it, this model has two problems: + A. If you do pointer analysis and realize that two stores are + independent and can share the same memory source object, there is + no way to represent this in either the bytecode or assembly. + B. When parsing assembly/bytecode, we effectively have to do a full + SSA generation/PHI node insertion pass to build the dependencies + when we don't want the "pinned" representation. This is not + cool. + I'm tempted to make memory references explicit in both the assembly and + bytecode to get around this... what do you think? 
+ +-Chris + diff --git a/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt b/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt new file mode 100644 index 00000000000..505343378df --- /dev/null +++ b/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt @@ -0,0 +1,47 @@ +Date: Tue, 13 Feb 2001 18:25:42 -0600 +From: Vikram S. Adve +To: Chris Lattner +Subject: RE: LLVM Concerns... + +> 1. Reference types +> Right now, I've spec'd out the language to have a pointer type, which +> works fine for lots of stuff... except that Java really has +> references: constrained pointers that cannot be manipulated: added and +> subtracted, moved, etc... Do we want to have a type like this? It +> could be very nice for analysis (pointer always points to the start of +> an object, etc...) and more closely matches Java semantics. The +> pointer type would be kept for C++ like semantics. Through analysis, +> C++ pointers could be promoted to references in the LLVM +> representation. + + +You're right, having references would be useful. Even for C++ the *static* +compiler could generate references instead of pointers with fairly +straightforward analysis. Let's include a reference type for now. But I'm +also really concerned that LLVM is becoming big and complex and (perhaps) +too high-level. After we get some initial performance results, we may have +a clearer idea of what our goals should be and we should revisit this +question then. + +> 2. Our "implicit" memory references in assembly language: +> After thinking about it, this model has two problems: +> A. If you do pointer analysis and realize that two stores are +> independent and can share the same memory source object, + +not sure what you meant by "share the same memory source object" + +> there is +> no way to represent this in either the bytecode or assembly. +> B. 
When parsing assembly/bytecode, we effectively have to do a full +> SSA generation/PHI node insertion pass to build the dependencies +> when we don't want the "pinned" representation. This is not +> cool. + +I understand the concern. But again, let's focus on the performance first +and then look at the language design issues. E.g., it would be good to know +how big the bytecode files are before expanding them further. I am pretty +keen to explore the implications of LLVM for mobile devices. Both bytecode +size and power consumption are important to consider there. + +--Vikram + diff --git a/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt b/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt new file mode 100644 index 00000000000..923aa62037a --- /dev/null +++ b/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt @@ -0,0 +1,12 @@ +By Chris: + +LLVM has been designed with two primary goals in mind. First we strive to enable the best possible division of labor between static and dynamic compilers, and second, we need a flexible and powerful interface between these two complementary stages of compilation. We feel that providing a solution to these two goals will yield an excellent solution to the performance problem faced by modern architectures and programming languages. + +A key insight into current compiler and runtime systems is that a compiler may fall in anywhere in a "continuum of compilation" to do its job. On one side, scripting languages statically compile nothing and dynamically compile (or equivalently, interpret) everything. On the far other side, traditional static compilers process everything statically and nothing dynamically. These approaches have typically been seen as a tradeoff between performance and portability. On a deeper level, however, there are two reasons that optimal system performance may be obtained by a system somewhere in between these two extremes: Dynamic application behavior and social constraints. 
+ +From a technical perspective, pure static compilation cannot ever give optimal performance in all cases, because applications have varying dynamic behavior that the static compiler cannot take into consideration. Even compilers that support profile guided optimization generate poor code in the real world, because using such optimization tunes that application to one particular usage pattern, whereas real programs (as opposed to benchmarks) often have several different usage patterns. + +On a social level, static compilation is a very shortsighted solution to the performance problem. Instruction set architectures (ISAs) continuously evolve, and each implementation of an ISA (a processor) must choose a set of tradeoffs that make sense in the market context that it is designed for. With every new processor introduced, the vendor faces two fundamental problems: First, there is a lag time between when a processor is introduced to when compilers generate quality code for the architecture. Secondly, even when compilers catch up to the new architecture there is often a large body of legacy code that was compiled for previous generations and will not or can not be upgraded. Thus a large percentage of code running on a processor may be compiled quite sub-optimally for the current characteristics of the dynamic execution environment. + +For these reasons, LLVM has been designed from the beginning as a long-term solution to these problems. Its design allows the large body of platform independent, static, program optimizations currently in compilers to be reused unchanged in their current form. It also provides important static type information to enable powerful dynamic and link time optimizations to be performed quickly and efficiently. This combination enables an increase in effective system performance for real world environments. 
+ diff --git a/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt b/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt new file mode 100644 index 00000000000..2e0b7940750 --- /dev/null +++ b/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt @@ -0,0 +1,202 @@ +Meeting notes: Implementation idea: Exception Handling in C++/Java + +The 5/18/01 meeting discussed ideas for implementing exceptions in LLVM. +We decided that the best solution requires a set of library calls provided by +the VM, as well as an extension to the LLVM function invocation syntax. + +The LLVM function invocation instruction previously looks like this (ignoring +types): + + call func(arg1, arg2, arg3) + +The extension discussed today adds an optional "with" clause that +associates a label with the call site. The new syntax looks like this: + + call func(arg1, arg2, arg3) with funcCleanup + +This funcHandler always stays tightly associated with the call site (being +encoded directly into the call opcode itself), and should be used whenever +there is cleanup work that needs to be done for the current function if +an exception is thrown by func (or if we are in a try block). + +To support this, the VM/Runtime provide the following simple library +functions (all syntax in this document is very abstract): + +typedef struct { something } %frame; + The VM must export a "frame type", that is an opaque structure used to + implement different types of stack walking that may be used by various + language runtime libraries. We imagine that it would be typical to + represent a frame with a PC and frame pointer pair, although that is not + required. + +%frame getStackCurrentFrame(); + Get a frame object for the current function. Note that if the current + function was inlined into its caller, the "current" frame will belong to + the "caller". + +bool isFirstFrame(%frame f); + Returns true if the specified frame is the top level (first activated) frame + for this thread. 
For the main thread, this corresponds to the main() + function, for a spawned thread, it corresponds to the thread function. + +%frame getNextFrame(%frame f); + Return the previous frame on the stack. This function is undefined if f + satisfies the predicate isFirstFrame(f). + +Label *getFrameLabel(%frame f); + If a label was associated with f (as discussed below), this function returns + it. Otherwise, it returns a null pointer. + +doNonLocalBranch(Label *L); + At this point, it is not clear whether this should be a function or + intrinsic. It should probably be an intrinsic in LLVM, but we'll deal with + this issue later. + + +Here is a motivating example that illustrates how these facilities could be +used to implement the C++ exception model: + +void TestFunction(...) { + A a; B b; + foo(); // Any function call may throw + bar(); + C c; + + try { + D d; + baz(); + } catch (int) { + ...int Stuff... + // execution continues after the try block: the exception is consumed + } catch (double) { + ...double stuff... + throw; // Exception is propogated + } +} + +This function would compile to approximately the following code (heavy +pseudo code follows): + +Func: + %a = alloca A + A::A(%a) // These ctors & dtors could throw, but we ignore this + %b = alloca B // minor detail for this example + B::B(%b) + + call foo() with fooCleanup // An exception in foo is propogated to fooCleanup + call bar() with barCleanup // An exception in bar is propogated to barCleanup + + %c = alloca C + C::C(c) + %d = alloca D + D::D(d) + call baz() with bazCleanup // An exception in baz is propogated to bazCleanup + d->~D(); +EndTry: // This label corresponds to the end of the try block + c->~C() // These could also throw, these are also ignored + b->~B() + a->~A() + return + +Note that this is a very straight forward and literal translation: exactly +what we want for zero cost (when unused) exception handling. 
Especially on +platforms with many registers (ie, the IA64) setjmp/longjmp style exception +handling is *very* impractical. Also, the "with" clauses describe the +control flow paths explicitly so that analysis is not adversly effected. + +The foo/barCleanup labels are implemented as: + +TryCleanup: // Executed if an exception escapes the try block + c->~C() +barCleanup: // Executed if an exception escapes from bar() + // fall through +fooCleanup: // Executed if an exception escapes from foo() + b->~B() + a->~A() + Exception *E = getThreadLocalException() + call throw(E) // Implemented by the C++ runtime, described below + +Which does the work one would expect. getThreadLocalException is a function +implemented by the C++ support library. It returns the current exception +object for the current thread. Note that we do not attempt to recycle the +shutdown code from before, because performance of the mainline code is +critically important. Also, obviously fooCleanup and barCleanup may be +merged and one of them eliminated. This just shows how the code generator +would most likely emit code. + +The bazCleanup label is more interesting. Because the exception may be caught +by the try block, we must dispatch to its handler... but it does not exist +on the call stack (it does not have a VM Call->Label mapping installed), so +we must dispatch statically with a goto. The bazHandler thus appears as: + +bazHandler: + d->~D(); // destruct D as it goes out of scope when entering catch clauses + goto TryHandler + +In general, TryHandler is not the same as bazHandler, because multiple +function calls could be made from the try block. In this case, trivial +optimization could merge the two basic blocks. TryHandler is the code +that actually determines the type of exception, based on the Exception object +itself. For this discussion, assume that the exception object contains *at +least*: + +1. A pointer to the RTTI info for the contained object +2. 
A pointer to the dtor for the contained object +3. The contained object itself + +Note that it is neccesary to maintain #1 & #2 in the exception object itself +because objects without virtual function tables may be thrown (as in this +example). Assuming this, TryHandler would look something like this: + +TryHandler: + Exception *E = getThreadLocalException(); + switch (E->RTTIType) { + case IntRTTIInfo: + ...int Stuff... // The action to perform from the catch block + break; + case DoubleRTTIInfo: + ...double Stuff... // The action to perform from the catch block + goto TryCleanup // This catch block rethrows the exception + break; // Redundant, eliminated by the optimizer + default: + goto TryCleanup // Exception not caught, rethrow + } + + // Exception was consumed + if (E->dtor) + E->dtor(E->object) // Invoke the dtor on the object if it exists + goto EndTry // Continue mainline code... + +And that is all there is to it. + +The throw(E) function would then be implemented like this (which may be +inlined into the caller through standard optimization): + +function throw(Exception *E) { + // Get the start of the stack trace... + %frame %f = call getStackCurrentFrame() + + // Get the label information that corresponds to it + label * %L = call getFrameLabel(%f) + while (%L == 0 && !isFirstFrame(%f)) { + // Loop until a cleanup handler is found + %f = call getNextFrame(%f) + %L = call getFrameLabel(%f) + } + + if (%L != 0) { + call setThreadLocalException(E) // Allow handlers access to this... + call doNonLocalBranch(%L) + } + // No handler found! + call BlowUp() // Ends up calling the terminate() method in use +} + +That's a brief rundown of how C++ exception handling could be implemented in +llvm. Java would be very similar, except it only uses destructors to unlock +synchronized blocks, not to destroy data. Also, it uses two stack walks: a +nondestructive walk that builds a stack trace, then a destructive walk that +unwinds the stack as shown here. 
+ +It would be trivial to get exception interoperability between C++ and Java. + diff --git a/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt b/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt new file mode 100644 index 00000000000..3375365f54c --- /dev/null +++ b/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt @@ -0,0 +1,45 @@ +Date: Sat, 19 May 2001 19:09:13 -0500 (CDT) +From: Chris Lattner +To: Vikram S. Adve +Subject: RE: Meeting writeup + +> I read it through and it looks great! + +Thanks! + +> The finally clause in Java may need more thought. The code for this clause +> is like a subroutine because it needs to be entered from many points (end of +> try block and beginning of each catch block), and then needs to *return to +> the place from where the code was entered*. That's why JVM has the +> jsr/jsr_w instruction. + +Hrm... I guess that is an implementation decision. It can either be +modelled as a subroutine (as java bytecodes do), which is really +gross... or it can be modelled as code duplication (emitted once inline, +then once in the exception path). Because this could, at worst, +slightly less than double the amount of code in a function (it is +bounded) I don't think this is a big deal. One of the really nice things +about the LLVM representation is that it still allows for runtime code +generation for exception paths (exceptions paths are not compiled until +needed). Obviously a static compiler couldn't do this though. :) + +In this case, only one copy of the code would be compiled... until the +other one is needed on demand. Also this strategy fits with the "zero +cost" exception model... the standard case is not burdened with extra +branches or "call"s. + +> I suppose you could save the return address in a particular register +> (specific to this finally block), jump to the finally block, and then at the +> end of the finally block, jump back indirectly through this register. 
It +> will complicate building the CFG but I suppose that can be handled. It is +> also unsafe in terms of checking where control returns (which is I suppose +> why the JVM doesn't use this). + +I think that a code duplication method would be cleaner, and would avoid +the caveats that you mention. Also, it does not slow down the normal case +with an indirect branch... + +Like everything, we can probably defer a final decision until later. :) + +-Chris + diff --git a/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt b/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt new file mode 100644 index 00000000000..d542fb478cd --- /dev/null +++ b/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt @@ -0,0 +1,63 @@ +Date: Fri, 1 Jun 2001 16:38:17 -0500 (CDT) +From: Chris Lattner +To: Vikram S. Adve +Subject: Interesting: GCC passes + + +Take a look at this document (which describes the order of optimizations +that GCC performs): + +http://gcc.gnu.org/onlinedocs/gcc_17.html + +The rundown is that after RTL generation, the following happens: + +1 . [t] jump optimization (jumps to jumps, etc) +2 . [t] Delete unreachable code +3 . Compute live ranges for CSE +4 . [t] Jump threading (jumps to jumps with identical or inverse conditions) +5 . [t] CSE +6 . *** Conversion to SSA +7 . [t] SSA Based DCE +8 . *** Conversion to LLVM +9 . UnSSA +10. GCSE +11. LICM +12. Strength Reduction +13. Loop unrolling +14. [t] CSE +15. [t] DCE +16. Instruction combination, register movement, scheduling... etc. + +I've marked optimizations with a [t] to indicate things that I believe to +be relatively trivial to implement in LLVM itself. The time consuming +things to reimplement would be SSA based PRE, Strength reduction & loop +unrolling... these would be the major things we would miss out on if we +did LLVM creation from tree code [inlining and other high level +optimizations are done on the tree representation]. 
+ +Given the lack of "strong" optimizations that would take a long time to +reimplement, I am leaning a bit more towards creating LLVM from the tree +code. Especially given that SGI has GPL'd their compiler, including many +SSA based optimizations that could be adapted (besides the fact that their +code looks MUCH nicer than GCC :) + +Even if we choose to do LLVM code emission from RTL, we will almost +certainly want to move LLVM emission from step 8 down until at least CSE +has been rerun... which causes me to wonder if the SSA generation code +will still work (due to global variable dependencies and stuff). I assume +that it can be made to work, but might be a little more involved than we +would like. + +I'm continuing to look at the Tree -> RTL code. It is pretty gross +because they do some of the translation a statement at a time, and some +of it a function at a time... I'm not quite clear why and how the +distinction is drawn, but it does not appear that there is a wonderful +place to attach extra info. + +Anyways, I'm proceeding with the RTL -> LLVM conversion phase for now. We +can talk about this more on Monday. + +Wouldn't it be nice if there were an obvious decision to be made? :) + +-Chris + diff --git a/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt b/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt new file mode 100644 index 00000000000..6c9e0971a04 --- /dev/null +++ b/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt @@ -0,0 +1,71 @@ +Date: Fri, 1 Jun 2001 17:08:44 -0500 (CDT) +From: Chris Lattner +To: Vikram S. Adve +Subject: RE: Interesting: GCC passes + +> That is very interesting. I agree that some of these could be done on LLVM +> at link-time, but it is the extra time required that concerns me. Link-time +> optimization is severely time-constrained. + +If we were to reimplement any of these optimizations, I assume that we +could do them a translation unit at a time, just as GCC does now.
This +would lead to a pipeline like this: + +Static optimizations, xlation unit at a time: +.c --GCC--> .llvm --llvmopt--> .llvm + +Link time optimizations: +.llvm --llvm-ld--> .llvm --llvm-link-opt--> .llvm + +Of course, many optimizations could be shared between llvmopt and +llvm-link-opt, but they wouldn't need to be shared... Thus compile time +could be faster, because we are using a "smarter" IR (SSA based). + +> BTW, about SGI, "borrowing" SSA-based optimizations from one compiler and +> putting it into another is not necessarily easier than re-doing it. +> Optimization code is usually heavily tied in to the specific IR they use. + +Understood. The only reason that I brought this up is because SGI's IR is +more similar to LLVM than it is different in many respects (SSA based, +relatively low level, etc), and could be easily adapted. Also their +optimizations are written in C++ and are actually somewhat +structured... of course it would be no walk in the park, but it would be +much less time consuming to adapt, say, SSA-PRE than to rewrite it. + +> But your larger point is valid that adding SSA based optimizations is +> feasible and should be fun. (Again, link time cost is the issue.) + +Assuming linktime cost wasn't an issue, the question is: +Does using GCC's backend buy us anything? + +> It also occurs to me that GCC is probably doing quite a bit of back-end +> optimization (step 16 in your list). Do you have a breakdown of that? + +Not really. The irritating part of GCC is that it mixes it all up and +doesn't have a clean separation of concerns. A lot of the "back end +optimization" happens right along with other data optimizations (ie, CSE +of machine specific things). + +As far as REAL back end optimizations go, it looks something like this: + +1. Instruction combination: try to make CISCy instructions, if available +2. Register movement: try to get registers in the right places for the +architecture to avoid register to register moves.
For example, try to get +the first argument of a function to naturally land in %o0 for sparc. +3. Instruction scheduling: 'nuff said :) +4. Register class preferencing: ?? +5. Local register allocation +6. global register allocation +7. Spilling +8. Local regalloc +9. Jump optimization +10. Delay slot scheduling +11. Branch shorting for CISC machines +12. Instruction selection & peephole optimization +13. Debug info output + +But none of this would be usable for LLVM anyways, unless we were using +GCC as a static compiler. + +-Chris + diff --git a/docs/LangRef.html b/docs/LangRef.html new file mode 100644 index 00000000000..b3d6d521218 --- /dev/null +++ b/docs/LangRef.html @@ -0,0 +1,1376 @@ + +llvm Assembly Language Reference Manual + + + + +
  llvm Assembly Language Reference Manual
+ +
    +
  1. Abstract +
  2. Introduction +
  3. Identifiers +
  4. Type System +
      +
    1. Primitive Types +
        +
      1. Type Classifications +
      +
    2. Derived Types +
        +
      1. Array Type +
      2. Method Type +
      3. Pointer Type +
      4. Structure Type +
      5. Packed Type +
      +
    +
  5. High Level Structure +
      +
    1. Module Structure +
    2. Method Structure +
    +
  6. Instruction Reference +
      +
    1. Terminator Instructions +
        +
      1. 'ret' Instruction +
      2. 'br' Instruction +
      3. 'switch' Instruction +
      4. 'call .. with' Instruction +
      +
    2. Unary Operations +
        +
      1. 'not' Instruction +
      2. 'cast .. to' Instruction +
      +
    3. Binary Operations +
        +
      1. 'add' Instruction +
      2. 'sub' Instruction +
      3. 'mul' Instruction +
      4. 'div' Instruction +
      5. 'rem' Instruction +
      6. 'setcc' Instructions +
      +
    4. Bitwise Binary Operations +
        +
      1. 'and' Instruction +
      2. 'or' Instruction +
      3. 'xor' Instruction +
      4. 'shl' Instruction +
      5. 'shr' Instruction +
      +
    5. Memory Access Operations +
        +
      1. 'malloc' Instruction +
      2. 'free' Instruction +
      3. 'alloca' Instruction +
      4. 'load' Instruction +
      5. 'store' Instruction +
      6. 'getfieldptr' Instruction +
      +
    6. Other Operations +
        +
      1. 'call' Instruction +
      2. 'icall' Instruction +
      3. 'phi' Instruction +
      +
    7. Builtin Functions +
    +
  7. TODO List +
      +
    1. Exception Handling Instructions +
    2. Synchronization Instructions +
    +
  8. Possible Extensions +
      +
    1. 'tailcall' Instruction +
    2. Global Variables +
    3. Explicit Parallelism +
    +
  9. Related Work +
+ + + +

+Abstract +

    + + +
    + This document describes the LLVM assembly language IR/VM. LLVM is an SSA + based representation that attempts to be a useful midlevel IR by providing + type safety, low level operations, flexibility, and the capability to + represent 'all' high level languages cleanly. +
    + + + + + +
+Introduction +
    + + +The LLVM is designed to exhibit a dual nature: on one hand, it is a useful compiler IR, on the other hand, it is a bytecode representation for dynamic compilation. We contend that this is a natural and good thing, making LLVM a natural form of communication between different compiler phases, and also between a static and dynamic compiler.

    + +This dual nature leads to three different representations of LLVM (the human readable assembly representation, the compact bytecode representation, and the in memory, pointer based, representation). This document describes the human readable representation and notation.

    + +The LLVM representation aims to be light weight and low level while being expressive, type safe, and extensible at the same time. It aims to be a "universal IR" of sorts, by being at a low enough level that high level ideas may be cleanly mapped to it. By providing type safety, LLVM can be used as the target of optimizations: for example, through pointer analysis, it can be proven that a C automatic variable is never accessed outside of the current function... allowing it to be promoted to a simple SSA value instead of a memory location.

    + + +


Well Formedness

+Identifiers +
    + + +LLVM uses three different forms of identifiers, for different purposes:

    + +

      +
    1. Numeric constants are represented as you would expect: 12, -3, 123.421, etc. +
    2. Named values are represented as a string of characters with a '%' prefix. For example, %foo, %DivisionByZero, %a.really.long.identifier. The actual regular expression used is '%[a-zA-Z$._][a-zA-Z$._0-9]*'. +
    3. Unnamed values are represented as an unsigned numeric value with a '%' prefix. For example, %12, %2, %44. +

    + +LLVM requires the values start with a '%' sign for two reasons: Compilers don't need to worry about name clashes with reserved words, and the set of reserved words may be expanded in the future without penalty. Additionally, unnamed identifiers allow a compiler to quickly come up with a temporary variable without having to avoid symbol table conflicts.

    + +Reserved words in LLVM are very similar to reserved words in other languages. There are keywords for different opcodes ('add', 'cast', 'ret', etc...), for primitive type names ('void', 'uint', etc...), and others. These reserved words cannot conflict with variable names, because none of them may start with a '%' character.

    + +Here is an example of LLVM code to multiply the integer variable '%X' by 8:

    + +The easy way: +

    +  %result = mul int %X, 8
    +
    + +After strength reduction: +
    +  %result = shl int %X, ubyte 3
    +
    + +And the hard way: +
    +  add int %X, %X           ; yields {int}:%0
    +  add int %0, %0           ; yields {int}:%1
    +  %result = add int %1, %1
    +
    + +This last way of multiplying %X by 8 illustrates several important lexical features of LLVM:

    + +

      +
    1. Comments are delimited with a ';' and go until the end of line. +
    2. Unnamed temporaries are created when the result of a computation is not assigned to a named value. +
    3. Unnamed temporaries are numbered sequentially +

    + +...and it also shows a convention that we follow in this document. When demonstrating instructions, we will follow an instruction with a comment that defines the type and name of the value produced. Comments are shown in italic text.

    + + + + +

+Type System +
    + + +The LLVM type system is important to the overall usefulness of the language and VM runtime. By being strongly typed, a number of optimizations may be performed on the IR directly, without having to do extra analysis to derive types. A strong type system also makes it easier to comprehend generated code and assists with safety concerns.

    + +The assembly language form for the type system was heavily influenced by the type problems in the C language1.

    + + + + +

   +Primitive Types +
    + +The primitive types are the fundamental building blocks of the LLVM system. The current set of primitive types is as follows:

    + +
    + + + + + + + + + +
    void No value
    ubyte Unsigned 8 bit value
    ushort Unsigned 16 bit value
    uint Unsigned 32 bit value
    ulong Unsigned 64 bit value
    float 32 bit floating point value
    label Branch destination
    + +
    + + + + + + + + + +
    bool True or False value
    sbyte Signed 8 bit value
    short Signed 16 bit value
    int Signed 32 bit value
    long Signed 64 bit value
    double 64 bit floating point value
    lock Recursive mutex value
    + +

    + + + + +


Type Classifications

    + +These different primitive types fall into a few useful classifications:

    + + + + + + + +
    signed sbyte, short, int, long, float, double
    unsigned ubyte, ushort, uint, ulong
    integral ubyte, sbyte, ushort, short, uint, int, ulong, long
    floating point float, double
    first class bool, ubyte, sbyte, ushort, short, uint, int, ulong, long, float, double, lock

    + + + + + + +

   +Derived Types +
    + +The real power in LLVM comes from the derived types in the system. This is what allows a programmer to represent arrays, methods, pointers, and other useful types. Note that these derived types may be recursive: For example, it is possible to have a two dimensional array.

    + + + + +


Array Type


Method Type

    + +
    Overview:
    + +The method type can be thought of as a method signature. It consists of a return type and a list of formal parameter types. Method types are usually used to build virtual function tables (which are structures of pointers to methods) and for indirect method calls.

    + +

    Syntax:
    +
    +  <returntype> (<parameter list>)
    +
    + +Where '<parameter list>' is a comma-separated list of type specifiers.

    + +

    Examples:
    +
      + + + +
      int (int): method taking an int, returning an int
      float (int, int *) *: Pointer to a method that takes an int and a pointer to int, returning float.
      +
    + + + + +


Structure Type


Pointer Type

    + + + +


Packed Type

    + +Mention/decide that packed types work with saturation or not. Maybe have a packed+saturated type in addition to just a packed type.

    + +Packed types should be 'nonsaturated' because standard data types are not saturated. Maybe have a saturated packed type?

    + + + +

+High Level Structure +
    + + + + +
   +Module Structure +
    + + +talk about the elements of a module: constant pool and method list.

    + + + +

   +Method Structure +
    + + +talk about the constant pool

    +talk about how basic blocks delineate labels

    +talk about how basic blocks end with terminators

    + + + +

+Instruction Reference +
    + + +List all of the instructions, list valid types that they accept. Tell what they +do and stuff also. + + +
   +Terminator Instructions +
    + + + +As was mentioned previously, every basic block in +a program ends with a "Terminator" instruction. Additionally, all terminators yield a 'void' value: they produce control flow, not values.

    + +There are four different terminator instructions: the 'ret' instruction, the 'br' instruction, the 'switch' instruction, and the 'call .. with' instruction.

    + + + +


'ret' Instruction


'br' Instruction


'switch' Instruction


'call .. with' Instruction

   +Unary Operations +
    + +Unary operators are used to do a simple operation to a single value.

    + +There are two different unary operators: the 'not' instruction and the 'cast' instruction.

    + + + +


'not' Instruction


'cast .. to' Instruction

   +Binary Operations +


'add' Instruction


'sub' Instruction


'mul' Instruction


'div' Instruction


'rem' Instruction


'setcc' Instructions

   +Bitwise Binary Operations +
    + +Bitwise binary operators are used to do various forms of bit-twiddling in a program. They are generally very efficient instructions, and can commonly be strength reduced from other instructions. They require two operands, execute an operation on them, and produce a single value. The resulting value of the bitwise binary operators is always the same type as its first operand.

    + + +


'and' Instruction


'or' Instruction


'xor' Instruction


'shl' Instruction


'shr' Instruction

   +Memory Access Operations +
    + +Accessing memory in SSA form is, well, sticky at best. This section describes how to read and write memory in LLVM.

    + + + +


'malloc' Instruction


'free' Instruction


'alloca' Instruction


'load' Instruction


'store' Instruction


'getfieldptr' Instruction

    + +
    Syntax:
    +
    +
    +
    + +
    Overview:
    + +getfieldptr takes a structure pointer and an unsigned byte. It returns a pointer to the specified element, of the correct type. At the implementation level, this would be compiled down to an addition of a constant int. + +
    Arguments:
    + + +
    Semantics:
    + + +
    Example:
    +
    +
    +
    + + + + +
   +Other Operations +
    + +The instructions in this category are the "miscellaneous" functions that defy better classification.

    + + + +


'call' Instruction

    + +
    Syntax:
    +
    +
    +
    + +
    Overview:
    + + +
    Arguments:
    + + +
    Semantics:
    + + +
    Example:
    +
    +  %retval = call int %test(int %argc)
    +
    + + +


'icall' Instruction

    + +Indirect calls are desperately needed to implement virtual function tables (C++, Java) and function pointers (C, C++, ...).

    + +A new instruction icall or similar should be introduced to represent an indirect call.

    + +Example: +

    +  %retval = icall int %funcptr(int %arg1)          ; yields {int}:%retval
    +
    + + + + +


'phi' Instruction

    + +
    Syntax:
    +
    +
    + +
    Overview:
    + + +
    Arguments:
    + + +
    Semantics:
    + + +
    Example:
    +
    +
    + + + +
   +Builtin Functions +
+TODO List +
    + + +This list of random topics includes things that will need to be addressed before LLVM can be used to implement a Java-like language. Right now, it is pretty much useless for any language, given the unavailability of structure types.

    + + +


Synchronization Instructions

    + +We will need some type of synchronization instructions to be able to implement stuff in Java well. The way I currently envision doing this is to introduce a 'lock' type, and then add two operations (either builtins or instructions) to lock and unlock the lock.

    + + + +

+Possible Extensions +


'tailcall' Instruction


Global Variables

    + +In order to represent programs written in languages like C, we need to be able to support variables at the module (global) scope. Perhaps they should be written outside of the module definition even. Maybe global functions should be handled like this as well.

    + + + +


Explicit Parallelism

    + +With the rise of massively parallel architectures (like the IA64 architecture, multithreaded CPU cores, and SIMD data sets) it is becoming increasingly important to extract all of the ILP possible from a code stream. It would be interesting to research encoding methods that can explicitly represent this. One straightforward way to do this would be to introduce a "stop" instruction that is equivalent to the IA64 stop bit.

    + + + + +

+Related Work +


Vectorized Architectures

+ + + +
+ +
Chris Lattner
+ + +Last modified: Thu May 31 17:36:39 CDT 2001 + +
+ diff --git a/getsomesrcs.sh b/getsomesrcs.sh new file mode 100755 index 00000000000..1d15db93a9c --- /dev/null +++ b/getsomesrcs.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# This script prints out some of the source files that are useful when +# editing. I use this like this: xemacs `./getsomesrcs.sh` & +./getsrcs.sh | grep -v Assembly | grep -v Byte | grep -v \\.ll | grep -v tools | grep -v Makefile | grep -v Opt | grep -v CommandLi | grep -v String | grep -v DataType + diff --git a/getsrcs.sh b/getsrcs.sh new file mode 100755 index 00000000000..e9b45974840 --- /dev/null +++ b/getsrcs.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# This is useful because it prints out all of the source files. Useful for +# greps. +find . -name \*.\[chyl\]\* | grep -v Lexer.cpp | grep -v llvmAsmParser.cpp | grep -v llvmAsmParser.h | grep -v '~$' | grep -v '\.ll$' | grep -v test | grep -v .flc diff --git a/include/llvm/Analysis/ModuleAnalyzer.h b/include/llvm/Analysis/ModuleAnalyzer.h new file mode 100644 index 00000000000..3abdd49afe3 --- /dev/null +++ b/include/llvm/Analysis/ModuleAnalyzer.h @@ -0,0 +1,113 @@ +//===-- llvm/Analysis/ModuleAnalyzer.h - Module analysis driver --*- C++ -*-==// +// +// This class provides a nice interface to traverse a module in a predictable +// way. This is used by the AssemblyWriter, BytecodeWriter, and SlotCalculator +// to do analysis of a module. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MODULEANALYZER_H +#define LLVM_ANALYSIS_MODULEANALYZER_H + +#include "llvm/ConstantPool.h" +#include + +class Module; +class Method; +class BasicBlock; +class Instruction; +class ConstPoolVal; +class MethodType; +class MethodArgument; + +class ModuleAnalyzer { + ModuleAnalyzer(const ModuleAnalyzer &); // do not impl + const ModuleAnalyzer &operator=(const ModuleAnalyzer &); // do not impl +public: + ModuleAnalyzer() {} + virtual ~ModuleAnalyzer() {} + +protected: + // processModule - Driver function to call all of my subclasses virtual + // methods. Commonly called by derived type's constructor. + // + bool processModule(const Module *M); + + //===--------------------------------------------------------------------===// + // Stages of processing Module level information + // + virtual bool processConstPool(const ConstantPool &CP, bool isMethod); + + // processType - This callback occurs when an derived type is discovered + // at the class level. This activity occurs when processing a constant pool. + // + virtual bool processType(const Type *Ty) { return false; } + + // processMethods - The default implementation of this method loops through + // all of the methods in the module and processModule's them. + // + virtual bool processMethods(const Module *M); + + //===--------------------------------------------------------------------===// + // Stages of processing a constant pool + // + + // processConstPoolPlane - Called once for every populated plane in the + // constant pool. The default action is to do nothing. The processConstPool + // method does the iteration over constants. + // + virtual bool processConstPoolPlane(const ConstantPool &CP, + const ConstantPool::PlaneType &Pl, + bool isMethod) { + return false; + } + + // processConstant is called once per each constant in the constant pool. 
It + // traverses the constant pool such that it visits each constant in the + // order of its type. Thus, all 'int' typed constants shall be visited + // sequentially, etc... + // + virtual bool processConstant(const ConstPoolVal *CPV) { return false; } + + // visitMethod - This member is called after the constant pool has been + // processed. The default implementation of this is a noop. + // + virtual bool visitMethod(const Method *M) { return false; } + + //===--------------------------------------------------------------------===// + // Stages of processing Method level information + // + // (processConstPool is also used above, with the isMethod flag set to true) + // + + // processMethod - Process all aspects of a method. + // + virtual bool processMethod(const Method *M); + + // processMethodArgument - This member is called for every argument that + // is passed into the method. + // + virtual bool processMethodArgument(const MethodArgument *MA) { return false; } + + // processBasicBlock - This member is called for each basic block in a methd. + // + virtual bool processBasicBlock(const BasicBlock *BB); + + //===--------------------------------------------------------------------===// + // Stages of processing BasicBlock level information + // + + // preProcessInstruction - This member is called for each Instruction in a + // method before processInstruction. 
+ // + virtual bool preProcessInstruction(const Instruction *I); + + // processInstruction - This member is called for each Instruction in a method + // + virtual bool processInstruction(const Instruction *I) { return false; } + +private: + bool handleType(set &TypeSet, const Type *T); +}; + +#endif diff --git a/include/llvm/Analysis/Verifier.h b/include/llvm/Analysis/Verifier.h new file mode 100644 index 00000000000..2feadca7798 --- /dev/null +++ b/include/llvm/Analysis/Verifier.h @@ -0,0 +1,28 @@ +//===-- llvm/Analysis/Verifier.h - Module Verifier ---------------*- C++ -*-==// +// +// This file defines the method verifier interface, that can be used for some +// sanity checking of input to the system. +// +// Note that this does not provide full 'java style' security and verifications, +// instead it just tries to ensure that code is well formed. +// +// To see what specifically is checked, look at the top of Verifier.cpp +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_VERIFIER_H +#define LLVM_ANALYSIS_VERIFIER_H + +#include +#include +class Module; +class Method; + +// verify - Check a module or method for validity. If errors are detected, +// error messages corresponding to the problem are added to the errorMsgs +// vectors, and a value of true is returned. +// +bool verify(const Module *M, vector &ErrorMsgs); +bool verify(const Method *M, vector &ErrorMsgs); + +#endif diff --git a/include/llvm/Assembly/Parser.h b/include/llvm/Assembly/Parser.h new file mode 100644 index 00000000000..5ac6ec20fa1 --- /dev/null +++ b/include/llvm/Assembly/Parser.h @@ -0,0 +1,66 @@ +//===-- llvm/assembly/Parser.h - Parser for VM assembly files ----*- C++ -*--=// +// +// These classes are implemented by the lib/AssemblyParser library. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ASSEMBLY_PARSER_H +#define LLVM_ASSEMBLY_PARSER_H + +#include + +class Module; +class ToolCommandLine; +class ParseException; + + +// The useful interface defined by this file... Parse an ascii file, and return +// the internal representation in a nice slice'n'dice'able representation. +// +Module *ParseAssemblyFile(const ToolCommandLine &Opts) throw (ParseException); + +//===------------------------------------------------------------------------=== +// Helper Classes +//===------------------------------------------------------------------------=== + +// ParseException - For when an exceptional event is generated by the parser. +// This class lets you print out the exception message +// +class ParseException { +public: + ParseException(const ToolCommandLine &Opts, const string &message, + int LineNo = -1, int ColNo = -1); + + ParseException(const ParseException &E); + + // getMessage - Return the message passed in at construction time plus extra + // information extracted from the options used to parse with... + // + const string getMessage() const; + + inline const string getRawMessage() const { // Just the raw message... + return Message; + } + + inline const ToolCommandLine &getOptions() const { + return Opts; // Get the options obj used to parse. + } + + // getErrorLocation - Return the line and column number of the error in the + // input source file. The source filename can be derived from the + // ParserOptions in effect. If positional information is not applicable, + // these will return a value of -1. 
+ // + inline const void getErrorLocation(int &Line, int &Column) const { + Line = LineNo; Column = ColumnNo; + } + +private : + const ToolCommandLine &Opts; + string Message; + int LineNo, ColumnNo; // -1 if not relevant + + ParseException &operator=(const ParseException &E); // objects by reference +}; + +#endif diff --git a/include/llvm/Assembly/Writer.h b/include/llvm/Assembly/Writer.h new file mode 100644 index 00000000000..71e2f4e0543 --- /dev/null +++ b/include/llvm/Assembly/Writer.h @@ -0,0 +1,79 @@ +//===-- llvm/assembly/Writer.h - Printer for VM assembly files ---*- C++ -*--=// +// +// This functionality is implemented by the lib/AssemblyWriter library. +// This library is used to print VM assembly language files to an iostream. It +// can print VM code at a variety of granularities, ranging from a whole class +// down to an individual instruction. This makes it useful for debugging. +// +// This library uses the Analysis library to figure out offsets for +// variables in the method tables... +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ASSEMBLY_WRITER_H +#define LLVM_ASSEMBLY_WRITER_H + +#include +#include "llvm/Type.h" + +class Module; +class Method; +class BasicBlock; +class Instruction; + +// The only interface defined by this file... convert the internal +// representation of an object into an ascii bytestream that the parser can +// understand later... (the parser only understands whole classes though) +// +void WriteToAssembly(const Module *Module, ostream &o); +void WriteToAssembly(const Method *Method, ostream &o); +void WriteToAssembly(const BasicBlock *BB, ostream &o); +void WriteToAssembly(const Instruction *In, ostream &o); +void WriteToAssembly(const ConstPoolVal *V, ostream &o); + + + +// Define operator<< to work on the various classes that we can send to an +// ostream... 
+// +inline ostream &operator<<(ostream &o, const Module *C) { + WriteToAssembly(C, o); return o; +} + +inline ostream &operator<<(ostream &o, const Method *M) { + WriteToAssembly(M, o); return o; +} + +inline ostream &operator<<(ostream &o, const BasicBlock *B) { + WriteToAssembly(B, o); return o; +} + +inline ostream &operator<<(ostream &o, const Instruction *I) { + WriteToAssembly(I, o); return o; +} + +inline ostream &operator<<(ostream &o, const ConstPoolVal *I) { + WriteToAssembly(I, o); return o; +} + + +inline ostream &operator<<(ostream &o, const Type *T) { + if (!T) return o << ""; + return o << T->getName(); +} + +inline ostream &operator<<(ostream &o, const Value *I) { + switch (I->getValueType()) { + case Value::TypeVal: return o << (const Type*)I; + case Value::ConstantVal: WriteToAssembly((const ConstPoolVal*)I, o); break; + case Value::MethodArgumentVal: return o <getType() << " " << I->getName(); + case Value::InstructionVal: WriteToAssembly((const Instruction *)I, o); break; + case Value::BasicBlockVal: WriteToAssembly((const BasicBlock *)I, o); break; + case Value::MethodVal: WriteToAssembly((const Method *)I, o); break; + case Value::ModuleVal: WriteToAssembly((const Module *)I, o); break; + default: return o << "getValueType() << ">"; + } + return o; +} + +#endif diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h new file mode 100644 index 00000000000..6873ef2deb7 --- /dev/null +++ b/include/llvm/BasicBlock.h @@ -0,0 +1,246 @@ +//===-- llvm/BasicBlock.h - Represent a basic block in the VM ----*- C++ -*--=// +// +// This file contains the declaration of the BasicBlock class, which represents +// a single basic block in the VM. +// +// Note that basic blocks themselves are Def's, because they are referenced +// by instructions like branches and can go in switch tables and stuff... +// +// This may see wierd at first, but it's really pretty cool. 
:) +// +//===----------------------------------------------------------------------===// +// +// Note that well formed basic blocks are formed of a list of instructions +// followed by a single TerminatorInst instruction. TerminatorInst's may not +// occur in the middle of basic blocks, and must terminate the blocks. +// +// This code allows malformed basic blocks to occur, because it may be useful +// in the intermediate stage of analysis or modification of a program. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BASICBLOCK_H +#define LLVM_BASICBLOCK_H + +#include "llvm/Value.h" // Get the definition of Value +#include "llvm/ValueHolder.h" +#include "llvm/InstrTypes.h" +#include + +class Instruction; +class Method; +class BasicBlock; +class TerminatorInst; + +typedef UseTy BasicBlockUse; + +class BasicBlock : public Value { // Basic blocks are data objects also +public: + typedef ValueHolder InstListType; +private : + InstListType InstList; + + friend class ValueHolder; + void setParent(Method *parent); + +public: + BasicBlock(const string &Name = "", Method *Parent = 0); + ~BasicBlock(); + + // Specialize setName to take care of symbol table majik + virtual void setName(const string &name); + + const Method *getParent() const { return (const Method*)InstList.getParent();} + Method *getParent() { return (Method*)InstList.getParent(); } + + const InstListType &getInstList() const { return InstList; } + InstListType &getInstList() { return InstList; } + + // getTerminator() - If this is a well formed basic block, then this returns + // a pointer to the terminator instruction. If it is not, then you get a null + // pointer back. + // + TerminatorInst *getTerminator(); + const TerminatorInst *const getTerminator() const; + + // hasConstantPoolReferences() - This predicate is true if there is a + // reference to this basic block in the constant pool for this method. 
For + // example, if a block is reached through a switch table, that table resides + // in the constant pool, and the basic block is reference from it. + // + bool hasConstantPoolReferences() const; + + // dropAllReferences() - This function causes all the subinstructions to "let + // go" of all references that they are maintaining. This allows one to + // 'delete' a whole class at a time, even though there may be circular + // references... first all references are dropped, and all use counts go to + // zero. Then everything is delete'd for real. Note that no operations are + // valid on an object that has "dropped all references", except operator + // delete. + // + void dropAllReferences(); + + // splitBasicBlock - This splits a basic block into two at the specified + // instruction. Note that all instructions BEFORE the specified iterator stay + // as part of the original basic block, an unconditional branch is added to + // the new BB, and the rest of the instructions in the BB are moved to the new + // BB, including the old terminator. The newly formed BasicBlock is returned. + // This function invalidates the specified iterator. + // + // Note that this only works on well formed basic blocks (must have a + // terminator), and 'I' must not be the end of instruction list (which would + // cause a degenerate basic block to be formed, having a terminator inside of + // the basic block). + // + BasicBlock *splitBasicBlock(InstListType::iterator I); + + //===--------------------------------------------------------------------===// + // Predecessor iterator code + //===--------------------------------------------------------------------===// + // + // This is used to figure out what basic blocks we could be coming from. + // + + // Forward declare iterator class template... 
+ template class PredIterator; + + typedef PredIterator pred_iterator; + typedef PredIterator pred_const_iterator; + + inline pred_iterator pred_begin() ; + inline pred_const_iterator pred_begin() const; + inline pred_iterator pred_end() ; + inline pred_const_iterator pred_end() const; + + //===--------------------------------------------------------------------===// + // Successor iterator code + //===--------------------------------------------------------------------===// + // + // This is used to figure out what basic blocks we could be going to... + // + + // Forward declare iterator class template... + template class SuccIterator; + + typedef SuccIterator succ_iterator; + typedef SuccIterator succ_const_iterator; + + inline succ_iterator succ_begin() ; + inline succ_const_iterator succ_begin() const; + inline succ_iterator succ_end() ; + inline succ_const_iterator succ_end() const; + + //===--------------------------------------------------------------------===// + // END of interesting code... + //===--------------------------------------------------------------------===// + // + // Thank god C++ compilers are good at stomping out tons of templated code... 
+ // + template // Predecessor Iterator + class PredIterator { + const _Ptr ThisBB; + _USE_iterator It; + public: + typedef PredIterator<_Ptr,_USE_iterator> _Self; + + typedef bidirectional_iterator_tag iterator_category; + typedef _Ptr pointer; + + inline PredIterator(_Ptr BB) : ThisBB(BB), It(BB->use_begin()) {} + inline PredIterator(_Ptr BB, bool) : ThisBB(BB), It(BB->use_end()) {} + + inline bool operator==(const _Self& x) const { return It == x.It; } + inline bool operator!=(const _Self& x) const { return !operator==(x); } + + inline pointer operator*() const { + assert ((*It)->getValueType() == Value::InstructionVal); + return ((Instruction *)(*It))->getParent(); + } + inline pointer *operator->() const { return &(operator*()); } + + inline _Self& operator++() { // Preincrement + do { // Loop to ignore constant pool references + ++It; + } while (It != ThisBB->use_end() && + ((*It)->getValueType() != Value::ConstantVal)); + + // DOES THIS WORK??? + //((*It)->getValueType() != Value::BasicBlockVal)); + return *this; + } + + inline _Self operator++(int) { // Postincrement + _Self tmp = *this; ++*this; return tmp; + } + + inline _Self& operator--() { --It; return *this; } // Predecrement + inline _Self operator--(int) { // Postdecrement + _Self tmp = *this; --*this; return tmp; + } + }; + + template // Successor Iterator + class SuccIterator { + const _Term Term; + unsigned idx; + public: + typedef SuccIterator<_Term, _BB> _Self; + typedef forward_iterator_tag iterator_category; + typedef _BB pointer; + + inline SuccIterator(_Term T) : Term(T), idx(0) {} // begin iterator + inline SuccIterator(_Term T, bool) + : Term(T), idx(Term->getNumSuccessors()) {} // end iterator + + inline bool operator==(const _Self& x) const { return idx == x.idx; } + inline bool operator!=(const _Self& x) const { return !operator==(x); } + + inline pointer operator*() const { return Term->getSuccessor(idx); } + inline pointer *operator->() const { return &(operator*()); } + + inline 
_Self& operator++() { ++idx; return *this; } // Preincrement + inline _Self operator++(int) { // Postincrement + _Self tmp = *this; ++*this; return tmp; + } + + inline _Self& operator--() { --idx; return *this; } // Predecrement + inline _Self operator--(int) { // Postdecrement + _Self tmp = *this; --*this; return tmp; + } + }; +}; + + +//===--------------------------------------------------------------------===// +// Implement some stuff prototyped above... +//===--------------------------------------------------------------------===// + +inline BasicBlock::pred_iterator BasicBlock::pred_begin() { + return pred_iterator(this); +} +inline BasicBlock::pred_const_iterator BasicBlock::pred_begin() const { + return pred_const_iterator(this); +} +inline BasicBlock::pred_iterator BasicBlock::pred_end() { + return pred_iterator(this,true); +} +inline BasicBlock::pred_const_iterator BasicBlock::pred_end() const { + return pred_const_iterator(this,true); +} + +inline BasicBlock::succ_iterator BasicBlock::succ_begin() { + return succ_iterator(getTerminator()); +} +inline BasicBlock::succ_const_iterator BasicBlock::succ_begin() const { + return succ_const_iterator(getTerminator()); +} +inline BasicBlock::succ_iterator BasicBlock::succ_end() { + return succ_iterator(getTerminator(),true); +} +inline BasicBlock::succ_const_iterator BasicBlock::succ_end() const { + return succ_const_iterator(getTerminator(),true); +} + +#endif diff --git a/include/llvm/Bytecode/Format.h b/include/llvm/Bytecode/Format.h new file mode 100644 index 00000000000..a87f8d18a62 --- /dev/null +++ b/include/llvm/Bytecode/Format.h @@ -0,0 +1,33 @@ +//===-- llvm/Bytecode/Format.h - VM bytecode file format info ----*- C++ -*--=// +// +// This header defines intrinsic constants that are useful to libraries that +// need to hack on bytecode files directly, like the reader and writer. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BYTECODE_FORMAT_H +#define LLVM_BYTECODE_FORMAT_H + +class BytecodeFormat { // Throw the constants into a poorman's namespace... + BytecodeFormat(); // do not implement +public: + + // ID Numbers that are used in bytecode files... + enum FileBlockIDs { + // File level identifiers... + Module = 0x01, + + // Module subtypes: + Method = 0x11, + ConstantPool, + SymbolTable, + ModuleGlobalInfo, + + // Method subtypes: + MethodInfo = 0x21, + // Can also have ConstantPool block + // Can also have SymbolTable block + BasicBlock = 0x31, // May contain many basic blocks + }; +}; +#endif diff --git a/include/llvm/Bytecode/Primitives.h b/include/llvm/Bytecode/Primitives.h new file mode 100644 index 00000000000..f4b232b3687 --- /dev/null +++ b/include/llvm/Bytecode/Primitives.h @@ -0,0 +1,237 @@ +//===-- llvm/Bytecode/Primitives.h - Bytecode file format prims --*- C++ -*--=// +// +// This header defines some basic functions for reading and writing basic +// primitive types to a bytecode stream. +// +// Using the routines defined in this file does not require linking to any +// libraries, as all of the services are small self contained units that are to +// be inlined as neccesary. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BYTECODE_PRIMITIVES_H +#define LLVM_BYTECODE_PRIMITIVES_H + +#include "llvm/Tools/DataTypes.h" +#include +#include + +//===----------------------------------------------------------------------===// +// Reading Primitives +//===----------------------------------------------------------------------===// + +static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf, + unsigned &Result) { + if (Buf+4 > EndBuf) return true; +#ifdef LITTLE_ENDIAN + Result = *(unsigned*)Buf; +#else + Result = Buf[0] | (Buf[1] << 8) | (Buf[2] << 16) | (Buf[3] << 24); +#endif + Buf += 4; + return false; +} + +static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf, + uint64_t &Result) { + if (Buf+8 > EndBuf) return true; + +#ifdef LITTLE_ENDIAN + Result = *(uint64_t*)Buf; +#else + Result = Buf[0] | (Buf[1] << 8) | (Buf[2] << 16) | (Buf[3] << 24) | + ((uint64_t)(Buf[4] | (Buf[5] << 8) | (Buf[6] << 16) | (Buf[7] << 24)) <<32); +#endif + Buf += 8; + return false; +} + +static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf, + int &Result) { + return read(Buf, EndBuf, (unsigned &)Result); +} + +static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf, + int64_t &Result) { + return read(Buf, EndBuf, (uint64_t &)Result); +} + + +// read_vbr - Read an unsigned integer encoded in variable bitrate format. 
+// +static inline bool read_vbr(const unsigned char *&Buf, + const unsigned char *EndBuf, unsigned &Result) { + unsigned Shift = Result = 0; + + do { + Result |= (unsigned)((*Buf++) & 0x7F) << Shift; + Shift += 7; + } while (Buf[-1] & 0x80 && Buf < EndBuf); + + return Buf > EndBuf; +} + +static inline bool read_vbr(const unsigned char *&Buf, + const unsigned char *EndBuf, uint64_t &Result) { + unsigned Shift = 0; Result = 0; + + do { + Result |= (uint64_t)((*Buf++) & 0x7F) << Shift; + Shift += 7; + } while (Buf[-1] & 0x80 && Buf < EndBuf); + return Buf > EndBuf; +} + +// read_vbr (signed) - Read a signed number stored in sign-magnitude format +static inline bool read_vbr(const unsigned char *&Buf, + const unsigned char *EndBuf, int &Result) { + unsigned R; + if (read_vbr(Buf, EndBuf, R)) return true; + if (R & 1) + Result = -(int)(R >> 1); + else + Result = (int)(R >> 1); + + return false; +} + + +static inline bool read_vbr(const unsigned char *&Buf, + const unsigned char *EndBuf, int64_t &Result) { + uint64_t R; + if (read_vbr(Buf, EndBuf, R)) return true; + if (R & 1) + Result = -(int64_t)(R >> 1); + else + Result = (int64_t)(R >> 1); + + return false; +} + +// align32 - Round up to multiple of 32 bits... +static inline bool align32(const unsigned char *&Buf, + const unsigned char *EndBuf) { + Buf = (const unsigned char *)((unsigned long)(Buf+3) & (~3UL)); + return Buf > EndBuf; +} + +static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf, + string &Result, bool Aligned = true) { + unsigned Size; + if (read_vbr(Buf, EndBuf, Size)) return true; // Failure reading size? + if (Buf+Size > EndBuf) return true; // Size invalid? + + Result = string((char*)Buf, Size); + Buf += Size; + + if (Aligned) // If we should stay aligned do so... + if (align32(Buf, EndBuf)) return true; // Failure aligning? 
+ + return false; +} + + +//===----------------------------------------------------------------------===// +// Writing Primitives +//===----------------------------------------------------------------------===// + +// output - If a position is specified, it must be in the valid portion of the +// string... note that this should be inlined always so only the relevant IF +// body should be included... +// +static inline void output(unsigned i, vector &Out, int pos = -1){ +#ifdef LITTLE_ENDIAN + if (pos == -1) + Out.insert(Out.end(), (unsigned char*)&i, (unsigned char*)&i+4); + else + *(unsigned*)&Out[pos] = i; +#else + if (pos == -1) { // Be endian clean, little endian is our friend + Out.push_back((unsigned char)i); + Out.push_back((unsigned char)(i >> 8)); + Out.push_back((unsigned char)(i >> 16)); + Out.push_back((unsigned char)(i >> 24)); + } else { + Out[pos ] = (unsigned char)i; + Out[pos+1] = (unsigned char)(i >> 8); + Out[pos+2] = (unsigned char)(i >> 16); + Out[pos+3] = (unsigned char)(i >> 24); + } +#endif +} + +static inline void output(int i, vector &Out) { + output((unsigned)i, Out); +} + +// output_vbr - Output an unsigned value, by using the least number of bytes +// possible. This is useful because many of our "infinite" values are really +// very small most of the time... but can be large a few times... +// +// Data format used: If you read a byte with the night bit set, use the low +// seven bits as data and then read another byte... +// +// Note that using this may cause the output buffer to become unaligned... +// +static inline void output_vbr(uint64_t i, vector &out) { + while (1) { + if (i < 0x80) { // done? + out.push_back((unsigned char)i); // We know the high bit is clear... + return; + } + + // Nope, we are bigger than a character, output the next 7 bits and set the + // high bit to say that there is more coming... + out.push_back(0x80 | (i & 0x7F)); + i >>= 7; // Shift out 7 bits now... 
+ } +} + +static inline void output_vbr(unsigned i, vector &out) { + while (1) { + if (i < 0x80) { // done? + out.push_back((unsigned char)i); // We know the high bit is clear... + return; + } + + // Nope, we are bigger than a character, output the next 7 bits and set the + // high bit to say that there is more coming... + out.push_back(0x80 | (i & 0x7F)); + i >>= 7; // Shift out 7 bits now... + } +} + +static inline void output_vbr(int64_t i, vector &out) { + if (i < 0) + output_vbr(((uint64_t)(-i) << 1) | 1, out); // Set low order sign bit... + else + output_vbr((uint64_t)i << 1, out); // Low order bit is clear. +} + + +static inline void output_vbr(int i, vector &out) { + if (i < 0) + output_vbr(((unsigned)(-i) << 1) | 1, out); // Set low order sign bit... + else + output_vbr((unsigned)i << 1, out); // Low order bit is clear. +} + +// align32 - emit the minimal number of bytes that will bring us to 32 bit +// alignment... +// +static inline void align32(vector &Out) { + int NumPads = (4-(Out.size() & 3)) & 3; // Bytes to get padding to 32 bits + while (NumPads--) Out.push_back((unsigned char)0xAB); +} + +static inline void output(const string &s, vector &Out, + bool Aligned = true) { + unsigned Len = s.length(); + output_vbr(Len, Out); // Strings may have an arbitrary length... + Out.insert(Out.end(), s.begin(), s.end()); + + if (Aligned) + align32(Out); // Make sure we are now aligned... +} + +#endif diff --git a/include/llvm/Bytecode/Reader.h b/include/llvm/Bytecode/Reader.h new file mode 100644 index 00000000000..17ddfc8fb8b --- /dev/null +++ b/include/llvm/Bytecode/Reader.h @@ -0,0 +1,24 @@ +//===-- llvm/Bytecode/Reader.h - Reader for VM bytecode files ----*- C++ -*--=// +// +// This functionality is implemented by the lib/BytecodeReader library. +// This library is used to read VM bytecode files from an iostream. 
+// +// Note that performance of this library is _crucial_ for performance of the +// JIT type applications, so we have designed the bytecode format to support +// quick reading. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BYTECODE_READER_H +#define LLVM_BYTECODE_READER_H + +#include + +class Module; + +// Parse and return a class... +// +Module *ParseBytecodeFile(const string &Filename); +Module *ParseBytecodeBuffer(const char *Buffer, unsigned BufferSize); + +#endif diff --git a/include/llvm/Bytecode/Writer.h b/include/llvm/Bytecode/Writer.h new file mode 100644 index 00000000000..e28ea77f07b --- /dev/null +++ b/include/llvm/Bytecode/Writer.h @@ -0,0 +1,25 @@ +//===-- llvm/Bytecode/Writer.h - Writer for VM bytecode files ----*- C++ -*--=// +// +// This functionality is implemented by the lib/BytecodeWriter library. +// This library is used to write VM bytecode files to an iostream. First, you +// have to make a BytecodeStream object, which you can then put a class into +// by using operator <<. +// +// This library uses the Analysis library to figure out offsets for +// variables in the method tables... +// +// Note that performance of this library is not as crucial as performance of the +// bytecode reader (which is to be used in JIT type applications), so we have +// designed the bytecode format to support quick reading. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BYTECODE_WRITER_H +#define LLVM_BYTECODE_WRITER_H + +#include + +class Module; +void WriteBytecodeToFile(const Module *C, ostream &Out); + +#endif diff --git a/include/llvm/ConstPoolVals.h b/include/llvm/ConstPoolVals.h new file mode 100644 index 00000000000..dbdda62f92d --- /dev/null +++ b/include/llvm/ConstPoolVals.h @@ -0,0 +1,234 @@ +//===-- llvm/ConstPoolVals.h - Constant Value nodes --------------*- C++ -*--=// +// +// This file contains the declarations for the ConstPoolVal class and all of +// its subclasses, which represent the different type of constant pool values +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CONSTPOOLVALS_H +#define LLVM_CONSTPOOLVALS_H + +#include "llvm/User.h" +#include "llvm/SymTabValue.h" +#include "llvm/Tools/DataTypes.h" +#include + +class ArrayType; +class StructType; + +//===----------------------------------------------------------------------===// +// ConstPoolVal Class +//===----------------------------------------------------------------------===// + +class ConstPoolVal; +typedef UseTy ConstPoolUse; + +class ConstPoolVal : public User { + SymTabValue *Parent; + + friend class ValueHolder; + inline void setParent(SymTabValue *parent) { + Parent = parent; + } + +public: + inline ConstPoolVal(const Type *Ty, const string &Name = "") + : User(Ty, Value::ConstantVal, Name) { Parent = 0; } + + // Specialize setName to handle symbol table majik... + virtual void setName(const string &name); + + // Static constructor to create a '0' constant of arbitrary type... 
+ static ConstPoolVal *getNullConstant(const Type *Ty); + + // clone() - Create a copy of 'this' value that is identical in all ways + // except the following: + // * The value has no parent + // * The value has no name + // + virtual ConstPoolVal *clone() const = 0; + + virtual string getStrValue() const = 0; + virtual bool equals(const ConstPoolVal *V) const = 0; + + inline const SymTabValue *getParent() const { return Parent; } + inline SymTabValue *getParent() { return Parent; } + + // if i > the number of operands, then getOperand() returns 0, and setOperand + // returns false. setOperand() may also return false if the operand is of + // the wrong type. + // + // Note that some subclasses may change this default no argument behavior + // + virtual Value *getOperand(unsigned i) { return 0; } + virtual const Value *getOperand(unsigned i) const { return 0; } + virtual bool setOperand(unsigned i, Value *Val) { return false; } + virtual void dropAllReferences() {} +}; + + + +//===----------------------------------------------------------------------===// +// Classes to represent constant pool variable defs +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------------- +// ConstPoolBool - Boolean Values +// +class ConstPoolBool : public ConstPoolVal { + bool Val; + ConstPoolBool(const ConstPoolBool &CP); +public: + ConstPoolBool(bool V, const string &Name = ""); + + virtual string getStrValue() const; + virtual bool equals(const ConstPoolVal *V) const; + + virtual ConstPoolVal *clone() const { return new ConstPoolBool(*this); } + + inline bool getValue() const { return Val; } + + // setValue - Be careful... if there is more than one 'use' of this node, then + // they will ALL see the value that you set... 
+ // + inline void setValue(bool v) { Val = v; } +}; + + +//===--------------------------------------------------------------------------- +// ConstPoolSInt - Signed Integer Values [sbyte, short, int, long] +// +class ConstPoolSInt : public ConstPoolVal { + int64_t Val; + ConstPoolSInt(const ConstPoolSInt &CP); +public: + ConstPoolSInt(const Type *Ty, int64_t V, const string &Name = ""); + + virtual ConstPoolVal *clone() const { return new ConstPoolSInt(*this); } + + virtual string getStrValue() const; + virtual bool equals(const ConstPoolVal *V) const; + + static bool isValueValidForType(const Type *Ty, int64_t V); + inline int64_t getValue() const { return Val; } +}; + + +//===--------------------------------------------------------------------------- +// ConstPoolUInt - Unsigned Integer Values [ubyte, ushort, uint, ulong] +// +class ConstPoolUInt : public ConstPoolVal { + uint64_t Val; + ConstPoolUInt(const ConstPoolUInt &CP); +public: + ConstPoolUInt(const Type *Ty, uint64_t V, const string &Name = ""); + + virtual ConstPoolVal *clone() const { return new ConstPoolUInt(*this); } + + virtual string getStrValue() const; + virtual bool equals(const ConstPoolVal *V) const; + + static bool isValueValidForType(const Type *Ty, uint64_t V); + inline uint64_t getValue() const { return Val; } +}; + + +//===--------------------------------------------------------------------------- +// ConstPoolFP - Floating Point Values [float, double] +// +class ConstPoolFP : public ConstPoolVal { + double Val; + ConstPoolFP(const ConstPoolFP &CP); +public: + ConstPoolFP(const Type *Ty, double V, const string &Name = ""); + + virtual ConstPoolVal *clone() const { return new ConstPoolFP(*this); } + virtual string getStrValue() const; + virtual bool equals(const ConstPoolVal *V) const; + + static bool isValueValidForType(const Type *Ty, double V); + inline double getValue() const { return Val; } +}; + + +//===--------------------------------------------------------------------------- +// 
ConstPoolType - Type Declarations +// +class ConstPoolType : public ConstPoolVal { + const Type *Val; + ConstPoolType(const ConstPoolType &CPT); +public: + ConstPoolType(const Type *V, const string &Name = ""); + + virtual ConstPoolVal *clone() const { return new ConstPoolType(*this); } + virtual string getStrValue() const; + virtual bool equals(const ConstPoolVal *V) const; + + inline const Type *getValue() const { return Val; } +}; + + +//===--------------------------------------------------------------------------- +// ConstPoolArray - Constant Array Declarations +// +class ConstPoolArray : public ConstPoolVal { + vector Val; + ConstPoolArray(const ConstPoolArray &CPT); +public: + ConstPoolArray(const ArrayType *T, vector &V, + const string &Name = ""); + inline ~ConstPoolArray() { dropAllReferences(); } + + virtual ConstPoolVal *clone() const { return new ConstPoolArray(*this); } + virtual string getStrValue() const; + virtual bool equals(const ConstPoolVal *V) const; + + inline const vector &getValues() const { return Val; } + + // Implement User stuff... + // + virtual Value *getOperand(unsigned i) { + return (i < Val.size()) ? Val[i] : 0; + } + virtual const Value *getOperand(unsigned i) const { + return (i < Val.size()) ? Val[i] : 0; + } + + // setOperand fails! You can't change a constant! 
+ virtual bool setOperand(unsigned i, Value *Val) { return false; } + virtual void dropAllReferences() { Val.clear(); } +}; + + +//===--------------------------------------------------------------------------- +// ConstPoolStruct - Constant Struct Declarations +// +class ConstPoolStruct : public ConstPoolVal { + vector Val; + ConstPoolStruct(const ConstPoolStruct &CPT); +public: + ConstPoolStruct(const StructType *T, vector &V, + const string &Name = ""); + inline ~ConstPoolStruct() { dropAllReferences(); } + + virtual ConstPoolVal *clone() const { return new ConstPoolStruct(*this); } + virtual string getStrValue() const; + virtual bool equals(const ConstPoolVal *V) const; + + inline const vector &getValues() const { return Val; } + + // Implement User stuff... + // + virtual Value *getOperand(unsigned i) { + return (i < Val.size()) ? Val[i] : 0; + } + virtual const Value *getOperand(unsigned i) const { + return (i < Val.size()) ? Val[i] : 0; + } + + // setOperand fails! You can't change a constant! + virtual bool setOperand(unsigned i, Value *Val) { return false; } + virtual void dropAllReferences() { Val.clear(); } +}; + +#endif diff --git a/include/llvm/ConstantHandling.h b/include/llvm/ConstantHandling.h new file mode 100644 index 00000000000..3227e3995f6 --- /dev/null +++ b/include/llvm/ConstantHandling.h @@ -0,0 +1,145 @@ +//===-- ConstantHandling.h - Stuff for manipulating constants ----*- C++ -*--=// +// +// This file contains the declarations of some cool operators that allow you +// to do natural things with constant pool values. +// +// Unfortunately we can't overload operators on pointer types (like this:) +// +// inline bool operator==(const ConstPoolVal *V1, const ConstPoolVal *V2) +// +// so we must make due with references, even though it leads to some butt ugly +// looking code downstream. 
*sigh* (ex: ConstPoolVal *Result = *V1 + *v2; ) +// +//===----------------------------------------------------------------------===// +// +// WARNING: These operators return pointers to newly 'new'd objects. You MUST +// make sure to free them if you don't want them hanging around. Also, +// note that these may return a null object if I don't know how to +// perform those operations on the specified constant types. +// +//===----------------------------------------------------------------------===// +// +// Implementation notes: +// This library is implemented this way for a reason: In most cases, we do +// not want to have to link the constant mucking code into an executable. +// We do, however want to tie some of this into the main type system, as an +// optional component. By using a mutable cache member in the Type class, we +// get exactly the kind of behavior we want. +// +// In the end, we get performance almost exactly the same as having a virtual +// function dispatch, but we don't have to put our virtual functions into the +// "Type" class, and we can implement functionality with templates. Good deal. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPT_CONSTANTHANDLING_H +#define LLVM_OPT_CONSTANTHANDLING_H + +#include "llvm/ConstPoolVals.h" +#include "llvm/Type.h" + +//===----------------------------------------------------------------------===// +// Implement == directly... 
+//===----------------------------------------------------------------------===// + +inline ConstPoolBool *operator==(const ConstPoolVal &V1, + const ConstPoolVal &V2) { + assert(V1.getType() == V2.getType() && "Constant types must be identical!"); + return new ConstPoolBool(V1.equals(&V2)); +} + +//===----------------------------------------------------------------------===// +// Implement all other operators indirectly through TypeRules system +//===----------------------------------------------------------------------===// + +class ConstRules { +protected: + inline ConstRules() {} // Can only be subclassed... +public: + // Unary Operators... + virtual ConstPoolVal *neg(const ConstPoolVal *V) const = 0; + virtual ConstPoolVal *not(const ConstPoolVal *V) const = 0; + + // Binary Operators... + virtual ConstPoolVal *add(const ConstPoolVal *V1, + const ConstPoolVal *V2) const = 0; + virtual ConstPoolVal *sub(const ConstPoolVal *V1, + const ConstPoolVal *V2) const = 0; + + virtual ConstPoolBool *lessthan(const ConstPoolVal *V1, + const ConstPoolVal *V2) const = 0; + + // ConstRules::get - A type will cache its own type rules if one is needed... + // we just want to make sure to hit the cache instead of doing it indirectly, + // if possible... + // + static inline const ConstRules *get(const ConstPoolVal &V) { + const ConstRules *Result = V.getType()->getConstRules(); + return Result ? 
Result : find(V.getType()); + } +private : + static const ConstRules *find(const Type *Ty); + + ConstRules(const ConstRules &); // Do not implement + ConstRules &operator=(const ConstRules &); // Do not implement +}; + + +inline ConstPoolVal *operator-(const ConstPoolVal &V) { + return ConstRules::get(V)->neg(&V); +} + +inline ConstPoolVal *operator!(const ConstPoolVal &V) { + return ConstRules::get(V)->not(&V); +} + + + +inline ConstPoolVal *operator+(const ConstPoolVal &V1, const ConstPoolVal &V2) { + assert(V1.getType() == V2.getType() && "Constant types must be identical!"); + return ConstRules::get(V1)->add(&V1, &V2); +} + +inline ConstPoolVal *operator-(const ConstPoolVal &V1, const ConstPoolVal &V2) { + assert(V1.getType() == V2.getType() && "Constant types must be identical!"); + return ConstRules::get(V1)->sub(&V1, &V2); +} + +inline ConstPoolBool *operator<(const ConstPoolVal &V1, + const ConstPoolVal &V2) { + assert(V1.getType() == V2.getType() && "Constant types must be identical!"); + return ConstRules::get(V1)->lessthan(&V1, &V2); +} + + +//===----------------------------------------------------------------------===// +// Implement 'derived' operators based on what we already have... 
+//===----------------------------------------------------------------------===// + +inline ConstPoolBool *operator>(const ConstPoolVal &V1, + const ConstPoolVal &V2) { + return V2 < V1; +} + +inline ConstPoolBool *operator!=(const ConstPoolVal &V1, + const ConstPoolVal &V2) { + ConstPoolBool *Result = V1 == V2; + Result->setValue(!Result->getValue()); // Invert value + return Result; // !(V1 == V2) +} + +inline ConstPoolBool *operator>=(const ConstPoolVal &V1, + const ConstPoolVal &V2) { + ConstPoolBool *Result = V1 < V2; + Result->setValue(!Result->getValue()); // Invert value + return Result; // !(V1 < V2) +} + +inline ConstPoolBool *operator<=(const ConstPoolVal &V1, + const ConstPoolVal &V2) { + ConstPoolBool *Result = V1 > V2; + Result->setValue(!Result->getValue()); // Invert value + return Result; // !(V1 > V2) +} + +#endif diff --git a/include/llvm/ConstantPool.h b/include/llvm/ConstantPool.h new file mode 100644 index 00000000000..7c8e255790c --- /dev/null +++ b/include/llvm/ConstantPool.h @@ -0,0 +1,74 @@ +//===-- llvm/ConstantPool.h - Define the constant pool class ------*- C++ -*-=// +// +// This file implements a constant pool that is split into different type +// planes. This allows searching for a typed object to go a little faster. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CONSTANTPOOL_H +#define LLVM_CONSTANTPOOL_H + +#include +#include "llvm/ValueHolder.h" + +class ConstPoolVal; +class SymTabValue; +class Type; + +class ConstantPool { +public: + typedef ValueHolder PlaneType; +private: + typedef vector PlanesType; + PlanesType Planes; + SymTabValue *Parent; + + inline void resize(unsigned size); +public: + inline ConstantPool(SymTabValue *P) { Parent = P; } + inline ~ConstantPool() { delete_all(); } + + inline SymTabValue *getParent() { return Parent; } + inline const SymTabValue *getParent() const { return Parent; } + + void setParent(SymTabValue *STV); + + void dropAllReferences(); // Drop all references to other constants + + // Constant getPlane - Returns true if the type plane does not exist, + // otherwise updates the pointer to point to the correct plane. + // + bool getPlane(const Type *T, const PlaneType *&Plane) const; + bool getPlane(const Type *T, PlaneType *&Plane); + + // Normal getPlane - Resizes constant pool to contain type even if it doesn't + // already have it. + // + PlaneType &getPlane(const Type *T); + + // insert - Add constant into the symbol table... + void insert(ConstPoolVal *N); + bool remove(ConstPoolVal *N); // Returns true on failure + + void delete_all(); + + // find - Search to see if a constant of the specified value is already in + // the constant table. 
+ // + const ConstPoolVal *find(const ConstPoolVal *V) const; + ConstPoolVal *find(const ConstPoolVal *V) ; + const ConstPoolVal *find(const Type *Ty) const; + ConstPoolVal *find(const Type *Ty) ; + + // Plane iteration support + // + typedef PlanesType::iterator plane_iterator; + typedef PlanesType::const_iterator plane_const_iterator; + + inline plane_iterator begin() { return Planes.begin(); } + inline plane_const_iterator begin() const { return Planes.begin(); } + inline plane_iterator end() { return Planes.end(); } + inline plane_const_iterator end() const { return Planes.end(); } +}; + +#endif diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h new file mode 100644 index 00000000000..c83a2d1e4d1 --- /dev/null +++ b/include/llvm/DerivedTypes.h @@ -0,0 +1,120 @@ +//===-- llvm/DerivedTypes.h - Classes for handling data types ----*- C++ -*--=// +// +// This file contains the declarations of classes that represent "derived +// types". These are things like "arrays of x" or "structure of x, y, z" or +// "method returning x taking (y,z) as parameters", etc... +// +// The implementations of these classes live in the Type.cpp file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DERIVED_TYPES_H +#define LLVM_DERIVED_TYPES_H + +#include "llvm/Type.h" +#include + +// Future derived types: SIMD packed format + + +class MethodType : public Type { +public: + typedef vector ParamTypes; +private: + const Type *ResultType; + ParamTypes ParamTys; + + MethodType(const MethodType &); // Do not implement + const MethodType &operator=(const MethodType &); // Do not implement +protected: + // This should really be private, but it squelches a bogus warning + // from GCC to make them protected: warning: `class MethodType' only + // defines private constructors and has no friends + + // Private ctor - Only can be created by a static member... 
+ MethodType(const Type *Result, const vector &Params, + const string &Name); +public: + + inline const Type *getReturnType() const { return ResultType; } + inline const ParamTypes &getParamTypes() const { return ParamTys; } + + static const MethodType *getMethodType(const Type *Result, + const ParamTypes &Params); +}; + + + +class ArrayType : public Type { +private: + const Type *ElementType; + int NumElements; // >= 0 for sized array, -1 for unbounded/unknown array + + ArrayType(const ArrayType &); // Do not implement + const ArrayType &operator=(const ArrayType &); // Do not implement +protected: + // This should really be private, but it squelches a bogus warning + // from GCC to make them protected: warning: `class ArrayType' only + // defines private constructors and has no friends + + + // Private ctor - Only can be created by a static member... + ArrayType(const Type *ElType, int NumEl, const string &Name); +public: + + inline const Type *getElementType() const { return ElementType; } + inline int getNumElements() const { return NumElements; } + + inline bool isSized() const { return NumElements >= 0; } + inline bool isUnsized() const { return NumElements == -1; } + + static const ArrayType *getArrayType(const Type *ElementType, + int NumElements = -1); +}; + +class StructType : public Type { +public: + typedef vector ElementTypes; +private: + ElementTypes ETypes; + + StructType(const StructType &); // Do not implement + const StructType &operator=(const StructType &); // Do not implement + +protected: + // This should really be private, but it squelches a bogus warning + // from GCC to make them protected: warning: `class StructType' only + // defines private constructors and has no friends + + // Private ctor - Only can be created by a static member... 
+ StructType(const vector &Types, const string &Name); +public: + + inline const ElementTypes &getElementTypes() const { return ETypes; } + static const StructType *getStructType(const ElementTypes &Params); +}; + + +class PointerType : public Type { +private: + const Type *ValueType; + + PointerType(const PointerType &); // Do not implement + const PointerType &operator=(const PointerType &); // Do not implement +protected: + // This should really be private, but it squelches a bogus warning + // from GCC to make them protected: warning: `class PointerType' only + // defines private constructors and has no friends + + + // Private ctor - Only can be created by a static member... + PointerType(const Type *ElType); +public: + + inline const Type *getValueType() const { return ValueType; } + + + static const PointerType *getPointerType(const Type *ElementType); +}; + +#endif diff --git a/include/llvm/Function.h b/include/llvm/Function.h new file mode 100644 index 00000000000..7448dce9fe3 --- /dev/null +++ b/include/llvm/Function.h @@ -0,0 +1,174 @@ +//===-- llvm/Method.h - Class to represent a single VM method ----*- C++ -*--=// +// +// This file contains the declaration of the Method class, which represents a +// single Method/function/procedure in the VM. +// +// Note that basic blocks themselves are Def's, because they are referenced +// by instructions like calls and can go in virtual function tables and stuff. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_METHOD_H +#define LLVM_METHOD_H + +#include "llvm/SymTabValue.h" +#include "llvm/BasicBlock.h" +#include + +class Instruction; +class BasicBlock; +class MethodArgument; +class MethodType; +class Method; +class Module; + +typedef UseTy MethodUse; + +class Method : public SymTabValue { +public: + typedef ValueHolder ArgumentListType; + typedef ValueHolder BasicBlocksType; +private: + + // Important things that make up a method! 
+ BasicBlocksType BasicBlocks; // The basic blocks + ArgumentListType ArgumentList; // The formal arguments + + Module *Parent; // The module that contains this method + + friend class ValueHolder; + void setParent(Module *parent); + +public: + Method(const MethodType *Ty, const string &Name = ""); + ~Method(); + + // Specialize setName to handle symbol table majik... + virtual void setName(const string &name); + + const Type *getReturnType() const; + const MethodType *getMethodType() const; + + // Is the body of this method unknown? (the basic block list is empty if so) + // this is true for "extern"al methods. + bool isMethodExternal() const { return BasicBlocks.empty(); } + + + // Get the class structure that this method is contained inside of... + inline Module *getParent() { return Parent; } + inline const Module *getParent() const { return Parent; } + + inline const BasicBlocksType &getBasicBlocks() const { return BasicBlocks; } + inline BasicBlocksType &getBasicBlocks() { return BasicBlocks; } + + inline const ArgumentListType &getArgumentList() const{ return ArgumentList; } + inline ArgumentListType &getArgumentList() { return ArgumentList; } + + + // dropAllReferences() - This function causes all the subinstructions to "let + // go" of all references that they are maintaining. This allows one to + // 'delete' a whole class at a time, even though there may be circular + // references... first all references are dropped, and all use counts go to + // zero. Then everything is delete'd for real. Note that no operations are + // valid on an object that has "dropped all references", except operator + // delete. 
+ // + void dropAllReferences(); + + //===--------------------------------------------------------------------===// + // Method Instruction iterator code + //===--------------------------------------------------------------------===// + // + template + class InstIterator; + typedef InstIterator inst_iterator; + typedef InstIterator inst_const_iterator; + + // This inner class is used to implement inst_begin() & inst_end() for + // inst_iterator and inst_const_iterator's. + // + template + class InstIterator { + typedef _BB_t BBty; + typedef _BB_i_t BBIty; + typedef _BI_t BIty; + typedef _II_t IIty; + _BB_t &BBs; // BasicBlocksType + _BB_i_t BB; // BasicBlocksType::iterator + _BI_t BI; // BasicBlock::InstListType::iterator + public: + typedef bidirectional_iterator_tag iterator_category; + + template InstIterator(M &m) + : BBs(m.getBasicBlocks()), BB(BBs.begin()) { // begin ctor + if (BB != BBs.end()) { + BI = (*BB)->getInstList().begin(); + resyncInstructionIterator(); + } + } + + template InstIterator(M &m, bool) + : BBs(m.getBasicBlocks()), BB(BBs.end()) { // end ctor + } + + // Accessors to get at the underlying iterators... + inline BBIty &getBasicBlockIterator() { return BB; } + inline BIty &getInstructionIterator() { return BI; } + + inline IIty operator*() const { return *BI; } + inline IIty *operator->() const { return &(operator*()); } + + inline bool operator==(const InstIterator &y) const { + return BB == y.BB && (BI == y.BI || BB == BBs.end()); + } + inline bool operator!=(const InstIterator& y) const { + return !operator==(y); + } + + // resyncInstructionIterator - This should be called if the + // InstructionIterator is modified outside of our control. This resynchs + // the internals of the InstIterator to a consistent state. + // + inline void resyncInstructionIterator() { + // The only way that the II could be broken is if it is now pointing to + // the end() of the current BasicBlock and there are successor BBs. 
+ while (BI == (*BB)->getInstList().end()) { + ++BB; + if (BB == BBs.end()) break; + BI = (*BB)->getInstList().begin(); + } + } + + InstIterator& operator++() { + ++BI; + resyncInstructionIterator(); // Make sure it is still valid. + return *this; + } + inline InstIterator operator++(int) { + InstIterator tmp = *this; ++*this; return tmp; + } + + InstIterator& operator--() { + while (BB == BBs.end() || BI == (*BB)->getInstList().begin()) { + --BB; + BI = (*BB)->getInstList().end(); + } + --BI; + return *this; + } + inline InstIterator operator--(int) { + InstIterator tmp = *this; --*this; return tmp; + } + }; + + inline inst_iterator inst_begin() { return inst_iterator(*this); } + inline inst_iterator inst_end() { return inst_iterator(*this, true); } + inline inst_const_iterator inst_begin() const { return inst_const_iterator(*this); } + inline inst_const_iterator inst_end() const { return inst_const_iterator(*this, true); } +}; + +#endif diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h new file mode 100644 index 00000000000..be6ea269740 --- /dev/null +++ b/include/llvm/InstrTypes.h @@ -0,0 +1,131 @@ +//===-- llvm/InstrTypes.h - Important Instruction subclasses -----*- C++ -*--=// +// +// This file defines various meta classes of instructions that exist in the VM +// representation. Specific concrete subclasses of these may be found in the +// i*.h files... +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_INSTRUCTION_TYPES_H +#define LLVM_INSTRUCTION_TYPES_H + +#include "llvm/Instruction.h" +#include +#include + +class Method; +class SymTabValue; + +//===----------------------------------------------------------------------===// +// TerminatorInst Class +//===----------------------------------------------------------------------===// + +// TerminatorInst - Subclasses of this class are all able to terminate a basic +// block. Thus, these are all the flow control type of operations. 
+// +class TerminatorInst : public Instruction { +public: + TerminatorInst(unsigned iType); + inline ~TerminatorInst() {} + + // Terminators must implement the methods required by Instruction... + virtual Instruction *clone() const = 0; + virtual void dropAllReferences() = 0; + virtual string getOpcode() const = 0; + + virtual bool setOperand(unsigned i, Value *Val) = 0; + virtual const Value *getOperand(unsigned i) const = 0; + + // Additionally, they must provide a method to get at the successors of this + // terminator instruction. If 'idx' is out of range, a null pointer shall be + // returned. + // + virtual const BasicBlock *getSuccessor(unsigned idx) const = 0; + virtual unsigned getNumSuccessors() const = 0; + + inline BasicBlock *getSuccessor(unsigned idx) { + return (BasicBlock*)((const TerminatorInst *)this)->getSuccessor(idx); + } +}; + + +//===----------------------------------------------------------------------===// +// UnaryOperator Class +//===----------------------------------------------------------------------===// + +class UnaryOperator : public Instruction { + Use Source; +public: + UnaryOperator(Value *S, unsigned iType, const string &Name = "") + : Instruction(S->getType(), iType, Name), Source(S, this) { + } + inline ~UnaryOperator() { dropAllReferences(); } + + virtual Instruction *clone() const { + return Instruction::getUnaryOperator(getInstType(), Source); + } + + virtual void dropAllReferences() { + Source = 0; + } + + virtual string getOpcode() const = 0; + + virtual unsigned getNumOperands() const { return 1; } + virtual const Value *getOperand(unsigned i) const { + return (i == 0) ? 
Source : 0; + } + virtual bool setOperand(unsigned i, Value *Val) { + // assert(Val && "operand must not be null!"); + if (i) return false; + Source = Val; + return true; + } +}; + + + +//===----------------------------------------------------------------------===// +// BinaryOperator Class +//===----------------------------------------------------------------------===// + +class BinaryOperator : public Instruction { + Use Source1, Source2; +public: + BinaryOperator(unsigned iType, Value *S1, Value *S2, + const string &Name = "") + : Instruction(S1->getType(), iType, Name), Source1(S1, this), + Source2(S2, this){ + assert(S1 && S2 && S1->getType() == S2->getType()); + } + inline ~BinaryOperator() { dropAllReferences(); } + + virtual Instruction *clone() const { + return Instruction::getBinaryOperator(getInstType(), Source1, Source2); + } + + virtual void dropAllReferences() { + Source1 = Source2 = 0; + } + + virtual string getOpcode() const = 0; + + virtual unsigned getNumOperands() const { return 2; } + virtual const Value *getOperand(unsigned i) const { + return (i == 0) ? Source1 : ((i == 1) ? Source2 : 0); + } + + virtual bool setOperand(unsigned i, Value *Val) { + // assert(Val && "operand must not be null!"); + if (i == 0) { + Source1 = Val; //assert(Val->getType() == Source2->getType()); + } else if (i == 1) { + Source2 = Val; //assert(Val->getType() == Source1->getType()); + } else { + return false; + } + return true; + } +}; + +#endif diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h new file mode 100644 index 00000000000..415c307e730 --- /dev/null +++ b/include/llvm/Instruction.h @@ -0,0 +1,199 @@ +//===-- llvm/Instruction.h - Instruction class definition --------*- C++ -*--=// +// +// This file contains the declaration of the Instruction class, which is the +// base class for all of the VM instructions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_INSTRUCTION_H +#define LLVM_INSTRUCTION_H + +#include "llvm/User.h" + +class Type; +class BasicBlock; +class Method; + +class Instruction : public User { + BasicBlock *Parent; + unsigned iType; // InstructionType + + friend class ValueHolder; + inline void setParent(BasicBlock *P) { Parent = P; } + +public: + Instruction(const Type *Ty, unsigned iType, const string &Name = ""); + virtual ~Instruction(); // Virtual dtor == good. + + // Specialize setName to handle symbol table majik... + virtual void setName(const string &name); + + // clone() - Create a copy of 'this' instruction that is identical in all ways + // except the following: + // * The instruction has no parent + // * The instruction has no name + // + virtual Instruction *clone() const = 0; + + // Accessor methods... + // + inline const BasicBlock *getParent() const { return Parent; } + inline BasicBlock *getParent() { return Parent; } + bool hasSideEffects() const { return false; } // Memory & Call insts = true + + // --------------------------------------------------------------------------- + // Implement the User interface + // if i > the number of operands, then getOperand() returns 0, and setOperand + // returns false. setOperand() may also return false if the operand is of + // the wrong type. + // + inline Value *getOperand(unsigned i) { + return (Value*)((const Instruction *)this)->getOperand(i); + } + virtual const Value *getOperand(unsigned i) const = 0; + virtual bool setOperand(unsigned i, Value *Val) = 0; + virtual unsigned getNumOperands() const = 0; + + // --------------------------------------------------------------------------- + // Operand Iterator interface... 
+ // + template class OperandIterator; + typedef OperandIterator op_iterator; + typedef OperandIterator op_const_iterator; + + inline op_iterator op_begin() ; + inline op_const_iterator op_begin() const; + inline op_iterator op_end() ; + inline op_const_iterator op_end() const; + + + // --------------------------------------------------------------------------- + // Subclass classification... getInstType() returns a member of + // one of the enums that is coming soon (down below)... + // + virtual string getOpcode() const = 0; + + unsigned getInstType() const { return iType; } + inline bool isTerminator() const { // Instance of TerminatorInst? + return iType >= FirstTermOp && iType < NumTermOps; + } + inline bool isDefinition() const { return !isTerminator(); } + inline bool isUnaryOp() const { + return iType >= FirstUnaryOp && iType < NumUnaryOps; + } + inline bool isBinaryOp() const { + return iType >= FirstBinaryOp && iType < NumBinaryOps; + } + + static Instruction *getBinaryOperator(unsigned Op, Value *S1, Value *S2); + static Instruction *getUnaryOperator (unsigned Op, Value *Source); + + + //---------------------------------------------------------------------- + // Exported enumerations... + // + enum TermOps { // These terminate basic blocks + FirstTermOp = 1, + Ret = 1, Br, Switch, + NumTermOps // Must remain at end of enum + }; + + enum UnaryOps { + FirstUnaryOp = NumTermOps, + Neg = NumTermOps, Not, + + // Type conversions... + ToBoolTy , + ToUByteTy , ToSByteTy, ToUShortTy, ToShortTy, + ToUInt , ToInt, ToULongTy , ToLongTy, + + ToFloatTy , ToDoubleTy, ToArrayTy , ToPointerTy, + + NumUnaryOps // Must remain at end of enum + }; + + enum BinaryOps { + // Standard binary operators... + FirstBinaryOp = NumUnaryOps, + Add = NumUnaryOps, Sub, Mul, Div, Rem, + + // Logical operators... + And, Or, Xor, + + // Binary comparison operators... 
+ SetEQ, SetNE, SetLE, SetGE, SetLT, SetGT, + + NumBinaryOps + }; + + enum MemoryOps { + FirstMemoryOp = NumBinaryOps, + Malloc = NumBinaryOps, Free, // Heap management instructions + Alloca, // Stack management instruction + + Load, Store, // Memory manipulation instructions. + + GetField, PutField, // Structure manipulation instructions + + NumMemoryOps + }; + + enum OtherOps { + FirstOtherOp = NumMemoryOps, + PHINode = NumMemoryOps, // PHI node instruction + Call, // Call a function + + Shl, Shr, // Shift operations... + + NumOps, // Must be the last 'op' defined. + UserOp1, UserOp2 // May be used internally to a pass... + }; + +public: + template // Operand Iterator Implementation + class OperandIterator { + const _Inst Inst; + unsigned idx; + public: + typedef OperandIterator<_Inst, _Val> _Self; + typedef forward_iterator_tag iterator_category; + typedef _Val pointer; + + inline OperandIterator(_Inst T) : Inst(T), idx(0) {} // begin iterator + inline OperandIterator(_Inst T, bool) + : Inst(T), idx(Inst->getNumOperands()) {} // end iterator + + inline bool operator==(const _Self& x) const { return idx == x.idx; } + inline bool operator!=(const _Self& x) const { return !operator==(x); } + + inline pointer operator*() const { return Inst->getOperand(idx); } + inline pointer *operator->() const { return &(operator*()); } + + inline _Self& operator++() { ++idx; return *this; } // Preincrement + inline _Self operator++(int) { // Postincrement + _Self tmp = *this; ++*this; return tmp; + } + + inline _Self& operator--() { --idx; return *this; } // Predecrement + inline _Self operator--(int) { // Postdecrement + _Self tmp = *this; --*this; return tmp; + } + }; + +}; + +inline Instruction::op_iterator Instruction::op_begin() { + return op_iterator(this); +} +inline Instruction::op_const_iterator Instruction::op_begin() const { + return op_const_iterator(this); +} +inline Instruction::op_iterator Instruction::op_end() { + return op_iterator(this,true); +} +inline 
Instruction::op_const_iterator Instruction::op_end() const { + return op_const_iterator(this,true); +} + + +#endif diff --git a/include/llvm/Module.h b/include/llvm/Module.h new file mode 100644 index 00000000000..9437b2c7781 --- /dev/null +++ b/include/llvm/Module.h @@ -0,0 +1,38 @@ +//===-- llvm/Module.h - C++ class to represent a VM module -------*- C++ -*--=// +// +// This file contains the declarations for the Module class that is used to +// maintain all the information related to a VM module. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MODULE_H +#define LLVM_MODULE_H + +#include "llvm/SymTabValue.h" +class Method; + +class Module : public SymTabValue { +public: + typedef ValueHolder MethodListType; +private: + MethodListType MethodList; // The Methods + +public: + Module(); + ~Module(); + + inline const MethodListType &getMethodList() const { return MethodList; } + inline MethodListType &getMethodList() { return MethodList; } + + // dropAllReferences() - This function causes all the subinstructions to "let + // go" of all references that they are maintaining. This allows one to + // 'delete' a whole class at a time, even though there may be circular + // references... first all references are dropped, and all use counts go to + // zero. Then everything is delete'd for real. Note that no operations are + // valid on an object that has "dropped all references", except operator + // delete. + // + void dropAllReferences(); +}; + +#endif diff --git a/include/llvm/Optimizations/AllOpts.h b/include/llvm/Optimizations/AllOpts.h new file mode 100644 index 00000000000..bfb7b5768e2 --- /dev/null +++ b/include/llvm/Optimizations/AllOpts.h @@ -0,0 +1,95 @@ +//===-- llvm/AllOpts.h - Header file to get all opt passes -------*- C++ -*--=// +// +// This file #include's all of the small optimization header files. +// +// Note that all optimizations return true if they modified the program, false +// if not. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPT_ALLOPTS_H +#define LLVM_OPT_ALLOPTS_H + +#include "llvm/Module.h" +#include "llvm/BasicBlock.h" +class Method; +class CallInst; + +//===----------------------------------------------------------------------===// +// Helper functions +// + +static inline bool ApplyOptToAllMethods(Module *C, bool (*Opt)(Method*)) { + bool Modified = false; + for (Module::MethodListType::iterator I = C->getMethodList().begin(); + I != C->getMethodList().end(); I++) + Modified |= Opt(*I); + return Modified; +} + +//===----------------------------------------------------------------------===// +// Dead Code Elimination Pass +// + +bool DoDeadCodeElimination(Method *M); // DCE a method +bool DoRemoveUnusedConstants(SymTabValue *S); // RUC a method or class +bool DoDeadCodeElimination(Module *C); // DCE & RUC a whole class + +//===----------------------------------------------------------------------===// +// Constant Propogation Pass +// + +bool DoConstantPropogation(Method *M); + +static inline bool DoConstantPropogation(Module *C) { + return ApplyOptToAllMethods(C, DoConstantPropogation); +} + +//===----------------------------------------------------------------------===// +// Method Inlining Pass +// + +// DoMethodInlining - Use a heuristic based approach to inline methods that seem +// to look good. +// +bool DoMethodInlining(Method *M); + +static inline bool DoMethodInlining(Module *C) { + return ApplyOptToAllMethods(C, DoMethodInlining); +} + +// InlineMethod - This function forcibly inlines the called method into the +// basic block of the caller. This returns true if it is not possible to inline +// this call. The program is still in a well defined state if this occurs +// though. +// +// Note that this only does one level of inlining. For example, if the +// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +// exists in the instruction stream. 
Similarly this will inline a recursive +// method by one level. +// +bool InlineMethod(CallInst *C); +bool InlineMethod(BasicBlock::InstListType::iterator CI);// *CI must be CallInst + + +//===----------------------------------------------------------------------===// +// Symbol Stripping Pass +// + +// DoSymbolStripping - Remove all symbolic information from a method +// +bool DoSymbolStripping(Method *M); + +// DoSymbolStripping - Remove all symbolic information from all methods in a +// module +// +static inline bool DoSymbolStripping(Module *M) { + return ApplyOptToAllMethods(M, DoSymbolStripping); +} + +// DoFullSymbolStripping - Remove all symbolic information from all methods +// in a module, and all module level symbols. (method names, etc...) +// +bool DoFullSymbolStripping(Module *M); + +#endif diff --git a/include/llvm/SlotCalculator.h b/include/llvm/SlotCalculator.h new file mode 100644 index 00000000000..99e40cbeb5f --- /dev/null +++ b/include/llvm/SlotCalculator.h @@ -0,0 +1,96 @@ +//===-- llvm/Analysis/SlotCalculator.h - Calculate value slots ---*- C++ -*-==// +// +// This ModuleAnalyzer subclass calculates the slots that values will land in. +// This is useful for when writing bytecode or assembly out, because you have +// to know these things. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SLOTCALCULATOR_H +#define LLVM_ANALYSIS_SLOTCALCULATOR_H + +#include "llvm/Analysis/ModuleAnalyzer.h" +#include "llvm/SymTabValue.h" +#include +#include + +class SlotCalculator : public ModuleAnalyzer { + const Module *TheModule; + bool IgnoreNamedNodes; // Shall we not count named nodes? + + typedef vector TypePlane; + vector Table; + map NodeMap; + + // ModuleLevel - Used to keep track of which values belong to the module, + // and which values belong to the currently incorporated method.
+ // + vector ModuleLevel; + +public: + SlotCalculator(const Module *M, bool IgnoreNamed); + SlotCalculator(const Method *M, bool IgnoreNamed);// Start out in incorp state + inline ~SlotCalculator() {} + + // getValSlot returns < 0 on error! + int getValSlot(const Value *D) const; + + inline unsigned getNumPlanes() const { return Table.size(); } + inline unsigned getModuleLevel(unsigned Plane) const { + return Plane < ModuleLevel.size() ? ModuleLevel[Plane] : 0; + } + + inline const TypePlane &getPlane(unsigned Plane) const { + return Table[Plane]; + } + + // If you'd like to deal with a method, use these two methods to get its data + // into the SlotCalculator! + // + void incorporateMethod(const Method *M); + void purgeMethod(); + +protected: + // insertVal - Insert a value into the value table... + // + void insertVal(const Value *D); + + // visitMethod - This member is called after the constant pool has been + // processed. The default implementation of this is a noop. + // + virtual bool visitMethod(const Method *M); + + // processConstant is called once per each constant in the constant pool. It + // traverses the constant pool such that it visits each constant in the + // order of its type. Thus, all 'int' typed constants shall be visited + // sequentially, etc... + // + virtual bool processConstant(const ConstPoolVal *CPV); + + // processType - This callback occurs when a derived type is discovered + // at the class level. This activity occurs when processing a constant pool. + // + virtual bool processType(const Type *Ty); + + // processMethods - The default implementation of this method loops through + // all of the methods in the module and processModule's them. We don't want + // this (we want to explicitly visit them with incorporateMethod), so we + // disable it. + // + virtual bool processMethods(const Module *M) { return false; } + + // processMethodArgument - This member is called for every argument that + // is passed into the method.
+ // + virtual bool processMethodArgument(const MethodArgument *MA); + + // processBasicBlock - This member is called for each basic block in a method. + // + virtual bool processBasicBlock(const BasicBlock *BB); + + // processInstruction - This member is called for each Instruction in a method. + // + virtual bool processInstruction(const Instruction *I); +}; + +#endif diff --git a/include/llvm/SymTabValue.h b/include/llvm/SymTabValue.h new file mode 100644 index 00000000000..556d5c7a071 --- /dev/null +++ b/include/llvm/SymTabValue.h @@ -0,0 +1,51 @@ +//===-- llvm/SymTabDef.h - Implement SymbolTable Defs ------------*- C++ -*--=// +// +// This subclass of Def implements a def that has a symbol table for keeping +// track of children. This is used by the DefHolder template class... +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYMTABDEF_H +#define LLVM_SYMTABDEF_H + +#include "llvm/Value.h" // Get the definition of Value +#include "llvm/ConstantPool.h" + +class SymbolTable; +class ConstPoolVal; + +class SymTabValue : public Value { +public: + typedef ConstantPool ConstantPoolType; +private: + SymbolTable *SymTab, *ParentSymTab; + ConstantPool ConstPool; // The constant pool + +protected: + void setParentSymTab(SymbolTable *ST); +public: + SymTabValue(const Type *Ty, ValueTy dty, const string &name = ""); + ~SymTabValue(); // Implemented in Def.cpp + + // hasSymbolTable() - Returns true if there is a symbol table allocated to + // this object AND if there is at least one name in it!
+ // + bool hasSymbolTable() const; + + // CAUTION: The current symbol table may be null if there are no names (ie, + // the symbol table is empty) + // + inline SymbolTable *getSymbolTable() { return SymTab; } + inline const SymbolTable *getSymbolTable() const { return SymTab; } + + inline const ConstantPool &getConstantPool() const{ return ConstPool; } + inline ConstantPool &getConstantPool() { return ConstPool; } + + // getSymbolTableSure is guaranteed to not return a null pointer, because if + // the method does not already have a symtab, one is created. Use this if + // you intend to put something into the symbol table for the method. + // + SymbolTable *getSymbolTableSure(); // Implemented in Def.cpp +}; + +#endif diff --git a/include/llvm/SymbolTable.h b/include/llvm/SymbolTable.h new file mode 100644 index 00000000000..dfb78eee822 --- /dev/null +++ b/include/llvm/SymbolTable.h @@ -0,0 +1,83 @@ +//===-- llvm/SymbolTable.h - Implement a type planed symtab -------*- C++ -*-=// +// +// This file implements a symbol table that has planes broken up by type. +// Identical types may have overlapping symbol names as long as they are +// distinct. +// +// Note that this implements a chained symbol table. If a name being 'lookup'd +// isn't found in the current symbol table, then the parent symbol table is +// searched. +// +// This chaining behavior does NOT affect iterators though: only the lookup +// method +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYMBOL_TABLE_H +#define LLVM_SYMBOL_TABLE_H + +#include +#include +#include + +class Value; +class Type; + +// TODO: Change this back to vector > +// Make the vector be a data member, and base it on UniqueID's +// That should be much more efficient!
+// +class SymbolTable : public map > { + typedef map VarMap; + typedef map super; + + SymbolTable *ParentSymTab; + + friend class SymTabValue; + inline void setParentSymTab(SymbolTable *P) { ParentSymTab = P; } + +public: + typedef VarMap::iterator type_iterator; + typedef VarMap::const_iterator type_const_iterator; + + inline SymbolTable(SymbolTable *P = 0) { ParentSymTab = P; } + ~SymbolTable(); + + SymbolTable *getParentSymTab() { return ParentSymTab; } + + // lookup - Returns null on failure... + Value *lookup(const Type *Ty, const string &name); + + // find - returns end(Ty->getIDNumber()) on failure... + type_iterator type_find(const Type *Ty, const string &name); + type_iterator type_find(const Value *D); + + // insert - Add named definition to the symbol table... + void insert(Value *N); + + void remove(Value *N); + Value *type_remove(const type_iterator &It); + + inline unsigned type_size(const Type *TypeID) const { + return find(TypeID)->second.size(); + } + + // Note that type_begin / type_end only work if you know that an element of + // TypeID is already in the symbol table!!! + // + inline type_iterator type_begin(const Type *TypeID) { + return find(TypeID)->second.begin(); + } + inline type_const_iterator type_begin(const Type *TypeID) const { + return find(TypeID)->second.begin(); + } + + inline type_iterator type_end(const Type *TypeID) { + return find(TypeID)->second.end(); + } + inline type_const_iterator type_end(const Type *TypeID) const { + return find(TypeID)->second.end(); + } +}; + +#endif diff --git a/include/llvm/Tools/CommandLine.h b/include/llvm/Tools/CommandLine.h new file mode 100644 index 00000000000..76b4e97989c --- /dev/null +++ b/include/llvm/Tools/CommandLine.h @@ -0,0 +1,126 @@ +//===-- llvm/Tools/CommandLine.h - Command line parser for tools -*- C++ -*--=// +// +// This class implements a command line argument processor that is useful when +// creating a tool. 
+// +// This class is defined entirely inline so that you don't have to link to any +// libraries to use this. +// +// TODO: make this extensible by passing in arguments to be read. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_COMMANDLINE_H +#define LLVM_TOOLS_COMMANDLINE_H + +#include + +class ToolCommandLine { +public: + inline ToolCommandLine(int &argc, char **argv, bool OutputBytecode = true); + inline ToolCommandLine(const string &infn, const string &outfn = "-"); + inline ToolCommandLine(const ToolCommandLine &O); + inline ToolCommandLine &operator=(const ToolCommandLine &O); + + inline bool getForce() const { return Force; } + inline const string getInputFilename() const { return InputFilename; } + inline const string getOutputFilename() const { return OutputFilename; } + +private: + void calculateOutputFilename(bool OutputBytecode) { + OutputFilename = InputFilename; + unsigned Len = OutputFilename.length(); + + if (Len <= 3) { + OutputFilename += (OutputBytecode ? ".bc" : ".ll"); + return; + } + + if (OutputBytecode) { + if (OutputFilename[Len-3] == '.' && + OutputFilename[Len-2] == 'l' && + OutputFilename[Len-1] == 'l') { // .ll -> .bc + OutputFilename[Len-2] = 'b'; + OutputFilename[Len-1] = 'c'; + } else { + OutputFilename += ".bc"; + } + } else { + if (OutputFilename[Len-3] == '.' && + OutputFilename[Len-2] == 'b' && + OutputFilename[Len-1] == 'c') { // .bc -> .ll + OutputFilename[Len-2] = 'l'; + OutputFilename[Len-1] = 'l'; + } else { + OutputFilename += ".ll"; + } + } + } + +private: + string InputFilename; // Filename to read from. If "-", use stdin. + string OutputFilename; // Filename to write to. If "-", use stdout.
+ bool Force; // Force output (-f argument) +}; + +inline ToolCommandLine::ToolCommandLine(int &argc, char **argv, bool OutBC) + : InputFilename("-"), OutputFilename("-"), Force(false) { + bool FoundInputFN = false; + bool FoundOutputFN = false; + bool FoundForce = false; + + for (int i = 1; i < argc; i++) { + int RemoveArg = 0; + + if (argv[i][0] == '-') { + if (!FoundInputFN && argv[i][1] == 0) { // Is the current argument '-' + InputFilename = argv[i]; + FoundInputFN = true; + RemoveArg = 1; + } else if (!FoundOutputFN && (argv[i][1] == 'o' && argv[i][2] == 0)) { + // Is the argument -o? + if (i+1 < argc) { // Next arg is output fn + OutputFilename = argv[i+1]; + FoundOutputFN = true; + RemoveArg = 2; + } + } else if (!FoundForce && (argv[i][1] == 'f' && argv[i][2] == 0)) { + Force = true; + FoundForce = true; + RemoveArg = 1; + } + } else if (!FoundInputFN) { // Is the current argument '[^-].*'? + InputFilename = argv[i]; + FoundInputFN = true; + RemoveArg = 1; + } + + if (RemoveArg) { + argc -= RemoveArg; // Shift args over... + memmove(argv+i, argv+i+RemoveArg, (argc-i)*sizeof(char*)); + i--; // Reprocess this argument... + } + } + + if (!FoundOutputFN && InputFilename != "-") + calculateOutputFilename(OutBC); +} + +inline ToolCommandLine::ToolCommandLine(const string &inf, + const string &outf) + : InputFilename(inf), OutputFilename(outf), Force(false) { +} + +inline ToolCommandLine::ToolCommandLine(const ToolCommandLine &Opts) + : InputFilename(Opts.InputFilename), OutputFilename(Opts.OutputFilename), + Force(Opts.Force) { +} + +inline ToolCommandLine &ToolCommandLine::operator=(const ToolCommandLine &Opts){ + InputFilename = Opts.InputFilename; + OutputFilename = Opts.OutputFilename; + Force = Opts.Force; + return *this; +} + +#endif diff --git a/include/llvm/Tools/DataTypes.h b/include/llvm/Tools/DataTypes.h new file mode 100644 index 00000000000..ada16c24da3 --- /dev/null +++ b/include/llvm/Tools/DataTypes.h @@ -0,0 +1,26 @@ + +// TODO: This file sucks. 
Not only does it not work, but this stuff should be +// autoconfiscated anyways. Major FIXME + + +#ifndef LLVM_TOOLS_DATATYPES_H +#define LLVM_TOOLS_DATATYPES_H + +// Should define the following: +// LITTLE_ENDIAN if applicable +// int64_t +// uint64_t + +#ifdef LINUX +#include // Defined by ISO C 99 +#include + +#else +#include +#ifdef _LITTLE_ENDIAN +#define LITTLE_ENDIAN 1 +#endif +#endif + + +#endif diff --git a/include/llvm/Tools/StringExtras.h b/include/llvm/Tools/StringExtras.h new file mode 100644 index 00000000000..31dedb42488 --- /dev/null +++ b/include/llvm/Tools/StringExtras.h @@ -0,0 +1,63 @@ +//===-- StringExtras.h - Useful string functions -----------------*- C++ -*--=// +// +// This file contains some functions that are useful when dealing with strings. +// No library is required when using these functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_STRING_EXTRAS_H +#define LLVM_TOOLS_STRING_EXTRAS_H + +#include +#include "llvm/Tools/DataTypes.h" + +static inline string utostr(uint64_t X, bool isNeg = false) { + char Buffer[40]; + char *BufPtr = Buffer+39; + + *BufPtr = 0; // Null terminate buffer... + if (X == 0) *--BufPtr = '0'; // Handle special case... + + while (X) { + *--BufPtr = '0' + (X % 10); + X /= 10; + } + + if (isNeg) *--BufPtr = '-'; // Add negative sign... + + return string(BufPtr); +} + +static inline string itostr(int64_t X) { + if (X < 0) + return utostr((uint64_t)-X, true); + else + return utostr((uint64_t)X); +} + + +static inline string utostr(unsigned X, bool isNeg = false) { + char Buffer[20]; + char *BufPtr = Buffer+19; + + *BufPtr = 0; // Null terminate buffer... + if (X == 0) *--BufPtr = '0'; // Handle special case... + + while (X) { + *--BufPtr = '0' + (X % 10); + X /= 10; + } + + if (isNeg) *--BufPtr = '-'; // Add negative sign...
+ + return string(BufPtr); +} + +static inline string itostr(int X) { + if (X < 0) + return utostr((unsigned)-X, true); + else + return utostr((unsigned)X); +} + +#endif diff --git a/include/llvm/Type.h b/include/llvm/Type.h new file mode 100644 index 00000000000..40555b0dc19 --- /dev/null +++ b/include/llvm/Type.h @@ -0,0 +1,116 @@ +//===-- llvm/Type.h - Classes for handling data types ------------*- C++ -*--=// +// +// This file contains the declaration of the Type class. For more "Type" type +// stuff, look in DerivedTypes.h and Opt/ConstantHandling.h +// +// Note that instances of the Type class are immutable: once they are created, +// they are never changed. Also note that only one instance of a particular +// type is ever created. Thus seeing if two types are equal is a matter of +// doing a trivial pointer comparison. +// +// Types, once allocated, are never free'd. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TYPE_H +#define LLVM_TYPE_H + +#include "llvm/Value.h" + +class ConstRules; +class ConstPoolVal; + +class Type : public Value { +public: + //===--------------------------------------------------------------------===// + // Definitions of all of the base types for the Type system. Based on this + // value, you can cast to a "DerivedType" subclass (see DerivedTypes.h) + // Note: If you add an element to this, you need to add an element to the + // Type::getPrimitiveType function, or else things will break! + // + enum PrimitiveID { + VoidTyID = 0 , BoolTyID, // 0, 1: Basics... + UByteTyID , SByteTyID, // 2, 3: 8 bit types... + UShortTyID , ShortTyID, // 4, 5: 16 bit types... + UIntTyID , IntTyID, // 6, 7: 32 bit types... + ULongTyID , LongTyID, // 8, 9: 64 bit types... + + FloatTyID , DoubleTyID, // 10,11: Floating point types... + + TypeTyID, // 12 : Type definitions + LabelTyID , LockTyID, // 13,14: Labels... mutexes... + + // TODO: Kill FillerTyID. 
It just makes FirstDerivedTyID = 0x10 + FillerTyID , // 15 : filler + + // Derived types... see DerivedTypes.h file... + // Make sure FirstDerivedTyID stays up to date!!! + MethodTyID , ModuleTyID, // Methods... Modules... + ArrayTyID , PointerTyID, // Array... pointer... + StructTyID , PackedTyID, // Structure... SIMD 'packed' format... + //... + + NumPrimitiveIDs, // Must remain as last defined ID + FirstDerivedTyID = MethodTyID, + }; + +private: + PrimitiveID ID; // The current base type of this type... + unsigned UID; // The unique ID number for this class + + // ConstRulesImpl - See Opt/ConstantHandling.h for more info + mutable const ConstRules *ConstRulesImpl; + +protected: + // ctor is protected, so only subclasses can create Type objects... + Type(const string &Name, PrimitiveID id); +public: + virtual ~Type() {} + + // isSigned - Return whether a numeric type is signed. + virtual bool isSigned() const { return 0; } + + // isUnsigned - Return whether a numeric type is unsigned. This is not + // quite the complement of isSigned... nonnumeric types return false as they + // do with isSigned. + // + virtual bool isUnsigned() const { return 0; } + + inline unsigned getUniqueID() const { return UID; } + inline PrimitiveID getPrimitiveID() const { return ID; } + + // getPrimitiveType/getUniqueIDType - Return a type based on an identifier. + static const Type *getPrimitiveType(PrimitiveID IDNumber); + static const Type *getUniqueIDType(unsigned UID); + + // Methods for dealing with constants uniformly. See Opt/ConstantHandling.h + // for more info on this... + // + inline const ConstRules *getConstRules() const { return ConstRulesImpl; } + inline void setConstRules(const ConstRules *R) const { ConstRulesImpl = R; } + +public: // These are the builtin types that are always available... 
+ static const Type *VoidTy , *BoolTy; + static const Type *SByteTy, *UByteTy, + *ShortTy, *UShortTy, + *IntTy , *UIntTy, + *LongTy , *ULongTy; + static const Type *FloatTy, *DoubleTy; + + static const Type *TypeTy , *LabelTy, *LockTy; + + // Here are some useful little methods to query what type derived types are + // Note that all other types can just compare to see if this == Type::xxxTy; + // + inline bool isDerivedType() const { return ID >= FirstDerivedTyID; } + inline bool isPrimitiveType() const { return ID < FirstDerivedTyID; } + + inline bool isLabelType() const { return this == LabelTy; } + inline bool isMethodType() const { return ID == MethodTyID; } + inline bool isModuleType() const { return ID == ModuleTyID; } + inline bool isArrayType() const { return ID == ArrayTyID; } + inline bool isPointerType() const { return ID == PointerTyID; } + inline bool isStructType() const { return ID == StructTyID; } +}; + +#endif diff --git a/include/llvm/User.h b/include/llvm/User.h new file mode 100644 index 00000000000..58e0dec22bb --- /dev/null +++ b/include/llvm/User.h @@ -0,0 +1,47 @@ +//===-- llvm/User.h - User class definition ----------------------*- C++ -*--=// +// +// This class defines the interface that one who 'use's a Value must implement. +// Each instance of the Value class keeps track of what User's have handles +// to it. +// +// * Instructions are the largest class of User's. +// * Constants may be users of other constants (think arrays and stuff) +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_USER_H +#define LLVM_USER_H + +#include "llvm/Value.h" + +class User : public Value { + User(const User &); // Do not implement +public: + User(const Type *Ty, ValueTy vty, const string &name = ""); + virtual ~User() {} + + // if i >= the number of operands, then getOperand() returns 0, and setOperand + // returns false. setOperand() may also return false if the operand is of + // the wrong type.
+ // + virtual Value *getOperand(unsigned i) = 0; + virtual const Value *getOperand(unsigned i) const = 0; + virtual bool setOperand(unsigned i, Value *Val) = 0; + + // dropAllReferences() - This virtual function should be overridden to "let + // go" of all references that this user is maintaining. This allows one to + // 'delete' a whole class at a time, even though there may be circular + // references... first all references are dropped, and all use counts go to + // zero. Then everything is delete'd for real. Note that no operations are + // valid on an object that has "dropped all references", except operator + // delete. + // + virtual void dropAllReferences() = 0; + + // replaceUsesOfWith - Replaces all references to the "From" definition with + // references to the "To" definition. (defined in Value.cpp) + // + void replaceUsesOfWith(Value *From, Value *To); +}; + +#endif diff --git a/include/llvm/Value.h b/include/llvm/Value.h new file mode 100644 index 00000000000..d751eb1c6a8 --- /dev/null +++ b/include/llvm/Value.h @@ -0,0 +1,124 @@ +//===-- llvm/Value.h - Definition of the Value class -------------*- C++ -*--=// +// +// This file defines the very important Value class. This is subclassed by a +// bunch of other important classes, like Def, Method, Module, Type, etc... 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_VALUE_H +#define LLVM_VALUE_H + +#include +#include + +class User; +class Type; +template class ValueHolder; + +//===----------------------------------------------------------------------===// +// Value Class +//===----------------------------------------------------------------------===// + +class Value { +public: + enum ValueTy { + TypeVal, // This is an instance of Type + ConstantVal, // This is an instance of ConstPoolVal + MethodArgumentVal, // This is an instance of MethodArgument + InstructionVal, // This is an instance of Instruction + + BasicBlockVal, // This is an instance of BasicBlock + MethodVal, // This is an instance of Method + ModuleVal, // This is an instance of Module + }; + +private: + list Uses; + string Name; + const Type *Ty; + ValueTy VTy; + + Value(const Value &); // Do not implement +protected: + inline void setType(const Type *ty) { Ty = ty; } +public: + Value(const Type *Ty, ValueTy vty, const string &name = ""); + virtual ~Value(); + + inline const Type *getType() const { return Ty; } + inline ValueTy getValueType() const { return VTy; } + + inline bool hasName() const { return Name != ""; } + inline const string &getName() const { return Name; } + virtual void setName(const string &name) { Name = name; } + + + // replaceAllUsesWith - Go through the uses list for this definition and make + // each use point to "D" instead of "this". After this completes, 'this's + // use list should be empty. + // + void replaceAllUsesWith(Value *D); + + //---------------------------------------------------------------------- + // Methods for handling the list of uses of this DEF. 
+ // + typedef list::iterator use_iterator; + typedef list::const_iterator use_const_iterator; + + inline bool use_size() const { return Uses.size(); } + inline bool use_empty() const { return Uses.empty(); } + inline use_iterator use_begin() { return Uses.begin(); } + inline use_const_iterator use_begin() const { return Uses.begin(); } + inline use_iterator use_end() { return Uses.end(); } + inline use_const_iterator use_end() const { return Uses.end(); } + + inline void use_push_back(User *I) { Uses.push_back(I); } + User *use_remove(use_iterator &I); + + inline void addUse(User *I) { Uses.push_back(I); } + void killUse(User *I); +}; + +// UseTy and its friendly typedefs (Use) are here to make keeping the "use" +// list of a definition node up-to-date really easy. +// +template +class UseTy { + ValueSubclass *Val; + User *U; +public: + inline UseTy(ValueSubclass *v, User *user) { + Val = v; U = user; + if (Val) Val->addUse(U); + } + + inline ~UseTy() { if (Val) Val->killUse(U); } + + inline operator ValueSubclass *() const { return Val; } + + inline UseTy(const UseTy &user) { + Val = 0; + U = user.U; + operator=(user); + } + inline ValueSubclass *operator=(ValueSubclass *V) { + if (Val) Val->killUse(U); + Val = V; + if (V) V->addUse(U); + return V; + } + + inline ValueSubclass *operator->() { return Val; } + inline const ValueSubclass *operator->() const { return Val; } + + inline UseTy &operator=(const UseTy &user) { + if (Val) Val->killUse(U); + Val = user.Val; + Val->addUse(U); + return *this; + } +}; + +typedef UseTy Use; + +#endif diff --git a/include/llvm/ValueHolder.h b/include/llvm/ValueHolder.h new file mode 100644 index 00000000000..318419f139e --- /dev/null +++ b/include/llvm/ValueHolder.h @@ -0,0 +1,86 @@ +//===-- llvm/ValueHolder.h - Class to hold multiple values -------*- C++ -*--=// +// +// This defines a class that is used as a fancy Definition container.
It is +// special because it helps keep the symbol table of the container method up to +// date with the goings on inside of it. +// +// This is used to represent things like the instructions of a basic block and +// the arguments to a method. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_VALUEHOLDER_H +#define LLVM_VALUEHOLDER_H + +#include +class SymTabValue; + +// ItemParentType ItemParent - I call setParent() on all of my +// "ValueSubclass" items, and this is the value that I pass in. +// +template +class ValueHolder { + // TODO: Should I use a deque instead of a vector? + vector ValueList; + + ItemParentType *ItemParent; + SymTabValue *Parent; + + ValueHolder(const ValueHolder &V); // DO NOT IMPLEMENT +public: + inline ValueHolder(ItemParentType *IP, SymTabValue *parent = 0) { + assert(IP && "Item parent may not be null!"); + ItemParent = IP; + Parent = 0; + setParent(parent); + } + + inline ~ValueHolder() { + // The caller should have called delete_all first... 
+ assert(empty() && "ValueHolder contains definitions!"); + assert(Parent == 0 && "Should have been unlinked from method!"); + } + + inline const SymTabValue *getParent() const { return Parent; } + inline SymTabValue *getParent() { return Parent; } + void setParent(SymTabValue *Parent); // Defined in ValueHolderImpl.h + + inline unsigned size() const { return ValueList.size(); } + inline bool empty() const { return ValueList.empty(); } + inline const ValueSubclass *front() const { return ValueList.front(); } + inline ValueSubclass *front() { return ValueList.front(); } + inline const ValueSubclass *back() const { return ValueList.back(); } + inline ValueSubclass *back() { return ValueList.back(); } + + //===--------------------------------------------------------------------===// + // sub-Definition iterator code + //===--------------------------------------------------------------------===// + // + typedef vector::iterator iterator; + typedef vector::const_iterator const_iterator; + + inline iterator begin() { return ValueList.begin(); } + inline const_iterator begin() const { return ValueList.begin(); } + inline iterator end() { return ValueList.end(); } + inline const_iterator end() const { return ValueList.end(); } + + void delete_all() { // Delete all removes and deletes all elements + // TODO: REMOVE FROM END OF VECTOR!!! + while (begin() != end()) { + iterator I = begin(); + delete remove(I); // Delete all instructions... + } + } + + // ValueHolder::remove(iterator &) this removes the element at the location + // specified by the iterator, and leaves the iterator pointing to the element + // that used to follow the element deleted. 
+ // + ValueSubclass *remove(iterator &DI); // Defined in ValueHolderImpl.h + void remove(ValueSubclass *D); // Defined in ValueHolderImpl.h + + inline void push_front(ValueSubclass *Inst); // Defined in ValueHolderImpl.h + inline void push_back(ValueSubclass *Inst); // Defined in ValueHolderImpl.h +}; + +#endif diff --git a/include/llvm/iMemory.h b/include/llvm/iMemory.h new file mode 100644 index 00000000000..077266de94c --- /dev/null +++ b/include/llvm/iMemory.h @@ -0,0 +1,140 @@ +//===-- llvm/iMemory.h - Memory Operator node definitions --------*- C++ -*--=// +// +// This file contains the declarations of all of the memory related operators. +// This includes: malloc, free, alloca, load, store, getfield, putfield +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IMEMORY_H +#define LLVM_IMEMORY_H + +#include "llvm/Instruction.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ConstPoolVals.h" + +class ConstPoolType; + +class AllocationInst : public Instruction { +protected: + UseTy TyVal; + Use ArraySize; +public: + AllocationInst(ConstPoolType *tyVal, Value *arrSize, unsigned iTy, + const string &Name = "") + : Instruction(tyVal->getValue(), iTy, Name), + TyVal(tyVal, this), ArraySize(arrSize, this) { + + // Make sure they didn't try to specify a size for an invalid type... + assert(arrSize == 0 || + (getType()->getValueType()->isArrayType() && + ((const ArrayType*)getType()->getValueType())->isUnsized()) && + "Trying to allocate something other than unsized array, with size!"); + + // Make sure that if a size is specified, that it is a uint! + assert(arrSize == 0 || arrSize->getType() == Type::UIntTy && + "Malloc SIZE is not a 'uint'!"); + } + inline ~AllocationInst() {} + + // getType - Overload to return most specific pointer type... 
+ inline const PointerType *getType() const { + return (const PointerType*)Instruction::getType(); + } + + virtual Instruction *clone() const = 0; + + inline virtual void dropAllReferences() { TyVal = 0; ArraySize = 0; } + virtual bool setOperand(unsigned i, Value *Val) { + if (i == 0) { + assert(!Val || Val->getValueType() == Value::ConstantVal); + TyVal = (ConstPoolType*)Val; + return true; + } else if (i == 1) { + // Make sure they didn't try to specify a size for an invalid type... + assert(Val == 0 || + (getType()->getValueType()->isArrayType() && + ((const ArrayType*)getType()->getValueType())->isUnsized()) && + "Trying to allocate something other than unsized array, with size!"); + + // Make sure that if a size is specified, that it is a uint! + assert(Val == 0 || Val->getType() == Type::UIntTy && + "Malloc SIZE is not a 'uint'!"); + + ArraySize = Val; + return true; + } + return false; + } + + virtual unsigned getNumOperands() const { return 2; } + + virtual const Value *getOperand(unsigned i) const { + return i == 0 ? TyVal : (i == 1 ? 
ArraySize : 0); + } +}; + +class MallocInst : public AllocationInst { +public: + MallocInst(ConstPoolType *tyVal, Value *ArraySize = 0, + const string &Name = "") + : AllocationInst(tyVal, ArraySize, Instruction::Malloc, Name) {} + inline ~MallocInst() {} + + virtual Instruction *clone() const { + return new MallocInst(TyVal, ArraySize); + } + + virtual string getOpcode() const { return "malloc"; } +}; + +class AllocaInst : public AllocationInst { +public: + AllocaInst(ConstPoolType *tyVal, Value *ArraySize = 0, + const string &Name = "") + : AllocationInst(tyVal, ArraySize, Instruction::Alloca, Name) {} + inline ~AllocaInst() {} + + virtual Instruction *clone() const { + return new AllocaInst(TyVal, ArraySize); + } + + virtual string getOpcode() const { return "alloca"; } +}; + + + +class FreeInst : public Instruction { +protected: + Use Pointer; +public: + FreeInst(Value *Ptr, const string &Name = "") + : Instruction(Type::VoidTy, Instruction::Free, Name), + Pointer(Ptr, this) { + + assert(Ptr->getType()->isPointerType() && "Can't free nonpointer!"); + } + inline ~FreeInst() {} + + virtual Instruction *clone() const { return new FreeInst(Pointer); } + + inline virtual void dropAllReferences() { Pointer = 0; } + + virtual bool setOperand(unsigned i, Value *Val) { + if (i == 0) { + assert(!Val || Val->getType()->isPointerType() && + "Can't free nonpointer!"); + Pointer = Val; + return true; + } + return false; + } + + virtual unsigned getNumOperands() const { return 1; } + virtual const Value *getOperand(unsigned i) const { + return i == 0 ? 
Pointer : 0; + } + + virtual string getOpcode() const { return "free"; } +}; + +#endif // LLVM_IMEMORY_H diff --git a/include/llvm/iOperators.h b/include/llvm/iOperators.h new file mode 100644 index 00000000000..5a31b711427 --- /dev/null +++ b/include/llvm/iOperators.h @@ -0,0 +1,48 @@ +//===-- llvm/iBinary.h - Binary Operator node definitions --------*- C++ -*--=// +// +// This file contains the declarations of all of the Binary Operator classes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IBINARY_H +#define LLVM_IBINARY_H + +#include "llvm/InstrTypes.h" + +//===----------------------------------------------------------------------===// +// Classes to represent Binary operators +//===----------------------------------------------------------------------===// +// +// All of these classes are subclasses of the BinaryOperator class... +// + +class AddInst : public BinaryOperator { +public: + AddInst(Value *S1, Value *S2, const string &Name = "") + : BinaryOperator(Instruction::Add, S1, S2, Name) { + } + + virtual string getOpcode() const { return "add"; } +}; + + +class SubInst : public BinaryOperator { +public: + SubInst(Value *S1, Value *S2, const string &Name = "") + : BinaryOperator(Instruction::Sub, S1, S2, Name) { + } + + virtual string getOpcode() const { return "sub"; } +}; + + +class SetCondInst : public BinaryOperator { + BinaryOps OpType; +public: + SetCondInst(BinaryOps opType, Value *S1, Value *S2, + const string &Name = ""); + + virtual string getOpcode() const; +}; + +#endif diff --git a/include/llvm/iOther.h b/include/llvm/iOther.h new file mode 100644 index 00000000000..4c06b4fdd89 --- /dev/null +++ b/include/llvm/iOther.h @@ -0,0 +1,116 @@ +//===-- llvm/iOther.h - "Other" instruction node definitions -----*- C++ -*--=// +// +// This file contains the declarations for instructions that fall into the +// grandios 'other' catagory... 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IOTHER_H +#define LLVM_IOTHER_H + +#include "llvm/InstrTypes.h" +#include "llvm/Method.h" +#include + +//===----------------------------------------------------------------------===// +// PHINode Class +//===----------------------------------------------------------------------===// + +// PHINode - The PHINode class is used to represent the magical mystical PHI +// node, that can not exist in nature, but can be synthesized in a computer +// scientist's overactive imagination. +// +// TODO: FIXME: This representation is not good enough. Consider the following +// code: +// BB0: %x = int %0 +// BB1: %y = int %1 +// BB2: %z = phi int %0, %1 - Can't tell where constants come from! +// +// TOFIX: Store pair instead of just +// +class PHINode : public Instruction { + vector IncomingValues; + PHINode(const PHINode &PN); +public: + PHINode(const Type *Ty, const string &Name = ""); + inline ~PHINode() { dropAllReferences(); } + + virtual Instruction *clone() const { return new PHINode(*this); } + + // Implement all of the functionality required by User... + // + virtual void dropAllReferences(); + virtual const Value *getOperand(unsigned i) const { + return (i < IncomingValues.size()) ? 
IncomingValues[i] : 0; + } + inline Value *getOperand(unsigned i) { + return (Value*)((const PHINode*)this)->getOperand(i); + } + virtual unsigned getNumOperands() const { return IncomingValues.size(); } + virtual bool setOperand(unsigned i, Value *Val); + virtual string getOpcode() const { return "phi"; } + + void addIncoming(Value *D); +}; + + +//===----------------------------------------------------------------------===// +// MethodArgument Class +//===----------------------------------------------------------------------===// + +class MethodArgument : public Value { // Defined in the InstrType.cpp file + Method *Parent; + + friend class ValueHolder; + inline void setParent(Method *parent) { Parent = parent; } + +public: + MethodArgument(const Type *Ty, const string &Name = "") + : Value(Ty, Value::MethodArgumentVal, Name) { + Parent = 0; + } + + // Specialize setName to handle symbol table majik... + virtual void setName(const string &name); + + inline const Method *getParent() const { return Parent; } + inline Method *getParent() { return Parent; } +}; + + +//===----------------------------------------------------------------------===// +// Classes to function calls and method invocations +//===----------------------------------------------------------------------===// + +class CallInst : public Instruction { + MethodUse M; + vector Params; + CallInst(const CallInst &CI); +public: + CallInst(Method *M, vector ¶ms, const string &Name = ""); + inline ~CallInst() { dropAllReferences(); } + + virtual string getOpcode() const { return "call"; } + + virtual Instruction *clone() const { return new CallInst(*this); } + bool hasSideEffects() const { return true; } + + + const Method *getCalledMethod() const { return M; } + Method *getCalledMethod() { return M; } + + // Implement all of the functionality required by Instruction... + // + virtual void dropAllReferences(); + virtual const Value *getOperand(unsigned i) const { + return i == 0 ? M : ((i <= Params.size()) ? 
Params[i-1] : 0); + } + inline Value *getOperand(unsigned i) { + return (Value*)((const CallInst*)this)->getOperand(i); + } + virtual unsigned getNumOperands() const { return Params.size()+1; } + + virtual bool setOperand(unsigned i, Value *Val); +}; + +#endif diff --git a/include/llvm/iTerminators.h b/include/llvm/iTerminators.h new file mode 100644 index 00000000000..0d1cde0d3fb --- /dev/null +++ b/include/llvm/iTerminators.h @@ -0,0 +1,136 @@ +//===-- llvm/iTerminators.h - Termintator instruction nodes ------*- C++ -*--=// +// +// This file contains the declarations for all the subclasses of the +// Instruction class, which is itself defined in the Instruction.h file. In +// between these definitions and the Instruction class are classes that expose +// the SSA properties of each instruction, and that form the SSA graph. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ITERMINATORS_H +#define LLVM_ITERMINATORS_H + +#include "llvm/InstrTypes.h" +#include "llvm/BasicBlock.h" +#include "llvm/ConstPoolVals.h" + +//===----------------------------------------------------------------------===// +// Classes to represent Basic Block "Terminator" instructions +//===----------------------------------------------------------------------===// + + +//===--------------------------------------------------------------------------- +// ReturnInst - Return a value (possibly void), from a method. Execution does +// not continue in this method any longer. +// +class ReturnInst : public TerminatorInst { + Use Val; // Will be null if returning void... 
+ ReturnInst(const ReturnInst &RI); +public: + ReturnInst(Value *value = 0); + inline ~ReturnInst() { dropAllReferences(); } + + virtual Instruction *clone() const { return new ReturnInst(*this); } + + virtual string getOpcode() const { return "ret"; } + + inline const Value *getReturnValue() const { return Val; } + inline Value *getReturnValue() { return Val; } + + virtual void dropAllReferences(); + virtual const Value *getOperand(unsigned i) const { + return (i == 0) ? Val : 0; + } + inline Value *getOperand(unsigned i) { return (i == 0) ? Val : 0; } + virtual bool setOperand(unsigned i, Value *Val); + virtual unsigned getNumOperands() const { return Val != 0; } + + // Additionally, they must provide a method to get at the successors of this + // terminator instruction. If 'idx' is out of range, a null pointer shall be + // returned. + // + virtual const BasicBlock *getSuccessor(unsigned idx) const { return 0; } + virtual unsigned getNumSuccessors() const { return 0; } +}; + + +//===--------------------------------------------------------------------------- +// BranchInst - Conditional or Unconditional Branch instruction. +// +class BranchInst : public TerminatorInst { + BasicBlockUse TrueDest, FalseDest; + Use Condition; + + BranchInst(const BranchInst &BI); +public: + // If cond = null, then is an unconditional br... + BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse = 0, Value *cond = 0); + inline ~BranchInst() { dropAllReferences(); } + + virtual Instruction *clone() const { return new BranchInst(*this); } + + virtual void dropAllReferences(); + + inline bool isUnconditional() const { + return Condition == 0 || !FalseDest; + } + + virtual string getOpcode() const { return "br"; } + + inline Value *getOperand(unsigned i) { + return (Value*)((const BranchInst *)this)->getOperand(i); + } + virtual const Value *getOperand(unsigned i) const; + virtual bool setOperand(unsigned i, Value *Val); + virtual unsigned getNumOperands() const { return isUnconditional() ? 
1 : 3; } + + // Additionally, they must provide a method to get at the successors of this + // terminator instruction. If 'idx' is out of range, a null pointer shall be + // returned. + // + virtual const BasicBlock *getSuccessor(unsigned idx) const; + virtual unsigned getNumSuccessors() const { return 1+!isUnconditional(); } +}; + + +//===--------------------------------------------------------------------------- +// SwitchInst - Multiway switch +// +class SwitchInst : public TerminatorInst { +public: + typedef pair dest_value; +private: + BasicBlockUse DefaultDest; + Use Val; + vector Destinations; + + SwitchInst(const SwitchInst &RI); +public: + typedef vector::iterator dest_iterator; + typedef vector::const_iterator dest_const_iterator; + + SwitchInst(Value *Value, BasicBlock *Default); + inline ~SwitchInst() { dropAllReferences(); } + + virtual Instruction *clone() const { return new SwitchInst(*this); } + + void dest_push_back(ConstPoolVal *OnVal, BasicBlock *Dest); + + virtual string getOpcode() const { return "switch"; } + inline Value *getOperand(unsigned i) { + return (Value*)((const SwitchInst*)this)->getOperand(i); + } + virtual const Value *getOperand(unsigned i) const; + virtual bool setOperand(unsigned i, Value *Val); + virtual unsigned getNumOperands() const; + virtual void dropAllReferences(); + + // Additionally, they must provide a method to get at the successors of this + // terminator instruction. If 'idx' is out of range, a null pointer shall be + // returned. + // + virtual const BasicBlock *getSuccessor(unsigned idx) const; + virtual unsigned getNumSuccessors() const { return 1+Destinations.size(); } +}; + +#endif diff --git a/include/llvm/iUnary.h b/include/llvm/iUnary.h new file mode 100644 index 00000000000..ffe6c3f77ca --- /dev/null +++ b/include/llvm/iUnary.h @@ -0,0 +1,19 @@ +//===-- llvm/iUnary.h - Unary Operator node definitions ----------*- C++ -*--=// +// +// This file contains the declarations of all of the Unary Operator classes. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IUNARY_H +#define LLVM_IUNARY_H + +#include "llvm/InstrTypes.h" + +//===----------------------------------------------------------------------===// +// Classes to represent Unary operators +//===----------------------------------------------------------------------===// +// +// All of these classes are subclasses of the UnaryOperator class... +// + +#endif diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile new file mode 100644 index 00000000000..4b672cd6ff5 --- /dev/null +++ b/lib/Analysis/Makefile @@ -0,0 +1,7 @@ + +LEVEL = ../.. + +LIBRARYNAME = analysis + +include $(LEVEL)/Makefile.common + diff --git a/lib/Analysis/ModuleAnalyzer.cpp b/lib/Analysis/ModuleAnalyzer.cpp new file mode 100644 index 00000000000..1c3464e48ce --- /dev/null +++ b/lib/Analysis/ModuleAnalyzer.cpp @@ -0,0 +1,150 @@ +//===-- llvm/Analysis/ModuleAnalyzer.cpp - Module analysis driver ----------==// +// +// This class provides a nice interface to traverse a module in a predictable +// way. This is used by the AssemblyWriter, BytecodeWriter, and SlotCalculator +// to do analysis of a module. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ModuleAnalyzer.h" +#include "llvm/ConstantPool.h" +#include "llvm/Method.h" +#include "llvm/Module.h" +#include "llvm/BasicBlock.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ConstPoolVals.h" +#include + +// processModule - Driver function to call all of my subclasses virtual methods. +// +bool ModuleAnalyzer::processModule(const Module *M) { + // Loop over the constant pool, process all of the constants... + if (processConstPool(M->getConstantPool(), false)) + return true; + + return processMethods(M); +} + +inline bool ModuleAnalyzer::handleType(set &TypeSet, + const Type *T) { + if (!T->isDerivedType()) return false; // Boring boring types... 
+ if (TypeSet.count(T) != 0) return false; // Already found this type... + TypeSet.insert(T); // Add it to the set + + // Recursively process interesting types... + switch (T->getPrimitiveID()) { + case Type::MethodTyID: { + const MethodType *MT = (const MethodType *)T; + if (handleType(TypeSet, MT->getReturnType())) return true; + const MethodType::ParamTypes &Params = MT->getParamTypes(); + + for (MethodType::ParamTypes::const_iterator I = Params.begin(); + I != Params.end(); ++I) + if (handleType(TypeSet, *I)) return true; + break; + } + + case Type::ArrayTyID: + if (handleType(TypeSet, ((const ArrayType *)T)->getElementType())) + return true; + break; + + case Type::StructTyID: { + const StructType *ST = (const StructType*)T; + const StructType::ElementTypes &Elements = ST->getElementTypes(); + for (StructType::ElementTypes::const_iterator I = Elements.begin(); + I != Elements.end(); ++I) + if (handleType(TypeSet, *I)) return true; + break; + } + + case Type::PointerTyID: + if (handleType(TypeSet, ((const PointerType *)T)->getValueType())) + return true; + break; + + default: + cerr << "ModuleAnalyzer::handleType, type unknown: '" + << T->getName() << "'\n"; + break; + } + + return processType(T); +} + + +bool ModuleAnalyzer::processConstPool(const ConstantPool &CP, bool isMethod) { + // TypeSet - Keep track of which types have already been processType'ed. We + // don't want to reprocess the same type more than once. + // + set TypeSet; + + for (ConstantPool::plane_const_iterator PI = CP.begin(); + PI != CP.end(); ++PI) { + const ConstantPool::PlaneType &Plane = **PI; + if (Plane.empty()) continue; // Skip empty type planes... 
+ + if (processConstPoolPlane(CP, Plane, isMethod)) return true; + + for (ConstantPool::PlaneType::const_iterator CI = Plane.begin(); + CI != Plane.end(); CI++) { + if ((*CI)->getType() == Type::TypeTy) + if (handleType(TypeSet, ((const ConstPoolType*)(*CI))->getValue())) + return true; + if (handleType(TypeSet, (*CI)->getType())) return true; + + if (processConstant(*CI)) return true; + } + } + + if (!isMethod) { + assert(CP.getParent()->getValueType() == Value::ModuleVal); + const Module *M = (const Module*)CP.getParent(); + // Process the method types after the constant pool... + for (Module::MethodListType::const_iterator I = M->getMethodList().begin(); + I != M->getMethodList().end(); I++) { + if (handleType(TypeSet, (*I)->getType())) return true; + if (visitMethod(*I)) return true; + } + } + return false; +} + +bool ModuleAnalyzer::processMethods(const Module *M) { + for (Module::MethodListType::const_iterator I = M->getMethodList().begin(); + I != M->getMethodList().end(); I++) + if (processMethod(*I)) return true; + + return false; +} + +bool ModuleAnalyzer::processMethod(const Method *M) { + // Loop over the arguments, processing them... + const Method::ArgumentListType &ArgList = M->getArgumentList(); + for (Method::ArgumentListType::const_iterator AI = ArgList.begin(); + AI != ArgList.end(); AI++) + if (processMethodArgument(*AI)) return true; + + // Loop over the constant pool, adding the constants to the table... + processConstPool(M->getConstantPool(), true); + + // Loop over all the basic blocks, in order... 
+ Method::BasicBlocksType::const_iterator BBI = M->getBasicBlocks().begin(); + for (; BBI != M->getBasicBlocks().end(); BBI++) + if (processBasicBlock(*BBI)) return true; + return false; +} + +bool ModuleAnalyzer::processBasicBlock(const BasicBlock *BB) { + // Process all of the instructions in the basic block + BasicBlock::InstListType::const_iterator Inst = BB->getInstList().begin(); + for (; Inst != BB->getInstList().end(); Inst++) { + if (preProcessInstruction(*Inst) || processInstruction(*Inst)) return true; + } + return false; +} + +bool ModuleAnalyzer::preProcessInstruction(const Instruction *I) { + + return false; +} diff --git a/lib/AsmParser/Lexer.cpp b/lib/AsmParser/Lexer.cpp new file mode 100644 index 00000000000..9edd3bf5b1c --- /dev/null +++ b/lib/AsmParser/Lexer.cpp @@ -0,0 +1,2058 @@ +#define yy_create_buffer llvmAsm_create_buffer +#define yy_delete_buffer llvmAsm_delete_buffer +#define yy_scan_buffer llvmAsm_scan_buffer +#define yy_scan_string llvmAsm_scan_string +#define yy_scan_bytes llvmAsm_scan_bytes +#define yy_flex_debug llvmAsm_flex_debug +#define yy_init_buffer llvmAsm_init_buffer +#define yy_flush_buffer llvmAsm_flush_buffer +#define yy_load_buffer_state llvmAsm_load_buffer_state +#define yy_switch_to_buffer llvmAsm_switch_to_buffer +#define yyin llvmAsmin +#define yyleng llvmAsmleng +#define yylex llvmAsmlex +#define yyout llvmAsmout +#define yyrestart llvmAsmrestart +#define yytext llvmAsmtext +#define yylineno llvmAsmlineno + +#line 20 "Lexer.cpp" +/* A lexical scanner generated by flex */ + +/* Scanner skeleton version: + * $Header$ + */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 + +#include + + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + + +#ifdef __cplusplus + +#include +#include + +/* Use prototypes in function declarations. 
*/ +#define YY_USE_PROTOS + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ + +#ifdef __TURBOC__ + #pragma warn -rch + #pragma warn -use +#include +#include +#define YY_USE_CONST +#define YY_USE_PROTOS +#endif + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + + +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yy_start - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. 
*/ +#define YY_BUF_SIZE 16384 + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + +extern int yyleng; +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* The funky do-while in the following #define is used to turn the definition + * int a single C statement (which needs a semi-colon terminator). This + * avoids problems with code like: + * + * if ( condition_holds ) + * yyless( 5 ); + * else + * do_something_else(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the yyless() call. + */ + +/* Return all but the first 'n' matched characters back to the input stream. */ + +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + *yy_cp = yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yytext_ptr ) + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). + */ +typedef unsigned int yy_size_t; + + +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. 
+ */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + }; + +static YY_BUFFER_STATE yy_current_buffer = 0; + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + */ +#define YY_CURRENT_BUFFER yy_current_buffer + + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + +int yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = (char *) 0; +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... 
+ */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart YY_PROTO(( FILE *input_file )); + +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); +void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); +#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) + +YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size )); +YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str )); +YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len )); + +static void *yy_flex_alloc YY_PROTO(( yy_size_t )); +static inline void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); +static void yy_flex_free YY_PROTO(( void * )); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (yy_current_buffer->yy_at_bol) + + +#define YY_USES_REJECT + +#define yywrap() 1 +#define YY_SKIP_YYWRAP +typedef unsigned char YY_CHAR; +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; +typedef int yy_state_type; +extern int yylineno; +int yylineno = 1; +extern char *yytext; +#define yytext_ptr yytext + +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yy_c_buf_p = yy_cp; + +#define YY_NUM_RULES 58 +#define YY_END_OF_BUFFER 59 +static yyconst short int yy_acclist[113] = + { 0, + 59, 57, 58, 56, 57, 58, 56, 58, 57, 58, + 57, 58, 57, 58, 8, 57, 58, 52, 57, 58, + 1, 57, 58, 57, 58, 57, 58, 57, 58, 57, + 58, 57, 58, 57, 58, 57, 58, 57, 58, 57, + 58, 57, 58, 57, 58, 57, 58, 57, 58, 57, + 58, 57, 58, 57, 58, 57, 58, 50, 49, 54, + 53, 52, 1, 9, 40, 51, 49, 55, 28, 31, + 3, 16, 30, 24, 25, 26, 32, 39, 29, 11, + 27, 44, 45, 18, 4, 22, 17, 10, 2, 5, + 20, 23, 12, 34, 38, 36, 37, 35, 33, 14, + + 46, 13, 19, 43, 21, 42, 41, 15, 6, 47, + 48, 7 + } ; + +static yyconst short int yy_accept[199] = + { 0, + 1, 1, 1, 2, 4, 7, 9, 11, 13, 15, + 18, 21, 24, 26, 28, 30, 32, 34, 36, 38, + 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, + 58, 58, 59, 60, 60, 61, 62, 63, 64, 64, + 64, 65, 65, 65, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 67, 68, + 69, 70, 70, 70, 70, 70, 70, 71, 71, 72, + 72, 
72, 72, 72, 72, 73, 73, 73, 73, 73, + + 74, 75, 76, 77, 77, 78, 79, 79, 79, 79, + 79, 80, 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 81, 82, 82, 82, 82, 82, 83, 83, + 83, 83, 84, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 86, 87, 87, 88, 88, 88, + 89, 89, 90, 90, 90, 91, 92, 92, 92, 93, + 93, 93, 94, 95, 96, 97, 98, 99, 100, 101, + 102, 102, 103, 104, 104, 105, 105, 106, 106, 106, + 107, 107, 108, 109, 110, 110, 110, 110, 111, 111, + 112, 112, 112, 112, 112, 112, 113, 113 + + } ; + +static yyconst int yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 4, 1, 5, 6, 1, 1, 1, + 1, 1, 1, 1, 7, 5, 1, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 9, 10, 1, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 1, 11, 12, 13, 14, + + 15, 16, 17, 18, 19, 5, 5, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 5, + 32, 5, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst int yy_meta[33] = + { 0, + 1, 1, 2, 3, 4, 1, 5, 4, 6, 1, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4 + } ; + +static yyconst short int yy_base[203] = + { 0, + 0, 0, 389, 390, 390, 390, 0, 379, 26, 379, + 27, 0, 28, 40, 29, 35, 34, 42, 30, 38, + 56, 60, 55, 63, 65, 84, 68, 107, 52, 382, + 376, 390, 0, 376, 375, 374, 74, 0, 32, 64, + 372, 78, 67, 371, 77, 92, 76, 79, 89, 98, + 100, 95, 102, 105, 108, 112, 111, 119, 122, 123, + 116, 126, 128, 129, 132, 134, 137, 138, 139, 140, + 143, 144, 145, 149, 146, 156, 154, 390, 0, 371, + 369, 157, 166, 158, 161, 167, 368, 170, 367, 
174, + 175, 179, 180, 182, 366, 183, 181, 188, 184, 365, + + 364, 363, 362, 190, 361, 360, 191, 211, 198, 199, + 359, 194, 200, 201, 202, 204, 205, 212, 203, 225, + 214, 358, 357, 228, 209, 231, 232, 356, 233, 234, + 235, 355, 354, 236, 238, 239, 241, 247, 249, 252, + 242, 254, 259, 353, 352, 256, 351, 264, 265, 350, + 267, 349, 270, 271, 348, 347, 273, 274, 346, 276, + 275, 345, 344, 343, 342, 341, 340, 335, 330, 325, + 283, 320, 319, 278, 317, 284, 316, 285, 288, 315, + 289, 313, 311, 310, 293, 291, 301, 308, 295, 244, + 303, 299, 302, 307, 309, 82, 390, 331, 334, 337, + + 342, 53 + } ; + +static yyconst short int yy_def[203] = + { 0, + 197, 1, 197, 197, 197, 197, 198, 199, 200, 197, + 199, 201, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 198, + 199, 197, 202, 197, 197, 197, 199, 201, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 197, 202, 197, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 0, 197, 197, 197, + + 197, 197 + } ; + +static yyconst short int yy_nxt[423] = + { 0, + 4, 5, 6, 7, 8, 9, 10, 11, 4, 12, + 13, 14, 15, 16, 17, 18, 19, 8, 20, 21, + 22, 23, 8, 24, 8, 25, 26, 27, 28, 29, + 8, 8, 34, 35, 37, 32, 32, 32, 32, 45, + 32, 39, 32, 32, 53, 81, 32, 40, 32, 46, + 32, 41, 50, 47, 42, 49, 79, 48, 54, 55, + 32, 
51, 43, 32, 32, 44, 56, 52, 32, 60, + 58, 32, 32, 32, 77, 32, 32, 61, 57, 64, + 62, 37, 32, 82, 32, 32, 32, 32, 59, 84, + 32, 63, 32, 71, 83, 65, 85, 32, 66, 72, + + 32, 67, 89, 32, 86, 87, 32, 88, 32, 92, + 32, 68, 69, 32, 70, 32, 32, 90, 73, 32, + 32, 97, 91, 96, 32, 74, 75, 32, 94, 93, + 32, 32, 98, 76, 32, 95, 32, 32, 99, 101, + 32, 100, 32, 102, 103, 32, 32, 32, 32, 105, + 111, 32, 32, 32, 32, 104, 106, 32, 112, 109, + 110, 108, 32, 107, 32, 32, 32, 114, 117, 32, + 116, 113, 119, 118, 32, 32, 115, 122, 32, 120, + 123, 125, 32, 32, 121, 127, 124, 32, 32, 32, + 32, 32, 32, 128, 132, 129, 32, 131, 32, 32, + + 126, 130, 32, 134, 133, 135, 32, 32, 32, 32, + 32, 32, 32, 32, 144, 145, 150, 32, 136, 32, + 32, 143, 32, 141, 142, 137, 148, 138, 154, 146, + 139, 147, 140, 32, 149, 152, 32, 151, 153, 32, + 32, 32, 32, 32, 32, 155, 32, 32, 158, 32, + 32, 157, 32, 162, 159, 32, 161, 32, 160, 156, + 32, 164, 32, 166, 32, 163, 168, 32, 170, 169, + 172, 171, 32, 32, 165, 32, 167, 175, 32, 32, + 173, 32, 32, 32, 32, 177, 32, 178, 180, 181, + 174, 32, 32, 32, 179, 176, 32, 32, 184, 32, + + 182, 32, 186, 32, 185, 183, 188, 32, 187, 32, + 32, 32, 189, 192, 190, 32, 32, 32, 32, 32, + 194, 32, 191, 32, 32, 32, 193, 32, 32, 195, + 196, 30, 30, 32, 30, 30, 30, 31, 32, 31, + 33, 33, 38, 32, 38, 38, 38, 38, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 80, 32, + 32, 36, 35, 80, 32, 78, 36, 32, 197, 3, + 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, + + 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, + 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, + 197, 197 + } ; + +static yyconst short int yy_chk[423] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 9, 9, 11, 11, 13, 15, 19, 15, + 39, 13, 17, 16, 19, 39, 20, 13, 14, 16, + 18, 14, 18, 16, 14, 17, 202, 16, 20, 20, + 29, 18, 14, 23, 21, 14, 21, 18, 22, 23, + 22, 24, 40, 25, 29, 43, 27, 
23, 21, 25, + 24, 37, 37, 40, 47, 45, 42, 48, 22, 43, + 196, 24, 26, 27, 42, 26, 45, 49, 26, 27, + + 46, 26, 49, 52, 46, 47, 50, 48, 51, 52, + 53, 26, 26, 54, 26, 28, 55, 50, 28, 57, + 56, 57, 51, 56, 61, 28, 28, 58, 54, 53, + 59, 60, 57, 28, 62, 55, 63, 64, 58, 60, + 65, 59, 66, 61, 62, 67, 68, 69, 70, 64, + 69, 71, 72, 73, 75, 63, 64, 74, 70, 67, + 68, 66, 77, 65, 76, 82, 84, 72, 75, 85, + 74, 71, 77, 76, 83, 86, 73, 84, 88, 82, + 85, 88, 90, 91, 83, 91, 86, 92, 93, 97, + 94, 96, 99, 92, 97, 93, 98, 96, 104, 107, + + 90, 94, 112, 99, 98, 104, 109, 110, 113, 114, + 115, 119, 116, 117, 113, 114, 119, 125, 107, 108, + 118, 112, 121, 109, 110, 108, 117, 108, 125, 115, + 108, 116, 108, 120, 118, 121, 124, 120, 124, 126, + 127, 129, 130, 131, 134, 126, 135, 136, 130, 137, + 141, 129, 190, 136, 131, 138, 135, 139, 134, 127, + 140, 138, 142, 139, 146, 137, 140, 143, 142, 141, + 146, 143, 148, 149, 138, 151, 139, 151, 153, 154, + 148, 157, 158, 161, 160, 154, 174, 157, 160, 161, + 149, 171, 176, 178, 158, 153, 179, 181, 176, 186, + + 171, 185, 179, 189, 178, 174, 185, 192, 181, 187, + 193, 191, 186, 191, 187, 194, 188, 195, 184, 183, + 193, 182, 189, 180, 177, 175, 192, 173, 172, 194, + 195, 198, 198, 170, 198, 198, 198, 199, 169, 199, + 200, 200, 201, 168, 201, 201, 201, 201, 167, 166, + 165, 164, 163, 162, 159, 156, 155, 152, 150, 147, + 145, 144, 133, 132, 128, 123, 122, 111, 106, 105, + 103, 102, 101, 100, 95, 89, 87, 81, 80, 44, + 41, 36, 35, 34, 31, 30, 10, 8, 3, 197, + 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, + + 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, + 197, 197, 197, 197, 197, 197, 197, 197, 197, 197, + 197, 197 + } ; + +static yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr; +static char *yy_full_match; +static int yy_lp; +#define REJECT \ +{ \ +*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ \ +yy_cp = yy_full_match; /* restore poss. 
backed-over text */ \ +++yy_lp; \ +goto find_rule; \ +} +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "Lexer.l" +#define INITIAL 0 +/*===-- Lexer.l - Scanner for llvm assembly files ----------------*- C++ -*--=// +// +// This file implements the flex scanner for LLVM assembly languages files. +// +//===------------------------------------------------------------------------=*/ +#define YY_NEVER_INTERACTIVE 1 +#line 21 "Lexer.l" +#include "ParserInternals.h" +#include "llvm/BasicBlock.h" +#include "llvm/Method.h" +#include "llvm/Module.h" +#include +#include "llvmAsmParser.h" + +#define RET_TOK(type, Enum, sym) \ + llvmAsmlval.type = Instruction::Enum; return sym + + +// TODO: All of the static identifiers are figured out by the lexer, +// these should be hashed. + + +// atoull - Convert an ascii string of decimal digits into the unsigned long +// long representation... this does not have to do input error checking, +// because we know that the input will be matched by a suitable regex... +// +uint64_t atoull(const char *Buffer) { + uint64_t Result = 0; + for (; *Buffer; Buffer++) { + uint64_t OldRes = Result; + Result *= 10; + Result += *Buffer-'0'; + if (Result < OldRes) { // Uh, oh, overflow detected!!! + ThrowException("constant bigger than 64 bits detected!"); + } + } + return Result; +} + + +#define YY_NEVER_INTERACTIVE 1 +/* Comments start with a ; and go till end of line */ +/* Variable(Def) identifiers start with a % sign */ +/* Label identifiers end with a colon */ +/* Quoted names can contain any character except " and \ */ +/* [PN]Integer: match positive and negative literal integer values that + * are preceeded by a '%' character. These represent unnamed variable slots. + */ +/* E[PN]Integer: match positive and negative literal integer values */ +#line 618 "Lexer.cpp" + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap YY_PROTO(( void )); +#else +extern int yywrap YY_PROTO(( void )); +#endif +#endif + +#ifndef YY_NO_UNPUT +static inline void yyunput YY_PROTO(( int c, char *buf_ptr )); +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen YY_PROTO(( yyconst char * )); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); +#else +static int input YY_PROTO(( void )); +#endif +#endif + +#if YY_STACK_USED +static int yy_start_stack_ptr = 0; +static int yy_start_stack_depth = 0; +static int *yy_start_stack = 0; +#ifndef YY_NO_PUSH_STATE +static void yy_push_state YY_PROTO(( int new_state )); +#endif +#ifndef YY_NO_POP_STATE +static void yy_pop_state YY_PROTO(( void )); +#endif +#ifndef YY_NO_TOP_STATE +static int yy_top_state YY_PROTO(( void )); +#endif + +#else +#define YY_NO_PUSH_STATE 1 +#define YY_NO_POP_STATE 1 +#define YY_NO_TOP_STATE 1 +#endif + +#ifdef YY_MALLOC_DECL +YY_MALLOC_DECL +#else +#if __STDC__ +#ifndef __cplusplus +#include +#endif +#else +/* Just try to get by without declaring the routines. This will fail + * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) + * or sizeof(void*) != sizeof(int). + */ +#endif +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ + +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( yy_current_buffer->yy_is_interactive ) \ + { \ + int c = '*', n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \ + && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL int yylex YY_PROTO(( void )) +#endif + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +YY_DECL + { + register yy_state_type yy_current_state; + register char *yy_cp, *yy_bp; + register int yy_act; + +#line 83 "Lexer.l" + + +#line 772 "Lexer.cpp" + + if ( yy_init ) + { + yy_init = 0; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! 
yy_current_buffer ) + yy_current_buffer = + yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_load_buffer_state(); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. + */ + yy_bp = yy_cp; + + yy_current_state = yy_start; + yy_state_ptr = yy_state_buf; + *yy_state_ptr++ = yy_current_state; +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 198 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + *yy_state_ptr++ = yy_current_state; + ++yy_cp; + } + while ( yy_current_state != 197 ); + +yy_find_action: + yy_current_state = *--yy_state_ptr; + yy_lp = yy_accept[yy_current_state]; + for ( ; ; ) /* until we find what rule we matched */ + { + if ( yy_lp && yy_lp < yy_accept[yy_current_state + 1] ) + { + yy_act = yy_acclist[yy_lp]; + { + yy_full_match = yy_cp; + break; + } + } + --yy_cp; + yy_current_state = *--yy_state_ptr; + yy_lp = yy_accept[yy_current_state]; + } + + YY_DO_BEFORE_ACTION; + + if ( yy_act != YY_END_OF_BUFFER ) + { + int yyl; + for ( yyl = 0; yyl < yyleng; ++yyl ) + if ( yytext[yyl] == '\n' ) + ++yylineno; + } + +do_action: /* This label is used only to access EOF actions. 
*/ + + + switch ( yy_act ) + { /* beginning of action switch */ +case 1: +YY_RULE_SETUP +#line 85 "Lexer.l" +{ /* Ignore comments for now */ } + YY_BREAK +case 2: +YY_RULE_SETUP +#line 87 "Lexer.l" +{ return BEGINTOK; } + YY_BREAK +case 3: +YY_RULE_SETUP +#line 88 "Lexer.l" +{ return END; } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 89 "Lexer.l" +{ return TRUE; } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 90 "Lexer.l" +{ return FALSE; } + YY_BREAK +case 6: +YY_RULE_SETUP +#line 91 "Lexer.l" +{ return DECLARE; } + YY_BREAK +case 7: +YY_RULE_SETUP +#line 92 "Lexer.l" +{ return IMPLEMENTATION; } + YY_BREAK +case 8: +YY_RULE_SETUP +#line 94 "Lexer.l" +{ cerr << "deprecated argument '-' used!\n"; return '-'; } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 95 "Lexer.l" +{ cerr << "deprecated type 'bb' used!\n"; llvmAsmlval.TypeVal = Type::LabelTy; return LABEL;} + YY_BREAK +case 10: +YY_RULE_SETUP +#line 97 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::VoidTy ; return VOID; } + YY_BREAK +case 11: +YY_RULE_SETUP +#line 98 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::BoolTy ; return BOOL; } + YY_BREAK +case 12: +YY_RULE_SETUP +#line 99 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::SByteTy ; return SBYTE; } + YY_BREAK +case 13: +YY_RULE_SETUP +#line 100 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::UByteTy ; return UBYTE; } + YY_BREAK +case 14: +YY_RULE_SETUP +#line 101 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::ShortTy ; return SHORT; } + YY_BREAK +case 15: +YY_RULE_SETUP +#line 102 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::UShortTy; return USHORT; } + YY_BREAK +case 16: +YY_RULE_SETUP +#line 103 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::IntTy ; return INT; } + YY_BREAK +case 17: +YY_RULE_SETUP +#line 104 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::UIntTy ; return UINT; } + YY_BREAK +case 18: +YY_RULE_SETUP +#line 105 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::LongTy ; return LONG; } + YY_BREAK +case 19: +YY_RULE_SETUP +#line 106 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::ULongTy ; return ULONG; } + YY_BREAK 
+case 20: +YY_RULE_SETUP +#line 107 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::FloatTy ; return FLOAT; } + YY_BREAK +case 21: +YY_RULE_SETUP +#line 108 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::DoubleTy; return DOUBLE; } + YY_BREAK +case 22: +YY_RULE_SETUP +#line 110 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::TypeTy ; return TYPE; } + YY_BREAK +case 23: +YY_RULE_SETUP +#line 112 "Lexer.l" +{ llvmAsmlval.TypeVal = Type::LabelTy ; return LABEL; } + YY_BREAK +case 24: +YY_RULE_SETUP +#line 114 "Lexer.l" +{ RET_TOK(UnaryOpVal, Neg, NEG); } + YY_BREAK +case 25: +YY_RULE_SETUP +#line 115 "Lexer.l" +{ RET_TOK(UnaryOpVal, Not, NOT); } + YY_BREAK +case 26: +YY_RULE_SETUP +#line 117 "Lexer.l" +{ return PHI; } + YY_BREAK +case 27: +YY_RULE_SETUP +#line 118 "Lexer.l" +{ return CALL; } + YY_BREAK +case 28: +YY_RULE_SETUP +#line 119 "Lexer.l" +{ RET_TOK(BinaryOpVal, Add, ADD); } + YY_BREAK +case 29: +YY_RULE_SETUP +#line 120 "Lexer.l" +{ RET_TOK(BinaryOpVal, Sub, SUB); } + YY_BREAK +case 30: +YY_RULE_SETUP +#line 121 "Lexer.l" +{ RET_TOK(BinaryOpVal, Mul, MUL); } + YY_BREAK +case 31: +YY_RULE_SETUP +#line 122 "Lexer.l" +{ RET_TOK(BinaryOpVal, Div, DIV); } + YY_BREAK +case 32: +YY_RULE_SETUP +#line 123 "Lexer.l" +{ RET_TOK(BinaryOpVal, Rem, REM); } + YY_BREAK +case 33: +YY_RULE_SETUP +#line 124 "Lexer.l" +{ RET_TOK(BinaryOpVal, SetNE, SETNE); } + YY_BREAK +case 34: +YY_RULE_SETUP +#line 125 "Lexer.l" +{ RET_TOK(BinaryOpVal, SetEQ, SETEQ); } + YY_BREAK +case 35: +YY_RULE_SETUP +#line 126 "Lexer.l" +{ RET_TOK(BinaryOpVal, SetLT, SETLT); } + YY_BREAK +case 36: +YY_RULE_SETUP +#line 127 "Lexer.l" +{ RET_TOK(BinaryOpVal, SetGT, SETGT); } + YY_BREAK +case 37: +YY_RULE_SETUP +#line 128 "Lexer.l" +{ RET_TOK(BinaryOpVal, SetLE, SETLE); } + YY_BREAK +case 38: +YY_RULE_SETUP +#line 129 "Lexer.l" +{ RET_TOK(BinaryOpVal, SetGE, SETGE); } + YY_BREAK +case 39: +YY_RULE_SETUP +#line 131 "Lexer.l" +{ RET_TOK(TermOpVal, Ret, RET); } + YY_BREAK +case 40: +YY_RULE_SETUP +#line 132 "Lexer.l" +{ 
RET_TOK(TermOpVal, Br, BR); } + YY_BREAK +case 41: +YY_RULE_SETUP +#line 133 "Lexer.l" +{ RET_TOK(TermOpVal, Switch, SWITCH); } + YY_BREAK +case 42: +YY_RULE_SETUP +#line 136 "Lexer.l" +{ RET_TOK(MemOpVal, Malloc, MALLOC); } + YY_BREAK +case 43: +YY_RULE_SETUP +#line 137 "Lexer.l" +{ RET_TOK(MemOpVal, Alloca, ALLOCA); } + YY_BREAK +case 44: +YY_RULE_SETUP +#line 138 "Lexer.l" +{ RET_TOK(MemOpVal, Free, FREE); } + YY_BREAK +case 45: +YY_RULE_SETUP +#line 139 "Lexer.l" +{ RET_TOK(MemOpVal, Load, LOAD); } + YY_BREAK +case 46: +YY_RULE_SETUP +#line 140 "Lexer.l" +{ RET_TOK(MemOpVal, Store, STORE); } + YY_BREAK +case 47: +YY_RULE_SETUP +#line 141 "Lexer.l" +{ RET_TOK(MemOpVal, GetField, GETFIELD); } + YY_BREAK +case 48: +YY_RULE_SETUP +#line 142 "Lexer.l" +{ RET_TOK(MemOpVal, PutField, PUTFIELD); } + YY_BREAK +case 49: +YY_RULE_SETUP +#line 145 "Lexer.l" +{ llvmAsmlval.StrVal = strdup(yytext+1); return VAR_ID; } + YY_BREAK +case 50: +YY_RULE_SETUP +#line 146 "Lexer.l" +{ + yytext[strlen(yytext)-1] = 0; // nuke colon + llvmAsmlval.StrVal = strdup(yytext); + return LABELSTR; + } + YY_BREAK +case 51: +YY_RULE_SETUP +#line 152 "Lexer.l" +{ + yytext[strlen(yytext)-1] = 0; // nuke end quote + llvmAsmlval.StrVal = strdup(yytext+1); // Nuke start quote + return STRINGCONSTANT; + } + YY_BREAK +case 52: +YY_RULE_SETUP +#line 159 "Lexer.l" +{ llvmAsmlval.UInt64Val = atoull(yytext); return EUINT64VAL; } + YY_BREAK +case 53: +YY_RULE_SETUP +#line 160 "Lexer.l" +{ + uint64_t Val = atoull(yytext+1); + // +1: we have bigger negative range + if (Val > (uint64_t)INT64_MAX+1) + ThrowException("Constant too large for signed 64 bits!"); + llvmAsmlval.SInt64Val = -Val; + return ESINT64VAL; + } + YY_BREAK +case 54: +YY_RULE_SETUP +#line 170 "Lexer.l" +{ llvmAsmlval.UIntVal = atoull(yytext+1); return UINTVAL; } + YY_BREAK +case 55: +YY_RULE_SETUP +#line 171 "Lexer.l" +{ + uint64_t Val = atoull(yytext+2); + // +1: we have bigger negative range + if (Val > (uint64_t)INT32_MAX+1) + 
ThrowException("Constant too large for signed 32 bits!"); + llvmAsmlval.SIntVal = -Val; + return SINTVAL; + } + YY_BREAK +case 56: +YY_RULE_SETUP +#line 181 "Lexer.l" +{ /* Ignore whitespace */ } + YY_BREAK +case 57: +YY_RULE_SETUP +#line 182 "Lexer.l" +{ /*printf("'%s'", yytext);*/ return yytext[0]; } + YY_BREAK +case 58: +YY_RULE_SETUP +#line 184 "Lexer.l" +YY_FATAL_ERROR( "flex scanner jammed" ); + YY_BREAK +#line 1175 "Lexer.cpp" + case YY_STATE_EOF(INITIAL): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between yy_current_buffer and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + yy_n_chars = yy_current_buffer->yy_n_chars; + yy_current_buffer->yy_input_file = yyin; + yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* Okay, we're now positioned to make the NUL + * transition. 
We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yy_c_buf_p; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = + yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of yylex */ + + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ + +static int yy_get_next_buffer() + { + register char *dest = yy_current_buffer->yy_ch_buf; + register char *source = yytext_ptr; + register int number_to_move, i; + int ret_val; + + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( yy_current_buffer->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. 
*/ + number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_current_buffer->yy_n_chars = yy_n_chars = 0; + + else + { + int num_to_read = + yy_current_buffer->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ +#ifdef YY_USES_REJECT + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); +#else + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = yy_current_buffer; + + int yy_c_buf_p_offset = + (int) (yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yy_flex_realloc( (void *) b->yy_ch_buf, + b->yy_buf_size + 2 ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = yy_current_buffer->yy_buf_size - + number_to_move - 1; +#endif + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; + + return ret_val; + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +static yy_state_type yy_get_previous_state() + { + register yy_state_type yy_current_state; + register char *yy_cp; + + yy_current_state = yy_start; + yy_state_ptr = yy_state_buf; + *yy_state_ptr++ = yy_current_state; + + for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { + register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 198 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + *yy_state_ptr++ = yy_current_state; + } + + return yy_current_state; + } + + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) +#else +static yy_state_type yy_try_NUL_trans( yy_current_state ) +yy_state_type yy_current_state; +#endif + { + register int yy_is_jam; + + register YY_CHAR yy_c = 1; + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 198 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + yy_is_jam = (yy_current_state == 197); + if ( ! yy_is_jam ) + *yy_state_ptr++ = yy_current_state; + + return yy_is_jam ? 0 : yy_current_state; + } + + +#ifndef YY_NO_UNPUT +#ifdef YY_USE_PROTOS +static inline void yyunput( int c, register char *yy_bp ) +#else +static inline void yyunput( c, yy_bp ) +int c; +register char *yy_bp; +#endif + { + register char *yy_cp = yy_c_buf_p; + + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. 
*/ + register int number_to_move = yy_n_chars + 2; + register char *dest = &yy_current_buffer->yy_ch_buf[ + yy_current_buffer->yy_buf_size + 2]; + register char *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + yy_current_buffer->yy_n_chars = + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (char) c; + + if ( c == '\n' ) + --yylineno; + + yytext_ptr = yy_bp; + yy_hold_char = *yy_cp; + yy_c_buf_p = yy_cp; + } +#endif /* ifndef YY_NO_UNPUT */ + + +#ifdef __cplusplus +static int yyinput() +#else +static int input() +#endif + { + int c; + + *yy_c_buf_p = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* This was really a NUL. */ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = yy_c_buf_p - yytext_ptr; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /* fall through */ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + return EOF; + + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */ + *yy_c_buf_p = '\0'; /* preserve yytext */ + yy_hold_char = *++yy_c_buf_p; + + if ( c == '\n' ) + ++yylineno; + + return c; + } + + +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) +#else +void yyrestart( input_file ) +FILE *input_file; +#endif + { + if ( ! yy_current_buffer ) + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +YY_BUFFER_STATE new_buffer; +#endif + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* Flush out information for old buffer. */ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. 
+ */ + yy_did_buffer_switch_on_eof = 1; + } + + +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) +#else +YY_BUFFER_STATE yy_create_buffer( file, size ) +FILE *file; +int size; +#endif + { + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; + } + + +#ifdef YY_USE_PROTOS +void yy_delete_buffer( YY_BUFFER_STATE b ) +#else +void yy_delete_buffer( b ) +YY_BUFFER_STATE b; +#endif + { + if ( ! b ) + return; + + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yy_flex_free( (void *) b->yy_ch_buf ); + + yy_flex_free( (void *) b ); + } + + +#ifndef YY_ALWAYS_INTERACTIVE +#ifndef YY_NEVER_INTERACTIVE +extern int isatty YY_PROTO(( int )); +#endif +#endif + +#ifdef YY_USE_PROTOS +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) +#else +void yy_init_buffer( b, file ) +YY_BUFFER_STATE b; +FILE *file; +#endif + + + { + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + +#if YY_ALWAYS_INTERACTIVE + b->yy_is_interactive = 1; +#else +#if YY_NEVER_INTERACTIVE + b->yy_is_interactive = 0; +#else + b->yy_is_interactive = file ? 
(isatty( fileno(file) ) > 0) : 0; +#endif +#endif + } + + +#ifdef YY_USE_PROTOS +void yy_flush_buffer( YY_BUFFER_STATE b ) +#else +void yy_flush_buffer( b ) +YY_BUFFER_STATE b; +#endif + + { + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == yy_current_buffer ) + yy_load_buffer_state(); + } + + +#ifndef YY_NO_SCAN_BUFFER +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size ) +#else +YY_BUFFER_STATE yy_scan_buffer( base, size ) +char *base; +yy_size_t size; +#endif + { + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return 0; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; + } +#endif + + +#ifndef YY_NO_SCAN_STRING +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str ) +#else +YY_BUFFER_STATE yy_scan_string( yy_str ) +yyconst char *yy_str; +#endif + { + int len; + for ( len = 0; yy_str[len]; ++len ) + ; + + return yy_scan_bytes( yy_str, len ); + } +#endif + + +#ifndef YY_NO_SCAN_BYTES +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len ) +#else +YY_BUFFER_STATE yy_scan_bytes( bytes, len ) +yyconst char *bytes; +int len; +#endif + { + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = len + 2; + buf = (char *) yy_flex_alloc( n ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < len; ++i ) + buf[i] = bytes[i]; + + buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; + } +#endif + + +#ifndef YY_NO_PUSH_STATE +#ifdef YY_USE_PROTOS +static void yy_push_state( int new_state ) +#else +static void yy_push_state( new_state ) +int new_state; +#endif + { + if ( yy_start_stack_ptr >= yy_start_stack_depth ) + { + yy_size_t new_size; + + yy_start_stack_depth += YY_START_STACK_INCR; + new_size = yy_start_stack_depth * sizeof( int ); + + if ( ! 
yy_start_stack ) + yy_start_stack = (int *) yy_flex_alloc( new_size ); + + else + yy_start_stack = (int *) yy_flex_realloc( + (void *) yy_start_stack, new_size ); + + if ( ! yy_start_stack ) + YY_FATAL_ERROR( + "out of memory expanding start-condition stack" ); + } + + yy_start_stack[yy_start_stack_ptr++] = YY_START; + + BEGIN(new_state); + } +#endif + + +#ifndef YY_NO_POP_STATE +static void yy_pop_state() + { + if ( --yy_start_stack_ptr < 0 ) + YY_FATAL_ERROR( "start-condition stack underflow" ); + + BEGIN(yy_start_stack[yy_start_stack_ptr]); + } +#endif + + +#ifndef YY_NO_TOP_STATE +static int yy_top_state() + { + return yy_start_stack[yy_start_stack_ptr - 1]; + } +#endif + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +#ifdef YY_USE_PROTOS +static void yy_fatal_error( yyconst char msg[] ) +#else +static void yy_fatal_error( msg ) +char msg[]; +#endif + { + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); + } + + + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + yytext[yyleng] = yy_hold_char; \ + yy_c_buf_p = yytext + n; \ + yy_hold_char = *yy_c_buf_p; \ + *yy_c_buf_p = '\0'; \ + yyleng = n; \ + } \ + while ( 0 ) + + +/* Internal utility routines. 
#ifdef YY_NEED_STRLEN
/* yy_flex_strlen - return the number of characters in s before its
 * terminating NUL.
 */
#ifdef YY_USE_PROTOS
static int yy_flex_strlen( yyconst char *s )
#else
static int yy_flex_strlen( s )
yyconst char *s;
#endif
	{
	register int count = 0;

	while ( s[count] != '\0' )
		++count;

	return count;
	}
#endif
+// +//===------------------------------------------------------------------------=*/ + +%option prefix="llvmAsm" +%option yylineno +%option nostdinit +%option never-interactive +%option batch +%option noyywrap +%option nodefault +%option 8bit +%option outfile="Lexer.cpp" +%option ecs +%option noreject +%option noyymore + +%{ +#include "ParserInternals.h" +#include "llvm/BasicBlock.h" +#include "llvm/Method.h" +#include "llvm/Module.h" +#include +#include "llvmAsmParser.h" + +#define RET_TOK(type, Enum, sym) \ + llvmAsmlval.type = Instruction::Enum; return sym + + +// TODO: All of the static identifiers are figured out by the lexer, +// these should be hashed. + + +// atoull - Convert an ascii string of decimal digits into the unsigned long +// long representation... this does not have to do input error checking, +// because we know that the input will be matched by a suitable regex... +// +uint64_t atoull(const char *Buffer) { + uint64_t Result = 0; + for (; *Buffer; Buffer++) { + uint64_t OldRes = Result; + Result *= 10; + Result += *Buffer-'0'; + if (Result < OldRes) { // Uh, oh, overflow detected!!! + ThrowException("constant bigger than 64 bits detected!"); + } + } + return Result; +} + + +#define YY_NEVER_INTERACTIVE 1 +%} + + + +/* Comments start with a ; and go till end of line */ +Comment ;.* + +/* Variable(Def) identifiers start with a % sign */ +VarID %[a-zA-Z$._][a-zA-Z$._0-9]* + +/* Label identifiers end with a colon */ +Label [a-zA-Z$._0-9]+: + +/* Quoted names can contain any character except " and \ */ +StringConstant \"[^\"]+\" + + +/* [PN]Integer: match positive and negative literal integer values that + * are preceeded by a '%' character. These represent unnamed variable slots. 
+ */ +EPInteger %[0-9]+ +ENInteger %-[0-9]+ + + +/* E[PN]Integer: match positive and negative literal integer values */ +PInteger [0-9]+ +NInteger -[0-9]+ + +%% + +{Comment} { /* Ignore comments for now */ } + +begin { return BEGINTOK; } +end { return END; } +true { return TRUE; } +false { return FALSE; } +declare { return DECLARE; } +implementation { return IMPLEMENTATION; } + +- { cerr << "deprecated argument '-' used!\n"; return '-'; } +bb { cerr << "deprecated type 'bb' used!\n"; llvmAsmlval.TypeVal = Type::LabelTy; return LABEL;} + +void { llvmAsmlval.TypeVal = Type::VoidTy ; return VOID; } +bool { llvmAsmlval.TypeVal = Type::BoolTy ; return BOOL; } +sbyte { llvmAsmlval.TypeVal = Type::SByteTy ; return SBYTE; } +ubyte { llvmAsmlval.TypeVal = Type::UByteTy ; return UBYTE; } +short { llvmAsmlval.TypeVal = Type::ShortTy ; return SHORT; } +ushort { llvmAsmlval.TypeVal = Type::UShortTy; return USHORT; } +int { llvmAsmlval.TypeVal = Type::IntTy ; return INT; } +uint { llvmAsmlval.TypeVal = Type::UIntTy ; return UINT; } +long { llvmAsmlval.TypeVal = Type::LongTy ; return LONG; } +ulong { llvmAsmlval.TypeVal = Type::ULongTy ; return ULONG; } +float { llvmAsmlval.TypeVal = Type::FloatTy ; return FLOAT; } +double { llvmAsmlval.TypeVal = Type::DoubleTy; return DOUBLE; } + +type { llvmAsmlval.TypeVal = Type::TypeTy ; return TYPE; } + +label { llvmAsmlval.TypeVal = Type::LabelTy ; return LABEL; } + +neg { RET_TOK(UnaryOpVal, Neg, NEG); } +not { RET_TOK(UnaryOpVal, Not, NOT); } + +phi { return PHI; } +call { return CALL; } +add { RET_TOK(BinaryOpVal, Add, ADD); } +sub { RET_TOK(BinaryOpVal, Sub, SUB); } +mul { RET_TOK(BinaryOpVal, Mul, MUL); } +div { RET_TOK(BinaryOpVal, Div, DIV); } +rem { RET_TOK(BinaryOpVal, Rem, REM); } +setne { RET_TOK(BinaryOpVal, SetNE, SETNE); } +seteq { RET_TOK(BinaryOpVal, SetEQ, SETEQ); } +setlt { RET_TOK(BinaryOpVal, SetLT, SETLT); } +setgt { RET_TOK(BinaryOpVal, SetGT, SETGT); } +setle { RET_TOK(BinaryOpVal, SetLE, SETLE); } +setge { 
{EPInteger}     {
                  /* yytext is "%<digits>"; skip the leading '%'. */
                  uint64_t Val = atoull(yytext+1);
                  /* UIntVal is a plain unsigned; reject values that would
                   * otherwise be silently truncated, mirroring the range
                   * check done for {ENInteger}.
                   */
                  if (Val > (uint64_t)UINT32_MAX)
                    ThrowException("Constant too large for unsigned 32 bits!");
                  llvmAsmlval.UIntVal = (unsigned)Val;
                  return UINTVAL;
                }
+ +LIBRARYNAME = asmparser + +include $(LEVEL)/Makefile.common + diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp new file mode 100644 index 00000000000..57c831e9e17 --- /dev/null +++ b/lib/AsmParser/Parser.cpp @@ -0,0 +1,84 @@ +//===- Parser.cpp - Main dispatch module for the Parser library -------------=== +// +// This library implements the functionality defined in llvm/assembly/parser.h +// +//===------------------------------------------------------------------------=== + +#include "llvm/Analysis/Verifier.h" +#include "llvm/Module.h" +#include "ParserInternals.h" +#include // for sprintf + +// The useful interface defined by this file... Parse an ascii file, and return +// the internal representation in a nice slice'n'dice'able representation. +// +Module *ParseAssemblyFile(const ToolCommandLine &Opts) throw (ParseException) { + FILE *F = stdin; + + if (Opts.getInputFilename() != "-") + F = fopen(Opts.getInputFilename().c_str(), "r"); + + if (F == 0) { + throw ParseException(Opts, string("Could not open file '") + + Opts.getInputFilename() + "'"); + } + + // TODO: If this throws an exception, F is not closed. + Module *Result = RunVMAsmParser(Opts, F); + + if (F != stdin) + fclose(F); + + if (Result) { // Check to see that it is valid... 
+ vector Errors; + if (verify(Result, Errors)) { + delete Result; Result = 0; + string Message; + + for (unsigned i = 0; i < Errors.size(); i++) + Message += Errors[i] + "\n"; + + throw ParseException(Opts, Message); + } + } + return Result; +} + + +//===------------------------------------------------------------------------=== +// ParseException Class +//===------------------------------------------------------------------------=== + + +ParseException::ParseException(const ToolCommandLine &opts, + const string &message, int lineNo, int colNo) + : Opts(opts), Message(message) { + LineNo = lineNo; ColumnNo = colNo; +} + +ParseException::ParseException(const ParseException &E) + : Opts(E.Opts), Message(E.Message) { + LineNo = E.LineNo; + ColumnNo = E.ColumnNo; +} + +const string ParseException::getMessage() const { // Includes info from options + string Result; + char Buffer[10]; + + if (Opts.getInputFilename() == "-") + Result += ""; + else + Result += Opts.getInputFilename(); + + if (LineNo != -1) { + sprintf(Buffer, "%d", LineNo); + Result += string(":") + Buffer; + if (ColumnNo != -1) { + sprintf(Buffer, "%d", ColumnNo); + Result += string(",") + Buffer; + } + } + + return Result + ": " + Message; +} diff --git a/lib/AsmParser/ParserInternals.h b/lib/AsmParser/ParserInternals.h new file mode 100644 index 00000000000..2856c9b08cd --- /dev/null +++ b/lib/AsmParser/ParserInternals.h @@ -0,0 +1,159 @@ +//===-- ParserInternals.h - Definitions internal to the parser ---*- C++ -*--=// +// +// This header file defines the various variables that are shared among the +// different components of the parser... 
// ValID - Represents a reference of a definition of some sort.  This may either
// be a numeric reference or a symbolic (%var) reference.  This is just a 
// discriminated union: the Type field says which member of the anonymous union
// is meaningful.
//
// Note that I can't implement this class in a straight forward manner with 
// constructors and stuff because it goes in a union, and GCC doesn't like 
// putting classes with ctor's in unions.  :(
//
// Because there is no destructor, a ValID holding a name (Type 1 or 4) must be
// released explicitly with destroy(), and duplicated with copy() rather than
// by plain assignment when both copies will be destroyed.
//
struct ValID {
  int Type;               // 0 = number, 1 = name, 2 = const pool, 
                          // 3 = unsigned const pool, 4 = const string
  union {
    int      Num;         // If it's a numeric reference
    char    *Name;        // If it's a named reference.  Memory must be free'd.
    int64_t  ConstPool64; // Constant pool reference.  This is the value
    uint64_t UConstPool64;// Unsigned constant pool reference.
  };

  // create - Build a numeric (Type 0) reference.
  static ValID create(int Num) {
    ValID D; D.Type = 0; D.Num = Num; return D;
  }

  // create - Build a named (Type 1) reference.  The pointer is stored as is
  // (not copied) and is later passed to free() by destroy().
  static ValID create(char *Name) {
    ValID D; D.Type = 1; D.Name = Name; return D;
  }

  // create - Build a signed constant pool (Type 2) reference.
  static ValID create(int64_t Val) {
    ValID D; D.Type = 2; D.ConstPool64 = Val; return D;
  }

  // create - Build an unsigned constant pool (Type 3) reference.
  static ValID create(uint64_t Val) {
    ValID D; D.Type = 3; D.UConstPool64 = Val; return D;
  }

  // create_conststr - Build a string constant (Type 4) reference.  As with
  // the named form, the pointer is stored as is and freed by destroy().
  static ValID create_conststr(char *Name) {
    ValID D; D.Type = 4; D.Name = Name; return D;
  }

  // destroy - Release the string owned by a Type 1 or Type 4 ValID.  The
  // Name pointer is not reset, so it must not be used (or destroyed again)
  // afterwards.
  inline void destroy() {
    if (Type == 1 || Type == 4) free(Name);  // Free this strdup'd memory...
  }

  // copy - Return a copy that owns its own strdup'd string when this ValID
  // holds one; other kinds are copied bitwise.
  inline ValID copy() const {
    if (Type != 1 && Type != 4) return *this;
    ValID Result = *this;
    Result.Name = strdup(Name);
    return Result;
  }

  // getName - Return a printable form of the reference: "#N" for numbers, the
  // bare name for named references, the quoted text for string constants and
  // "%value" for constant pool references.
  // NOTE(review): the default case also handles Type 3 and prints the signed
  // ConstPool64 member, so large unsigned values presumably print as negative
  // numbers - confirm whether that is intended.
  inline string getName() const {
    switch (Type) {
    case 0:  return string("#") + itostr(Num);
    case 1:  return Name;
    case 4:  return string("\"") + Name + string("\"");
    default: return string("%") + itostr(ConstPool64);
    }
  }
};
+ virtual Value *getOperand(unsigned i) { return 0; } + virtual const Value *getOperand(unsigned i) const { return 0; } + virtual bool setOperand(unsigned i, Value *Val) { return false; } + virtual unsigned getNumOperands() const { return 0; } +}; + +struct BBPlaceHolderHelper : public BasicBlock { + BBPlaceHolderHelper(const Type *Ty) : BasicBlock() { + assert(Ty->isLabelType()); + } +}; + +struct MethPlaceHolderHelper : public Method { + MethPlaceHolderHelper(const Type *Ty) + : Method((const MethodType*)Ty) { + assert(Ty->isMethodType() && "Method placeholders must be method types!"); + } +}; + +typedef PlaceholderDef DefPlaceHolder; +typedef PlaceholderDef BBPlaceHolder; +typedef PlaceholderDef MethPlaceHolder; +//typedef PlaceholderDef ModulePlaceHolder; + +static inline ValID &getValIDFromPlaceHolder(Value *Def) { + switch (Def->getType()->getPrimitiveID()) { + case Type::LabelTyID: return ((BBPlaceHolder*)Def)->getDef(); + case Type::MethodTyID: return ((MethPlaceHolder*)Def)->getDef(); +//case Type::ModuleTyID: return ((ModulePlaceHolder*)Def)->getDef(); + default: return ((DefPlaceHolder*)Def)->getDef(); + } +} + +#endif diff --git a/lib/AsmParser/llvmAsmParser.cpp b/lib/AsmParser/llvmAsmParser.cpp new file mode 100644 index 00000000000..e79f1bf5f66 --- /dev/null +++ b/lib/AsmParser/llvmAsmParser.cpp @@ -0,0 +1,2202 @@ + +/* A Bison parser, made from llvmAsmParser.y + by GNU Bison version 1.28 */ + +#define YYBISON 1 /* Identify Bison output. 
*/ + +#define yyparse llvmAsmparse +#define yylex llvmAsmlex +#define yyerror llvmAsmerror +#define yylval llvmAsmlval +#define yychar llvmAsmchar +#define yydebug llvmAsmdebug +#define yynerrs llvmAsmnerrs +#define ESINT64VAL 257 +#define EUINT64VAL 258 +#define SINTVAL 259 +#define UINTVAL 260 +#define VOID 261 +#define BOOL 262 +#define SBYTE 263 +#define UBYTE 264 +#define SHORT 265 +#define USHORT 266 +#define INT 267 +#define UINT 268 +#define LONG 269 +#define ULONG 270 +#define FLOAT 271 +#define DOUBLE 272 +#define STRING 273 +#define TYPE 274 +#define LABEL 275 +#define VAR_ID 276 +#define LABELSTR 277 +#define STRINGCONSTANT 278 +#define IMPLEMENTATION 279 +#define TRUE 280 +#define FALSE 281 +#define BEGINTOK 282 +#define END 283 +#define DECLARE 284 +#define PHI 285 +#define CALL 286 +#define RET 287 +#define BR 288 +#define SWITCH 289 +#define NEG 290 +#define NOT 291 +#define TOINT 292 +#define TOUINT 293 +#define ADD 294 +#define SUB 295 +#define MUL 296 +#define DIV 297 +#define REM 298 +#define SETLE 299 +#define SETGE 300 +#define SETLT 301 +#define SETGT 302 +#define SETEQ 303 +#define SETNE 304 +#define MALLOC 305 +#define ALLOCA 306 +#define FREE 307 +#define LOAD 308 +#define STORE 309 +#define GETFIELD 310 +#define PUTFIELD 311 + +#line 13 "llvmAsmParser.y" + +#include "ParserInternals.h" +#include "llvm/BasicBlock.h" +#include "llvm/Method.h" +#include "llvm/SymbolTable.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Assembly/Parser.h" +#include "llvm/ConstantPool.h" +#include "llvm/iTerminators.h" +#include "llvm/iMemory.h" +#include +#include // Get definition of pair class +#include // This embarasment is due to our flex lexer... + +int yyerror(char *ErrorMsg); // Forward declarations to prevent "implicit +int yylex(); // declaration" of xxx warnings. 
+int yyparse(); + +static Module *ParserResult; +const ToolCommandLine *CurOptions = 0; + +// This contains info used when building the body of a method. It is destroyed +// when the method is completed. +// +typedef vector ValueList; // Numbered defs +static void ResolveDefinitions(vector &LateResolvers); + +static struct PerModuleInfo { + Module *CurrentModule; + vector Values; // Module level numbered definitions + vector LateResolveValues; + + void ModuleDone() { + // If we could not resolve some blocks at parsing time (forward branches) + // resolve the branches now... + ResolveDefinitions(LateResolveValues); + + Values.clear(); // Clear out method local definitions + CurrentModule = 0; + } +} CurModule; + +static struct PerMethodInfo { + Method *CurrentMethod; // Pointer to current method being created + + vector Values; // Keep track of numbered definitions + vector LateResolveValues; + + inline PerMethodInfo() { + CurrentMethod = 0; + } + + inline ~PerMethodInfo() {} + + inline void MethodStart(Method *M) { + CurrentMethod = M; + } + + void MethodDone() { + // If we could not resolve some blocks at parsing time (forward branches) + // resolve the branches now... + ResolveDefinitions(LateResolveValues); + + Values.clear(); // Clear out method local definitions + CurrentMethod = 0; + } +} CurMeth; // Info for the current method... + + +//===----------------------------------------------------------------------===// +// Code to handle definitions of all the types +//===----------------------------------------------------------------------===// + +static void InsertValue(Value *D, vector &ValueTab = CurMeth.Values) { + if (!D->hasName()) { // Is this a numbered definition? 
// getVal - Look up (or create) the Value of the given Type that the ValID D
// refers to.
//
//  * Numbered references (D.Type == 0) are looked up first in the module
//    level table and then in the current method's table.
//  * Named references (D.Type == 1) are looked up in the current method's
//    symbol table and then in the module's.  On success D's name is freed.
//  * Constant references (D.Type == 2, 3, 4) are materialized and uniqued in
//    the constant pool of the current method, or of the module when no method
//    is being parsed.  Values that do not fit the type are reported through
//    ThrowException; string constants (4) are not implemented and abort.
//
// When a numbered or named reference cannot be found, the result depends on
// DoNotImprovise: if true, 0 is returned; otherwise a placeholder object is
// created, queued for late resolution, and returned.
//
static Value *getVal(const Type *Type, ValID &D, 
                     bool DoNotImprovise = false) {
  switch (D.Type) {
  case 0: {                 // Is it a numbered definition?
    unsigned type = Type->getUniqueID();
    unsigned Num = (unsigned)D.Num;

    // Module constants occupy the lowest numbered slots...
    if (type < CurModule.Values.size()) {
      if (Num < CurModule.Values[type].size()) 
        return CurModule.Values[type][Num];

      // Not a module level value: rebase the index so that it counts from
      // the first method level slot.
      Num -= CurModule.Values[type].size();
    }

    // Make sure that our type is within bounds
    if (CurMeth.Values.size() <= type)
      break;

    // Check that the number is within bounds...
    if (CurMeth.Values[type].size() <= Num)
      break;
  
    return CurMeth.Values[type][Num];
  }
  case 1: {                // Is it a named definition?
    string Name(D.Name);
    SymbolTable *SymTab = 0;
    if (CurMeth.CurrentMethod) 
      SymTab = CurMeth.CurrentMethod->getSymbolTable();
    Value *N = SymTab ? SymTab->lookup(Type, Name) : 0;

    if (N == 0) {
      // Not found at method scope (or no method is active): try the module.
      SymTab = CurModule.CurrentModule->getSymbolTable();
      if (SymTab)
        N = SymTab->lookup(Type, Name);
      if (N == 0) break;
    }

    D.destroy();  // Free old strdup'd memory...
    return N;
  }

  case 2:                 // Is it a constant pool reference??
  case 3:                 // Is it an unsigned const pool reference?
  case 4:{                // Is it a string const pool reference?
    ConstPoolVal *CPV = 0;

    // Check to make sure that "Type" is an integral type, and that our 
    // value will fit into the specified type...
    switch (D.Type) {
    case 2:
      if (Type == Type::BoolTy) {  // Special handling for boolean data
        CPV = new ConstPoolBool(D.ConstPool64 != 0);
      } else {
        if (!ConstPoolSInt::isValueValidForType(Type, D.ConstPool64))
          ThrowException("Symbolic constant pool reference is invalid!");
        CPV = new ConstPoolSInt(Type, D.ConstPool64);
      }
      break;
    case 3:
      if (!ConstPoolUInt::isValueValidForType(Type, D.UConstPool64)) {
        if (!ConstPoolSInt::isValueValidForType(Type, D.ConstPool64)) {
          ThrowException("Symbolic constant pool reference is invalid!");
        } else {     // This is really a signed reference.  Transmogrify.
          CPV = new ConstPoolSInt(Type, D.ConstPool64);
        }
      } else {
        CPV = new ConstPoolUInt(Type, D.UConstPool64);
      }
      break;
    case 4:
      cerr << "FIXME: TODO: String constants [sbyte] not implemented yet!\n";
      abort();
      // The statements below are not reached because of the abort() above.
      //CPV = new ConstPoolString(D.Name);
      D.destroy();   // Free the string memory
      break;
    }
    assert(CPV && "How did we escape creating a constant??");

    // Scan through the constant table and see if we already have loaded this
    // constant.
    //
    ConstantPool &CP = CurMeth.CurrentMethod ? 
                         CurMeth.CurrentMethod->getConstantPool() :
                           CurModule.CurrentModule->getConstantPool();
    ConstPoolVal *C = CP.find(CPV);      // Already have this constant?
    if (C) {
      delete CPV;  // Didn't need this after all, oh well.
      return C;    // Yup, we already have one, recycle it!
    }
    CP.insert(CPV);
      
    // Success, everything is kosher.  Lets go!
    return CPV;
  }   // End of case 2,3,4
  }   // End of switch


  // If we reached here, we referenced either a symbol that we don't know about
  // or an id number that hasn't been read yet.  We may be referencing something
  // forward, so just create an entry to be resolved later and get to it...
  //
  if (DoNotImprovise) return 0;  // Do we just want a null to be returned?

  // TODO: Attempt to coalesce nodes that are the same with previous ones.
  Value *d = 0;
  switch (Type->getPrimitiveID()) {
  case Type::LabelTyID: d = new    BBPlaceHolder(Type, D); break;
  case Type::MethodTyID: 
    // Method placeholders are queued on the module level list instead of the
    // method level one.
    d = new MethPlaceHolder(Type, D); 
    InsertValue(d, CurModule.LateResolveValues);
    return d;
//case Type::ClassTyID:      d = new ClassPlaceHolder(Type, D); break;
  default:                   d = new   DefPlaceHolder(Type, D); break;
  }

  assert(d != 0 && "How did we not make something?");
  InsertValue(d, CurMeth.LateResolveValues);
  return d;
}
+// +static void ResolveDefinitions(vector &LateResolvers) { + // Loop over LateResolveDefs fixing up stuff that couldn't be resolved + for (unsigned ty = 0; ty < LateResolvers.size(); ty++) { + while (!LateResolvers[ty].empty()) { + Value *V = LateResolvers[ty].back(); + LateResolvers[ty].pop_back(); + ValID &DID = getValIDFromPlaceHolder(V); + + Value *TheRealValue = getVal(Type::getUniqueIDType(ty), DID, true); + + if (TheRealValue == 0 && DID.Type == 1) + ThrowException("Reference to an invalid definition: '" +DID.getName() + + "' of type '" + V->getType()->getName() + "'"); + else if (TheRealValue == 0) + ThrowException("Reference to an invalid definition: #" +itostr(DID.Num)+ + " of type '" + V->getType()->getName() + "'"); + + V->replaceAllUsesWith(TheRealValue); + assert(V->use_empty()); + delete V; + } + } + + LateResolvers.clear(); +} + +// addConstValToConstantPool - This code is used to insert a constant into the +// current constant pool. This is designed to make maximal (but not more than +// possible) reuse (merging) of constants in the constant pool. This means that +// multiple references to %4, for example will all get merged. +// +static ConstPoolVal *addConstValToConstantPool(ConstPoolVal *C) { + vector &ValTab = CurMeth.CurrentMethod ? + CurMeth.Values : CurModule.Values; + ConstantPool &CP = CurMeth.CurrentMethod ? + CurMeth.CurrentMethod->getConstantPool() : + CurModule.CurrentModule->getConstantPool(); + + if (ConstPoolVal *CPV = CP.find(C)) { + // Constant already in constant pool. Try to merge the two constants + if (CPV->hasName() && !C->hasName()) { + // Merge the two values, we inherit the existing CPV's name. 
+ // InsertValue requires that the value have no name to insert correctly + // (because we want to fill the slot this constant would have filled) + // + string Name = CPV->getName(); + CPV->setName(""); + InsertValue(CPV, ValTab); + CPV->setName(Name); + delete C; + return CPV; + } else if (!CPV->hasName() && C->hasName()) { + // If we have a name on this value and there isn't one in the const + // pool val already, propogate it. + // + CPV->setName(C->getName()); + delete C; // Sorry, you're toast + return CPV; + } else if (CPV->hasName() && C->hasName()) { + // Both values have distinct names. We cannot merge them. + CP.insert(C); + InsertValue(C, ValTab); + return C; + } else if (!CPV->hasName() && !C->hasName()) { + // Neither value has a name, trivially merge them. + InsertValue(CPV, ValTab); + delete C; + return CPV; + } + + assert(0 && "Not reached!"); + return 0; + } else { // No duplication of value. + CP.insert(C); + InsertValue(C, ValTab); + return C; + } +} + +//===----------------------------------------------------------------------===// +// RunVMAsmParser - Define an interface to this parser +//===----------------------------------------------------------------------===// +// +Module *RunVMAsmParser(const ToolCommandLine &Opts, FILE *F) { + llvmAsmin = F; + CurOptions = &Opts; + llvmAsmlineno = 1; // Reset the current line number... + + CurModule.CurrentModule = new Module(); // Allocate a new module to read + yyparse(); // Parse the file. + Module *Result = ParserResult; + CurOptions = 0; + llvmAsmin = stdin; // F is about to go away, don't use it anymore... 
+ ParserResult = 0; + + return Result; +} + + +#line 337 "llvmAsmParser.y" +typedef union { + Module *ModuleVal; + Method *MethodVal; + MethodArgument *MethArgVal; + BasicBlock *BasicBlockVal; + TerminatorInst *TermInstVal; + Instruction *InstVal; + ConstPoolVal *ConstVal; + const Type *TypeVal; + + list *MethodArgList; + list *ValueList; + list *TypeList; + list > *JumpTable; + vector *ConstVector; + + int64_t SInt64Val; + uint64_t UInt64Val; + int SIntVal; + unsigned UIntVal; + + char *StrVal; // This memory is allocated by strdup! + ValID ValIDVal; // May contain memory allocated by strdup + + Instruction::UnaryOps UnaryOpVal; + Instruction::BinaryOps BinaryOpVal; + Instruction::TermOps TermOpVal; + Instruction::MemoryOps MemOpVal; +} YYSTYPE; +#include + +#ifndef __cplusplus +#ifndef __STDC__ +#define const +#endif +#endif + + + +#define YYFINAL 220 +#define YYFLAG -32768 +#define YYNTBASE 68 + +#define YYTRANSLATE(x) ((unsigned)(x) <= 311 ? yytranslate[x] : 103) + +static const char yytranslate[] = { 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 65, + 66, 67, 2, 64, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 58, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 59, 2, 60, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 61, + 2, 2, 62, 2, 63, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 1, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 
39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + 57 +}; + +#if YYDEBUG != 0 +static const short yyprhs[] = { 0, + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, + 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, + 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, + 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, + 80, 82, 84, 86, 88, 90, 93, 94, 97, 100, + 103, 106, 109, 112, 119, 125, 134, 142, 149, 154, + 158, 160, 164, 165, 167, 170, 173, 175, 176, 179, + 183, 185, 187, 188, 194, 198, 201, 203, 205, 207, + 209, 211, 213, 215, 217, 219, 224, 228, 232, 238, + 242, 245, 248, 250, 254, 257, 260, 263, 267, 270, + 271, 275, 278, 282, 292, 302, 309, 315, 318, 321, + 325, 327, 328, 334, 338, 341, 348, 350, 353, 359, + 362, 368 +}; + +static const short yyrhs[] = { 5, + 0, 6, 0, 3, 0, 4, 0, 8, 0, 9, + 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, + 0, 15, 0, 16, 0, 17, 0, 18, 0, 19, + 0, 20, 0, 21, 0, 70, 0, 7, 0, 36, + 0, 37, 0, 38, 0, 39, 0, 40, 0, 41, + 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, + 0, 47, 0, 48, 0, 49, 0, 50, 0, 15, + 0, 13, 0, 11, 0, 9, 0, 16, 0, 14, + 0, 12, 0, 10, 0, 74, 0, 75, 0, 22, + 58, 0, 0, 74, 69, 0, 75, 4, 0, 8, + 26, 0, 8, 27, 0, 19, 24, 0, 20, 70, + 0, 59, 70, 60, 59, 79, 60, 0, 59, 70, + 60, 59, 60, 0, 59, 4, 61, 70, 60, 59, + 79, 60, 0, 59, 4, 61, 70, 60, 59, 60, + 0, 62, 92, 63, 62, 79, 63, 0, 62, 63, + 62, 63, 0, 79, 64, 78, 0, 78, 0, 80, + 77, 78, 0, 0, 82, 0, 82, 89, 0, 80, + 25, 0, 22, 0, 0, 70, 83, 0, 84, 64, + 85, 0, 84, 0, 85, 0, 0, 71, 24, 65, + 86, 66, 0, 87, 80, 28, 0, 93, 29, 0, + 3, 0, 4, 0, 26, 0, 27, 0, 24, 0, + 68, 0, 22, 0, 90, 0, 91, 0, 71, 65, + 92, 66, 0, 71, 65, 66, 0, 59, 70, 60, + 0, 59, 4, 61, 70, 60, 0, 62, 92, 63, + 0, 62, 63, 0, 70, 67, 0, 70, 0, 92, + 64, 70, 0, 93, 94, 0, 88, 94, 0, 95, + 96, 0, 23, 95, 96, 0, 95, 98, 0, 0, + 33, 70, 91, 0, 33, 7, 0, 34, 21, 91, + 0, 34, 8, 91, 64, 21, 91, 64, 21, 91, + 0, 35, 76, 91, 64, 21, 91, 59, 97, 60, + 0, 97, 76, 90, 64, 21, 91, 0, 76, 90, + 64, 21, 91, 0, 77, 101, 0, 70, 91, 0, + 99, 64, 91, 0, 
99, 0, 0, 73, 70, 91, + 64, 91, 0, 72, 70, 91, 0, 31, 99, 0, + 32, 70, 91, 65, 100, 66, 0, 102, 0, 51, + 70, 0, 51, 70, 64, 14, 91, 0, 52, 70, + 0, 52, 70, 64, 14, 91, 0, 53, 70, 91, + 0 +}; + +#endif + +#if YYDEBUG != 0 +static const short yyrline[] = { 0, + 433, 434, 441, 442, 453, 453, 453, 453, 453, 453, + 453, 454, 454, 454, 454, 454, 454, 454, 457, 457, + 462, 462, 462, 462, 463, 463, 463, 463, 463, 464, + 464, 464, 464, 464, 464, 468, 468, 468, 468, 469, + 469, 469, 469, 470, 470, 472, 475, 479, 484, 489, + 492, 495, 501, 504, 517, 521, 539, 546, 554, 568, + 571, 577, 585, 596, 601, 606, 615, 615, 617, 625, + 629, 634, 637, 641, 668, 672, 681, 684, 687, 690, + 693, 698, 701, 704, 711, 719, 724, 728, 731, 734, + 739, 742, 747, 751, 756, 760, 769, 774, 783, 787, + 791, 794, 797, 800, 805, 816, 824, 834, 842, 846, + 852, 852, 854, 859, 864, 873, 910, 914, 919, 929, + 934, 944 +}; +#endif + + +#if YYDEBUG != 0 || defined (YYERROR_VERBOSE) + +static const char * const yytname[] = { "$","error","$undefined.","ESINT64VAL", +"EUINT64VAL","SINTVAL","UINTVAL","VOID","BOOL","SBYTE","UBYTE","SHORT","USHORT", +"INT","UINT","LONG","ULONG","FLOAT","DOUBLE","STRING","TYPE","LABEL","VAR_ID", +"LABELSTR","STRINGCONSTANT","IMPLEMENTATION","TRUE","FALSE","BEGINTOK","END", +"DECLARE","PHI","CALL","RET","BR","SWITCH","NEG","NOT","TOINT","TOUINT","ADD", +"SUB","MUL","DIV","REM","SETLE","SETGE","SETLT","SETGT","SETEQ","SETNE","MALLOC", +"ALLOCA","FREE","LOAD","STORE","GETFIELD","PUTFIELD","'='","'['","']'","'x'", +"'{'","'}'","','","'('","')'","'*'","INTVAL","EINT64VAL","Types","TypesV","UnaryOps", +"BinaryOps","SIntType","UIntType","IntType","OptAssign","ConstVal","ConstVector", +"ConstPool","Module","MethodList","OptVAR_ID","ArgVal","ArgListH","ArgList", +"MethodHeaderH","MethodHeader","Method","ConstValueRef","ValueRef","TypeList", +"BasicBlockList","BasicBlock","InstructionList","BBTerminatorInst","JumpTable", +"Inst","ValueRefList","ValueRefListE","InstVal","MemoryInst", NULL 
+}; +#endif + +static const short yyr1[] = { 0, + 68, 68, 69, 69, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, + 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, + 73, 73, 73, 73, 73, 74, 74, 74, 74, 75, + 75, 75, 75, 76, 76, 77, 77, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 79, + 79, 80, 80, 81, 82, 82, 83, 83, 84, 85, + 85, 86, 86, 87, 88, 89, 90, 90, 90, 90, + 90, 91, 91, 91, 70, 70, 70, 70, 70, 70, + 70, 70, 92, 92, 93, 93, 94, 94, 95, 95, + 96, 96, 96, 96, 96, 97, 97, 98, 99, 99, + 100, 100, 101, 101, 101, 101, 101, 102, 102, 102, + 102, 102 +}; + +static const short yyr2[] = { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 2, 0, 2, 2, 2, + 2, 2, 2, 6, 5, 8, 7, 6, 4, 3, + 1, 3, 0, 1, 2, 2, 1, 0, 2, 3, + 1, 1, 0, 5, 3, 2, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 4, 3, 3, 5, 3, + 2, 2, 1, 3, 2, 2, 2, 3, 2, 0, + 3, 2, 3, 9, 9, 6, 5, 2, 2, 3, + 1, 0, 5, 3, 2, 6, 1, 2, 5, 2, + 5, 3 +}; + +static const short yydefact[] = { 63, + 47, 64, 0, 66, 0, 77, 78, 1, 2, 20, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 83, 81, 79, 80, 0, 0, + 82, 19, 0, 63, 100, 65, 84, 85, 100, 46, + 0, 39, 43, 38, 42, 37, 41, 36, 40, 0, + 0, 0, 0, 0, 0, 62, 78, 19, 0, 91, + 93, 0, 92, 0, 0, 47, 100, 96, 47, 76, + 95, 50, 51, 52, 53, 78, 19, 0, 0, 3, + 4, 48, 49, 0, 88, 90, 0, 73, 87, 0, + 75, 47, 0, 0, 0, 0, 97, 99, 0, 0, + 0, 0, 19, 94, 68, 71, 72, 0, 86, 98, + 102, 19, 0, 0, 44, 45, 0, 0, 0, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 0, 0, 0, 0, 0, 108, + 117, 19, 0, 59, 0, 89, 67, 69, 0, 74, + 101, 0, 103, 0, 19, 115, 19, 118, 120, 19, + 19, 19, 0, 55, 61, 0, 0, 70, 0, 0, + 109, 0, 0, 0, 0, 122, 114, 0, 0, 54, + 0, 58, 0, 0, 110, 112, 0, 0, 0, 57, + 0, 60, 0, 0, 111, 0, 119, 121, 113, 56, + 0, 0, 116, 0, 0, 0, 104, 0, 105, 0, + 0, 0, 0, 0, 107, 0, 106, 0, 0, 0 +}; + +static const short yydefgoto[] = { 31, + 82, 61, 59, 138, 139, 54, 55, 117, 5, 165, + 
166, 1, 218, 2, 148, 106, 107, 108, 34, 35, + 36, 37, 38, 62, 39, 68, 69, 97, 206, 98, + 156, 196, 140, 141 +}; + +static const short yypact[] = {-32768, + 59, 295, -23,-32768, 435,-32768,-32768,-32768,-32768,-32768, +-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, +-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, 320, 209, +-32768, -21, -20,-32768, 38,-32768,-32768,-32768, 83,-32768, + 66,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, 77, + 295, 380, 234, 206, 122,-32768, 107, 29, 108,-32768, + 167, 6,-32768, 111, 145, 101,-32768,-32768, 45,-32768, +-32768,-32768,-32768,-32768, 167, 142, 44, 121, 81,-32768, +-32768,-32768,-32768, 295,-32768,-32768, 295, 295,-32768, 193, +-32768, 45, 405, 1, 264, 149,-32768,-32768, 295, 205, + 202, 204, 58, 167, 10, 203,-32768, 215,-32768,-32768, + 217, 7, 116, 116,-32768,-32768, 116, 295, 295,-32768, +-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, +-32768,-32768,-32768,-32768, 295, 295, 295, 295, 295,-32768, +-32768, 72, 28,-32768, 435,-32768,-32768,-32768, 295,-32768, +-32768, 219,-32768, 220, 7, 221, 7, -59, 141, 7, + 7, 7, 210,-32768,-32768, 110, 199,-32768, 249, 265, +-32768, 116, 222, 274, 275,-32768,-32768, 226, 43,-32768, + 435,-32768, 116, 116,-32768, 295, 116, 116, 116,-32768, + 115,-32768, 227, 233, 221, 228,-32768,-32768,-32768,-32768, + 297, 264,-32768, 116, 104, 5,-32768, 231,-32768, 104, + 299, 279, 116, 324,-32768, 116,-32768, 348, 349,-32768 +}; + +static const short yypgoto[] = {-32768, +-32768, -2, 350,-32768,-32768, -93, -92, -24, -62, -4, + -119, 316,-32768,-32768,-32768,-32768, 207,-32768,-32768,-32768, +-32768, -64, -89, 11,-32768, 312, 286, 263,-32768,-32768, + 172,-32768,-32768,-32768 +}; + + +#define YYLAST 497 + + +static const short yytable[] = { 32, + 56, 115, 116, 64, 174, -19, 96, 63, 113, 6, + 7, 8, 9, 42, 43, 44, 45, 46, 47, 48, + 49, 114, 151, 152, 153, 167, 58, 154, 25, 96, + 26, 147, 27, 28, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 
65, 63, 50, 51, 75, 77, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 191, + 67, 50, 51, 79, 209, 171, 3, 173, 86, 87, + 176, 177, 178, 63, -19, 90, 63, 93, 94, 95, + 3, 103, 185, 4, 104, 105, 52, 164, 85, 53, + 112, 72, 73, 193, 194, 63, 142, 197, 198, 199, + 74, 52, 190, 100, 53, 67, 6, 7, 115, 116, + 63, 70, 115, 116, 207, 155, 157, 146, 6, 7, + 8, 9, 3, 215, 63, 83, 217, 26, 91, 27, + 28, 163, 158, 159, 160, 161, 162, 25, 63, 26, + 208, 27, 28, 102, 87, 212, 105, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 84, 26, 180, + 27, 28, 65, 181, 200, 88, 192, 205, 181, 118, + 119, 210, 101, 155, 120, 121, 122, 123, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, + 136, 137, 99, 29, 175, -19, 30, 63, 80, 81, + 89, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, -19, 26, 63, 27, 28, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 87, 26, 109, 27, + 28, 182, 181, 143, 144, 145, 149, 29, 179, 183, + 30, 60, 42, 43, 44, 45, 46, 47, 48, 49, + 150, -20, 169, 170, 172, 184, 186, 187, 188, 189, + 201, 202, 29, 203, 211, 30, 78, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 204, 26, 213, + 27, 28, 6, 57, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 214, 26, 216, 27, 28, 219, 220, 66, + 71, 33, 92, 29, 110, 168, 30, 195, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 29, 0, + 0, 30, 6, 76, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 0, 26, 0, 27, 28, 6, 7, 8, + 9, 111, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 0, 26, 0, + 27, 28, 0, 0, 0, 0, 0, 0, 29, 0, + 0, 30, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 0, 0, 50, 51, 0, 0, 0, 0, 0, + 0, 0, 0, 29, 0, 0, 30, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 52, 0, 0, 53 +}; + +static const short yycheck[] = { 2, + 5, 95, 95, 24, 64, 65, 69, 67, 8, 3, + 4, 5, 6, 9, 10, 
11, 12, 13, 14, 15, + 16, 21, 112, 113, 114, 145, 29, 117, 22, 92, + 24, 22, 26, 27, 58, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 65, 67, 19, 20, 51, 52, + 8, 9, 10, 11, 12, 13, 14, 15, 16, 179, + 23, 19, 20, 53, 60, 155, 22, 157, 63, 64, + 160, 161, 162, 67, 65, 65, 67, 33, 34, 35, + 22, 84, 172, 25, 87, 88, 59, 60, 60, 62, + 93, 26, 27, 183, 184, 67, 99, 187, 188, 189, + 24, 59, 60, 60, 62, 23, 3, 4, 202, 202, + 67, 29, 206, 206, 204, 118, 119, 60, 3, 4, + 5, 6, 22, 213, 67, 4, 216, 24, 28, 26, + 27, 60, 135, 136, 137, 138, 139, 22, 67, 24, + 205, 26, 27, 63, 64, 210, 149, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 61, 24, 60, + 26, 27, 65, 64, 60, 65, 181, 202, 64, 31, + 32, 206, 62, 186, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 61, 59, 64, 65, 62, 67, 3, 4, + 66, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 65, 24, 67, 26, 27, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 64, 24, 66, 26, + 27, 63, 64, 59, 63, 62, 64, 59, 59, 21, + 62, 63, 9, 10, 11, 12, 13, 14, 15, 16, + 66, 65, 64, 64, 64, 21, 65, 14, 14, 64, + 64, 59, 59, 66, 64, 62, 63, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 21, 24, 21, + 26, 27, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 64, 24, 21, 26, 27, 0, 0, 34, + 39, 2, 67, 59, 92, 149, 62, 186, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 59, -1, + -1, 62, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, -1, 24, -1, 26, 27, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, -1, 24, -1, + 26, 27, -1, -1, -1, -1, -1, -1, 59, -1, + -1, 62, 8, 9, 10, 11, 12, 13, 14, 15, + 16, -1, -1, 19, 20, -1, -1, -1, -1, -1, + -1, -1, -1, 59, -1, -1, 62, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 59, -1, -1, 62 +}; 
+/* -*-C-*- Note some compilers choke on comments on `#line' lines. */ +#line 3 "/usr/dcs/software/supported/encap/bison-1.28/share/bison.simple" +/* This file comes from bison-1.28. */ + +/* Skeleton output parser for bison, + Copyright (C) 1984, 1989, 1990 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, when this file is copied by Bison into a + Bison output file, you may use that output file without restriction. + This special exception was added by the Free Software Foundation + in version 1.24 of Bison. */ + +/* This is the parser code that is written into each bison parser + when the %semantic_parser declaration is not specified in the grammar. + It was written by Richard Stallman by simplifying the hairy parser + used when %semantic_parser is specified. */ + +#ifndef YYSTACK_USE_ALLOCA +#ifdef alloca +#define YYSTACK_USE_ALLOCA +#else /* alloca not defined */ +#ifdef __GNUC__ +#define YYSTACK_USE_ALLOCA +#define alloca __builtin_alloca +#else /* not GNU C. */ +#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || defined (__sgi) || (defined (__sun) && defined (__i386)) +#define YYSTACK_USE_ALLOCA +#include +#else /* not sparc */ +/* We think this test detects Watcom and Microsoft C. 
*/ +/* This used to test MSDOS, but that is a bad idea + since that symbol is in the user namespace. */ +#if (defined (_MSDOS) || defined (_MSDOS_)) && !defined (__TURBOC__) +#if 0 /* No need for malloc.h, which pollutes the namespace; + instead, just don't use alloca. */ +#include +#endif +#else /* not MSDOS, or __TURBOC__ */ +#if defined(_AIX) +/* I don't know what this was needed for, but it pollutes the namespace. + So I turned it off. rms, 2 May 1997. */ +/* #include */ + #pragma alloca +#define YYSTACK_USE_ALLOCA +#else /* not MSDOS, or __TURBOC__, or _AIX */ +#if 0 +#ifdef __hpux /* haible@ilog.fr says this works for HPUX 9.05 and up, + and on HPUX 10. Eventually we can turn this on. */ +#define YYSTACK_USE_ALLOCA +#define alloca __builtin_alloca +#endif /* __hpux */ +#endif +#endif /* not _AIX */ +#endif /* not MSDOS, or __TURBOC__ */ +#endif /* not sparc */ +#endif /* not GNU C */ +#endif /* alloca not defined */ +#endif /* YYSTACK_USE_ALLOCA not defined */ + +#ifdef YYSTACK_USE_ALLOCA +#define YYSTACK_ALLOC alloca +#else +#define YYSTACK_ALLOC malloc +#endif + +/* Note: there must be only one dollar sign in this file. + It is replaced by the list of actions, each action + as one case of the switch. */ + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY -2 +#define YYEOF 0 +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrlab1 +/* Like YYERROR except do call yyerror. + This remains here temporarily to ease the + transition to the new meaning of YYERROR, for GCC. + Once GCC version 2 has supplanted version 1, this can go. 
*/ +#define YYFAIL goto yyerrlab +#define YYRECOVERING() (!!yyerrstatus) +#define YYBACKUP(token, value) \ +do \ + if (yychar == YYEMPTY && yylen == 1) \ + { yychar = (token), yylval = (value); \ + yychar1 = YYTRANSLATE (yychar); \ + YYPOPSTACK; \ + goto yybackup; \ + } \ + else \ + { yyerror ("syntax error: cannot back up"); YYERROR; } \ +while (0) + +#define YYTERROR 1 +#define YYERRCODE 256 + +#ifndef YYPURE +#define YYLEX yylex() +#endif + +#ifdef YYPURE +#ifdef YYLSP_NEEDED +#ifdef YYLEX_PARAM +#define YYLEX yylex(&yylval, &yylloc, YYLEX_PARAM) +#else +#define YYLEX yylex(&yylval, &yylloc) +#endif +#else /* not YYLSP_NEEDED */ +#ifdef YYLEX_PARAM +#define YYLEX yylex(&yylval, YYLEX_PARAM) +#else +#define YYLEX yylex(&yylval) +#endif +#endif /* not YYLSP_NEEDED */ +#endif + +/* If nonreentrant, generate the variables here */ + +#ifndef YYPURE + +int yychar; /* the lookahead symbol */ +YYSTYPE yylval; /* the semantic value of the */ + /* lookahead symbol */ + +#ifdef YYLSP_NEEDED +YYLTYPE yylloc; /* location data for the lookahead */ + /* symbol */ +#endif + +int yynerrs; /* number of parse errors so far */ +#endif /* not YYPURE */ + +#if YYDEBUG != 0 +int yydebug; /* nonzero means print parse trace */ +/* Since this is uninitialized, it does not stop multiple parsers + from coexisting. */ +#endif + +/* YYINITDEPTH indicates the initial size of the parser's stacks */ + +#ifndef YYINITDEPTH +#define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH is the maximum size the stacks can grow to + (effective only if the built-in stack extension method is used). */ + +#if YYMAXDEPTH == 0 +#undef YYMAXDEPTH +#endif + +#ifndef YYMAXDEPTH +#define YYMAXDEPTH 10000 +#endif + +/* Define __yy_memcpy. Note that the size argument + should be passed with type unsigned int, because that is what the non-GCC + definitions require. With GCC, __builtin_memcpy takes an arg + of type size_t, but it can handle unsigned int. */ + +#if __GNUC__ > 1 /* GNU C and GNU C++ define this. 
*/ +#define __yy_memcpy(TO,FROM,COUNT) __builtin_memcpy(TO,FROM,COUNT) +#else /* not GNU C or C++ */ +#ifndef __cplusplus + +/* This is the most reliable way to avoid incompatibilities + in available built-in functions on various systems. */ +static void +__yy_memcpy (to, from, count) + char *to; + char *from; + unsigned int count; +{ + register char *f = from; + register char *t = to; + register int i = count; + + while (i-- > 0) + *t++ = *f++; +} + +#else /* __cplusplus */ + +/* This is the most reliable way to avoid incompatibilities + in available built-in functions on various systems. */ +static void +__yy_memcpy (char *to, char *from, unsigned int count) +{ + register char *t = to; + register char *f = from; + register int i = count; + + while (i-- > 0) + *t++ = *f++; +} + +#endif +#endif + +#line 217 "/usr/dcs/software/supported/encap/bison-1.28/share/bison.simple" + +/* The user can define YYPARSE_PARAM as the name of an argument to be passed + into yyparse. The argument should have type void *. + It should actually point to an object. + Grammar actions can access the variable by casting it + to the proper pointer type. */ + +#ifdef YYPARSE_PARAM +#ifdef __cplusplus +#define YYPARSE_PARAM_ARG void *YYPARSE_PARAM +#define YYPARSE_PARAM_DECL +#else /* not __cplusplus */ +#define YYPARSE_PARAM_ARG YYPARSE_PARAM +#define YYPARSE_PARAM_DECL void *YYPARSE_PARAM; +#endif /* not __cplusplus */ +#else /* not YYPARSE_PARAM */ +#define YYPARSE_PARAM_ARG +#define YYPARSE_PARAM_DECL +#endif /* not YYPARSE_PARAM */ + +/* Prevent warning if -Wstrict-prototypes. 
*/ +#ifdef __GNUC__ +#ifdef YYPARSE_PARAM +int yyparse (void *); +#else +int yyparse (void); +#endif +#endif + +int +yyparse(YYPARSE_PARAM_ARG) + YYPARSE_PARAM_DECL +{ + register int yystate; + register int yyn; + register short *yyssp; + register YYSTYPE *yyvsp; + int yyerrstatus; /* number of tokens to shift before error messages enabled */ + int yychar1 = 0; /* lookahead token as an internal (translated) token number */ + + short yyssa[YYINITDEPTH]; /* the state stack */ + YYSTYPE yyvsa[YYINITDEPTH]; /* the semantic value stack */ + + short *yyss = yyssa; /* refer to the stacks thru separate pointers */ + YYSTYPE *yyvs = yyvsa; /* to allow yyoverflow to reallocate them elsewhere */ + +#ifdef YYLSP_NEEDED + YYLTYPE yylsa[YYINITDEPTH]; /* the location stack */ + YYLTYPE *yyls = yylsa; + YYLTYPE *yylsp; + +#define YYPOPSTACK (yyvsp--, yyssp--, yylsp--) +#else +#define YYPOPSTACK (yyvsp--, yyssp--) +#endif + + int yystacksize = YYINITDEPTH; + int yyfree_stacks = 0; + +#ifdef YYPURE + int yychar; + YYSTYPE yylval; + int yynerrs; +#ifdef YYLSP_NEEDED + YYLTYPE yylloc; +#endif +#endif + + YYSTYPE yyval; /* the variable used to return */ + /* semantic values from the action */ + /* routines */ + + int yylen; + +#if YYDEBUG != 0 + if (yydebug) + fprintf(stderr, "Starting parse\n"); +#endif + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + + /* Initialize stack pointers. + Waste one element of value and location stack + so that they stay on the same level as the state stack. + The wasted elements are never initialized. */ + + yyssp = yyss - 1; + yyvsp = yyvs; +#ifdef YYLSP_NEEDED + yylsp = yyls; +#endif + +/* Push a new state, which is found in yystate . */ +/* In all cases, when you get here, the value and location stacks + have just been pushed. so pushing a state here evens the stacks. 
*/ +yynewstate: + + *++yyssp = yystate; + + if (yyssp >= yyss + yystacksize - 1) + { + /* Give user a chance to reallocate the stack */ + /* Use copies of these so that the &'s don't force the real ones into memory. */ + YYSTYPE *yyvs1 = yyvs; + short *yyss1 = yyss; +#ifdef YYLSP_NEEDED + YYLTYPE *yyls1 = yyls; +#endif + + /* Get the current used size of the three stacks, in elements. */ + int size = yyssp - yyss + 1; + +#ifdef yyoverflow + /* Each stack pointer address is followed by the size of + the data in use in that stack, in bytes. */ +#ifdef YYLSP_NEEDED + /* This used to be a conditional around just the two extra args, + but that might be undefined if yyoverflow is a macro. */ + yyoverflow("parser stack overflow", + &yyss1, size * sizeof (*yyssp), + &yyvs1, size * sizeof (*yyvsp), + &yyls1, size * sizeof (*yylsp), + &yystacksize); +#else + yyoverflow("parser stack overflow", + &yyss1, size * sizeof (*yyssp), + &yyvs1, size * sizeof (*yyvsp), + &yystacksize); +#endif + + yyss = yyss1; yyvs = yyvs1; +#ifdef YYLSP_NEEDED + yyls = yyls1; +#endif +#else /* no yyoverflow */ + /* Extend the stack our own way. 
*/ + if (yystacksize >= YYMAXDEPTH) + { + yyerror("parser stack overflow"); + if (yyfree_stacks) + { + free (yyss); + free (yyvs); +#ifdef YYLSP_NEEDED + free (yyls); +#endif + } + return 2; + } + yystacksize *= 2; + if (yystacksize > YYMAXDEPTH) + yystacksize = YYMAXDEPTH; +#ifndef YYSTACK_USE_ALLOCA + yyfree_stacks = 1; +#endif + yyss = (short *) YYSTACK_ALLOC (yystacksize * sizeof (*yyssp)); + __yy_memcpy ((char *)yyss, (char *)yyss1, + size * (unsigned int) sizeof (*yyssp)); + yyvs = (YYSTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yyvsp)); + __yy_memcpy ((char *)yyvs, (char *)yyvs1, + size * (unsigned int) sizeof (*yyvsp)); +#ifdef YYLSP_NEEDED + yyls = (YYLTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yylsp)); + __yy_memcpy ((char *)yyls, (char *)yyls1, + size * (unsigned int) sizeof (*yylsp)); +#endif +#endif /* no yyoverflow */ + + yyssp = yyss + size - 1; + yyvsp = yyvs + size - 1; +#ifdef YYLSP_NEEDED + yylsp = yyls + size - 1; +#endif + +#if YYDEBUG != 0 + if (yydebug) + fprintf(stderr, "Stack size increased to %d\n", yystacksize); +#endif + + if (yyssp >= yyss + yystacksize - 1) + YYABORT; + } + +#if YYDEBUG != 0 + if (yydebug) + fprintf(stderr, "Entering state %d\n", yystate); +#endif + + goto yybackup; + yybackup: + +/* Do appropriate processing given the current state. */ +/* Read a lookahead token if we need one and don't already have one. */ +/* yyresume: */ + + /* First try to decide what to do without reference to lookahead token. */ + + yyn = yypact[yystate]; + if (yyn == YYFLAG) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* yychar is either YYEMPTY or YYEOF + or a valid token in external form. */ + + if (yychar == YYEMPTY) + { +#if YYDEBUG != 0 + if (yydebug) + fprintf(stderr, "Reading a token: "); +#endif + yychar = YYLEX; + } + + /* Convert token to internal form (in yychar1) for indexing tables with */ + + if (yychar <= 0) /* This means end of input. 
*/ + { + yychar1 = 0; + yychar = YYEOF; /* Don't call YYLEX any more */ + +#if YYDEBUG != 0 + if (yydebug) + fprintf(stderr, "Now at end of input.\n"); +#endif + } + else + { + yychar1 = YYTRANSLATE(yychar); + +#if YYDEBUG != 0 + if (yydebug) + { + fprintf (stderr, "Next token is %d (%s", yychar, yytname[yychar1]); + /* Give the individual parser a way to print the precise meaning + of a token, for further debugging info. */ +#ifdef YYPRINT + YYPRINT (stderr, yychar, yylval); +#endif + fprintf (stderr, ")\n"); + } +#endif + } + + yyn += yychar1; + if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) + goto yydefault; + + yyn = yytable[yyn]; + + /* yyn is what to do for this token type in this state. + Negative => reduce, -yyn is rule number. + Positive => shift, yyn is new state. + New state is final state => don't bother to shift, + just return success. + 0, or most negative number => error. */ + + if (yyn < 0) + { + if (yyn == YYFLAG) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + else if (yyn == 0) + goto yyerrlab; + + if (yyn == YYFINAL) + YYACCEPT; + + /* Shift the lookahead token. */ + +#if YYDEBUG != 0 + if (yydebug) + fprintf(stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1]); +#endif + + /* Discard the token being shifted unless it is eof. */ + if (yychar != YYEOF) + yychar = YYEMPTY; + + *++yyvsp = yylval; +#ifdef YYLSP_NEEDED + *++yylsp = yylloc; +#endif + + /* count tokens shifted since error; after three, turn off error status. */ + if (yyerrstatus) yyerrstatus--; + + yystate = yyn; + goto yynewstate; + +/* Do the default action for the current state. */ +yydefault: + + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + +/* Do a reduction. yyn is the number of a rule to reduce with. 
*/ +yyreduce: + yylen = yyr2[yyn]; + if (yylen > 0) + yyval = yyvsp[1-yylen]; /* implement default value of the action */ + +#if YYDEBUG != 0 + if (yydebug) + { + int i; + + fprintf (stderr, "Reducing via rule %d (line %d), ", + yyn, yyrline[yyn]); + + /* Print the symbols being reduced, and their result. */ + for (i = yyprhs[yyn]; yyrhs[i] > 0; i++) + fprintf (stderr, "%s ", yytname[yyrhs[i]]); + fprintf (stderr, " -> %s\n", yytname[yyr1[yyn]]); + } +#endif + + + switch (yyn) { + +case 2: +#line 434 "llvmAsmParser.y" +{ + if (yyvsp[0].UIntVal > (uint32_t)INT32_MAX) // Outside of my range! + ThrowException("Value too large for type!"); + yyval.SIntVal = (int32_t)yyvsp[0].UIntVal; +; + break;} +case 4: +#line 442 "llvmAsmParser.y" +{ + if (yyvsp[0].UInt64Val > (uint64_t)INT64_MAX) // Outside of my range! + ThrowException("Value too large for type!"); + yyval.SInt64Val = (int64_t)yyvsp[0].UInt64Val; +; + break;} +case 46: +#line 472 "llvmAsmParser.y" +{ + yyval.StrVal = yyvsp[-1].StrVal; + ; + break;} +case 47: +#line 475 "llvmAsmParser.y" +{ + yyval.StrVal = 0; + ; + break;} +case 48: +#line 479 "llvmAsmParser.y" +{ // integral constants + if (!ConstPoolSInt::isValueValidForType(yyvsp[-1].TypeVal, yyvsp[0].SInt64Val)) + ThrowException("Constant value doesn't fit in type!"); + yyval.ConstVal = new ConstPoolSInt(yyvsp[-1].TypeVal, yyvsp[0].SInt64Val); + ; + break;} +case 49: +#line 484 "llvmAsmParser.y" +{ // integral constants + if (!ConstPoolUInt::isValueValidForType(yyvsp[-1].TypeVal, yyvsp[0].UInt64Val)) + ThrowException("Constant value doesn't fit in type!"); + yyval.ConstVal = new ConstPoolUInt(yyvsp[-1].TypeVal, yyvsp[0].UInt64Val); + ; + break;} +case 50: +#line 489 "llvmAsmParser.y" +{ // Boolean constants + yyval.ConstVal = new ConstPoolBool(true); + ; + break;} +case 51: +#line 492 "llvmAsmParser.y" +{ // Boolean constants + yyval.ConstVal = new ConstPoolBool(false); + ; + break;} +case 52: +#line 495 "llvmAsmParser.y" +{ // String constants + cerr << 
"FIXME: TODO: String constants [sbyte] not implemented yet!\n"; + abort(); + //$$ = new ConstPoolString($2); + free(yyvsp[0].StrVal); + ; + break;} +case 53: +#line 501 "llvmAsmParser.y" +{ // Type constants + yyval.ConstVal = new ConstPoolType(yyvsp[0].TypeVal); + ; + break;} +case 54: +#line 504 "llvmAsmParser.y" +{ // Nonempty array constant + // Verify all elements are correct type! + const ArrayType *AT = ArrayType::getArrayType(yyvsp[-4].TypeVal); + for (unsigned i = 0; i < yyvsp[-1].ConstVector->size(); i++) { + if (yyvsp[-4].TypeVal != (*yyvsp[-1].ConstVector)[i]->getType()) + ThrowException("Element #" + utostr(i) + " is not of type '" + + yyvsp[-4].TypeVal->getName() + "' as required!\nIt is of type '" + + (*yyvsp[-1].ConstVector)[i]->getType()->getName() + "'."); + } + + yyval.ConstVal = new ConstPoolArray(AT, *yyvsp[-1].ConstVector); + delete yyvsp[-1].ConstVector; + ; + break;} +case 55: +#line 517 "llvmAsmParser.y" +{ // Empty array constant + vector Empty; + yyval.ConstVal = new ConstPoolArray(ArrayType::getArrayType(yyvsp[-3].TypeVal), Empty); + ; + break;} +case 56: +#line 521 "llvmAsmParser.y" +{ + // Verify all elements are correct type! 
+ const ArrayType *AT = ArrayType::getArrayType(yyvsp[-4].TypeVal, (int)yyvsp[-6].UInt64Val); + if (yyvsp[-6].UInt64Val != yyvsp[-1].ConstVector->size()) + ThrowException("Type mismatch: constant sized array initialized with " + + utostr(yyvsp[-1].ConstVector->size()) + " arguments, but has size of " + + itostr((int)yyvsp[-6].UInt64Val) + "!"); + + for (unsigned i = 0; i < yyvsp[-1].ConstVector->size(); i++) { + if (yyvsp[-4].TypeVal != (*yyvsp[-1].ConstVector)[i]->getType()) + ThrowException("Element #" + utostr(i) + " is not of type '" + + yyvsp[-4].TypeVal->getName() + "' as required!\nIt is of type '" + + (*yyvsp[-1].ConstVector)[i]->getType()->getName() + "'."); + } + + yyval.ConstVal = new ConstPoolArray(AT, *yyvsp[-1].ConstVector); + delete yyvsp[-1].ConstVector; + ; + break;} +case 57: +#line 539 "llvmAsmParser.y" +{ + if (yyvsp[-5].UInt64Val != 0) + ThrowException("Type mismatch: constant sized array initialized with 0" + " arguments, but has size of " + itostr((int)yyvsp[-5].UInt64Val) + "!"); + vector Empty; + yyval.ConstVal = new ConstPoolArray(ArrayType::getArrayType(yyvsp[-3].TypeVal, 0), Empty); + ; + break;} +case 58: +#line 546 "llvmAsmParser.y" +{ + StructType::ElementTypes Types(yyvsp[-4].TypeList->begin(), yyvsp[-4].TypeList->end()); + delete yyvsp[-4].TypeList; + + const StructType *St = StructType::getStructType(Types); + yyval.ConstVal = new ConstPoolStruct(St, *yyvsp[-1].ConstVector); + delete yyvsp[-1].ConstVector; + ; + break;} +case 59: +#line 554 "llvmAsmParser.y" +{ + const StructType *St = + StructType::getStructType(StructType::ElementTypes()); + vector Empty; + yyval.ConstVal = new ConstPoolStruct(St, Empty); + ; + break;} +case 60: +#line 568 "llvmAsmParser.y" +{ + (yyval.ConstVector = yyvsp[-2].ConstVector)->push_back(addConstValToConstantPool(yyvsp[0].ConstVal)); + ; + break;} +case 61: +#line 571 "llvmAsmParser.y" +{ + yyval.ConstVector = new vector(); + yyval.ConstVector->push_back(addConstValToConstantPool(yyvsp[0].ConstVal)); 
+ ; + break;} +case 62: +#line 577 "llvmAsmParser.y" +{ + if (yyvsp[-1].StrVal) { + yyvsp[0].ConstVal->setName(yyvsp[-1].StrVal); + free(yyvsp[-1].StrVal); + } + + addConstValToConstantPool(yyvsp[0].ConstVal); + ; + break;} +case 63: +#line 585 "llvmAsmParser.y" +{ + ; + break;} +case 64: +#line 596 "llvmAsmParser.y" +{ + yyval.ModuleVal = ParserResult = yyvsp[0].ModuleVal; + CurModule.ModuleDone(); +; + break;} +case 65: +#line 601 "llvmAsmParser.y" +{ + yyvsp[-1].ModuleVal->getMethodList().push_back(yyvsp[0].MethodVal); + CurMeth.MethodDone(); + yyval.ModuleVal = yyvsp[-1].ModuleVal; + ; + break;} +case 66: +#line 606 "llvmAsmParser.y" +{ + yyval.ModuleVal = CurModule.CurrentModule; + ; + break;} +case 68: +#line 615 "llvmAsmParser.y" +{ yyval.StrVal = 0; ; + break;} +case 69: +#line 617 "llvmAsmParser.y" +{ + yyval.MethArgVal = new MethodArgument(yyvsp[-1].TypeVal); + if (yyvsp[0].StrVal) { // Was the argument named? + yyval.MethArgVal->setName(yyvsp[0].StrVal); + free(yyvsp[0].StrVal); // The string was strdup'd, so free it now. 
+ } +; + break;} +case 70: +#line 625 "llvmAsmParser.y" +{ + yyval.MethodArgList = yyvsp[0].MethodArgList; + yyvsp[0].MethodArgList->push_front(yyvsp[-2].MethArgVal); + ; + break;} +case 71: +#line 629 "llvmAsmParser.y" +{ + yyval.MethodArgList = new list(); + yyval.MethodArgList->push_front(yyvsp[0].MethArgVal); + ; + break;} +case 72: +#line 634 "llvmAsmParser.y" +{ + yyval.MethodArgList = yyvsp[0].MethodArgList; + ; + break;} +case 73: +#line 637 "llvmAsmParser.y" +{ + yyval.MethodArgList = 0; + ; + break;} +case 74: +#line 641 "llvmAsmParser.y" +{ + MethodType::ParamTypes ParamTypeList; + if (yyvsp[-1].MethodArgList) + for (list::iterator I = yyvsp[-1].MethodArgList->begin(); I != yyvsp[-1].MethodArgList->end(); I++) + ParamTypeList.push_back((*I)->getType()); + + const MethodType *MT = MethodType::getMethodType(yyvsp[-4].TypeVal, ParamTypeList); + + Method *M = new Method(MT, yyvsp[-3].StrVal); + free(yyvsp[-3].StrVal); // Free strdup'd memory! + + InsertValue(M, CurModule.Values); + + CurMeth.MethodStart(M); + + // Add all of the arguments we parsed to the method... + if (yyvsp[-1].MethodArgList) { // Is null if empty... 
+ Method::ArgumentListType &ArgList = M->getArgumentList(); + + for (list::iterator I = yyvsp[-1].MethodArgList->begin(); I != yyvsp[-1].MethodArgList->end(); I++) { + InsertValue(*I); + ArgList.push_back(*I); + } + delete yyvsp[-1].MethodArgList; // We're now done with the argument list + } +; + break;} +case 75: +#line 668 "llvmAsmParser.y" +{ + yyval.MethodVal = CurMeth.CurrentMethod; +; + break;} +case 76: +#line 672 "llvmAsmParser.y" +{ + yyval.MethodVal = yyvsp[-1].MethodVal; +; + break;} +case 77: +#line 681 "llvmAsmParser.y" +{ // A reference to a direct constant + yyval.ValIDVal = ValID::create(yyvsp[0].SInt64Val); + ; + break;} +case 78: +#line 684 "llvmAsmParser.y" +{ + yyval.ValIDVal = ValID::create(yyvsp[0].UInt64Val); + ; + break;} +case 79: +#line 687 "llvmAsmParser.y" +{ + yyval.ValIDVal = ValID::create((int64_t)1); + ; + break;} +case 80: +#line 690 "llvmAsmParser.y" +{ + yyval.ValIDVal = ValID::create((int64_t)0); + ; + break;} +case 81: +#line 693 "llvmAsmParser.y" +{ // Quoted strings work too... especially for methods + yyval.ValIDVal = ValID::create_conststr(yyvsp[0].StrVal); + ; + break;} +case 82: +#line 698 "llvmAsmParser.y" +{ // Is it an integer reference...? + yyval.ValIDVal = ValID::create(yyvsp[0].SIntVal); + ; + break;} +case 83: +#line 701 "llvmAsmParser.y" +{ // It must be a named reference then... + yyval.ValIDVal = ValID::create(yyvsp[0].StrVal); + ; + break;} +case 84: +#line 704 "llvmAsmParser.y" +{ + yyval.ValIDVal = yyvsp[0].ValIDVal; + ; + break;} +case 85: +#line 711 "llvmAsmParser.y" +{ + Value *D = getVal(Type::TypeTy, yyvsp[0].ValIDVal, true); + if (D == 0) ThrowException("Invalid user defined type: " + yyvsp[0].ValIDVal.getName()); + assert (D->getValueType() == Value::ConstantVal && + "Internal error! User defined type not in const pool!"); + ConstPoolType *CPT = (ConstPoolType*)D; + yyval.TypeVal = CPT->getValue(); + ; + break;} +case 86: +#line 719 "llvmAsmParser.y" +{ // Method derived type? 
+ MethodType::ParamTypes Params(yyvsp[-1].TypeList->begin(), yyvsp[-1].TypeList->end()); + delete yyvsp[-1].TypeList; + yyval.TypeVal = MethodType::getMethodType(yyvsp[-3].TypeVal, Params); + ; + break;} +case 87: +#line 724 "llvmAsmParser.y" +{ // Method derived type? + MethodType::ParamTypes Params; // Empty list + yyval.TypeVal = MethodType::getMethodType(yyvsp[-2].TypeVal, Params); + ; + break;} +case 88: +#line 728 "llvmAsmParser.y" +{ + yyval.TypeVal = ArrayType::getArrayType(yyvsp[-1].TypeVal); + ; + break;} +case 89: +#line 731 "llvmAsmParser.y" +{ + yyval.TypeVal = ArrayType::getArrayType(yyvsp[-1].TypeVal, (int)yyvsp[-3].UInt64Val); + ; + break;} +case 90: +#line 734 "llvmAsmParser.y" +{ + StructType::ElementTypes Elements(yyvsp[-1].TypeList->begin(), yyvsp[-1].TypeList->end()); + delete yyvsp[-1].TypeList; + yyval.TypeVal = StructType::getStructType(Elements); + ; + break;} +case 91: +#line 739 "llvmAsmParser.y" +{ + yyval.TypeVal = StructType::getStructType(StructType::ElementTypes()); + ; + break;} +case 92: +#line 742 "llvmAsmParser.y" +{ + yyval.TypeVal = PointerType::getPointerType(yyvsp[-1].TypeVal); + ; + break;} +case 93: +#line 747 "llvmAsmParser.y" +{ + yyval.TypeList = new list(); + yyval.TypeList->push_back(yyvsp[0].TypeVal); + ; + break;} +case 94: +#line 751 "llvmAsmParser.y" +{ + (yyval.TypeList=yyvsp[-2].TypeList)->push_back(yyvsp[0].TypeVal); + ; + break;} +case 95: +#line 756 "llvmAsmParser.y" +{ + yyvsp[-1].MethodVal->getBasicBlocks().push_back(yyvsp[0].BasicBlockVal); + yyval.MethodVal = yyvsp[-1].MethodVal; + ; + break;} +case 96: +#line 760 "llvmAsmParser.y" +{ // Do not allow methods with 0 basic blocks + yyval.MethodVal = yyvsp[-1].MethodVal; // in them... 
+ yyvsp[-1].MethodVal->getBasicBlocks().push_back(yyvsp[0].BasicBlockVal); + ; + break;} +case 97: +#line 769 "llvmAsmParser.y" +{ + yyvsp[-1].BasicBlockVal->getInstList().push_back(yyvsp[0].TermInstVal); + InsertValue(yyvsp[-1].BasicBlockVal); + yyval.BasicBlockVal = yyvsp[-1].BasicBlockVal; + ; + break;} +case 98: +#line 774 "llvmAsmParser.y" +{ + yyvsp[-1].BasicBlockVal->getInstList().push_back(yyvsp[0].TermInstVal); + yyvsp[-1].BasicBlockVal->setName(yyvsp[-2].StrVal); + free(yyvsp[-2].StrVal); // Free the strdup'd memory... + + InsertValue(yyvsp[-1].BasicBlockVal); + yyval.BasicBlockVal = yyvsp[-1].BasicBlockVal; + ; + break;} +case 99: +#line 783 "llvmAsmParser.y" +{ + yyvsp[-1].BasicBlockVal->getInstList().push_back(yyvsp[0].InstVal); + yyval.BasicBlockVal = yyvsp[-1].BasicBlockVal; + ; + break;} +case 100: +#line 787 "llvmAsmParser.y" +{ + yyval.BasicBlockVal = new BasicBlock(); + ; + break;} +case 101: +#line 791 "llvmAsmParser.y" +{ // Return with a result... + yyval.TermInstVal = new ReturnInst(getVal(yyvsp[-1].TypeVal, yyvsp[0].ValIDVal)); + ; + break;} +case 102: +#line 794 "llvmAsmParser.y" +{ // Return with no result... + yyval.TermInstVal = new ReturnInst(); + ; + break;} +case 103: +#line 797 "llvmAsmParser.y" +{ // Unconditional Branch... 
+ yyval.TermInstVal = new BranchInst((BasicBlock*)getVal(Type::LabelTy, yyvsp[0].ValIDVal)); + ; + break;} +case 104: +#line 800 "llvmAsmParser.y" +{ + yyval.TermInstVal = new BranchInst((BasicBlock*)getVal(Type::LabelTy, yyvsp[-3].ValIDVal), + (BasicBlock*)getVal(Type::LabelTy, yyvsp[0].ValIDVal), + getVal(Type::BoolTy, yyvsp[-6].ValIDVal)); + ; + break;} +case 105: +#line 805 "llvmAsmParser.y" +{ + SwitchInst *S = new SwitchInst(getVal(yyvsp[-7].TypeVal, yyvsp[-6].ValIDVal), + (BasicBlock*)getVal(Type::LabelTy, yyvsp[-3].ValIDVal)); + yyval.TermInstVal = S; + + list >::iterator I = yyvsp[-1].JumpTable->begin(), + end = yyvsp[-1].JumpTable->end(); + for (; I != end; I++) + S->dest_push_back(I->first, I->second); + ; + break;} +case 106: +#line 816 "llvmAsmParser.y" +{ + yyval.JumpTable = yyvsp[-5].JumpTable; + ConstPoolVal *V = (ConstPoolVal*)getVal(yyvsp[-4].TypeVal, yyvsp[-3].ValIDVal, true); + if (V == 0) + ThrowException("May only switch on a constant pool value!"); + + yyval.JumpTable->push_back(make_pair(V, (BasicBlock*)getVal(yyvsp[-1].TypeVal, yyvsp[0].ValIDVal))); + ; + break;} +case 107: +#line 824 "llvmAsmParser.y" +{ + yyval.JumpTable = new list >(); + ConstPoolVal *V = (ConstPoolVal*)getVal(yyvsp[-4].TypeVal, yyvsp[-3].ValIDVal, true); + + if (V == 0) + ThrowException("May only switch on a constant pool value!"); + + yyval.JumpTable->push_back(make_pair(V, (BasicBlock*)getVal(yyvsp[-1].TypeVal, yyvsp[0].ValIDVal))); + ; + break;} +case 108: +#line 834 "llvmAsmParser.y" +{ + if (yyvsp[-1].StrVal) // Is this definition named?? + yyvsp[0].InstVal->setName(yyvsp[-1].StrVal); // if so, assign the name... + + InsertValue(yyvsp[0].InstVal); + yyval.InstVal = yyvsp[0].InstVal; +; + break;} +case 109: +#line 842 "llvmAsmParser.y" +{ // Used for PHI nodes and call statements... 
+ yyval.ValueList = new list(); + yyval.ValueList->push_back(getVal(yyvsp[-1].TypeVal, yyvsp[0].ValIDVal)); + ; + break;} +case 110: +#line 846 "llvmAsmParser.y" +{ + yyval.ValueList = yyvsp[-2].ValueList; + yyvsp[-2].ValueList->push_back(getVal(yyvsp[-2].ValueList->front()->getType(), yyvsp[0].ValIDVal)); + ; + break;} +case 112: +#line 852 "llvmAsmParser.y" +{ yyval.ValueList = 0; ; + break;} +case 113: +#line 854 "llvmAsmParser.y" +{ + yyval.InstVal = Instruction::getBinaryOperator(yyvsp[-4].BinaryOpVal, getVal(yyvsp[-3].TypeVal, yyvsp[-2].ValIDVal), getVal(yyvsp[-3].TypeVal, yyvsp[0].ValIDVal)); + if (yyval.InstVal == 0) + ThrowException("binary operator returned null!"); + ; + break;} +case 114: +#line 859 "llvmAsmParser.y" +{ + yyval.InstVal = Instruction::getUnaryOperator(yyvsp[-2].UnaryOpVal, getVal(yyvsp[-1].TypeVal, yyvsp[0].ValIDVal)); + if (yyval.InstVal == 0) + ThrowException("unary operator returned null!"); + ; + break;} +case 115: +#line 864 "llvmAsmParser.y" +{ + yyval.InstVal = new PHINode(yyvsp[0].ValueList->front()->getType()); + while (yyvsp[0].ValueList->begin() != yyvsp[0].ValueList->end()) { + // TODO: Ensure all types are the same... + ((PHINode*)yyval.InstVal)->addIncoming(yyvsp[0].ValueList->front()); + yyvsp[0].ValueList->pop_front(); + } + delete yyvsp[0].ValueList; // Free the list... + ; + break;} +case 116: +#line 873 "llvmAsmParser.y" +{ + if (!yyvsp[-4].TypeVal->isMethodType()) + ThrowException("Can only call methods: invalid type '" + + yyvsp[-4].TypeVal->getName() + "'!"); + + const MethodType *Ty = (const MethodType*)yyvsp[-4].TypeVal; + + Value *V = getVal(Ty, yyvsp[-3].ValIDVal); + if (V->getValueType() != Value::MethodVal || V->getType() != Ty) + ThrowException("Cannot call: " + yyvsp[-3].ValIDVal.getName() + "!"); + + // Create or access a new type that corresponds to the function call... + vector Params; + + if (yyvsp[-1].ValueList) { + // Pull out just the arguments... 
+ Params.insert(Params.begin(), yyvsp[-1].ValueList->begin(), yyvsp[-1].ValueList->end()); + delete yyvsp[-1].ValueList; + + // Loop through MethodType's arguments and ensure they are specified + // correctly! + // + MethodType::ParamTypes::const_iterator I = Ty->getParamTypes().begin(); + unsigned i; + for (i = 0; i < Params.size() && I != Ty->getParamTypes().end(); ++i,++I){ + if (Params[i]->getType() != *I) + ThrowException("Parameter " + utostr(i) + " is not of type '" + + (*I)->getName() + "'!"); + } + + if (i != Params.size() || I != Ty->getParamTypes().end()) + ThrowException("Invalid number of parameters detected!"); + } + + // Create the call node... + yyval.InstVal = new CallInst((Method*)V, Params); + ; + break;} +case 117: +#line 910 "llvmAsmParser.y" +{ + yyval.InstVal = yyvsp[0].InstVal; + ; + break;} +case 118: +#line 914 "llvmAsmParser.y" +{ + ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType(yyvsp[0].TypeVal)); + TyVal = addConstValToConstantPool(TyVal); + yyval.InstVal = new MallocInst((ConstPoolType*)TyVal); + ; + break;} +case 119: +#line 919 "llvmAsmParser.y" +{ + if (!yyvsp[-3].TypeVal->isArrayType() || ((const ArrayType*)yyvsp[-3].TypeVal)->isSized()) + ThrowException("Trying to allocate " + yyvsp[-3].TypeVal->getName() + + " as unsized array!"); + + Value *ArrSize = getVal(yyvsp[-1].TypeVal, yyvsp[0].ValIDVal); + ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType(yyvsp[-3].TypeVal)); + TyVal = addConstValToConstantPool(TyVal); + yyval.InstVal = new MallocInst((ConstPoolType*)TyVal, ArrSize); + ; + break;} +case 120: +#line 929 "llvmAsmParser.y" +{ + ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType(yyvsp[0].TypeVal)); + TyVal = addConstValToConstantPool(TyVal); + yyval.InstVal = new AllocaInst((ConstPoolType*)TyVal); + ; + break;} +case 121: +#line 934 "llvmAsmParser.y" +{ + if (!yyvsp[-3].TypeVal->isArrayType() || ((const ArrayType*)yyvsp[-3].TypeVal)->isSized()) + ThrowException("Trying 
to allocate " + yyvsp[-3].TypeVal->getName() + + " as unsized array!"); + + Value *ArrSize = getVal(yyvsp[-1].TypeVal, yyvsp[0].ValIDVal); + ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType(yyvsp[-3].TypeVal)); + TyVal = addConstValToConstantPool(TyVal); + yyval.InstVal = new AllocaInst((ConstPoolType*)TyVal, ArrSize); + ; + break;} +case 122: +#line 944 "llvmAsmParser.y" +{ + if (!yyvsp[-1].TypeVal->isPointerType()) + ThrowException("Trying to free nonpointer type " + yyvsp[-1].TypeVal->getName() + "!"); + yyval.InstVal = new FreeInst(getVal(yyvsp[-1].TypeVal, yyvsp[0].ValIDVal)); + ; + break;} +} + /* the action file gets copied in in place of this dollarsign */ +#line 543 "/usr/dcs/software/supported/encap/bison-1.28/share/bison.simple" + + yyvsp -= yylen; + yyssp -= yylen; +#ifdef YYLSP_NEEDED + yylsp -= yylen; +#endif + +#if YYDEBUG != 0 + if (yydebug) + { + short *ssp1 = yyss - 1; + fprintf (stderr, "state stack now"); + while (ssp1 != yyssp) + fprintf (stderr, " %d", *++ssp1); + fprintf (stderr, "\n"); + } +#endif + + *++yyvsp = yyval; + +#ifdef YYLSP_NEEDED + yylsp++; + if (yylen == 0) + { + yylsp->first_line = yylloc.first_line; + yylsp->first_column = yylloc.first_column; + yylsp->last_line = (yylsp-1)->last_line; + yylsp->last_column = (yylsp-1)->last_column; + yylsp->text = 0; + } + else + { + yylsp->last_line = (yylsp+yylen-1)->last_line; + yylsp->last_column = (yylsp+yylen-1)->last_column; + } +#endif + + /* Now "shift" the result of the reduction. + Determine what state that goes to, + based on the state we popped back to + and the rule number reduced by. */ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTBASE] + *yyssp; + if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTBASE]; + + goto yynewstate; + +yyerrlab: /* here on detecting error */ + + if (! yyerrstatus) + /* If not already recovering from an error, report this error. 
*/ + { + ++yynerrs; + +#ifdef YYERROR_VERBOSE + yyn = yypact[yystate]; + + if (yyn > YYFLAG && yyn < YYLAST) + { + int size = 0; + char *msg; + int x, count; + + count = 0; + /* Start X at -yyn if nec to avoid negative indexes in yycheck. */ + for (x = (yyn < 0 ? -yyn : 0); + x < (sizeof(yytname) / sizeof(char *)); x++) + if (yycheck[x + yyn] == x) + size += strlen(yytname[x]) + 15, count++; + msg = (char *) malloc(size + 15); + if (msg != 0) + { + strcpy(msg, "parse error"); + + if (count < 5) + { + count = 0; + for (x = (yyn < 0 ? -yyn : 0); + x < (sizeof(yytname) / sizeof(char *)); x++) + if (yycheck[x + yyn] == x) + { + strcat(msg, count == 0 ? ", expecting `" : " or `"); + strcat(msg, yytname[x]); + strcat(msg, "'"); + count++; + } + } + yyerror(msg); + free(msg); + } + else + yyerror ("parse error; also virtual memory exceeded"); + } + else +#endif /* YYERROR_VERBOSE */ + yyerror("parse error"); + } + + goto yyerrlab1; +yyerrlab1: /* here on error raised explicitly by an action */ + + if (yyerrstatus == 3) + { + /* if just tried and failed to reuse lookahead token after an error, discard it. */ + + /* return failure if at end of input */ + if (yychar == YYEOF) + YYABORT; + +#if YYDEBUG != 0 + if (yydebug) + fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]); +#endif + + yychar = YYEMPTY; + } + + /* Else will try to reuse lookahead token + after shifting the error token. */ + + yyerrstatus = 3; /* Each real token shifted decrements this */ + + goto yyerrhandle; + +yyerrdefault: /* current state does not do anything special for the error token. */ + +#if 0 + /* This is wrong; only states that explicitly want error tokens + should shift them. */ + yyn = yydefact[yystate]; /* If its default is to accept any token, ok. 
Otherwise pop it.*/ + if (yyn) goto yydefault; +#endif + +yyerrpop: /* pop the current state because it cannot handle the error token */ + + if (yyssp == yyss) YYABORT; + yyvsp--; + yystate = *--yyssp; +#ifdef YYLSP_NEEDED + yylsp--; +#endif + +#if YYDEBUG != 0 + if (yydebug) + { + short *ssp1 = yyss - 1; + fprintf (stderr, "Error: state stack now"); + while (ssp1 != yyssp) + fprintf (stderr, " %d", *++ssp1); + fprintf (stderr, "\n"); + } +#endif + +yyerrhandle: + + yyn = yypact[yystate]; + if (yyn == YYFLAG) + goto yyerrdefault; + + yyn += YYTERROR; + if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR) + goto yyerrdefault; + + yyn = yytable[yyn]; + if (yyn < 0) + { + if (yyn == YYFLAG) + goto yyerrpop; + yyn = -yyn; + goto yyreduce; + } + else if (yyn == 0) + goto yyerrpop; + + if (yyn == YYFINAL) + YYACCEPT; + +#if YYDEBUG != 0 + if (yydebug) + fprintf(stderr, "Shifting error token, "); +#endif + + *++yyvsp = yylval; +#ifdef YYLSP_NEEDED + *++yylsp = yylloc; +#endif + + yystate = yyn; + goto yynewstate; + + yyacceptlab: + /* YYACCEPT comes here. */ + if (yyfree_stacks) + { + free (yyss); + free (yyvs); +#ifdef YYLSP_NEEDED + free (yyls); +#endif + } + return 0; + + yyabortlab: + /* YYABORT comes here. 
*/ + if (yyfree_stacks) + { + free (yyss); + free (yyvs); +#ifdef YYLSP_NEEDED + free (yyls); +#endif + } + return 1; +} +#line 950 "llvmAsmParser.y" + +int yyerror(char *ErrorMsg) { + ThrowException(string("Parse error: ") + ErrorMsg); + return 0; +} diff --git a/lib/AsmParser/llvmAsmParser.h b/lib/AsmParser/llvmAsmParser.h new file mode 100644 index 00000000000..64e460f994c --- /dev/null +++ b/lib/AsmParser/llvmAsmParser.h @@ -0,0 +1,87 @@ +typedef union { + Module *ModuleVal; + Method *MethodVal; + MethodArgument *MethArgVal; + BasicBlock *BasicBlockVal; + TerminatorInst *TermInstVal; + Instruction *InstVal; + ConstPoolVal *ConstVal; + const Type *TypeVal; + + list *MethodArgList; + list *ValueList; + list *TypeList; + list > *JumpTable; + vector *ConstVector; + + int64_t SInt64Val; + uint64_t UInt64Val; + int SIntVal; + unsigned UIntVal; + + char *StrVal; // This memory is allocated by strdup! + ValID ValIDVal; // May contain memory allocated by strdup + + Instruction::UnaryOps UnaryOpVal; + Instruction::BinaryOps BinaryOpVal; + Instruction::TermOps TermOpVal; + Instruction::MemoryOps MemOpVal; +} YYSTYPE; +#define ESINT64VAL 257 +#define EUINT64VAL 258 +#define SINTVAL 259 +#define UINTVAL 260 +#define VOID 261 +#define BOOL 262 +#define SBYTE 263 +#define UBYTE 264 +#define SHORT 265 +#define USHORT 266 +#define INT 267 +#define UINT 268 +#define LONG 269 +#define ULONG 270 +#define FLOAT 271 +#define DOUBLE 272 +#define STRING 273 +#define TYPE 274 +#define LABEL 275 +#define VAR_ID 276 +#define LABELSTR 277 +#define STRINGCONSTANT 278 +#define IMPLEMENTATION 279 +#define TRUE 280 +#define FALSE 281 +#define BEGINTOK 282 +#define END 283 +#define DECLARE 284 +#define PHI 285 +#define CALL 286 +#define RET 287 +#define BR 288 +#define SWITCH 289 +#define NEG 290 +#define NOT 291 +#define TOINT 292 +#define TOUINT 293 +#define ADD 294 +#define SUB 295 +#define MUL 296 +#define DIV 297 +#define REM 298 +#define SETLE 299 +#define SETGE 300 +#define SETLT 301 
+#define SETGT 302 +#define SETEQ 303 +#define SETNE 304 +#define MALLOC 305 +#define ALLOCA 306 +#define FREE 307 +#define LOAD 308 +#define STORE 309 +#define GETFIELD 310 +#define PUTFIELD 311 + + +extern YYSTYPE llvmAsmlval; diff --git a/lib/AsmParser/llvmAsmParser.y b/lib/AsmParser/llvmAsmParser.y new file mode 100644 index 00000000000..01709ca5deb --- /dev/null +++ b/lib/AsmParser/llvmAsmParser.y @@ -0,0 +1,954 @@ +//===-- llvmAsmParser.y - Parser for llvm assembly files ---------*- C++ -*--=// +// +// This file implements the bison parser for LLVM assembly languages files. +// +//===------------------------------------------------------------------------=// + +// +// TODO: Parse comments and add them to an internal node... so that they may +// be saved in the bytecode format as well as everything else. Very important +// for a general IR format. +// + +%{ +#include "ParserInternals.h" +#include "llvm/BasicBlock.h" +#include "llvm/Method.h" +#include "llvm/SymbolTable.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Assembly/Parser.h" +#include "llvm/ConstantPool.h" +#include "llvm/iTerminators.h" +#include "llvm/iMemory.h" +#include +#include // Get definition of pair class +#include // This embarasment is due to our flex lexer... + +int yyerror(char *ErrorMsg); // Forward declarations to prevent "implicit +int yylex(); // declaration" of xxx warnings. +int yyparse(); + +static Module *ParserResult; +const ToolCommandLine *CurOptions = 0; + +// This contains info used when building the body of a method. It is destroyed +// when the method is completed. 
+// +typedef vector ValueList; // Numbered defs +static void ResolveDefinitions(vector &LateResolvers); + +static struct PerModuleInfo { + Module *CurrentModule; + vector Values; // Module level numbered definitions + vector LateResolveValues; + + void ModuleDone() { + // If we could not resolve some blocks at parsing time (forward branches) + // resolve the branches now... + ResolveDefinitions(LateResolveValues); + + Values.clear(); // Clear out method local definitions + CurrentModule = 0; + } +} CurModule; + +static struct PerMethodInfo { + Method *CurrentMethod; // Pointer to current method being created + + vector Values; // Keep track of numbered definitions + vector LateResolveValues; + + inline PerMethodInfo() { + CurrentMethod = 0; + } + + inline ~PerMethodInfo() {} + + inline void MethodStart(Method *M) { + CurrentMethod = M; + } + + void MethodDone() { + // If we could not resolve some blocks at parsing time (forward branches) + // resolve the branches now... + ResolveDefinitions(LateResolveValues); + + Values.clear(); // Clear out method local definitions + CurrentMethod = 0; + } +} CurMeth; // Info for the current method... + + +//===----------------------------------------------------------------------===// +// Code to handle definitions of all the types +//===----------------------------------------------------------------------===// + +static void InsertValue(Value *D, vector &ValueTab = CurMeth.Values) { + if (!D->hasName()) { // Is this a numbered definition? + unsigned type = D->getType()->getUniqueID(); + if (ValueTab.size() <= type) + ValueTab.resize(type+1, ValueList()); + //printf("Values[%d][%d] = %d\n", type, ValueTab[type].size(), D); + ValueTab[type].push_back(D); + } +} + +static Value *getVal(const Type *Type, ValID &D, + bool DoNotImprovise = false) { + switch (D.Type) { + case 0: { // Is it a numbered definition? 
+ unsigned type = Type->getUniqueID(); + unsigned Num = (unsigned)D.Num; + + // Module constants occupy the lowest numbered slots... + if (type < CurModule.Values.size()) { + if (Num < CurModule.Values[type].size()) + return CurModule.Values[type][Num]; + + Num -= CurModule.Values[type].size(); + } + + // Make sure that our type is within bounds + if (CurMeth.Values.size() <= type) + break; + + // Check that the number is within bounds... + if (CurMeth.Values[type].size() <= Num) + break; + + return CurMeth.Values[type][Num]; + } + case 1: { // Is it a named definition? + string Name(D.Name); + SymbolTable *SymTab = 0; + if (CurMeth.CurrentMethod) + SymTab = CurMeth.CurrentMethod->getSymbolTable(); + Value *N = SymTab ? SymTab->lookup(Type, Name) : 0; + + if (N == 0) { + SymTab = CurModule.CurrentModule->getSymbolTable(); + if (SymTab) + N = SymTab->lookup(Type, Name); + if (N == 0) break; + } + + D.destroy(); // Free old strdup'd memory... + return N; + } + + case 2: // Is it a constant pool reference?? + case 3: // Is it an unsigned const pool reference? + case 4:{ // Is it a string const pool reference? + ConstPoolVal *CPV = 0; + + // Check to make sure that "Type" is an integral type, and that our + // value will fit into the specified type... + switch (D.Type) { + case 2: + if (Type == Type::BoolTy) { // Special handling for boolean data + CPV = new ConstPoolBool(D.ConstPool64 != 0); + } else { + if (!ConstPoolSInt::isValueValidForType(Type, D.ConstPool64)) + ThrowException("Symbolic constant pool reference is invalid!"); + CPV = new ConstPoolSInt(Type, D.ConstPool64); + } + break; + case 3: + if (!ConstPoolUInt::isValueValidForType(Type, D.UConstPool64)) { + if (!ConstPoolSInt::isValueValidForType(Type, D.ConstPool64)) { + ThrowException("Symbolic constant pool reference is invalid!"); + } else { // This is really a signed reference. Transmogrify. 
+ CPV = new ConstPoolSInt(Type, D.ConstPool64); + } + } else { + CPV = new ConstPoolUInt(Type, D.UConstPool64); + } + break; + case 4: + cerr << "FIXME: TODO: String constants [sbyte] not implemented yet!\n"; + abort(); + //CPV = new ConstPoolString(D.Name); + D.destroy(); // Free the string memory + break; + } + assert(CPV && "How did we escape creating a constant??"); + + // Scan through the constant table and see if we already have loaded this + // constant. + // + ConstantPool &CP = CurMeth.CurrentMethod ? + CurMeth.CurrentMethod->getConstantPool() : + CurModule.CurrentModule->getConstantPool(); + ConstPoolVal *C = CP.find(CPV); // Already have this constant? + if (C) { + delete CPV; // Didn't need this after all, oh well. + return C; // Yup, we already have one, recycle it! + } + CP.insert(CPV); + + // Success, everything is kosher. Lets go! + return CPV; + } // End of case 2,3,4 + } // End of switch + + + // If we reached here, we referenced either a symbol that we don't know about + // or an id number that hasn't been read yet. We may be referencing something + // forward, so just create an entry to be resolved later and get to it... + // + if (DoNotImprovise) return 0; // Do we just want a null to be returned? + + // TODO: Attempt to coallecse nodes that are the same with previous ones. 
+ Value *d = 0; + switch (Type->getPrimitiveID()) { + case Type::LabelTyID: d = new BBPlaceHolder(Type, D); break; + case Type::MethodTyID: + d = new MethPlaceHolder(Type, D); + InsertValue(d, CurModule.LateResolveValues); + return d; +//case Type::ClassTyID: d = new ClassPlaceHolder(Type, D); break; + default: d = new DefPlaceHolder(Type, D); break; + } + + assert(d != 0 && "How did we not make something?"); + InsertValue(d, CurMeth.LateResolveValues); + return d; +} + + +//===----------------------------------------------------------------------===// +// Code to handle forward references in instructions +//===----------------------------------------------------------------------===// +// +// This code handles the late binding needed with statements that reference +// values not defined yet... for example, a forward branch, or the PHI node for +// a loop body. +// +// This keeps a table (CurMeth.LateResolveValues) of all such forward references +// and back patchs after we are done. +// + +// ResolveDefinitions - If we could not resolve some defs at parsing +// time (forward branches, phi functions for loops, etc...) resolve the +// defs now... 
+// +static void ResolveDefinitions(vector &LateResolvers) { + // Loop over LateResolveDefs fixing up stuff that couldn't be resolved + for (unsigned ty = 0; ty < LateResolvers.size(); ty++) { + while (!LateResolvers[ty].empty()) { + Value *V = LateResolvers[ty].back(); + LateResolvers[ty].pop_back(); + ValID &DID = getValIDFromPlaceHolder(V); + + Value *TheRealValue = getVal(Type::getUniqueIDType(ty), DID, true); + + if (TheRealValue == 0 && DID.Type == 1) + ThrowException("Reference to an invalid definition: '" +DID.getName() + + "' of type '" + V->getType()->getName() + "'"); + else if (TheRealValue == 0) + ThrowException("Reference to an invalid definition: #" +itostr(DID.Num)+ + " of type '" + V->getType()->getName() + "'"); + + V->replaceAllUsesWith(TheRealValue); + assert(V->use_empty()); + delete V; + } + } + + LateResolvers.clear(); +} + +// addConstValToConstantPool - This code is used to insert a constant into the +// current constant pool. This is designed to make maximal (but not more than +// possible) reuse (merging) of constants in the constant pool. This means that +// multiple references to %4, for example will all get merged. +// +static ConstPoolVal *addConstValToConstantPool(ConstPoolVal *C) { + vector &ValTab = CurMeth.CurrentMethod ? + CurMeth.Values : CurModule.Values; + ConstantPool &CP = CurMeth.CurrentMethod ? + CurMeth.CurrentMethod->getConstantPool() : + CurModule.CurrentModule->getConstantPool(); + + if (ConstPoolVal *CPV = CP.find(C)) { + // Constant already in constant pool. Try to merge the two constants + if (CPV->hasName() && !C->hasName()) { + // Merge the two values, we inherit the existing CPV's name. 
+ // InsertValue requires that the value have no name to insert correctly + // (because we want to fill the slot this constant would have filled) + // + string Name = CPV->getName(); + CPV->setName(""); + InsertValue(CPV, ValTab); + CPV->setName(Name); + delete C; + return CPV; + } else if (!CPV->hasName() && C->hasName()) { + // If we have a name on this value and there isn't one in the const + // pool val already, propogate it. + // + CPV->setName(C->getName()); + delete C; // Sorry, you're toast + return CPV; + } else if (CPV->hasName() && C->hasName()) { + // Both values have distinct names. We cannot merge them. + CP.insert(C); + InsertValue(C, ValTab); + return C; + } else if (!CPV->hasName() && !C->hasName()) { + // Neither value has a name, trivially merge them. + InsertValue(CPV, ValTab); + delete C; + return CPV; + } + + assert(0 && "Not reached!"); + return 0; + } else { // No duplication of value. + CP.insert(C); + InsertValue(C, ValTab); + return C; + } +} + +//===----------------------------------------------------------------------===// +// RunVMAsmParser - Define an interface to this parser +//===----------------------------------------------------------------------===// +// +Module *RunVMAsmParser(const ToolCommandLine &Opts, FILE *F) { + llvmAsmin = F; + CurOptions = &Opts; + llvmAsmlineno = 1; // Reset the current line number... + + CurModule.CurrentModule = new Module(); // Allocate a new module to read + yyparse(); // Parse the file. + Module *Result = ParserResult; + CurOptions = 0; + llvmAsmin = stdin; // F is about to go away, don't use it anymore... 
+ ParserResult = 0; + + return Result; +} + +%} + +%union { + Module *ModuleVal; + Method *MethodVal; + MethodArgument *MethArgVal; + BasicBlock *BasicBlockVal; + TerminatorInst *TermInstVal; + Instruction *InstVal; + ConstPoolVal *ConstVal; + const Type *TypeVal; + + list *MethodArgList; + list *ValueList; + list *TypeList; + list > *JumpTable; + vector *ConstVector; + + int64_t SInt64Val; + uint64_t UInt64Val; + int SIntVal; + unsigned UIntVal; + + char *StrVal; // This memory is allocated by strdup! + ValID ValIDVal; // May contain memory allocated by strdup + + Instruction::UnaryOps UnaryOpVal; + Instruction::BinaryOps BinaryOpVal; + Instruction::TermOps TermOpVal; + Instruction::MemoryOps MemOpVal; +} + +%type Module MethodList +%type Method MethodHeader BasicBlockList +%type BasicBlock InstructionList +%type BBTerminatorInst +%type Inst InstVal MemoryInst +%type ConstVal +%type ConstVector +%type ArgList ArgListH +%type ArgVal +%type ValueRefList ValueRefListE +%type TypeList +%type JumpTable + +%type ValueRef ConstValueRef // Reference to a definition or BB + +// Tokens and types for handling constant integer values +// +// ESINT64VAL - A negative number within long long range +%token ESINT64VAL + +// EUINT64VAL - A positive number within uns. long long range +%token EUINT64VAL +%type EINT64VAL + +%token SINTVAL // Signed 32 bit ints... +%token UINTVAL // Unsigned 32 bit ints... +%type INTVAL + +// Built in types... 
+%type Types TypesV SIntType UIntType IntType +%token VOID BOOL SBYTE UBYTE SHORT USHORT INT UINT LONG ULONG +%token FLOAT DOUBLE STRING TYPE LABEL + +%token VAR_ID LABELSTR STRINGCONSTANT +%type OptVAR_ID OptAssign + + +%token IMPLEMENTATION TRUE FALSE BEGINTOK END DECLARE +%token PHI CALL + +// Basic Block Terminating Operators +%token RET BR SWITCH + +// Unary Operators +%type UnaryOps // all the unary operators +%token NEG NOT + +// Unary Conversion Operators +%token TOINT TOUINT + +// Binary Operators +%type BinaryOps // all the binary operators +%token ADD SUB MUL DIV REM + +// Binary Comarators +%token SETLE SETGE SETLT SETGT SETEQ SETNE + +// Memory Instructions +%token MALLOC ALLOCA FREE LOAD STORE GETFIELD PUTFIELD + +%start Module +%% + +// Handle constant integer size restriction and conversion... +// + +INTVAL : SINTVAL +INTVAL : UINTVAL { + if ($1 > (uint32_t)INT32_MAX) // Outside of my range! + ThrowException("Value too large for type!"); + $$ = (int32_t)$1; +} + + +EINT64VAL : ESINT64VAL // These have same type and can't cause problems... +EINT64VAL : EUINT64VAL { + if ($1 > (uint64_t)INT64_MAX) // Outside of my range! + ThrowException("Value too large for type!"); + $$ = (int64_t)$1; +} + +// Types includes all predefined types... except void, because you can't do +// anything with it except for certain specific things... +// +// User defined types are added latter... +// +Types : BOOL | SBYTE | UBYTE | SHORT | USHORT | INT | UINT +Types : LONG | ULONG | FLOAT | DOUBLE | STRING | TYPE | LABEL + +// TypesV includes all of 'Types', but it also includes the void type. +TypesV : Types | VOID + +// Operations that are notably excluded from this list include: +// RET, BR, & SWITCH because they end basic blocks and are treated specially. 
+// +UnaryOps : NEG | NOT | TOINT | TOUINT +BinaryOps : ADD | SUB | MUL | DIV | REM +BinaryOps : SETLE | SETGE | SETLT | SETGT | SETEQ | SETNE + +// Valueine some types that allow classification if we only want a particular +// thing... +SIntType : LONG | INT | SHORT | SBYTE +UIntType : ULONG | UINT | USHORT | UBYTE +IntType : SIntType | UIntType + +OptAssign : VAR_ID '=' { + $$ = $1; + } + | /*empty*/ { + $$ = 0; + } + +ConstVal : SIntType EINT64VAL { // integral constants + if (!ConstPoolSInt::isValueValidForType($1, $2)) + ThrowException("Constant value doesn't fit in type!"); + $$ = new ConstPoolSInt($1, $2); + } + | UIntType EUINT64VAL { // integral constants + if (!ConstPoolUInt::isValueValidForType($1, $2)) + ThrowException("Constant value doesn't fit in type!"); + $$ = new ConstPoolUInt($1, $2); + } + | BOOL TRUE { // Boolean constants + $$ = new ConstPoolBool(true); + } + | BOOL FALSE { // Boolean constants + $$ = new ConstPoolBool(false); + } + | STRING STRINGCONSTANT { // String constants + cerr << "FIXME: TODO: String constants [sbyte] not implemented yet!\n"; + abort(); + //$$ = new ConstPoolString($2); + free($2); + } + | TYPE Types { // Type constants + $$ = new ConstPoolType($2); + } + | '[' Types ']' '[' ConstVector ']' { // Nonempty array constant + // Verify all elements are correct type! + const ArrayType *AT = ArrayType::getArrayType($2); + for (unsigned i = 0; i < $5->size(); i++) { + if ($2 != (*$5)[i]->getType()) + ThrowException("Element #" + utostr(i) + " is not of type '" + + $2->getName() + "' as required!\nIt is of type '" + + (*$5)[i]->getType()->getName() + "'."); + } + + $$ = new ConstPoolArray(AT, *$5); + delete $5; + } + | '[' Types ']' '[' ']' { // Empty array constant + vector Empty; + $$ = new ConstPoolArray(ArrayType::getArrayType($2), Empty); + } + | '[' EUINT64VAL 'x' Types ']' '[' ConstVector ']' { + // Verify all elements are correct type! 
+ const ArrayType *AT = ArrayType::getArrayType($4, (int)$2); + if ($2 != $7->size()) + ThrowException("Type mismatch: constant sized array initialized with " + + utostr($7->size()) + " arguments, but has size of " + + itostr((int)$2) + "!"); + + for (unsigned i = 0; i < $7->size(); i++) { + if ($4 != (*$7)[i]->getType()) + ThrowException("Element #" + utostr(i) + " is not of type '" + + $4->getName() + "' as required!\nIt is of type '" + + (*$7)[i]->getType()->getName() + "'."); + } + + $$ = new ConstPoolArray(AT, *$7); + delete $7; + } + | '[' EUINT64VAL 'x' Types ']' '[' ']' { + if ($2 != 0) + ThrowException("Type mismatch: constant sized array initialized with 0" + " arguments, but has size of " + itostr((int)$2) + "!"); + vector Empty; + $$ = new ConstPoolArray(ArrayType::getArrayType($4, 0), Empty); + } + | '{' TypeList '}' '{' ConstVector '}' { + StructType::ElementTypes Types($2->begin(), $2->end()); + delete $2; + + const StructType *St = StructType::getStructType(Types); + $$ = new ConstPoolStruct(St, *$5); + delete $5; + } + | '{' '}' '{' '}' { + const StructType *St = + StructType::getStructType(StructType::ElementTypes()); + vector Empty; + $$ = new ConstPoolStruct(St, Empty); + } +/* + | Types '*' ConstVal { + assert(0); + $$ = 0; + } +*/ + + +ConstVector : ConstVector ',' ConstVal { + ($$ = $1)->push_back(addConstValToConstantPool($3)); + } + | ConstVal { + $$ = new vector(); + $$->push_back(addConstValToConstantPool($1)); + } + + +ConstPool : ConstPool OptAssign ConstVal { + if ($2) { + $3->setName($2); + free($2); + } + + addConstValToConstantPool($3); + } + | /* empty: end of list */ { + } + + +//===----------------------------------------------------------------------===// +// Rules to match Modules +//===----------------------------------------------------------------------===// + +// Module rule: Capture the result of parsing the whole file into a result +// variable... 
+// +Module : MethodList { + $$ = ParserResult = $1; + CurModule.ModuleDone(); +} + +MethodList : MethodList Method { + $1->getMethodList().push_back($2); + CurMeth.MethodDone(); + $$ = $1; + } + | ConstPool IMPLEMENTATION { + $$ = CurModule.CurrentModule; + } + + +//===----------------------------------------------------------------------===// +// Rules to match Method Headers +//===----------------------------------------------------------------------===// + +OptVAR_ID : VAR_ID | /*empty*/ { $$ = 0; } + +ArgVal : Types OptVAR_ID { + $$ = new MethodArgument($1); + if ($2) { // Was the argument named? + $$->setName($2); + free($2); // The string was strdup'd, so free it now. + } +} + +ArgListH : ArgVal ',' ArgListH { + $$ = $3; + $3->push_front($1); + } + | ArgVal { + $$ = new list(); + $$->push_front($1); + } + +ArgList : ArgListH { + $$ = $1; + } + | /* empty */ { + $$ = 0; + } + +MethodHeaderH : TypesV STRINGCONSTANT '(' ArgList ')' { + MethodType::ParamTypes ParamTypeList; + if ($4) + for (list::iterator I = $4->begin(); I != $4->end(); I++) + ParamTypeList.push_back((*I)->getType()); + + const MethodType *MT = MethodType::getMethodType($1, ParamTypeList); + + Method *M = new Method(MT, $2); + free($2); // Free strdup'd memory! + + InsertValue(M, CurModule.Values); + + CurMeth.MethodStart(M); + + // Add all of the arguments we parsed to the method... + if ($4) { // Is null if empty... 
+ Method::ArgumentListType &ArgList = M->getArgumentList(); + + for (list::iterator I = $4->begin(); I != $4->end(); I++) { + InsertValue(*I); + ArgList.push_back(*I); + } + delete $4; // We're now done with the argument list + } +} + +MethodHeader : MethodHeaderH ConstPool BEGINTOK { + $$ = CurMeth.CurrentMethod; +} + +Method : BasicBlockList END { + $$ = $1; +} + + +//===----------------------------------------------------------------------===// +// Rules to match Basic Blocks +//===----------------------------------------------------------------------===// + +ConstValueRef : ESINT64VAL { // A reference to a direct constant + $$ = ValID::create($1); + } + | EUINT64VAL { + $$ = ValID::create($1); + } + | TRUE { + $$ = ValID::create((int64_t)1); + } + | FALSE { + $$ = ValID::create((int64_t)0); + } + | STRINGCONSTANT { // Quoted strings work too... especially for methods + $$ = ValID::create_conststr($1); + } + +// ValueRef - A reference to a definition... +ValueRef : INTVAL { // Is it an integer reference...? + $$ = ValID::create($1); + } + | VAR_ID { // It must be a named reference then... + $$ = ValID::create($1); + } + | ConstValueRef { + $$ = $1; + } + +// The user may refer to a user defined type by its typeplane... check for this +// now... +// +Types : ValueRef { + Value *D = getVal(Type::TypeTy, $1, true); + if (D == 0) ThrowException("Invalid user defined type: " + $1.getName()); + assert (D->getValueType() == Value::ConstantVal && + "Internal error! User defined type not in const pool!"); + ConstPoolType *CPT = (ConstPoolType*)D; + $$ = CPT->getValue(); + } + | TypesV '(' TypeList ')' { // Method derived type? + MethodType::ParamTypes Params($3->begin(), $3->end()); + delete $3; + $$ = MethodType::getMethodType($1, Params); + } + | TypesV '(' ')' { // Method derived type? 
+ MethodType::ParamTypes Params; // Empty list + $$ = MethodType::getMethodType($1, Params); + } + | '[' Types ']' { + $$ = ArrayType::getArrayType($2); + } + | '[' EUINT64VAL 'x' Types ']' { + $$ = ArrayType::getArrayType($4, (int)$2); + } + | '{' TypeList '}' { + StructType::ElementTypes Elements($2->begin(), $2->end()); + delete $2; + $$ = StructType::getStructType(Elements); + } + | '{' '}' { + $$ = StructType::getStructType(StructType::ElementTypes()); + } + | Types '*' { + $$ = PointerType::getPointerType($1); + } + + +TypeList : Types { + $$ = new list(); + $$->push_back($1); + } + | TypeList ',' Types { + ($$=$1)->push_back($3); + } + + +BasicBlockList : BasicBlockList BasicBlock { + $1->getBasicBlocks().push_back($2); + $$ = $1; + } + | MethodHeader BasicBlock { // Do not allow methods with 0 basic blocks + $$ = $1; // in them... + $1->getBasicBlocks().push_back($2); + } + + +// Basic blocks are terminated by branching instructions: +// br, br/cc, switch, ret +// +BasicBlock : InstructionList BBTerminatorInst { + $1->getInstList().push_back($2); + InsertValue($1); + $$ = $1; + } + | LABELSTR InstructionList BBTerminatorInst { + $2->getInstList().push_back($3); + $2->setName($1); + free($1); // Free the strdup'd memory... + + InsertValue($2); + $$ = $2; + } + +InstructionList : InstructionList Inst { + $1->getInstList().push_back($2); + $$ = $1; + } + | /* empty */ { + $$ = new BasicBlock(); + } + +BBTerminatorInst : RET Types ValueRef { // Return with a result... + $$ = new ReturnInst(getVal($2, $3)); + } + | RET VOID { // Return with no result... + $$ = new ReturnInst(); + } + | BR LABEL ValueRef { // Unconditional Branch... + $$ = new BranchInst((BasicBlock*)getVal(Type::LabelTy, $3)); + } // Conditional Branch... 
+ | BR BOOL ValueRef ',' LABEL ValueRef ',' LABEL ValueRef { + $$ = new BranchInst((BasicBlock*)getVal(Type::LabelTy, $6), + (BasicBlock*)getVal(Type::LabelTy, $9), + getVal(Type::BoolTy, $3)); + } + | SWITCH IntType ValueRef ',' LABEL ValueRef '[' JumpTable ']' { + SwitchInst *S = new SwitchInst(getVal($2, $3), + (BasicBlock*)getVal(Type::LabelTy, $6)); + $$ = S; + + list >::iterator I = $8->begin(), + end = $8->end(); + for (; I != end; I++) + S->dest_push_back(I->first, I->second); + } + +JumpTable : JumpTable IntType ConstValueRef ',' LABEL ValueRef { + $$ = $1; + ConstPoolVal *V = (ConstPoolVal*)getVal($2, $3, true); + if (V == 0) + ThrowException("May only switch on a constant pool value!"); + + $$->push_back(make_pair(V, (BasicBlock*)getVal($5, $6))); + } + | IntType ConstValueRef ',' LABEL ValueRef { + $$ = new list >(); + ConstPoolVal *V = (ConstPoolVal*)getVal($1, $2, true); + + if (V == 0) + ThrowException("May only switch on a constant pool value!"); + + $$->push_back(make_pair(V, (BasicBlock*)getVal($4, $5))); + } + +Inst : OptAssign InstVal { + if ($1) // Is this definition named?? + $2->setName($1); // if so, assign the name... + + InsertValue($2); + $$ = $2; +} + +ValueRefList : Types ValueRef { // Used for PHI nodes and call statements... + $$ = new list(); + $$->push_back(getVal($1, $2)); + } + | ValueRefList ',' ValueRef { + $$ = $1; + $1->push_back(getVal($1->front()->getType(), $3)); + } + +// ValueRefListE - Just like ValueRefList, except that it may also be empty! 
+ValueRefListE : ValueRefList | /*empty*/ { $$ = 0; } + +InstVal : BinaryOps Types ValueRef ',' ValueRef { + $$ = Instruction::getBinaryOperator($1, getVal($2, $3), getVal($2, $5)); + if ($$ == 0) + ThrowException("binary operator returned null!"); + } + | UnaryOps Types ValueRef { + $$ = Instruction::getUnaryOperator($1, getVal($2, $3)); + if ($$ == 0) + ThrowException("unary operator returned null!"); + } + | PHI ValueRefList { + $$ = new PHINode($2->front()->getType()); + while ($2->begin() != $2->end()) { + // TODO: Ensure all types are the same... + ((PHINode*)$$)->addIncoming($2->front()); + $2->pop_front(); + } + delete $2; // Free the list... + } + | CALL Types ValueRef '(' ValueRefListE ')' { + if (!$2->isMethodType()) + ThrowException("Can only call methods: invalid type '" + + $2->getName() + "'!"); + + const MethodType *Ty = (const MethodType*)$2; + + Value *V = getVal(Ty, $3); + if (V->getValueType() != Value::MethodVal || V->getType() != Ty) + ThrowException("Cannot call: " + $3.getName() + "!"); + + // Create or access a new type that corresponds to the function call... + vector Params; + + if ($5) { + // Pull out just the arguments... + Params.insert(Params.begin(), $5->begin(), $5->end()); + delete $5; + + // Loop through MethodType's arguments and ensure they are specified + // correctly! + // + MethodType::ParamTypes::const_iterator I = Ty->getParamTypes().begin(); + unsigned i; + for (i = 0; i < Params.size() && I != Ty->getParamTypes().end(); ++i,++I){ + if (Params[i]->getType() != *I) + ThrowException("Parameter " + utostr(i) + " is not of type '" + + (*I)->getName() + "'!"); + } + + if (i != Params.size() || I != Ty->getParamTypes().end()) + ThrowException("Invalid number of parameters detected!"); + } + + // Create the call node... 
+ $$ = new CallInst((Method*)V, Params); + } + | MemoryInst { + $$ = $1; + } + +MemoryInst : MALLOC Types { + ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType($2)); + TyVal = addConstValToConstantPool(TyVal); + $$ = new MallocInst((ConstPoolType*)TyVal); + } + | MALLOC Types ',' UINT ValueRef { + if (!$2->isArrayType() || ((const ArrayType*)$2)->isSized()) + ThrowException("Trying to allocate " + $2->getName() + + " as unsized array!"); + + Value *ArrSize = getVal($4, $5); + ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType($2)); + TyVal = addConstValToConstantPool(TyVal); + $$ = new MallocInst((ConstPoolType*)TyVal, ArrSize); + } + | ALLOCA Types { + ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType($2)); + TyVal = addConstValToConstantPool(TyVal); + $$ = new AllocaInst((ConstPoolType*)TyVal); + } + | ALLOCA Types ',' UINT ValueRef { + if (!$2->isArrayType() || ((const ArrayType*)$2)->isSized()) + ThrowException("Trying to allocate " + $2->getName() + + " as unsized array!"); + + Value *ArrSize = getVal($4, $5); + ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType($2)); + TyVal = addConstValToConstantPool(TyVal); + $$ = new AllocaInst((ConstPoolType*)TyVal, ArrSize); + } + | FREE Types ValueRef { + if (!$2->isPointerType()) + ThrowException("Trying to free nonpointer type " + $2->getName() + "!"); + $$ = new FreeInst(getVal($2, $3)); + } + +%% +int yyerror(char *ErrorMsg) { + ThrowException(string("Parse error: ") + ErrorMsg); + return 0; +} diff --git a/lib/Bytecode/Makefile b/lib/Bytecode/Makefile new file mode 100644 index 00000000000..75d4f7cd10c --- /dev/null +++ b/lib/Bytecode/Makefile @@ -0,0 +1,5 @@ +LEVEL = ../.. 
+DIRS = Reader Writer + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bytecode/Reader/ConstantReader.cpp b/lib/Bytecode/Reader/ConstantReader.cpp new file mode 100644 index 00000000000..b85bd887ef8 --- /dev/null +++ b/lib/Bytecode/Reader/ConstantReader.cpp @@ -0,0 +1,218 @@ +//===- ReadConst.cpp - Code to constants and constant pools -----------------=== +// +// This file implements functionality to deserialize constants and entire +// constant pools. +// +// Note that this library should be as fast as possible, reentrant, and +// threadsafe!! +// +//===------------------------------------------------------------------------=== + +#include "llvm/Module.h" +#include "llvm/BasicBlock.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/DerivedTypes.h" +#include "ReaderInternals.h" + +bool BytecodeParser::parseTypeConstant(const uchar *&Buf, const uchar *EndBuf, + ConstPoolVal *&V) { + const Type *Val = 0; + + unsigned PrimType; + if (read_vbr(Buf, EndBuf, PrimType)) return true; + + if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) { + V = new ConstPoolType(Val); // It's just a primitive ID. 
+ return false; + } + + switch (PrimType) { + case Type::MethodTyID: { + unsigned Typ; + if (read_vbr(Buf, EndBuf, Typ)) return true; + const Type *RetType = getType(Typ); + if (RetType == 0) return true; + + MethodType::ParamTypes Params; + + if (read_vbr(Buf, EndBuf, Typ)) return true; + while (Typ) { + const Type *Ty = getType(Typ); + if (Ty == 0) return true; + Params.push_back(Ty); + + if (read_vbr(Buf, EndBuf, Typ)) return true; + } + + Val = MethodType::getMethodType(RetType, Params); + break; + } + case Type::ArrayTyID: { + unsigned ElTyp; + if (read_vbr(Buf, EndBuf, ElTyp)) return true; + const Type *ElementType = getType(ElTyp); + if (ElementType == 0) return true; + + int NumElements; + if (read_vbr(Buf, EndBuf, NumElements)) return true; + Val = ArrayType::getArrayType(ElementType, NumElements); + break; + } + case Type::StructTyID: { + unsigned Typ; + StructType::ElementTypes Elements; + + if (read_vbr(Buf, EndBuf, Typ)) return true; + while (Typ) { // List is terminated by void/0 typeid + const Type *Ty = getType(Typ); + if (Ty == 0) return true; + Elements.push_back(Ty); + + if (read_vbr(Buf, EndBuf, Typ)) return true; + } + + Val = StructType::getStructType(Elements); + break; + } + case Type::PointerTyID: { + unsigned ElTyp; + if (read_vbr(Buf, EndBuf, ElTyp)) return true; + const Type *ElementType = getType(ElTyp); + if (ElementType == 0) return true; + Val = PointerType::getPointerType(ElementType); + break; + } + + default: + cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to deserialize" + << " primitive Type " << PrimType << "\n"; + return true; + } + + V = new ConstPoolType(Val); + return false; +} + +bool BytecodeParser::parseConstPoolValue(const uchar *&Buf, + const uchar *EndBuf, + const Type *Ty, ConstPoolVal *&V) { + switch (Ty->getPrimitiveID()) { + case Type::BoolTyID: { + unsigned Val; + if (read_vbr(Buf, EndBuf, Val)) return true; + if (Val != 0 && Val != 1) return true; + V = new ConstPoolBool(Val == 1); + break; + } + + 
case Type::UByteTyID: // Unsigned integer types... + case Type::UShortTyID: + case Type::UIntTyID: { + unsigned Val; + if (read_vbr(Buf, EndBuf, Val)) return true; + if (!ConstPoolUInt::isValueValidForType(Ty, Val)) return true; + V = new ConstPoolUInt(Ty, Val); + break; + } + + case Type::ULongTyID: { + uint64_t Val; + if (read_vbr(Buf, EndBuf, Val)) return true; + V = new ConstPoolUInt(Ty, Val); + break; + } + + case Type::SByteTyID: // Unsigned integer types... + case Type::ShortTyID: + case Type::IntTyID: { + int Val; + if (read_vbr(Buf, EndBuf, Val)) return true; + if (!ConstPoolSInt::isValueValidForType(Ty, Val)) return 0; + V = new ConstPoolSInt(Ty, Val); + break; + } + + case Type::LongTyID: { + int64_t Val; + if (read_vbr(Buf, EndBuf, Val)) return true; + V = new ConstPoolSInt(Ty, Val); + break; + } + + case Type::TypeTyID: + if (parseTypeConstant(Buf, EndBuf, V)) return true; + break; + + case Type::ArrayTyID: { + const ArrayType *AT = (const ArrayType*)Ty; + unsigned NumElements; + if (AT->isSized()) // Sized array, # elements stored in type! + NumElements = (unsigned)AT->getNumElements(); + else // Unsized array, # elements stored in stream! + if (read_vbr(Buf, EndBuf, NumElements)) return true; + + vector Elements; + while (NumElements--) { // Read all of the elements of the constant. 
+ unsigned Slot; + if (read_vbr(Buf, EndBuf, Slot)) return true; + Value *V = getValue(AT->getElementType(), Slot, false); + if (!V || V->getValueType() != Value::ConstantVal) + return true; + Elements.push_back((ConstPoolVal*)V); + } + V = new ConstPoolArray(AT, Elements); + break; + } + + case Type::StructTyID: { + const StructType *ST = (const StructType*)Ty; + const StructType::ElementTypes &ET = ST->getElementTypes(); + + vector Elements; + for (unsigned i = 0; i < ET.size(); ++i) { + unsigned Slot; + if (read_vbr(Buf, EndBuf, Slot)) return true; + Value *V = getValue(ET[i], Slot, false); + if (!V || V->getValueType() != Value::ConstantVal) + return true; + Elements.push_back((ConstPoolVal*)V); + } + + V = new ConstPoolStruct(ST, Elements); + break; + } + + default: + cerr << __FILE__ << ":" << __LINE__ + << ": Don't know how to deserialize constant value of type '" + << Ty->getName() << "'\n"; + return true; + } + return false; +} + +bool BytecodeParser::ParseConstantPool(const uchar *&Buf, const uchar *EndBuf, + SymTabValue::ConstantPoolType &CP, + ValueTable &Tab) { + while (Buf < EndBuf) { + unsigned NumEntries, Typ; + + if (read_vbr(Buf, EndBuf, NumEntries) || + read_vbr(Buf, EndBuf, Typ)) return true; + const Type *Ty = getType(Typ); + if (Ty == 0) return true; + + for (unsigned i = 0; i < NumEntries; i++) { + ConstPoolVal *I; + if (parseConstPoolValue(Buf, EndBuf, Ty, I)) return true; +#if 0 + cerr << " Read const value: <" << I->getType()->getName() + << ">: " << I->getStrValue() << endl; +#endif + insertValue(I, Tab); + CP.insert(I); + } + } + + return Buf > EndBuf; +} diff --git a/lib/Bytecode/Reader/InstructionReader.cpp b/lib/Bytecode/Reader/InstructionReader.cpp new file mode 100644 index 00000000000..667e144673c --- /dev/null +++ b/lib/Bytecode/Reader/InstructionReader.cpp @@ -0,0 +1,213 @@ +//===- ReadInst.cpp - Code to read an instruction from bytecode -------------=== +// +// This file defines the mechanism to read an instruction from a 
bytecode +// stream. +// +// Note that this library should be as fast as possible, reentrant, and +// threadsafe!! +// +// TODO: Change from getValue(Raw.Arg1) etc, to getArg(Raw, 1) +// Make it check type, so that casts are checked. +// +//===------------------------------------------------------------------------=== + +#include "llvm/iOther.h" +#include "llvm/iTerminators.h" +#include "llvm/iMemory.h" +#include "llvm/DerivedTypes.h" +#include "ReaderInternals.h" + +bool BytecodeParser::ParseRawInst(const uchar *&Buf, const uchar *EndBuf, + RawInst &Result) { + unsigned Op, Typ; + if (read(Buf, EndBuf, Op)) return true; + + Result.NumOperands = Op >> 30; + Result.Opcode = (Op >> 24) & 63; + + switch (Result.NumOperands) { + case 1: + Result.Ty = getType((Op >> 12) & 4095); + Result.Arg1 = Op & 4095; + if (Result.Arg1 == 4095) // Handle special encoding for 0 operands... + Result.NumOperands = 0; + break; + case 2: + Result.Ty = getType((Op >> 16) & 255); + Result.Arg1 = (Op >> 8 ) & 255; + Result.Arg2 = (Op >> 0 ) & 255; + break; + case 3: + Result.Ty = getType((Op >> 18) & 63); + Result.Arg1 = (Op >> 12) & 63; + Result.Arg2 = (Op >> 6 ) & 63; + Result.Arg3 = (Op >> 0 ) & 63; + break; + case 0: + Buf -= 4; // Hrm, try this again... + if (read_vbr(Buf, EndBuf, Result.Opcode)) return true; + if (read_vbr(Buf, EndBuf, Typ)) return true; + Result.Ty = getType(Typ); + if (read_vbr(Buf, EndBuf, Result.NumOperands)) return true; + + switch (Result.NumOperands) { + case 0: + cerr << "Zero Arg instr found!\n"; + return true; // This encoding is invalid! 
+ case 1: + if (read_vbr(Buf, EndBuf, Result.Arg1)) return true; + break; + case 2: + if (read_vbr(Buf, EndBuf, Result.Arg1) || + read_vbr(Buf, EndBuf, Result.Arg2)) return true; + break; + case 3: + if (read_vbr(Buf, EndBuf, Result.Arg1) || + read_vbr(Buf, EndBuf, Result.Arg2) || + read_vbr(Buf, EndBuf, Result.Arg3)) return true; + break; + default: + if (read_vbr(Buf, EndBuf, Result.Arg1) || + read_vbr(Buf, EndBuf, Result.Arg2)) return true; + + // Allocate a vector to hold arguments 3, 4, 5, 6 ... + Result.VarArgs = new vector(Result.NumOperands-2); + for (unsigned a = 0; a < Result.NumOperands-2; a++) + if (read_vbr(Buf, EndBuf, (*Result.VarArgs)[a])) return true; + break; + } + if (align32(Buf, EndBuf)) return true; + break; + } + + //cerr << "NO: " << Result.NumOperands << " opcode: " << Result.Opcode + // << " Ty: " << Result.Ty->getName() << " arg1: " << Result.Arg1 << endl; + return false; +} + + +bool BytecodeParser::ParseInstruction(const uchar *&Buf, const uchar *EndBuf, + Instruction *&Res) { + RawInst Raw; + if (ParseRawInst(Buf, EndBuf, Raw)) return true;; + + if (Raw.Opcode >= Instruction::FirstUnaryOp && + Raw.Opcode < Instruction::NumUnaryOps && Raw.NumOperands == 1) { + Res = Instruction::getUnaryOperator(Raw.Opcode, getValue(Raw.Ty, Raw.Arg1)); + return false; + } else if (Raw.Opcode >= Instruction::FirstBinaryOp && + Raw.Opcode < Instruction::NumBinaryOps && Raw.NumOperands == 2) { + Res = Instruction::getBinaryOperator(Raw.Opcode, getValue(Raw.Ty, Raw.Arg1), + getValue(Raw.Ty, Raw.Arg2)); + return false; + } else if (Raw.Opcode == Instruction::PHINode) { + PHINode *PN = new PHINode(Raw.Ty); + switch (Raw.NumOperands) { + case 0: cerr << "Invalid phi node encountered!\n"; + delete PN; + return true; + case 1: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1)); break; + case 2: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1)); + PN->addIncoming(getValue(Raw.Ty, Raw.Arg2)); break; + case 3: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1)); + 
PN->addIncoming(getValue(Raw.Ty, Raw.Arg2)); + PN->addIncoming(getValue(Raw.Ty, Raw.Arg3)); break; + default: + PN->addIncoming(getValue(Raw.Ty, Raw.Arg1)); + PN->addIncoming(getValue(Raw.Ty, Raw.Arg2)); + { + vector &args = *Raw.VarArgs; + for (unsigned i = 0; i < args.size(); i++) + PN->addIncoming(getValue(Raw.Ty, args[i])); + } + delete Raw.VarArgs; + } + Res = PN; + return false; + } else if (Raw.Opcode == Instruction::Ret) { + if (Raw.NumOperands == 0) { + Res = new ReturnInst(); return false; + } else if (Raw.NumOperands == 1) { + Res = new ReturnInst(getValue(Raw.Ty, Raw.Arg1)); return false; + } + } else if (Raw.Opcode == Instruction::Br) { + if (Raw.NumOperands == 1) { + Res = new BranchInst((BasicBlock*)getValue(Type::LabelTy, Raw.Arg1)); + return false; + } else if (Raw.NumOperands == 3) { + Res = new BranchInst((BasicBlock*)getValue(Type::LabelTy, Raw.Arg1), + (BasicBlock*)getValue(Type::LabelTy, Raw.Arg2), + getValue(Type::BoolTy , Raw.Arg3)); + return false; + } + } else if (Raw.Opcode == Instruction::Switch) { + SwitchInst *I = + new SwitchInst(getValue(Raw.Ty, Raw.Arg1), + (BasicBlock*)getValue(Type::LabelTy, Raw.Arg2)); + Res = I; + if (Raw.NumOperands < 3) return false; // No destinations? Wierd. 
+ + if (Raw.NumOperands == 3 || Raw.VarArgs->size() & 1) { + cerr << "Switch statement with odd number of arguments!\n"; + delete I; + return true; + } + + vector &args = *Raw.VarArgs; + for (unsigned i = 0; i < args.size(); i += 2) + I->dest_push_back((ConstPoolVal*)getValue(Raw.Ty, args[i]), + (BasicBlock*)getValue(Type::LabelTy, args[i+1])); + + delete Raw.VarArgs; + return false; + } else if (Raw.Opcode == Instruction::Call) { + Method *M = (Method*)getValue(Raw.Ty, Raw.Arg1); + if (M == 0) return true; + + const MethodType::ParamTypes &PL = M->getMethodType()->getParamTypes(); + MethodType::ParamTypes::const_iterator It = PL.begin(); + + vector Params; + switch (Raw.NumOperands) { + case 0: cerr << "Invalid call instruction encountered!\n"; + return true; + case 1: break; + case 2: Params.push_back(getValue(*It++, Raw.Arg2)); break; + case 3: Params.push_back(getValue(*It++, Raw.Arg2)); + if (It == PL.end()) return true; + Params.push_back(getValue(*It++, Raw.Arg3)); break; + default: + Params.push_back(getValue(*It++, Raw.Arg2)); + { + vector &args = *Raw.VarArgs; + for (unsigned i = 0; i < args.size(); i++) { + if (It == PL.end()) return true; + Params.push_back(getValue(*It++, args[i])); + } + } + delete Raw.VarArgs; + } + if (It != PL.end()) return true; + + Res = new CallInst(M, Params); + return false; + } else if (Raw.Opcode == Instruction::Malloc) { + if (Raw.NumOperands > 2) return true; + Value *Sz = (Raw.NumOperands == 2) ? getValue(Type::UIntTy, Raw.Arg2) : 0; + Res = new MallocInst((ConstPoolType*)getValue(Type::TypeTy, Raw.Arg1), Sz); + return false; + } else if (Raw.Opcode == Instruction::Alloca) { + if (Raw.NumOperands > 2) return true; + Value *Sz = (Raw.NumOperands == 2) ? 
getValue(Type::UIntTy, Raw.Arg2) : 0; + Res = new AllocaInst((ConstPoolType*)getValue(Type::TypeTy, Raw.Arg1), Sz); + return false; + } else if (Raw.Opcode == Instruction::Free) { + Value *Val = getValue(Raw.Ty, Raw.Arg1); + if (!Val->getType()->isPointerType()) return true; + Res = new FreeInst(Val); + return false; + } + + cerr << "Unrecognized instruction! " << Raw.Opcode << endl; + return true; +} diff --git a/lib/Bytecode/Reader/Makefile b/lib/Bytecode/Reader/Makefile new file mode 100644 index 00000000000..2c79d151046 --- /dev/null +++ b/lib/Bytecode/Reader/Makefile @@ -0,0 +1,7 @@ + +LEVEL = ../../.. + +LIBRARYNAME = bcreader + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp new file mode 100644 index 00000000000..c3f4c907fea --- /dev/null +++ b/lib/Bytecode/Reader/Reader.cpp @@ -0,0 +1,478 @@ +//===- Reader.cpp - Code to read bytecode files -----------------------------=== +// +// This library implements the functionality defined in llvm/Bytecode/Reader.h +// +// Note that this library should be as fast as possible, reentrant, and +// threadsafe!! +// +// TODO: Make error message outputs be configurable depending on an option? +// TODO: Allow passing in an option to ignore the symbol table +// +//===------------------------------------------------------------------------=== + +#include "llvm/Bytecode/Reader.h" +#include "llvm/Bytecode/Format.h" +#include "llvm/Module.h" +#include "llvm/BasicBlock.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/iOther.h" +#include "ReaderInternals.h" +#include +#include +#include +#include +#include +#include + +bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) { + if (Ty->isPrimitiveType()) { + Slot = Ty->getPrimitiveID(); + } else { + TypeMapType::iterator I = TypeMap.find(Ty); + if (I == TypeMap.end()) return true; // Didn't find type! 
+ Slot = I->second; + } + //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << endl; + return false; +} + +const Type *BytecodeParser::getType(unsigned ID) { + const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID); + if (T) return T; + + //cerr << "Looking up Type ID: " << ID << endl; + + const Value *D = getValue(Type::TypeTy, ID, false); + if (D == 0) return 0; + + assert(D->getType() == Type::TypeTy && + D->getValueType() == Value::ConstantVal); + + + return ((const ConstPoolType*)D)->getValue();; +} + +bool BytecodeParser::insertValue(Value *Def, vector &ValueTab) { + unsigned type; + if (getTypeSlot(Def->getType(), type)) return true; + + if (ValueTab.size() <= type) + ValueTab.resize(type+1, ValueList()); + + //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size() + // << "] = " << Def << endl; + + if (type == Type::TypeTyID && Def->getValueType() == Value::ConstantVal) { + const Type *Ty = ((const ConstPoolType*)Def)->getValue(); + unsigned ValueOffset = FirstDerivedTyID; + + if (&ValueTab == &Values) // Take into consideration module level types + ValueOffset += ModuleValues[type].size(); + + if (TypeMap.find(Ty) == TypeMap.end()) + TypeMap[Ty] = ValueTab[type].size()+ValueOffset; + } + + ValueTab[type].push_back(Def); + + return false; +} + +Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) { + unsigned Num = oNum; + unsigned type; // The type plane it lives in... + + if (getTypeSlot(Ty, type)) return 0; // TODO: true + + if (type == Type::TypeTyID) { // The 'type' plane has implicit values + const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num); + if (T) return (Value*)T; // Asked for a primitive type... + + // Otherwise, derived types need offset... 
+ Num -= FirstDerivedTyID; + } + + if (ModuleValues.size() > type) { + if (ModuleValues[type].size() > Num) + return ModuleValues[type][Num]; + Num -= ModuleValues[type].size(); + } + + if (Values.size() > type && Values[type].size() > Num) + return Values[type][Num]; + + if (!Create) return 0; // Do not create a placeholder? + + Value *d = 0; + switch (Ty->getPrimitiveID()) { + case Type::LabelTyID: d = new BBPHolder(Ty, oNum); break; + case Type::MethodTyID: + cerr << "Creating method pholder! : " << type << ":" << oNum << " " + << Ty->getName() << endl; + d = new MethPHolder(Ty, oNum); + insertValue(d, LateResolveModuleValues); + return d; + default: d = new DefPHolder(Ty, oNum); break; + } + + assert(d != 0 && "How did we not make something?"); + if (insertValue(d, LateResolveValues)) return 0; + return d; +} + +bool BytecodeParser::postResolveValues(ValueTable &ValTab) { + bool Error = false; + for (unsigned ty = 0; ty < ValTab.size(); ty++) { + ValueList &DL = ValTab[ty]; + unsigned Size; + while ((Size = DL.size())) { + unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]); + + Value *D = DL[Size-1]; + DL.pop_back(); + + Value *NewDef = getValue(D->getType(), IDNumber, false); + if (NewDef == 0) { + Error = true; // Unresolved thinger + cerr << "Unresolvable reference found: <" << D->getType()->getName() + << ">:" << IDNumber << "!\n"; + } else { + // Fixup all of the uses of this placeholder def... + D->replaceAllUsesWith(NewDef); + + // Now that all the uses are gone, delete the placeholder... + // If we couldn't find a def (error case), then leak a little + delete D; // memory, 'cause otherwise we can't remove all uses! 
+ } + } + } + + return Error; +} + +bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf, + BasicBlock *&BB) { + BB = new BasicBlock(); + + while (Buf < EndBuf) { + Instruction *Def; + if (ParseInstruction(Buf, EndBuf, Def)) { + delete BB; + return true; + } + + if (Def == 0) { delete BB; return true; } + if (insertValue(Def, Values)) { delete BB; return true; } + + BB->getInstList().push_back(Def); + } + + return false; +} + +bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf) { + while (Buf < EndBuf) { + // Symtab block header: [num entries][type id number] + unsigned NumEntries, Typ; + if (read_vbr(Buf, EndBuf, NumEntries) || + read_vbr(Buf, EndBuf, Typ)) return true; + const Type *Ty = getType(Typ); + if (Ty == 0) return true; + + for (unsigned i = 0; i < NumEntries; i++) { + // Symtab entry: [def slot #][name] + unsigned slot; + if (read_vbr(Buf, EndBuf, slot)) return true; + string Name; + if (read(Buf, EndBuf, Name, false)) // Not aligned... + return true; + + Value *D = getValue(Ty, slot, false); // Find mapping... + if (D == 0) return true; + D->setName(Name); + } + } + + return Buf > EndBuf; +} + + +bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf, + Module *C) { + // Clear out the local values table... + Values.clear(); + if (MethodSignatureList.empty()) return true; // Unexpected method! 
+ + const MethodType *MTy = MethodSignatureList.front().first; + unsigned MethSlot = MethodSignatureList.front().second; + MethodSignatureList.pop_front(); + Method *M = new Method(MTy); + + const MethodType::ParamTypes &Params = MTy->getParamTypes(); + for (MethodType::ParamTypes::const_iterator It = Params.begin(); + It != Params.end(); It++) { + MethodArgument *MA = new MethodArgument(*It); + if (insertValue(MA, Values)) { delete M; return true; } + M->getArgumentList().push_back(MA); + } + + while (Buf < EndBuf) { + unsigned Type, Size; + const uchar *OldBuf = Buf; + if (readBlock(Buf, EndBuf, Type, Size)) { delete M; return true; } + + switch (Type) { + case BytecodeFormat::ConstantPool: + if (ParseConstantPool(Buf, Buf+Size, M->getConstantPool(), Values)) { + cerr << "Error reading constant pool!\n"; + delete M; return true; + } + break; + + case BytecodeFormat::BasicBlock: { + BasicBlock *BB; + if (ParseBasicBlock(Buf, Buf+Size, BB) || + insertValue(BB, Values)) { + cerr << "Error parsing basic block!\n"; + delete M; return true; // Parse error... :( + } + + M->getBasicBlocks().push_back(BB); + break; + } + + case BytecodeFormat::SymbolTable: + if (ParseSymbolTable(Buf, Buf+Size)) { + cerr << "Error reading method symbol table!\n"; + delete M; return true; + } + break; + + default: + Buf += Size; + if (OldBuf > Buf) return true; // Wrap around! + break; + } + if (align32(Buf, EndBuf)) { + delete M; // Malformed bc file, read past end of block. + return true; + } + } + + if (postResolveValues(LateResolveValues) || + postResolveValues(LateResolveModuleValues)) { + delete M; return true; // Unresolvable references! 
+ } + + Value *MethPHolder = getValue(MTy, MethSlot, false); + assert(MethPHolder && "Something is broken no placeholder found!"); + assert(MethPHolder->getValueType() == Value::MethodVal && "Not a method?"); + + unsigned type; // Type slot + assert(!getTypeSlot(MTy, type) && "How can meth type not exist?"); + getTypeSlot(MTy, type); + + C->getMethodList().push_back(M); + + // Replace placeholder with the real method pointer... + ModuleValues[type][MethSlot] = M; + + // If anyone is using the placeholder make them use the real method instead + MethPHolder->replaceAllUsesWith(M); + + // We don't need the placeholder anymore! + delete MethPHolder; + + return false; +} + +bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End, + Module *C) { + + if (!MethodSignatureList.empty()) return true; // Two ModuleGlobal blocks? + + // Read the method signatures for all of the methods that are coming, and + // create fillers in the Value tables. + unsigned MethSignature; + if (read_vbr(Buf, End, MethSignature)) return true; + while (MethSignature != Type::VoidTyID) { // List is terminated by Void + const Type *Ty = getType(MethSignature); + if (!Ty || !Ty->isMethodType()) { + cerr << "Method not meth type! "; + if (Ty) cerr << Ty->getName(); else cerr << MethSignature; cerr << endl; + return true; + } + + // When the ModuleGlobalInfo section is read, we load the type of each method + // and the 'ModuleValues' slot that it lands in. We then load a placeholder + // into its slot to reserve it. When the method is loaded, this placeholder + // is replaced. + + // Insert the placeholder... + Value *Def = new MethPHolder(Ty, 0); + insertValue(Def, ModuleValues); + + // Figure out which entry of its typeslot it went into... + unsigned TypeSlot; + if (getTypeSlot(Def->getType(), TypeSlot)) return true; + + unsigned SlotNo = ModuleValues[TypeSlot].size()-1; + + // Keep track of this information in a linked list that is emptied as + // methods are loaded... 
+ // + MethodSignatureList.push_back(make_pair((const MethodType*)Ty, SlotNo)); + if (read_vbr(Buf, End, MethSignature)) return true; + } + + if (align32(Buf, End)) return true; + + // This is for future proofing... in the future extra fields may be added that + // we don't understand, so we transparently ignore them. + // + Buf = End; + return false; +} + +bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf, + Module *&C) { + + unsigned Type, Size; + if (readBlock(Buf, EndBuf, Type, Size)) return true; + if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) + return true; // Hrm, not a class? + + MethodSignatureList.clear(); // Just in case... + + // Read into instance variables... + if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return true; + if (align32(Buf, EndBuf)) return true; + + C = new Module(); + + while (Buf < EndBuf) { + const uchar *OldBuf = Buf; + if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return true; } + switch (Type) { + case BytecodeFormat::ModuleGlobalInfo: + if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) { + cerr << "Error reading class global info section!\n"; + delete C; return true; + } + break; + + case BytecodeFormat::ConstantPool: + if (ParseConstantPool(Buf, Buf+Size, C->getConstantPool(), ModuleValues)) { + cerr << "Error reading class constant pool!\n"; + delete C; return true; + } + break; + + case BytecodeFormat::Method: { + if (ParseMethod(Buf, Buf+Size, C)) { + delete C; return true; // Error parsing method + } + break; + } + + case BytecodeFormat::SymbolTable: + if (ParseSymbolTable(Buf, Buf+Size)) { + cerr << "Error reading class symbol table!\n"; + delete C; return true; + } + break; + + default: + cerr << "Unknown class block: " << Type << endl; + Buf += Size; + if (OldBuf > Buf) return true; // Wrap around! + break; + } + if (align32(Buf, EndBuf)) { delete C; return true; } + } + + if (!MethodSignatureList.empty()) // Expected more methods! 
+ return true; + return false; +} + +Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) { + LateResolveValues.clear(); + unsigned Sig; + // Read and check signature... + if (read(Buf, EndBuf, Sig) || + Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24)) + return 0; // Invalid signature! + + Module *Result; + if (ParseModule(Buf, EndBuf, Result)) return 0; + return Result; +} + + +Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) { + BytecodeParser Parser; + return Parser.ParseBytecode(Buffer, Buffer+Length); +} + +// Parse and return a class file... +// +Module *ParseBytecodeFile(const string &Filename) { + struct stat StatBuf; + Module *Result = 0; + + if (Filename != string("-")) { // Read from a file... + int FD = open(Filename.data(), O_RDONLY); + if (FD == -1) return 0; + + if (fstat(FD, &StatBuf) == -1) { close(FD); return 0; } + + int Length = StatBuf.st_size; + if (Length == 0) { close(FD); return 0; } + uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ, + MAP_PRIVATE, FD, 0); + if (Buffer == (uchar*)-1) { close(FD); return 0; } + + BytecodeParser Parser; + Result = Parser.ParseBytecode(Buffer, Buffer+Length); + + munmap((char*)Buffer, Length); + close(FD); + } else { // Read from stdin + size_t FileSize = 0; + int BlockSize; + uchar Buffer[4096], *FileData = 0; + while ((BlockSize = read(0, Buffer, 4))) { + if (BlockSize == -1) { free(FileData); return 0; } + + FileData = (uchar*)realloc(FileData, FileSize+BlockSize); + memcpy(FileData+FileSize, Buffer, BlockSize); + FileSize += BlockSize; + } + + if (FileSize == 0) { free(FileData); return 0; } + +#define ALIGN_PTRS 1 +#if ALIGN_PTRS + uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + assert((Buf != (uchar*)-1) && "mmap returned error!"); + free(FileData); + memcpy(Buf, FileData, FileSize); +#else + uchar *Buf = FileData; +#endif + + BytecodeParser Parser; + Result = Parser.ParseBytecode(Buf, Buf+FileSize); + +#if 
ALIGN_PTRS + munmap((char*)Buf, FileSize); // Free mmap'd data area +#else + free(FileData); // Free realloc'd block of memory +#endif + } + + return Result; +} diff --git a/lib/Bytecode/Reader/ReaderInternals.h b/lib/Bytecode/Reader/ReaderInternals.h new file mode 100644 index 00000000000..3bb04726743 --- /dev/null +++ b/lib/Bytecode/Reader/ReaderInternals.h @@ -0,0 +1,146 @@ +//===-- ReaderInternals.h - Definitions internal to the reader ---*- C++ -*--=// +// +// This header file defines various stuff that is used by the bytecode reader. +// +//===----------------------------------------------------------------------===// + +#ifndef READER_INTERNALS_H +#define READER_INTERNALS_H + +#include "llvm/Bytecode/Primitives.h" +#include "llvm/SymTabValue.h" +#include "llvm/Method.h" +#include "llvm/Instruction.h" +#include +#include + +class BasicBlock; +class Method; +class Module; +class Type; + +typedef unsigned char uchar; + +struct RawInst { // The raw fields out of the bytecode stream... + unsigned NumOperands; + unsigned Opcode; + const Type *Ty; + unsigned Arg1, Arg2; + union { + unsigned Arg3; + vector *VarArgs; // Contains arg #3,4,5... if NumOperands > 3 + }; +}; + +class BytecodeParser { +public: + BytecodeParser() { + // Define this in case we don't see a ModuleGlobalInfo block. + FirstDerivedTyID = Type::FirstDerivedTyID; + } + + Module *ParseBytecode(const uchar *Buf, const uchar *EndBuf); +private: // All of this data is transient across calls to ParseBytecode + typedef vector ValueList; + typedef vector ValueTable; + typedef map TypeMapType; + ValueTable Values, LateResolveValues; + ValueTable ModuleValues, LateResolveModuleValues; + TypeMapType TypeMap; + + // Information read from the ModuleGlobalInfo section of the file... + unsigned FirstDerivedTyID; + + // When the ModuleGlobalInfo section is read, we load the type of each method + // and the 'ModuleValues' slot that it lands in. We then load a placeholder + // into its slot to reserve it. 
When the method is loaded, this placeholder + // is replaced. + // + list > MethodSignatureList; + +private: + bool ParseModule (const uchar * Buf, const uchar *End, Module *&); + bool ParseModuleGlobalInfo (const uchar *&Buf, const uchar *End, Module *); + bool ParseSymbolTable (const uchar *&Buf, const uchar *End); + bool ParseMethod (const uchar *&Buf, const uchar *End, Module *); + bool ParseBasicBlock (const uchar *&Buf, const uchar *End, BasicBlock *&); + bool ParseInstruction (const uchar *&Buf, const uchar *End, Instruction *&); + bool ParseRawInst (const uchar *&Buf, const uchar *End, RawInst &); + + bool ParseConstantPool(const uchar *&Buf, const uchar *EndBuf, + SymTabValue::ConstantPoolType &CP, ValueTable &Tab); + + + bool parseConstPoolValue(const uchar *&Buf, const uchar *End, + const Type *Ty, ConstPoolVal *&V); + bool parseTypeConstant (const uchar *&Buf, const uchar *, ConstPoolVal *&); + + Value *getValue(const Type *Ty, unsigned num, bool Create = true); + const Type *getType(unsigned ID); + + bool insertValue(Value *D, vector &D); + bool postResolveValues(ValueTable &ValTab); + + bool getTypeSlot(const Type *Ty, unsigned &Slot); +}; + +template +class PlaceholderDef : public SuperType { + unsigned ID; +public: + PlaceholderDef(const Type *Ty, unsigned id) : SuperType(Ty), ID(id) {} + unsigned getID() { return ID; } +}; + +struct InstPlaceHolderHelper : public Instruction { + InstPlaceHolderHelper(const Type *Ty) : Instruction(Ty, UserOp1, "") {} + inline virtual void dropAllReferences() {} + virtual string getOpcode() const { return "placeholder"; } + + virtual Instruction *clone() const { abort(); return 0; } + + // No "operands"... 
+ virtual Value *getOperand(unsigned i) { return 0; } + virtual const Value *getOperand(unsigned i) const { return 0; } + virtual bool setOperand(unsigned i, Value *Val) { return false; } + virtual unsigned getNumOperands() const { return 0; } +}; + +struct BBPlaceHolderHelper : public BasicBlock { + BBPlaceHolderHelper(const Type *Ty) : BasicBlock() { + assert(Ty->isLabelType()); + } +}; + +struct MethPlaceHolderHelper : public Method { + MethPlaceHolderHelper(const Type *Ty) + : Method((const MethodType*)Ty) { + assert(Ty->isMethodType() && "Method placeholders must be method types!"); + } +}; + +typedef PlaceholderDef DefPHolder; +typedef PlaceholderDef BBPHolder; +typedef PlaceholderDef MethPHolder; + +static inline unsigned getValueIDNumberFromPlaceHolder(Value *Def) { + switch (Def->getType()->getPrimitiveID()) { + case Type::LabelTyID: return ((BBPHolder*)Def)->getID(); + case Type::MethodTyID: return ((MethPHolder*)Def)->getID(); + default: return ((DefPHolder*)Def)->getID(); + } +} + +static inline bool readBlock(const uchar *&Buf, const uchar *EndBuf, + unsigned &Type, unsigned &Size) { +#if DEBUG_OUTPUT + bool Result = read(Buf, EndBuf, Type) || read(Buf, EndBuf, Size); + cerr << "StartLoc = " << ((unsigned)Buf & 4095) + << " Type = " << Type << " Size = " << Size << endl; + return Result; +#else + return read(Buf, EndBuf, Type) || read(Buf, EndBuf, Size); +#endif +} + +#endif diff --git a/lib/Bytecode/Writer/ConstantWriter.cpp b/lib/Bytecode/Writer/ConstantWriter.cpp new file mode 100644 index 00000000000..e0504a5b38f --- /dev/null +++ b/lib/Bytecode/Writer/ConstantWriter.cpp @@ -0,0 +1,154 @@ +//===-- WriteConst.cpp - Functions for writing constants ---------*- C++ -*--=// +// +// This file implements the routines for encoding constants to a bytecode +// stream. +// +// Note that the performance of this library is not terribly important, because +// it shouldn't be used by JIT type applications... so it is not a huge focus +// at least. 
:)
+//
+//===----------------------------------------------------------------------===//
+
+#include "WriterInternals.h"
+#include "llvm/ConstPoolVals.h"
+#include "llvm/SymbolTable.h"
+#include "llvm/DerivedTypes.h"
+
+// outputType - Serialize the type T.  The primitive ID is always written
+// first.  For derived types it is followed by the slot numbers of the types T
+// is built from:
+//   method:  [return type] [param type]... [void]
+//   array:   [element type] [number of elements]
+//   struct:  [element type]... [void]
+//   pointer: [pointed-to type]
+// Any other derived type only produces a diagnostic on cerr.
+//
+void BytecodeWriter::outputType(const Type *T) {
+  output_vbr((unsigned)T->getPrimitiveID(), Out);
+
+  // That's all there is to handling primitive types...
+  if (T->isPrimitiveType())
+    return;     // We might do this if we alias a prim type: %x = type int
+
+  switch (T->getPrimitiveID()) {   // Handle derived types now.
+  case Type::MethodTyID: {
+    const MethodType *MT = (const MethodType*)T;
+    int Slot = Table.getValSlot(MT->getReturnType());
+    assert(Slot != -1 && "Type used but not available!!");
+    output_vbr((unsigned)Slot, Out);
+
+    // Output all of the arguments...
+    MethodType::ParamTypes::const_iterator I = MT->getParamTypes().begin();
+    for (; I != MT->getParamTypes().end(); I++) {
+      Slot = Table.getValSlot(*I);
+      assert(Slot != -1 && "Type used but not available!!");
+      output_vbr((unsigned)Slot, Out);
+    }
+
+    // Terminate list with VoidTy
+    output_vbr((unsigned)Type::VoidTy->getPrimitiveID(), Out);
+    break;
+  }
+
+  case Type::ArrayTyID: {
+    const ArrayType *AT = (const ArrayType*)T;
+    int Slot = Table.getValSlot(AT->getElementType());
+    assert(Slot != -1 && "Type used but not available!!");
+    output_vbr((unsigned)Slot, Out);
+    //cerr << "Type slot = " << Slot << " Type = " << T->getName() << endl;
+
+    output_vbr(AT->getNumElements(), Out);
+    break;
+  }
+
+  case Type::StructTyID: {
+    const StructType *ST = (const StructType*)T;
+
+    // Output all of the element types...
+    StructType::ElementTypes::const_iterator I = ST->getElementTypes().begin();
+    for (; I != ST->getElementTypes().end(); I++) {
+      int Slot = Table.getValSlot(*I);
+      assert(Slot != -1 && "Type used but not available!!");
+      output_vbr((unsigned)Slot, Out);
+    }
+
+    // Terminate list with VoidTy
+    output_vbr((unsigned)Type::VoidTy->getPrimitiveID(), Out);
+    break;
+  }
+
+  case Type::PointerTyID: {
+    const PointerType *PT = (const PointerType*)T;
+    int Slot = Table.getValSlot(PT->getValueType());
+    assert(Slot != -1 && "Type used but not available!!");
+    output_vbr((unsigned)Slot, Out);
+    break;
+  }
+
+  case Type::ModuleTyID:
+  case Type::PackedTyID:
+  default:
+    cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize"
+         << " Type '" << T->getName() << "'\n";
+    break;
+  }
+}
+
+// outputConstant - Serialize one constant according to the primitive ID of
+// its type: booleans as 1 or 0, integers as a vbr of their value, type
+// constants through outputType, arrays and structs as the slot number of each
+// element (an array that is not sized is prefixed with its element count).
+// Every other type, including float and double, only produces a diagnostic on
+// cerr.  Every path returns false.
+//
+bool BytecodeWriter::outputConstant(const ConstPoolVal *CPV) {
+  switch (CPV->getType()->getPrimitiveID()) {
+  case Type::BoolTyID:    // Boolean Types
+    if (((const ConstPoolBool*)CPV)->getValue())
+      output_vbr((unsigned)1, Out);
+    else
+      output_vbr((unsigned)0, Out);
+    break;
+
+  case Type::UByteTyID:   // Unsigned integer types...
+  case Type::UShortTyID:
+  case Type::UIntTyID:
+  case Type::ULongTyID:
+    output_vbr(((const ConstPoolUInt*)CPV)->getValue(), Out);
+    break;
+
+  case Type::SByteTyID:   // Signed integer types...
+  case Type::ShortTyID:
+  case Type::IntTyID:
+  case Type::LongTyID:
+    output_vbr(((const ConstPoolSInt*)CPV)->getValue(), Out);
+    break;
+
+  case Type::TypeTyID:     // Serialize type type
+    outputType(((const ConstPoolType*)CPV)->getValue());
+    break;
+
+  case Type::ArrayTyID: {
+    const ConstPoolArray *CPA = (const ConstPoolArray *)CPV;
+    unsigned size = CPA->getValues().size();
+    if (!((const ArrayType *)CPA->getType())->isSized())
+      output_vbr(size, Out);            // Not for sized arrays!!!
+
+    for (unsigned i = 0; i < size; i++) {
+      int Slot = Table.getValSlot(CPA->getValues()[i]);
+      assert(Slot != -1 && "Constant used but not available!!");
+      output_vbr((unsigned)Slot, Out);
+    }
+    break;
+  }
+
+  case Type::StructTyID: {
+    const ConstPoolStruct *CPS = (const ConstPoolStruct*)CPV;
+    // NOTE(review): the element type of this vector is missing in this copy
+    // of the patch; restore it from upstream.
+    const vector &Vals = CPS->getValues();
+
+    for (unsigned i = 0; i < Vals.size(); ++i) {
+      int Slot = Table.getValSlot(Vals[i]);
+      assert(Slot != -1 && "Constant used but not available!!");
+      output_vbr((unsigned)Slot, Out);
+    }
+    break;
+  }
+
+  case Type::FloatTyID:    // Floating point types...
+  case Type::DoubleTyID:
+    // TODO: Floating point type serialization
+    // (until then these fall through to the diagnostic below)
+
+
+  case Type::VoidTyID:
+  case Type::LabelTyID:
+  default:
+    cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize"
+         << " type '" << CPV->getType()->getName() << "'\n";
+    break;
+  }
+  return false;
+}
diff --git a/lib/Bytecode/Writer/InstructionWriter.cpp b/lib/Bytecode/Writer/InstructionWriter.cpp
new file mode 100644
index 00000000000..c7c04efb731
--- /dev/null
+++ b/lib/Bytecode/Writer/InstructionWriter.cpp
@@ -0,0 +1,184 @@
+//===-- WriteInst.cpp - Functions for writing instructions -------*- C++ -*--=//
+//
+// This file implements the routines for encoding instruction opcodes to a
+// bytecode stream.
+//
+// Note that the performance of this library is not terribly important, because
+// it shouldn't be used by JIT type applications... so it is not a huge focus
+// at least.  :)
+//
+//===----------------------------------------------------------------------===//
+
+#include "WriterInternals.h"
+#include "llvm/Module.h"
+#include "llvm/Method.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instruction.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Tools/DataTypes.h"
+// NOTE(review): the header name of the #include below is missing in this copy
+// of the patch; restore it from upstream.
+#include
+
+typedef unsigned char uchar;
+
+// outputInstructionFormat0 - Output those weird instructions that have a large
+// number of operands or have large operands themselves...
+// +// Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg] +// +static void outputInstructionFormat0(const Instruction *I, + const SlotCalculator &Table, + unsigned Type, vector &Out) { + // Opcode must have top two bits clear... + output_vbr(I->getInstType(), Out); // Instruction Opcode ID + output_vbr(Type, Out); // Result type + + unsigned NumArgs; // Count the number of arguments to the instruction + for (NumArgs = 0; I->getOperand(NumArgs); NumArgs++) /*empty*/; + output_vbr(NumArgs, Out); + + for (unsigned i = 0; const Value *N = I->getOperand(i); i++) { + assert(i < NumArgs && "Count of arguments failed!"); + + int Slot = Table.getValSlot(N); + output_vbr((unsigned)Slot, Out); + } + align32(Out); // We must maintain correct alignment! +} + + +// outputInstructionFormat1 - Output one operand instructions, knowing that no +// operand index is >= 2^12. +// +static void outputInstructionFormat1(const Instruction *I, + const SlotCalculator &Table, int *Slots, + unsigned Type, vector &Out) { + unsigned IType = I->getInstType(); // Instruction Opcode ID + + // bits Instruction format: + // -------------------------- + // 31-30: Opcode type, fixed to 1. + // 29-24: Opcode + // 23-12: Resulting type plane + // 11- 0: Operand #1 (if set to (2^12-1), then zero operands) + // + unsigned Opcode = (1 << 30) | (IType << 24) | (Type << 12) | Slots[0]; + // cerr << "1 " << IType << " " << Type << " " << Slots[0] << endl; + output(Opcode, Out); +} + + +// outputInstructionFormat2 - Output two operand instructions, knowing that no +// operand index is >= 2^8. +// +static void outputInstructionFormat2(const Instruction *I, + const SlotCalculator &Table, int *Slots, + unsigned Type, vector &Out) { + unsigned IType = I->getInstType(); // Instruction Opcode ID + + // bits Instruction format: + // -------------------------- + // 31-30: Opcode type, fixed to 2. 
+ // 29-24: Opcode + // 23-16: Resulting type plane + // 15- 8: Operand #1 + // 7- 0: Operand #2 + // + unsigned Opcode = (2 << 30) | (IType << 24) | (Type << 16) | + (Slots[0] << 8) | (Slots[1] << 0); + // cerr << "2 " << IType << " " << Type << " " << Slots[0] << " " + // << Slots[1] << endl; + output(Opcode, Out); +} + + +// outputInstructionFormat3 - Output three operand instructions, knowing that no +// operand index is >= 2^6. +// +static void outputInstructionFormat3(const Instruction *I, + const SlotCalculator &Table, int *Slots, + unsigned Type, vector &Out) { + unsigned IType = I->getInstType(); // Instruction Opcode ID + + // bits Instruction format: + // -------------------------- + // 31-30: Opcode type, fixed to 3 + // 29-24: Opcode + // 23-18: Resulting type plane + // 17-12: Operand #1 + // 11- 6: Operand #2 + // 5- 0: Operand #3 + // + unsigned Opcode = (3 << 30) | (IType << 24) | (Type << 18) | + (Slots[0] << 12) | (Slots[1] << 6) | (Slots[2] << 0); + // cerr << "3 " << IType << " " << Type << " " << Slots[0] << " " + // << Slots[1] << " " << Slots[2] << endl; + output(Opcode, Out); +} + +bool BytecodeWriter::processInstruction(const Instruction *I) { + assert(I->getInstType() < 64 && "Opcode too big???"); + + unsigned NumOperands = 0; + int MaxOpSlot = 0; + int Slots[3]; Slots[0] = (1 << 12)-1; + + const Value *Def; + while ((Def = I->getOperand(NumOperands))) { + int slot = Table.getValSlot(Def); + assert(slot != -1 && "Broken bytecode!"); + if (slot > MaxOpSlot) MaxOpSlot = slot; + if (NumOperands < 3) Slots[NumOperands] = slot; + NumOperands++; + } + + // Figure out which type to encode with the instruction. Typically we want + // the type of the first parameter, as opposed to the type of the instruction + // (for example, with setcc, we always know it returns bool, but the type of + // the first param is actually interesting). But if we have no arguments + // we take the type of the instruction itself. 
+ // + + const Type *Ty; + if (NumOperands) + Ty = I->getOperand(0)->getType(); + else + Ty = I->getType(); + + unsigned Type; + int Slot = Table.getValSlot(Ty); + assert(Slot != -1 && "Type not available!!?!"); + Type = (unsigned)Slot; + + + // Decide which instruction encoding to use. This is determined primarily by + // the number of operands, and secondarily by whether or not the max operand + // will fit into the instruction encoding. More operands == fewer bits per + // operand. + // + switch (NumOperands) { + case 0: + case 1: + if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops + outputInstructionFormat1(I, Table, Slots, Type, Out); + return false; + } + break; + + case 2: + if (MaxOpSlot < (1 << 8)) { + outputInstructionFormat2(I, Table, Slots, Type, Out); + return false; + } + break; + + case 3: + if (MaxOpSlot < (1 << 6)) { + outputInstructionFormat3(I, Table, Slots, Type, Out); + return false; + } + break; + } + + outputInstructionFormat0(I, Table, Type, Out); + return false; +} diff --git a/lib/Bytecode/Writer/Makefile b/lib/Bytecode/Writer/Makefile new file mode 100644 index 00000000000..c03db561543 --- /dev/null +++ b/lib/Bytecode/Writer/Makefile @@ -0,0 +1,7 @@ + +LEVEL = ../../.. + +LIBRARYNAME = bcwriter + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp new file mode 100644 index 00000000000..d03c9454713 --- /dev/null +++ b/lib/Bytecode/Writer/Writer.cpp @@ -0,0 +1,182 @@ +//===-- Writer.cpp - Library for writing VM bytecode files -------*- C++ -*--=// +// +// This library implements the functionality defined in llvm/Bytecode/Writer.h +// +// This library uses the Analysis library to figure out offsets for +// variables in the method tables... +// +// Note that this file uses an unusual technique of outputting all the bytecode +// to a vector of unsigned char's, then copies the vector to an ostream. 
The
+// reason for this is that we must do "seeking" in the stream to do back-
+// patching, and some very important ostreams that we want to support (like
+// pipes) do not support seeking.  :( :( :(
+//
+// The choice of the vector data structure is influenced by the extremely fast
+// "append" speed, plus the free "seek"/replace in the middle of the stream.
+//
+// Note that the performance of this library is not terribly important, because
+// it shouldn't be used by JIT type applications... so it is not a huge focus
+// at least.  :)
+//
+//===----------------------------------------------------------------------===//
+
+#include "WriterInternals.h"
+#include "llvm/Module.h"
+#include "llvm/Method.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ConstPoolVals.h"
+#include "llvm/SymbolTable.h"
+#include "llvm/DerivedTypes.h"
+// NOTE(review): the header names of the two #include lines below, and the
+// element types of the vectors in this file, are missing in this copy of the
+// patch; restore them from upstream.
+#include
+#include
+
+// BytecodeWriter - Constructing a writer emits the whole module into o: the
+// signature, then a Module block holding the first derived type ID, the
+// module contents, and the module symbol table if there is one.
+//
+BytecodeWriter::BytecodeWriter(vector &o, const Module *M)
+  : Out(o), Table(M, false) {
+
+  outputSignature();
+
+  // Emit the top level CLASS block.
+  BytecodeBlock ModuleBlock(BytecodeFormat::Module, Out);
+
+  // Output largest ID of first "primitive" type:
+  output_vbr((unsigned)Type::FirstDerivedTyID, Out);
+  align32(Out);
+
+  // Do the whole module now!
+  processModule(M);
+
+  // If needed, output the symbol table for the class...
+  if (M->hasSymbolTable())
+    outputSymbolTable(*M->getSymbolTable());
+}
+
+// TODO: REMOVE
+#include "llvm/Assembly/Writer.h"
+
+// processConstPool - Emit a ConstantPool block.  Each non-empty type plane of
+// the slot table contributes [num constants][type slot] followed by its
+// constants; for a method pool the walk starts at the plane's module level so
+// module constants are not written again.  For the module pool the
+// ModuleGlobalInfo block is written right after the pool is closed.  Always
+// returns false.
+//
+bool BytecodeWriter::processConstPool(const ConstantPool &CP, bool isMethod) {
+  // Heap allocated so the block can be closed (see 'delete CPool' below)
+  // before the module info block is started.
+  BytecodeBlock *CPool = new BytecodeBlock(BytecodeFormat::ConstantPool, Out);
+
+  unsigned NumPlanes = Table.getNumPlanes();
+
+  for (unsigned pno = 0; pno < NumPlanes; pno++) {
+    const vector &Plane = Table.getPlane(pno);
+    if (Plane.empty()) continue;          // Skip empty type planes...
+
+    unsigned ValNo = 0;   // Don't reemit module constants
+    if (isMethod) ValNo = Table.getModuleLevel(pno);
+
+    unsigned NumConstants = 0;
+    for (unsigned vn = ValNo; vn < Plane.size(); vn++)
+      if (Plane[vn]->getValueType() == Value::ConstantVal)
+        NumConstants++;
+
+    if (NumConstants == 0) continue;  // Skip empty type planes...
+
+    // Output type header: [num entries][type id number]
+    //
+    output_vbr(NumConstants, Out);
+
+    // Output the Type ID Number...
+    int Slot = Table.getValSlot(Plane.front()->getType());
+    assert (Slot != -1 && "Type in constant pool but not in method!!");
+    output_vbr((unsigned)Slot, Out);
+
+    //cerr << "NC: " << NumConstants << " Slot = " << hex << Slot << endl;
+
+    for (; ValNo < Plane.size(); ValNo++) {
+      const Value *V = Plane[ValNo];
+      if (V->getValueType() == Value::ConstantVal) {
+        //cerr << "Serializing value: <" << V->getType() << ">: "
+        //     << ((const ConstPoolVal*)V)->getStrValue() << ":"
+        //     << Out.size() << "\n";
+        outputConstant((const ConstPoolVal*)V);
+      }
+    }
+  }
+
+  delete CPool;  // End bytecode block section!
+
+  if (!isMethod) { // The ModuleInfoBlock follows directly after the c-pool
+    assert(CP.getParent()->getValueType() == Value::ModuleVal);
+    outputModuleInfoBlock((const Module*)CP.getParent());
+  }
+
+  return false;
+}
+
+// outputModuleInfoBlock - Emit the ModuleGlobalInfo block: the type slot of
+// every method in M, in method list order, terminated by the slot of the void
+// type and padded to a 32 bit boundary.
+//
+void BytecodeWriter::outputModuleInfoBlock(const Module *M) {
+  BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfo, Out);
+
+  // Output the types of the methods in this class
+  Module::MethodListType::const_iterator I = M->getMethodList().begin();
+  while (I != M->getMethodList().end()) {
+    int Slot = Table.getValSlot((*I)->getType());
+    assert(Slot != -1 && "Module const pool is broken!");
+    assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!");
+    output_vbr((unsigned)Slot, Out);
+    I++;
+  }
+  output_vbr((unsigned)Table.getValSlot(Type::VoidTy), Out);
+  align32(Out);
+}
+
+// processMethod - Emit a Method block: the method body via ModuleAnalyzer,
+// then the method's symbol table if it has one.  Returns true if the
+// ModuleAnalyzer walk reports an error.
+//
+bool BytecodeWriter::processMethod(const Method *M) {
+  BytecodeBlock MethodBlock(BytecodeFormat::Method, Out);
+
+  Table.incorporateMethod(M);
+
+  // NOTE(review): this early return skips Table.purgeMethod() below -- confirm
+  // that leaving the method incorporated is acceptable on the error path.
+  if (ModuleAnalyzer::processMethod(M)) return true;
+
+  // If needed, output the symbol table for the method...
+  if (M->hasSymbolTable())
+    outputSymbolTable(*M->getSymbolTable());
+
+  Table.purgeMethod();
+  return false;
+}
+
+
+// processBasicBlock - Wrap the instructions of BB in a BasicBlock block.
+//
+bool BytecodeWriter::processBasicBlock(const BasicBlock *BB) {
+  BytecodeBlock MethodBlock(BytecodeFormat::BasicBlock, Out);
+  return ModuleAnalyzer::processBasicBlock(BB);
+}
+
+// outputSymbolTable - Emit a SymbolTable block.  Each type that has entries
+// contributes a header followed by one [slot][name] pair per entry; names are
+// written without forcing alignment.
+//
+void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
+  BytecodeBlock MethodBlock(BytecodeFormat::SymbolTable, Out);
+
+  for (SymbolTable::const_iterator TI = MST.begin(); TI != MST.end(); TI++) {
+    SymbolTable::type_const_iterator I = MST.type_begin(TI->first);
+    SymbolTable::type_const_iterator End = MST.type_end(TI->first);
+    int Slot;
+
+    if (I == End) continue;  // Don't mess with an absent type...
+
+    // Symtab block header: [num entries][type id number]
+    output_vbr(MST.type_size(TI->first), Out);
+
+    Slot = Table.getValSlot(TI->first);
+    assert(Slot != -1 && "Type in symtab, but not in table!");
+    output_vbr((unsigned)Slot, Out);
+
+    for (; I != End; I++) {
+      // Symtab entry: [def slot #][name]
+      Slot = Table.getValSlot(I->second);
+      assert (Slot != -1 && "Value in symtab but not in method!!");
+      output_vbr((unsigned)Slot, Out);
+      output(I->first, Out, false);  // Don't force alignment...
+    }
+  }
+}
+
+// WriteBytecodeToFile - Serialize module C into a temporary buffer, then
+// write the buffer to Out in one call and flush it.
+//
+void WriteBytecodeToFile(const Module *C, ostream &Out) {
+  assert(C && "You can't write a null class!!");
+
+  vector Buffer;
+
+  // This object populates buffer for us...
+  BytecodeWriter BCW(Buffer, C);
+
+  // Okay, write the vector out to the ostream now...
+  Out.write(&Buffer[0], Buffer.size());
+  Out.flush();
+}
diff --git a/lib/Bytecode/Writer/WriterInternals.h b/lib/Bytecode/Writer/WriterInternals.h
new file mode 100644
index 00000000000..be9ccf96672
--- /dev/null
+++ b/lib/Bytecode/Writer/WriterInternals.h
@@ -0,0 +1,74 @@
+//===-- WriterInternals.h - Data structures shared by the Writer -*- C++ -*--=//
+//
+// This header defines the interface used between components of the bytecode
+// writer.
+//
+// Note that the performance of this library is not terribly important, because
+// it shouldn't be used by JIT type applications... so it is not a huge focus
+// at least.
:) +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_BYTECODE_WRITER_WRITERINTERNALS_H +#define LLVM_LIB_BYTECODE_WRITER_WRITERINTERNALS_H + +#include "llvm/Bytecode/Writer.h" +#include "llvm/Bytecode/Format.h" +#include "llvm/Bytecode/Primitives.h" +#include "llvm/Analysis/SlotCalculator.h" +#include "llvm/Tools/DataTypes.h" +#include "llvm/Instruction.h" + +class BytecodeWriter : public ModuleAnalyzer { + vector &Out; + SlotCalculator Table; +public: + BytecodeWriter(vector &o, const Module *M); + +protected: + virtual bool processConstPool(const ConstantPool &CP, bool isMethod); + virtual bool processMethod(const Method *M); + virtual bool processBasicBlock(const BasicBlock *BB); + virtual bool processInstruction(const Instruction *I); + +private : + inline void outputSignature() { + static const unsigned char *Sig = (const unsigned char*)"llvm"; + Out.insert(Out.end(), Sig, Sig+4); // output the bytecode signature... + } + + void outputModuleInfoBlock(const Module *C); + void outputSymbolTable(const SymbolTable &ST); + bool outputConstant(const ConstPoolVal *CPV); + void outputType(const Type *T); +}; + + + + +// BytecodeBlock - Little helper class that helps us do backpatching of bytecode +// block sizes really easily. It backpatches when it goes out of scope. +// +class BytecodeBlock { + unsigned Loc; + vector &Out; + + BytecodeBlock(const BytecodeBlock &); // do not implement + void operator=(const BytecodeBlock &); // do not implement +public: + inline BytecodeBlock(unsigned ID, vector &o) : Out(o) { + output(ID, Out); + output((unsigned)0, Out); // Reserve the space for the block size... + Loc = Out.size(); + } + + inline ~BytecodeBlock() { // Do backpatch when block goes out + // of scope... 
+ // cerr << "OldLoc = " << Loc << " NewLoc = " << NewLoc << " diff = " << (NewLoc-Loc) << endl; + output((unsigned)(Out.size()-Loc), Out, (int)Loc-4); + align32(Out); // Blocks must ALWAYS be aligned + } +}; + + +#endif diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 00000000000..1f74058a221 --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,5 @@ +LEVEL = .. +DIRS = VMCore Analysis Assembly Bytecode Optimizations + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp new file mode 100644 index 00000000000..a1e3156b3b4 --- /dev/null +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -0,0 +1,283 @@ +//===- MethodInlining.cpp - Code to perform method inlining ---------------===// +// +// This file implements inlining of methods. +// +// Specifically, this: +// * Exports functionality to inline any method call +// * Inlines methods that consist of a single basic block +// * Is able to inline ANY method call +// . Has a smart heuristic for when to inline a method +// +// Notice that: +// * This pass has a habit of introducing duplicated constant pool entries, +// and also opens up a lot of opportunities for constant propogation. It is +// a good idea to to run a constant propogation pass, then a DCE pass +// sometime after running this pass. +// +// TODO: Currently this throws away all of the symbol names in the method being +// inlined to try to avoid name clashes. Use a name if it's not taken +// +//===----------------------------------------------------------------------===// + +#include "llvm/Module.h" +#include "llvm/Method.h" +#include "llvm/BasicBlock.h" +#include "llvm/iTerminators.h" +#include "llvm/iOther.h" +#include "llvm/Opt/AllOpts.h" +#include +#include + +#include "llvm/Assembly/Writer.h" + +// RemapInstruction - Convert the instruction operands from referencing the +// current values into those specified by ValueMap. 
+// +static inline void RemapInstruction(Instruction *I, + map &ValueMap) { + + for (unsigned op = 0; const Value *Op = I->getOperand(op); op++) { + Value *V = ValueMap[Op]; + if (!V && Op->getValueType() == Value::MethodVal) + continue; // Methods don't get relocated + + if (!V) { + cerr << "Val = " << endl << Op << "Addr = " << (void*)Op << endl; + cerr << "Inst = " << I; + } + assert(V && "Referenced value not in value map!"); + I->setOperand(op, V); + } +} + +// InlineMethod - This function forcibly inlines the called method into the +// basic block of the caller. This returns false if it is not possible to +// inline this call. The program is still in a well defined state if this +// occurs though. +// +// Note that this only does one level of inlining. For example, if the +// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +// exists in the instruction stream. Similiarly this will inline a recursive +// method by one level. +// +bool InlineMethod(BasicBlock::InstListType::iterator CIIt) { + assert((*CIIt)->getInstType() == Instruction::Call && + "InlineMethod only works on CallInst nodes!"); + assert((*CIIt)->getParent() && "Instruction not embedded in basic block!"); + assert((*CIIt)->getParent()->getParent() && "Instruction not in method!"); + + CallInst *CI = (CallInst*)*CIIt; + const Method *CalledMeth = CI->getCalledMethod(); + Method *CurrentMeth = CI->getParent()->getParent(); + + //cerr << "Inlining " << CalledMeth->getName() << " into " + // << CurrentMeth->getName() << endl; + + BasicBlock *OrigBB = CI->getParent(); + + // Call splitBasicBlock - The original basic block now ends at the instruction + // immediately before the call. The original basic block now ends with an + // unconditional branch to NewBB, and NewBB starts with the call instruction. + // + BasicBlock *NewBB = OrigBB->splitBasicBlock(CIIt); + + // Remove (unlink) the CallInst from the start of the new basic block. 
+ NewBB->getInstList().remove(CI); + + // If we have a return value generated by this call, convert it into a PHI + // node that gets values from each of the old RET instructions in the original + // method. + // + PHINode *PHI = 0; + if (CalledMeth->getReturnType() != Type::VoidTy) { + PHI = new PHINode(CalledMeth->getReturnType(), CI->getName()); + + // The PHI node should go at the front of the new basic block to merge all + // possible incoming values. + // + NewBB->getInstList().push_front(PHI); + + // Anything that used the result of the function call should now use the PHI + // node as their operand. + // + CI->replaceAllUsesWith(PHI); + } + + // Keep a mapping between the original method's values and the new duplicated + // code's values. This includes all of: Method arguments, instruction values, + // constant pool entries, and basic blocks. + // + map ValueMap; + + // Add the method arguments to the mapping: (start counting at 1 to skip the + // method reference itself) + // + Method::ArgumentListType::const_iterator PTI = + CalledMeth->getArgumentList().begin(); + for (unsigned a = 1; Value *Operand = CI->getOperand(a); ++a, ++PTI) { + ValueMap[*PTI] = Operand; + } + + + ValueMap[NewBB] = NewBB; // Returns get converted to reference NewBB + + // Loop over all of the basic blocks in the method, inlining them as + // appropriate. Keep track of the first basic block of the method... + // + for (Method::BasicBlocksType::const_iterator BI = + CalledMeth->getBasicBlocks().begin(); + BI != CalledMeth->getBasicBlocks().end(); BI++) { + const BasicBlock *BB = *BI; + assert(BB->getTerminator() && "BasicBlock doesn't have terminator!?!?"); + + // Create a new basic block to copy instructions into! + BasicBlock *IBB = new BasicBlock("", NewBB->getParent()); + + ValueMap[*BI] = IBB; // Add basic block mapping. + + // Make sure to capture the mapping that a return will use... 
+ // TODO: This assumes that the RET is returning a value computed in the same + // basic block as the return was issued from! + // + const TerminatorInst *TI = BB->getTerminator(); + + // Loop over all instructions copying them over... + Instruction *NewInst; + for (BasicBlock::InstListType::const_iterator II = BB->getInstList().begin(); + II != (BB->getInstList().end()-1); II++) { + IBB->getInstList().push_back((NewInst = (*II)->clone())); + ValueMap[*II] = NewInst; // Add instruction map to value. + } + + // Copy over the terminator now... + switch (TI->getInstType()) { + case Instruction::Ret: { + const ReturnInst *RI = (const ReturnInst*)TI; + + if (PHI) { // The PHI node should include this value! + assert(RI->getReturnValue() && "Ret should have value!"); + assert(RI->getReturnValue()->getType() == PHI->getType() && + "Ret value not consistent in method!"); + PHI->addIncoming((Value*)RI->getReturnValue()); + } + + // Add a branch to the code that was after the original Call. + IBB->getInstList().push_back(new BranchInst(NewBB)); + break; + } + case Instruction::Br: + IBB->getInstList().push_back(TI->clone()); + break; + + default: + cerr << "MethodInlining: Don't know how to handle terminator: " << TI; + abort(); + } + } + + + // Copy over the constant pool... + // + const ConstantPool &CP = CalledMeth->getConstantPool(); + ConstantPool &NewCP = CurrentMeth->getConstantPool(); + for (ConstantPool::plane_const_iterator PI = CP.begin(); PI != CP.end(); ++PI){ + ConstantPool::PlaneType &Plane = **PI; + for (ConstantPool::PlaneType::const_iterator I = Plane.begin(); + I != Plane.end(); ++I) { + ConstPoolVal *NewVal = (*I)->clone(); // Copy existing constant + NewCP.insert(NewVal); // Insert the new copy into local const pool + ValueMap[*I] = NewVal; // Keep track of constant value mappings + } + } + + // Loop over all of the instructions in the method, fixing up operand + // references as we go. This uses ValueMap to do all the hard work. 
+ // + for (Method::BasicBlocksType::const_iterator BI = + CalledMeth->getBasicBlocks().begin(); + BI != CalledMeth->getBasicBlocks().end(); BI++) { + const BasicBlock *BB = *BI; + BasicBlock *NBB = (BasicBlock*)ValueMap[BB]; + + // Loop over all instructions, fixing each one as we find it... + // + for (BasicBlock::InstListType::iterator II = NBB->getInstList().begin(); + II != NBB->getInstList().end(); II++) + RemapInstruction(*II, ValueMap); + } + + if (PHI) RemapInstruction(PHI, ValueMap); // Fix the PHI node also... + + // Change the branch that used to go to NewBB to branch to the first basic + // block of the inlined method. + // + TerminatorInst *Br = OrigBB->getTerminator(); + assert(Br && Br->getInstType() == Instruction::Br && + "splitBasicBlock broken!"); + Br->setOperand(0, ValueMap[CalledMeth->getBasicBlocks().front()]); + + // Since we are now done with the CallInst, we can finally delete it. + delete CI; + return true; +} + +bool InlineMethod(CallInst *CI) { + assert(CI->getParent() && "CallInst not embeded in BasicBlock!"); + BasicBlock *PBB = CI->getParent(); + + BasicBlock::InstListType::iterator CallIt = find(PBB->getInstList().begin(), + PBB->getInstList().end(), + CI); + assert(CallIt != PBB->getInstList().end() && + "CallInst has parent that doesn't contain CallInst?!?"); + return InlineMethod(CallIt); +} + +static inline bool ShouldInlineMethod(const CallInst *CI, const Method *M) { + assert(CI->getParent() && CI->getParent()->getParent() && + "Call not embedded into a method!"); + + // Don't inline a recursive call. + if (CI->getParent()->getParent() == M) return false; + + // Don't inline something too big. This is a really crappy heuristic + if (M->getBasicBlocks().size() > 3) return false; + + // Don't inline into something too big. This is a **really** crappy heuristic + if (CI->getParent()->getParent()->getBasicBlocks().size() > 10) return false; + + // Go ahead and try just about anything else. 
+ return true; +} + + +static inline bool DoMethodInlining(BasicBlock *BB) { + for (BasicBlock::InstListType::iterator I = BB->getInstList().begin(); + I != BB->getInstList().end(); I++) { + if ((*I)->getInstType() == Instruction::Call) { + // Check to see if we should inline this method + CallInst *CI = (CallInst*)*I; + Method *M = CI->getCalledMethod(); + if (ShouldInlineMethod(CI, M)) + return InlineMethod(I); + } + } + return false; +} + +bool DoMethodInlining(Method *M) { + Method::BasicBlocksType &BBs = M->getBasicBlocks(); + bool Changed = false; + + // Loop through now and inline instructions a basic block at a time... + for (Method::BasicBlocksType::iterator I = BBs.begin(); I != BBs.end(); ) + if (DoMethodInlining(*I)) { + Changed = true; + // Iterator is now invalidated by new basic blocks inserted + I = BBs.begin(); + } else { + ++I; + } + + return Changed; +} diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp new file mode 100644 index 00000000000..eef5a039f03 --- /dev/null +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -0,0 +1,239 @@ +//===- ConstantProp.cpp - Code to perform Constant Propogation ------------===// +// +// This file implements constant propogation and merging: +// +// Specifically, this: +// * Folds multiple identical constants in the constant pool together +// Note that if one is named and the other is not, that the result gets the +// original name. +// * Converts instructions like "add int %1, %2" into a direct def of %3 in +// the constant pool +// * Converts conditional branches on a constant boolean value into direct +// branches. +// * Converts phi nodes with one incoming def to the incoming def directly +// . Converts switch statements with one entry into a test & conditional +// branch +// . Converts switches on constant values into an unconditional branch. +// +// Notice that: +// * This pass has a habit of making definitions be dead. 
It is a good idea +// to to run a DCE pass sometime after running this pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Module.h" +#include "llvm/Method.h" +#include "llvm/BasicBlock.h" +#include "llvm/iTerminators.h" +#include "llvm/iOther.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/ConstantPool.h" +#include "llvm/Opt/AllOpts.h" +#include "llvm/Opt/ConstantHandling.h" + +// Merge identical constant values in the constant pool. +// +// TODO: We can do better than this simplistic N^2 algorithm... +// +static bool MergeConstantPoolReferences(ConstantPool &CP) { + bool Modified = false; + for (ConstantPool::plane_iterator PI = CP.begin(); PI != CP.end(); ++PI) { + for (ConstantPool::PlaneType::iterator I = (*PI)->begin(); + I != (*PI)->end(); I++) { + ConstPoolVal *C = *I; + + ConstantPool::PlaneType::iterator J = I; + for (++J; J != (*PI)->end(); J++) { + if (C->equals(*J)) { + Modified = true; + // Okay we know that *I == *J. So now we need to make all uses of *I + // point to *J. + // + C->replaceAllUsesWith(*J); + + (*PI)->remove(I); // Remove C from constant pool... + + if (C->hasName() && !(*J)->hasName()) // The merged constant inherits + (*J)->setName(C->getName()); // the old name... + + delete C; // Delete the constant itself. + break; // Break out of inner for loop + } + } + } + } + return Modified; +} + +inline static bool +ConstantFoldUnaryInst(Method *M, Method::inst_iterator &DI, + UnaryOperator *Op, ConstPoolVal *D) { + ConstPoolVal *ReplaceWith = 0; + + switch (Op->getInstType()) { + case Instruction::Not: ReplaceWith = !*D; break; + case Instruction::Neg: ReplaceWith = -*D; break; + } + + if (!ReplaceWith) return false; // Nothing new to change... + + + // Add the new value to the constant pool... + M->getConstantPool().insert(ReplaceWith); + + // Replaces all of the uses of a variable with uses of the constant. 
+ Op->replaceAllUsesWith(ReplaceWith); + + // Remove the operator from the list of definitions... + Op->getParent()->getInstList().remove(DI.getInstructionIterator()); + + // The new constant inherits the old name of the operator... + if (Op->hasName()) ReplaceWith->setName(Op->getName()); + + // Delete the operator now... + delete Op; + return true; +} + +inline static bool +ConstantFoldBinaryInst(Method *M, Method::inst_iterator &DI, + BinaryOperator *Op, + ConstPoolVal *D1, ConstPoolVal *D2) { + ConstPoolVal *ReplaceWith = 0; + + switch (Op->getInstType()) { + case Instruction::Add: ReplaceWith = *D1 + *D2; break; + case Instruction::Sub: ReplaceWith = *D1 - *D2; break; + + case Instruction::SetEQ: ReplaceWith = *D1 == *D2; break; + case Instruction::SetNE: ReplaceWith = *D1 != *D2; break; + case Instruction::SetLE: ReplaceWith = *D1 <= *D2; break; + case Instruction::SetGE: ReplaceWith = *D1 >= *D2; break; + case Instruction::SetLT: ReplaceWith = *D1 < *D2; break; + case Instruction::SetGT: ReplaceWith = *D1 > *D2; break; + } + + if (!ReplaceWith) return false; // Nothing new to change... + + // Add the new value to the constant pool... + M->getConstantPool().insert(ReplaceWith); + + // Replaces all of the uses of a variable with uses of the constant. + Op->replaceAllUsesWith(ReplaceWith); + + // Remove the operator from the list of definitions... + Op->getParent()->getInstList().remove(DI.getInstructionIterator()); + + // The new constant inherits the old name of the operator... + if (Op->hasName()) ReplaceWith->setName(Op->getName()); + + // Delete the operator now... + delete Op; + return true; +} + +inline static bool ConstantFoldTerminator(TerminatorInst *T) { + // Branch - See if we are conditional jumping on constant + if (T->getInstType() == Instruction::Br) { + BranchInst *BI = (BranchInst*)T; + if (!BI->isUnconditional() && // Are we branching on constant? + BI->getOperand(2)->getValueType() == Value::ConstantVal) { + // YES. 
Change to unconditional branch... + ConstPoolBool *Cond = (ConstPoolBool*)BI->getOperand(2); + Value *Destination = BI->getOperand(Cond->getValue() ? 0 : 1); + + BI->setOperand(0, Destination); // Set the unconditional destination + BI->setOperand(1, 0); // Clear the conditional destination + BI->setOperand(2, 0); // Clear the condition... + return true; + } + } + return false; +} + +// ConstantFoldInstruction - If an instruction references constants, try to fold +// them together... +// +inline static bool +ConstantFoldInstruction(Method *M, Method::inst_iterator &II) { + Instruction *Inst = *II; + if (Inst->isBinaryOp()) { + Value *D1, *D2; + if (((D1 = Inst->getOperand(0))->getValueType() == Value::ConstantVal) & + ((D2 = Inst->getOperand(1))->getValueType() == Value::ConstantVal)) + return ConstantFoldBinaryInst(M, II, (BinaryOperator*)Inst, + (ConstPoolVal*)D1, (ConstPoolVal*)D2); + + } else if (Inst->isUnaryOp()) { + Value *D; + if ((D = Inst->getOperand(0))->getValueType() == Value::ConstantVal) + return ConstantFoldUnaryInst(M, II, (UnaryOperator*)Inst, + (ConstPoolVal*)D); + } else if (Inst->isTerminator()) { + return ConstantFoldTerminator((TerminatorInst*)Inst); + + } else if (Inst->getInstType() == Instruction::PHINode) { + PHINode *PN = (PHINode*)Inst; // If it's a PHI node and only has one operand + // Then replace it directly with that operand. + assert(PN->getOperand(0) && "PHI Node must have at least one operand!"); + if (PN->getOperand(1) == 0) { // If the PHI Node has exactly 1 operand + Value *V = PN->getOperand(0); + PN->replaceAllUsesWith(V); // Replace all uses of this PHI + // Unlink from basic block + PN->getParent()->getInstList().remove(II.getInstructionIterator()); + if (PN->hasName()) V->setName(PN->getName()); // Inherit PHINode name + delete PN; // Finally, delete the node... 
+ return true; + } + } + return false; +} + +// DoConstPropPass - Propogate constants and do constant folding on instructions +// this returns true if something was changed, false if nothing was changed. +// +static bool DoConstPropPass(Method *M) { + bool SomethingChanged = false; + +#if 1 + Method::inst_iterator It = M->inst_begin(); + while (It != M->inst_end()) + if (ConstantFoldInstruction(M, It)) { + SomethingChanged = true; // If returned true, iter is already incremented + + // Incrementing the iterator in an unchecked manner could mess up the + // internals of 'It'. To make sure everything is happy, tell it we might + // have broken it. + It.resyncInstructionIterator(); + } else { + ++It; + } +#else + Method::BasicBlocksType::iterator BBIt = M->getBasicBlocks().begin(); + for (; BBIt != M->getBasicBlocks().end(); BBIt++) { + BasicBlock *BB = *BBIt; + + BasicBlock::InstListType::iterator DI = BB->getInstList().begin(); + for (; DI != BB->getInstList().end(); DI++) + SomethingChanged |= ConstantFoldInstruction(M, DI); + } +#endif + return SomethingChanged; +} + + +// returns true on failure, false on success... +// +bool DoConstantPropogation(Method *M) { + bool Modified = false; + + // Fold constants until we make no progress... + while (DoConstPropPass(M)) Modified = true; + + // Merge identical constants last: this is important because we may have just + // introduced constants that already exist! + // + Modified |= MergeConstantPoolReferences(M->getConstantPool()); + + return Modified; +} diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp new file mode 100644 index 00000000000..797edf50544 --- /dev/null +++ b/lib/Transforms/Scalar/DCE.cpp @@ -0,0 +1,193 @@ +//===- DCE.cpp - Code to perform dead code elimination --------------------===// +// +// This file implements dead code elimination and basic block merging. 
+// +// Specifically, this: +// * removes definitions with no uses (including unused constants) +// * removes basic blocks with no predecessors +// * merges a basic block into its predecessor if there is only one and the +// predecessor only has one successor. +// +// TODO: This should REALLY be recursive instead of iterative. Right now, we +// scan linearly through values, removing unused ones as we go. The problem is +// that this may cause other earlier values to become unused. To make sure that +// we get them all, we iterate until things stop changing. Instead, when +// removing a value, recheck all of its operands to see if they are now unused. +// Piece of cake, and more efficient as well. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Module.h" +#include "llvm/Method.h" +#include "llvm/BasicBlock.h" +#include "llvm/iTerminators.h" +#include "llvm/Opt/AllOpts.h" + +struct ConstPoolDCE { + enum { EndOffs = 0 }; + static bool isDCEable(const Value *) { return true; } +}; + +struct BasicBlockDCE { + enum { EndOffs = 1 }; + static bool isDCEable(const Instruction *I) { + return !I->hasSideEffects(); + } +}; + +template +static bool RemoveUnusedDefs(ValueHolder &Vals, + DCEController DCEControl) { + bool Changed = false; + typedef ValueHolder Container; + + int Offset = DCEController::EndOffs; + for (Container::iterator DI = Vals.begin(); DI != Vals.end()-Offset; ) { + // Look for un"used" definitions... 
+ if ((*DI)->use_empty() && DCEController::isDCEable(*DI)) { + // Bye bye + delete Vals.remove(DI); + Changed = true; + } else { + DI++; + } + } + return Changed; +} + + +bool DoRemoveUnusedConstants(SymTabValue *S) { + bool Changed = false; + ConstantPool &CP = S->getConstantPool(); + for (ConstantPool::plane_iterator PI = CP.begin(); PI != CP.end(); ++PI) + Changed |= RemoveUnusedDefs(**PI, ConstPoolDCE()); + return Changed; +} + + +static void ReplaceUsesWithConstant(Instruction *I) { + // Get the method level constant pool + ConstantPool &CP = I->getParent()->getParent()->getConstantPool(); + + ConstPoolVal *CPV = 0; + ConstantPool::PlaneType *P; + if (!CP.getPlane(I->getType(), P)) { // Does plane exist? + // Yes, is it empty? + if (!P->empty()) CPV = P->front(); + } + + if (CPV == 0) { // We don't have an existing constant to reuse. Just add one. + CPV = ConstPoolVal::getNullConstant(I->getType()); // Create a new constant + + // Add the new value to the constant pool... + CP.insert(CPV); + } + + // Make all users of this instruction reference the constant instead + I->replaceAllUsesWith(CPV); +} + +static bool DoDCEPass(Method *M) { + Method::BasicBlocksType::iterator BBIt; + Method::BasicBlocksType &BBs = M->getBasicBlocks(); + bool Changed = false; + + // Loop through now and remove instructions that have no uses... + for (BBIt = BBs.begin(); BBIt != BBs.end(); BBIt++) + Changed |= RemoveUnusedDefs((*BBIt)->getInstList(), BasicBlockDCE()); + + // Scan through and remove basic blocks that have no predecessors (except, + // of course, the first one. :) (so skip first block) + // + for (BBIt = BBs.begin(), ++BBIt; BBIt != BBs.end(); BBIt++) { + BasicBlock *BB = *BBIt; + assert(BB->getTerminator() && + "Degenerate basic block encountered!"); // Empty bb??? 
+ + if (BB->pred_begin() == BB->pred_end() && + !BB->hasConstantPoolReferences()) { + + while (!BB->getInstList().empty()) { + Instruction *I = BB->getInstList().front(); + // If this instruction is used, replace uses with an arbitrary + // constant value. Because control flow can't get here, we don't care + // what we replace the value with. + if (!I->use_empty()) ReplaceUsesWithConstant(I); + + // Remove the instruction from the basic block + BasicBlock::InstListType::iterator f = BB->getInstList().begin(); + delete BB->getInstList().remove(f); + } + + delete BBs.remove(BBIt); + ++BBIt; // remove puts use on the previous block, we want the next one + Changed = true; + } + } + + // Loop through an merge basic blocks into their predecessor if there is only + // one, and if there is only one successor of the predecessor. + // + for (BBIt = BBs.begin(); BBIt != BBs.end(); BBIt++) { + BasicBlock *BB = *BBIt; + + // Is there exactly one predecessor to this block? + BasicBlock::pred_iterator PI(BB->pred_begin()); + if (PI != BB->pred_end() && ++PI == BB->pred_end() && + !BB->hasConstantPoolReferences()) { + BasicBlock *Pred = *BB->pred_begin(); + TerminatorInst *Term = Pred->getTerminator(); + if (Term == 0) continue; // Err... malformed basic block! + + // Is it an unconditional branch? + if (Term->getInstType() != Instruction::Br || + !((BranchInst*)Term)->isUnconditional()) + continue; // Nope, maybe next time... + + Changed = true; + + // Make all branches to the predecessor now point to the successor... + Pred->replaceAllUsesWith(BB); + + // Move all definitions in the predecessor to the successor... + BasicBlock::InstListType::iterator DI = Pred->getInstList().end(); + assert(Pred->getTerminator() && + "Degenerate basic block encountered!"); // Empty bb??? 
+ delete Pred->getInstList().remove(--DI); // Remove terminator + + while (Pred->getInstList().begin() != (DI = Pred->getInstList().end())) { + Instruction *Def = Pred->getInstList().remove(--DI); // Remove from end + BB->getInstList().push_front(Def); // Add to front... + } + + // Remove basic block from the method... + BBs.remove(Pred); + + // Always inherit predecessors name if it exists... + if (Pred->hasName()) BB->setName(Pred->getName()); + + // So long you waste of a basic block you... + delete Pred; + } + } + + // Remove unused constants + Changed |= DoRemoveUnusedConstants(M); + return Changed; +} + + +// It is possible that we may require multiple passes over the code to fully +// eliminate dead code. Iterate until we are done. +// +bool DoDeadCodeElimination(Method *M) { + bool Changed = false; + while (DoDCEPass(M)) Changed = true; + return Changed; +} + +bool DoDeadCodeElimination(Module *C) { + bool Val = ApplyOptToAllMethods(C, DoDeadCodeElimination); + while (DoRemoveUnusedConstants(C)) Val = true; + return Val; +} diff --git a/lib/Transforms/Scalar/SymbolStripping.cpp b/lib/Transforms/Scalar/SymbolStripping.cpp new file mode 100644 index 00000000000..af5f18f305b --- /dev/null +++ b/lib/Transforms/Scalar/SymbolStripping.cpp @@ -0,0 +1,55 @@ +//===- SymbolStripping.cpp - Code to string symbols for methods and modules -=// +// +// This file implements stripping symbols out of symbol tables. +// +// Specifically, this allows you to strip all of the symbols out of: +// * A method +// * All methods in a module +// * All symbols in a module (all method symbols + all module scope symbols) +// +// Notice that: +// * This pass makes code much less readable, so it should only be used in +// situations where the 'strip' utility would be used (such as reducing +// code size, and making it harder to reverse engineer code). 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Module.h" +#include "llvm/Method.h" +#include "llvm/SymbolTable.h" +#include "llvm/Opt/AllOpts.h" + +static bool StripSymbolTable(SymbolTable *SymTab) { + if (SymTab == 0) return false; // No symbol table? No problem. + bool RemovedSymbol = false; + + for (SymbolTable::iterator I = SymTab->begin(); I != SymTab->end(); I++) { + map &Plane = I->second; + + map::iterator B; + while ((B = Plane.begin()) != Plane.end()) { // Found nonempty type plane! + B->second->setName(""); // Set name to "", removing from symbol table! + RemovedSymbol = true; + assert(Plane.begin() != B); + } + } + + return RemovedSymbol; +} + + +// DoSymbolStripping - Remove all symbolic information from a method +// +bool DoSymbolStripping(Method *M) { + return StripSymbolTable(M->getSymbolTable()); +} + +// DoFullSymbolStripping - Remove all symbolic information from all methods +// in a module, and all module level symbols. (method names, etc...) +// +bool DoFullSymbolStripping(Module *M) { + // Remove all symbols from methods in this module... and then strip all of the + // symbols in this module... + // + return DoSymbolStripping(M) | StripSymbolTable(M->getSymbolTable()); +} diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp new file mode 100644 index 00000000000..e23403b0d0d --- /dev/null +++ b/lib/VMCore/AsmWriter.cpp @@ -0,0 +1,328 @@ +//===-- Writer.cpp - Library for Printing VM assembly files ------*- C++ -*--=// +// +// This library implements the functionality defined in llvm/Assembly/Writer.h +// +// This library uses the Analysis library to figure out offsets for +// variables in the method tables... +// +// TODO: print out the type name instead of the full type if a particular type +// is in the symbol table... 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Assembly/Writer.h" +#include "llvm/Analysis/SlotCalculator.h" +#include "llvm/Module.h" +#include "llvm/Method.h" +#include "llvm/BasicBlock.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/iOther.h" +#include "llvm/iMemory.h" + +class AssemblyWriter : public ModuleAnalyzer { + ostream &Out; + SlotCalculator &Table; +public: + inline AssemblyWriter(ostream &o, SlotCalculator &Tab) : Out(o), Table(Tab) { + } + + inline void write(const Module *M) { processModule(M); } + inline void write(const Method *M) { processMethod(M); } + inline void write(const BasicBlock *BB) { processBasicBlock(BB); } + inline void write(const Instruction *I) { processInstruction(I); } + inline void write(const ConstPoolVal *CPV) { processConstant(CPV); } + +protected: + virtual bool visitMethod(const Method *M); + virtual bool processConstPool(const ConstantPool &CP, bool isMethod); + virtual bool processConstant(const ConstPoolVal *CPV); + virtual bool processMethod(const Method *M); + virtual bool processMethodArgument(const MethodArgument *MA); + virtual bool processBasicBlock(const BasicBlock *BB); + virtual bool processInstruction(const Instruction *I); + +private : + void writeOperand(const Value *Op, bool PrintType, bool PrintName = true); +}; + + + +// visitMethod - This member is called after the above two steps, visting each +// method, because they are effectively values that go into the constant pool. +// +bool AssemblyWriter::visitMethod(const Method *M) { + return false; +} + +bool AssemblyWriter::processConstPool(const ConstantPool &CP, bool isMethod) { + // Done printing arguments... + if (isMethod) Out << ")\n"; + + ModuleAnalyzer::processConstPool(CP, isMethod); + + if (isMethod) + Out << "begin"; + else + Out << "implementation\n"; + return false; +} + + +// processConstant - Print out a constant pool entry... 
+// +bool AssemblyWriter::processConstant(const ConstPoolVal *CPV) { + Out << "\t"; + + // Print out name if it exists... + if (CPV->hasName()) + Out << "%" << CPV->getName() << " = "; + + // Print out the opcode... + Out << CPV->getType(); + + // Write the value out now... + writeOperand(CPV, false, false); + + if (!CPV->hasName() && CPV->getType() != Type::VoidTy) { + int Slot = Table.getValSlot(CPV); // Print out the def slot taken... + Out << "\t\t; <" << CPV->getType() << ">:"; + if (Slot >= 0) Out << Slot; + else Out << ""; + } + + Out << endl; + return false; +} + +// processMethod - Process all aspects of a method. +// +bool AssemblyWriter::processMethod(const Method *M) { + // Print out the return type and name... + Out << "\n" << M->getReturnType() << " \"" << M->getName() << "\"("; + Table.incorporateMethod(M); + ModuleAnalyzer::processMethod(M); + Table.purgeMethod(); + Out << "end\n"; + return false; +} + +// processMethodArgument - This member is called for every argument that +// is passed into the method. Simply print it out +// +bool AssemblyWriter::processMethodArgument(const MethodArgument *Arg) { + // Insert commas as we go... the first arg doesn't get a comma + if (Arg != Arg->getParent()->getArgumentList().front()) Out << ", "; + + // Output type... + Out << Arg->getType(); + + // Output name, if available... + if (Arg->hasName()) + Out << " %" << Arg->getName(); + else if (Table.getValSlot(Arg) < 0) + Out << ""; + + return false; +} + +// processBasicBlock - This member is called for each basic block in a methd. +// +bool AssemblyWriter::processBasicBlock(const BasicBlock *BB) { + if (BB->hasName()) { // Print out the label if it exists... + Out << "\n" << BB->getName() << ":\n"; + } else { + int Slot = Table.getValSlot(BB); + Out << "\t\t\t\t;