MuckyFoot-UrbanChaos/MuckyBasic/link.cpp
2017-05-20 11:14:17 +10:00

976 lines
21 KiB
C++

//
// The linker
//
#include "always.h"
#include "ml.h"
#include "link.h"
#include "sysvar.h"
#include "st.h"
//
// The data we accumulate as we link.
//
SLONG *LINK_instruction;
SLONG LINK_instruction_max;
SLONG LINK_instruction_upto;
CBYTE *LINK_table_data;
SLONG LINK_table_data_max;
SLONG LINK_table_data_upto;
LINK_Global *LINK_global;
SLONG LINK_global_max;
SLONG LINK_global_upto;
LINK_Function *LINK_function;
SLONG LINK_function_max;
SLONG LINK_function_upto;
LINK_Line *LINK_line;
SLONG LINK_line_max;
SLONG LINK_line_upto;
LINK_Jump *LINK_jump;
SLONG LINK_jump_max;
SLONG LINK_jump_upto;
LINK_Field *LINK_field;
SLONG LINK_field_max;
SLONG LINK_field_upto;
LINK_Globalref *LINK_global_ref;
SLONG LINK_global_ref_max;
SLONG LINK_global_ref_upto;
LINK_Undefref *LINK_undef_ref;
SLONG LINK_undef_ref_max;
SLONG LINK_undef_ref_upto;
LINK_Fieldref *LINK_field_ref;
SLONG LINK_field_ref_max;
SLONG LINK_field_ref_upto;
LINK_Datatableref *LINK_data_table_ref;
SLONG LINK_data_table_ref_max;
SLONG LINK_data_table_ref_upto;
CBYTE *LINK_debug_data;
SLONG LINK_debug_data_max;
SLONG LINK_debug_data_upto;
//
// Each file.
//
typedef struct
{
SLONG first_instruction;
SLONG num_instructions;
SLONG first_table_datum;
SLONG num_table_data;
SLONG first_global;
SLONG num_globals;
SLONG first_function;
SLONG num_functions;
SLONG first_line;
SLONG num_lines;
SLONG first_jump;
SLONG num_jumps;
SLONG first_field;
SLONG num_fields;
SLONG first_global_ref;
SLONG num_global_refs;
SLONG first_undef_ref;
SLONG num_undef_refs;
SLONG first_field_ref;
SLONG num_field_refs;
SLONG first_data_table_ref;
SLONG num_data_table_refs;
SLONG first_debug_datum;
SLONG num_debug_data;
} LINK_File;
LINK_File *LINK_file;
SLONG LINK_file_max;
SLONG LINK_file_upto;
//
// The number of globals and fields in the executable.
//
SLONG LINK_global_id_upto;
SLONG LINK_field_id_upto;
//
// Allocates initial memory.
//
void LINK_allocate_memory(void)
{
LINK_instruction_max = 1;
LINK_instruction_upto = 0;
LINK_instruction = (SLONG *) malloc(sizeof(SLONG) * LINK_instruction_max);
LINK_table_data_max = 1;
LINK_table_data_upto = 0;
LINK_table_data = (CBYTE *) malloc(sizeof(CBYTE) * LINK_table_data_max);
LINK_global_max = 1;
LINK_global_upto = 0;
LINK_global = (LINK_Global *) malloc(sizeof(LINK_Global) * LINK_global_max);
LINK_function_max = 1;
LINK_function_upto = 0;
LINK_function = (LINK_Function *) malloc(sizeof(LINK_Function) * LINK_function_max);
LINK_line_max = 1;
LINK_line_upto = 0;
LINK_line = (LINK_Line *) malloc(sizeof(LINK_Line) * LINK_line_max);
LINK_jump_max = 1;
LINK_jump_upto = 0;
LINK_jump = (LINK_Jump *) malloc(sizeof(LINK_Jump) * LINK_jump_max);
LINK_field_max = 1;
LINK_field_upto = 0;
LINK_field = (LINK_Field *) malloc(sizeof(LINK_Field) * LINK_field_max);
LINK_global_ref_max = 1;
LINK_global_ref_upto = 0;
LINK_global_ref = (LINK_Globalref *) malloc(sizeof(LINK_Globalref) * LINK_global_ref_max);
LINK_undef_ref_max = 1;
LINK_undef_ref_upto = 0;
LINK_undef_ref = (LINK_Undefref *) malloc(sizeof(LINK_Undefref) * LINK_undef_ref_max);
LINK_field_ref_max = 1;
LINK_field_ref_upto = 0;
LINK_field_ref = (LINK_Fieldref *) malloc(sizeof(LINK_Fieldref) * LINK_field_ref_max);
LINK_data_table_ref_max = 1;
LINK_data_table_ref_upto = 0;
LINK_data_table_ref = (LINK_Datatableref *) malloc(sizeof(LINK_Datatableref) * LINK_data_table_ref_max);
LINK_debug_data_max = 1;
LINK_debug_data_upto = 0;
LINK_debug_data = (CBYTE *) malloc(sizeof(CBYTE) * LINK_debug_data_max);
LINK_file_max = 1;
LINK_file_upto = 0;
LINK_file = (LINK_File *) malloc(sizeof(LINK_File) * LINK_file_max);
}
//
// Frees all memory.
//
void LINK_free_memory(void)
{
free(LINK_instruction );
free(LINK_table_data );
free(LINK_global );
free(LINK_function );
free(LINK_line );
free(LINK_jump );
free(LINK_field );
free(LINK_global_ref );
free(LINK_undef_ref );
free(LINK_field_ref );
free(LINK_data_table_ref);
free(LINK_debug_data );
free(LINK_file );
LINK_instruction = NULL;
LINK_table_data = NULL;
LINK_global = NULL;
LINK_function = NULL;
LINK_line = NULL;
LINK_jump = NULL;
LINK_field = NULL;
LINK_global_ref = NULL;
LINK_undef_ref = NULL;
LINK_field_ref = NULL;
LINK_data_table_ref = NULL;
LINK_debug_data = NULL;
LINK_file = NULL;
}
SLONG LINK_do(CBYTE *object_fname[], SLONG num_object_files, CBYTE *exec_fname)
{
SLONG i;
SLONG j;
SLONG instruction;
SLONG magic;
LINK_Header lh;
LINK_File *lf;
LINK_Function *lc;
LINK_Jump *lj;
LINK_Global *lg;
LINK_Field *ld;
LINK_Undefref *lu;
LINK_Datatableref *lt;
FILE *handle;
#define LINK_MAGIC_CHECK() {if (fread(&magic, sizeof(SLONG), 1, handle) != 1) goto file_error; ASSERT(magic == 12345678);}
//
// Initialise all data.
//
LINK_allocate_memory();
//
// Load in each file.
//
for (i = 0; i < num_object_files; i++)
{
//
// Open the file.
//
handle = fopen(object_fname[i], "rb");
if (handle == NULL)
{
//
// ERROR! Can't open object file...
//
goto file_error;
}
//
// Load in the header.
//
if (fread(&lh, sizeof(LINK_Header), 1, handle) != 1) goto file_error;
//
// Correct version number?
//
if (lh.version != 1)
{
//
// ERROR!
//
}
//
// Get another LINK_File.
//
if (LINK_file_upto >= LINK_file_max)
{
LINK_file_max *= 2;
LINK_file = (LINK_File *) realloc(LINK_file, sizeof(LINK_File) * LINK_file_max);
}
lf = &LINK_file[LINK_file_upto++];
//
// Initialise.
//
memset(lf, 0, sizeof(LINK_File));
//
// Instructions.
//
lf->first_instruction = LINK_instruction_upto;
lf->num_instructions = lh.num_instructions;
while(LINK_instruction_upto + lh.num_instructions > LINK_instruction_max)
{
LINK_instruction_max *= 2;
LINK_instruction = (SLONG *) realloc(LINK_instruction, sizeof(SLONG) * LINK_instruction_max);
}
if (fread(LINK_instruction + LINK_instruction_upto, sizeof(SLONG), lh.num_instructions, handle) != lh.num_instructions) goto file_error;
LINK_instruction_upto += lh.num_instructions;
LINK_MAGIC_CHECK();
//
// The data table.
//
lf->first_table_datum = LINK_table_data_upto;
lf->num_table_data = lh.data_table_length_in_bytes;
while(LINK_table_data_upto + lh.data_table_length_in_bytes > LINK_table_data_max)
{
LINK_table_data_max *= 2;
LINK_table_data = (CBYTE *) realloc(LINK_table_data, sizeof(CBYTE) * LINK_table_data_max);
}
if (fread(LINK_table_data + LINK_table_data_upto, sizeof(CBYTE), lh.data_table_length_in_bytes, handle) != lh.data_table_length_in_bytes) goto file_error;
LINK_table_data_upto += lh.data_table_length_in_bytes;
LINK_MAGIC_CHECK();
//
// Globals.
//
lf->first_global = LINK_global_upto;
lf->num_globals = lh.num_globals;
while(LINK_global_upto + lh.num_globals > LINK_global_max)
{
LINK_global_max *= 2;
LINK_global = (LINK_Global *) realloc(LINK_global, sizeof(LINK_Global) * LINK_global_max);
}
if (fread(LINK_global + LINK_global_upto, sizeof(LINK_Global), lh.num_globals, handle) != lh.num_globals) goto file_error;
LINK_global_upto += lh.num_globals;
LINK_MAGIC_CHECK();
//
// Functions.
//
lf->first_function = LINK_function_upto;
lf->num_functions = lh.num_functions;
while(LINK_function_upto + lh.num_functions > LINK_function_max)
{
LINK_function_max *= 2;
LINK_function = (LINK_Function *) realloc(LINK_function, sizeof(LINK_Function) * LINK_function_max);
}
if (fread(LINK_function + LINK_function_upto, sizeof(LINK_Function), lh.num_functions, handle) != lh.num_functions) goto file_error;
LINK_function_upto += lh.num_functions;
LINK_MAGIC_CHECK();
//
// Lines.
//
lf->first_line = LINK_line_upto;
lf->num_lines = lh.num_lines;
while(LINK_line_upto + lh.num_lines > LINK_line_max)
{
LINK_line_max *= 2;
LINK_line = (LINK_Line *) realloc(LINK_line, sizeof(LINK_Line) * LINK_line_max);
}
if (fread(LINK_line + LINK_line_upto, sizeof(LINK_Line), lh.num_lines, handle) != lh.num_lines) goto file_error;
LINK_line_upto += lh.num_lines;
LINK_MAGIC_CHECK();
//
// Jumps.
//
lf->first_jump = LINK_jump_upto;
lf->num_jumps = lh.num_jumps;
while(LINK_jump_upto + lh.num_jumps > LINK_jump_max)
{
LINK_jump_max *= 2;
LINK_jump = (LINK_Jump *) realloc(LINK_jump, sizeof(LINK_Jump) * LINK_jump_max);
}
if (fread(LINK_jump + LINK_jump_upto, sizeof(LINK_Jump), lh.num_jumps, handle) != lh.num_jumps) goto file_error;
LINK_jump_upto += lh.num_jumps;
LINK_MAGIC_CHECK();
//
// Fields.
//
lf->first_field = LINK_field_upto;
lf->num_fields = lh.num_fields;
while(LINK_field_upto + lh.num_fields > LINK_field_max)
{
LINK_field_max *= 2;
LINK_field = (LINK_Field *) realloc(LINK_field, sizeof(LINK_Field) * LINK_field_max);
}
if (fread(LINK_field + LINK_field_upto, sizeof(LINK_Field), lh.num_fields, handle) != lh.num_fields) goto file_error;
LINK_field_upto += lh.num_fields;
LINK_MAGIC_CHECK();
//
// Globalrefs.
//
lf->first_global_ref = LINK_global_ref_upto;
lf->num_global_refs = lh.num_global_refs;
while(LINK_global_ref_upto + lh.num_global_refs > LINK_global_ref_max)
{
LINK_global_ref_max *= 2;
LINK_global_ref = (LINK_Globalref *) realloc(LINK_global_ref, sizeof(LINK_Globalref) * LINK_global_ref_max);
}
if (fread(LINK_global_ref + LINK_global_ref_upto, sizeof(LINK_Globalref), lh.num_global_refs, handle) != lh.num_global_refs) goto file_error;
LINK_global_ref_upto += lh.num_global_refs;
LINK_MAGIC_CHECK();
//
// Undefined function references.
//
lf->first_undef_ref = LINK_undef_ref_upto;
lf->num_undef_refs = lh.num_undef_refs;
while(LINK_undef_ref_upto + lh.num_undef_refs > LINK_undef_ref_max)
{
LINK_undef_ref_max *= 2;
LINK_undef_ref = (LINK_Undefref *) realloc(LINK_undef_ref, sizeof(LINK_Undefref) * LINK_undef_ref_max);
}
if (fread(LINK_undef_ref + LINK_undef_ref_upto, sizeof(LINK_Undefref), lh.num_undef_refs, handle) != lh.num_undef_refs) goto file_error;
LINK_undef_ref_upto += lh.num_undef_refs;
LINK_MAGIC_CHECK();
//
// Fieldrefs.
//
lf->first_field_ref = LINK_field_ref_upto;
lf->num_field_refs = lh.num_field_refs;
while(LINK_field_ref_upto + lh.num_field_refs > LINK_field_ref_max)
{
LINK_field_ref_max *= 2;
LINK_field_ref = (LINK_Fieldref *) realloc(LINK_field_ref, sizeof(LINK_Fieldref) * LINK_field_ref_max);
}
if (fread(LINK_field_ref + LINK_field_ref_upto, sizeof(LINK_Fieldref), lh.num_field_refs, handle) != lh.num_field_refs) goto file_error;
LINK_field_ref_upto += lh.num_field_refs;
LINK_MAGIC_CHECK();
//
// Data table refs.
//
lf->first_data_table_ref = LINK_data_table_ref_upto;
lf->num_data_table_refs = lh.num_data_table_refs;
while(LINK_data_table_ref_upto + lh.num_data_table_refs > LINK_data_table_ref_max)
{
LINK_data_table_ref_max *= 2;
LINK_data_table_ref = (LINK_Datatableref *) realloc(LINK_data_table_ref, sizeof(LINK_Datatableref) * LINK_data_table_ref_max);
}
if (fread(LINK_data_table_ref + LINK_data_table_ref_upto, sizeof(LINK_Datatableref), lh.num_data_table_refs, handle) != lh.num_data_table_refs) goto file_error;
LINK_data_table_ref_upto += lh.num_data_table_refs;
LINK_MAGIC_CHECK();
//
// Load in the debug data.
//
lf->first_debug_datum = LINK_debug_data_upto;
if (fread(&lf->num_debug_data, sizeof(SLONG), 1, handle) != 1) goto file_error;
while(LINK_debug_data_upto + lf->num_debug_data > LINK_debug_data_max)
{
LINK_debug_data_max *= 2;
LINK_debug_data = (CBYTE *) realloc(LINK_debug_data, sizeof(CBYTE) * LINK_debug_data_max);
}
if (fread(LINK_debug_data + LINK_debug_data_upto, sizeof(CBYTE), lf->num_debug_data, handle) != lf->num_debug_data) goto file_error;
LINK_debug_data_upto += lf->num_debug_data;
//
// Finished. We should have read the entire file.
//
#ifdef _DEBUG
//
// Make sure there is no file left!
//
{
UBYTE junk[1024];
SLONG read;
read = fread(junk, 1, 1024, handle);
ASSERT(read == 0);
}
#endif
//
// Finished reading the file.
//
fclose(handle);
}
//
// Add all exported globals, fields and exported functions
// to the symbol table. This helps us allocate global_ids
// and field_ids and resolve calls to functions across files.
//
ST_clear_all();
SYSVAR_init();
LINK_global_id_upto = 0;
LINK_field_id_upto = SYSVAR_FIELD_NUMBER;
for (i = 0; i < LINK_file_upto; i++)
{
lf = &LINK_file[i];
//
// Overwrite each global's index field with the new global_id.
//
for (j = 0; j < lf->num_globals; j++)
{
lg = &LINK_global[lf->first_global + j];
if (lg->export)
{
ASSERT(WITHIN(lg->name + lf->first_debug_datum, 0, LINK_debug_data_upto - 1));
//
// Is this global already in the symbol table?
//
if (ST_find(LINK_debug_data + lg->name + lf->first_debug_datum))
{
//
// ERROR! Multiply defined exported global.
//
ASSERT(0);
}
else
{
//
// The global wasn't found. Allocate a new global_id and add
// this global to the symbol table.
//
lg->index = LINK_global_id_upto++;
ST_add(ST_TABLE_GLOBAL, LINK_debug_data + lg->name + lf->first_debug_datum, lg->index, 0);
}
}
else
{
ASSERT(WITHIN(lg->name + lf->first_debug_datum, 0, LINK_debug_data_upto - 1));
if (lg->local)
{
//
// This global is local for sure, so it needs it's own ID.
//
lg->index = LINK_global_id_upto++;
}
else
{
//
// Is this global already in the symbol table?
//
if (ST_find(LINK_debug_data + lg->name + lf->first_debug_datum))
{
//
// Use the same global id.
//
lg->index = ST_found_value;
}
else
{
//
// Allocate a new global_id.
//
lg->index = LINK_global_id_upto++;
}
}
}
}
//
// Overwrite each field's index field with the new field_id.
//
for (j = 0; j < lf->num_fields; j++)
{
ld = &LINK_field[lf->first_field + j];
ASSERT(WITHIN(ld->name + lf->first_debug_datum, 0, LINK_debug_data_upto - 1));
//
// Is this field already in the symbol table?
//
if (ST_find(LINK_debug_data + ld->name + lf->first_debug_datum))
{
//
// Use the same field_id.
//
ld->index = ST_found_value;
}
else
{
//
// Allocate a new field_id and add it to the symbol table.
//
ld->index = LINK_field_id_upto++;
ST_add(ST_TABLE_GLOBAL, LINK_debug_data + ld->name + lf->first_debug_datum, ld->index, 0);
}
}
//
// Add all the exported functions to the symbol table.
//
for (j = 0; j < lf->num_functions; j++)
{
lc = &LINK_function[lf->first_function + j];
if (lc->export)
{
ASSERT(WITHIN(lc->name + lf->first_debug_datum, 0, LINK_debug_data_upto - 1));
//
// The value associated with the function is the first instruction
// of the function body.
//
if (ST_find(LINK_debug_data + lc->name + lf->first_debug_datum))
{
//
// ERROR! Multiply defined function!
//
ASSERT(0);
}
else
{
//
// What is the first instruction of the function body?
//
ASSERT(WITHIN(lc->line_start, 0, lf->num_lines - 1));
ASSERT(WITHIN(lc->line_start + lf->first_line, 0, LINK_line_upto - 1));
instruction = LINK_line[lf->first_line + lc->line_start].instruction;
ASSERT(WITHIN(instruction, 0, lf->num_instructions - 1));
instruction += lf->first_instruction;
ASSERT(WITHIN(instruction, 0, LINK_instruction_upto - 1));
//
// The first two instructions of a function are a GOTO to the
// end of the funciton. This is so that execution will jump
// over the function instead of leaking into it accidentally.
//
instruction += 2;
//
// Add the function to the symbol table.
//
ST_add(
ST_TABLE_GLOBAL,
LINK_debug_data + lc->name + lf->first_debug_datum,
instruction,
0);
}
}
}
}
//
// Do the actual linking.
//
for (i = 0; i < LINK_file_upto; i++)
{
lf = &LINK_file[i];
//
// Fix instructions that refer to globals.
//
for (j = 0; j < lf->num_global_refs; j++)
{
ASSERT(WITHIN(lf->first_global_ref + j, 0, LINK_global_ref_upto - 1));
instruction = LINK_global_ref[lf->first_global_ref + j].instruction;
instruction += lf->first_instruction;
ASSERT(WITHIN(instruction, 0, LINK_instruction_upto - 1));
//
// We have the instruction that contains the old global_id.
// Overwrite it with the new one.
//
ASSERT(WITHIN(LINK_instruction[instruction], 0, lf->num_globals - 1));
LINK_instruction[instruction] = LINK_global[lf->first_global + LINK_instruction[instruction]].index;
}
//
// Fix instructions that refer to fields.
//
for (j = 0; j < lf->num_field_refs; j++)
{
ASSERT(WITHIN(lf->first_field_ref + j, 0, LINK_field_ref_upto - 1));
instruction = LINK_field_ref[lf->first_field_ref + j].instruction;
instruction += lf->first_instruction;
ASSERT(WITHIN(instruction, 0, LINK_instruction_upto - 1));
//
// We have the instruction that contains the old field_id.
// Overwrite it with the new one.
//
ASSERT(WITHIN(LINK_instruction[instruction], 0, lf->num_fields - 1));
LINK_instruction[instruction] = LINK_field[lf->first_field + LINK_instruction[instruction]].index;
}
//
// Fix calls to functions across files.
//
for (j = 0; j < lf->num_undef_refs; j++)
{
ASSERT(WITHIN(lf->first_undef_ref + j, 0, LINK_undef_ref_upto - 1));
lu = &LINK_undef_ref[lf->first_undef_ref + j];
//
// Look for this undefined function in the symbol table.
//
ASSERT(WITHIN(lu->name + lf->first_debug_datum, 0, LINK_debug_data_upto - 1));
if (!ST_find(LINK_debug_data + lu->name + lf->first_debug_datum))
{
//
// ERROR! Undefined function.
//
ASSERT(0);
}
else
{
//
// Write the address of the function into the instructions.
//
ASSERT(WITHIN(lu->instruction, 0, lf->num_instructions - 1));
ASSERT(WITHIN(lf->first_instruction + lu->instruction, 0, LINK_instruction_upto - 1));
ASSERT(WITHIN(ST_found_value, 0, LINK_instruction_upto - 1));
LINK_instruction[lf->first_instruction + lu->instruction] = ST_found_value;
}
}
//
// Fix jumps.
//
for (j = 0; j < lf->num_jumps; j++)
{
ASSERT(WITHIN(lf->first_jump + j, 0, LINK_jump_upto - 1));
lj = &LINK_jump[lf->first_jump + j];
ASSERT(WITHIN(lj->instruction, 0, lf->num_instructions - 1));
ASSERT(WITHIN(lj->instruction + lf->first_instruction, 0, LINK_instruction_upto - 1));
LINK_instruction[lf->first_instruction + lj->instruction] += lf->first_instruction;
}
//
// Fix references into the data table.
//
for (j = 0; j < lf->num_data_table_refs; j++)
{
ASSERT(WITHIN(lf->first_data_table_ref + j, 0, LINK_data_table_ref_upto - 1));
lt = &LINK_data_table_ref[lf->first_data_table_ref + j];
ASSERT(WITHIN(lt->instruction, 0, lf->num_instructions - 1));
ASSERT(WITHIN(lt->instruction + lf->first_instruction, 0, LINK_instruction_upto - 1));
ASSERT(WITHIN(LINK_instruction[lt->instruction + lf->first_instruction], 0, lf->num_table_data - 1));
LINK_instruction[lt->instruction + lf->first_instruction] += lf->first_table_datum;
ASSERT(WITHIN(LINK_instruction[lt->instruction + lf->first_instruction], 0, LINK_table_data_upto - 1));
}
//
// The last instruction of any file is an EXIT. Replace all these
// EXIT instrucitons with NOPs except for the last one.
//
if (i == LINK_file_upto - 1)
{
//
// This is the last file.
//
}
else
{
ASSERT(WITHIN(lf->first_instruction + lf->num_instructions - 1, 0, LINK_instruction_upto - 1));
ASSERT(LINK_instruction[lf->first_instruction + lf->num_instructions - 1] == ML_DO_EXIT);
LINK_instruction[lf->first_instruction + lf->num_instructions - 1] = ML_DO_NOP;
}
}
//
// Ready to write out the executable now. Create the
// header of an executable.
//
ML_Header mh;
mh.version = ML_VERSION_NUMBER;
mh.instructions_memory_in_bytes = LINK_instruction_upto * sizeof(SLONG);
mh.data_table_length_in_bytes = LINK_table_data_upto;
mh.num_globals = LINK_global_upto;
//
// Open the executable file.
//
handle = fopen(exec_fname, "wb");
if (handle == NULL)
{
//
// ERROR!
//
goto file_error;
}
if (fwrite(&mh, sizeof(mh), 1, handle) != 1) goto file_error;
//
// The instructions.
//
if (fwrite(LINK_instruction, sizeof(SLONG), LINK_instruction_upto, handle) != LINK_instruction_upto) goto file_error;
//
// The string table.
//
if (fwrite(LINK_table_data, sizeof(UBYTE), LINK_table_data_upto, handle) != LINK_table_data_upto) goto file_error;
fclose(handle);
//
// All done.
//
LINK_free_memory();
return TRUE;
file_error:;
//
// ERROR!
//
if (handle)
{
fclose(handle);
}
LINK_free_memory();
return FALSE;
}