Changeset - 41b1cad0b9fe
[Not reviewed]
0 2 1
MH - 3 years ago 2022-02-11 16:01:46
contact@maxhenger.nl
Integrated rewrite pass into compiler
3 files changed with 83 insertions and 3 deletions:
0 comments (0 inline, 0 general)
src/protocol/parser/mod.rs
Show inline comments
 
pub(crate) mod symbol_table;
 
pub(crate) mod type_table;
 
pub(crate) mod tokens;
 
pub(crate) mod token_parsing;
 
pub(crate) mod pass_tokenizer;
 
pub(crate) mod pass_symbols;
 
pub(crate) mod pass_imports;
 
pub(crate) mod pass_definitions;
 
pub(crate) mod pass_definitions_types;
 
pub(crate) mod pass_validation_linking;
 
pub(crate) mod pass_rewriting;
 
pub(crate) mod pass_typing;
 
pub(crate) mod pass_stack_size;
 
mod visitor;
 

	
 
use tokens::*;
 
use crate::collections::*;
 
use visitor::Visitor;
 
use pass_tokenizer::PassTokenizer;
 
use pass_symbols::PassSymbols;
 
use pass_imports::PassImport;
 
use pass_definitions::PassDefinitions;
 
use pass_validation_linking::PassValidationLinking;
 
use pass_typing::{PassTyping, ResolveQueue};
 
use pass_rewriting::PassRewriting;
 
use pass_stack_size::PassStackSize;
 
use symbol_table::*;
 
use type_table::TypeTable;
 

	
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source::*;
 

	
 
use crate::protocol::ast_printer::ASTWriter;
 

	
 
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
 
pub enum ModuleCompilationPhase {
 
    Tokenized,              // source is tokenized
 
    SymbolsScanned,         // all definitions are linked to their type class
 
    ImportsResolved,        // all imports are added to the symbol table
 
    DefinitionsParsed,      // produced the AST for the entire module
 
    TypesAddedToTable,      // added all definitions to the type table
 
    ValidatedAndLinked,     // AST is traversed and has linked the required AST nodes
 
    Typed,                  // Type inference and checking has been performed
 
    Rewritten,              // Special AST nodes are rewritten into regular AST nodes
 
    // When we continue with the compiler:
 
    // Typed,                  // Type inference and checking has been performed
 
    // StackSize
 
}
 

	
 
pub struct Module {
 
    // Buffers
 
    pub source: InputSource,
 
    pub tokens: TokenBuffer,
 
    // Identifiers
 
    pub root_id: RootId,
 
    pub name: Option<(PragmaId, StringRef<'static>)>,
 
    pub version: Option<(PragmaId, i64)>,
 
    pub phase: ModuleCompilationPhase,
 
}
 

	
 
// TODO: This is kind of wrong. Because when we're producing bytecode we would
 
//       like the bytecode itself to not have the notion of the size of a pointer
 
//       type. But until I figure out what we do want I'll just set everything
 
//       to a 64-bit architecture.
 
pub struct TargetArch {
 
    pub array_size_alignment: (usize, usize),
 
    pub slice_size_alignment: (usize, usize),
 
    pub string_size_alignment: (usize, usize),
 
    pub port_size_alignment: (usize, usize),
 
    pub pointer_size_alignment: (usize, usize),
 
}
 

	
 
pub struct PassCtx<'a> {
 
    heap: &'a mut Heap,
 
    symbols: &'a mut SymbolTable,
 
    pool: &'a mut StringPool,
 
    arch: &'a TargetArch,
 
}
 

	
 
pub struct Parser {
 
    // Storage of all information created/gathered during compilation.
 
    pub(crate) heap: Heap,
 
    pub(crate) string_pool: StringPool, // Do not deallocate, holds all strings
 
    pub(crate) modules: Vec<Module>,
 
    pub(crate) symbol_table: SymbolTable,
 
    pub(crate) type_table: TypeTable,
 
    // Compiler passes, used as little state machine that keep their memory
 
    // around.
 
    pass_tokenizer: PassTokenizer,
 
    pass_symbols: PassSymbols,
 
    pass_import: PassImport,
 
    pass_definitions: PassDefinitions,
 
    pass_validation: PassValidationLinking,
 
    pass_typing: PassTyping,
 
    pass_rewriting: PassRewriting,
 
    pass_stack_size: PassStackSize,
 
    // Compiler options
 
    pub write_ast_to: Option<String>,
 
    pub(crate) arch: TargetArch,
 
}
 

	
 
impl Parser {
 
    pub fn new() -> Self {
 
        let mut parser = Parser{
 
            heap: Heap::new(),
 
            string_pool: StringPool::new(),
 
            modules: Vec::new(),
 
            symbol_table: SymbolTable::new(),
 
            type_table: TypeTable::new(),
 
            pass_tokenizer: PassTokenizer::new(),
 
            pass_symbols: PassSymbols::new(),
 
            pass_import: PassImport::new(),
 
            pass_definitions: PassDefinitions::new(),
 
            pass_validation: PassValidationLinking::new(),
 
            pass_typing: PassTyping::new(),
 
            pass_rewriting: PassRewriting::new(),
 
            pass_stack_size: PassStackSize::new(),
 
            write_ast_to: None,
 
            arch: TargetArch {
 
                array_size_alignment: (3*8, 8), // pointer, length, capacity
 
                slice_size_alignment: (2*8, 8), // pointer, length
 
                string_size_alignment: (3*8, 8), // pointer, length, capacity
 
                port_size_alignment: (3*4, 4), // two u32s: connector + port ID
 
                pointer_size_alignment: (8, 8),
 
            }
 
        };
 

	
 
        parser.symbol_table.insert_scope(None, SymbolScope::Global);
 

	
 
        fn quick_type(variants: &[ParserTypeVariant]) -> ParserType {
 
            let mut t = ParserType{ elements: Vec::with_capacity(variants.len()), full_span: InputSpan::new() };
 
            for variant in variants {
 
                t.elements.push(ParserTypeElement{ element_span: InputSpan::new(), variant: variant.clone() });
 
            }
 
            t
 
        }
 

	
 
        use ParserTypeVariant as PTV;
 
        insert_builtin_function(&mut parser, "get", &["T"], |id| (
 
            vec![
 
                ("input", quick_type(&[PTV::Input, PTV::PolymorphicArgument(id.upcast(), 0)]))
 
            ],
 
            quick_type(&[PTV::PolymorphicArgument(id.upcast(), 0)])
 
        ));
 
        insert_builtin_function(&mut parser, "put", &["T"], |id| (
 
            vec![
 
                ("output", quick_type(&[PTV::Output, PTV::PolymorphicArgument(id.upcast(), 0)])),
 
                ("value", quick_type(&[PTV::PolymorphicArgument(id.upcast(), 0)])),
 
            ],
 
            quick_type(&[PTV::Void])
 
        ));
 
        insert_builtin_function(&mut parser, "fires", &["T"], |id| (
 
            vec![
 
                ("port", quick_type(&[PTV::InputOrOutput, PTV::PolymorphicArgument(id.upcast(), 0)]))
 
            ],
 
            quick_type(&[PTV::Bool])
 
        ));
 
        insert_builtin_function(&mut parser, "create", &["T"], |id| (
 
            vec![
 
                ("length", quick_type(&[PTV::IntegerLike]))
 
            ],
 
            quick_type(&[PTV::ArrayLike, PTV::PolymorphicArgument(id.upcast(), 0)])
 
        ));
 
        insert_builtin_function(&mut parser, "length", &["T"], |id| (
 
            vec![
 
@@ -209,96 +217,111 @@ impl Parser {
 
        for module_idx in 0..self.modules.len() {
 
            self.pass_import.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
 
            self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
 
        }
 

	
 
        // Add every known type to the type table
 
        self.type_table.build_base_types(&mut self.modules, &mut pass_ctx)?;
 

	
 
        // Continue compilation with the remaining phases now that the types
 
        // are all in the type table
 
        for module_idx in 0..self.modules.len() {
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                modules: &mut self.modules,
 
                module_idx,
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
                arch: &self.arch,
 
            };
 
            self.pass_validation.visit_module(&mut ctx)?;
 
        }
 

	
 
        // Perform typechecking on all modules
 
        let mut queue = ResolveQueue::new();
 
        for module_idx in 0..self.modules.len() {
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                modules: &mut self.modules,
 
                module_idx,
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
                arch: &self.arch,
 
            };
 
            PassTyping::queue_module_definitions(&mut ctx, &mut queue);
 
        };
 
        while !queue.is_empty() {
 
            let top = queue.pop().unwrap();
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                modules: &mut self.modules,
 
                module_idx: top.root_id.index as usize,
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
                arch: &self.arch,
 
            };
 
            self.pass_typing.handle_module_definition(&mut ctx, &mut queue, top)?;
 
        }
 

	
 
        // Rewrite nodes in tree, then prepare for execution of code
 
        for module_idx in 0..self.modules.len() {
 
            self.modules[module_idx].phase = ModuleCompilationPhase::Typed;
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                modules: &mut self.modules,
 
                module_idx,
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
                arch: &self.arch,
 
            };
 
            self.pass_rewriting.visit_module(&mut ctx);
 
            self.pass_stack_size.visit_module(&mut ctx);
 
        }
 

	
 
        // Write out desired information
 
        if let Some(filename) = &self.write_ast_to {
 
            let mut writer = ASTWriter::new();
 
            let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap();
 
            writer.write_ast(&mut file, &self.heap);
 
        }
 

	
 
        Ok(())
 
    }
 
}
 

	
 
// Note: args and return type need to be a function because we need to know the function ID.
 
fn insert_builtin_function<T: Fn(FunctionDefinitionId) -> (Vec<(&'static str, ParserType)>, ParserType)> (
 
    p: &mut Parser, func_name: &str, polymorphic: &[&str], arg_and_return_fn: T) {
 

	
 
    let mut poly_vars = Vec::with_capacity(polymorphic.len());
 
    for poly_var in polymorphic {
 
        poly_vars.push(Identifier{ span: InputSpan::new(), value: p.string_pool.intern(poly_var.as_bytes()) });
 
    }
 

	
 
    let func_ident_ref = p.string_pool.intern(func_name.as_bytes());
 
    let func_id = p.heap.alloc_function_definition(|this| FunctionDefinition{
 
        this,
 
        defined_in: RootId::new_invalid(),
 
        builtin: true,
 
        span: InputSpan::new(),
 
        identifier: Identifier{ span: InputSpan::new(), value: func_ident_ref.clone() },
 
        poly_vars,
 
        return_type: ParserType{ elements: Vec::new(), full_span: InputSpan::new() },
 
        parameters: Vec::new(),
 
        scope: ScopeId::new_invalid(),
 
        body: BlockStatementId::new_invalid(),
 
        num_expressions_in_body: -1,
 
    });
 

	
 
    let (arguments, return_type) = arg_and_return_fn(func_id);
 

	
 
    let mut parameters = Vec::with_capacity(arguments.len());
 
    for (arg_name, arg_type) in arguments {
 
        let identifier = Identifier{ span: InputSpan::new(), value: p.string_pool.intern(arg_name.as_bytes()) };
 
        let param_id = p.heap.alloc_variable(|this| Variable{
 
            this,
 
            kind: VariableKind::Parameter,
 
            parser_type: arg_type.clone(),
 
            identifier,
 
            relative_pos_in_parent: 0,
 
            unique_id_in_scope: 0
 
        });
src/protocol/parser/pass_rewriting.rs
Show inline comments
 
use crate::collections::*;
 
use crate::protocol::*;
 

	
 
use super::visitor::*;
 

	
 
pub(crate) struct PassRewriting {
 
    current_scope: ScopeId,
 
    definition_buffer: ScopedBuffer<DefinitionId>,
 
    statement_buffer: ScopedBuffer<StatementId>,
 
    call_expr_buffer: ScopedBuffer<CallExpressionId>,
 
    expression_buffer: ScopedBuffer<ExpressionId>,
 
    scope_buffer: ScopedBuffer<ScopeId>,
 
}
 

	
 
impl PassRewriting {
 
    pub(crate) fn new() -> Self {
 
        Self{
 
            current_scope: ScopeId::new_invalid(),
 
            definition_buffer: ScopedBuffer::with_capacity(BUFFER_INIT_CAP_LARGE),
 
            statement_buffer: ScopedBuffer::with_capacity(BUFFER_INIT_CAP_SMALL),
 
            call_expr_buffer: ScopedBuffer::with_capacity(BUFFER_INIT_CAP_SMALL),
 
            expression_buffer: ScopedBuffer::with_capacity(BUFFER_INIT_CAP_SMALL),
 
            scope_buffer: ScopedBuffer::with_capacity(BUFFER_INIT_CAP_SMALL),
 
        }
 
    }
 
}
 

	
 
impl Visitor for PassRewriting {
 
    fn visit_module(&mut self, ctx: &mut Ctx) -> VisitorResult {
 
        let module = ctx.module();
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::Typed);
 

	
 
        let root_id = module.root_id;
 
        let root = &ctx.heap[root_id];
 
        let definition_section = self.definition_buffer.start_section_initialized(&root.definitions);
 
        for definition_index in 0..definition_section.len() {
 
            let definition_id = definition_section[definition_index];
 
            self.visit_definition(ctx, definition_id)?;
 
        }
 

	
 
        definition_section.forget();
 
        ctx.module_mut().phase = ModuleCompilationPhase::Rewritten;
 
        return Ok(())
 
    }
 

	
 
    // --- Visiting procedures
 

	
 
    fn visit_component_definition(&mut self, ctx: &mut Ctx, id: ComponentDefinitionId) -> VisitorResult {
 
        let def = &ctx.heap[id];
 
        let body_id = def.body;
 
        self.current_scope = def.scope;
 
        return self.visit_block_stmt(ctx, body_id);
 
    }
 

	
 
    fn visit_function_definition(&mut self, ctx: &mut Ctx, id: FunctionDefinitionId) -> VisitorResult {
 
        let def = &ctx.heap[id];
 
        let body_id = def.body;
 
        self.current_scope = def.scope;
 
        return self.visit_block_stmt(ctx, body_id);
 
    }
 

	
 
    // --- Visiting statements (that are not the select statement)
 

	
 
    fn visit_block_stmt(&mut self, ctx: &mut Ctx, id: BlockStatementId) -> VisitorResult {
 
        let block_stmt = &ctx.heap[id];
 
        let stmt_section = self.statement_buffer.start_section_initialized(&block_stmt.statements);
 

	
 
        self.current_scope = block_stmt.scope;
 
        for stmt_idx in 0..stmt_section.len() {
 
            self.visit_stmt(ctx, stmt_section[stmt_idx])?;
 
        }
 

	
 
        stmt_section.forget();
 
        return Ok(())
 
    }
 

	
 
    fn visit_labeled_stmt(&mut self, ctx: &mut Ctx, id: LabeledStatementId) -> VisitorResult {
 
        let labeled_stmt = &ctx.heap[id];
 
        let body_id = labeled_stmt.body;
 
        return self.visit_stmt(ctx, body_id);
 
    }
 

	
 
    fn visit_if_stmt(&mut self, ctx: &mut Ctx, id: IfStatementId) -> VisitorResult {
 
        let if_stmt = &ctx.heap[id];
 
        let true_case = if_stmt.true_case;
 
        let false_case = if_stmt.false_case;
 

	
 
        self.current_scope = true_case.scope;
 
        self.visit_stmt(ctx, true_case.body)?;
 
        if let Some(false_case) = false_case {
 
            self.current_scope = false_case.scope;
 
            self.visit_stmt(ctx, false_case.body)?;
 
        }
 
@@ -90,106 +109,105 @@ impl Visitor for PassRewriting {
 
        return self.visit_stmt(ctx, body_id);
 
    }
 

	
 
    // --- Visiting the select statement
 

	
 
    fn visit_select_stmt(&mut self, ctx: &mut Ctx, id: SelectStatementId) -> VisitorResult {
 
        // Utility for the last stage of rewriting process. Note that caller
 
        // still needs to point the end of the if-statement to the end of the
 
        // replacement statement of the select statement.
 
        fn transform_select_case_code(ctx: &mut Ctx, select_id: SelectStatementId, case_index: usize, select_var_id: VariableId) -> (IfStatementId, EndIfStatementId) {
 
            // Retrieve statement IDs associated with case
 
            let case = &ctx.heap[select_id].cases[case_index];
 
            let case_guard_id = case.guard;
 
            let case_body_id = case.body;
 
            let case_scope_id = case.scope;
 

	
 
            // Create the if-statement for the result of the select statement
 
            let compare_expr_id = create_ast_equality_comparison_expr(ctx, select_var_id, case_index as u64);
 
            let true_case = IfStatementCase{
 
                body: case_guard_id, // which is linked up to the body
 
                scope: case_scope_id,
 
            };
 
            let (if_stmt_id, end_if_stmt_id) = create_ast_if_stmt(ctx, compare_expr_id.upcast(), true_case, None);
 

	
 
            // Link up body statement to end-if
 
            set_ast_statement_next(ctx, case_body_id, end_if_stmt_id.upcast());
 

	
 
            return (if_stmt_id, end_if_stmt_id)
 
        }
 

	
 
        // Precreate the block that will end up containing all of the
 
        // transformed statements. Also precreate the scope associated with it
 
        let (outer_block_id, outer_end_block_id, outer_scope_id) =
 
            create_ast_block_stmt(ctx, Vec::new());
 

	
 
        // The "select" and the "end select" statement will act like trampolines
 
        // that jump to the replacement block. So set the child/parent
 
        // relationship already.
 
        // --- for the statements
 
        let select_stmt = &mut ctx.heap[id];
 
        select_stmt.next = outer_block_id.upcast();
 
        let end_select_stmt_id = select_stmt.end_select;
 
        let select_stmt_relative_pos = select_stmt.relative_pos_in_parent;
 

	
 
        let outer_end_block_stmt = &mut ctx.heap[outer_end_block_id];
 
        outer_end_block_stmt.next = end_select_stmt_id.upcast();
 

	
 
        // --- for the scopes
 
        link_existing_child_to_new_parent_scope(ctx, &mut self.scope_buffer, self.current_scope, outer_scope_id, select_stmt_relative_pos);
 
        link_new_child_to_existing_parent_scope(ctx, &mut self.scope_buffer, self.current_scope, outer_scope_id, select_stmt_relative_pos);
 

	
 
        // Create statements that will create temporary variables for all of the
 
        // ports passed to the "get" calls in the select case guards.
 
        let select_stmt = &ctx.heap[id];
 
        let total_num_cases = select_stmt.cases.len();
 
        let mut total_num_ports = 0;
 
        let end_select_stmt_id = select_stmt.end_select;
 
        let end_select = &ctx.heap[end_select_stmt_id];
 
        let stmt_id_after_select_stmt = end_select.next;
 

	
 
        // Put heap IDs into temporary buffers to handle borrowing rules
 
        let mut call_id_section = self.call_expr_buffer.start_section();
 
        let mut expr_id_section = self.expression_buffer.start_section();
 

	
 
        for case in select_stmt.cases.iter() {
 
            total_num_ports += case.involved_ports.len();
 
            for (call_id, expr_id) in case.involved_ports.iter().copied() {
 
                call_id_section.push(call_id);
 
                expr_id_section.push(expr_id);
 
            }
 
        }
 

	
 
        // Transform all of the call expressions by takings its argument (the
 
        // port from which we `get`) and turning it into a temporary variable.
 
        let mut transformed_stmts = Vec::with_capacity(total_num_ports); // TODO: Recompute this preallocated length, put assert at the end
 
        let mut locals = Vec::with_capacity(total_num_ports);
 

	
 
        for port_var_idx in 0..call_id_section.len() {
 
            let get_call_expr_id = call_id_section[port_var_idx];
 
            let port_expr_id = expr_id_section[port_var_idx];
 

	
 
            // Move the port expression such that it gets assigned to a temporary variable
 
            let variable_id = create_ast_variable(ctx, outer_scope_id);
 
            let variable_decl_stmt_id = create_ast_variable_declaration_stmt(ctx, variable_id, port_expr_id);
 

	
 
            // Replace the original port expression in the call with a reference
 
            // to the replacement variable
 
            let variable_expr_id = create_ast_variable_expr(ctx, variable_id);
 
            let call_expr = &mut ctx.heap[get_call_expr_id];
 
            call_expr.arguments[0] = variable_expr_id.upcast();
 

	
 
            transformed_stmts.push(variable_decl_stmt_id.upcast().upcast());
 
            locals.push(variable_id);
 
        }
 

	
 
        // Insert runtime calls that facilitate the semantics of the select
 
        // block.
 

	
 
        // Create the call that indicates the start of the select block
 
        {
 
            let num_cases_expression_id = create_ast_literal_integer_expr(ctx, total_num_cases as u64);
 
            let num_ports_expression_id = create_ast_literal_integer_expr(ctx, total_num_ports as u64);
 
            let arguments = vec![
 
                num_cases_expression_id.upcast(),
 
                num_ports_expression_id.upcast()
 
            ];
 

	
src/protocol/parser/pass_stack_size.rs
Show inline comments
 
new file 100644
 
use crate::collections::*;
 
use crate::protocol::*;
 

	
 
use super::visitor::*;
 

	
 
// Will get a rename. Will probably become bytecode emitter or something. For
 
// now it just scans the scopes and assigns a unique number for each variable
 
// such that, at any point in the program's execution, all accessible in-scope
 
// variables will have a unique position "on the stack".
 
pub(crate) struct PassStackSize {
 
    definition_buffer: ScopedBuffer<DefinitionId>,
 
}
 

	
 
impl PassStackSize {
 
    pub(crate) fn new() -> Self {
 
        return Self{
 
            definition_buffer: ScopedBuffer::with_capacity(BUFFER_INIT_CAP_LARGE),
 
        }
 
    }
 
}
 

	
 
impl Visitor for PassStackSize {
 
    fn visit_module(&mut self, ctx: &mut Ctx) -> VisitorResult {
 
        let module = ctx.module();
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::Rewritten);
 

	
 
        let root_id = module.root_id;
 
        let root = &ctx.heap[root_id];
 
        let definition_section = self.definition_buffer.start_section_initialized(&root.definitions);
 
        for definition_index in 0..definition_section.len() {
 
            let definition_id = definition_section[definition_index];
 
            self.visit_definition(ctx, definition_id)?
 
        }
 

	
 
        definition_section.forget();
 
        // ctx.module_mut().phase = ModuleCompilationPhase::StackSizeStuffAndStuff;
 
        return Ok(())
 
    }
 
}
 
\ No newline at end of file
0 comments (0 inline, 0 general)