diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index d4eb2b451287cfb26ab0e15ce4670342dde36c9c..3019bb25b7f8ac5560829531525e454724b2ca41 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -1,6 +1,5 @@ mod depth_visitor; pub(crate) mod symbol_table; -pub(crate) mod symbol_table2; pub(crate) mod type_table; pub(crate) mod tokens; pub(crate) mod token_parsing; @@ -8,48 +7,49 @@ pub(crate) mod pass_tokenizer; pub(crate) mod pass_symbols; pub(crate) mod pass_imports; pub(crate) mod pass_definitions; -mod type_resolver; +pub(crate) mod pass_validation_linking; +pub(crate) mod pass_typing; mod visitor; -mod pass_validation_linking; -mod utils; use depth_visitor::*; use tokens::*; use crate::collections::*; -use symbol_table2::SymbolTable; +use symbol_table::SymbolTable; use visitor::Visitor2; +use pass_tokenizer::PassTokenizer; +use pass_symbols::PassSymbols; +use pass_imports::PassImport; +use pass_definitions::PassDefinitions; use pass_validation_linking::PassValidationLinking; -use type_resolver::{TypeResolvingVisitor, ResolveQueue}; -use type_table::{TypeTable, TypeCtx}; +use pass_typing::{PassTyping, ResolveQueue}; +use type_table::TypeTable; use crate::protocol::ast::*; -use crate::protocol::input_source2::{InputSource2 as InputSource}; -use crate::protocol::lexer::*; +use crate::protocol::input_source::*; -use std::collections::HashMap; use crate::protocol::ast_printer::ASTWriter; -#[derive(PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum ModuleCompilationPhase { Source, // only source is set Tokenized, // source is tokenized SymbolsScanned, // all definitions are linked to their type class ImportsResolved, // all imports are added to the symbol table DefinitionsParsed, // produced the AST for the entire module - TypesParsed, // added all definitions to the type table + TypesAddedToTable, // added all definitions to the type table ValidatedAndLinked, // AST is traversed and has linked the 
required AST nodes Typed, // Type inference and checking has been performed } pub struct Module { // Buffers - source: InputSource, - tokens: TokenBuffer, + pub source: InputSource, + pub tokens: TokenBuffer, // Identifiers - root_id: RootId, - name: Option<(PragmaId, StringRef<'static>)>, - version: Option<(PragmaId, i64)>, - phase: ModuleCompilationPhase, + pub root_id: RootId, + pub name: Option<(PragmaId, StringRef<'static>)>, + pub version: Option<(PragmaId, i64)>, + pub phase: ModuleCompilationPhase, } pub struct PassCtx<'a> { @@ -58,190 +58,91 @@ pub struct PassCtx<'a> { pool: &'a mut StringPool, } -// TODO: @fixme, pub qualifier -pub(crate) struct LexedModule { - pub(crate) source: InputSource, - module_name: Vec, - version: Option, - pub(crate) root_id: RootId, -} - pub struct Parser { pub(crate) heap: Heap, - pub(crate) modules: Vec, - pub(crate) module_lookup: HashMap, usize>, // from (optional) module name to `modules` idx + pub(crate) string_pool: StringPool, + pub(crate) modules: Vec, pub(crate) symbol_table: SymbolTable, pub(crate) type_table: TypeTable, + // Compiler passes + pass_tokenizer: PassTokenizer, + pass_symbols: PassSymbols, + pass_import: PassImport, + pass_definitions: PassDefinitions, + pass_validation: PassValidationLinking, + pass_typing: PassTyping, } impl Parser { pub fn new() -> Self { Parser{ heap: Heap::new(), + string_pool: StringPool::new(), modules: Vec::new(), - module_lookup: HashMap::new(), symbol_table: SymbolTable::new(), type_table: TypeTable::new(), + pass_tokenizer: PassTokenizer::new(), + pass_symbols: PassSymbols::new(), + pass_import: PassImport::new(), + pass_definitions: PassDefinitions::new(), + pass_validation: PassValidationLinking::new(), + pass_typing: PassTyping::new(), } } - pub fn feed(&mut self, mut source: InputSource) -> Result { - // Lex the input source - let mut lex = Lexer::new(&mut source); - let pd = lex.consume_protocol_description(&mut self.heap)?; - - // Seek the module name and version - let 
root = &self.heap[pd]; - let mut module_name_pos = InputPosition::default(); - let mut module_name = Vec::new(); - let mut module_version_pos = InputPosition::default(); - let mut module_version = None; - - for pragma in &root.pragmas { - match &self.heap[*pragma] { - Pragma::Module(module) => { - if !module_name.is_empty() { - return Err( - ParseError::new_error(&source, module.position, "Double definition of module name in the same file") - .with_postfixed_info(&source, module_name_pos, "Previous definition was here") - ) - } - - module_name_pos = module.position.clone(); - module_name = module.value.clone(); - }, - Pragma::Version(version) => { - if module_version.is_some() { - return Err( - ParseError::new_error(&source, version.position, "Double definition of module version") - .with_postfixed_info(&source, module_version_pos, "Previous definition was here") - ) - } - - module_version_pos = version.position.clone(); - module_version = Some(version.version); - }, - } - } - - // Add module to list of modules and prevent naming conflicts - let cur_module_idx = self.modules.len(); - if let Some(prev_module_idx) = self.module_lookup.get(&module_name) { - // Find `#module` statement in other module again - let prev_module = &self.modules[*prev_module_idx]; - let prev_module_pos = self.heap[prev_module.root_id].pragmas - .iter() - .find_map(|p| { - match &self.heap[*p] { - Pragma::Module(module) => Some(module.position.clone()), - _ => None - } - }) - .unwrap_or(InputPosition::default()); - - let module_name_msg = if module_name.is_empty() { - format!("a nameless module") - } else { - format!("module '{}'", String::from_utf8_lossy(&module_name)) - }; - - return Err( - ParseError::new_error(&source, module_name_pos, &format!("Double definition of {} across files", module_name_msg)) - .with_postfixed_info(&prev_module.source, prev_module_pos, "Other definition was here") - ); - } + pub fn feed(&mut self, mut source: InputSource) -> Result<(), ParseError> { + // TODO: 
@Optimize + let mut token_buffer = TokenBuffer::new(); + self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?; - self.modules.push(LexedModule{ + let module = Module{ source, - module_name: module_name.clone(), - version: module_version, - root_id: pd - }); - self.module_lookup.insert(module_name, cur_module_idx); - Ok(pd) - } - - fn resolve_symbols_and_types(&mut self) -> Result<(), ParseError> { - // Construct the symbol table to resolve any imports and/or definitions, - // then use the symbol table to actually annotate all of the imports. - // If the type table is constructed correctly then all imports MUST be - // resolvable. - self.symbol_table.build(&self.heap, &self.modules)?; - - // Not pretty, but we need to work around rust's borrowing rules, it is - // totally safe to mutate the contents of an AST element that we are - // not borrowing anywhere else. - let mut module_index = 0; - let mut import_index = 0; - loop { - if module_index >= self.modules.len() { - break; - } - - let module_root_id = self.modules[module_index].root_id; - let import_id = { - let root = &self.heap[module_root_id]; - if import_index >= root.imports.len() { - module_index += 1; - import_index = 0; - continue - } - root.imports[import_index] - }; + tokens: token_buffer, + root_id: RootId::new_invalid(), + name: None, + version: None, + phase: ModuleCompilationPhase::Tokenized, + }; + self.modules.push(module); - let import = &mut self.heap[import_id]; - match import { - Import::Module(import) => { - debug_assert!(import.module_id.is_none(), "module import already resolved"); - let target_module_id = self.symbol_table.resolve_module(&import.module) - .expect("module import is resolved by symbol table"); - import.module_id = Some(target_module_id) - }, - Import::Symbols(import) => { - debug_assert!(import.module_id.is_none(), "module of symbol import already resolved"); - let target_module_id = self.symbol_table.resolve_module(&import.module) - .expect("symbol import's module 
is resolved by symbol table"); - import.module_id = Some(target_module_id); + Ok(()) + } - for symbol in &mut import.symbols { - debug_assert!(symbol.definition_id.is_none(), "symbol import already resolved"); - let (_, target_definition_id) = self.symbol_table.resolve_identifier(module_root_id, &symbol.alias) - .expect("symbol import is resolved by symbol table") - .as_definition() - .expect("symbol import does not resolve to namespace symbol"); - symbol.definition_id = Some(target_definition_id); - } - } - } + pub fn parse(&mut self) -> Result<(), ParseError> { + let mut pass_ctx = PassCtx{ + heap: &mut self.heap, + symbols: &mut self.symbol_table, + pool: &mut self.string_pool, + }; - import_index += 1; + // Advance all modules to the phase where all symbols are scanned + for module_idx in 0..self.modules.len() { + self.pass_symbols.parse(&mut self.modules, module_idx, &mut pass_ctx)?; } - // All imports in the AST are now annotated. We now use the symbol table - // to construct the type table. - let mut type_ctx = TypeCtx::new(&self.symbol_table, &mut self.heap, &self.modules); - self.type_table.build_base_types(&mut type_ctx)?; - - Ok(()) - } + // With all symbols scanned, perform further compilation until we can + // add all base types to the type table. 
+ for module_idx in 0..self.modules.len() { + self.pass_import.parse(&mut self.modules, module_idx, &mut pass_ctx)?; + self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?; + } - pub fn parse(&mut self) -> Result<(), ParseError> { - self.resolve_symbols_and_types()?; + // Add every known type to the type table + self.type_table.build_base_types(&mut self.modules, &mut pass_ctx)?; - // Validate and link all modules - let mut visit = PassValidationLinking::new(); - for module in &self.modules { + // Continue compilation with the remaining phases now that the types + // are all in the type table + for module_idx in 0..self.modules.len() { let mut ctx = visitor::Ctx{ heap: &mut self.heap, - module, + module: &self.modules[module_idx], symbols: &mut self.symbol_table, types: &mut self.type_table, }; - visit.visit_module(&mut ctx)?; + self.pass_validation.visit_module(&mut ctx)?; } // Perform typechecking on all modules - let mut visit = TypeResolvingVisitor::new(); let mut queue = ResolveQueue::new(); for module in &self.modules { let ctx = visitor::Ctx{ @@ -250,7 +151,7 @@ impl Parser { symbols: &mut self.symbol_table, types: &mut self.type_table, }; - TypeResolvingVisitor::queue_module_definitions(&ctx, &mut queue); + PassTyping::queue_module_definitions(&ctx, &mut queue); }; while !queue.is_empty() { let top = queue.pop().unwrap(); @@ -260,7 +161,7 @@ impl Parser { symbols: &mut self.symbol_table, types: &mut self.type_table, }; - visit.handle_module_definition(&mut ctx, &mut queue, top)?; + self.pass_typing.handle_module_definition(&mut ctx, &mut queue, top)?; } // Perform remaining steps @@ -268,7 +169,7 @@ impl Parser { for module in &self.modules { let root_id = module.root_id; if let Err((position, message)) = Self::parse_inner(&mut self.heap, root_id) { - return Err(ParseError::new_error(&self.modules[0].source, position, &message)) + return Err(ParseError::new_error_str_at_pos(&module.source, position, &message)) } }