use crate::protocol::ast::*; use crate::protocol::inputsource::*; use std::collections::{HashMap, hash_map::Entry}; use crate::protocol::parser::LexedModule; #[derive(PartialEq, Eq, Hash)] struct SymbolKey { module_id: RootId, symbol_name: Vec, } pub(crate) enum Symbol { Namespace(RootId), Definition((RootId, DefinitionId)), } pub(crate) struct SymbolValue { // Position refers to the origin of the symbol definition (i.e. the module's // RootId that is in the key being used to lookup this value) position: InputPosition, symbol: Symbol, } impl SymbolValue { pub(crate) fn as_namespace(&self) -> Option { match &self.symbol { Symbol::Namespace(root_id) => Some(*root_id), _ => None, } } pub(crate) fn as_definition(&self) -> Option<(RootId, DefinitionId)> { match &self.symbol { Symbol::Definition((root_id, definition_id)) => Some((*root_id, *definition_id)), _ => None, } } } /// `SymbolTable` is responsible for two parts of the parsing process: firstly /// it ensures that there are no clashing symbol definitions within each file, /// and secondly it will resolve all symbols within a module to their /// appropriate definitions (in case of enums, functions, etc.) and namespaces /// (currently only external modules can act as namespaces). If a symbol clashes /// or if a symbol cannot be resolved this will be an error. /// /// Within the compilation process the symbol table is responsible for resolving /// namespaced identifiers (e.g. Module::Enum::EnumVariant) to the appropriate /// definition (i.e. not namespaces; as the language has no way to use /// namespaces except for using them in namespaced identifiers). // TODO: Maybe allow namespaced-aliased imports. It is currently not possible // to express the following: // import Module.Submodule as ModSub // import SubMod::{Symbol} pub(crate) struct SymbolTable { // Lookup from module name (not any aliases) to the root id module_lookup: HashMap, RootId>, // Lookup from within a module, to a particular imported (potentially // aliased) or defined symbol. Basically speaking: if the source code of a // module contains correctly imported/defined symbols, then this lookup // will always return the corresponding definition symbol_lookup: HashMap, } impl SymbolTable { pub(crate) fn new(heap: &Heap, modules: &[LexedModule]) -> Result { // Sanity check if cfg!(debug_assertions) { for (index, module) in modules.iter().enumerate() { debug_assert_eq!( index, module.root_id.0.index as usize, "module RootId does not correspond to LexedModule index" ) } } // Preparation: create a lookup from module name to root id. This does // not take aliasing into account. let mut module_lookup = HashMap::with_capacity(modules.len()); for module in modules { // TODO: Maybe put duplicate module name checking here? // TODO: @string module_lookup.insert(module.module_name.clone(), module.root_id); } // Preparation: determine total number of imports we will be inserting // into the lookup table. We could just iterate over the arena, but then // we don't know the source file the import belongs to. let mut lookup_reserve_size = 0; for module in modules { let module_root = &heap[module.root_id]; for import_id in &module_root.imports { match &heap[*import_id] { Import::Module(_) => lookup_reserve_size += 1, Import::Symbols(import) => { if import.symbols.is_empty() { // Add all symbols from the other module match module_lookup.get(&import.module_name) { Some(target_module_id) => { lookup_reserve_size += heap[*target_module_id].definitions.len() }, None => { return Err( ParseError2::new_error(&module.source, import.position, "Cannot resolve module") ); } } } } } } } let mut table = Self{ module_lookup, symbol_lookup: HashMap::with_capacity(lookup_reserve_size) }; // First pass: we go through all of the modules and add lookups to // symbols that are defined within that module. Cross-module imports are // not yet resolved for module in modules { let root = &heap[module.root_id]; for definition_id in &root.definitions { let definition = &heap[*definition_id]; let identifier = definition.identifier(); if let Err(previous_position) = table.add_definition_symbol( module.root_id, identifier.position, &identifier.value, module.root_id, *definition_id ) { return Err( ParseError2::new_error(&module.source, definition.position(), "Symbol is multiply defined") .with_postfixed_info(&module.source, previous_position, "Previous definition was here") ) } } } // Second pass: now that we can find symbols in modules, we can resolve // all imports (if they're correct, that is) for module in modules { let root = &heap[module.root_id]; for import_id in &root.imports { let import = &heap[*import_id]; match import { Import::Module(import) => { // Find the module using its name let target_root_id = table.resolve_module(&import.module_name); if target_root_id.is_none() { return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module")); } let target_root_id = target_root_id.unwrap(); if target_root_id == module.root_id { return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self")); } // Add the target module under its alias if let Err(previous_position) = table.add_namespace_symbol( module.root_id, import.position, &import.alias, target_root_id ) { return Err( ParseError2::new_error(&module.source, import.position, "Symbol is multiply defined") .with_postfixed_info(&module.source, previous_position, "Previous definition was here") ); } }, Import::Symbols(import) => { // Find the target module using its name let target_root_id = table.resolve_module(&import.module_name); if target_root_id.is_none() { return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module of symbol imports")); } let target_root_id = target_root_id.unwrap(); if target_root_id == module.root_id { return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self")); } // Determine which symbols to import if import.symbols.is_empty() { // Import of all symbols, not using any aliases for definition_id in &heap[target_root_id].definitions { let definition = &heap[*definition_id]; let identifier = definition.identifier(); if let Err(previous_position) = table.add_definition_symbol( module.root_id, import.position, &identifier.value, target_root_id, *definition_id ) { return Err( ParseError2::new_error( &module.source, import.position, &format!("Imported symbol '{}' is already defined", String::from_utf8_lossy(&identifier.value)) ) .with_postfixed_info( &modules[target_root_id.0.index as usize].source, definition.position(), "The imported symbol is defined here" ) .with_postfixed_info( &module.source, previous_position, "And is previously defined here" ) ) } } } else { // Import of specific symbols, optionally using aliases for symbol in &import.symbols { // Because we have already added per-module definitions, we can use // the table to lookup this particular symbol. Note: within a single // module a namespace-import and a symbol-import may not collide. // Hence per-module symbols are unique. // However: if we import a symbol from another module, we don't want // to "import a module's imported symbol". And so if we do find // a symbol match, we need to make sure it is a definition from // within that module by checking `source_root_id == target_root_id` let target_symbol = table.resolve_symbol(target_root_id, &symbol.name); let symbol_definition_id = match target_symbol { Some(target_symbol) => { match target_symbol.symbol { Symbol::Definition((symbol_root_id, symbol_definition_id)) => { if symbol_root_id == target_root_id { Some(symbol_definition_id) } else { // This is imported within the target module, and not // defined within the target module None } }, Symbol::Namespace(_) => { // We don't import a module's "module import" None } } }, None => None }; if symbol_definition_id.is_none() { return Err( ParseError2::new_error(&module.source, symbol.position, "Could not resolve symbol") ) } let symbol_definition_id = symbol_definition_id.unwrap(); if let Err(previous_position) = table.add_definition_symbol( module.root_id, symbol.position, &symbol.alias, target_root_id, symbol_definition_id ) { return Err( ParseError2::new_error(&module.source, symbol.position, "Symbol is multiply defined") .with_postfixed_info(&module.source, previous_position, "Previous definition was here") ) } } } } } } } debug_assert_eq!( table.symbol_lookup.len(), lookup_reserve_size, "miscalculated reserved size for symbol lookup table" ); Ok(table) } /// Resolves a module by its defined name pub(crate) fn resolve_module(&self, identifier: &Vec) -> Option { self.module_lookup.get(identifier).map(|v| *v) } /// Resolves a symbol within a particular module, indicated by its RootId, /// with a single non-namespaced identifier pub(crate) fn resolve_symbol(&self, within_module_id: RootId, identifier: &Vec) -> Option<&SymbolValue> { self.symbol_lookup.get(&SymbolKey{ module_id: within_module_id, symbol_name: identifier.clone() }) } /// Resolves a namespaced symbol. It will try to go as far as possible in /// actually finding a definition or a namespace. So a namespace might be /// resolved, after it which it finds an actual definition. It may be that /// the namespaced identifier has more elements that should be checked /// (i.e. an enum variant, or simply an erroneous instance of too many /// chained identifiers). This function will return None if nothing could be /// resolved at all. pub(crate) fn resolve_namespaced_symbol(&self, _within_module_id: RootId) { todo!("implement") } /// Attempts to add a namespace symbol. Returns `Ok` if the symbol was /// inserted. If the symbol already exists then `Err` will be returned /// together with the previous definition's source position (in the origin /// module's source file). // Note: I would love to return a reference to the value, but Rust is // preventing me from doing so... fn add_namespace_symbol( &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec, target_module_id: RootId ) -> Result<(), InputPosition> { let key = SymbolKey{ module_id: origin_module_id, symbol_name: symbol_name.clone() }; match self.symbol_lookup.entry(key) { Entry::Occupied(o) => Err(o.get().position), Entry::Vacant(v) => { v.insert(SymbolValue{ position: origin_position, symbol: Symbol::Namespace(target_module_id) }); Ok(()) } } } /// Attempts to add a definition symbol. Returns `Ok` if the symbol was /// inserted. If the symbol already exists then `Err` will be returned /// together with the previous definition's source position (in the origin /// module's source file). fn add_definition_symbol( &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec, target_module_id: RootId, target_definition_id: DefinitionId, ) -> Result<(), InputPosition> { let key = SymbolKey{ module_id: origin_module_id, symbol_name: symbol_name.clone() }; match self.symbol_lookup.entry(key) { Entry::Occupied(o) => Err(o.get().position), Entry::Vacant(v) => { v.insert(SymbolValue { position: origin_position, symbol: Symbol::Definition((target_module_id, target_definition_id)) }); Ok(()) } } } }