diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs new file mode 100644 index 0000000000000000000000000000000000000000..9902584d39451161353cf41d5ace4cf7828e6f0e --- /dev/null +++ b/src/protocol/parser/symbol_table.rs @@ -0,0 +1,338 @@ +use crate::protocol::ast::*; +use crate::protocol::inputsource::*; + +use std::collections::{HashMap, hash_map::Entry}; +use crate::protocol::parser::LexedModule; + +#[derive(PartialEq, Eq, Hash)] +struct SymbolKey { + module_id: RootId, + symbol_name: Vec, +} + +pub(crate) enum Symbol { + Namespace(RootId), + Definition((RootId, DefinitionId)), +} + +pub(crate) struct SymbolValue { + // Position refers to the origin of the symbol definition (i.e. the module's + // RootId that is in the key being used to lookup this value) + position: InputPosition, + symbol: Symbol, +} + +impl SymbolValue { + pub(crate) fn as_namespace(&self) -> Option { + match &self.symbol { + Symbol::Namespace(root_id) => Some(*root_id), + _ => None, + } + } + + pub(crate) fn as_definition(&self) -> Option<(RootId, DefinitionId)> { + match &self.symbol { + Symbol::Definition((root_id, definition_id)) => Some((*root_id, *definition_id)), + _ => None, + } + } +} +/// `SymbolTable` is responsible for two parts of the parsing process: firstly +/// it ensures that there are no clashing symbol definitions within each file, +/// and secondly it will resolve all symbols within a module to their +/// appropriate definitions (in case of enums, functions, etc.) and namespaces +/// (currently only external modules can act as namespaces). If a symbol clashes +/// or if a symbol cannot be resolved this will be an error. +/// +/// Within the compilation process the symbol table is responsible for resolving +/// namespaced identifiers (e.g. Module::Enum::EnumVariant) to the appropriate +/// definition (i.e. not namespaces; as the language has no way to use +/// namespaces except for using them in namespaced identifiers). +// TODO: Maybe allow namespaced-aliased imports. It is currently not possible +// to express the following: +// import Module.Submodule as ModSub +// import SubMod::{Symbol} +pub(crate) struct SymbolTable { + // Lookup from module name (not any aliases) to the root id + module_lookup: HashMap, RootId>, + // Lookup from within a module, to a particular imported (potentially + // aliased) or defined symbol. Basically speaking: if the source code of a + // module contains correctly imported/defined symbols, then this lookup + // will always return the corresponding definition + symbol_lookup: HashMap, +} + +impl SymbolTable { + pub(crate) fn new(heap: &Heap, modules: &[LexedModule]) -> Result { + // Sanity check + if cfg!(debug_assertions) { + for (index, module) in modules.iter().enumerate() { + debug_assert_eq!( + index, module.root_id.0.index as usize, + "module RootId does not correspond to LexedModule index" + ) + } + } + + // Preparation: create a lookup from module name to root id. This does + // not take aliasing into account. + let mut module_lookup = HashMap::with_capacity(modules.len()); + for module in modules { + // TODO: Maybe put duplicate module name checking here? + // TODO: @string + module_lookup.insert(module.module_name.clone(), module.root_id); + } + + // Preparation: determine total number of imports we will be inserting + // into the lookup table. We could just iterate over the arena, but then + // we don't know the source file the import belongs to. + let mut lookup_reserve_size = 0; + for module in modules { + let module_root = &heap[module.root_id]; + for import_id in &module_root.imports { + match &heap[*import_id] { + Import::Module(_) => lookup_reserve_size += 1, + Import::Symbols(import) => { + if import.symbols.is_empty() { + // Add all symbols from the other module + match module_lookup.get(&import.module_name) { + Some(target_module_id) => { + lookup_reserve_size += heap[*target_module_id].definitions.len() + }, + None => { + return Err( + ParseError2::new_error(&module.source, import.position, "Cannot resolve module") + ); + } + } + } + } + } + } + } + + let mut table = Self{ + module_lookup, + symbol_lookup: HashMap::with_capacity(lookup_reserve_size) + }; + + // First pass: we go through all of the modules and add lookups to + // symbols that are defined within that module. Cross-module imports are + // not yet resolved + for module in modules { + let root = &heap[module.root_id]; + for definition_id in &root.definitions { + let definition = &heap[*definition_id]; + let identifier = definition.identifier(); + if let Err(previous_position) = table.add_definition_symbol( + module.root_id, identifier.position, &identifier.value, + module.root_id, *definition_id + ) { + return Err( + ParseError2::new_error(&module.source, definition.position(), "Symbol is multiply defined") + .with_postfixed_info(&module.source, previous_position, "Previous definition was here") + ) + } + } + } + + // Second pass: now that we can find symbols in modules, we can resolve + // all imports (if they're correct, that is) + for module in modules { + let root = &heap[module.root_id]; + for import_id in &root.imports { + let import = &heap[*import_id]; + match import { + Import::Module(import) => { + // Find the module using its name + let target_root_id = table.resolve_module(&import.module_name); + if target_root_id.is_none() { + return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module")); + } + let target_root_id = target_root_id.unwrap(); + if target_root_id == module.root_id { + return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self")); + } + + // Add the target module under its alias + if let Err(previous_position) = table.add_namespace_symbol( + module.root_id, import.position, + &import.alias, target_root_id + ) { + return Err( + ParseError2::new_error(&module.source, import.position, "Symbol is multiply defined") + .with_postfixed_info(&module.source, previous_position, "Previous definition was here") + ); + } + }, + Import::Symbols(import) => { + // Find the target module using its name + let target_root_id = table.resolve_module(&import.module_name); + if target_root_id.is_none() { + return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module of symbol imports")); + } + let target_root_id = target_root_id.unwrap(); + if target_root_id == module.root_id { + return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self")); + } + + // Determine which symbols to import + if import.symbols.is_empty() { + // Import of all symbols, not using any aliases + for definition_id in &heap[target_root_id].definitions { + let definition = &heap[*definition_id]; + let identifier = definition.identifier(); + if let Err(previous_position) = table.add_definition_symbol( + module.root_id, import.position, &identifier.value, + target_root_id, *definition_id + ) { + return Err( + ParseError2::new_error( + &module.source, import.position, + &format!("Imported symbol '{}' is already defined", String::from_utf8_lossy(&identifier.value)) + ) + .with_postfixed_info( + &modules[target_root_id.0.index as usize].source, + definition.position(), + "The imported symbol is defined here" + ) + .with_postfixed_info( + &module.source, previous_position, "And is previously defined here" + ) + ) + } + } + } else { + // Import of specific symbols, optionally using aliases + for symbol in &import.symbols { + // Because we have already added per-module definitions, we can use + // the table to lookup this particular symbol. Note: within a single + // module a namespace-import and a symbol-import may not collide. + // Hence per-module symbols are unique. + // However: if we import a symbol from another module, we don't want + // to "import a module's imported symbol". And so if we do find + // a symbol match, we need to make sure it is a definition from + // within that module by checking `source_root_id == target_root_id` + let target_symbol = table.resolve_symbol(target_root_id, &symbol.name); + let symbol_definition_id = match target_symbol { + Some(target_symbol) => { + match target_symbol.symbol { + Symbol::Definition((symbol_root_id, symbol_definition_id)) => { + if symbol_root_id == target_root_id { + Some(symbol_definition_id) + } else { + // This is imported within the target module, and not + // defined within the target module + None + } + }, + Symbol::Namespace(_) => { + // We don't import a module's "module import" + None + } + } + }, + None => None + }; + + if symbol_definition_id.is_none() { + return Err( + ParseError2::new_error(&module.source, symbol.position, "Could not resolve symbol") + ) + } + let symbol_definition_id = symbol_definition_id.unwrap(); + + if let Err(previous_position) = table.add_definition_symbol( + module.root_id, symbol.position, &symbol.alias, + target_root_id, symbol_definition_id + ) { + return Err( + ParseError2::new_error(&module.source, symbol.position, "Symbol is multiply defined") + .with_postfixed_info(&module.source, previous_position, "Previous definition was here") + ) + } + } + } + } + } + } + } + + debug_assert_eq!( + table.symbol_lookup.len(), lookup_reserve_size, + "miscalculated reserved size for symbol lookup table" + ); + Ok(table) + } + + /// Resolves a module by its defined name + pub(crate) fn resolve_module(&self, identifier: &Vec) -> Option { + self.module_lookup.get(identifier).map(|v| *v) + } + + /// Resolves a symbol within a particular module, indicated by its RootId, + /// with a single non-namespaced identifier + pub(crate) fn resolve_symbol(&self, within_module_id: RootId, identifier: &Vec) -> Option<&SymbolValue> { + self.symbol_lookup.get(&SymbolKey{ module_id: within_module_id, symbol_name: identifier.clone() }) + } + + /// Resolves a namespaced symbol. It will try to go as far as possible in + /// actually finding a definition or a namespace. So a namespace might be + /// resolved, after it which it finds an actual definition. It may be that + /// the namespaced identifier has more elements that should be checked + /// (i.e. an enum variant, or simply an erroneous instance of too many + /// chained identifiers). This function will return None if nothing could be + /// resolved at all. + pub(crate) fn resolve_namespaced_symbol(&self, _within_module_id: RootId) { + todo!("implement") + } + + /// Attempts to add a namespace symbol. Returns `Ok` if the symbol was + /// inserted. If the symbol already exists then `Err` will be returned + /// together with the previous definition's source position (in the origin + /// module's source file). + // Note: I would love to return a reference to the value, but Rust is + // preventing me from doing so... + fn add_namespace_symbol( + &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec, target_module_id: RootId + ) -> Result<(), InputPosition> { + let key = SymbolKey{ + module_id: origin_module_id, + symbol_name: symbol_name.clone() + }; + match self.symbol_lookup.entry(key) { + Entry::Occupied(o) => Err(o.get().position), + Entry::Vacant(v) => { + v.insert(SymbolValue{ + position: origin_position, + symbol: Symbol::Namespace(target_module_id) + }); + Ok(()) + } + } + } + + /// Attempts to add a definition symbol. Returns `Ok` if the symbol was + /// inserted. If the symbol already exists then `Err` will be returned + /// together with the previous definition's source position (in the origin + /// module's source file). + fn add_definition_symbol( + &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec, + target_module_id: RootId, target_definition_id: DefinitionId, + ) -> Result<(), InputPosition> { + let key = SymbolKey{ + module_id: origin_module_id, + symbol_name: symbol_name.clone() + }; + match self.symbol_lookup.entry(key) { + Entry::Occupied(o) => Err(o.get().position), + Entry::Vacant(v) => { + v.insert(SymbolValue { + position: origin_position, + symbol: Symbol::Definition((target_module_id, target_definition_id)) + }); + Ok(()) + } + } + } +} \ No newline at end of file