Files @ 33ea10021de4
Branch filter:

Location: CSY/reowolf/src/protocol/parser/symbol_table.rs

33ea10021de4 16.4 KiB application/rls-services+xml Show Annotation Show as Raw Download as Raw
mh
refactored identifiers, initial symbol table implementation
use crate::protocol::ast::*;
use crate::protocol::inputsource::*;

use std::collections::{HashMap, hash_map::Entry};
use crate::protocol::parser::LexedModule;

#[derive(PartialEq, Eq, Hash)]
struct SymbolKey {
    module_id: RootId,
    symbol_name: Vec<u8>,
}

pub(crate) enum Symbol {
    Namespace(RootId),
    Definition((RootId, DefinitionId)),
}

pub(crate) struct SymbolValue {
    // Position refers to the origin of the symbol definition (i.e. the module's
    // RootId that is in the key being used to lookup this value)
    position: InputPosition,
    symbol: Symbol,
}

impl SymbolValue {
    pub(crate) fn as_namespace(&self) -> Option<RootId> {
        match &self.symbol {
            Symbol::Namespace(root_id) => Some(*root_id),
            _ => None,
        }
    }

    pub(crate) fn as_definition(&self) -> Option<(RootId, DefinitionId)> {
        match &self.symbol {
            Symbol::Definition((root_id, definition_id)) => Some((*root_id, *definition_id)),
            _ => None,
        }
    }
}
/// `SymbolTable` is responsible for two parts of the parsing process: firstly
/// it ensures that there are no clashing symbol definitions within each file,
/// and secondly it will resolve all symbols within a module to their
/// appropriate definitions (in case of enums, functions, etc.) and namespaces
/// (currently only external modules can act as namespaces). If a symbol clashes
/// or if a symbol cannot be resolved this will be an error.
///
/// Within the compilation process the symbol table is responsible for resolving
/// namespaced identifiers (e.g. Module::Enum::EnumVariant) to the appropriate
/// definition (i.e. not namespaces; as the language has no way to use
/// namespaces except for using them in namespaced identifiers).
// TODO: Maybe allow namespaced-aliased imports. It is currently not possible
//  to express the following:
//  import Module.Submodule as ModSub
//  import SubMod::{Symbol}
pub(crate) struct SymbolTable {
    // Lookup from module name (not any aliases) to the root id
    module_lookup: HashMap<Vec<u8>, RootId>,
    // Lookup from within a module, to a particular imported (potentially
    // aliased) or defined symbol. Basically speaking: if the source code of a
    // module contains correctly imported/defined symbols, then this lookup
    // will always return the corresponding definition
    symbol_lookup: HashMap<SymbolKey, SymbolValue>,
}

impl SymbolTable {
    pub(crate) fn new(heap: &Heap, modules: &[LexedModule]) -> Result<Self, ParseError2> {
        // Sanity check
        if cfg!(debug_assertions) {
            for (index, module) in modules.iter().enumerate() {
                debug_assert_eq!(
                    index, module.root_id.0.index as usize,
                    "module RootId does not correspond to LexedModule index"
                )
            }
        }

        // Preparation: create a lookup from module name to root id. This does
        // not take aliasing into account.
        let mut module_lookup = HashMap::with_capacity(modules.len());
        for module in modules {
            // TODO: Maybe put duplicate module name checking here?
            // TODO: @string
            module_lookup.insert(module.module_name.clone(), module.root_id);
        }

        // Preparation: determine total number of imports we will be inserting
        // into the lookup table. We could just iterate over the arena, but then
        // we don't know the source file the import belongs to.
        let mut lookup_reserve_size = 0;
        for module in modules {
            let module_root = &heap[module.root_id];
            for import_id in &module_root.imports {
                match &heap[*import_id] {
                    Import::Module(_) => lookup_reserve_size += 1,
                    Import::Symbols(import) => {
                        if import.symbols.is_empty() {
                            // Add all symbols from the other module
                            match module_lookup.get(&import.module_name) {
                                Some(target_module_id) => {
                                    lookup_reserve_size += heap[*target_module_id].definitions.len()
                                },
                                None => {
                                    return Err(
                                        ParseError2::new_error(&module.source, import.position, "Cannot resolve module")
                                    );
                                }
                            }
                        }
                    }
                }
            }
        }

        let mut table = Self{
            module_lookup,
            symbol_lookup: HashMap::with_capacity(lookup_reserve_size)
        };

        // First pass: we go through all of the modules and add lookups to
        // symbols that are defined within that module. Cross-module imports are
        // not yet resolved
        for module in modules {
            let root = &heap[module.root_id];
            for definition_id in &root.definitions {
                let definition = &heap[*definition_id];
                let identifier = definition.identifier();
                if let Err(previous_position) = table.add_definition_symbol(
                    module.root_id, identifier.position, &identifier.value,
                    module.root_id, *definition_id
                ) {
                    return Err(
                        ParseError2::new_error(&module.source, definition.position(), "Symbol is multiply defined")
                            .with_postfixed_info(&module.source, previous_position, "Previous definition was here")
                    )
                }
            }
        }

        // Second pass: now that we can find symbols in modules, we can resolve
        // all imports (if they're correct, that is)
        for module in modules {
            let root = &heap[module.root_id];
            for import_id in &root.imports {
                let import = &heap[*import_id];
                match import {
                    Import::Module(import) => {
                        // Find the module using its name
                        let target_root_id = table.resolve_module(&import.module_name);
                        if target_root_id.is_none() {
                            return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module"));
                        }
                        let target_root_id = target_root_id.unwrap();
                        if target_root_id == module.root_id {
                            return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self"));
                        }

                        // Add the target module under its alias
                        if let Err(previous_position) = table.add_namespace_symbol(
                            module.root_id, import.position,
                            &import.alias, target_root_id
                        ) {
                            return Err(
                                ParseError2::new_error(&module.source, import.position, "Symbol is multiply defined")
                                    .with_postfixed_info(&module.source, previous_position, "Previous definition was here")
                            );
                        }
                    },
                    Import::Symbols(import) => {
                        // Find the target module using its name
                        let target_root_id = table.resolve_module(&import.module_name);
                        if target_root_id.is_none() {
                            return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module of symbol imports"));
                        }
                        let target_root_id = target_root_id.unwrap();
                        if target_root_id == module.root_id {
                            return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self"));
                        }

                        // Determine which symbols to import
                        if import.symbols.is_empty() {
                            // Import of all symbols, not using any aliases
                            for definition_id in &heap[target_root_id].definitions {
                                let definition = &heap[*definition_id];
                                let identifier = definition.identifier();
                                if let Err(previous_position) = table.add_definition_symbol(
                                    module.root_id, import.position, &identifier.value,
                                    target_root_id, *definition_id
                                ) {
                                    return Err(
                                        ParseError2::new_error(
                                            &module.source, import.position,
                                            &format!("Imported symbol '{}' is already defined", String::from_utf8_lossy(&identifier.value))
                                        )
                                        .with_postfixed_info(
                                            &modules[target_root_id.0.index as usize].source,
                                            definition.position(),
                                            "The imported symbol is defined here"
                                        )
                                        .with_postfixed_info(
                                            &module.source, previous_position, "And is previously defined here"
                                        )
                                    )
                                }
                            }
                        } else {
                            // Import of specific symbols, optionally using aliases
                            for symbol in &import.symbols {
                                // Because we have already added per-module definitions, we can use
                                // the table to lookup this particular symbol. Note: within a single
                                // module a namespace-import and a symbol-import may not collide.
                                // Hence per-module symbols are unique.
                                // However: if we import a symbol from another module, we don't want
                                // to "import a module's imported symbol". And so if we do find
                                // a symbol match, we need to make sure it is a definition from
                                // within that module by checking `source_root_id == target_root_id`
                                let target_symbol = table.resolve_symbol(target_root_id, &symbol.name);
                                let symbol_definition_id = match target_symbol {
                                    Some(target_symbol) => {
                                        match target_symbol.symbol {
                                            Symbol::Definition((symbol_root_id, symbol_definition_id)) => {
                                                if symbol_root_id == target_root_id {
                                                    Some(symbol_definition_id)
                                                } else {
                                                    // This is imported within the target module, and not
                                                    // defined within the target module
                                                    None
                                                }
                                            },
                                            Symbol::Namespace(_) => {
                                                // We don't import a module's "module import"
                                                None
                                            }
                                        }
                                    },
                                    None => None
                                };

                                if symbol_definition_id.is_none() {
                                    return Err(
                                        ParseError2::new_error(&module.source, symbol.position, "Could not resolve symbol")
                                    )
                                }
                                let symbol_definition_id = symbol_definition_id.unwrap();

                                if let Err(previous_position) = table.add_definition_symbol(
                                    module.root_id, symbol.position, &symbol.alias,
                                    target_root_id, symbol_definition_id
                                ) {
                                    return Err(
                                        ParseError2::new_error(&module.source, symbol.position, "Symbol is multiply defined")
                                            .with_postfixed_info(&module.source, previous_position, "Previous definition was here")
                                    )
                                }
                            }
                        }
                    }
                }
            }
        }

        debug_assert_eq!(
            table.symbol_lookup.len(), lookup_reserve_size,
            "miscalculated reserved size for symbol lookup table"
        );
        Ok(table)
    }

    /// Resolves a module by its defined name
    pub(crate) fn resolve_module(&self, identifier: &Vec<u8>) -> Option<RootId> {
        self.module_lookup.get(identifier).map(|v| *v)
    }

    /// Resolves a symbol within a particular module, indicated by its RootId,
    /// with a single non-namespaced identifier
    pub(crate) fn resolve_symbol(&self, within_module_id: RootId, identifier: &Vec<u8>) -> Option<&SymbolValue> {
        self.symbol_lookup.get(&SymbolKey{ module_id: within_module_id, symbol_name: identifier.clone() })
    }

    /// Resolves a namespaced symbol. It will try to go as far as possible in
    /// actually finding a definition or a namespace. So a namespace might be
    /// resolved, after it which it finds an actual definition. It may be that
    /// the namespaced identifier has more elements that should be checked
    /// (i.e. an enum variant, or simply an erroneous instance of too many
    /// chained identifiers). This function will return None if nothing could be
    /// resolved at all.
    pub(crate) fn resolve_namespaced_symbol(&self, _within_module_id: RootId) {
        todo!("implement")
    }

    /// Attempts to add a namespace symbol. Returns `Ok` if the symbol was
    /// inserted. If the symbol already exists then `Err` will be returned
    /// together with the previous definition's source position (in the origin
    /// module's source file).
    // Note: I would love to return a reference to the value, but Rust is
    // preventing me from doing so...
    fn add_namespace_symbol(
        &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec<u8>, target_module_id: RootId
    ) -> Result<(), InputPosition> {
        let key = SymbolKey{
            module_id: origin_module_id,
            symbol_name: symbol_name.clone()
        };
        match self.symbol_lookup.entry(key) {
            Entry::Occupied(o) => Err(o.get().position),
            Entry::Vacant(v) => {
                v.insert(SymbolValue{
                    position: origin_position,
                    symbol: Symbol::Namespace(target_module_id)
                });
                Ok(())
            }
        }
    }

    /// Attempts to add a definition symbol. Returns `Ok` if the symbol was
    /// inserted. If the symbol already exists then `Err` will be returned
    /// together with the previous definition's source position (in the origin
    /// module's source file).
    fn add_definition_symbol(
        &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec<u8>,
        target_module_id: RootId, target_definition_id: DefinitionId,
    ) -> Result<(), InputPosition> {
        let key = SymbolKey{
            module_id: origin_module_id,
            symbol_name: symbol_name.clone()
        };
        match self.symbol_lookup.entry(key) {
            Entry::Occupied(o) => Err(o.get().position),
            Entry::Vacant(v) => {
                v.insert(SymbolValue {
                    position: origin_position,
                    symbol: Symbol::Definition((target_module_id, target_definition_id))
                });
                Ok(())
            }
        }
    }
}