Files @ 33ea10021de4
Branch filter:

Location: CSY/reowolf/src/protocol/parser/symbol_table.rs - annotation

33ea10021de4 16.4 KiB application/rls-services+xml Show Source Show as Raw Download as Raw
mh
refactored identifiers, initial symbol table implementation
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
33ea10021de4
use crate::protocol::ast::*;
use crate::protocol::inputsource::*;

use std::collections::{HashMap, hash_map::Entry};
use crate::protocol::parser::LexedModule;

#[derive(PartialEq, Eq, Hash)]
struct SymbolKey {
    module_id: RootId,
    symbol_name: Vec<u8>,
}

pub(crate) enum Symbol {
    Namespace(RootId),
    Definition((RootId, DefinitionId)),
}

pub(crate) struct SymbolValue {
    // Position refers to the origin of the symbol definition (i.e. the module's
    // RootId that is in the key being used to lookup this value)
    position: InputPosition,
    symbol: Symbol,
}

impl SymbolValue {
    pub(crate) fn as_namespace(&self) -> Option<RootId> {
        match &self.symbol {
            Symbol::Namespace(root_id) => Some(*root_id),
            _ => None,
        }
    }

    pub(crate) fn as_definition(&self) -> Option<(RootId, DefinitionId)> {
        match &self.symbol {
            Symbol::Definition((root_id, definition_id)) => Some((*root_id, *definition_id)),
            _ => None,
        }
    }
}
/// `SymbolTable` is responsible for two parts of the parsing process: firstly
/// it ensures that there are no clashing symbol definitions within each file,
/// and secondly it will resolve all symbols within a module to their
/// appropriate definitions (in case of enums, functions, etc.) and namespaces
/// (currently only external modules can act as namespaces). If a symbol clashes
/// or if a symbol cannot be resolved this will be an error.
///
/// Within the compilation process the symbol table is responsible for resolving
/// namespaced identifiers (e.g. Module::Enum::EnumVariant) to the appropriate
/// definition (i.e. not namespaces; as the language has no way to use
/// namespaces except for using them in namespaced identifiers).
// TODO: Maybe allow namespaced-aliased imports. It is currently not possible
//  to express the following:
//  import Module.Submodule as ModSub
//  import SubMod::{Symbol}
pub(crate) struct SymbolTable {
    // Lookup from module name (not any aliases) to the root id
    module_lookup: HashMap<Vec<u8>, RootId>,
    // Lookup from within a module, to a particular imported (potentially
    // aliased) or defined symbol. Basically speaking: if the source code of a
    // module contains correctly imported/defined symbols, then this lookup
    // will always return the corresponding definition
    symbol_lookup: HashMap<SymbolKey, SymbolValue>,
}

impl SymbolTable {
    pub(crate) fn new(heap: &Heap, modules: &[LexedModule]) -> Result<Self, ParseError2> {
        // Sanity check
        if cfg!(debug_assertions) {
            for (index, module) in modules.iter().enumerate() {
                debug_assert_eq!(
                    index, module.root_id.0.index as usize,
                    "module RootId does not correspond to LexedModule index"
                )
            }
        }

        // Preparation: create a lookup from module name to root id. This does
        // not take aliasing into account.
        let mut module_lookup = HashMap::with_capacity(modules.len());
        for module in modules {
            // TODO: Maybe put duplicate module name checking here?
            // TODO: @string
            module_lookup.insert(module.module_name.clone(), module.root_id);
        }

        // Preparation: determine total number of imports we will be inserting
        // into the lookup table. We could just iterate over the arena, but then
        // we don't know the source file the import belongs to.
        let mut lookup_reserve_size = 0;
        for module in modules {
            let module_root = &heap[module.root_id];
            for import_id in &module_root.imports {
                match &heap[*import_id] {
                    Import::Module(_) => lookup_reserve_size += 1,
                    Import::Symbols(import) => {
                        if import.symbols.is_empty() {
                            // Add all symbols from the other module
                            match module_lookup.get(&import.module_name) {
                                Some(target_module_id) => {
                                    lookup_reserve_size += heap[*target_module_id].definitions.len()
                                },
                                None => {
                                    return Err(
                                        ParseError2::new_error(&module.source, import.position, "Cannot resolve module")
                                    );
                                }
                            }
                        }
                    }
                }
            }
        }

        let mut table = Self{
            module_lookup,
            symbol_lookup: HashMap::with_capacity(lookup_reserve_size)
        };

        // First pass: we go through all of the modules and add lookups to
        // symbols that are defined within that module. Cross-module imports are
        // not yet resolved
        for module in modules {
            let root = &heap[module.root_id];
            for definition_id in &root.definitions {
                let definition = &heap[*definition_id];
                let identifier = definition.identifier();
                if let Err(previous_position) = table.add_definition_symbol(
                    module.root_id, identifier.position, &identifier.value,
                    module.root_id, *definition_id
                ) {
                    return Err(
                        ParseError2::new_error(&module.source, definition.position(), "Symbol is multiply defined")
                            .with_postfixed_info(&module.source, previous_position, "Previous definition was here")
                    )
                }
            }
        }

        // Second pass: now that we can find symbols in modules, we can resolve
        // all imports (if they're correct, that is)
        for module in modules {
            let root = &heap[module.root_id];
            for import_id in &root.imports {
                let import = &heap[*import_id];
                match import {
                    Import::Module(import) => {
                        // Find the module using its name
                        let target_root_id = table.resolve_module(&import.module_name);
                        if target_root_id.is_none() {
                            return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module"));
                        }
                        let target_root_id = target_root_id.unwrap();
                        if target_root_id == module.root_id {
                            return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self"));
                        }

                        // Add the target module under its alias
                        if let Err(previous_position) = table.add_namespace_symbol(
                            module.root_id, import.position,
                            &import.alias, target_root_id
                        ) {
                            return Err(
                                ParseError2::new_error(&module.source, import.position, "Symbol is multiply defined")
                                    .with_postfixed_info(&module.source, previous_position, "Previous definition was here")
                            );
                        }
                    },
                    Import::Symbols(import) => {
                        // Find the target module using its name
                        let target_root_id = table.resolve_module(&import.module_name);
                        if target_root_id.is_none() {
                            return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module of symbol imports"));
                        }
                        let target_root_id = target_root_id.unwrap();
                        if target_root_id == module.root_id {
                            return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self"));
                        }

                        // Determine which symbols to import
                        if import.symbols.is_empty() {
                            // Import of all symbols, not using any aliases
                            for definition_id in &heap[target_root_id].definitions {
                                let definition = &heap[*definition_id];
                                let identifier = definition.identifier();
                                if let Err(previous_position) = table.add_definition_symbol(
                                    module.root_id, import.position, &identifier.value,
                                    target_root_id, *definition_id
                                ) {
                                    return Err(
                                        ParseError2::new_error(
                                            &module.source, import.position,
                                            &format!("Imported symbol '{}' is already defined", String::from_utf8_lossy(&identifier.value))
                                        )
                                        .with_postfixed_info(
                                            &modules[target_root_id.0.index as usize].source,
                                            definition.position(),
                                            "The imported symbol is defined here"
                                        )
                                        .with_postfixed_info(
                                            &module.source, previous_position, "And is previously defined here"
                                        )
                                    )
                                }
                            }
                        } else {
                            // Import of specific symbols, optionally using aliases
                            for symbol in &import.symbols {
                                // Because we have already added per-module definitions, we can use
                                // the table to lookup this particular symbol. Note: within a single
                                // module a namespace-import and a symbol-import may not collide.
                                // Hence per-module symbols are unique.
                                // However: if we import a symbol from another module, we don't want
                                // to "import a module's imported symbol". And so if we do find
                                // a symbol match, we need to make sure it is a definition from
                                // within that module by checking `source_root_id == target_root_id`
                                let target_symbol = table.resolve_symbol(target_root_id, &symbol.name);
                                let symbol_definition_id = match target_symbol {
                                    Some(target_symbol) => {
                                        match target_symbol.symbol {
                                            Symbol::Definition((symbol_root_id, symbol_definition_id)) => {
                                                if symbol_root_id == target_root_id {
                                                    Some(symbol_definition_id)
                                                } else {
                                                    // This is imported within the target module, and not
                                                    // defined within the target module
                                                    None
                                                }
                                            },
                                            Symbol::Namespace(_) => {
                                                // We don't import a module's "module import"
                                                None
                                            }
                                        }
                                    },
                                    None => None
                                };

                                if symbol_definition_id.is_none() {
                                    return Err(
                                        ParseError2::new_error(&module.source, symbol.position, "Could not resolve symbol")
                                    )
                                }
                                let symbol_definition_id = symbol_definition_id.unwrap();

                                if let Err(previous_position) = table.add_definition_symbol(
                                    module.root_id, symbol.position, &symbol.alias,
                                    target_root_id, symbol_definition_id
                                ) {
                                    return Err(
                                        ParseError2::new_error(&module.source, symbol.position, "Symbol is multiply defined")
                                            .with_postfixed_info(&module.source, previous_position, "Previous definition was here")
                                    )
                                }
                            }
                        }
                    }
                }
            }
        }

        debug_assert_eq!(
            table.symbol_lookup.len(), lookup_reserve_size,
            "miscalculated reserved size for symbol lookup table"
        );
        Ok(table)
    }

    /// Resolves a module by its defined name
    pub(crate) fn resolve_module(&self, identifier: &Vec<u8>) -> Option<RootId> {
        self.module_lookup.get(identifier).map(|v| *v)
    }

    /// Resolves a symbol within a particular module, indicated by its RootId,
    /// with a single non-namespaced identifier
    pub(crate) fn resolve_symbol(&self, within_module_id: RootId, identifier: &Vec<u8>) -> Option<&SymbolValue> {
        self.symbol_lookup.get(&SymbolKey{ module_id: within_module_id, symbol_name: identifier.clone() })
    }

    /// Resolves a namespaced symbol. It will try to go as far as possible in
    /// actually finding a definition or a namespace. So a namespace might be
    /// resolved, after it which it finds an actual definition. It may be that
    /// the namespaced identifier has more elements that should be checked
    /// (i.e. an enum variant, or simply an erroneous instance of too many
    /// chained identifiers). This function will return None if nothing could be
    /// resolved at all.
    pub(crate) fn resolve_namespaced_symbol(&self, _within_module_id: RootId) {
        todo!("implement")
    }

    /// Attempts to add a namespace symbol. Returns `Ok` if the symbol was
    /// inserted. If the symbol already exists then `Err` will be returned
    /// together with the previous definition's source position (in the origin
    /// module's source file).
    // Note: I would love to return a reference to the value, but Rust is
    // preventing me from doing so...
    fn add_namespace_symbol(
        &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec<u8>, target_module_id: RootId
    ) -> Result<(), InputPosition> {
        let key = SymbolKey{
            module_id: origin_module_id,
            symbol_name: symbol_name.clone()
        };
        match self.symbol_lookup.entry(key) {
            Entry::Occupied(o) => Err(o.get().position),
            Entry::Vacant(v) => {
                v.insert(SymbolValue{
                    position: origin_position,
                    symbol: Symbol::Namespace(target_module_id)
                });
                Ok(())
            }
        }
    }

    /// Attempts to add a definition symbol. Returns `Ok` if the symbol was
    /// inserted. If the symbol already exists then `Err` will be returned
    /// together with the previous definition's source position (in the origin
    /// module's source file).
    fn add_definition_symbol(
        &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec<u8>,
        target_module_id: RootId, target_definition_id: DefinitionId,
    ) -> Result<(), InputPosition> {
        let key = SymbolKey{
            module_id: origin_module_id,
            symbol_name: symbol_name.clone()
        };
        match self.symbol_lookup.entry(key) {
            Entry::Occupied(o) => Err(o.get().position),
            Entry::Vacant(v) => {
                v.insert(SymbolValue {
                    position: origin_position,
                    symbol: Symbol::Definition((target_module_id, target_definition_id))
                });
                Ok(())
            }
        }
    }
}