Files @ b7d434ab8020
Branch filter:

Location: CSY/reowolf/src/protocol/parser/pass_symbols.rs

b7d434ab8020 11.2 KiB application/rls-services+xml Show Annotation Show as Raw Download as Raw
MH
Remove distinction between primitive/composite components
use crate::protocol::ast::*;
use super::symbol_table::*;
use crate::protocol::input_source::{ParseError, InputSpan};
use super::tokens::*;
use super::token_parsing::*;
use super::{Module, ModuleCompilationPhase, PassCtx};

/// Scans the module and finds all module-level type definitions. These will be
/// added to the symbol table such that during AST-construction we know which
/// identifiers point to types. Will also parse all pragmas to determine module
/// names.
///
/// Besides `struct`, `enum` and `union` definitions, `function` and
/// `component` definitions are registered as well. Of the pragmas, only
/// `#module` and `#version` are interpreted here. The pass object is reusable:
/// every call to `parse` starts by resetting the per-module state below.
pub(crate) struct PassSymbols {
    /// Symbols of the definitions found in the module being scanned. Drained
    /// into the symbol table by `parse`, which afterwards reuses this vector
    /// as scratch space when copying symbols into the global scope.
    symbols: Vec<Symbol>,
    /// Pragmas allocated for the current module; moved into the module's
    /// `Root` at the end of `parse`.
    pragmas: Vec<PragmaId>,
    /// Cleared on reset, but not filled by this pass: import markers are
    /// skipped here.
    imports: Vec<ImportId>,
    /// Definitions allocated for the current module; moved into the module's
    /// `Root` at the end of `parse`.
    definitions: Vec<DefinitionId>,
    /// Scratch string handed to `consume_integer_literal` when parsing the
    /// `#version` pragma. Not touched by `reset`.
    buffer: String,
    /// Set once a `#version` pragma was accepted for the current module; used
    /// to reject a second one.
    has_pragma_version: bool,
    /// Set once a `#module` pragma was accepted for the current module; used
    /// to reject a second one.
    has_pragma_module: bool,
}

impl PassSymbols {
    /// Creates a pass whose collections are empty but pre-sized, and whose
    /// "pragma already seen" flags are both unset.
    pub(crate) fn new() -> Self {
        // Pre-size the storage up front so that scanning a typical module does
        // not need to grow any of these.
        let symbols = Vec::with_capacity(128);
        let pragmas = Vec::with_capacity(8);
        let imports = Vec::with_capacity(32);
        let definitions = Vec::with_capacity(128);
        let buffer = String::with_capacity(128);

        Self {
            symbols,
            pragmas,
            imports,
            definitions,
            buffer,
            has_pragma_version: false,
            has_pragma_module: false,
        }
    }

    fn reset(&mut self) {
        self.symbols.clear();
        self.pragmas.clear();
        self.imports.clear();
        self.definitions.clear();
        self.has_pragma_version = false;
        self.has_pragma_module = false;
    }

    /// Runs the symbol-scanning pass on `modules[module_idx]`.
    ///
    /// In order, this:
    /// 1. resets the pass and allocates an empty `Root` for the module,
    /// 2. visits every pragma and definition token marker (import markers are
    ///    skipped),
    /// 3. registers the module scope below the global scope, and for each
    ///    found definition registers a definition scope plus its symbol,
    /// 4. if the module asks for it, also inserts all of the module's symbols
    ///    into the global scope,
    /// 5. moves the collected pragma and definition ids into the `Root` and
    ///    advances the module to the `SymbolsScanned` phase.
    ///
    /// # Errors
    /// Returns the first `ParseError` produced while visiting a marker, or a
    /// symbol-conflict error if two symbols in the module scope collide.
    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
        self.reset();

        let module = &mut modules[module_idx];
        debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized);
        debug_assert!(module.root_id.is_invalid()); // not set yet
        let add_to_global_namespace = module.add_to_global_namespace;

        // The root is allocated empty right away, its contents are filled in
        // once all markers have been visited
        let root_id = ctx.heap.alloc_protocol_description(|this| Root{
            this,
            pragmas: Vec::new(),
            imports: Vec::new(),
            definitions: Vec::new(),
        });
        module.root_id = root_id;

        // Use the token markers to detect symbol definitions and pragmas
        let marker_count = module.tokens.markers.len();
        for marker_index in 0..marker_count {
            let marker = &modules[module_idx].tokens.markers[marker_index];

            match marker.kind {
                TokenMarkerKind::Pragma => self.visit_pragma_marker(modules, module_idx, ctx, marker_index)?,
                TokenMarkerKind::Definition => self.visit_definition_marker(modules, module_idx, ctx, marker_index)?,
                TokenMarkerKind::Import => {}, // we don't care yet
            }
        }

        // Register the module's scope, then every symbol that was found
        // together with the scope of its definition
        let module_scope = SymbolScope::Module(root_id);
        ctx.symbols.insert_scope(Some(SymbolScope::Global), module_scope);
        for symbol in self.symbols.drain(..) {
            let definition_scope = SymbolScope::Definition(symbol.variant.as_definition().definition_id);
            ctx.symbols.insert_scope(Some(module_scope), definition_scope);

            if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(module_scope, symbol) {
                return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol))
            }
        }

        if add_to_global_namespace {
            // `self.symbols` was fully drained above, so it can serve as
            // scratch space for the copy into the global scope
            debug_assert!(self.symbols.is_empty());
            ctx.symbols.get_all_symbols_defined_in_scope(module_scope, &mut self.symbols);
            self.symbols.drain(..).for_each(|symbol| {
                ctx.symbols.insert_symbol_in_global_scope(symbol);
            });
        }

        // Hand the collected ids over to the preallocated root
        let root = &mut ctx.heap[root_id];
        root.pragmas.append(&mut self.pragmas);
        root.definitions.append(&mut self.definitions);

        // Pass is done for this module
        modules[module_idx].phase = ModuleCompilationPhase::SymbolsScanned;

        Ok(())
    }

    /// Handles the pragma that starts at token marker `marker_index` of
    /// `modules[module_idx]`.
    ///
    /// Only two pragmas are interpreted here:
    /// - `#module <domain-name>`: interns the name, allocates a
    ///   `Pragma::Module`, registers the module name in the symbol table and
    ///   stores it in `module.name`.
    /// - `#version <integer>`: allocates a `Pragma::Version` and stores the
    ///   version in `module.version`.
    ///
    /// For both, the marker's `last_token` is recorded and the marker is
    /// flagged as handled. Any other pragma is left untouched so it can be
    /// handled (or rejected) later.
    ///
    /// # Errors
    /// Returns a `ParseError` if the pragma cannot be consumed, if `#module`
    /// or `#version` occurs twice in the same module, if the module name is
    /// already taken by another module, or if the version does not fit in the
    /// signed 64-bit integer stored on the module.
    fn visit_pragma_marker(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> {
        let module = &mut modules[module_idx];
        let marker = &module.tokens.markers[marker_index];
        let mut iter = module.tokens.iter_range(marker.first_token, None);

        // Consume pragma name
        let (pragma_section, mut pragma_span) = consume_pragma(&module.source, &mut iter)?;

        // Consume pragma values
        if pragma_section == b"#module" {
            // Check if name is defined twice within the same file
            if self.has_pragma_module {
                return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module name is defined twice"));
            }

            // Consume the domain-name, then record end of pragma
            let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
            let marker_last_token = iter.token_index();

            // Add to heap and symbol table
            pragma_span.end = module_span.end;
            let module_name = ctx.pool.intern(module_name);
            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{
                this,
                span: pragma_span,
                value: Identifier{ span: module_span, value: module_name.clone() },
            }));
            self.pragmas.push(pragma_id);

            if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name.clone(), module.root_id) {
                // Naming conflict: report this pragma and point at the pragma
                // of the module that already claimed the name
                let this_module = &modules[module_idx];
                let other_module = seek_module(modules, other_module_root_id).unwrap();
                let other_module_pragma_id = other_module.name.as_ref().map(|v| (*v).0).unwrap();
                let other_pragma = ctx.heap[other_module_pragma_id].as_module();
                return Err(ParseError::new_error_str_at_span(
                    &this_module.source, pragma_span, "conflict in module name"
                ).with_info_str_at_span(
                    &other_module.source, other_pragma.span, "other module is defined here"
                ));
            }

            let marker = &mut module.tokens.markers[marker_index];
            marker.last_token = marker_last_token;
            marker.handled = true;

            module.name = Some((pragma_id, module_name));
            self.has_pragma_module = true;
        } else if pragma_section == b"#version" {
            // Check if version is defined twice within the same file
            if self.has_pragma_version {
                return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module version is defined twice"));
            }

            // Consume the version pragma
            let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?;
            let marker_last_token = iter.token_index();

            // `module.version` holds the value after a cast to `i64`. A literal
            // that is too large would silently wrap around to a negative
            // version, so reject it before anything is recorded.
            // NOTE(review): assumes the literal is parsed as an unsigned 64-bit
            // integer, in which case "wrapped" and "negative" coincide - confirm.
            let signed_version = version as i64;
            if signed_version < 0 {
                return Err(ParseError::new_error_str_at_span(&module.source, version_span, "module version is too large"));
            }

            pragma_span.end = version_span.end;
            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{
                this,
                span: pragma_span,
                version,
            }));
            self.pragmas.push(pragma_id);

            let marker = &mut module.tokens.markers[marker_index];
            marker.last_token = marker_last_token;
            marker.handled = true;

            module.version = Some((pragma_id, signed_version));
            self.has_pragma_version = true;
        } // else: custom pragma used for something else, will be handled later (or rejected with an error)

        Ok(())
    }

    /// Handles the definition that starts at token marker `marker_index` of
    /// `modules[module_idx]`.
    ///
    /// Reads the defining keyword, the definition's identifier and its
    /// optional `<...>` list of polymorphic variables. An empty definition of
    /// the matching kind (struct, enum, union, function or component) is then
    /// allocated in the heap, and a symbol for it is queued in `self.symbols`
    /// together with its id in `self.definitions`. The symbol table itself is
    /// not modified here.
    ///
    /// # Errors
    /// Returns a `ParseError` if the identifier or the polymorphic variable
    /// list cannot be consumed.
    ///
    /// # Panics
    /// Panics if the marker does not start with an identifier, or if that
    /// identifier is not one of the definition keywords.
    fn visit_definition_marker(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> {
        let module = &modules[module_idx];
        let marker = &module.tokens.markers[marker_index];
        let mut iter = module.tokens.iter_range(marker.first_token, None);

        // First ident must be type of symbol
        let (kw_text, _) = consume_any_ident(&module.source, &mut iter)
            .expect("definition marker must start with a keyword identifier");

        // Retrieve identifier of definition
        let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
        let mut poly_vars = Vec::new();
        maybe_consume_comma_separated(
            TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, ctx,
            |source, iter, ctx| consume_ident_interned(source, iter, ctx),
            &mut poly_vars, "a polymorphic variable", None
        )?;
        let ident_text = identifier.value.clone(); // because we need it later
        let ident_span = identifier.span.clone();

        // Reserve space in AST for definition; the symbol for it is built
        // below. `identifier` and `poly_vars` are moved into whichever arm runs.
        let (definition_class, ast_definition_id) = match kw_text {
            KW_STRUCT => {
                let struct_def_id = ctx.heap.alloc_struct_definition(|this| {
                    StructDefinition::new_empty(this, module.root_id, identifier, poly_vars)
                });
                (DefinitionClass::Struct, struct_def_id.upcast())
            },
            KW_ENUM => {
                let enum_def_id = ctx.heap.alloc_enum_definition(|this| {
                    EnumDefinition::new_empty(this, module.root_id, identifier, poly_vars)
                });
                (DefinitionClass::Enum, enum_def_id.upcast())
            },
            KW_UNION => {
                let union_def_id = ctx.heap.alloc_union_definition(|this| {
                    UnionDefinition::new_empty(this, module.root_id, identifier, poly_vars)
                });
                (DefinitionClass::Union, union_def_id.upcast())
            },
            KW_FUNCTION => {
                let proc_def_id = ctx.heap.alloc_procedure_definition(|this| {
                    ProcedureDefinition::new_empty(this, module.root_id, ProcedureKind::Function, identifier, poly_vars)
                });
                (DefinitionClass::Function, proc_def_id.upcast())
            },
            KW_COMPONENT => {
                let proc_def_id = ctx.heap.alloc_procedure_definition(|this| {
                    ProcedureDefinition::new_empty(this, module.root_id, ProcedureKind::Component, identifier, poly_vars)
                });
                (DefinitionClass::Component, proc_def_id.upcast())
            },
            _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(kw_text)),
        };

        // Queue the symbol; `parse` inserts it into the symbol table later
        let symbol = Symbol{
            name: ident_text,
            variant: SymbolVariant::Definition(SymbolDefinition{
                defined_in_module: module.root_id,
                defined_in_scope: SymbolScope::Module(module.root_id),
                identifier_span: ident_span,
                imported_at: None,
                class: definition_class,
                definition_id: ast_definition_id,
            }),
        };
        self.symbols.push(symbol);
        self.definitions.push(ast_definition_id);

        Ok(())
    }
}