Files @ c9800c8f19d7
Branch filter:

Location: CSY/reowolf/src/protocol/parser/pass_symbols.rs - annotation

c9800c8f19d7 11.4 KiB application/rls-services+xml Show Source Show as Raw Download as Raw
mh
Rewrite tokenizer to emit markers iso ranges
ddddcd3cc9aa
012b61623f5a
012b61623f5a
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
560ed3c4dc1d
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
9b32fa307ceb
ddddcd3cc9aa
e2849e9bfb16
ddddcd3cc9aa
87aa65714efe
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
87aa65714efe
560ed3c4dc1d
87aa65714efe
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
560ed3c4dc1d
c9800c8f19d7
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
012b61623f5a
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
560ed3c4dc1d
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
87aa65714efe
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
560ed3c4dc1d
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
560ed3c4dc1d
ddddcd3cc9aa
012b61623f5a
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
560ed3c4dc1d
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
c9800c8f19d7
560ed3c4dc1d
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
c9800c8f19d7
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
fc987660fdee
fc987660fdee
87aa65714efe
87aa65714efe
17fe648a8934
fc987660fdee
ddddcd3cc9aa
012b61623f5a
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
637115283740
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
637115283740
ddddcd3cc9aa
ddddcd3cc9aa
637115283740
637115283740
ddddcd3cc9aa
637115283740
ddddcd3cc9aa
637115283740
c9800c8f19d7
ddddcd3cc9aa
ddddcd3cc9aa
637115283740
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
012b61623f5a
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
ddddcd3cc9aa
use crate::protocol::ast::*;
use super::symbol_table::*;
use crate::protocol::input_source::{ParseError, InputSpan};
use super::tokens::*;
use super::token_parsing::*;
use super::{Module, ModuleCompilationPhase, PassCtx};

/// Scans a tokenized module and registers all of its module-level definitions
/// (structs, enums, unions, functions and components) in the symbol table,
/// such that during AST-construction we know which identifiers point to
/// definitions. Also parses the `#module` and `#version` pragmas to determine
/// the module's name and version.
///
/// The pass object is reusable: `parse` resets the per-module state before it
/// starts working on a module.
pub(crate) struct PassSymbols {
    /// Symbols created for the definitions found in the current module. They
    /// are drained into the symbol table at the end of `parse`; the vector is
    /// also reused as scratch space when a compiler file's symbols are copied
    /// into the global scope.
    symbols: Vec<Symbol>,
    /// IDs of the pragmas allocated for the current module; moved into the
    /// module's `Root` once `parse` is done.
    pragmas: Vec<PragmaId>,
    /// Cleared on reset, but not filled by the code in this pass (import
    /// markers are skipped for now).
    imports: Vec<ImportId>,
    /// IDs of the (still empty) definitions allocated for the current module;
    /// moved into the module's `Root` once `parse` is done.
    definitions: Vec<DefinitionId>,
    /// Scratch string handed to `consume_integer_literal` while parsing the
    /// `#version` pragma.
    buffer: String,
    /// Set once a `#version` pragma was seen, used to reject a second one.
    has_pragma_version: bool,
    /// Set once a `#module` pragma was seen, used to reject a second one.
    has_pragma_module: bool,
}

impl PassSymbols {
    /// Creates a pass with empty, preallocated scratch storage and with both
    /// "pragma seen" flags unset.
    pub(crate) fn new() -> Self {
        let symbols = Vec::with_capacity(128);
        let pragmas = Vec::with_capacity(8);
        let imports = Vec::with_capacity(32);
        let definitions = Vec::with_capacity(128);
        let buffer = String::with_capacity(128);

        Self {
            symbols,
            pragmas,
            imports,
            definitions,
            buffer,
            has_pragma_version: false,
            has_pragma_module: false,
        }
    }

    /// Clears the per-module state so the pass can be run on another module.
    /// The vectors keep their allocations; `buffer` is not touched here.
    fn reset(&mut self) {
        // No pragma has been seen yet for the upcoming module
        self.has_pragma_module = false;
        self.has_pragma_version = false;

        // Drop whatever was collected for the previous module
        self.definitions.clear();
        self.imports.clear();
        self.pragmas.clear();
        self.symbols.clear();
    }

    /// Runs the symbol-scanning pass on `modules[module_idx]`.
    ///
    /// Allocates the module's `Root` in the heap, visits every pragma and
    /// definition token marker, registers the collected symbols in a freshly
    /// inserted module scope (and, for compiler files, in the global scope as
    /// well), stores the pragma and definition IDs in the root and finally
    /// moves the module to `ModuleCompilationPhase::SymbolsScanned`.
    ///
    /// # Errors
    /// Propagates any `ParseError` produced while visiting a marker, and
    /// returns the error built by `construct_symbol_conflict_error` when the
    /// symbol table refuses one of the module's symbols.
    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
        self.reset();

        let target = &mut modules[module_idx];
        debug_assert_eq!(target.phase, ModuleCompilationPhase::Tokenized);
        debug_assert!(target.root_id.is_invalid()); // root has not been assigned yet
        let is_compiler_file = target.is_compiler_file;

        // Reserve the root node up front, so definitions can refer to it
        let module_root_id = ctx.heap.alloc_protocol_description(|this| Root {
            this,
            pragmas: Vec::new(),
            imports: Vec::new(),
            definitions: Vec::new(),
        });
        target.root_id = module_root_id;

        // Walk the token markers by index: the visitors need the full
        // `modules` slice, so no borrow of the marker may outlive the lookup
        // of its kind.
        let marker_count = target.tokens.markers.len();
        for marker_index in 0..marker_count {
            let marker = &modules[module_idx].tokens.markers[marker_index];
            match marker.kind {
                TokenMarkerKind::Import => {}, // imports are not handled by this pass
                TokenMarkerKind::Pragma => {
                    self.visit_pragma_marker(modules, module_idx, ctx, marker_index)?;
                },
                TokenMarkerKind::Definition => {
                    self.visit_definition_marker(modules, module_idx, ctx, marker_index)?;
                },
            }
        }

        // Register the module scope, then each definition's scope and symbol
        let scope = SymbolScope::Module(module_root_id);
        ctx.symbols.insert_scope(Some(SymbolScope::Global), scope);
        for symbol in self.symbols.drain(..) {
            let definition_scope = SymbolScope::Definition(symbol.variant.as_definition().definition_id);
            ctx.symbols.insert_scope(Some(scope), definition_scope);
            match ctx.symbols.insert_symbol(scope, symbol) {
                Ok(_) => {},
                Err((new_symbol, old_symbol)) => {
                    return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol));
                },
            }
        }

        // Symbols of a compiler file are additionally inserted into the
        // global scope. `self.symbols` doubles as the scratch vector here.
        if is_compiler_file {
            debug_assert!(self.symbols.is_empty());
            ctx.symbols.get_all_symbols_defined_in_scope(scope, &mut self.symbols);
            for global_symbol in self.symbols.drain(..) {
                ctx.symbols.insert_symbol_in_global_scope(global_symbol);
            }
        }

        // Hand the collected IDs over to the preallocated root
        let root = &mut ctx.heap[module_root_id];
        root.pragmas.append(&mut self.pragmas);
        root.definitions.append(&mut self.definitions);

        // Record that this module has been processed by this pass
        modules[module_idx].phase = ModuleCompilationPhase::SymbolsScanned;

        Ok(())
    }

    fn visit_pragma_marker(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> {
        let module = &mut modules[module_idx];
        let marker = &module.tokens.markers[marker_index];
        let mut iter = module.tokens.iter_range(marker.first_token, None);

        // Consume pragma name
        let (pragma_section, mut pragma_span) = consume_pragma(&module.source, &mut iter)?;

        // Consume pragma values
        if pragma_section == b"#module" {
            // Check if name is defined twice within the same file
            if self.has_pragma_module {
                return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module name is defined twice"));
            }

            // Consume the domain-name, then record end of pragma
            let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
            let marker_last_token = iter.token_index();

            // Add to heap and symbol table
            pragma_span.end = module_span.end;
            let module_name = ctx.pool.intern(module_name);
            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{
                this,
                span: pragma_span,
                value: Identifier{ span: module_span, value: module_name.clone() },
            }));
            self.pragmas.push(pragma_id);

            if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name.clone(), module.root_id) {
                // Naming conflict
                let this_module = &modules[module_idx];
                let other_module = seek_module(modules, other_module_root_id).unwrap();
                let other_module_pragma_id = other_module.name.as_ref().map(|v| (*v).0).unwrap();
                let other_pragma = ctx.heap[other_module_pragma_id].as_module();
                return Err(ParseError::new_error_str_at_span(
                    &this_module.source, pragma_span, "conflict in module name"
                ).with_info_str_at_span(
                    &other_module.source, other_pragma.span, "other module is defined here"
                ));
            }

            let marker = &mut module.tokens.markers[marker_index];
            marker.last_token = marker_last_token;
            marker.handled = true;

            module.name = Some((pragma_id, module_name));
            self.has_pragma_module = true;
        } else if pragma_section == b"#version" {
            // Check if version is defined twice within the same file
            if self.has_pragma_version {
                return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module version is defined twice"));
            }

            // Consume the version pragma
            let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?;
            let marker_last_token = iter.token_index();

            pragma_span.end = version_span.end;
            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{
                this,
                span: pragma_span,
                version,
            }));
            self.pragmas.push(pragma_id);

            let marker = &mut module.tokens.markers[marker_index];
            marker.last_token = marker_last_token;
            marker.handled = true;

            module.version = Some((pragma_id, version as i64));
            self.has_pragma_version = true;
        } // else: custom pragma used for something else, will be handled later (or rejected with an error)

        Ok(())
    }

    /// Handles a single definition marker of `modules[module_idx]`.
    ///
    /// Reads the definition keyword, the definition's name and its optional
    /// polymorphic variables, allocates an empty definition of the matching
    /// kind in the heap and queues both the definition ID and a symbol for it.
    /// The symbol is only inserted into the symbol table later on, by `parse`.
    ///
    /// # Errors
    /// Returns a `ParseError` if the name or the polymorphic variables cannot
    /// be consumed.
    ///
    /// # Panics
    /// Panics if the marker does not start with an identifier, or if that
    /// identifier is not one of the definition keywords matched below.
    fn visit_definition_marker(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> {
        let module = &modules[module_idx];
        let first_token = module.tokens.markers[marker_index].first_token;
        let mut iter = module.tokens.iter_range(first_token, None);

        // The leading identifier is the keyword that selects the kind of
        // definition
        let (keyword, _) = consume_any_ident(&module.source, &mut iter).unwrap();

        // Then the name; keep copies of its text and span for the symbol,
        // as the identifier itself moves into the definition below.
        let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
        let symbol_name = identifier.value.clone();
        let symbol_span = identifier.span.clone();

        // Finally the optional `<...>` list of polymorphic variables
        let mut poly_vars = Vec::new();
        maybe_consume_comma_separated(
            TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, ctx,
            |source, iter, ctx| consume_ident_interned(source, iter, ctx),
            &mut poly_vars, "a polymorphic variable", None
        )?;

        // Reserve an empty definition in the AST, according to the keyword
        let root_id = module.root_id;
        let (definition_class, definition_id) = match keyword {
            KW_STRUCT => {
                let id = ctx.heap.alloc_struct_definition(|this| {
                    StructDefinition::new_empty(this, root_id, identifier, poly_vars)
                });
                (DefinitionClass::Struct, id.upcast())
            },
            KW_ENUM => {
                let id = ctx.heap.alloc_enum_definition(|this| {
                    EnumDefinition::new_empty(this, root_id, identifier, poly_vars)
                });
                (DefinitionClass::Enum, id.upcast())
            },
            KW_UNION => {
                let id = ctx.heap.alloc_union_definition(|this| {
                    UnionDefinition::new_empty(this, root_id, identifier, poly_vars)
                });
                (DefinitionClass::Union, id.upcast())
            },
            KW_FUNCTION => {
                let id = ctx.heap.alloc_procedure_definition(|this| {
                    ProcedureDefinition::new_empty(this, root_id, ProcedureKind::Function, identifier, poly_vars)
                });
                (DefinitionClass::Function, id.upcast())
            },
            KW_PRIMITIVE | KW_COMPOSITE => {
                // Both keywords declare a component; only the kind differs
                let kind = if keyword == KW_COMPOSITE {
                    ProcedureKind::Composite
                } else {
                    ProcedureKind::Primitive
                };
                let id = ctx.heap.alloc_procedure_definition(|this| {
                    ProcedureDefinition::new_empty(this, root_id, kind, identifier, poly_vars)
                });
                (DefinitionClass::Component, id.upcast())
            },
            _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(keyword)),
        };

        // Queue the results; `parse` moves them into the root and the symbol
        // table
        self.definitions.push(definition_id);
        self.symbols.push(Symbol {
            name: symbol_name,
            variant: SymbolVariant::Definition(SymbolDefinition {
                defined_in_module: root_id,
                defined_in_scope: SymbolScope::Module(root_id),
                identifier_span: symbol_span,
                imported_at: None,
                class: definition_class,
                definition_id,
            }),
        });

        Ok(())
    }
}