use crate::protocol::ast::*; use super::symbol_table::*; use crate::protocol::input_source::{ParseError, InputSpan}; use super::tokens::*; use super::token_parsing::*; use super::{Module, ModuleCompilationPhase, PassCtx}; /// Scans the module and finds all module-level type definitions. These will be /// added to the symbol table such that during AST-construction we know which /// identifiers point to types. Will also parse all pragmas to determine module /// names. pub(crate) struct PassSymbols { symbols: Vec, pragmas: Vec, imports: Vec, definitions: Vec, buffer: String, has_pragma_version: bool, has_pragma_module: bool, } impl PassSymbols { pub(crate) fn new() -> Self { Self{ symbols: Vec::with_capacity(128), pragmas: Vec::with_capacity(8), imports: Vec::with_capacity(32), definitions: Vec::with_capacity(128), buffer: String::with_capacity(128), has_pragma_version: false, has_pragma_module: false, } } fn reset(&mut self) { self.symbols.clear(); self.pragmas.clear(); self.imports.clear(); self.definitions.clear(); self.has_pragma_version = false; self.has_pragma_module = false; } pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> { self.reset(); let module = &mut modules[module_idx]; let module_is_compiler_file = module.is_compiler_file; debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized); debug_assert!(module.root_id.is_invalid()); // not set yet // Preallocate root in the heap let root_id = ctx.heap.alloc_protocol_description(|this| { Root{ this, pragmas: Vec::new(), imports: Vec::new(), definitions: Vec::new(), } }); module.root_id = root_id; // Use pragma token markers to detects symbol definitions and pragmas let num_markers = module.tokens.markers.len(); for marker_index in 0..num_markers { let module = &modules[module_idx]; let marker = &module.tokens.markers[marker_index]; // Parse if it is a definition or a pragma match marker.kind { TokenMarkerKind::Pragma => { self.visit_pragma_marker(modules, module_idx, ctx, marker_index)?; }, TokenMarkerKind::Definition => { self.visit_definition_marker(modules, module_idx, ctx, marker_index)?; } TokenMarkerKind::Import => {}, // we don't care yet } } // Add the module's symbol scope and the symbols we just parsed let module_scope = SymbolScope::Module(root_id); ctx.symbols.insert_scope(Some(SymbolScope::Global), module_scope); for symbol in self.symbols.drain(..) { ctx.symbols.insert_scope(Some(module_scope), SymbolScope::Definition(symbol.variant.as_definition().definition_id)); if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(module_scope, symbol) { return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol)) } } if module_is_compiler_file { debug_assert!(self.symbols.is_empty()); ctx.symbols.get_all_symbols_defined_in_scope(module_scope, &mut self.symbols); for symbol in self.symbols.drain(..) { ctx.symbols.insert_symbol_in_global_scope(symbol); } } // Modify the preallocated root let root = &mut ctx.heap[root_id]; root.pragmas.extend(self.pragmas.drain(..)); root.definitions.extend(self.definitions.drain(..)); // Modify module let module = &mut modules[module_idx]; module.phase = ModuleCompilationPhase::SymbolsScanned; Ok(()) } fn visit_pragma_marker(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> { let module = &mut modules[module_idx]; let marker = &module.tokens.markers[marker_index]; let mut iter = module.tokens.iter_range(marker.first_token, None); // Consume pragma name let (pragma_section, mut pragma_span) = consume_pragma(&module.source, &mut iter)?; // Consume pragma values if pragma_section == b"#module" { // Check if name is defined twice within the same file if self.has_pragma_module { return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module name is defined twice")); } // Consume the domain-name, then record end of pragma let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?; let marker_last_token = iter.token_index(); // Add to heap and symbol table pragma_span.end = module_span.end; let module_name = ctx.pool.intern(module_name); let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{ this, span: pragma_span, value: Identifier{ span: module_span, value: module_name.clone() }, })); self.pragmas.push(pragma_id); if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name.clone(), module.root_id) { // Naming conflict let this_module = &modules[module_idx]; let other_module = seek_module(modules, other_module_root_id).unwrap(); let other_module_pragma_id = other_module.name.as_ref().map(|v| (*v).0).unwrap(); let other_pragma = ctx.heap[other_module_pragma_id].as_module(); return Err(ParseError::new_error_str_at_span( &this_module.source, pragma_span, "conflict in module name" ).with_info_str_at_span( &other_module.source, other_pragma.span, "other module is defined here" )); } let marker = &mut module.tokens.markers[marker_index]; marker.last_token = marker_last_token; marker.handled = true; module.name = Some((pragma_id, module_name)); self.has_pragma_module = true; } else if pragma_section == b"#version" { // Check if version is defined twice within the same file if self.has_pragma_version { return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module version is defined twice")); } // Consume the version pragma let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?; let marker_last_token = iter.token_index(); pragma_span.end = version_span.end; let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{ this, span: pragma_span, version, })); self.pragmas.push(pragma_id); let marker = &mut module.tokens.markers[marker_index]; marker.last_token = marker_last_token; marker.handled = true; module.version = Some((pragma_id, version as i64)); self.has_pragma_version = true; } // else: custom pragma used for something else, will be handled later (or rejected with an error) Ok(()) } fn visit_definition_marker(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> { let module = &modules[module_idx]; let marker = &module.tokens.markers[marker_index]; let mut iter = module.tokens.iter_range(marker.first_token, None); // First ident must be type of symbol let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap(); // Retrieve identifier of definition let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?; let mut poly_vars = Vec::new(); maybe_consume_comma_separated( TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, ctx, |source, iter, ctx| consume_ident_interned(source, iter, ctx), &mut poly_vars, "a polymorphic variable", None )?; let ident_text = identifier.value.clone(); // because we need it later let ident_span = identifier.span.clone(); // Reserve space in AST for definition and add it to the symbol table let definition_class; let ast_definition_id; match kw_text { KW_STRUCT => { let struct_def_id = ctx.heap.alloc_struct_definition(|this| { StructDefinition::new_empty(this, module.root_id, identifier, poly_vars) }); definition_class = DefinitionClass::Struct; ast_definition_id = struct_def_id.upcast(); }, KW_ENUM => { let enum_def_id = ctx.heap.alloc_enum_definition(|this| { EnumDefinition::new_empty(this, module.root_id, identifier, poly_vars) }); definition_class = DefinitionClass::Enum; ast_definition_id = enum_def_id.upcast(); }, KW_UNION => { let union_def_id = ctx.heap.alloc_union_definition(|this| { UnionDefinition::new_empty(this, module.root_id, identifier, poly_vars) }); definition_class = DefinitionClass::Union; ast_definition_id = union_def_id.upcast() }, KW_FUNCTION => { let proc_def_id = ctx.heap.alloc_procedure_definition(|this| { ProcedureDefinition::new_empty(this, module.root_id, ProcedureKind::Function, identifier, poly_vars) }); definition_class = DefinitionClass::Function; ast_definition_id = proc_def_id.upcast(); }, KW_PRIMITIVE | KW_COMPOSITE => { let procedure_kind = if kw_text == KW_PRIMITIVE { ProcedureKind::Primitive } else { ProcedureKind::Composite }; let proc_def_id = ctx.heap.alloc_procedure_definition(|this| { ProcedureDefinition::new_empty(this, module.root_id, procedure_kind, identifier, poly_vars) }); definition_class = DefinitionClass::Component; ast_definition_id = proc_def_id.upcast(); }, _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(kw_text)), } let symbol = Symbol{ name: ident_text, variant: SymbolVariant::Definition(SymbolDefinition{ defined_in_module: module.root_id, defined_in_scope: SymbolScope::Module(module.root_id), identifier_span: ident_span, imported_at: None, class: definition_class, definition_id: ast_definition_id, }), }; self.symbols.push(symbol); self.definitions.push(ast_definition_id); Ok(()) } }