diff --git a/src/protocol/parser/pass_symbols.rs b/src/protocol/parser/pass_symbols.rs index 28d7ba5fce047bdc25d8133f5b7cb6beb42f66e8..3c3b628f105947d8748d9318ac4a237d1606e4fe 100644 --- a/src/protocol/parser/pass_symbols.rs +++ b/src/protocol/parser/pass_symbols.rs @@ -45,11 +45,10 @@ impl PassSymbols { self.reset(); let module = &mut modules[module_idx]; - let module_range = &module.tokens.ranges[0]; + let module_is_compiler_file = module.is_compiler_file; debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized); - debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); - debug_assert!(module.root_id.is_invalid()); // not set yet, + debug_assert!(module.root_id.is_invalid()); // not set yet // Preallocate root in the heap let root_id = ctx.heap.alloc_protocol_description(|this| { @@ -62,28 +61,21 @@ impl PassSymbols { }); module.root_id = root_id; - // Retrieve first range index, then make immutable borrow - let mut range_idx = module_range.first_child_idx; - - // Visit token ranges to detect definitions and pragmas - loop { + // Use pragma token markers to detect symbol definitions and pragmas + let num_markers = module.tokens.markers.len(); + for marker_index in 0..num_markers { let module = &modules[module_idx]; - let range_idx_usize = range_idx as usize; - let cur_range = &module.tokens.ranges[range_idx_usize]; - let next_sibling_idx = cur_range.next_sibling_idx; - let range_kind = cur_range.range_kind; + let marker = &module.tokens.markers[marker_index]; // Parse if it is a definition or a pragma - if range_kind == TokenRangeKind::Definition { - self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?; - } else if range_kind == TokenRangeKind::Pragma { - self.visit_pragma_range(modules, module_idx, ctx, range_idx_usize)?; - } - - if next_sibling_idx == NO_SIBLING { - break; - } else { - range_idx = next_sibling_idx; + match marker.kind { + TokenMarkerKind::Pragma => { + self.visit_pragma_marker(modules, module_idx, ctx, 
marker_index)?; + }, + TokenMarkerKind::Definition => { + self.visit_definition_marker(modules, module_idx, ctx, marker_index)?; + } + TokenMarkerKind::Import => {}, // we don't care yet } } @@ -97,6 +89,14 @@ impl PassSymbols { } } + if module_is_compiler_file { + debug_assert!(self.symbols.is_empty()); + ctx.symbols.get_all_symbols_defined_in_scope(module_scope, &mut self.symbols); + for symbol in self.symbols.drain(..) { + ctx.symbols.insert_symbol_in_global_scope(symbol); + } + } + // Modify the preallocated root let root = &mut ctx.heap[root_id]; root.pragmas.extend(self.pragmas.drain(..)); @@ -109,32 +109,27 @@ impl PassSymbols { Ok(()) } - fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> { + fn visit_pragma_marker(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> { let module = &mut modules[module_idx]; - let range = &module.tokens.ranges[range_idx]; - let mut iter = module.tokens.iter_range(range.start, module.tokens.tokens.len() as u32); + let marker = &module.tokens.markers[marker_index]; + let mut iter = module.tokens.iter_range(marker.first_token, None); // Consume pragma name - let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?; + let (pragma_section, mut pragma_span) = consume_pragma(&module.source, &mut iter)?; // Consume pragma values if pragma_section == b"#module" { // Check if name is defined twice within the same file if self.has_pragma_module { - return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module name is defined twice")); + return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module name is defined twice")); } - // Consume the domain-name + // Consume the domain-name, then record end of pragma let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?; - - // TODO: Fix with newer 
token range parsing - module.tokens.ranges[range_idx as usize].end = iter.token_index(); - // if iter.next().is_some() { - // return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name")); - // } + let marker_last_token = iter.token_index(); // Add to heap and symbol table - let pragma_span = InputSpan::from_positions(pragma_start, module_span.end); + pragma_span.end = module_span.end; let module_name = ctx.pool.intern(module_name); let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{ this, @@ -156,49 +151,51 @@ impl PassSymbols { )); } + let marker = &mut module.tokens.markers[marker_index]; + marker.last_token = marker_last_token; + marker.handled = true; + module.name = Some((pragma_id, module_name)); self.has_pragma_module = true; } else if pragma_section == b"#version" { // Check if version is defined twice within the same file if self.has_pragma_version { - return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module version is defined twice")); + return Err(ParseError::new_error_str_at_span(&module.source, pragma_span, "module version is defined twice")); } // Consume the version pragma let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?; + let marker_last_token = iter.token_index(); + + pragma_span.end = version_span.end; let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{ this, - span: InputSpan::from_positions(pragma_start, version_span.end), + span: pragma_span, version, })); self.pragmas.push(pragma_id); + let marker = &mut module.tokens.markers[marker_index]; + marker.last_token = marker_last_token; + marker.handled = true; + module.version = Some((pragma_id, version as i64)); self.has_pragma_version = true; - } else { - // Custom pragma, maybe we support this in the future, but for now - // we don't. 
- return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "illegal pragma name")); - } + } // else: custom pragma used for something else, will be handled later (or rejected with an error) Ok(()) } - fn visit_definition_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> { + fn visit_definition_marker(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, marker_index: usize) -> Result<(), ParseError> { let module = &modules[module_idx]; - let range = &module.tokens.ranges[range_idx]; - let definition_span = InputSpan::from_positions( - module.tokens.start_pos(range), - module.tokens.end_pos(range) - ); - let mut iter = module.tokens.iter_range(range.start, range.end); + let marker = &module.tokens.markers[marker_index]; + let mut iter = module.tokens.iter_range(marker.first_token, None); // First ident must be type of symbol let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap(); // Retrieve identifier of definition let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?; - println!("DEBUG: Parsing {} --- {}", String::from_utf8_lossy(kw_text).to_string(), identifier.value.as_str()); let mut poly_vars = Vec::new(); maybe_consume_comma_separated( TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, ctx, @@ -214,28 +211,28 @@ impl PassSymbols { match kw_text { KW_STRUCT => { let struct_def_id = ctx.heap.alloc_struct_definition(|this| { - StructDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars) + StructDefinition::new_empty(this, module.root_id, identifier, poly_vars) }); definition_class = DefinitionClass::Struct; ast_definition_id = struct_def_id.upcast(); }, KW_ENUM => { let enum_def_id = ctx.heap.alloc_enum_definition(|this| { - EnumDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars) + EnumDefinition::new_empty(this, module.root_id, identifier, 
poly_vars) }); definition_class = DefinitionClass::Enum; ast_definition_id = enum_def_id.upcast(); }, KW_UNION => { let union_def_id = ctx.heap.alloc_union_definition(|this| { - UnionDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars) + UnionDefinition::new_empty(this, module.root_id, identifier, poly_vars) }); definition_class = DefinitionClass::Union; ast_definition_id = union_def_id.upcast() }, KW_FUNCTION => { let proc_def_id = ctx.heap.alloc_procedure_definition(|this| { - ProcedureDefinition::new_empty(this, module.root_id, definition_span, ProcedureKind::Function, identifier, poly_vars) + ProcedureDefinition::new_empty(this, module.root_id, ProcedureKind::Function, identifier, poly_vars) }); definition_class = DefinitionClass::Function; ast_definition_id = proc_def_id.upcast(); @@ -247,7 +244,7 @@ impl PassSymbols { ProcedureKind::Composite }; let proc_def_id = ctx.heap.alloc_procedure_definition(|this| { - ProcedureDefinition::new_empty(this, module.root_id, definition_span, procedure_kind, identifier, poly_vars) + ProcedureDefinition::new_empty(this, module.root_id, procedure_kind, identifier, poly_vars) }); definition_class = DefinitionClass::Component; ast_definition_id = proc_def_id.upcast(); @@ -260,7 +257,6 @@ impl PassSymbols { variant: SymbolVariant::Definition(SymbolDefinition{ defined_in_module: module.root_id, defined_in_scope: SymbolScope::Module(module.root_id), - definition_span, identifier_span: ident_span, imported_at: None, class: definition_class,