CSY/reowolf Changeset - 6d6c5b5f07ae · Centrum Wiskunde & Informatica (CWI)

Changeset - 6d6c5b5f07ae

Parent rev.

Child rev.

[Not reviewed]

1 6 0

MH - 4 years ago 2022-03-28 22:18:20
contact@maxhenger.nl

Attempting to fix token tree construction

7 files changed with 24 insertions and 514 deletions:

src/protocol/parser/pass_definitions.rs

src/protocol/parser/pass_imports.rs

src/protocol/parser/pass_symbols.rs

src/protocol/parser/pass_tokenizer.rs

src/protocol/parser/tokens.rs

std/std.global.pdl

tokens.txt

500

0 comments (0 inline, 0 general)

src/protocol/parser/pass_definitions.rs

➞

Show inline comments

 use crate::protocol::ast::*;
 use super::symbol_table::*;
 use super::{Module, ModuleCompilationPhase, PassCtx};
 use super::tokens::*;
 use super::token_parsing::*;
 use super::pass_definitions_types::*;
 use crate::protocol::input_source::{InputSource, InputPosition, InputSpan, ParseError};
 use crate::collections::*;
 /// Parses all the tokenized definitions into actual AST nodes.
 ///
 /// The pass stores the id of the definition it is currently working on, a
 /// type parser, and a set of scratch buffers that live on the pass itself
 /// rather than being created per definition.
 pub(crate) struct PassDefinitions {
     // State associated with the definition currently being processed
     cur_definition: DefinitionId,
     // Itty bitty parsing machines
     type_parser: ParserTypeParser,
     // Temporary buffers of various kinds
     // `buffer` is handed to `consume_integer_literal` while parsing enum
     // variant values.
     buffer: String,
     // Section of fields collected while parsing a struct definition.
     struct_fields: ScopedBuffer<StructFieldDefinition>,
     // Section of variants collected while parsing an enum definition.
     enum_variants: ScopedBuffer<EnumVariantDefinition>,
     // NOTE(review): the remaining buffers are not used in the visible part of
     // this file; presumably they serve the union/function/component parsers.
     union_variants: ScopedBuffer<UnionVariantDefinition>,
     variables: ScopedBuffer<VariableId>,
     expressions: ScopedBuffer<ExpressionId>,
     statements: ScopedBuffer<StatementId>,
     parser_types: ScopedBuffer<ParserType>,
+}
 impl PassDefinitions {
     /// Creates a new definition-parsing pass.
     ///
     /// The current definition id starts out invalid, and every scratch buffer
     /// is constructed with a capacity of 128.
     pub(crate) fn new() -> Self {
         Self{
             cur_definition: DefinitionId::new_invalid(),
             type_parser: ParserTypeParser::new(),
             buffer: String::with_capacity(128),
             struct_fields: ScopedBuffer::with_capacity(128),
             enum_variants: ScopedBuffer::with_capacity(128),
             union_variants: ScopedBuffer::with_capacity(128),
             variables: ScopedBuffer::with_capacity(128),
             expressions: ScopedBuffer::with_capacity(128),
             statements: ScopedBuffer::with_capacity(128),
             parser_types: ScopedBuffer::with_capacity(128),
+        }
+    }
     /// Parses the definitions of the module at `module_idx`.
     ///
     /// Walks the sibling chain of token ranges that starts at the first child
     /// of the module's root range (`ranges[0]`). `Definition` and `Code`
     /// ranges are handed to `visit_range`; `Pragma` and `Import` ranges are
     /// skipped. On success the module's phase is set to `DefinitionsParsed`.
     ///
     /// In debug builds this asserts that the module is in the
     /// `ImportsResolved` phase and that `ranges[0]` is a `Module` range.
     ///
     /// # Errors
     ///
     /// Returns the first `ParseError` produced by `visit_range`.
     pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let module_range = &module.tokens.ranges[0];
         debug_assert_eq!(module.phase, ModuleCompilationPhase::ImportsResolved);
         debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
         // Although we only need to parse the definitions, we want to go through
         // code ranges as well such that we can throw errors if we get
         // unexpected tokens at the module level of the source.
         let mut range_idx = module_range.first_child_idx;
         loop {
             let range_idx_usize = range_idx as usize;
             let cur_range = &module.tokens.ranges[range_idx_usize];
             match cur_range.range_kind {
                 TokenRangeKind::Module => unreachable!(), // should not be reachable
                 TokenRangeKind::Pragma | TokenRangeKind::Import => {
                     // Already fully parsed, fall through and go to next range
                 },
                 TokenRangeKind::Definition | TokenRangeKind::Code => {
                     // Visit range even if it is a "code" range to provide
                     // proper error messages.
                     self.visit_range(modules, module_idx, ctx, range_idx_usize)?;
                 },
+            }
             // Follow the sibling link until the `NO_SIBLING` sentinel is hit.
             if cur_range.next_sibling_idx == NO_SIBLING {
                 break;
             } else {
                 range_idx = cur_range.next_sibling_idx;
+            }
+        }
         modules[module_idx].phase = ModuleCompilationPhase::DefinitionsParsed;
         Ok(())
+    }
     /// Parses every definition found in the token range `range_idx` of the
     /// module at `module_idx`.
     ///
     /// The range must be a `Definition` or `Code` range (debug-asserted). The
     /// loop peeks at the leading identifier and dispatches on the keyword to
     /// the matching `visit_*_definition` method, and stops once the iterator
     /// reports no further token.
     ///
     /// # Errors
     ///
     /// Returns a `ParseError` if the next token is not one of the definition
     /// keywords, or if one of the dispatched parsers fails.
     fn visit_range(
         &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
     ) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let cur_range = &module.tokens.ranges[range_idx];
         debug_assert!(cur_range.range_kind == TokenRangeKind::Definition || cur_range.range_kind == TokenRangeKind::Code);
         // Detect which definition we're parsing
         // NOTE(review): the two `iter_range` lines below look like the old and
         // new variant of one statement as rendered by the diff view; only the
         // `+` line is presumably part of the resulting file. TODO confirm.
         let mut iter = module.tokens.iter_range(cur_range);
+        let mut iter = module.tokens.iter_range(cur_range.start, cur_range.end);
         loop {
             // `next` is only inspected here to detect the end of the range.
             let next = iter.next();
             if next.is_none() {
                 return Ok(())
+            }
             // Token was not None, so peek_ident returns None if not an ident
             let ident = peek_ident(&module.source, &mut iter);
             match ident {
                 Some(KW_STRUCT) => self.visit_struct_definition(module, &mut iter, ctx)?,
                 Some(KW_ENUM) => self.visit_enum_definition(module, &mut iter, ctx)?,
                 Some(KW_UNION) => self.visit_union_definition(module, &mut iter, ctx)?,
                 Some(KW_FUNCTION) => self.visit_function_definition(module, &mut iter, ctx)?,
                 Some(KW_PRIMITIVE) | Some(KW_COMPOSITE) => self.visit_component_definition(module, &mut iter, ctx)?,
                 _ => return Err(ParseError::new_error_str_at_pos(
                     &module.source, iter.last_valid_pos(),
                     "unexpected symbol, expected a keyword marking the start of a definition"
                 )),
+            }
+        }
+    }
     /// Parses a single struct definition starting at the `struct` keyword.
     ///
     /// Consumes the keyword and the struct's name, looks up the definition id
     /// registered under that name in the module's symbol scope, and then
     /// parses the comma-separated field list between curly braces. Each field
     /// is a parser type followed by an identifier. The collected fields are
     /// stored in the struct definition on the heap.
     ///
     /// # Errors
     ///
     /// Returns a `ParseError` if any of the consume helpers fails.
     ///
     /// # Panics
     ///
     /// Panics if no symbol with the struct's name exists in the module scope.
     fn visit_struct_definition(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<(), ParseError> {
         consume_exact_ident(&module.source, iter, KW_STRUCT)?;
         let (ident_text, _) = consume_ident(&module.source, iter)?;
         // Retrieve preallocated DefinitionId
         // NOTE(review): the `unwrap` assumes an earlier pass already inserted
         // this symbol — presumably the symbol-scanning pass; confirm.
         let module_scope = SymbolScope::Module(module.root_id);
         let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
             .unwrap().variant.as_definition().definition_id;
         self.cur_definition = definition_id;
         // Parse struct definition
         consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
         let mut fields_section = self.struct_fields.start_section();
         consume_comma_separated(
             TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, ctx,
             |source, iter, ctx| {
                 let poly_vars = ctx.heap[definition_id].poly_vars();
                 // The field's span runs from the start of its type to the end
                 // of its name.
                 let start_pos = iter.last_valid_pos();
                 let parser_type = self.type_parser.consume_parser_type(
                     iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id,
                     module_scope, false, None
                 )?;
                 let field = consume_ident_interned(source, iter, ctx)?;
                 Ok(StructFieldDefinition{
                     span: InputSpan::from_positions(start_pos, field.span.end),
                     field, parser_type
                 })
             },
             &mut fields_section, "a struct field", "a list of struct fields", None
         )?;
         // Transfer to preallocated definition
         let struct_def = ctx.heap[definition_id].as_struct_mut();
         struct_def.fields = fields_section.into_vec();
         Ok(())
+    }
     fn visit_enum_definition(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<(), ParseError> {
         consume_exact_ident(&module.source, iter, KW_ENUM)?;
         let (ident_text, _) = consume_ident(&module.source, iter)?;
         // Retrieve preallocated DefinitionId
         let module_scope = SymbolScope::Module(module.root_id);
         let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
             .unwrap().variant.as_definition().definition_id;
         self.cur_definition = definition_id;
         // Parse enum definition
         consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
         let mut enum_section = self.enum_variants.start_section();
         consume_comma_separated(
             TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, ctx,
             |source, iter, ctx| {
                 let identifier = consume_ident_interned(source, iter, ctx)?;
                 let value = if iter.next() == Some(TokenKind::Equal) {
                     iter.consume();
                     let (variant_number, _) = consume_integer_literal(source, iter, &mut self.buffer)?;
                     EnumVariantValue::Integer(variant_number as i64) // TODO: @int
                 } else {
                     EnumVariantValue::None
                 };
                 Ok(EnumVariantDefinition{ identifier, value })
             },
             &mut enum_section, "an enum variant", "a list of enum variants", None
         )?;

src/protocol/parser/pass_imports.rs

➞

Show inline comments

 use crate::protocol::ast::*;
 use super::symbol_table::*;
 use super::{Module, ModuleCompilationPhase, PassCtx};
 use super::tokens::*;
 use super::token_parsing::*;
 use crate::protocol::input_source::{InputSource as InputSource, InputSpan, ParseError};
 use crate::collections::*;
 /// Parses all the imports in the module tokens. Is applied after the
 /// definitions and name of modules are resolved. Hence we should be able to
 /// resolve all symbols to their appropriate module/definition.
 pub(crate) struct PassImport {
     // Import ids gathered for the current module; `parse` drains them into
     // the module root's `imports` list.
     imports: Vec<ImportId>,
     // NOTE(review): element type matches the return value of the
     // `consume_symbol_and_maybe_alias` helper, so this presumably buffers
     // the symbols of one `import a::{...}` statement — confirm.
     found_symbols: Vec<(AliasedSymbol, SymbolDefinition)>,
     // NOTE(review): usage is not visible in this part of the file.
     scoped_symbols: Vec<Symbol>,
+}
 impl PassImport {
     /// Creates a new import pass whose three vectors each start with room for
     /// 32 elements.
     pub(crate) fn new() -> Self {
         Self{
             imports: Vec::with_capacity(32),
             found_symbols: Vec::with_capacity(32),
             scoped_symbols: Vec::with_capacity(32),
+        }
+    }
     /// Resolves the imports of the module at `module_idx`.
     ///
     /// Walks the sibling chain of token ranges that starts at the first child
     /// of the module's root range and calls `visit_import_range` for every
     /// `Import` range. Afterwards the collected import ids are moved into the
     /// module's root node on the heap and the module's phase is set to
     /// `ImportsResolved`.
     ///
     /// In debug builds this asserts that every module has reached at least the
     /// `SymbolsScanned` phase, that this module is exactly in that phase, and
     /// that `ranges[0]` is a `Module` range.
     ///
     /// # Errors
     ///
     /// Returns the first `ParseError` produced by `visit_import_range`.
     pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let module_range = &module.tokens.ranges[0];
         debug_assert!(modules.iter().all(|m| m.phase >= ModuleCompilationPhase::SymbolsScanned));
         debug_assert_eq!(module.phase, ModuleCompilationPhase::SymbolsScanned);
         debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
         let mut range_idx = module_range.first_child_idx;
         loop {
             let range_idx_usize = range_idx as usize;
             let cur_range = &module.tokens.ranges[range_idx_usize];
             // Only import ranges are of interest to this pass.
             if cur_range.range_kind == TokenRangeKind::Import {
                 self.visit_import_range(modules, module_idx, ctx, range_idx_usize)?;
+            }
             // Follow the sibling link until the `NO_SIBLING` sentinel is hit.
             if cur_range.next_sibling_idx == NO_SIBLING {
                 break;
             } else {
                 range_idx = cur_range.next_sibling_idx;
+            }
+        }
         // Hand the gathered imports over to the module's root node; `drain`
         // leaves `self.imports` empty afterwards.
         let root = &mut ctx.heap[module.root_id];
         root.imports.extend(self.imports.drain(..));
         let module = &mut modules[module_idx];
         module.phase = ModuleCompilationPhase::ImportsResolved;
         Ok(())
+    }
     pub(crate) fn visit_import_range(
         &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
     ) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let import_range = &module.tokens.ranges[range_idx];
         debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import);
         let mut iter = module.tokens.iter_range(import_range);
+        let mut iter = module.tokens.iter_range(import_range.start, import_range.end);
         // Consume "import"
         let (_import_ident, import_span) =
             consume_any_ident(&module.source, &mut iter)?;
         debug_assert_eq!(_import_ident, KW_IMPORT);
         // Consume module name
         let (module_name, module_name_span) = consume_domain_ident(&module.source, &mut iter)?;
         let target_root_id = ctx.symbols.get_module_by_name(module_name);
         if target_root_id.is_none() {
             return Err(ParseError::new_error_at_span(
                 &module.source, module_name_span,
                 format!("could not resolve module '{}'", String::from_utf8_lossy(module_name))
             ));
+        }
         let module_name = ctx.pool.intern(module_name);
         let module_identifier = Identifier{ span: module_name_span, value: module_name };
         let target_root_id = target_root_id.unwrap();
         // Check for subsequent characters (alias, multiple imported symbols)
         let next = iter.next();
         let import_id;
         if has_ident(&module.source, &mut iter, b"as") {
             // Alias for module
             iter.consume();
             let alias_identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
             let alias_name = alias_identifier.value.clone();
             import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{
                 this,
                 span: import_span,
                 module: module_identifier,
                 alias: alias_identifier,
                 module_id: target_root_id
             }));
             if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{
                 name: alias_name,
                 variant: SymbolVariant::Module(SymbolModule{
                     root_id: target_root_id,
                     introduced_at: import_id,
                 }),
             }) {
                 return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol));
+            }
         } else if Some(TokenKind::ColonColon) == next {
             iter.consume();
             // Helper function to consume symbols, their alias, and the
             // definition the symbol is pointing to.
             fn consume_symbol_and_maybe_alias<'a>(
                 source: &'a InputSource, iter: &mut TokenIter, ctx: &mut PassCtx,
                 module_name: &StringRef<'static>, module_root_id: RootId,
             ) -> Result<(AliasedSymbol, SymbolDefinition), ParseError> {
                 // Consume symbol name and make sure it points to an existing definition
                 let symbol_identifier = consume_ident_interned(source, iter, ctx)?;
                 // Consume alias text if specified
                 let alias_identifier = if peek_ident(source, iter) == Some(b"as") {
                     // Consume alias
                     iter.consume();
                     Some(consume_ident_interned(source, iter, ctx)?)
                 } else {
                     None
                 };
                 let target = ctx.symbols.get_symbol_by_name_defined_in_scope(
                     SymbolScope::Module(module_root_id), symbol_identifier.value.as_bytes()
                 );
                 if target.is_none() {
                     return Err(ParseError::new_error_at_span(
                         source, symbol_identifier.span,
                         format!(
                             "could not find symbol '{}' within module '{}'",
                             symbol_identifier.value.as_str(), module_name.as_str()
+                        )
                     ));
+                }
                 let target = target.unwrap();
                 debug_assert_ne!(target.class(), SymbolClass::Module);
                 let target_definition = target.variant.as_definition();
                 Ok((
                     AliasedSymbol{
                         name: symbol_identifier,
                         alias: alias_identifier,
                         definition_id: target_definition.definition_id,
                     },
                     target_definition.clone()
                 ))
+            }
             let next = iter.next();

src/protocol/parser/pass_symbols.rs

➞

Show inline comments

@@ @@ -19,254 +19,257 @@ pub(crate) struct PassSymbols { @@
     has_pragma_module: bool,
+}
 impl PassSymbols {
     /// Creates a new symbol-scanning pass with preallocated collections, an
     /// empty scratch string, and both "pragma seen" flags cleared.
     pub(crate) fn new() -> Self {
         Self{
             symbols: Vec::with_capacity(128),
             pragmas: Vec::with_capacity(8),
             imports: Vec::with_capacity(32),
             definitions: Vec::with_capacity(128),
             buffer: String::with_capacity(128),
             has_pragma_version: false,
             has_pragma_module: false,
+        }
+    }
     /// Clears the per-module state so the pass can be reused for another
     /// module: empties the collected symbols, pragmas, imports and
     /// definitions, and clears both "pragma seen" flags. The `buffer` string
     /// is not touched here.
     fn reset(&mut self) {
         self.symbols.clear();
         self.pragmas.clear();
         self.imports.clear();
         self.definitions.clear();
         self.has_pragma_version = false;
         self.has_pragma_module = false;
+    }
     /// Scans the module at `module_idx` for pragmas and definitions.
     ///
     /// Steps, in order:
     /// 1. reset the pass and allocate an empty `Root` for the module on the
     ///    heap, storing its id in `module.root_id`;
     /// 2. walk the sibling chain of child token ranges, calling
     ///    `visit_definition_range` for `Definition` ranges and
     ///    `visit_pragma_range` for `Pragma` ranges;
     /// 3. insert the module scope under the global scope, then insert a scope
     ///    and a symbol for every definition that was found;
     /// 4. move the gathered pragmas and definitions into the root and set the
     ///    module's phase to `SymbolsScanned`.
     ///
     /// In debug builds this asserts that the module is in the `Tokenized`
     /// phase, that `ranges[0]` is a `Module` range, and that the module's root
     /// id is still invalid.
     ///
     /// # Errors
     ///
     /// Returns a `ParseError` from one of the visited ranges, or a symbol
     /// conflict error if a symbol cannot be inserted into the module scope.
     pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
         self.reset();
         let module = &mut modules[module_idx];
         let module_range = &module.tokens.ranges[0];
         debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized);
         debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
         debug_assert!(module.root_id.is_invalid()); // not set yet,
         // Preallocate root in the heap
         let root_id = ctx.heap.alloc_protocol_description(|this| {
             Root{
                 this,
                 pragmas: Vec::new(),
                 imports: Vec::new(),
                 definitions: Vec::new(),
+            }
         });
         module.root_id = root_id;
         // Retrieve first range index, then make immutable borrow
         let mut range_idx = module_range.first_child_idx;
         // Visit token ranges to detect definitions and pragmas
         loop {
             let module = &modules[module_idx];
             let range_idx_usize = range_idx as usize;
             let cur_range = &module.tokens.ranges[range_idx_usize];
             // Copy out the values needed after the visit call, which takes
             // `modules` again.
             let next_sibling_idx = cur_range.next_sibling_idx;
             let range_kind = cur_range.range_kind;
             // Parse if it is a definition or a pragma
             if range_kind == TokenRangeKind::Definition {
                 self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?;
             } else if range_kind == TokenRangeKind::Pragma {
                 self.visit_pragma_range(modules, module_idx, ctx, range_idx_usize)?;
+            }
             if next_sibling_idx == NO_SIBLING {
                 break;
             } else {
                 range_idx = next_sibling_idx;
+            }
+        }
         // Add the module's symbol scope and the symbols we just parsed
         let module_scope = SymbolScope::Module(root_id);
         ctx.symbols.insert_scope(Some(SymbolScope::Global), module_scope);
         for symbol in self.symbols.drain(..) {
             // Every definition gets its own scope nested in the module scope.
             ctx.symbols.insert_scope(Some(module_scope), SymbolScope::Definition(symbol.variant.as_definition().definition_id));
             if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(module_scope, symbol) {
                 return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol))
+            }
+        }
         // Modify the preallocated root
         let root = &mut ctx.heap[root_id];
         root.pragmas.extend(self.pragmas.drain(..));
         root.definitions.extend(self.definitions.drain(..));
         // Modify module
         let module = &mut modules[module_idx];
         module.phase = ModuleCompilationPhase::SymbolsScanned;
         Ok(())
+    }
     /// Parses the pragma found in token range `range_idx`.
     ///
     /// Two pragmas are accepted:
     /// - `#module <domain-ident>`: allocates a `Pragma::Module`, registers the
     ///   module name in the symbol table and stores it in `module.name`;
     /// - `#version <integer>`: allocates a `Pragma::Version` and stores it in
     ///   `module.version`.
     ///
     /// Each of them may appear at most once per module; this is tracked with
     /// `has_pragma_module` and `has_pragma_version`.
     ///
     /// # Errors
     ///
     /// Returns a `ParseError` when a pragma is repeated, when the pragma name
     /// is neither `#module` nor `#version`, when the pragma's value cannot be
     /// consumed, or when the module name is already taken by another module.
     fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> {
         let module = &mut modules[module_idx];
         let range = &module.tokens.ranges[range_idx];
         // NOTE(review): the two `iter_range` lines below look like the old and
         // new variant of one statement as rendered by the diff view; only the
         // `+` line is presumably part of the resulting file. The `+` variant
         // iterates up to the end of the token buffer, not up to `range.end`.
         let mut iter = module.tokens.iter_range(range);
+        let mut iter = module.tokens.iter_range(range.start, module.tokens.tokens.len() as u32);
         // Consume pragma name
         let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?;
         // Consume pragma values
         if pragma_section == b"#module" {
             // Check if name is defined twice within the same file
             if self.has_pragma_module {
                 return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module name is defined twice"));
+            }
             // Consume the domain-name
             let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
             // NOTE(review): this check duplicates the commented-out one a few
             // lines below; one of the two is presumably a removed line from
             // the diff view. TODO confirm which one is live.
             if iter.next().is_some() {
                 return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
+            }
             // TODO: Fix with newer token range parsing
             // Shrinks the stored range so that it ends at the iterator's
             // current token index.
             module.tokens.ranges[range_idx as usize].end = iter.token_index();
             // if iter.next().is_some() {
             //     return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
             // }
             // Add to heap and symbol table
             let pragma_span = InputSpan::from_positions(pragma_start, module_span.end);
             let module_name = ctx.pool.intern(module_name);
             let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{
                 this,
                 span: pragma_span,
                 value: Identifier{ span: module_span, value: module_name.clone() },
             }));
             self.pragmas.push(pragma_id);
             if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name.clone(), module.root_id) {
                 // Naming conflict
                 // The error points at this module's pragma and adds an info
                 // message at the other module's `#module` pragma.
                 let this_module = &modules[module_idx];
                 let other_module = seek_module(modules, other_module_root_id).unwrap();
                 let other_module_pragma_id = other_module.name.as_ref().map(|v| (*v).0).unwrap();
                 let other_pragma = ctx.heap[other_module_pragma_id].as_module();
                 return Err(ParseError::new_error_str_at_span(
                     &this_module.source, pragma_span, "conflict in module name"
                 ).with_info_str_at_span(
                     &other_module.source, other_pragma.span, "other module is defined here"
                 ));
+            }
             module.name = Some((pragma_id, module_name));
             self.has_pragma_module = true;
         } else if pragma_section == b"#version" {
             // Check if version is defined twice within the same file
             if self.has_pragma_version {
                 return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module version is defined twice"));
+            }
             // Consume the version pragma
             let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?;
             let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{
                 this,
                 span: InputSpan::from_positions(pragma_start, version_span.end),
                 version,
             }));
             self.pragmas.push(pragma_id);
             module.version = Some((pragma_id, version as i64));
             self.has_pragma_version = true;
         } else {
             // Custom pragma, maybe we support this in the future, but for now
             // we don't.
             return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "illegal pragma name"));
+        }
         Ok(())
+    }
     /// Registers the definition found in token range `range_idx`.
     ///
     /// Only the head of the definition is read: the leading keyword, the
     /// name, and an optional `<...>` list of polymorphic variables. Depending
     /// on the keyword an empty struct, enum, union or procedure definition is
     /// allocated on the heap. The resulting symbol and definition id are
     /// pushed onto `self.symbols` and `self.definitions`; they are not yet
     /// inserted into the symbol table here.
     ///
     /// # Errors
     ///
     /// Returns a `ParseError` if the name or the polymorphic variable list
     /// cannot be consumed.
     ///
     /// # Panics
     ///
     /// Panics if the range does not start with an identifier, or if that
     /// identifier is not one of the definition keywords.
     fn visit_definition_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let range = &module.tokens.ranges[range_idx];
         // The definition's span covers the whole token range.
         let definition_span = InputSpan::from_positions(
             module.tokens.start_pos(range),
             module.tokens.end_pos(range)
         );
         // NOTE(review): the two `iter_range` lines below look like the old and
         // new variant of one statement as rendered by the diff view; only the
         // `+` line is presumably part of the resulting file. TODO confirm.
         let mut iter = module.tokens.iter_range(range);
+        let mut iter = module.tokens.iter_range(range.start, range.end);
         // First ident must be type of symbol
         // NOTE(review): `unwrap` relies on the tokenizer only emitting
         // definition ranges that start with an identifier — confirm.
         let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap();
         // Retrieve identifier of definition
         let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
         // NOTE(review): debug output written to stdout for every definition;
         // looks like a leftover from debugging.
         println!("DEBUG: Parsing {} --- {}", String::from_utf8_lossy(kw_text).to_string(), identifier.value.as_str());
         let mut poly_vars = Vec::new();
         maybe_consume_comma_separated(
             TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, ctx,
             |source, iter, ctx| consume_ident_interned(source, iter, ctx),
             &mut poly_vars, "a polymorphic variable", None
         )?;
         let ident_text = identifier.value.clone(); // because we need it later
         let ident_span = identifier.span.clone();
         // Reserve space in AST for definition and add it to the symbol table
         let definition_class;
         let ast_definition_id;
         match kw_text {
             KW_STRUCT => {
                 let struct_def_id = ctx.heap.alloc_struct_definition(|this| {
                     StructDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Struct;
                 ast_definition_id = struct_def_id.upcast();
             },
             KW_ENUM => {
                 let enum_def_id = ctx.heap.alloc_enum_definition(|this| {
                     EnumDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Enum;
                 ast_definition_id = enum_def_id.upcast();
             },
             KW_UNION => {
                 let union_def_id = ctx.heap.alloc_union_definition(|this| {
                     UnionDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Union;
                 ast_definition_id = union_def_id.upcast()
             },
             KW_FUNCTION => {
                 let proc_def_id = ctx.heap.alloc_procedure_definition(|this| {
                     ProcedureDefinition::new_empty(this, module.root_id, definition_span, ProcedureKind::Function, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Function;
                 ast_definition_id = proc_def_id.upcast();
             },
             KW_PRIMITIVE | KW_COMPOSITE => {
                 // Both component keywords share one procedure definition type
                 // and only differ in the recorded `ProcedureKind`.
                 let procedure_kind = if kw_text == KW_PRIMITIVE {
                     ProcedureKind::Primitive
                 } else {
                     ProcedureKind::Composite
                 };
                 let proc_def_id = ctx.heap.alloc_procedure_definition(|this| {
                     ProcedureDefinition::new_empty(this, module.root_id, definition_span, procedure_kind, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Component;
                 ast_definition_id = proc_def_id.upcast();
             },
             _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(kw_text)),
+        }
         let symbol = Symbol{
             name: ident_text,
             variant: SymbolVariant::Definition(SymbolDefinition{
                 defined_in_module: module.root_id,
                 defined_in_scope: SymbolScope::Module(module.root_id),
                 definition_span,
                 identifier_span: ident_span,
                 imported_at: None,
                 class: definition_class,
                 definition_id: ast_definition_id,
             }),
         };
         self.symbols.push(symbol);
         self.definitions.push(ast_definition_id);
         Ok(())
+    }
+}
@@ \ No newline at end of file @@

src/protocol/parser/pass_tokenizer.rs

➞

Show inline comments

 use crate::protocol::input_source::{
     InputSource as InputSource,
     ParseError,
     InputPosition as InputPosition,
 };
 use super::tokens::*;
 use super::token_parsing::*;
 /// Tokenizer is a reusable parser to tokenize multiple source files using the
 /// same allocated buffers. In a well-formed program, we produce a consistent
 /// tree of token ranges such that we may identify tokens that represent a
 /// definition or an import before producing the entire AST.
 ///
 /// If the program is not well-formed then the tree may be inconsistent, but we
 /// will detect this once we transform the tokens into the AST. To ensure a
 /// consistent AST-producing phase we require the input to have balanced
 /// curly braces: `tokenize` returns an error for unmatched opening or closing
 /// braces.
 pub(crate) struct PassTokenizer {
     // Stack of input positions of opening curly braces, used to detect
     // unmatched opening braces. Unmatched closing braces are detected
     // immediately (when the stack is empty at a closing brace).
     curly_stack: Vec<InputPosition>,
     // Points to an element in the `TokenBuffer.ranges` variable: the range
     // that is currently being processed. Index 0 is the first range pushed
     // by `tokenize` (the module range).
     stack_idx: usize,
+}
 impl PassTokenizer {
     /// Creates a tokenizer with an empty curly-brace stack (capacity reserved
     /// for 32 entries) and `stack_idx` set to 0.
     pub(crate) fn new() -> Self {
         Self{
             curly_stack: Vec::with_capacity(32),
             stack_idx: 0
+        }
+    }
     /// Tokenizes all of `source` into `target`, appending to `target.tokens`
     /// and building the tree of token ranges in `target.ranges`.
     ///
     /// Expects `source` to be at offset 0 and `target` to be empty (checked with
     /// debug assertions only).
     ///
     /// # Errors
     /// Returns a `ParseError` for an unmatched closing or opening curly brace,
     /// for a character that does not start any recognized token, for an error
     /// stored in `source.had_error`, and for any error propagated from the
     /// helper methods called in the main loop.
     pub(crate) fn tokenize(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         // Assert source and buffer are at start
         debug_assert_eq!(source.pos().offset, 0);
         debug_assert!(target.tokens.is_empty());
         debug_assert!(target.ranges.is_empty());
         // Set up for tokenization by pushing the first range onto the stack.
         // This range may get transformed into the appropriate range kind later,
         // see `push_range` and `pop_range`.
         self.stack_idx = 0;
         target.ranges.push(TokenRange{
             parent_idx: NO_RELATION,
             range_kind: TokenRangeKind::Module,
             curly_depth: 0,
             start: 0,
             end: 0,
             num_child_ranges: 0,
             first_child_idx: NO_RELATION,
             last_child_idx: NO_RELATION,
             next_sibling_idx: NO_RELATION,
         });
         // Main tokenization loop
         while let Some(c) = source.next() {
             // Number of tokens produced so far, i.e. the index that the first
             // token produced in this iteration will get. Passed to
             // `push_range` when this iteration opens a new range.
             let token_index = target.tokens.len() as u32;
             if is_char_literal_start(c) {
                 self.consume_char_literal(source, target)?;
             } else if is_string_literal_start(c) {
                 self.consume_string_literal(source, target)?;
             } else if is_identifier_start(c) {
                 let ident = self.consume_identifier(source, target)?;
                 if demarks_definition(ident) {
                     self.push_range(target, TokenRangeKind::Definition, token_index);
                 } else if demarks_import(ident) {
                     self.push_range(target, TokenRangeKind::Import, token_index);
+                }
             } else if is_integer_literal_start(c) {
                 self.consume_number(source, target)?;
             } else if is_pragma_start_or_pound(c) {
                 let was_pragma = self.consume_pragma_or_pound(c, source, target)?;
                 if was_pragma {
                     self.push_range(target, TokenRangeKind::Pragma, token_index);
+                }
             } else if self.is_line_comment_start(c, source) {
                 self.consume_line_comment(source, target)?;
             } else if self.is_block_comment_start(c, source) {
                 self.consume_block_comment(source, target)?;
             } else if is_whitespace(c) {
                 // NOTE(review): the next lines call `consume_whitespace` twice
                 // in a row; this looks like two versions of this branch shown
                 // together (one gated on `contained_newline`, one not). Confirm
                 // which one is current. In both, a currently open Pragma range
                 // is closed at the current token count.
                 let contained_newline = self.consume_whitespace(source);
                 if contained_newline {
                 self.consume_whitespace(source);
                 let range = &target.ranges[self.stack_idx];
                 if range.range_kind == TokenRangeKind::Pragma {
                     self.pop_range(target, target.tokens.len() as u32);
+                }
+                }
             } else {
                 let was_punctuation = self.maybe_parse_punctuation(c, source, target)?;
                 if let Some((token, token_pos)) = was_punctuation {
                     if token == TokenKind::OpenCurly {
                         // Remember where the brace was opened so that an
                         // unmatched one can be reported at end of file.
                         self.curly_stack.push(token_pos);
                     } else if token == TokenKind::CloseCurly {
                         // Check if this marks the end of a range we're
                         // currently processing
                         if self.curly_stack.is_empty() {
                             return Err(ParseError::new_error_str_at_pos(
                                 source, token_pos, "unmatched closing curly brace '}'"
                             ));
+                        }
                         self.curly_stack.pop();
                         // A Definition range ends at the closing brace that
                         // brings the brace depth back to the range's own
                         // `curly_depth`.
                         let range = &target.ranges[self.stack_idx];
                         if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_stack.len() as u32 {
                             self.pop_range(target, target.tokens.len() as u32);
+                        }
                         // Exit early if we have more closing curly braces than
                         // opening curly braces
                     } else if token == TokenKind::SemiColon {
                         // Check if this marks the end of an import
                         let range = &target.ranges[self.stack_idx];
                         if range.range_kind == TokenRangeKind::Import {
                             self.pop_range(target, target.tokens.len() as u32);
+                        }
+                    }
                 } else {
                     return Err(ParseError::new_error_str_at_pos(
                         source, source.pos(), "unexpected character"
                     ));
+                }
+            }
+        }
         // End of file, check if our state is correct
         if let Some(error) = source.had_error.take() {
             return Err(error);
+        }
         if !self.curly_stack.is_empty() {
             // Let's not add a lot of heuristics and just tell the programmer
             // that something is wrong
             let last_unmatched_open = self.curly_stack.pop().unwrap();
             return Err(ParseError::new_error_str_at_pos(
                 source, last_unmatched_open, "unmatched opening curly brace '{'"
             ));
+        }
         // Ranges that did not depend on curly braces may have missing tokens.
         // So close all of the active tokens
         while self.stack_idx != 0 {
             self.pop_range(target, target.tokens.len() as u32);
+        }
         // And finally, we may have a token range at the end that doesn't belong
         // to a range yet, so insert a "code" range if this is the case.
         debug_assert_eq!(self.stack_idx, 0);
         let last_registered_idx = target.ranges[0].end;
         let last_token_idx = target.tokens.len() as u32;
         if last_registered_idx != last_token_idx {
             self.add_code_range(target, 0, last_registered_idx, last_token_idx, NO_RELATION);
+        }
         Ok(())
+    }
     /// Returns `true` if `first_char` is `/` and `source.lookahead(1)` yields
     /// another `/`, i.e. the input continues with `//`.
     fn is_line_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
         return first_char == b'/' && Some(b'/') == source.lookahead(1);
+    }
     /// Returns `true` if `first_char` is `/` and `source.lookahead(1)` yields
     /// `*`, i.e. the input continues with `/*`.
     fn is_block_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
         return first_char == b'/' && Some(b'*') == source.lookahead(1);
+    }
     fn maybe_parse_punctuation(
         &mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer
     ) -> Result<Option<(TokenKind, InputPosition)>, ParseError> {
         debug_assert!(first_char != b'#', "'#' needs special handling");
         debug_assert!(first_char != b'\'', "'\'' needs special handling");
         debug_assert!(first_char != b'"', "'\"' needs special handling");
         let pos = source.pos();
         let token_kind;
         if first_char == b'!' {
             source.consume();
             if Some(b'=') == source.next() {
                 source.consume();
                 token_kind = TokenKind::NotEqual;
             } else {
                 token_kind = TokenKind::Exclamation;
+            }
         } else if first_char == b'%' {
@@ @@ -416,196 +414,195 @@ impl PassTokenizer { @@
         // Consume the leading double quotes
         debug_assert!(source.next().unwrap() == b'"');
         source.consume();
         let mut prev_char = b'"';
         while let Some(c) = source.next() {
             if !c.is_ascii() {
                 return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in string literal"));
+            }
             source.consume();
             if c == b'"' && prev_char != b'\\' {
                 // Unescaped string terminator
                 prev_char = c;
                 break;
+            }
             if prev_char == b'\\' && c == b'\\' {
                 // Escaped backslash, set prev_char to bogus to not conflict
                 // with escaped-" and unterminated string literal detection.
                 prev_char = b'\0';
             } else {
                 prev_char = c;
+            }
+        }
         if prev_char != b'"' {
             // Unterminated string literal
             return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated string literal"));
+        }
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::String, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     /// Consumes a `#` and, if it is directly followed by an identifier, the
     /// identifier as well.
     ///
     /// Returns `Ok(false)` after pushing a single `Pound` token when the `#` is
     /// not followed by an identifier-start character (or is at end of input).
     /// Otherwise pushes a `Pragma` token at the position of the `#` followed by
     /// a `SpanEnd` token at the position after the identifier, and returns
     /// `Ok(true)`.
     ///
     /// # Errors
     /// Propagates the error returned by `check_ascii`.
     fn consume_pragma_or_pound(&mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer) -> Result<bool, ParseError> {
         let start_pos = source.pos();
         debug_assert_eq!(first_char, b'#');
         source.consume();
         let next = source.next();
         if next.is_none() || !is_identifier_start(next.unwrap()) {
             // Just a pound sign
             target.tokens.push(Token::new(TokenKind::Pound, start_pos));
             Ok(false)
         } else {
             // Pound sign followed by identifier
             source.consume();
             // Keep consuming while the characters can be part of an identifier
             while let Some(c) = source.next() {
                 if !is_identifier_remaining(c) {
                     break;
+                }
                 source.consume();
+            }
             self.check_ascii(source)?;
             let end_pos = source.pos();
             target.tokens.push(Token::new(TokenKind::Pragma, start_pos));
             target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
             Ok(true)
+        }
+    }
     /// Consumes a `//` line comment up to and including the terminating
     /// newline (or up to end of input) and pushes a `LineComment` token at the
     /// start of the comment followed by a `SpanEnd` token.
     ///
     /// The `SpanEnd` position is adjusted so that it is meant to exclude the
     /// trailing `\n` or `\r\n`.
     fn consume_line_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading "//"
         debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'/');
         source.consume();
         source.consume();
         // Track the last two characters seen so that "\r\n" can be told apart
         // from a bare "\n" after the loop.
         let mut prev_char = b'/';
         let mut cur_char = b'/';
         while let Some(c) = source.next() {
             prev_char = cur_char;
             cur_char = c;
             if c == b'\n' {
                 // End of line, note that the newline is not consumed
                 break;
+            }
             source.consume();
+        }
         let mut end_pos = source.pos();
         debug_assert_eq!(begin_pos.line, end_pos.line);
         // Modify offset to not include the newline characters
         // NOTE(review): the loop above breaks before consuming the '\n', so
         // `source.pos()` presumably still points at the newline here. If that
         // is the case, subtracting 1 (or 2 for "\r\n") also cuts off the last
         // character of the comment text; confirm against the semantics of
         // `InputSource::pos`.
         if cur_char == b'\n' {
             if prev_char == b'\r' {
                 end_pos.offset -= 2;
             } else {
                 end_pos.offset -= 1;
+            }
             // Consume final newline
             source.consume();
         } else {
             // End of comment was due to EOF
             debug_assert!(source.next().is_none())
+        }
         target.tokens.push(Token::new(TokenKind::LineComment, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     /// Consumes a `/* ... */` block comment, including its terminator, and
     /// pushes a `BlockComment` token at the start of the comment followed by a
     /// `SpanEnd` token at the position after the closing `*/`.
     ///
     /// # Errors
     /// Returns a `ParseError` if the input ends before `*/` is found.
     fn consume_block_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading "/*"
         debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'*');
         source.consume();
         source.consume();
         // Explicitly do not put prev_char at "*", because then "/*/" would
         // represent a valid and closed block comment
         let mut prev_char = b' ';
         let mut is_closed = false;
         while let Some(c) = source.next() {
             source.consume();
             if prev_char == b'*' && c == b'/' {
                 // End of block comment
                 is_closed = true;
                 break;
+            }
             prev_char = c;
+        }
         if !is_closed {
             // Ran out of input; the error is reported at the current (end)
             // position rather than at the start of the comment.
             return Err(ParseError::new_error_str_at_pos(
                 source, source.pos(), "encountered unterminated block comment")
             );
+        }
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::BlockComment, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     /// Consumes an identifier and pushes an `Ident` token at its start followed
     /// by a `SpanEnd` token at the position after its last character.
     ///
     /// Returns the section of `source` between those two positions, i.e. the
     /// bytes of the identifier.
     ///
     /// # Errors
     /// Propagates the error returned by `check_ascii`.
     fn consume_identifier<'a>(&mut self, source: &'a mut InputSource, target: &mut TokenBuffer) -> Result<&'a [u8], ParseError> {
         let begin_pos = source.pos();
         debug_assert!(is_identifier_start(source.next().unwrap()));
         source.consume();
         // Keep reading until no more identifier
         while let Some(c) = source.next() {
             if !is_identifier_remaining(c) {
                 break;
+            }
             source.consume();
+        }
         self.check_ascii(source)?;
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::Ident, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(source.section_at_pos(begin_pos, end_pos))
+    }
     /// Consumes a run of characters that may form a number and pushes an
     /// `Integer` token at its start followed by a `SpanEnd` token at the
     /// position after its last character. The text is not validated as a
     /// number here; characters are accepted as long as
     /// `maybe_number_remaining` holds.
     ///
     /// # Errors
     /// Propagates the error returned by `check_ascii`.
     fn consume_number(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         debug_assert!(is_integer_literal_start(source.next().unwrap()));
         source.consume();
         // Keep reading until it doesn't look like a number anymore
         while let Some(c) = source.next() {
             if !maybe_number_remaining(c) {
                 break;
+            }
             source.consume();
+        }
         self.check_ascii(source)?;
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::Integer, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     // Consumes whitespace and returns whether or not the whitespace contained
     // a newline.
     fn consume_whitespace(&self, source: &mut InputSource) -> bool {
         debug_assert!(is_whitespace(source.next().unwrap()));

src/protocol/parser/tokens.rs

➞

Show inline comments

@@ @@ -119,232 +119,237 @@ impl TokenKind { @@
             TK::At => "@",
             TK::Plus => "+",
             TK::Minus => "-",
             TK::Star => "*",
             TK::Slash => "/",
             TK::Percent => "%",
             TK::Caret => "^",
             TK::And => "&",
             TK::Or => "|",
             TK::Tilde => "~",
             TK::Equal => "=",
             TK::ColonColon => "::",
             TK::DotDot => "..",
             TK::ArrowRight => "->",
             TK::AtEquals => "@=",
             TK::PlusPlus => "++",
             TK::PlusEquals => "+=",
             TK::MinusMinus => "--",
             TK::MinusEquals => "-=",
             TK::StarEquals => "*=",
             TK::SlashEquals => "/=",
             TK::PercentEquals => "%=",
             TK::CaretEquals => "^=",
             TK::AndAnd => "&&",
             TK::AndEquals => "&=",
             TK::OrOr => "||",
             TK::OrEquals => "|=",
             TK::EqualEqual => "==",
             TK::NotEqual => "!=",
             TK::ShiftLeft => "<<",
             TK::LessEquals => "<=",
             TK::ShiftRight => ">>",
             TK::GreaterEquals => ">=",
             TK::ShiftLeftEquals => "<<=",
             TK::ShiftRightEquals => ">>=",
             // Lets keep these in explicitly for now, in case we want to add more symbols
             TK::Ident | TK::Pragma | TK::Integer | TK::String | TK::Character |
             TK::LineComment | TK::BlockComment | TK::SpanEnd => unreachable!(),
+        }
+    }
+}
 /// Represents a single token at a particular position.
 pub struct Token {
     // What kind of token this is. A token of kind `SpanEnd` is not a token of
     // its own: it directly follows a token that has a variable length and
     // records where that token ends.
     pub kind: TokenKind,
     // Start position of the token in the input; for a `SpanEnd` token the
     // end position of the preceding token.
     pub pos: InputPosition,
+}
 impl Token {
     /// Creates a token of the given `kind` located at `pos`.
     pub(crate) fn new(kind: TokenKind, pos: InputPosition) -> Self {
         Self{ kind, pos }
+    }
+}
 /// The kind of token ranges that are specially parsed by the tokenizer.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum TokenRangeKind {
     // Kind of the first range the tokenizer pushes; it has no parent.
     Module,
     // Opened when the tokenizer consumes a pragma (`#` followed by an
     // identifier).
     Pragma,
     // Opened at an identifier that demarks an import; the tokenizer closes
     // it at a semicolon.
     Import,
     // Opened at an identifier that demarks a definition; the tokenizer
     // closes it at the closing curly brace that restores the range's
     // `curly_depth`.
     Definition,
     // Inserted by the tokenizer for trailing tokens that are not covered by
     // any other range.
     Code,
+}
 // Sentinel index meaning "no such range". Used for the parent, child and
 // sibling index fields of `TokenRange`.
 pub const NO_RELATION: i32 = -1;
 // Same value as `NO_RELATION`, named for use with sibling indices.
 pub const NO_SIBLING: i32 = NO_RELATION;
 /// A range of tokens with a specific meaning. Such a range is part of a tree
 /// where each parent tree envelops all of its children.
 #[derive(Debug)]
 pub struct TokenRange {
     // Index of parent in `TokenBuffer.ranges`, does not have a parent if the
     // range kind is Module, in that case the parent index is -1.
     pub parent_idx: i32,
     pub range_kind: TokenRangeKind,
     // Curly-brace nesting depth associated with this range. The tokenizer
     // compares it against the number of currently open braces to decide
     // whether a closing brace ends a Definition range.
     pub curly_depth: u32,
     // Offsets into `TokenBuffer.tokens`: the tokens belonging to this range.
     pub start: u32,             // first token (inclusive index)
     pub end: u32,               // last token (exclusive index)
     // Child ranges
     pub num_child_ranges: u32,  // Number of subranges
     pub first_child_idx: i32,   // First subrange (or -1 if no subranges)
     pub last_child_idx: i32,    // Last subrange (or -1 if no subranges)
     pub next_sibling_idx: i32,  // Next subrange (or -1 if no next subrange)
+}
 /// Holds the output of tokenization: the flat list of tokens and the token
 /// ranges that refer to them.
 pub struct TokenBuffer {
     // All tokens in input order, including `SpanEnd` markers.
     pub tokens: Vec<Token>,
     // Token ranges; a range's `start`/`end` are indices into `tokens`, and
     // its parent/child/sibling fields are indices into this vector.
     pub ranges: Vec<TokenRange>,
+}
 impl TokenBuffer {
     /// Creates a buffer without any tokens or ranges.
     pub(crate) fn new() -> Self {
         Self{ tokens: Vec::new(), ranges: Vec::new() }
+    }
     // NOTE(review): two `iter_range` signatures follow back-to-back and the
     // first one has no closing brace. This looks like the removed and the
     // added version of the method shown together; confirm which one is
     // current. Both return an iterator over the tokens with indices in
     // [start, end).
     pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> {
         TokenIter::new(self, range.start as usize, range.end as usize)
     pub(crate) fn iter_range<'a>(&'a self, inclusive_start: u32, exclusive_end: u32) -> TokenIter<'a> {
         debug_assert!(exclusive_end as usize <= self.tokens.len());
         TokenIter::new(self, inclusive_start as usize, exclusive_end as usize)
+    }
     /// Returns the position of the first token of `range`. Indexes `tokens`
     /// directly, so `range.start` must be a valid token index.
     pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
         self.tokens[range.start as usize].pos
+    }
     /// Returns the end position of the last token of `range`: the position of
     /// the trailing `SpanEnd` marker if there is one, otherwise the last
     /// token's position advanced by the token kind's number of characters.
     pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition {
         // NOTE(review): `range.end - 1` assumes `range.end` is at least 1;
         // presumably callers never pass a range ending at token 0 - confirm.
         let last_token = &self.tokens[range.end as usize - 1];
         if last_token.kind == TokenKind::SpanEnd {
             return last_token.pos
         } else {
             debug_assert!(!last_token.kind.has_span_end());
             return last_token.pos.with_offset(last_token.kind.num_characters());
+        }
+    }
+}
 /// Iterator over tokens within a specific `TokenRange`.
 pub(crate) struct TokenIter<'a> {
     // The token list of the `TokenBuffer` being iterated over.
     tokens: &'a Vec<Token>,
     // Index into `tokens` of the token that `next` would return.
     cur: usize,
     // Exclusive end index; iteration stops once `cur >= end`.
     end: usize,
+}
 impl<'a> TokenIter<'a> {
     /// Creates an iterator over the tokens of `buffer` with indices in
     /// `[start, end)`.
     fn new(buffer: &'a TokenBuffer, start: usize, end: usize) -> Self {
         Self{ tokens: &buffer.tokens, cur: start, end }
+    }
     /// Returns the next token (may include comments), or `None` if at the end
     /// of the range. Does not advance the iterator.
     pub(crate) fn next_including_comments(&self) -> Option<TokenKind> {
         if self.cur >= self.end {
             return None;
+        }
         let token = &self.tokens[self.cur];
         Some(token.kind)
+    }
     /// Returns the next token (but skips over comments), or `None` if at the
     /// end of the range. Comments in front of the returned token are consumed,
     /// the returned token itself is not.
     pub(crate) fn next(&mut self) -> Option<TokenKind> {
         while let Some(token_kind) = self.next_including_comments() {
             if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
                 return Some(token_kind);
+            }
             self.consume();
+        }
         return None
+    }
     /// Peeks ahead by one token (i.e. the one that comes after `next()`), and
     /// skips over comments
     pub(crate) fn peek(&self) -> Option<TokenKind> {
         // `SpanEnd` is skipped as well: if the current token has a span end,
         // the entry at `cur + 1` is that marker and not a token of its own.
         for next_idx in self.cur + 1..self.end {
             let next_kind = self.tokens[next_idx].kind;
             if next_kind != TokenKind::LineComment && next_kind != TokenKind::BlockComment && next_kind != TokenKind::SpanEnd {
                 return Some(next_kind);
+            }
+        }
         return None;
+    }
     /// Returns the start position belonging to the token returned by `next`. If
     /// there is not a next token, then we return the end position of the
     /// previous token.
     pub(crate) fn last_valid_pos(&self) -> InputPosition {
         if self.cur < self.end {
             // Return token position
             return self.tokens[self.cur].pos
+        }
         // Return previous token end
         // NOTE(review): `self.cur - 1` assumes `cur` is at least 1 here;
         // presumably this is never called on an empty range starting at
         // token 0 - confirm.
         let token = &self.tokens[self.cur - 1];
         return if token.kind == TokenKind::SpanEnd {
             token.pos
         } else {
             token.pos.with_offset(token.kind.num_characters())
         };
+    }
     /// Assumes the token is not at the end and returns the starting position
     /// belonging to the token returned by `next`.
     pub(crate) fn next_start_position(&self) -> InputPosition {
         debug_assert!(self.cur < self.end);
         return self.tokens[self.cur].pos;
+    }
     /// Returns the token range belonging to the token returned by `next`. This
     /// assumes that we're not at the end of the range we're iterating over.
     pub(crate) fn next_positions(&self) -> (InputPosition, InputPosition) {
         debug_assert!(self.cur < self.end);
         let token = &self.tokens[self.cur];
         if token.kind.has_span_end() {
             // Variable-length token: its end is stored in the `SpanEnd`
             // marker that directly follows it.
             let span_end = &self.tokens[self.cur + 1];
             debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
             (token.pos, span_end.pos)
         } else {
             // Fixed-length token: derive the end from the kind's length.
             let offset = token.kind.num_characters();
             (token.pos, token.pos.with_offset(offset))
+        }
+    }
     /// See `next_positions`
     pub(crate) fn next_span(&self) -> InputSpan {
         let (begin, end) = self.next_positions();
         return InputSpan::from_positions(begin, end)
+    }
     /// Advances the iterator past the current token. Does nothing if the
     /// iterator is at the end of its range. Comments are not skipped here;
     /// that happens in `next`.
     pub(crate) fn consume(&mut self) {
         if let Some(kind) = self.next_including_comments() {
             if kind.has_span_end() {
                 // Also step over the `SpanEnd` marker that follows the token
                 self.cur += 2;
             } else {
                 self.cur += 1;
+            }
+        }
+    }
     /// Returns the index (into the token list) of the current token.
     pub(crate) fn token_index(&self) -> u32 {
         return self.cur as u32;
+    }
     /// Saves the current iteration position, may be passed to `load` to return
     /// the iterator to a previous position.
     pub(crate) fn save(&self) -> (usize, usize) {
         (self.cur, self.end)
+    }
     /// Restores a position previously obtained from `save`: sets the current
     /// index and the end index to the saved values.
     pub(crate) fn load(&mut self, saved: (usize, usize)) {
         self.cur = saved.0;
         self.end = saved.1;
+    }
+}
@@ \ No newline at end of file @@

std/std.global.pdl

➞

Show inline comments

 #module std.global
 // Note: the way token ranges and pragmas are parsed needs to change. For now
 // we insert spaces to work with the current system. It should become a system
 // where pragmas and keywords such as "func" (and similar keywords) indicate
 // the initial points at which to start parsing.
 // Every function declared below has a body that consists only of the
 // `#builtin` pragma.
 func get<T>(in<T> input) -> T { #builtin }
 func put<T>(out<T> output, T value) -> #type_void { #builtin }
 func fires<T>(#type_portlike <T>) -> bool { #builtin }
 func create<T>(#type_integerlike length) -> T[] { #builtin }
 func length<T>(#type_arraylike <T> array) -> u32 { #builtin }
 func assert(bool condition) -> #type_void { #builtin }
 func print(string message) -> #type_void { #builtin }
@@ \ No newline at end of file @@

tokens.txt

➞

Show inline comments

deleted file

0 comments (0 inline, 0 general)