CSY/reowolf Changeset - 6d6c5b5f07ae · Centrum Wiskunde & Informatica (CWI)

Changeset - 6d6c5b5f07ae

Parent rev.

Child rev.

[Not reviewed]

1 6 0

MH - 4 years ago 2022-03-28 22:18:20
contact@maxhenger.nl

Attempting to fix token tree construction

7 files changed with 24 insertions and 514 deletions:

src/protocol/parser/pass_definitions.rs

src/protocol/parser/pass_imports.rs

src/protocol/parser/pass_symbols.rs

src/protocol/parser/pass_tokenizer.rs

src/protocol/parser/tokens.rs

std/std.global.pdl

tokens.txt

500

0 comments (0 inline, 0 general)

src/protocol/parser/pass_definitions.rs

➞

Show inline comments

 use crate::protocol::ast::*;
 use super::symbol_table::*;
 use super::{Module, ModuleCompilationPhase, PassCtx};
 use super::tokens::*;
 use super::token_parsing::*;
 use super::pass_definitions_types::*;
 use crate::protocol::input_source::{InputSource, InputPosition, InputSpan, ParseError};
 use crate::collections::*;
 /// Definition-parsing pass: turns the tokenized definitions of a module
 /// into actual AST nodes.
 pub(crate) struct PassDefinitions {
     /// Definition that is currently being processed.
     cur_definition: DefinitionId,
     /// Small reusable parser for type annotations.
     type_parser: ParserTypeParser,
     /// Scratch string handed to the integer literal parser.
     buffer: String,
     /// Scratch storage for the fields of the struct being parsed.
     struct_fields: ScopedBuffer<StructFieldDefinition>,
     /// Scratch storage for the variants of the enum being parsed.
     enum_variants: ScopedBuffer<EnumVariantDefinition>,
     /// Scratch storage for the variants of the union being parsed.
     union_variants: ScopedBuffer<UnionVariantDefinition>,
     /// Scratch storage for procedure parameter lists.
     variables: ScopedBuffer<VariableId>,
     /// Scratch storage for expression IDs (usage lies outside this part of
     /// the file).
     expressions: ScopedBuffer<ExpressionId>,
     /// Scratch storage for the statements of the block being parsed.
     statements: ScopedBuffer<StatementId>,
     /// Scratch storage for the embedded types of a union variant.
     parser_types: ScopedBuffer<ParserType>,
 }
 impl PassDefinitions {
     pub(crate) fn new() -> Self {
         Self{
             cur_definition: DefinitionId::new_invalid(),
             type_parser: ParserTypeParser::new(),
             buffer: String::with_capacity(128),
             struct_fields: ScopedBuffer::with_capacity(128),
             enum_variants: ScopedBuffer::with_capacity(128),
             union_variants: ScopedBuffer::with_capacity(128),
             variables: ScopedBuffer::with_capacity(128),
             expressions: ScopedBuffer::with_capacity(128),
             statements: ScopedBuffer::with_capacity(128),
             parser_types: ScopedBuffer::with_capacity(128),
+        }
+    }
     /// Parses every definition of the module at `module_idx`.
     ///
     /// Walks the direct children of the module's root token range. Pragma
     /// and import ranges are skipped, definition and code ranges are handed
     /// to `visit_range`. On success the module's phase is set to
     /// `DefinitionsParsed`; the first parse error aborts the walk.
     pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let root_range = &module.tokens.ranges[0];
         debug_assert_eq!(module.phase, ModuleCompilationPhase::ImportsResolved);
         debug_assert_eq!(root_range.range_kind, TokenRangeKind::Module);

         // Code ranges are visited as well, purely so that unexpected tokens
         // at the module level of the source produce an error.
         let mut child_idx = root_range.first_child_idx;
         loop {
             let child_pos = child_idx as usize;
             let child = &module.tokens.ranges[child_pos];

             match child.range_kind {
                 TokenRangeKind::Definition | TokenRangeKind::Code => {
                     self.visit_range(modules, module_idx, ctx, child_pos)?;
                 },
                 TokenRangeKind::Pragma | TokenRangeKind::Import => {
                     // Already fully parsed, nothing left to do here
                 },
                 TokenRangeKind::Module => unreachable!(), // should not be reachable
             }

             let sibling_idx = child.next_sibling_idx;
             if sibling_idx == NO_SIBLING {
                 break;
             }
             child_idx = sibling_idx;
         }

         modules[module_idx].phase = ModuleCompilationPhase::DefinitionsParsed;
         Ok(())
     }
     fn visit_range(
         &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
     ) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let cur_range = &module.tokens.ranges[range_idx];
         debug_assert!(cur_range.range_kind == TokenRangeKind::Definition || cur_range.range_kind == TokenRangeKind::Code);
         // Detect which definition we're parsing
         let mut iter = module.tokens.iter_range(cur_range);
+        let mut iter = module.tokens.iter_range(cur_range.start, cur_range.end);
         loop {
             let next = iter.next();
             if next.is_none() {
                 return Ok(())
+            }
             // Token was not None, so peek_ident returns None if not an ident
             let ident = peek_ident(&module.source, &mut iter);
             match ident {
                 Some(KW_STRUCT) => self.visit_struct_definition(module, &mut iter, ctx)?,
                 Some(KW_ENUM) => self.visit_enum_definition(module, &mut iter, ctx)?,
                 Some(KW_UNION) => self.visit_union_definition(module, &mut iter, ctx)?,
                 Some(KW_FUNCTION) => self.visit_function_definition(module, &mut iter, ctx)?,
                 Some(KW_PRIMITIVE) | Some(KW_COMPOSITE) => self.visit_component_definition(module, &mut iter, ctx)?,
                 _ => return Err(ParseError::new_error_str_at_pos(
                     &module.source, iter.last_valid_pos(),
                     "unexpected symbol, expected a keyword marking the start of a definition"
                 )),
+            }
+        }
+    }
     /// Parses a `struct` definition and stores its fields in the
     /// preallocated definition found through the symbol table.
     fn visit_struct_definition(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<(), ParseError> {
         consume_exact_ident(&module.source, iter, KW_STRUCT)?;
         let (struct_name, _) = consume_ident(&module.source, iter)?;

         // Look up the DefinitionId that was preallocated for this name
         let module_scope = SymbolScope::Module(module.root_id);
         let symbol = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, struct_name).unwrap();
         let definition_id = symbol.variant.as_definition().definition_id;
         self.cur_definition = definition_id;

         // Polymorphic variables first, then the field list in curly braces
         consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;

         let mut field_section = self.struct_fields.start_section();
         consume_comma_separated(
             TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, ctx,
             |source, iter, ctx| {
                 // A field is a type followed by the field's identifier
                 let begin_pos = iter.last_valid_pos();
                 let poly_vars = ctx.heap[definition_id].poly_vars();
                 let parser_type = self.type_parser.consume_parser_type(
                     iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id,
                     module_scope, false, None
                 )?;
                 let field = consume_ident_interned(source, iter, ctx)?;
                 let span = InputSpan::from_positions(begin_pos, field.span.end);
                 Ok(StructFieldDefinition{ span, field, parser_type })
             },
             &mut field_section, "a struct field", "a list of struct fields", None
         )?;

         // Move the parsed fields into the preallocated definition
         ctx.heap[definition_id].as_struct_mut().fields = field_section.into_vec();
         Ok(())
     }
     /// Parses an `enum` definition and stores its variants in the
     /// preallocated definition found through the symbol table.
     fn visit_enum_definition(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<(), ParseError> {
         consume_exact_ident(&module.source, iter, KW_ENUM)?;
         let (enum_name, _) = consume_ident(&module.source, iter)?;

         // Look up the DefinitionId that was preallocated for this name
         let module_scope = SymbolScope::Module(module.root_id);
         let symbol = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, enum_name).unwrap();
         let definition_id = symbol.variant.as_definition().definition_id;
         self.cur_definition = definition_id;

         // Polymorphic variables first, then the variant list in curly braces
         consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;

         let mut variant_section = self.enum_variants.start_section();
         consume_comma_separated(
             TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, ctx,
             |source, iter, ctx| {
                 let identifier = consume_ident_interned(source, iter, ctx)?;
                 // An optional "= <integer>" assigns an explicit value
                 let value = match iter.next() {
                     Some(TokenKind::Equal) => {
                         iter.consume();
                         let (number, _) = consume_integer_literal(source, iter, &mut self.buffer)?;
                         EnumVariantValue::Integer(number as i64) // TODO: @int
                     },
                     _ => EnumVariantValue::None,
                 };
                 Ok(EnumVariantDefinition{ identifier, value })
             },
             &mut variant_section, "an enum variant", "a list of enum variants", None
         )?;

         // Move the parsed variants into the preallocated definition
         ctx.heap[definition_id].as_enum_mut().variants = variant_section.into_vec();
         Ok(())
     }
     /// Parses a `union` definition and stores its variants, each with an
     /// optional parenthesized list of embedded types, in the preallocated
     /// definition found through the symbol table.
     fn visit_union_definition(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<(), ParseError> {
         consume_exact_ident(&module.source, iter, KW_UNION)?;
         let (union_name, _) = consume_ident(&module.source, iter)?;

         // Look up the DefinitionId that was preallocated for this name
         let module_scope = SymbolScope::Module(module.root_id);
         let symbol = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, union_name).unwrap();
         let definition_id = symbol.variant.as_definition().definition_id;
         self.cur_definition = definition_id;

         // Polymorphic variables first, then the variant list in curly braces
         consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;

         let mut variant_section = self.union_variants.start_section();
         consume_comma_separated(
             TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, ctx,
             |source, iter, ctx| {
                 let identifier = consume_ident_interned(source, iter, ctx)?;

                 // The span ends at the identifier unless the embedded type
                 // list moves the end position further along.
                 let mut end_pos = identifier.span.end;
                 let mut embedded_section = self.parser_types.start_section();
                 let found_embedded = maybe_consume_comma_separated(
                     TokenKind::OpenParen, TokenKind::CloseParen, source, iter, ctx,
                     |source, iter, ctx| {
                         let poly_vars = ctx.heap[definition_id].poly_vars();
                         self.type_parser.consume_parser_type(
                             iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id,
                             module_scope, false, None
                         )
                     },
                     &mut embedded_section, "an embedded type", Some(&mut end_pos)
                 )?;

                 let value = if !found_embedded {
                     embedded_section.forget();
                     Vec::new()
                 } else {
                     embedded_section.into_vec()
                 };

                 let span = InputSpan::from_positions(identifier.span.begin, end_pos);
                 Ok(UnionVariantDefinition{ span, identifier, value })
             },
             &mut variant_section, "a union variant", "a list of union variants", None
         )?;

         // Move the parsed variants into the preallocated definition
         ctx.heap[definition_id].as_union_mut().variants = variant_section.into_vec();
         Ok(())
     }
     /// Parses a function definition: name, polymorphic variables, parameter
     /// list, `->` return type and body. The results are written into the
     /// preallocated procedure definition found through the symbol table.
     fn visit_function_definition(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<(), ParseError> {
         // Retrieve function name
         consume_exact_ident(&module.source, iter, KW_FUNCTION)?;
         let (ident_text, _) = consume_ident(&module.source, iter)?;

         // Retrieve preallocated DefinitionId
         let module_scope = SymbolScope::Module(module.root_id);
         let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
             .unwrap().variant.as_definition().definition_id;
         self.cur_definition = definition_id;

         consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;

         // Parse function's argument list
         let mut parameter_section = self.variables.start_section();
         consume_parameter_list(
             &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id
         )?;
         let parameters = parameter_section.into_vec();

         // Consume return types
         consume_token(&module.source, iter, TokenKind::ArrowRight)?;
         let poly_vars = ctx.heap[definition_id].poly_vars();
         let parser_type = self.type_parser.consume_parser_type(
             iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, definition_id,
             module_scope, false, None
         )?;

         // Consume body
         let (body_id, source) = self.consume_procedure_body(module, iter, ctx, definition_id, ProcedureKind::Function)?;
         let scope_id = ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::Definition(definition_id)));

         // Assign everything in the preallocated AST node
         let function = ctx.heap[definition_id].as_procedure_mut();
         function.source = source;
         function.return_type = Some(parser_type);
         function.parameters = parameters;
         function.scope = scope_id;
         function.body = body_id;

         Ok(())
     }
     /// Parses a component definition (introduced by the primitive or
     /// composite keyword): name, polymorphic variables, parameter list and
     /// body. The results are written into the preallocated procedure
     /// definition found through the symbol table.
     fn visit_component_definition(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<(), ParseError> {
         // The leading keyword is only checked in debug builds
         let (_variant_text, _) = consume_any_ident(&module.source, iter)?;
         debug_assert!(_variant_text == KW_PRIMITIVE || _variant_text == KW_COMPOSITE);
         let (component_name, _) = consume_ident(&module.source, iter)?;

         // Look up the DefinitionId that was preallocated for this name
         let module_scope = SymbolScope::Module(module.root_id);
         let symbol = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, component_name).unwrap();
         let definition_id = symbol.variant.as_definition().definition_id;
         self.cur_definition = definition_id;

         consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;

         // Parameter list
         let mut param_section = self.variables.start_section();
         consume_parameter_list(
             &mut self.type_parser, &module.source, iter, ctx, &mut param_section, module_scope, definition_id
         )?;
         let parameters = param_section.into_vec();

         // Body; the procedure kind is read from the preallocated node
         let kind = ctx.heap[definition_id].as_procedure().kind;
         let (body, source) = self.consume_procedure_body(module, iter, ctx, definition_id, kind)?;
         let scope = ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::Definition(definition_id)));

         // Store everything in the preallocated AST node
         let component = ctx.heap[definition_id].as_procedure_mut();
         debug_assert!(component.return_type.is_none());
         component.source = source;
         component.parameters = parameters;
         component.scope = scope;
         component.body = body;

         Ok(())
     }
     /// Consumes a procedure's body: either a user-defined procedure, which we
     /// parse as normal, or a builtin function, where we'll make sure we expect
     /// the particular builtin.
     ///
     /// We expect that the procedure's name is already stored in the
     /// preallocated AST node.
     fn consume_procedure_body(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, definition_id: DefinitionId, kind: ProcedureKind
     ) -> Result<(BlockStatementId, ProcedureSource), ParseError> {
         if iter.next() == Some(TokenKind::OpenCurly) && iter.peek() == Some(TokenKind::Pragma) {
             // Consume the placeholder "{ #builtin }" tokens
             iter.consume(); // opening curly brace
             let (pragma, pragma_start, pragma_end) = consume_pragma(&module.source, iter)?;
             if pragma != b"#builtin" {
                 return Err(ParseError::new_error_str_at_span(
                     &module.source, InputSpan::from_positions(pragma_start, pragma_end),
                     "expected a '#builtin' pragma, or a function body"
                 ));
+            }
             if iter.next() != Some(TokenKind::CloseCurly) {
                 // Just to keep the compiler writers in line ;)
                 panic!("compiler error: when using the #builtin pragma, wrap it in curly braces");
+            }
             iter.consume();
             // Retrieve module and procedure name
             assert!(module.name.is_some(), "compiler error: builtin procedure body in unnamed module");
             let (_, module_name) = module.name.as_ref().unwrap();
             let module_name = module_name.as_str();
             let definition = ctx.heap[definition_id].as_procedure();
             let procedure_name = definition.identifier.value.as_str();
             let source = match (module_name, procedure_name) {
                 ("std.global", "get") => ProcedureSource::FuncGet,
                 ("std.global", "put") => ProcedureSource::FuncPut,
                 ("std.global", "fires") => ProcedureSource::FuncFires,
                 ("std.global", "create") => ProcedureSource::FuncCreate,
                 ("std.global", "length") => ProcedureSource::FuncLength,
                 ("std.global", "assert") => ProcedureSource::FuncAssert,
                 ("std.global", "print") => ProcedureSource::FuncPrint,
                 _ => panic!(
                     "compiler error: unknown builtin procedure '{}' in module '{}'",
                     procedure_name, module_name
                 ),
             };
             return Ok((BlockStatementId::new_invalid(), source));
         } else {
             let body_id = self.consume_block_statement(module, iter, ctx)?;
             let source = match kind {
                 ProcedureKind::Function =>
                     ProcedureSource::FuncUserDefined,
                 ProcedureKind::Primitive | ProcedureKind::Composite =>
                     ProcedureSource::CompUserDefined,
             };
             return Ok((body_id, source))
+        }
+    }
     /// Consumes a statement and returns a boolean indicating whether it was a
     /// block or not.
     fn consume_statement(&mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx) -> Result<StatementId, ParseError> {
         let next = iter.next().expect("consume_statement has a next token");
         if next == TokenKind::OpenCurly {
             let id = self.consume_block_statement(module, iter, ctx)?;
             return Ok(id.upcast());
         } else if next == TokenKind::Ident {
             let ident = peek_ident(&module.source, iter).unwrap();
             if ident == KW_STMT_IF {
                 // Consume if statement and place end-if statement directly
                 // after it.
                 let id = self.consume_if_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_WHILE {
                 let id = self.consume_while_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_BREAK {
                 let id = self.consume_break_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_CONTINUE {
                 let id = self.consume_continue_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_SYNC {
                 let id = self.consume_synchronous_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_FORK {
                 let id = self.consume_fork_statement(module, iter, ctx)?;
                 let end_fork = ctx.heap.alloc_end_fork_statement(|this| EndForkStatement {
                     this,
                     start_fork: id,
                     next: StatementId::new_invalid(),
                 });
                 let fork_stmt = &mut ctx.heap[id];
                 fork_stmt.end_fork = end_fork;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_SELECT {
                 let id = self.consume_select_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_RETURN {
                 let id = self.consume_return_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_GOTO {
                 let id = self.consume_goto_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_NEW {
                 let id = self.consume_new_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else if ident == KW_STMT_CHANNEL {
                 let id = self.consume_channel_statement(module, iter, ctx)?;
                 return Ok(id.upcast().upcast());
             } else if iter.peek() == Some(TokenKind::Colon) {
                 let id = self.consume_labeled_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
             } else {
                 // Two fallback possibilities: the first one is a memory
                 // declaration, the other one is to parse it as a normal
                 // expression. This is a bit ugly.
                 if let Some(memory_stmt_id) = self.maybe_consume_memory_statement_without_semicolon(module, iter, ctx)? {
                     consume_token(&module.source, iter, TokenKind::SemiColon)?;
                     return Ok(memory_stmt_id.upcast().upcast());
                 } else {
                     let id = self.consume_expression_statement(module, iter, ctx)?;
                     return Ok(id.upcast());
+                }
+            }
         } else if next == TokenKind::OpenParen {
             // Same as above: memory statement or normal expression
             if let Some(memory_stmt_id) = self.maybe_consume_memory_statement_without_semicolon(module, iter, ctx)? {
                 consume_token(&module.source, iter, TokenKind::SemiColon)?;
                 return Ok(memory_stmt_id.upcast().upcast());
             } else {
                 let id = self.consume_expression_statement(module, iter, ctx)?;
                 return Ok(id.upcast());
+            }
         } else {
             let id = self.consume_expression_statement(module, iter, ctx)?;
             return Ok(id.upcast());
+        }
+    }
     fn consume_block_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<BlockStatementId, ParseError> {
         let open_curly_span = consume_token(&module.source, iter, TokenKind::OpenCurly)?;
         let mut stmt_section = self.statements.start_section();
         let mut next = iter.next();
         while next != Some(TokenKind::CloseCurly) {
             if next.is_none() {
                 return Err(ParseError::new_error_str_at_pos(
                     &module.source, iter.last_valid_pos(), "expected a statement or '}'"
                 ));
+            }
             let stmt_id = self.consume_statement(module, iter, ctx)?;
             stmt_section.push(stmt_id);
             next = iter.next();
+        }
         let statements = stmt_section.into_vec();
         let mut block_span = consume_token(&module.source, iter, TokenKind::CloseCurly)?;
         block_span.begin = open_curly_span.begin;
         let block_id = ctx.heap.alloc_block_statement(|this| BlockStatement{
             this,
             span: block_span,
             statements,
             end_block: EndBlockStatementId::new_invalid(),
             scope: ScopeId::new_invalid(),
             next: StatementId::new_invalid(),
         });
         let scope_id = ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::Block(block_id)));
         let end_block_id = ctx.heap.alloc_end_block_statement(|this| EndBlockStatement{
             this, start_block: block_id, next: StatementId::new_invalid()
         });
         let block_stmt = &mut ctx.heap[block_id];
         block_stmt.end_block = end_block_id;
         block_stmt.scope = scope_id;
         Ok(block_id)
+    }
     fn consume_if_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<IfStatementId, ParseError> {
         let if_span = consume_exact_ident(&module.source, iter, KW_STMT_IF)?;
         consume_token(&module.source, iter, TokenKind::OpenParen)?;
         let test = self.consume_expression(module, iter, ctx)?;
         consume_token(&module.source, iter, TokenKind::CloseParen)?;
         // Consume bodies of if-statement
         let true_body = IfStatementCase{
             body: self.consume_statement(module, iter, ctx)?,
             scope: ScopeId::new_invalid(),
         };
         let false_body = if has_ident(&module.source, iter, KW_STMT_ELSE) {
             iter.consume();
             let false_body = IfStatementCase{
                 body: self.consume_statement(module, iter, ctx)?,
                 scope: ScopeId::new_invalid(),
             };
             Some(false_body)
         } else {
             None
         };
         // Construct AST elements
         let if_stmt_id = ctx.heap.alloc_if_statement(|this| IfStatement{
             this,
             span: if_span,
             test,
             true_case: true_body,
             false_case: false_body,
             end_if: EndIfStatementId::new_invalid(),
         });
         let end_if_stmt_id = ctx.heap.alloc_end_if_statement(|this| EndIfStatement{
             this,
             start_if: if_stmt_id,
             next: StatementId::new_invalid(),
         });
         let true_scope_id = ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::If(if_stmt_id, true)));
         let false_scope_id = if false_body.is_some() {
             Some(ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::If(if_stmt_id, false))))
         } else {
             None
         };
         let if_stmt = &mut ctx.heap[if_stmt_id];
         if_stmt.end_if = end_if_stmt_id;
         if_stmt.true_case.scope = true_scope_id;
         if let Some(false_case) = &mut if_stmt.false_case {
             false_case.scope = false_scope_id.unwrap();
+        }
         return Ok(if_stmt_id);
+    }
     /// Consumes a while-statement: the parenthesized test followed by the
     /// body statement. Allocates the while-statement, its end-while marker
     /// and its scope.
     fn consume_while_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<WhileStatementId, ParseError> {
         let span = consume_exact_ident(&module.source, iter, KW_STMT_WHILE)?;
         consume_token(&module.source, iter, TokenKind::OpenParen)?;
         let test = self.consume_expression(module, iter, ctx)?;
         consume_token(&module.source, iter, TokenKind::CloseParen)?;
         let body = self.consume_statement(module, iter, ctx)?;

         let while_id = ctx.heap.alloc_while_statement(|this| WhileStatement{
             this,
             span,
             test,
             scope: ScopeId::new_invalid(),
             body,
             end_while: EndWhileStatementId::new_invalid(),
             in_sync: SynchronousStatementId::new_invalid(),
         });
         let end_while = ctx.heap.alloc_end_while_statement(|this| EndWhileStatement{
             this,
             start_while: while_id,
             next: StatementId::new_invalid(),
         });
         let scope = ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::While(while_id)));

         // Link the while-statement to the nodes that were allocated after it
         let while_stmt = &mut ctx.heap[while_id];
         while_stmt.scope = scope;
         while_stmt.end_while = end_while;

         Ok(while_id)
     }
     fn consume_break_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<BreakStatementId, ParseError> {
         let break_span = consume_exact_ident(&module.source, iter, KW_STMT_BREAK)?;
         let label = if Some(TokenKind::Ident) == iter.next() {
             let label = consume_ident_interned(&module.source, iter, ctx)?;
             Some(label)
         } else {
             None
         };
         consume_token(&module.source, iter, TokenKind::SemiColon)?;
         Ok(ctx.heap.alloc_break_statement(|this| BreakStatement{
             this,
             span: break_span,
             label,
             target: EndWhileStatementId::new_invalid(),
         }))
+    }
     fn consume_continue_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<ContinueStatementId, ParseError> {
         let continue_span = consume_exact_ident(&module.source, iter, KW_STMT_CONTINUE)?;
         let label=  if Some(TokenKind::Ident) == iter.next() {
             let label = consume_ident_interned(&module.source, iter, ctx)?;
             Some(label)
         } else {
             None
         };
         consume_token(&module.source, iter, TokenKind::SemiColon)?;
         Ok(ctx.heap.alloc_continue_statement(|this| ContinueStatement{
             this,
             span: continue_span,
             label,
             target: WhileStatementId::new_invalid(),
         }))
+    }
     fn consume_synchronous_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<SynchronousStatementId, ParseError> {
         let synchronous_span = consume_exact_ident(&module.source, iter, KW_STMT_SYNC)?;
         let body = self.consume_statement(module, iter, ctx)?;
         let sync_stmt_id = ctx.heap.alloc_synchronous_statement(|this| SynchronousStatement{
             this,
             span: synchronous_span,
             scope: ScopeId::new_invalid(),
             body,
             end_sync: EndSynchronousStatementId::new_invalid(),
         });
         let end_sync_stmt_id = ctx.heap.alloc_end_synchronous_statement(|this| EndSynchronousStatement{
             this,
             start_sync: sync_stmt_id,
             next: StatementId::new_invalid(),
         });
         let scope_id = ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::Synchronous(sync_stmt_id)));
         let sync_stmt = &mut ctx.heap[sync_stmt_id];
         sync_stmt.scope = scope_id;
         sync_stmt.end_sync = end_sync_stmt_id;
         return Ok(sync_stmt_id);
+    }
     fn consume_fork_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<ForkStatementId, ParseError> {
         let fork_span = consume_exact_ident(&module.source, iter, KW_STMT_FORK)?;
         let left_body = self.consume_statement(module, iter, ctx)?;
         let right_body = if has_ident(&module.source, iter, KW_STMT_OR) {
             iter.consume();
             let right_body = self.consume_statement(module, iter, ctx)?;
             Some(right_body)
         } else {
             None
         };
         Ok(ctx.heap.alloc_fork_statement(|this| ForkStatement{
             this,
             span: fork_span,
             left_body,
             right_body,
             end_fork: EndForkStatementId::new_invalid(),
         }))
+    }
     /// Parses a select statement: the `KW_STMT_SELECT` keyword followed by a
     /// curly-braced sequence of cases. Each case is a guard, an `ArrowRight`
     /// token and a body statement. Besides the select statement itself this
     /// allocates the matching end-select statement and one scope per case.
     fn consume_select_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<SelectStatementId, ParseError> {
         let select_span = consume_exact_ident(&module.source, iter, KW_STMT_SELECT)?;
         consume_token(&module.source, iter, TokenKind::OpenCurly)?;

         let mut cases = Vec::new();
         while iter.next() != Some(TokenKind::CloseCurly) {
             // The guard is either a memory statement (parsed without its
             // semicolon), or an expression wrapped in an expression statement.
             let memory_guard = self.maybe_consume_memory_statement_without_semicolon(module, iter, ctx)?;
             let guard = if let Some(guard_mem_stmt) = memory_guard {
                 guard_mem_stmt.upcast().upcast()
             } else {
                 let start_pos = iter.last_valid_pos();
                 let expression = self.consume_expression(module, iter, ctx)?;
                 let end_pos = iter.last_valid_pos();
                 let guard_expr_stmt = ctx.heap.alloc_expression_statement(|this| ExpressionStatement{
                     this,
                     span: InputSpan::from_positions(start_pos, end_pos),
                     expression,
                     next: StatementId::new_invalid(),
                 });
                 guard_expr_stmt.upcast()
             };

             consume_token(&module.source, iter, TokenKind::ArrowRight)?;
             let body = self.consume_statement(module, iter, ctx)?;
             cases.push(SelectCase{
                 guard,
                 body,
                 scope: ScopeId::new_invalid(),
                 involved_ports: Vec::with_capacity(1)
             });
         }
         consume_token(&module.source, iter, TokenKind::CloseCurly)?;

         // `cases` is moved into the AST node, so remember its length first
         let case_count = cases.len();
         let select_stmt_id = ctx.heap.alloc_select_statement(|this| SelectStatement{
             this,
             span: select_span,
             cases,
             end_select: EndSelectStatementId::new_invalid(),
             relative_pos_in_parent: -1,
             next: StatementId::new_invalid(),
         });
         let end_select_stmt_id = ctx.heap.alloc_end_select_statement(|this| EndSelectStatement{
             this,
             start_select: select_stmt_id,
             next: StatementId::new_invalid(),
         });
         ctx.heap[select_stmt_id].end_select = end_select_stmt_id;

         // Every case gets its own scope, associated with the case's index
         for case_index in 0..case_count {
             let scope_id = ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::SelectCase(select_stmt_id, case_index as u32)));
             ctx.heap[select_stmt_id].cases[case_index].scope = scope_id;
         }

         Ok(select_stmt_id)
     }
     fn consume_return_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<ReturnStatementId, ParseError> {
         let return_span = consume_exact_ident(&module.source, iter, KW_STMT_RETURN)?;
         let mut scoped_section = self.expressions.start_section();
         consume_comma_separated_until(
             TokenKind::SemiColon, &module.source, iter, ctx,
             |_source, iter, ctx| self.consume_expression(module, iter, ctx),
             &mut scoped_section, "an expression", None
         )?;
         let expressions = scoped_section.into_vec();
         if expressions.is_empty() {
             return Err(ParseError::new_error_str_at_span(&module.source, return_span, "expected at least one return value"));
         } else if expressions.len() > 1 {
             return Err(ParseError::new_error_str_at_span(&module.source, return_span, "multiple return values are not (yet) supported"))
+        }
         Ok(ctx.heap.alloc_return_statement(|this| ReturnStatement{
             this,
             span: return_span,
             expressions
         }))
+    }
     fn consume_goto_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<GotoStatementId, ParseError> {
         let goto_span = consume_exact_ident(&module.source, iter, KW_STMT_GOTO)?;
         let label = consume_ident_interned(&module.source, iter, ctx)?;
         consume_token(&module.source, iter, TokenKind::SemiColon)?;
         Ok(ctx.heap.alloc_goto_statement(|this| GotoStatement{
             this,
             span: goto_span,
             label,
             target: LabeledStatementId::new_invalid(),
         }))
+    }
     fn consume_new_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<NewStatementId, ParseError> {
         let new_span = consume_exact_ident(&module.source, iter, KW_STMT_NEW)?;
         let start_pos = iter.last_valid_pos();
         let expression_id = self.consume_primary_expression(module, iter, ctx)?;
         let expression = &ctx.heap[expression_id];
         let mut valid = false;
         let mut call_id = CallExpressionId::new_invalid();
         if let Expression::Call(expression) = expression {
             // Allow both components and functions, as it makes more sense to
             // check their correct use in the validation and linking pass
             if expression.method == Method::UserComponent || expression.method == Method::UserFunction {
                 call_id = expression.this;
                 valid = true;
+            }
+        }
         if !valid {
             return Err(ParseError::new_error_str_at_span(
                 &module.source, InputSpan::from_positions(start_pos, iter.last_valid_pos()), "expected a call expression"
             ));
+        }
         consume_token(&module.source, iter, TokenKind::SemiColon)?;
         debug_assert!(!call_id.is_invalid());
         Ok(ctx.heap.alloc_new_statement(|this| NewStatement{
             this,
             span: new_span,
             expression: call_id,
             next: StatementId::new_invalid(),
         }))
+    }
     fn consume_channel_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<ChannelStatementId, ParseError> {
         // Consume channel specification
         let channel_span = consume_exact_ident(&module.source, iter, KW_STMT_CHANNEL)?;
         let (inner_port_type, end_pos) = if Some(TokenKind::OpenAngle) == iter.next() {
             // Retrieve the type of the channel, we're cheating a bit here by
             // consuming the first '<' and setting the initial angle depth to 1
             // such that our final '>' will be consumed as well.

src/protocol/parser/pass_imports.rs

➞

Show inline comments

 use crate::protocol::ast::*;
 use super::symbol_table::*;
 use super::{Module, ModuleCompilationPhase, PassCtx};
 use super::tokens::*;
 use super::token_parsing::*;
 use crate::protocol::input_source::{InputSource as InputSource, InputSpan, ParseError};
 use crate::collections::*;
 /// Parses all the imports in the module tokens. Is applied after the
 /// definitions and name of modules are resolved. Hence we should be able to
 /// resolve all symbols to their appropriate module/definition.
 pub(crate) struct PassImport {
     // IDs of the imports parsed for the current module. Filled by
     // `visit_import_range` and drained into the module's root by `parse`.
     imports: Vec<ImportId>,
     // Temporary buffer for a curly-braced list of imported symbols, drained
     // again once the symbols are inserted into the symbol table.
     found_symbols: Vec<(AliasedSymbol, SymbolDefinition)>,
     // Temporary buffer holding all symbols of the target module when
     // handling a `*` import; cleared before use and drained afterwards.
     scoped_symbols: Vec<Symbol>,
+}
 impl PassImport {
     pub(crate) fn new() -> Self {
         Self{
             imports: Vec::with_capacity(32),
             found_symbols: Vec::with_capacity(32),
             scoped_symbols: Vec::with_capacity(32),
+        }
+    }
     /// Resolves all imports of the module at `module_idx`. Walks the sibling
     /// chain of child ranges below the module's root token range (index 0),
     /// handing every import range to `visit_import_range`. Afterwards the
     /// collected import IDs are moved into the module's root node and the
     /// module's phase is advanced to `ImportsResolved`.
     pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let module_range = &module.tokens.ranges[0];
         debug_assert!(modules.iter().all(|m| m.phase >= ModuleCompilationPhase::SymbolsScanned));
         debug_assert_eq!(module.phase, ModuleCompilationPhase::SymbolsScanned);
         debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);

         let mut range_idx = module_range.first_child_idx;
         loop {
             let cur_range = &module.tokens.ranges[range_idx as usize];
             let next_sibling_idx = cur_range.next_sibling_idx;

             if cur_range.range_kind == TokenRangeKind::Import {
                 self.visit_import_range(modules, module_idx, ctx, range_idx as usize)?;
             }

             if next_sibling_idx == NO_SIBLING {
                 break;
             }
             range_idx = next_sibling_idx;
         }

         // Hand the parsed imports over to the AST root, then mark the module
         ctx.heap[module.root_id].imports.extend(self.imports.drain(..));
         modules[module_idx].phase = ModuleCompilationPhase::ImportsResolved;

         Ok(())
     }
     /// Parses the single import found in token range `range_idx` of module
     /// `module_idx`, allocates the corresponding `Import` AST node and inserts
     /// the introduced names into the importing module's symbol scope.
     ///
     /// After the import keyword and the (dot-separated) module name, one of
     /// the following forms is handled:
     /// - `as` followed by an identifier: the module is imported under an alias
     /// - a `ColonColon` token followed by a single symbol (optionally with an
     ///   `as` alias), a curly-braced list of such symbols, or a `Star` token
     ///   for all symbols defined in the target module
     /// - nothing: the module is imported under an implicit alias, being the
     ///   part of the module name after its last '.'
     ///
     /// In all cases a terminating semicolon is required. Errors are returned
     /// when the module or a symbol cannot be resolved, or when an introduced
     /// name conflicts with a symbol that is already present.
     pub(crate) fn visit_import_range(
         &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
     ) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let import_range = &module.tokens.ranges[range_idx];
         debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import);
         // NOTE(review): the next two lines both bind `iter` (the second
         // shadows the first) and call `iter_range` with different argument
         // lists. This looks like a superseded line kept next to its
         // replacement - confirm against the `iter_range` signature which one
         // should remain.
         let mut iter = module.tokens.iter_range(import_range);
+        let mut iter = module.tokens.iter_range(import_range.start, import_range.end);
         // Consume "import"
         let (_import_ident, import_span) =
             consume_any_ident(&module.source, &mut iter)?;
         debug_assert_eq!(_import_ident, KW_IMPORT);
         // Consume module name
         let (module_name, module_name_span) = consume_domain_ident(&module.source, &mut iter)?;
         let target_root_id = ctx.symbols.get_module_by_name(module_name);
         if target_root_id.is_none() {
             return Err(ParseError::new_error_at_span(
                 &module.source, module_name_span,
                 format!("could not resolve module '{}'", String::from_utf8_lossy(module_name))
             ));
+        }
         let module_name = ctx.pool.intern(module_name);
         let module_identifier = Identifier{ span: module_name_span, value: module_name };
         let target_root_id = target_root_id.unwrap();
         // Check for subsequent characters (alias, multiple imported symbols)
         let next = iter.next();
         let import_id;
         if has_ident(&module.source, &mut iter, b"as") {
             // Alias for module
             iter.consume();
             let alias_identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
             let alias_name = alias_identifier.value.clone();
             import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{
                 this,
                 span: import_span,
                 module: module_identifier,
                 alias: alias_identifier,
                 module_id: target_root_id
             }));
             if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{
                 name: alias_name,
                 variant: SymbolVariant::Module(SymbolModule{
                     root_id: target_root_id,
                     introduced_at: import_id,
                 }),
             }) {
                 return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol));
+            }
         } else if Some(TokenKind::ColonColon) == next {
             iter.consume();
             // Helper function to consume symbols, their alias, and the
             // definition the symbol is pointing to.
             fn consume_symbol_and_maybe_alias<'a>(
                 source: &'a InputSource, iter: &mut TokenIter, ctx: &mut PassCtx,
                 module_name: &StringRef<'static>, module_root_id: RootId,
             ) -> Result<(AliasedSymbol, SymbolDefinition), ParseError> {
                 // Consume symbol name and make sure it points to an existing definition
                 let symbol_identifier = consume_ident_interned(source, iter, ctx)?;
                 // Consume alias text if specified
                 let alias_identifier = if peek_ident(source, iter) == Some(b"as") {
                     // Consume alias
                     iter.consume();
                     Some(consume_ident_interned(source, iter, ctx)?)
                 } else {
                     None
                 };
                 let target = ctx.symbols.get_symbol_by_name_defined_in_scope(
                     SymbolScope::Module(module_root_id), symbol_identifier.value.as_bytes()
                 );
                 if target.is_none() {
                     return Err(ParseError::new_error_at_span(
                         source, symbol_identifier.span,
                         format!(
                             "could not find symbol '{}' within module '{}'",
                             symbol_identifier.value.as_str(), module_name.as_str()
+                        )
                     ));
+                }
                 let target = target.unwrap();
                 debug_assert_ne!(target.class(), SymbolClass::Module);
                 let target_definition = target.variant.as_definition();
                 Ok((
                     AliasedSymbol{
                         name: symbol_identifier,
                         alias: alias_identifier,
                         definition_id: target_definition.definition_id,
                     },
                     target_definition.clone()
                 ))
+            }
             let next = iter.next();
             if Some(TokenKind::Ident) == next {
                 // Importing a single symbol
                 let (imported_symbol, symbol_definition) = consume_symbol_and_maybe_alias(
                     &module.source, &mut iter, ctx, &module_identifier.value, target_root_id
                 )?;
                 // The symbol is inserted under its alias if it has one,
                 // otherwise under its own name
                 let alias_identifier = match imported_symbol.alias.as_ref() {
                     Some(alias) => alias.clone(),
                     None => imported_symbol.name.clone(),
                 };
                 import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols{
                     this,
                     span: InputSpan::from_positions(import_span.begin, alias_identifier.span.end),
                     module: module_identifier,
                     module_id: target_root_id,
                     symbols: vec![imported_symbol],
                 }));
                 if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(
                     SymbolScope::Module(module.root_id),
                     Symbol{
                         name: alias_identifier.value,
                         variant: SymbolVariant::Definition(symbol_definition.into_imported(import_id))
+                    }
                 ) {
                     return Err(construct_symbol_conflict_error(
                         modules, module_idx, ctx, &new_symbol, &old_symbol
                     ));
+                }
             } else if Some(TokenKind::OpenCurly) == next {
                 // Importing multiple symbols
                 let mut end_of_list = iter.last_valid_pos();
                 consume_comma_separated(
                     TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, &mut iter, ctx,
                     |source, iter, ctx| consume_symbol_and_maybe_alias(
                         source, iter, ctx, &module_identifier.value, target_root_id
                     ),
                     &mut self.found_symbols, "a symbol", "a list of symbols to import", Some(&mut end_of_list)
                 )?;
                 // Preallocate import
                 import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols {
                     this,
                     span: InputSpan::from_positions(import_span.begin, end_of_list),
                     module: module_identifier,
                     module_id: target_root_id,
                     symbols: Vec::with_capacity(self.found_symbols.len()),
                 }));
                 // Fill import symbols while inserting symbols in the
                 // appropriate scope in the symbol table.
                 let import = ctx.heap[import_id].as_symbols_mut();
                 for (imported_symbol, symbol_definition) in self.found_symbols.drain(..) {
                     let import_name = match imported_symbol.alias.as_ref() {
                         Some(import) => import.value.clone(),
                         None => imported_symbol.name.value.clone()
                     };
                     import.symbols.push(imported_symbol);
                     if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(
                         SymbolScope::Module(module.root_id), Symbol{
                             name: import_name,
                             variant: SymbolVariant::Definition(symbol_definition.into_imported(import_id))
+                        }
                     ) {
                         return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol));
+                    }
+                }
             } else if Some(TokenKind::Star) == next {
                 // Import all symbols from the module
                 let star_span = iter.next_span();
                 iter.consume();
                 self.scoped_symbols.clear();
                 let _found = ctx.symbols.get_all_symbols_defined_in_scope(
                     SymbolScope::Module(target_root_id),
                     &mut self.scoped_symbols
                 );
                 debug_assert!(_found); // even modules without symbols should have a scope
                 // Preallocate import
                 import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols{
                     this,
                     span: InputSpan::from_positions(import_span.begin, star_span.end),
                     module: module_identifier,
                     module_id: target_root_id,
                     symbols: Vec::with_capacity(self.scoped_symbols.len())
                 }));
                 // Fill import AST node and symbol table
                 let import = ctx.heap[import_id].as_symbols_mut();
                 for symbol in self.scoped_symbols.drain(..) {
                     let symbol_name = symbol.name;
                     match symbol.variant {
                         SymbolVariant::Definition(symbol_definition) => {
                             // There is no identifier in the source for these
                             // symbols, so the span of the '*' is used instead
                             import.symbols.push(AliasedSymbol{
                                 name: Identifier{ span: star_span, value: symbol_name.clone() },
                                 alias: None,
                                 definition_id: symbol_definition.definition_id,
                             });
                             if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(
                                 SymbolScope::Module(module.root_id),
                                 Symbol{
                                     name: symbol_name,
                                     variant: SymbolVariant::Definition(symbol_definition.into_imported(import_id))
+                                }
                             ) {
                                 return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol));
+                            }
                         },
                         _ => unreachable!(),
+                    }
+                }
             } else {
                 return Err(ParseError::new_error_str_at_pos(
                     &module.source, iter.last_valid_pos(), "expected symbol name, '{' or '*'"
                 ));
+            }
         } else {
             // Assume implicit alias
             let module_name_str = module_identifier.value.clone();
             // The alias is whatever follows the last '.' in the module name,
             // or the entire name if it does not contain a '.'
             let last_ident_start = module_name_str.as_str().rfind('.').map_or(0, |v| v + 1);
             let alias_text = &module_name_str.as_bytes()[last_ident_start..];
             let alias = ctx.pool.intern(alias_text);
             let alias_span = InputSpan::from_positions(
                 module_name_span.begin.with_offset(last_ident_start as u32),
                 module_name_span.end
             );
             let alias_identifier = Identifier{ span: alias_span, value: alias.clone() };
             import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{
                 this,
                 span: InputSpan::from_positions(import_span.begin, module_identifier.span.end),
                 module: module_identifier,
                 alias: alias_identifier,
                 module_id: target_root_id,
             }));
             if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{
                 name: alias,
                 variant: SymbolVariant::Module(SymbolModule{
                     root_id: target_root_id,
                     introduced_at: import_id
                 })
             }) {
                 return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol));
+            }
+        }
         // By now the `import_id` is set, just need to make sure that the import
         // properly ends with a semicolon
         consume_token(&module.source, &mut iter, TokenKind::SemiColon)?;
         self.imports.push(import_id);
         Ok(())
+    }
+}

src/protocol/parser/pass_symbols.rs

➞

Show inline comments

 use crate::protocol::ast::*;
 use super::symbol_table::*;
 use crate::protocol::input_source::{ParseError, InputSpan};
 use super::tokens::*;
 use super::token_parsing::*;
 use super::{Module, ModuleCompilationPhase, PassCtx};
 /// Scans the module and finds all module-level type definitions. These will be
 /// added to the symbol table such that during AST-construction we know which
 /// identifiers point to types. Will also parse all pragmas to determine module
 /// names.
 pub(crate) struct PassSymbols {
     // Symbols of the definitions found in the current module; inserted into
     // the symbol table once all ranges of the module are visited.
     symbols: Vec<Symbol>,
     // Pragmas found in the current module, moved into the root afterwards.
     pragmas: Vec<PragmaId>,
     // Cleared in `reset`; not otherwise used by the methods of this pass
     // visible here.
     imports: Vec<ImportId>,
     // Definitions preallocated in the AST, moved into the root afterwards.
     definitions: Vec<DefinitionId>,
     // Scratch buffer handed to `consume_integer_literal`.
     buffer: String,
     // Track whether the respective pragma was already encountered in the
     // current module, such that duplicates can be rejected.
     has_pragma_version: bool,
     has_pragma_module: bool,
+}
 impl PassSymbols {
     pub(crate) fn new() -> Self {
         Self{
             symbols: Vec::with_capacity(128),
             pragmas: Vec::with_capacity(8),
             imports: Vec::with_capacity(32),
             definitions: Vec::with_capacity(128),
             buffer: String::with_capacity(128),
             has_pragma_version: false,
             has_pragma_module: false,
+        }
+    }
     fn reset(&mut self) {
         self.symbols.clear();
         self.pragmas.clear();
         self.imports.clear();
         self.definitions.clear();
         self.has_pragma_version = false;
         self.has_pragma_module = false;
+    }
     pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
         self.reset();
         let module = &mut modules[module_idx];
         let module_range = &module.tokens.ranges[0];
         debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized);
         debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
         debug_assert!(module.root_id.is_invalid()); // not set yet,
         // Preallocate root in the heap
         let root_id = ctx.heap.alloc_protocol_description(|this| {
             Root{
                 this,
                 pragmas: Vec::new(),
                 imports: Vec::new(),
                 definitions: Vec::new(),
+            }
         });
         module.root_id = root_id;
         // Retrieve first range index, then make immutable borrow
         let mut range_idx = module_range.first_child_idx;
         // Visit token ranges to detect definitions and pragmas
         loop {
             let module = &modules[module_idx];
             let range_idx_usize = range_idx as usize;
             let cur_range = &module.tokens.ranges[range_idx_usize];
             let next_sibling_idx = cur_range.next_sibling_idx;
             let range_kind = cur_range.range_kind;
             // Parse if it is a definition or a pragma
             if range_kind == TokenRangeKind::Definition {
                 self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?;
             } else if range_kind == TokenRangeKind::Pragma {
                 self.visit_pragma_range(modules, module_idx, ctx, range_idx_usize)?;
+            }
             if next_sibling_idx == NO_SIBLING {
                 break;
             } else {
                 range_idx = next_sibling_idx;
+            }
+        }
         // Add the module's symbol scope and the symbols we just parsed
         let module_scope = SymbolScope::Module(root_id);
         ctx.symbols.insert_scope(Some(SymbolScope::Global), module_scope);
         for symbol in self.symbols.drain(..) {
             ctx.symbols.insert_scope(Some(module_scope), SymbolScope::Definition(symbol.variant.as_definition().definition_id));
             if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(module_scope, symbol) {
                 return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol))
+            }
+        }
         // Modify the preallocated root
         let root = &mut ctx.heap[root_id];
         root.pragmas.extend(self.pragmas.drain(..));
         root.definitions.extend(self.definitions.drain(..));
         // Modify module
         let module = &mut modules[module_idx];
         module.phase = ModuleCompilationPhase::SymbolsScanned;
         Ok(())
+    }
     /// Parses the pragma found in token range `range_idx` of the module at
     /// `module_idx`. Two pragmas are accepted, each at most once per module:
     /// - `#module`, followed by a (dot-separated) module name. The name is
     ///   registered in the symbol table and stored in the module.
     /// - `#version`, followed by an integer literal, stored in the module.
     ///
     /// Any other pragma name, a repeated pragma, or a module name that is
     /// already taken by another module results in a parse error.
     fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> {
         let module = &mut modules[module_idx];
         let range = &module.tokens.ranges[range_idx];
         // NOTE(review): the next two lines both bind `iter` (the second
         // shadows the first) and call `iter_range` with different argument
         // lists. This looks like a superseded line kept next to its
         // replacement - confirm which one should remain. The second form
         // iterates up to the end of the entire token buffer instead of the
         // end of the pragma's range.
         let mut iter = module.tokens.iter_range(range);
+        let mut iter = module.tokens.iter_range(range.start, module.tokens.tokens.len() as u32);
         // Consume pragma name
         let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?;
         // Consume pragma values
         if pragma_section == b"#module" {
             // Check if name is defined twice within the same file
             if self.has_pragma_module {
                 return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module name is defined twice"));
+            }
             // Consume the domain-name
             let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
             // NOTE(review): this check is active here while an identical copy
             // is commented out just below the range-end fixup. If `iter` runs
             // until the end of the token buffer (second binding above), then
             // presumably this check fires for every module with tokens after
             // the pragma - confirm which of the two variants is intended.
             if iter.next().is_some() {
                 return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
+            }
             // TODO: Fix with newer token range parsing
             // Overwrites the end of the pragma's token range with the
             // iterator's current token index.
             module.tokens.ranges[range_idx as usize].end = iter.token_index();
             // if iter.next().is_some() {
             //     return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
             // }
             // Add to heap and symbol table
             let pragma_span = InputSpan::from_positions(pragma_start, module_span.end);
             let module_name = ctx.pool.intern(module_name);
             let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{
                 this,
                 span: pragma_span,
                 value: Identifier{ span: module_span, value: module_name.clone() },
             }));
             self.pragmas.push(pragma_id);
             if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name.clone(), module.root_id) {
                 // Naming conflict
                 let this_module = &modules[module_idx];
                 let other_module = seek_module(modules, other_module_root_id).unwrap();
                 let other_module_pragma_id = other_module.name.as_ref().map(|v| (*v).0).unwrap();
                 let other_pragma = ctx.heap[other_module_pragma_id].as_module();
                 return Err(ParseError::new_error_str_at_span(
                     &this_module.source, pragma_span, "conflict in module name"
                 ).with_info_str_at_span(
                     &other_module.source, other_pragma.span, "other module is defined here"
                 ));
+            }
             module.name = Some((pragma_id, module_name));
             self.has_pragma_module = true;
         } else if pragma_section == b"#version" {
             // Check if version is defined twice within the same file
             if self.has_pragma_version {
                 return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module version is defined twice"));
+            }
             // Consume the version pragma
             let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?;
             let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{
                 this,
                 span: InputSpan::from_positions(pragma_start, version_span.end),
                 version,
             }));
             self.pragmas.push(pragma_id);
             module.version = Some((pragma_id, version as i64));
             self.has_pragma_version = true;
         } else {
             // Custom pragma, maybe we support this in the future, but for now
             // we don't.
             return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "illegal pragma name"));
+        }
         Ok(())
+    }
     /// Scans the header of the definition found in token range `range_idx`
     /// of the module at `module_idx`: the keyword determining the kind of
     /// definition, its identifier and an optional angle-bracketed list of
     /// polymorphic variables. An empty definition of the appropriate kind is
     /// preallocated in the heap, and both the resulting symbol and the
     /// definition ID are buffered in this pass (they are not yet inserted in
     /// the symbol table by this function).
     fn visit_definition_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let range = &module.tokens.ranges[range_idx];
         let definition_span = InputSpan::from_positions(
             module.tokens.start_pos(range),
             module.tokens.end_pos(range)
         );
         // NOTE(review): the next two lines both bind `iter` (the second
         // shadows the first) and call `iter_range` with different argument
         // lists. This looks like a superseded line kept next to its
         // replacement - confirm which one should remain.
         let mut iter = module.tokens.iter_range(range);
+        let mut iter = module.tokens.iter_range(range.start, range.end);
         // First ident must be type of symbol
         // NOTE(review): `unwrap` panics instead of returning the parse error;
         // presumably the tokenizer guarantees an identifier here - confirm.
         let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap();
         // Retrieve identifier of definition
         let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
         // NOTE(review): unconditional debug output to stdout for every parsed
         // definition; looks like a debugging leftover - confirm and remove.
         println!("DEBUG: Parsing {} --- {}", String::from_utf8_lossy(kw_text).to_string(), identifier.value.as_str());
         let mut poly_vars = Vec::new();
         maybe_consume_comma_separated(
             TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, ctx,
             |source, iter, ctx| consume_ident_interned(source, iter, ctx),
             &mut poly_vars, "a polymorphic variable", None
         )?;
         let ident_text = identifier.value.clone(); // because we need it later
         let ident_span = identifier.span.clone();
         // Reserve space in AST for definition and add it to the symbol table
         let definition_class;
         let ast_definition_id;
         match kw_text {
             KW_STRUCT => {
                 let struct_def_id = ctx.heap.alloc_struct_definition(|this| {
                     StructDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Struct;
                 ast_definition_id = struct_def_id.upcast();
             },
             KW_ENUM => {
                 let enum_def_id = ctx.heap.alloc_enum_definition(|this| {
                     EnumDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Enum;
                 ast_definition_id = enum_def_id.upcast();
             },
             KW_UNION => {
                 let union_def_id = ctx.heap.alloc_union_definition(|this| {
                     UnionDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Union;
                 ast_definition_id = union_def_id.upcast()
             },
             KW_FUNCTION => {
                 let proc_def_id = ctx.heap.alloc_procedure_definition(|this| {
                     ProcedureDefinition::new_empty(this, module.root_id, definition_span, ProcedureKind::Function, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Function;
                 ast_definition_id = proc_def_id.upcast();
             },
             // Both kinds of components share the `Component` definition
             // class, they only differ in the procedure kind
             KW_PRIMITIVE | KW_COMPOSITE => {
                 let procedure_kind = if kw_text == KW_PRIMITIVE {
                     ProcedureKind::Primitive
                 } else {
                     ProcedureKind::Composite
                 };
                 let proc_def_id = ctx.heap.alloc_procedure_definition(|this| {
                     ProcedureDefinition::new_empty(this, module.root_id, definition_span, procedure_kind, identifier, poly_vars)
                 });
                 definition_class = DefinitionClass::Component;
                 ast_definition_id = proc_def_id.upcast();
             },
             _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(kw_text)),
+        }
         let symbol = Symbol{
             name: ident_text,
             variant: SymbolVariant::Definition(SymbolDefinition{
                 defined_in_module: module.root_id,
                 defined_in_scope: SymbolScope::Module(module.root_id),
                 definition_span,
                 identifier_span: ident_span,
                 imported_at: None,
                 class: definition_class,
                 definition_id: ast_definition_id,
             }),
         };
         self.symbols.push(symbol);
         self.definitions.push(ast_definition_id);
         Ok(())
+    }
+}
@@ \ No newline at end of file @@

src/protocol/parser/pass_tokenizer.rs

➞

Show inline comments

 use crate::protocol::input_source::{
     InputSource as InputSource,
     ParseError,
     InputPosition as InputPosition,
 };
 use super::tokens::*;
 use super::token_parsing::*;
 /// Tokenizer is a reusable parser to tokenize multiple source files using the
 /// same allocated buffers. In a well-formed program, we produce a consistent
 /// tree of token ranges such that we may identify tokens that represent a
 /// definition or an import before producing the entire AST.
 ///
 /// If the program is not well-formed then the tree may be inconsistent, but we
 /// will detect this once we transform the tokens into the AST. To ensure a
 /// consistent AST-producing phase we will require the input to have balanced
 /// curly braces.
 pub(crate) struct PassTokenizer {
     // Stack of input positions of opening curly braces, used to detect
     // unmatched opening braces. Unmatched closing braces are detected
     // immediately, at the moment they are tokenized.
     curly_stack: Vec<InputPosition>,
     // Index of the currently open range: points to an element in the
     // `TokenBuffer.ranges` variable.
     stack_idx: usize,
+}
 impl PassTokenizer {
     pub(crate) fn new() -> Self {
         Self{
             curly_stack: Vec::with_capacity(32),
             stack_idx: 0
+        }
+    }
     pub(crate) fn tokenize(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         // Assert source and buffer are at start
         debug_assert_eq!(source.pos().offset, 0);
         debug_assert!(target.tokens.is_empty());
         debug_assert!(target.ranges.is_empty());
         // Set up for tokenization by pushing the first range onto the stack.
         // This range may get transformed into the appropriate range kind later,
         // see `push_range` and `pop_range`.
         self.stack_idx = 0;
         target.ranges.push(TokenRange{
             parent_idx: NO_RELATION,
             range_kind: TokenRangeKind::Module,
             curly_depth: 0,
             start: 0,
             end: 0,
             num_child_ranges: 0,
             first_child_idx: NO_RELATION,
             last_child_idx: NO_RELATION,
             next_sibling_idx: NO_RELATION,
         });
         // Main tokenization loop
         while let Some(c) = source.next() {
             let token_index = target.tokens.len() as u32;
             if is_char_literal_start(c) {
                 self.consume_char_literal(source, target)?;
             } else if is_string_literal_start(c) {
                 self.consume_string_literal(source, target)?;
             } else if is_identifier_start(c) {
                 let ident = self.consume_identifier(source, target)?;
                 if demarks_definition(ident) {
                     self.push_range(target, TokenRangeKind::Definition, token_index);
                 } else if demarks_import(ident) {
                     self.push_range(target, TokenRangeKind::Import, token_index);
+                }
             } else if is_integer_literal_start(c) {
                 self.consume_number(source, target)?;
             } else if is_pragma_start_or_pound(c) {
                 let was_pragma = self.consume_pragma_or_pound(c, source, target)?;
                 if was_pragma {
                     self.push_range(target, TokenRangeKind::Pragma, token_index);
+                }
             } else if self.is_line_comment_start(c, source) {
                 self.consume_line_comment(source, target)?;
             } else if self.is_block_comment_start(c, source) {
                 self.consume_block_comment(source, target)?;
             } else if is_whitespace(c) {
                 let contained_newline = self.consume_whitespace(source);
                 if contained_newline {
                 self.consume_whitespace(source);
                 let range = &target.ranges[self.stack_idx];
                 if range.range_kind == TokenRangeKind::Pragma {
                     self.pop_range(target, target.tokens.len() as u32);
+                }
+                }
             } else {
                 let was_punctuation = self.maybe_parse_punctuation(c, source, target)?;
                 if let Some((token, token_pos)) = was_punctuation {
                     if token == TokenKind::OpenCurly {
                         self.curly_stack.push(token_pos);
                     } else if token == TokenKind::CloseCurly {
                         // Check if this marks the end of a range we're
                         // currently processing
                         if self.curly_stack.is_empty() {
                             return Err(ParseError::new_error_str_at_pos(
                                 source, token_pos, "unmatched closing curly brace '}'"
                             ));
+                        }
                         self.curly_stack.pop();
                         let range = &target.ranges[self.stack_idx];
                         if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_stack.len() as u32 {
                             self.pop_range(target, target.tokens.len() as u32);
+                        }
                         // Exit early if we have more closing curly braces than
                         // opening curly braces
                     } else if token == TokenKind::SemiColon {
                         // Check if this marks the end of an import
                         let range = &target.ranges[self.stack_idx];
                         if range.range_kind == TokenRangeKind::Import {
                             self.pop_range(target, target.tokens.len() as u32);
+                        }
+                    }
                 } else {
                     return Err(ParseError::new_error_str_at_pos(
                         source, source.pos(), "unexpected character"
                     ));
+                }
+            }
+        }
         // End of file, check if our state is correct
         if let Some(error) = source.had_error.take() {
             return Err(error);
+        }
         if !self.curly_stack.is_empty() {
             // Let's not add a lot of heuristics and just tell the programmer
             // that something is wrong
             let last_unmatched_open = self.curly_stack.pop().unwrap();
             return Err(ParseError::new_error_str_at_pos(
                 source, last_unmatched_open, "unmatched opening curly brace '{'"
             ));
+        }
         // Ranges that did not depend on curly braces may have missing tokens.
         // So close all of the active tokens
         while self.stack_idx != 0 {
             self.pop_range(target, target.tokens.len() as u32);
+        }
         // And finally, we may have a token range at the end that doesn't belong
         // to a range yet, so insert a "code" range if this is the case.
         debug_assert_eq!(self.stack_idx, 0);
         let last_registered_idx = target.ranges[0].end;
         let last_token_idx = target.tokens.len() as u32;
         if last_registered_idx != last_token_idx {
             self.add_code_range(target, 0, last_registered_idx, last_token_idx, NO_RELATION);
+        }
         Ok(())
+    }
     fn is_line_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
         return first_char == b'/' && Some(b'/') == source.lookahead(1);
+    }
     fn is_block_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
         return first_char == b'/' && Some(b'*') == source.lookahead(1);
+    }
     fn maybe_parse_punctuation(
         &mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer
     ) -> Result<Option<(TokenKind, InputPosition)>, ParseError> {
         debug_assert!(first_char != b'#', "'#' needs special handling");
         debug_assert!(first_char != b'\'', "'\'' needs special handling");
         debug_assert!(first_char != b'"', "'\"' needs special handling");
         let pos = source.pos();
         let token_kind;
         if first_char == b'!' {
             source.consume();
             if Some(b'=') == source.next() {
                 source.consume();
                 token_kind = TokenKind::NotEqual;
             } else {
                 token_kind = TokenKind::Exclamation;
+            }
         } else if first_char == b'%' {
             source.consume();
             if Some(b'=') == source.next() {
                 source.consume();
                 token_kind = TokenKind::PercentEquals;
             } else {
                 token_kind = TokenKind::Percent;
+            }
         } else if first_char == b'&' {
             source.consume();
             let next = source.next();
             if Some(b'&') == next {
                 source.consume();
                 token_kind = TokenKind::AndAnd;
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::AndEquals;
             } else {
                 token_kind = TokenKind::And;
+            }
         } else if first_char == b'(' {
             source.consume();
             token_kind = TokenKind::OpenParen;
         } else if first_char == b')' {
             source.consume();
             token_kind = TokenKind::CloseParen;
         } else if first_char == b'*' {
             source.consume();
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::StarEquals;
             } else {
                 token_kind = TokenKind::Star;
+            }
         } else if first_char == b'+' {
             source.consume();
             let next = source.next();
             if Some(b'+') == next {
                 source.consume();
                 token_kind = TokenKind::PlusPlus;
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::PlusEquals;
             } else {
                 token_kind = TokenKind::Plus;
+            }
         } else if first_char == b',' {
             source.consume();
             token_kind = TokenKind::Comma;
         } else if first_char == b'-' {
             source.consume();
             let next = source.next();
             if Some(b'-') == next {
                 source.consume();
                 token_kind = TokenKind::MinusMinus;
             } else if Some(b'>') == next {
                 source.consume();
                 token_kind = TokenKind::ArrowRight;
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::MinusEquals;
             } else {
                 token_kind = TokenKind::Minus;
+            }
         } else if first_char == b'.' {
             source.consume();
             if let Some(b'.') = source.next() {
                 source.consume();
                 token_kind = TokenKind::DotDot;
             } else {
                 token_kind = TokenKind::Dot
+            }
         } else if first_char == b'/' {
             source.consume();
             debug_assert_ne!(Some(b'/'), source.next());
             debug_assert_ne!(Some(b'*'), source.next());
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::SlashEquals;
             } else {
                 token_kind = TokenKind::Slash;
+            }
         } else if first_char == b':' {
             source.consume();
             if let Some(b':') = source.next() {
                 source.consume();
                 token_kind = TokenKind::ColonColon;
             } else {
                 token_kind = TokenKind::Colon;
+            }
         } else if first_char == b';' {
             source.consume();
             token_kind = TokenKind::SemiColon;
         } else if first_char == b'<' {
             source.consume();
             let next = source.next();
             if let Some(b'<') = next {
                 source.consume();
                 if let Some(b'=') = source.next() {
                     source.consume();
                     token_kind = TokenKind::ShiftLeftEquals;
                 } else {
                     token_kind = TokenKind::ShiftLeft;
+                }
             } else if let Some(b'=') = next {
                 source.consume();
                 token_kind = TokenKind::LessEquals;
             } else {
                 token_kind = TokenKind::OpenAngle;
+            }
         } else if first_char == b'=' {
             source.consume();
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::EqualEqual;
             } else {
                 token_kind = TokenKind::Equal;
+            }
         } else if first_char == b'>' {
             source.consume();
             let next = source.next();
             if Some(b'>') == next {
                 source.consume();
                 if Some(b'=') == source.next() {
                     source.consume();
                     token_kind = TokenKind::ShiftRightEquals;
                 } else {
                     token_kind = TokenKind::ShiftRight;
+                }
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::GreaterEquals;
             } else {
                 token_kind = TokenKind::CloseAngle;
+            }
         } else if first_char == b'?' {
             source.consume();
             token_kind = TokenKind::Question;
         } else if first_char == b'@' {
             source.consume();
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::AtEquals;
             } else {
                 token_kind = TokenKind::At;
+            }
         } else if first_char == b'[' {
             source.consume();
             token_kind = TokenKind::OpenSquare;
         } else if first_char == b']' {
             source.consume();
             token_kind = TokenKind::CloseSquare;
         } else if first_char == b'^' {
             source.consume();
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::CaretEquals;
             } else {
                 token_kind = TokenKind::Caret;
+            }
         } else if first_char == b'{' {
             source.consume();
             token_kind = TokenKind::OpenCurly;
         } else if first_char == b'|' {
             source.consume();
             let next = source.next();
             if Some(b'|') == next {
                 source.consume();
                 token_kind = TokenKind::OrOr;
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::OrEquals;
             } else {
                 token_kind = TokenKind::Or;
+            }
         } else if first_char == b'}' {
             source.consume();
             token_kind = TokenKind::CloseCurly;
         } else if first_char == b'~' {
             source.consume();
             token_kind = TokenKind::Tilde;
         } else {
             self.check_ascii(source)?;
             return Ok(None);
+        }
         target.tokens.push(Token::new(token_kind, pos));
         Ok(Some((token_kind, pos)))
+    }
     fn consume_char_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading quote
         debug_assert!(source.next().unwrap() == b'\'');
         source.consume();
         let mut prev_char = b'\'';
         while let Some(c) = source.next() {
             if !c.is_ascii() {
                 return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in char literal"));
+            }
             source.consume();
             // Make sure ending quote was not escaped
             if c == b'\'' && prev_char != b'\\' {
                 prev_char = c;
                 break;
+            }
             prev_char = c;
+        }
         if prev_char != b'\'' {
             // Unterminated character literal, reached end of file.
             return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated character literal"));
+        }
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::Character, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     fn consume_string_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading double quotes
         debug_assert!(source.next().unwrap() == b'"');
         source.consume();
         let mut prev_char = b'"';
         while let Some(c) = source.next() {
             if !c.is_ascii() {
                 return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in string literal"));
+            }
             source.consume();
             if c == b'"' && prev_char != b'\\' {
                 // Unescaped string terminator
                 prev_char = c;
                 break;
+            }
             if prev_char == b'\\' && c == b'\\' {
                 // Escaped backslash, set prev_char to bogus to not conflict
                 // with escaped-" and unterminated string literal detection.
                 prev_char = b'\0';
             } else {
                 prev_char = c;
+            }
+        }
         if prev_char != b'"' {
             // Unterminated string literal
             return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated string literal"));
+        }
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::String, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     fn consume_pragma_or_pound(&mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer) -> Result<bool, ParseError> {
         let start_pos = source.pos();
         debug_assert_eq!(first_char, b'#');
         source.consume();
         let next = source.next();
         if next.is_none() || !is_identifier_start(next.unwrap()) {
             // Just a pound sign
             target.tokens.push(Token::new(TokenKind::Pound, start_pos));
             Ok(false)
         } else {
             // Pound sign followed by identifier
             source.consume();
             while let Some(c) = source.next() {
                 if !is_identifier_remaining(c) {
                     break;
+                }
                 source.consume();
+            }
             self.check_ascii(source)?;
             let end_pos = source.pos();
             target.tokens.push(Token::new(TokenKind::Pragma, start_pos));
             target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
             Ok(true)
+        }
+    }
     fn consume_line_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading "//"
         debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'/');
         source.consume();
         source.consume();
         let mut prev_char = b'/';
         let mut cur_char = b'/';
         while let Some(c) = source.next() {
             prev_char = cur_char;
             cur_char = c;
             if c == b'\n' {
                 // End of line, note that the newline is not consumed
                 break;
+            }
             source.consume();
+        }
         let mut end_pos = source.pos();
         debug_assert_eq!(begin_pos.line, end_pos.line);
         // Modify offset to not include the newline characters
         if cur_char == b'\n' {
             if prev_char == b'\r' {
                 end_pos.offset -= 2;
             } else {
                 end_pos.offset -= 1;
+            }
             // Consume final newline
             source.consume();
         } else {
             // End of comment was due to EOF
             debug_assert!(source.next().is_none())
+        }
         target.tokens.push(Token::new(TokenKind::LineComment, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     fn consume_block_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading "/*"
         debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'*');
         source.consume();
         source.consume();
         // Explicitly do not put prev_char at "*", because then "/*/" would
         // represent a valid and closed block comment
         let mut prev_char = b' ';
         let mut is_closed = false;
         while let Some(c) = source.next() {
             source.consume();
             if prev_char == b'*' && c == b'/' {
                 // End of block comment
                 is_closed = true;
                 break;
+            }
             prev_char = c;
+        }
         if !is_closed {
             return Err(ParseError::new_error_str_at_pos(
                 source, source.pos(), "encountered unterminated block comment")
             );
+        }
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::BlockComment, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     fn consume_identifier<'a>(&mut self, source: &'a mut InputSource, target: &mut TokenBuffer) -> Result<&'a [u8], ParseError> {
         let begin_pos = source.pos();
         debug_assert!(is_identifier_start(source.next().unwrap()));
         source.consume();
         // Keep reading until no more identifier
         while let Some(c) = source.next() {
             if !is_identifier_remaining(c) {
                 break;
+            }
             source.consume();
+        }
         self.check_ascii(source)?;
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::Ident, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(source.section_at_pos(begin_pos, end_pos))
+    }
     fn consume_number(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         debug_assert!(is_integer_literal_start(source.next().unwrap()));
         source.consume();
         // Keep reading until it doesn't look like a number anymore
         while let Some(c) = source.next() {
             if !maybe_number_remaining(c) {
                 break;
+            }
             source.consume();
+        }
         self.check_ascii(source)?;
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::Integer, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     // Consumes whitespace and returns whether or not the whitespace contained
     // a newline.
     fn consume_whitespace(&self, source: &mut InputSource) -> bool {
         debug_assert!(is_whitespace(source.next().unwrap()));
         let mut has_newline = false;
         while let Some(c) = source.next() {
             if !is_whitespace(c) {
                 break;
+            }
             if c == b'\n' {
                 has_newline = true;
+            }
             source.consume();
+        }
         has_newline
+    }
     /// Appends a `Code` range spanning the tokens
     /// `code_start_idx..code_end_idx` as the newest child of the range at
     /// `parent_idx`.
     ///
     /// The parent's `end`, child count and first/last child links are updated,
     /// and the previous last child (if any) is linked to the new range.
     /// `next_sibling_idx` becomes the new range's next sibling; pass
     /// `NO_RELATION` if there is none.
     fn add_code_range(
         &mut self, target: &mut TokenBuffer, parent_idx: i32,
         code_start_idx: u32, code_end_idx: u32, next_sibling_idx: i32
     ) {
         let new_range_idx = target.ranges.len() as i32;
         let parent_range = &mut target.ranges[parent_idx as usize];
         debug_assert_ne!(parent_range.end, code_end_idx, "called push_code_range without a need to do so");

         // A code range can be the very first child (e.g. a file that only
         // holds code outside of any definition). The parent must then point
         // at it, otherwise it ends up with a last child but no first child.
         if parent_range.first_child_idx == NO_RELATION {
             parent_range.first_child_idx = new_range_idx;
         }

         let sibling_idx = parent_range.last_child_idx;
         parent_range.last_child_idx = new_range_idx;
         parent_range.end = code_end_idx;
         parent_range.num_child_ranges += 1;

         let curly_depth = self.curly_stack.len() as u32;
         target.ranges.push(TokenRange{
             parent_idx,
             range_kind: TokenRangeKind::Code,
             curly_depth,
             start: code_start_idx,
             end: code_end_idx,
             num_child_ranges: 0,
             first_child_idx: NO_RELATION,
             last_child_idx: NO_RELATION,
             next_sibling_idx,
         });

         // Fix up the sibling indices
         if sibling_idx != NO_RELATION {
             let sibling_range = &mut target.ranges[sibling_idx as usize];
             sibling_range.next_sibling_idx = new_range_idx;
         }
     }
     fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token_idx: u32) {
         let new_range_idx = target.ranges.len() as i32;
         let parent_idx = self.stack_idx as i32;
         let parent_range = &mut target.ranges[self.stack_idx];
         if parent_range.first_child_idx == NO_RELATION {
             parent_range.first_child_idx = new_range_idx;
+        }
         let last_registered_idx = parent_range.end;
         if last_registered_idx != first_token_idx {
             self.add_code_range(target, parent_idx, last_registered_idx, first_token_idx, new_range_idx + 1);
+        }
         // Push the new range
         self.stack_idx = target.ranges.len();
         let curly_depth = self.curly_stack.len() as u32;
         target.ranges.push(TokenRange{
             parent_idx,
             range_kind,
             curly_depth,
             start: first_token_idx,
             end: first_token_idx, // modified when popped
             num_child_ranges: 0,
             first_child_idx: NO_RELATION,
             last_child_idx: NO_RELATION,
             next_sibling_idx: NO_RELATION
         })
+    }
     /// Closes the active range at `end_token_idx` and makes its parent the
     /// active range again, registering the closed range as the parent's
     /// newest child.
     fn pop_range(&mut self, target: &mut TokenBuffer, end_token_idx: u32) {
         let closed_slot = self.stack_idx;
         let closed_idx = closed_slot as i32;

         // Finish the range that is being closed and find its parent
         let parent_slot = {
             let closed = &mut target.ranges[closed_slot];
             debug_assert!(self.stack_idx != 0, "attempting to pop top-level range");
             closed.end = end_token_idx;
             debug_assert_ne!(closed.start, end_token_idx);
             closed.parent_idx as usize
         };
         self.stack_idx = parent_slot;

         // Update the parent's child bookkeeping, remembering the child that
         // was last until now so it can be linked to the closed range.
         let older_sibling_idx = {
             let parent = &mut target.ranges[parent_slot];
             if parent.first_child_idx == NO_RELATION {
                 parent.first_child_idx = closed_idx;
             }
             let older = parent.last_child_idx;
             parent.last_child_idx = closed_idx;
             parent.end = end_token_idx;
             parent.num_child_ranges += 1;
             older
         };

         // Fix up the sibling (if it exists)
         if older_sibling_idx != NO_RELATION {
             target.ranges[older_sibling_idx as usize].next_sibling_idx = closed_idx;
         }
     }
     fn check_ascii(&self, source: &InputSource) -> Result<(), ParseError> {
         match source.next() {
             Some(c) if !c.is_ascii() => {
                 Err(ParseError::new_error_str_at_pos(source, source.pos(), "encountered a non-ASCII character"))
             },
             _else => {
                 Ok(())
             },
+        }
+    }
+}
 // Helpers for characters
 fn demarks_definition(ident: &[u8]) -> bool {
     return
         ident == KW_STRUCT ||
             ident == KW_ENUM ||
             ident == KW_UNION ||
             ident == KW_FUNCTION ||
             ident == KW_PRIMITIVE ||
             ident == KW_COMPOSITE
+}
 fn demarks_import(ident: &[u8]) -> bool {
     return ident == KW_IMPORT;
+}
 /// Returns true if `c` is an ASCII whitespace byte as defined by
 /// `u8::is_ascii_whitespace`.
 fn is_whitespace(c: u8) -> bool {
     u8::is_ascii_whitespace(&c)
 }
 /// Returns true if `c` is the single quote that opens a character literal.
 fn is_char_literal_start(c: u8) -> bool {
     matches!(c, b'\'')
 }
 /// Returns true if `c` is the double quote that opens a string literal.
 fn is_string_literal_start(c: u8) -> bool {
     matches!(c, b'"')
 }
 /// Returns true if `c` is `#`, which starts either a pragma or a lone pound
 /// token.
 fn is_pragma_start_or_pound(c: u8) -> bool {
     matches!(c, b'#')
 }
 /// Returns true if `c` may be the first byte of an identifier: an ASCII
 /// letter or an underscore.
 fn is_identifier_start(c: u8) -> bool {
     c.is_ascii_alphabetic() || c == b'_'
 }
 /// Returns true if `c` may appear after the first byte of an identifier: an
 /// ASCII letter, an ASCII digit or an underscore.
 fn is_identifier_remaining(c: u8) -> bool {
     c.is_ascii_alphanumeric() || c == b'_'
 }
 /// Returns true if `c` may be the first byte of an integer literal, i.e. an
 /// ASCII decimal digit.
 fn is_integer_literal_start(c: u8) -> bool {
     c.is_ascii_digit()
 }
 /// Returns true if `c` could be part of an integer literal after its first
 /// digit: a hexadecimal digit, a radix indicator (`o`, `O`, `x`, `X`) or a
 /// `_` separator.
 fn maybe_number_remaining(c: u8) -> bool {
     // Note: the hex digit range already includes the possible binary
     // indicator 'b' and 'B'.
     matches!(c, b'o' | b'O' | b'x' | b'X' | b'_') || c.is_ascii_hexdigit()
 }

src/protocol/parser/tokens.rs

➞

Show inline comments

 use crate::protocol::input_source::{
     InputPosition as InputPosition,
     InputSpan
 };
 /// Represents a particular kind of token. Some kinds represent tokens with a
 /// variable number of characters. Such a token is always followed by a
 /// `TokenKind::SpanEnd` token.
 ///
 /// The order of the variants is significant: `has_span_end` and
 /// `num_characters` classify a kind by comparing it against the last variant
 /// of a group, so variants must stay grouped as they are below.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub enum TokenKind {
     // Variable-character tokens, followed by a SpanEnd token
     Ident,          // regular identifier
     Pragma,         // identifier with prefixed `#`, range includes `#`
     Integer,        // integer literal
     String,         // string literal, range includes `"`
     Character,      // character literal, range includes `'`
     LineComment,    // line comment, range includes leading `//`, but not newline
     BlockComment,   // block comment, range includes leading `/*` and trailing `*/`
     // Punctuation (single character)
     Exclamation,    // !
     Question,       // ?
     Pound,          // #
     OpenAngle,      // <
     OpenCurly,      // {
     OpenParen,      // (
     OpenSquare,     // [
     CloseAngle,     // >
     CloseCurly,     // }
     CloseParen,     // )
     CloseSquare,    // ]
     Colon,          // :
     Comma,          // ,
     Dot,            // .
     SemiColon,      // ;
     // Operator-like (single character)
     At,             // @
     Plus,           // +
     Minus,          // -
     Star,           // *
     Slash,          // /
     Percent,        // %
     Caret,          // ^
     And,            // &
     Or,             // |
     Tilde,          // ~
     Equal,          // =
     // Punctuation (two characters)
     ColonColon,     // ::
     DotDot,         // ..
     ArrowRight,     // ->
     // Operator-like (two characters)
     AtEquals,       // @=
     PlusPlus,       // ++
     PlusEquals,     // +=
     MinusMinus,     // --
     MinusEquals,    // -=
     StarEquals,     // *=
     SlashEquals,    // /=
     PercentEquals,  // %=
     CaretEquals,    // ^=
     AndAnd,         // &&
     AndEquals,      // &=
     OrOr,           // ||
     OrEquals,       // |=
     EqualEqual,     // ==
     NotEqual,       // !=
     ShiftLeft,      // <<
     LessEquals,     // <=
     ShiftRight,     // >>
     GreaterEquals,  // >=
     // Operator-like (three characters)
     ShiftLeftEquals,// <<=
     ShiftRightEquals, // >>=
     // Special marker token to indicate end of variable-character tokens
     SpanEnd,
+}
 // Queries on a token kind: whether it has a variable length and, for the fixed-length kinds, the
 // number of characters and the literal text it stands for.
 impl TokenKind {
     /// Returns true if the next expected token is the special `TokenKind::SpanEnd` token. This is
     /// the case for tokens of variable length (e.g. an identifier).
     pub(crate) fn has_span_end(&self) -> bool {
         // Every kind that compares `<= BlockComment` is treated as variable-length.
         // NOTE(review): this depends on the ordering of the `TokenKind` variants (presumably
         // derived from declaration order) — confirm before reordering or inserting variants.
         return *self <= TokenKind::BlockComment
+    }
     /// Returns the number of characters associated with the token. May only be called on tokens
     /// that do not have a variable length.
     fn num_characters(&self) -> u32 {
         debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
         // The kinds are split into three bands by comparing against the last single-character
         // kind (`Equal`) and the last two-character kind (`GreaterEquals`).
         // NOTE(review): the three branch bodies are empty in this view, so the returned counts
         // are not visible here. Going by the section comments on the enum (single / two / three
         // characters) they presumably yield 1, 2 and 3 — confirm against the actual file.
         if *self <= TokenKind::Equal {
         } else if *self <= TokenKind::GreaterEquals {
         } else {
+        }
+    }
     /// Returns the characters that are represented by the token, may only be called on tokens that
     /// do not have a variable length.
     pub fn token_chars(&self) -> &'static str {
         debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
         use TokenKind as TK;
         match self {
             TK::Exclamation => "!",
             TK::Question => "?",
             TK::Pound => "#",
             TK::OpenAngle => "<",
             TK::OpenCurly => "{",
             TK::OpenParen => "(",
             TK::OpenSquare => "[",
             TK::CloseAngle => ">",
             TK::CloseCurly => "}",
             TK::CloseParen => ")",
             TK::CloseSquare => "]",
             TK::Colon => ":",
             TK::Comma => ",",
             TK::Dot => ".",
             TK::SemiColon => ";",
             TK::At => "@",
             TK::Plus => "+",
             TK::Minus => "-",
             TK::Star => "*",
             TK::Slash => "/",
             TK::Percent => "%",
             TK::Caret => "^",
             TK::And => "&",
             TK::Or => "|",
             TK::Tilde => "~",
             TK::Equal => "=",
             TK::ColonColon => "::",
             TK::DotDot => "..",
             TK::ArrowRight => "->",
             TK::AtEquals => "@=",
             TK::PlusPlus => "++",
             TK::PlusEquals => "+=",
             TK::MinusMinus => "--",
             TK::MinusEquals => "-=",
             TK::StarEquals => "*=",
             TK::SlashEquals => "/=",
             TK::PercentEquals => "%=",
             TK::CaretEquals => "^=",
             TK::AndAnd => "&&",
             TK::AndEquals => "&=",
             TK::OrOr => "||",
             TK::OrEquals => "|=",
             TK::EqualEqual => "==",
             TK::NotEqual => "!=",
             TK::ShiftLeft => "<<",
             TK::LessEquals => "<=",
             TK::ShiftRight => ">>",
             TK::GreaterEquals => ">=",
             TK::ShiftLeftEquals => "<<=",
             TK::ShiftRightEquals => ">>=",
             // Let's keep these in explicitly for now, in case we want to add more symbols.
             // These kinds have no fixed text; the `debug_assert!` above already rejects them, so
             // this arm panics if it is ever reached.
             TK::Ident | TK::Pragma | TK::Integer | TK::String | TK::Character |
             TK::LineComment | TK::BlockComment | TK::SpanEnd => unreachable!(),
+        }
+    }
+}
 /// Represents a single token at a particular position.
 pub struct Token {
     // What kind of token this is
     pub kind: TokenKind,
     // Position in the input where the token starts. For a `TokenKind::SpanEnd` marker this is
     // the position used as the end of the preceding variable-length token.
     pub pos: InputPosition,
+}
 impl Token {
     /// Constructs a token of the given `kind` located at `pos`.
     pub(crate) fn new(kind: TokenKind, pos: InputPosition) -> Self {
         Self{ kind, pos }
+    }
+}
 /// The kind of token ranges that are specially parsed by the tokenizer.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum TokenRangeKind {
     // Root of the range tree: a range of this kind has no parent (see `TokenRange::parent_idx`)
     Module,
     // NOTE(review): the remaining kinds presumably mark a pragma, an import, a definition and
     // other code respectively — their exact boundaries are decided by the tokenizer, confirm there.
     Pragma,
     Import,
     Definition,
     Code,
+}
 // Sentinel for "no such range". Presumably the -1 that the `TokenRange` index fields
 // (parent/child/sibling) are documented to hold when there is no such relation — confirm usage.
 pub const NO_RELATION: i32 = -1;
 // Same sentinel value, named for use as a "no next sibling" marker.
 pub const NO_SIBLING: i32 = NO_RELATION;
 /// A range of tokens with a specific meaning. Such a range is part of a tree
 /// where each parent tree envelops all of its children.
 #[derive(Debug)]
 pub struct TokenRange {
     // Index of parent in `TokenBuffer.ranges`, does not have a parent if the
     // range kind is Module, in that case the parent index is -1.
     pub parent_idx: i32,
     pub range_kind: TokenRangeKind,
     // NOTE(review): presumably the `{`/`}` nesting depth associated with this
     // range — confirm against the tokenizer that fills it in.
     pub curly_depth: u32,
     // Offsets into `TokenBuffer.tokens`: the tokens belonging to this range.
     pub start: u32,             // first token (inclusive index)
     pub end: u32,               // one past the last token (exclusive index)
     // Child ranges (indices into `TokenBuffer.ranges`)
     pub num_child_ranges: u32,  // Number of subranges
     pub first_child_idx: i32,   // First subrange (or -1 if no subranges)
     pub last_child_idx: i32,    // Last subrange (or -1 if no subranges)
     pub next_sibling_idx: i32,  // Next subrange (or -1 if no next subrange)
+}
 // Holds the tokens of a module together with the tree of token ranges that refer to them.
 pub struct TokenBuffer {
     // All tokens; variable-length tokens are followed by a `TokenKind::SpanEnd` marker token
     pub tokens: Vec<Token>,
     // Range tree nodes; the `TokenRange` index fields refer to entries of this vector
     pub ranges: Vec<TokenRange>,
+}
 impl TokenBuffer {
     /// Creates a buffer without any tokens or ranges.
     pub(crate) fn new() -> Self {
         Self{ tokens: Vec::new(), ranges: Vec::new() }
+    }
     // NOTE(review): two `iter_range` signatures appear back to back and the first one has no
     // closing brace in this view. This looks like the removed and the added version of the same
     // method as rendered by the changeset — confirm which one the file actually contains.
     pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> {
         TokenIter::new(self, range.start as usize, range.end as usize)
     /// Returns an iterator over the tokens with indices in `[inclusive_start, exclusive_end)`.
     pub(crate) fn iter_range<'a>(&'a self, inclusive_start: u32, exclusive_end: u32) -> TokenIter<'a> {
         debug_assert!(exclusive_end as usize <= self.tokens.len());
         TokenIter::new(self, inclusive_start as usize, exclusive_end as usize)
+    }
     /// Returns the position of the first token in `range`.
     pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
         self.tokens[range.start as usize].pos
+    }
     /// Returns the position just past the last token in `range`.
     pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition {
         // NOTE(review): `range.end as usize - 1` underflows when `range.end` is 0 — confirm that
         // callers only pass ranges with `end > 0`.
         let last_token = &self.tokens[range.end as usize - 1];
         if last_token.kind == TokenKind::SpanEnd {
             // A span-end marker's own position is used as the end position
             return last_token.pos
         } else {
             // Fixed-length token: end is its start offset by its character count
             debug_assert!(!last_token.kind.has_span_end());
             return last_token.pos.with_offset(last_token.kind.num_characters());
+        }
+    }
+}
 /// Iterator over tokens within a specific `TokenRange`.
 pub(crate) struct TokenIter<'a> {
     // The complete token vector of the buffer being iterated
     tokens: &'a Vec<Token>,
     // Index of the current token, i.e. the one inspected by `next_including_comments`
     cur: usize,
     // Exclusive end index of the iterated range
     end: usize,
+}
 impl<'a> TokenIter<'a> {
     /// Creates an iterator over `buffer.tokens` for the index range `[start, end)`.
     fn new(buffer: &'a TokenBuffer, start: usize, end: usize) -> Self {
         Self{ tokens: &buffer.tokens, cur: start, end }
+    }
     /// Returns the next token (may include comments), or `None` if at the end
     /// of the range.
     pub(crate) fn next_including_comments(&self) -> Option<TokenKind> {
         // Only inspects the current token; the iterator is not advanced
         if self.cur >= self.end {
             return None;
+        }
         let token = &self.tokens[self.cur];
         Some(token.kind)
+    }
     /// Returns the next token (but skips over comments), or `None` if at the
     /// end of the range
     pub(crate) fn next(&mut self) -> Option<TokenKind> {
         // Comment tokens are consumed along the way; the returned token itself is not consumed
         while let Some(token_kind) = self.next_including_comments() {
             if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
                 return Some(token_kind);
+            }
             self.consume();
+        }
         return None
+    }
     /// Peeks ahead by one token (i.e. the one that comes after `next()`), and
     /// skips over comments
     pub(crate) fn peek(&self) -> Option<TokenKind> {
         // The scan starts right after the current index and also skips `SpanEnd` markers, so the
         // marker that follows a variable-length current token is not reported as the peeked token
         for next_idx in self.cur + 1..self.end {
             let next_kind = self.tokens[next_idx].kind;
             if next_kind != TokenKind::LineComment && next_kind != TokenKind::BlockComment && next_kind != TokenKind::SpanEnd {
                 return Some(next_kind);
+            }
+        }
         return None;
+    }
     /// Returns the start position belonging to the token returned by `next`. If
     /// there is not a next token, then we return the end position of the
     /// previous token.
     pub(crate) fn last_valid_pos(&self) -> InputPosition {
         if self.cur < self.end {
             // Return token position
             return self.tokens[self.cur].pos
+        }
         // Return previous token end
         // NOTE(review): `self.cur - 1` underflows when `cur` is 0 (e.g. an empty range starting
         // at index 0) — confirm that callers cannot reach this with `cur == 0`.
         let token = &self.tokens[self.cur - 1];
         return if token.kind == TokenKind::SpanEnd {
             token.pos
         } else {
             token.pos.with_offset(token.kind.num_characters())
         };
+    }
     /// Assumes the token is not at the end and returns the starting position
     /// belonging to the token returned by `next`.
     pub(crate) fn next_start_position(&self) -> InputPosition {
         debug_assert!(self.cur < self.end);
         return self.tokens[self.cur].pos;
+    }
     /// Returns the token range belonging to the token returned by `next`. This
     /// assumes that we're not at the end of the range we're iterating over.
     pub(crate) fn next_positions(&self) -> (InputPosition, InputPosition) {
         debug_assert!(self.cur < self.end);
         let token = &self.tokens[self.cur];
         if token.kind.has_span_end() {
             // Variable-length token: the end is the position of the `SpanEnd` marker after it
             let span_end = &self.tokens[self.cur + 1];
             debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
             (token.pos, span_end.pos)
         } else {
             // Fixed-length token: the end is the start offset by the token's character count
             let offset = token.kind.num_characters();
             (token.pos, token.pos.with_offset(offset))
+        }
+    }
     /// See `next_positions`
     pub(crate) fn next_span(&self) -> InputSpan {
         let (begin, end) = self.next_positions();
         return InputSpan::from_positions(begin, end)
+    }
     /// Advances the iterator to the next (meaningful) token.
     pub(crate) fn consume(&mut self) {
         // Does nothing when already at the end of the range
         if let Some(kind) = self.next_including_comments() {
             if kind.has_span_end() {
                 // Step over the token and the `SpanEnd` marker that follows it
                 self.cur += 2;
             } else {
                 self.cur += 1;
+            }
+        }
+    }
     /// Returns the index of the current token as a `u32`.
     pub(crate) fn token_index(&self) -> u32 {
         return self.cur as u32;
+    }
     /// Saves the current iteration position, may be passed to `load` to return
     /// the iterator to a previous position.
     pub(crate) fn save(&self) -> (usize, usize) {
         (self.cur, self.end)
+    }
     /// Restores an iteration position (current index and end index) obtained from `save`.
     pub(crate) fn load(&mut self, saved: (usize, usize)) {
         self.cur = saved.0;
         self.end = saved.1;
+    }
+}
@@ \ No newline at end of file @@

std/std.global.pdl

➞

Show inline comments

 #module std.global
 // Note: the parsing of token ranges and pragmas needs to change. For now we
 // insert spaces to work with the current system. It needs to become a system
 // where the pragmas, "func" keywords (and similar keywords) indicate the
 // initial points to start parsing.
 // Every declaration below has `#builtin` as its entire body.
 func get<T>(in<T> input) -> T { #builtin }
 func put<T>(out<T> output, T value) -> #type_void { #builtin }
 // NOTE(review): unlike the other declarations, the parameter of `fires` has no
 // name in this view — confirm that this is intended.
 func fires<T>(#type_portlike <T>) -> bool { #builtin }
 func create<T>(#type_integerlike length) -> T[] { #builtin }
 func length<T>(#type_arraylike <T> array) -> u32 { #builtin }
 func assert(bool condition) -> #type_void { #builtin }
 func print(string message) -> #type_void { #builtin }
@@ \ No newline at end of file @@

tokens.txt

➞

Show inline comments

deleted file

0 comments (0 inline, 0 general)